OpenCC 1.2.0__cp38-cp38-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencc/__init__.py +49 -0
- opencc/clib/__init__.py +0 -0
- opencc/clib/bin/opencc +0 -0
- opencc/clib/bin/opencc_dict +0 -0
- opencc/clib/bin/opencc_phrase_extract +0 -0
- opencc/clib/include/opencc/BinaryDict.hpp +53 -0
- opencc/clib/include/opencc/Common.hpp +82 -0
- opencc/clib/include/opencc/Config.hpp +49 -0
- opencc/clib/include/opencc/Conversion.hpp +47 -0
- opencc/clib/include/opencc/ConversionChain.hpp +43 -0
- opencc/clib/include/opencc/Converter.hpp +51 -0
- opencc/clib/include/opencc/DartsDict.hpp +60 -0
- opencc/clib/include/opencc/Dict.hpp +92 -0
- opencc/clib/include/opencc/DictConverter.hpp +32 -0
- opencc/clib/include/opencc/DictEntry.hpp +173 -0
- opencc/clib/include/opencc/DictGroup.hpp +57 -0
- opencc/clib/include/opencc/Exception.hpp +88 -0
- opencc/clib/include/opencc/Export.hpp +40 -0
- opencc/clib/include/opencc/Lexicon.hpp +70 -0
- opencc/clib/include/opencc/MarisaDict.hpp +63 -0
- opencc/clib/include/opencc/MaxMatchSegmentation.hpp +43 -0
- opencc/clib/include/opencc/Optional.hpp +76 -0
- opencc/clib/include/opencc/PhraseExtract.hpp +195 -0
- opencc/clib/include/opencc/Segmentation.hpp +32 -0
- opencc/clib/include/opencc/Segments.hpp +118 -0
- opencc/clib/include/opencc/SerializableDict.hpp +77 -0
- opencc/clib/include/opencc/SerializedValues.hpp +52 -0
- opencc/clib/include/opencc/SimpleConverter.hpp +113 -0
- opencc/clib/include/opencc/TextDict.hpp +60 -0
- opencc/clib/include/opencc/UTF8StringSlice.hpp +246 -0
- opencc/clib/include/opencc/UTF8Util.hpp +291 -0
- opencc/clib/include/opencc/opencc.h +161 -0
- opencc/clib/include/opencc/opencc_config.h +21 -0
- opencc/clib/lib/cmake/opencc/OpenCCConfig.cmake +31 -0
- opencc/clib/lib/cmake/opencc/OpenCCConfigVersion.cmake +65 -0
- opencc/clib/lib/cmake/opencc/OpenCCTargets-release.cmake +29 -0
- opencc/clib/lib/cmake/opencc/OpenCCTargets.cmake +110 -0
- opencc/clib/lib/libmarisa.a +0 -0
- opencc/clib/lib/libopencc.a +0 -0
- opencc/clib/lib/pkgconfig/opencc.pc +11 -0
- opencc/clib/opencc_clib.cpython-38-x86_64-linux-gnu.so +0 -0
- opencc/clib/share/opencc/HKVariants.ocd2 +0 -0
- opencc/clib/share/opencc/HKVariantsRev.ocd2 +0 -0
- opencc/clib/share/opencc/HKVariantsRevPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/JPShinjitaiCharacters.ocd2 +0 -0
- opencc/clib/share/opencc/JPShinjitaiPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/JPVariants.ocd2 +0 -0
- opencc/clib/share/opencc/JPVariantsRev.ocd2 +0 -0
- opencc/clib/share/opencc/STCharacters.ocd2 +0 -0
- opencc/clib/share/opencc/STPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/TSCharacters.ocd2 +0 -0
- opencc/clib/share/opencc/TSPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/TWPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/TWPhrasesRev.ocd2 +0 -0
- opencc/clib/share/opencc/TWVariants.ocd2 +0 -0
- opencc/clib/share/opencc/TWVariantsRev.ocd2 +0 -0
- opencc/clib/share/opencc/TWVariantsRevPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/hk2s.json +33 -0
- opencc/clib/share/opencc/hk2t.json +22 -0
- opencc/clib/share/opencc/jp2t.json +25 -0
- opencc/clib/share/opencc/s2hk.json +27 -0
- opencc/clib/share/opencc/s2t.json +22 -0
- opencc/clib/share/opencc/s2tw.json +27 -0
- opencc/clib/share/opencc/s2twp.json +32 -0
- opencc/clib/share/opencc/t2hk.json +16 -0
- opencc/clib/share/opencc/t2jp.json +16 -0
- opencc/clib/share/opencc/t2s.json +22 -0
- opencc/clib/share/opencc/t2tw.json +16 -0
- opencc/clib/share/opencc/tw2s.json +33 -0
- opencc/clib/share/opencc/tw2sp.json +36 -0
- opencc/clib/share/opencc/tw2t.json +22 -0
- opencc/py.typed +0 -0
- opencc-1.2.0.dist-info/AUTHORS +12 -0
- opencc-1.2.0.dist-info/LICENSE +56 -0
- opencc-1.2.0.dist-info/METADATA +347 -0
- opencc-1.2.0.dist-info/RECORD +78 -0
- opencc-1.2.0.dist-info/WHEEL +5 -0
- opencc-1.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "Segments.hpp"
|
|
23
|
+
#include "UTF8Util.hpp"
|
|
24
|
+
|
|
25
|
+
namespace opencc {
|
|
26
|
+
/**
|
|
27
|
+
* Key-values pair entry
|
|
28
|
+
* @ingroup opencc_cpp_api
|
|
29
|
+
*/
|
|
30
|
+
class OPENCC_EXPORT DictEntry {
|
|
31
|
+
public:
|
|
32
|
+
virtual ~DictEntry() {}
|
|
33
|
+
|
|
34
|
+
virtual std::string Key() const = 0;
|
|
35
|
+
|
|
36
|
+
virtual std::vector<std::string> Values() const = 0;
|
|
37
|
+
|
|
38
|
+
virtual std::string GetDefault() const = 0;
|
|
39
|
+
|
|
40
|
+
virtual size_t NumValues() const = 0;
|
|
41
|
+
|
|
42
|
+
virtual std::string ToString() const = 0;
|
|
43
|
+
|
|
44
|
+
size_t KeyLength() const { return Key().length(); }
|
|
45
|
+
|
|
46
|
+
bool operator<(const DictEntry& that) const { return Key() < that.Key(); }
|
|
47
|
+
|
|
48
|
+
bool operator==(const DictEntry& that) const { return Key() == that.Key(); }
|
|
49
|
+
|
|
50
|
+
static bool UPtrLessThan(const std::unique_ptr<DictEntry>& a,
|
|
51
|
+
const std::unique_ptr<DictEntry>& b) {
|
|
52
|
+
return *a < *b;
|
|
53
|
+
}
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
/**
 * Entry that carries a key and no values.
 *
 * The key doubles as the default string and as the textual form.
 */
class OPENCC_EXPORT NoValueDictEntry : public DictEntry {
public:
  NoValueDictEntry(const std::string& _key) : key(_key) {}

  ~NoValueDictEntry() override {}

  /// Returns a copy of the stored key.
  std::string Key() const override { return key; }

  /// Always returns an empty vector.
  std::vector<std::string> Values() const override { return {}; }

  /// With no value available, the key itself is the default.
  std::string GetDefault() const override { return key; }

  /// Always zero.
  size_t NumValues() const override { return 0; }

  /// The textual form is just the key.
  std::string ToString() const override { return key; }

private:
  std::string key;
};
|
|
77
|
+
|
|
78
|
+
/**
 * Base class for entries that have exactly one value.
 *
 * Subclasses provide Key() and Value(); the remaining DictEntry queries are
 * answered in terms of those two.
 */
class OPENCC_EXPORT SingleValueDictEntry : public DictEntry {
public:
  /// Returns the single value of this entry.
  virtual std::string Value() const = 0;

  /// Returns a one-element vector holding Value().
  std::vector<std::string> Values() const override {
    return std::vector<std::string>(1, Value());
  }

  /// The only value is also the default.
  std::string GetDefault() const override { return Value(); }

  /// Always one.
  size_t NumValues() const override { return 1; }

  /// Returns the key and the value separated by a tab character.
  std::string ToString() const override {
    std::string text = Key();
    text += "\t";
    text += Value();
    return text;
  }
};
|
|
94
|
+
|
|
95
|
+
/**
 * Single-value entry that stores its key and value as std::string members.
 */
class OPENCC_EXPORT StrSingleValueDictEntry : public SingleValueDictEntry {
public:
  StrSingleValueDictEntry(const std::string& _key, const std::string& _value)
      : key(_key), value(_value) {}

  ~StrSingleValueDictEntry() override {}

  /// Returns a copy of the stored key.
  std::string Key() const override { return key; }

  /// Returns a copy of the stored value.
  std::string Value() const override { return value; }

private:
  std::string key;
  std::string value;
};
|
|
110
|
+
|
|
111
|
+
/**
 * Base class for entries that may hold several values.
 */
class OPENCC_EXPORT MultiValueDictEntry : public DictEntry {
public:
  /// Returns the first value, or the key when the entry has no values.
  std::string GetDefault() const override {
    if (NumValues() == 0) {
      return Key();
    }
    return Values().at(0);
  }

  /// Textual form of the entry; defined out of line.
  std::string ToString() const override;
};
|
|
123
|
+
|
|
124
|
+
/**
 * Multi-value entry that stores its key as a std::string and its values as a
 * std::vector<std::string>.
 */
class OPENCC_EXPORT StrMultiValueDictEntry : public MultiValueDictEntry {
public:
  StrMultiValueDictEntry(const std::string& _key,
                         const std::vector<std::string>& _values)
      : key(_key), values(_values) {}

  ~StrMultiValueDictEntry() override {}

  /// Returns a copy of the stored key.
  std::string Key() const override { return key; }

  /// Returns the number of stored values.
  size_t NumValues() const override { return values.size(); }

  /// Returns a copy of the stored values.
  std::vector<std::string> Values() const override { return values; }

private:
  std::string key;
  std::vector<std::string> values;
};
|
|
142
|
+
|
|
143
|
+
class OPENCC_EXPORT DictEntryFactory {
|
|
144
|
+
public:
|
|
145
|
+
static DictEntry* New(const std::string& key) {
|
|
146
|
+
return new NoValueDictEntry(key);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
static DictEntry* New(const std::string& key, const std::string& value) {
|
|
150
|
+
return new StrSingleValueDictEntry(key, value);
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
static DictEntry* New(const std::string& key,
|
|
154
|
+
const std::vector<std::string>& values) {
|
|
155
|
+
if (values.size() == 0) {
|
|
156
|
+
return New(key);
|
|
157
|
+
} else if (values.size() == 1) {
|
|
158
|
+
return New(key, values.front());
|
|
159
|
+
}
|
|
160
|
+
return new StrMultiValueDictEntry(key, values);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
static DictEntry* New(const DictEntry* entry) {
|
|
164
|
+
if (entry->NumValues() == 0) {
|
|
165
|
+
return new NoValueDictEntry(entry->Key());
|
|
166
|
+
} else if (entry->NumValues() == 1) {
|
|
167
|
+
return new StrSingleValueDictEntry(entry->Key(), entry->Values().front());
|
|
168
|
+
} else {
|
|
169
|
+
return new StrMultiValueDictEntry(entry->Key(), entry->Values());
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
};
|
|
173
|
+
} // namespace opencc
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include <list>
|
|
22
|
+
|
|
23
|
+
#include "Common.hpp"
|
|
24
|
+
#include "Dict.hpp"
|
|
25
|
+
|
|
26
|
+
namespace opencc {
|
|
27
|
+
/**
|
|
28
|
+
* Group of dictionaries
|
|
29
|
+
* @ingroup opencc_cpp_api
|
|
30
|
+
*/
|
|
31
|
+
class OPENCC_EXPORT DictGroup : public Dict {
|
|
32
|
+
public:
|
|
33
|
+
DictGroup(const std::list<DictPtr>& dicts);
|
|
34
|
+
|
|
35
|
+
static DictGroupPtr NewFromDict(const Dict& dict);
|
|
36
|
+
|
|
37
|
+
virtual ~DictGroup();
|
|
38
|
+
|
|
39
|
+
virtual size_t KeyMaxLength() const;
|
|
40
|
+
|
|
41
|
+
virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
|
|
42
|
+
|
|
43
|
+
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
|
|
44
|
+
size_t len) const;
|
|
45
|
+
|
|
46
|
+
virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
|
|
47
|
+
size_t len) const;
|
|
48
|
+
|
|
49
|
+
virtual LexiconPtr GetLexicon() const;
|
|
50
|
+
|
|
51
|
+
const std::list<DictPtr> GetDicts() const { return dicts; }
|
|
52
|
+
|
|
53
|
+
private:
|
|
54
|
+
const size_t keyMaxLength;
|
|
55
|
+
const std::list<DictPtr> dicts;
|
|
56
|
+
};
|
|
57
|
+
} // namespace opencc
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include <sstream>
|
|
22
|
+
#include <stdexcept>
|
|
23
|
+
#include <string>
|
|
24
|
+
|
|
25
|
+
#include "Export.hpp"
|
|
26
|
+
|
|
27
|
+
// Visual Studio releases before 2015 (14.0, _MSC_VER 1900) do not support the
// C++11 "noexcept" specifier, so it is erased for those compilers only.
#if defined(_MSC_VER)
#if _MSC_VER < 1900
#define noexcept
#endif // _MSC_VER < 1900
#endif // defined(_MSC_VER)
|
|
32
|
+
|
|
33
|
+
namespace opencc {
|
|
34
|
+
|
|
35
|
+
class OPENCC_EXPORT Exception {
|
|
36
|
+
public:
|
|
37
|
+
Exception() {}
|
|
38
|
+
|
|
39
|
+
virtual ~Exception() throw() {}
|
|
40
|
+
|
|
41
|
+
Exception(const std::string& _message) : message(_message) {}
|
|
42
|
+
|
|
43
|
+
virtual const char* what() const noexcept { return message.c_str(); }
|
|
44
|
+
|
|
45
|
+
protected:
|
|
46
|
+
std::string message;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
/**
 * Exception whose message is the file name followed by
 * " not found or not accessible.".
 */
class OPENCC_EXPORT FileNotFound : public Exception {
public:
  FileNotFound(const std::string& fileName)
      : Exception(
            std::string(fileName).append(" not found or not accessible.")) {}
};
|
|
54
|
+
|
|
55
|
+
/**
 * Exception whose message is the file name followed by " not writable.".
 */
class OPENCC_EXPORT FileNotWritable : public Exception {
public:
  FileNotWritable(const std::string& fileName)
      : Exception(std::string(fileName).append(" not writable.")) {}
};
|
|
60
|
+
|
|
61
|
+
/**
 * Exception whose message is "Invalid format: " followed by the given text.
 */
class OPENCC_EXPORT InvalidFormat : public Exception {
public:
  InvalidFormat(const std::string& message)
      : Exception(std::string("Invalid format: ").append(message)) {}
};
|
|
66
|
+
|
|
67
|
+
/**
 * Exception for a malformed line of a text dictionary.
 *
 * The message has the form
 * "Invalid text dictionary at line <lineNum>: <_message>".
 */
class OPENCC_EXPORT InvalidTextDictionary : public InvalidFormat {
public:
  InvalidTextDictionary(const std::string& _message, size_t lineNum)
      : InvalidFormat("") {
    // The message set by the base constructor is replaced below.
    std::ostringstream text;
    text << "Invalid text dictionary at line " << lineNum;
    text << ": " << _message;
    message = text.str();
  }
};
|
|
76
|
+
|
|
77
|
+
/**
 * Exception whose message is "Invalid UTF8: " followed by the given text.
 */
class OPENCC_EXPORT InvalidUTF8 : public Exception {
public:
  InvalidUTF8(const std::string& _message)
      : Exception(std::string("Invalid UTF8: ").append(_message)) {}
};
|
|
82
|
+
|
|
83
|
+
/**
 * Exception carrying the fixed message "ShouldNotBeHere! This must be a bug.".
 */
class OPENCC_EXPORT ShouldNotBeHere : public Exception {
public:
  ShouldNotBeHere()
      : Exception(std::string("ShouldNotBeHere! This must be a bug.")) {}
};
|
|
87
|
+
|
|
88
|
+
} // namespace opencc
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
// OPENCC_EXPORT / OPENCC_NO_EXPORT: symbol visibility markers.
//
// Only a non-static build on Windows needs __declspec decoration; everywhere
// else both macros expand to nothing.
#if !defined(Opencc_BUILT_AS_STATIC) && defined(_WIN32)

#if !defined(OPENCC_EXPORT)
#if defined(libopencc_EXPORTS)
/* We are building this library */
#define OPENCC_EXPORT __declspec(dllexport)
#else // defined(libopencc_EXPORTS)
/* We are using this library */
#define OPENCC_EXPORT __declspec(dllimport)
#endif // defined(libopencc_EXPORTS)
#endif // !defined(OPENCC_EXPORT)

#if !defined(OPENCC_NO_EXPORT)
#define OPENCC_NO_EXPORT
#endif // !defined(OPENCC_NO_EXPORT)

#else // !defined(Opencc_BUILT_AS_STATIC) && defined(_WIN32)

#define OPENCC_EXPORT
#define OPENCC_NO_EXPORT

#endif // !defined(Opencc_BUILT_AS_STATIC) && defined(_WIN32)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "DictEntry.hpp"
|
|
23
|
+
|
|
24
|
+
namespace opencc {
|
|
25
|
+
/**
|
|
26
|
+
* Storage of all entries
|
|
27
|
+
* @ingroup opencc_cpp_api
|
|
28
|
+
*/
|
|
29
|
+
class OPENCC_EXPORT Lexicon {
|
|
30
|
+
public:
|
|
31
|
+
Lexicon() {}
|
|
32
|
+
Lexicon(std::vector<std::unique_ptr<DictEntry>> entries_)
|
|
33
|
+
: entries(std::move(entries_)) {}
|
|
34
|
+
Lexicon(const Lexicon&) = delete;
|
|
35
|
+
Lexicon& operator=(const Lexicon&) = delete;
|
|
36
|
+
|
|
37
|
+
// Lexicon will take the ownership of the entry.
|
|
38
|
+
void Add(DictEntry* entry) { entries.emplace_back(entry); }
|
|
39
|
+
|
|
40
|
+
void Add(std::unique_ptr<DictEntry> entry) {
|
|
41
|
+
entries.push_back(std::move(entry));
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
void Sort();
|
|
45
|
+
|
|
46
|
+
// Returns true if the lexicon is sorted by key.
|
|
47
|
+
bool IsSorted();
|
|
48
|
+
|
|
49
|
+
// Returns true if every key unique (after sorted).
|
|
50
|
+
// When dupkey is set, it is set to the duplicate key.
|
|
51
|
+
bool IsUnique(std::string* dupkey = nullptr);
|
|
52
|
+
|
|
53
|
+
const DictEntry* At(size_t index) const { return entries.at(index).get(); }
|
|
54
|
+
|
|
55
|
+
size_t Length() const { return entries.size(); }
|
|
56
|
+
|
|
57
|
+
std::vector<std::unique_ptr<DictEntry>>::const_iterator begin() const {
|
|
58
|
+
return entries.begin();
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
std::vector<std::unique_ptr<DictEntry>>::const_iterator end() const {
|
|
62
|
+
return entries.end();
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
static LexiconPtr ParseLexiconFromFile(FILE* fp);
|
|
66
|
+
|
|
67
|
+
private:
|
|
68
|
+
std::vector<std::unique_ptr<DictEntry>> entries;
|
|
69
|
+
};
|
|
70
|
+
} // namespace opencc
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "SerializableDict.hpp"
|
|
23
|
+
|
|
24
|
+
namespace opencc {
|
|
25
|
+
/**
|
|
26
|
+
* Darts dictionary
|
|
27
|
+
* @ingroup opencc_cpp_api
|
|
28
|
+
*/
|
|
29
|
+
class OPENCC_EXPORT MarisaDict : public Dict, public SerializableDict {
|
|
30
|
+
public:
|
|
31
|
+
virtual ~MarisaDict();
|
|
32
|
+
|
|
33
|
+
virtual size_t KeyMaxLength() const;
|
|
34
|
+
|
|
35
|
+
virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
|
|
36
|
+
|
|
37
|
+
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
|
|
38
|
+
size_t len) const;
|
|
39
|
+
|
|
40
|
+
virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
|
|
41
|
+
size_t len) const;
|
|
42
|
+
|
|
43
|
+
virtual LexiconPtr GetLexicon() const;
|
|
44
|
+
|
|
45
|
+
virtual void SerializeToFile(FILE* fp) const;
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Constructs a MarisaDict from another dictionary.
|
|
49
|
+
*/
|
|
50
|
+
static MarisaDictPtr NewFromDict(const Dict& thatDict);
|
|
51
|
+
|
|
52
|
+
static MarisaDictPtr NewFromFile(FILE* fp);
|
|
53
|
+
|
|
54
|
+
private:
|
|
55
|
+
MarisaDict();
|
|
56
|
+
|
|
57
|
+
size_t maxLength;
|
|
58
|
+
LexiconPtr lexicon;
|
|
59
|
+
|
|
60
|
+
class MarisaInternal;
|
|
61
|
+
std::unique_ptr<MarisaInternal> internal;
|
|
62
|
+
};
|
|
63
|
+
} // namespace opencc
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "DictGroup.hpp"
|
|
23
|
+
#include "Segmentation.hpp"
|
|
24
|
+
|
|
25
|
+
namespace opencc {
|
|
26
|
+
/**
|
|
27
|
+
* Implementation of maximal match segmentation
|
|
28
|
+
* @ingroup opencc_cpp_api
|
|
29
|
+
*/
|
|
30
|
+
class OPENCC_EXPORT MaxMatchSegmentation : public Segmentation {
|
|
31
|
+
public:
|
|
32
|
+
MaxMatchSegmentation(const DictPtr _dict) : dict(_dict) {}
|
|
33
|
+
|
|
34
|
+
virtual ~MaxMatchSegmentation() {}
|
|
35
|
+
|
|
36
|
+
virtual SegmentsPtr Segment(const std::string& text) const;
|
|
37
|
+
|
|
38
|
+
const DictPtr GetDict() const { return dict; }
|
|
39
|
+
|
|
40
|
+
private:
|
|
41
|
+
const DictPtr dict;
|
|
42
|
+
};
|
|
43
|
+
} // namespace opencc
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
namespace opencc {
|
|
22
|
+
/**
 * A class that wraps type T into a nullable type.
 *
 * An instance either holds a value (built through the public constructor) or
 * is null (built through Null()). T must be default constructible, because a
 * null instance still contains a T.
 * @ingroup opencc_cpp_api
 */
template <typename T> class Optional {
public:
  /**
   * Constructs a non-null instance holding a copy of `actual`.
   */
  Optional(T actual) : isNull(false), data(actual) {}

  /**
   * Returns true if the instance is null.
   */
  bool IsNull() const { return isNull; }

  /**
   * Returns the contained data of the instance. For a null instance this is
   * a value-initialized T.
   */
  const T& Get() const { return data; }

  /**
   * Constructs a null instance.
   */
  static Optional<T> Null() { return Optional(); }

private:
  // `data()` value-initializes the payload so that a null instance never
  // holds an indeterminate value (e.g. for T = int), which would otherwise
  // be read when the instance is copied or Get() is called.
  Optional() : isNull(true), data() {}

  bool isNull;
  T data;
};
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Specialization of Optional for pointers.
|
|
57
|
+
*
|
|
58
|
+
* Reduce a bool.
|
|
59
|
+
*/
|
|
60
|
+
template <typename T> class Optional<T*> {
|
|
61
|
+
private:
|
|
62
|
+
Optional() : data(nullptr) {}
|
|
63
|
+
|
|
64
|
+
typedef T* TPtr;
|
|
65
|
+
TPtr data;
|
|
66
|
+
|
|
67
|
+
public:
|
|
68
|
+
Optional(TPtr actual) : data(actual) {}
|
|
69
|
+
|
|
70
|
+
bool IsNull() const { return data == nullptr; }
|
|
71
|
+
|
|
72
|
+
const TPtr& Get() const { return data; }
|
|
73
|
+
|
|
74
|
+
static Optional<TPtr> Null() { return Optional(); }
|
|
75
|
+
};
|
|
76
|
+
} // namespace opencc
|