OpenCC 1.2.0__cp38-cp38-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. opencc/__init__.py +49 -0
  2. opencc/clib/__init__.py +0 -0
  3. opencc/clib/bin/opencc +0 -0
  4. opencc/clib/bin/opencc_dict +0 -0
  5. opencc/clib/bin/opencc_phrase_extract +0 -0
  6. opencc/clib/include/opencc/BinaryDict.hpp +53 -0
  7. opencc/clib/include/opencc/Common.hpp +82 -0
  8. opencc/clib/include/opencc/Config.hpp +49 -0
  9. opencc/clib/include/opencc/Conversion.hpp +47 -0
  10. opencc/clib/include/opencc/ConversionChain.hpp +43 -0
  11. opencc/clib/include/opencc/Converter.hpp +51 -0
  12. opencc/clib/include/opencc/DartsDict.hpp +60 -0
  13. opencc/clib/include/opencc/Dict.hpp +92 -0
  14. opencc/clib/include/opencc/DictConverter.hpp +32 -0
  15. opencc/clib/include/opencc/DictEntry.hpp +173 -0
  16. opencc/clib/include/opencc/DictGroup.hpp +57 -0
  17. opencc/clib/include/opencc/Exception.hpp +88 -0
  18. opencc/clib/include/opencc/Export.hpp +40 -0
  19. opencc/clib/include/opencc/Lexicon.hpp +70 -0
  20. opencc/clib/include/opencc/MarisaDict.hpp +63 -0
  21. opencc/clib/include/opencc/MaxMatchSegmentation.hpp +43 -0
  22. opencc/clib/include/opencc/Optional.hpp +76 -0
  23. opencc/clib/include/opencc/PhraseExtract.hpp +195 -0
  24. opencc/clib/include/opencc/Segmentation.hpp +32 -0
  25. opencc/clib/include/opencc/Segments.hpp +118 -0
  26. opencc/clib/include/opencc/SerializableDict.hpp +77 -0
  27. opencc/clib/include/opencc/SerializedValues.hpp +52 -0
  28. opencc/clib/include/opencc/SimpleConverter.hpp +113 -0
  29. opencc/clib/include/opencc/TextDict.hpp +60 -0
  30. opencc/clib/include/opencc/UTF8StringSlice.hpp +246 -0
  31. opencc/clib/include/opencc/UTF8Util.hpp +291 -0
  32. opencc/clib/include/opencc/opencc.h +161 -0
  33. opencc/clib/include/opencc/opencc_config.h +21 -0
  34. opencc/clib/lib/cmake/opencc/OpenCCConfig.cmake +31 -0
  35. opencc/clib/lib/cmake/opencc/OpenCCConfigVersion.cmake +65 -0
  36. opencc/clib/lib/cmake/opencc/OpenCCTargets-release.cmake +29 -0
  37. opencc/clib/lib/cmake/opencc/OpenCCTargets.cmake +110 -0
  38. opencc/clib/lib/libmarisa.a +0 -0
  39. opencc/clib/lib/libopencc.a +0 -0
  40. opencc/clib/lib/pkgconfig/opencc.pc +11 -0
  41. opencc/clib/opencc_clib.cpython-38-x86_64-linux-gnu.so +0 -0
  42. opencc/clib/share/opencc/HKVariants.ocd2 +0 -0
  43. opencc/clib/share/opencc/HKVariantsRev.ocd2 +0 -0
  44. opencc/clib/share/opencc/HKVariantsRevPhrases.ocd2 +0 -0
  45. opencc/clib/share/opencc/JPShinjitaiCharacters.ocd2 +0 -0
  46. opencc/clib/share/opencc/JPShinjitaiPhrases.ocd2 +0 -0
  47. opencc/clib/share/opencc/JPVariants.ocd2 +0 -0
  48. opencc/clib/share/opencc/JPVariantsRev.ocd2 +0 -0
  49. opencc/clib/share/opencc/STCharacters.ocd2 +0 -0
  50. opencc/clib/share/opencc/STPhrases.ocd2 +0 -0
  51. opencc/clib/share/opencc/TSCharacters.ocd2 +0 -0
  52. opencc/clib/share/opencc/TSPhrases.ocd2 +0 -0
  53. opencc/clib/share/opencc/TWPhrases.ocd2 +0 -0
  54. opencc/clib/share/opencc/TWPhrasesRev.ocd2 +0 -0
  55. opencc/clib/share/opencc/TWVariants.ocd2 +0 -0
  56. opencc/clib/share/opencc/TWVariantsRev.ocd2 +0 -0
  57. opencc/clib/share/opencc/TWVariantsRevPhrases.ocd2 +0 -0
  58. opencc/clib/share/opencc/hk2s.json +33 -0
  59. opencc/clib/share/opencc/hk2t.json +22 -0
  60. opencc/clib/share/opencc/jp2t.json +25 -0
  61. opencc/clib/share/opencc/s2hk.json +27 -0
  62. opencc/clib/share/opencc/s2t.json +22 -0
  63. opencc/clib/share/opencc/s2tw.json +27 -0
  64. opencc/clib/share/opencc/s2twp.json +32 -0
  65. opencc/clib/share/opencc/t2hk.json +16 -0
  66. opencc/clib/share/opencc/t2jp.json +16 -0
  67. opencc/clib/share/opencc/t2s.json +22 -0
  68. opencc/clib/share/opencc/t2tw.json +16 -0
  69. opencc/clib/share/opencc/tw2s.json +33 -0
  70. opencc/clib/share/opencc/tw2sp.json +36 -0
  71. opencc/clib/share/opencc/tw2t.json +22 -0
  72. opencc/py.typed +0 -0
  73. opencc-1.2.0.dist-info/AUTHORS +12 -0
  74. opencc-1.2.0.dist-info/LICENSE +56 -0
  75. opencc-1.2.0.dist-info/METADATA +347 -0
  76. opencc-1.2.0.dist-info/RECORD +78 -0
  77. opencc-1.2.0.dist-info/WHEEL +5 -0
  78. opencc-1.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,195 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include <functional>
22
+ #include <unordered_map>
23
+
24
+ #include "Common.hpp"
25
+ #include "UTF8StringSlice.hpp"
26
+
27
+ namespace opencc {
28
+
29
+ class OPENCC_EXPORT PhraseExtract {
30
+ public:
31
+ typedef UTF8StringSlice::LengthType LengthType;
32
+
33
+ typedef UTF8StringSliceBase<unsigned char> UTF8StringSlice8Bit;
34
+
35
+ PhraseExtract();
36
+
37
+ virtual ~PhraseExtract();
38
+
39
+ void Extract(const std::string& text) {
40
+ SetFullText(text);
41
+ ExtractSuffixes();
42
+ CalculateFrequency();
43
+ CalculateSuffixEntropy();
44
+ ReleaseSuffixes();
45
+ ExtractPrefixes();
46
+ CalculatePrefixEntropy();
47
+ ReleasePrefixes();
48
+ ExtractWordCandidates();
49
+ CalculateCohesions();
50
+ SelectWords();
51
+ }
52
+
53
+ void SetFullText(const std::string& fullText) {
54
+ utf8FullText = UTF8StringSlice(fullText.c_str());
55
+ }
56
+
57
+ void SetFullText(const char* fullText) {
58
+ utf8FullText = UTF8StringSlice(fullText);
59
+ }
60
+
61
+ void SetFullText(const UTF8StringSlice& fullText) { utf8FullText = fullText; }
62
+
63
+ void SetWordMinLength(const LengthType _wordMinLength) {
64
+ wordMinLength = _wordMinLength;
65
+ }
66
+
67
+ void SetWordMaxLength(const LengthType _wordMaxLength) {
68
+ wordMaxLength = _wordMaxLength;
69
+ }
70
+
71
+ void SetPrefixSetLength(const LengthType _prefixSetLength) {
72
+ prefixSetLength = _prefixSetLength;
73
+ }
74
+
75
+ void SetSuffixSetLength(const LengthType _suffixSetLength) {
76
+ suffixSetLength = _suffixSetLength;
77
+ }
78
+
79
+ // PreCalculationFilter is called after frequencies statistics.
80
+ void SetPreCalculationFilter(
81
+ const std::function<bool(const PhraseExtract&,
82
+ const UTF8StringSlice8Bit&)>& filter) {
83
+ preCalculationFilter = filter;
84
+ }
85
+
86
+ void SetPostCalculationFilter(
87
+ const std::function<bool(const PhraseExtract&,
88
+ const UTF8StringSlice8Bit&)>& filter) {
89
+ postCalculationFilter = filter;
90
+ }
91
+
92
+ void ReleaseSuffixes() { std::vector<UTF8StringSlice8Bit>().swap(suffixes); }
93
+
94
+ void ReleasePrefixes() { std::vector<UTF8StringSlice8Bit>().swap(prefixes); }
95
+
96
+ const std::vector<UTF8StringSlice8Bit>& Words() const { return words; }
97
+
98
+ const std::vector<UTF8StringSlice8Bit>& WordCandidates() const {
99
+ return wordCandidates;
100
+ }
101
+
102
+ struct Signals {
103
+ size_t frequency;
104
+ double cohesion;
105
+ double suffixEntropy;
106
+ double prefixEntropy;
107
+ };
108
+
109
+ const Signals& Signal(const UTF8StringSlice8Bit& wordCandidate) const;
110
+
111
+ double Cohesion(const UTF8StringSlice8Bit& wordCandidate) const;
112
+
113
+ double Entropy(const UTF8StringSlice8Bit& wordCandidate) const;
114
+
115
+ double SuffixEntropy(const UTF8StringSlice8Bit& wordCandidate) const;
116
+
117
+ double PrefixEntropy(const UTF8StringSlice8Bit& wordCandidate) const;
118
+
119
+ size_t Frequency(const UTF8StringSlice8Bit& word) const;
120
+
121
+ double Probability(const UTF8StringSlice8Bit& word) const;
122
+
123
+ double LogProbability(const UTF8StringSlice8Bit& word) const;
124
+
125
+ void Reset();
126
+
127
+ void ExtractSuffixes();
128
+
129
+ void ExtractPrefixes();
130
+
131
+ void ExtractWordCandidates();
132
+
133
+ void CalculateFrequency();
134
+
135
+ void CalculateCohesions();
136
+
137
+ void CalculateSuffixEntropy();
138
+
139
+ void CalculatePrefixEntropy();
140
+
141
+ void SelectWords();
142
+
143
+ static bool
144
+ DefaultPreCalculationFilter(const PhraseExtract&,
145
+ const PhraseExtract::UTF8StringSlice8Bit&);
146
+
147
+ static bool
148
+ DefaultPostCalculationFilter(const PhraseExtract&,
149
+ const PhraseExtract::UTF8StringSlice8Bit&);
150
+
151
+ private:
152
+ class DictType;
153
+
154
+ // Pointwise Mutual Information
155
+ double PMI(const UTF8StringSlice8Bit& wordCandidate,
156
+ const UTF8StringSlice8Bit& part1,
157
+ const UTF8StringSlice8Bit& part2) const;
158
+
159
+ double CalculateCohesion(const UTF8StringSlice8Bit& wordCandidate) const;
160
+
161
+ double CalculateEntropy(
162
+ const std::unordered_map<UTF8StringSlice8Bit, size_t,
163
+ UTF8StringSlice8Bit::Hasher>& choices) const;
164
+
165
+ LengthType wordMinLength;
166
+ LengthType wordMaxLength;
167
+ LengthType prefixSetLength;
168
+ LengthType suffixSetLength;
169
+ std::function<bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>
170
+ preCalculationFilter;
171
+ std::function<bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>
172
+ postCalculationFilter;
173
+
174
+ bool prefixesExtracted;
175
+ bool suffixesExtracted;
176
+ bool frequenciesCalculated;
177
+ bool wordCandidatesExtracted;
178
+ bool cohesionsCalculated;
179
+ bool prefixEntropiesCalculated;
180
+ bool suffixEntropiesCalculated;
181
+ bool wordsSelected;
182
+
183
+ UTF8StringSlice utf8FullText;
184
+ size_t totalOccurrence;
185
+ double logTotalOccurrence;
186
+ std::vector<UTF8StringSlice8Bit> prefixes;
187
+ std::vector<UTF8StringSlice8Bit> suffixes;
188
+ std::vector<UTF8StringSlice8Bit> wordCandidates;
189
+ std::vector<UTF8StringSlice8Bit> words;
190
+ DictType* signals;
191
+
192
+ friend class PhraseExtractTest;
193
+ };
194
+
195
+ } // namespace opencc
@@ -0,0 +1,32 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+
23
+ namespace opencc {
24
+ /**
25
+ * Abstract segmentation
26
+ * @ingroup opencc_cpp_api
27
+ */
28
+ class OPENCC_EXPORT Segmentation {
29
+ public:
30
+ virtual SegmentsPtr Segment(const std::string& text) const = 0;
31
+ };
32
+ } // namespace opencc
@@ -0,0 +1,118 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include <iterator>
22
+ #include <sstream>
23
+
24
+ #include "Common.hpp"
25
+
26
+ namespace opencc {
27
+ /**
28
+ * Segmented text
29
+ * @ingroup opencc_cpp_api
30
+ */
31
+ class OPENCC_EXPORT Segments {
32
+ public:
33
+ Segments() {}
34
+
35
+ Segments(std::initializer_list<const char*> initList) {
36
+ for (const char* item : initList) {
37
+ AddSegment(item);
38
+ }
39
+ }
40
+
41
+ Segments(std::initializer_list<std::string> initList) {
42
+ for (const std::string& item : initList) {
43
+ AddSegment(item);
44
+ }
45
+ }
46
+
47
+ void AddSegment(const char* unmanagedString) {
48
+ indexes.push_back(std::make_pair(unmanaged.size(), false));
49
+ unmanaged.push_back(unmanagedString);
50
+ }
51
+
52
+ void AddSegment(const std::string& str) {
53
+ indexes.push_back(std::make_pair(managed.size(), true));
54
+ managed.push_back(str);
55
+ }
56
+
57
+ class iterator {
58
+ public:
59
+ using iterator_category = std::input_iterator_tag;
60
+ using value_type = const char*;
61
+
62
+ iterator(const Segments* const _segments, size_t _cursor)
63
+ : segments(_segments), cursor(_cursor) {}
64
+
65
+ iterator& operator++() {
66
+ cursor++;
67
+ return *this;
68
+ }
69
+
70
+ bool operator==(const iterator& that) const {
71
+ return cursor == that.cursor && segments == that.segments;
72
+ }
73
+
74
+ bool operator!=(const iterator& that) const {
75
+ return !this->operator==(that);
76
+ }
77
+
78
+ const char* operator*() const { return segments->At(cursor); }
79
+
80
+ private:
81
+ const Segments* const segments;
82
+ size_t cursor;
83
+ };
84
+
85
+ const char* At(size_t cursor) const {
86
+ const auto& index = indexes[cursor];
87
+ if (index.second) {
88
+ return managed[index.first].c_str();
89
+ } else {
90
+ return unmanaged[index.first];
91
+ }
92
+ }
93
+
94
+ size_t Length() const { return indexes.size(); }
95
+
96
+ iterator begin() const { return iterator(this, 0); }
97
+
98
+ iterator end() const { return iterator(this, indexes.size()); }
99
+
100
+ std::string ToString() const {
101
+ // TODO implement a nested structure to reduce concatenation,
102
+ // like a purely functional differential list
103
+ std::ostringstream buffer;
104
+ for (const char* segment : *this) {
105
+ buffer << segment;
106
+ }
107
+ return buffer.str();
108
+ }
109
+
110
+ private:
111
+ Segments(const Segments&) {}
112
+
113
+ std::vector<const char*> unmanaged;
114
+ std::vector<std::string> managed;
115
+ // index, managed
116
+ std::vector<std::pair<size_t, bool>> indexes;
117
+ };
118
+ } // namespace opencc
@@ -0,0 +1,77 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Dict.hpp"
22
+
23
+ namespace opencc {
24
+ /**
25
+ * Serializable dictionary interface
26
+ * @ingroup opencc_cpp_api
27
+ */
28
+ class OPENCC_EXPORT SerializableDict {
29
+ public:
30
+ /**
31
+ * Serializes the dictionary and writes in to a file.
32
+ */
33
+ virtual void SerializeToFile(FILE* fp) const = 0;
34
+
35
+ /**
36
+ * Serializes the dictionary and writes in to a file.
37
+ */
38
+ virtual void SerializeToFile(const std::string& fileName) const {
39
+ FILE* fp = fopen(fileName.c_str(), "wb");
40
+ if (fp == NULL) {
41
+ throw FileNotWritable(fileName);
42
+ }
43
+ SerializeToFile(fp);
44
+ fclose(fp);
45
+ }
46
+
47
+ template <typename DICT>
48
+ static bool TryLoadFromFile(const std::string& fileName,
49
+ std::shared_ptr<DICT>* dict) {
50
+ FILE* fp =
51
+ #ifdef _MSC_VER
52
+ // well, the 'GetPlatformString' shall return a 'wstring'
53
+ _wfopen(UTF8Util::GetPlatformString(fileName).c_str(), L"rb")
54
+ #else
55
+ fopen(UTF8Util::GetPlatformString(fileName).c_str(), "rb")
56
+ #endif // _MSC_VER
57
+ ;
58
+
59
+ if (fp == NULL) {
60
+ return false;
61
+ }
62
+ std::shared_ptr<DICT> loadedDict = DICT::NewFromFile(fp);
63
+ fclose(fp);
64
+ *dict = loadedDict;
65
+ return true;
66
+ }
67
+
68
+ template <typename DICT>
69
+ static std::shared_ptr<DICT> NewFromFile(const std::string& fileName) {
70
+ std::shared_ptr<DICT> dict;
71
+ if (!TryLoadFromFile<DICT>(fileName, &dict)) {
72
+ throw FileNotFound(fileName);
73
+ }
74
+ return dict;
75
+ }
76
+ };
77
+ } // namespace opencc
@@ -0,0 +1,52 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include <cstdint>
22
+
23
+ #include "Common.hpp"
24
+ #include "SerializableDict.hpp"
25
+
26
+ namespace opencc {
27
+ /**
28
+ * Binary format for dictionary values serialization.
29
+ * @ingroup opencc_cpp_api
30
+ */
31
+ class OPENCC_EXPORT SerializedValues : public SerializableDict {
32
+ public:
33
+ SerializedValues(const LexiconPtr& _lexicon) : lexicon(_lexicon) {}
34
+
35
+ virtual ~SerializedValues() {}
36
+
37
+ virtual void SerializeToFile(FILE* fp) const;
38
+
39
+ static std::shared_ptr<SerializedValues> NewFromFile(FILE* fp);
40
+
41
+ const LexiconPtr& GetLexicon() const { return lexicon; }
42
+
43
+ size_t KeyMaxLength() const;
44
+
45
+ private:
46
+ LexiconPtr lexicon;
47
+
48
+ void ConstructBuffer(std::string* valueBuffer,
49
+ std::vector<uint16_t>* valueBytes,
50
+ uint32_t* valueTotalLength) const;
51
+ };
52
+ } // namespace opencc
@@ -0,0 +1,113 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #include "Export.hpp"
20
+ #include <string>
21
+ #include <vector>
22
+
23
+ #ifndef __OPENCC_SIMPLECONVERTER_HPP_
24
+ #define __OPENCC_SIMPLECONVERTER_HPP_
25
+
26
+ /**
27
+ * @defgroup opencc_simple_api OpenCC C++ Simple API
28
+ *
29
+ * Simple API in C++ language
30
+ */
31
+
32
+ namespace opencc {
33
+
34
+ /**
35
+ * A high level converter
36
+ * This interface does not require C++11 to compile.
37
+ * @ingroup opencc_simple_api
38
+ */
39
+ class OPENCC_EXPORT SimpleConverter {
40
+ public:
41
+ /**
42
+ * Constructor of SimpleConverter
43
+ * @param configFileName File name of configuration.
44
+ */
45
+ explicit SimpleConverter(const std::string& configFileName);
46
+
47
+ /**
48
+ * Constructor of SimpleConverter
49
+ * @param configFileName File name of configuration.
50
+ * @param paths Additional paths to locate configuration and dictionary files.
51
+ */
52
+ SimpleConverter(const std::string& configFileName,
53
+ const std::vector<std::string>& paths);
54
+
55
+ /**
56
+ * Constructor of SimpleConverter
57
+ * @param configFileName File name of configuration.
58
+ * @param paths Additional paths to locate configuration and dictionary files.
59
+ * @param argv0 Path of the executable (argv[0]), in addition to additional
60
+ * paths.
61
+ */
62
+ SimpleConverter(const std::string& configFileName,
63
+ const std::vector<std::string>& paths, const char* argv0);
64
+
65
+ ~SimpleConverter();
66
+
67
+ /**
68
+ * Converts a text
69
+ * @param input Text to be converted.
70
+ */
71
+ std::string Convert(const std::string& input) const;
72
+
73
+ /**
74
+ * Converts a text
75
+ * @param input A C-Style std::string (terminated by '\0') to be converted.
76
+ */
77
+ std::string Convert(const char* input) const;
78
+
79
+ /**
80
+ * Converts a text
81
+ * @param input A C-Style std::string limited by a given length to be
82
+ * converted.
83
+ * @param length Maximal length in byte of the input std::string.
84
+ */
85
+ std::string Convert(const char* input, size_t length) const;
86
+
87
+ /**
88
+ * Converts a text and writes to an allocated buffer
89
+ * Please make sure the buffer has sufficient space.
90
+ * @param input A C-Style std::string (terminated by '\0') to be converted.
91
+ * @param output Buffer to write the converted text.
92
+ * @return Length of converted text.
93
+ */
94
+ size_t Convert(const char* input, char* output) const;
95
+
96
+ /**
97
+ * Converts a text and writes to an allocated buffer
98
+ * Please make sure the buffer has sufficient space.
99
+ * @param input A C-Style std::string limited by a given length to be
100
+ * converted.
101
+ * @param length Maximal length in byte of the input std::string.
102
+ * @param output Buffer to write the converted text.
103
+ * @return Length of converted text.
104
+ */
105
+ size_t Convert(const char* input, size_t length, char* output) const;
106
+
107
+ private:
108
+ const void* internalData;
109
+ };
110
+
111
+ } // namespace opencc
112
+
113
+ #endif
@@ -0,0 +1,60 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+ #include "SerializableDict.hpp"
23
+
24
+ namespace opencc {
25
+ /**
26
+ * Text dictionary
27
+ * @ingroup opencc_cpp_api
28
+ */
29
+ class OPENCC_EXPORT TextDict : public Dict, public SerializableDict {
30
+ public:
31
+ /**
32
+ * Constructor of TextDict.
33
+ * _lexicon must be sorted.
34
+ */
35
+ TextDict(const LexiconPtr& _lexicon);
36
+
37
+ virtual ~TextDict();
38
+
39
+ virtual size_t KeyMaxLength() const;
40
+
41
+ virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
42
+
43
+ virtual LexiconPtr GetLexicon() const;
44
+
45
+ virtual void SerializeToFile(FILE* fp) const;
46
+
47
+ /**
48
+ * Constructs a TextDict from another dictionary.
49
+ */
50
+ static TextDictPtr NewFromDict(const Dict& dict);
51
+
52
+ static TextDictPtr NewFromFile(FILE* fp);
53
+
54
+ static TextDictPtr NewFromSortedFile(FILE* fp);
55
+
56
+ private:
57
+ const size_t maxLength;
58
+ const LexiconPtr lexicon;
59
+ };
60
+ } // namespace opencc