OpenCC 1.2.0__cp38-cp38-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. opencc/__init__.py +49 -0
  2. opencc/clib/__init__.py +0 -0
  3. opencc/clib/bin/opencc +0 -0
  4. opencc/clib/bin/opencc_dict +0 -0
  5. opencc/clib/bin/opencc_phrase_extract +0 -0
  6. opencc/clib/include/opencc/BinaryDict.hpp +53 -0
  7. opencc/clib/include/opencc/Common.hpp +82 -0
  8. opencc/clib/include/opencc/Config.hpp +49 -0
  9. opencc/clib/include/opencc/Conversion.hpp +47 -0
  10. opencc/clib/include/opencc/ConversionChain.hpp +43 -0
  11. opencc/clib/include/opencc/Converter.hpp +51 -0
  12. opencc/clib/include/opencc/DartsDict.hpp +60 -0
  13. opencc/clib/include/opencc/Dict.hpp +92 -0
  14. opencc/clib/include/opencc/DictConverter.hpp +32 -0
  15. opencc/clib/include/opencc/DictEntry.hpp +173 -0
  16. opencc/clib/include/opencc/DictGroup.hpp +57 -0
  17. opencc/clib/include/opencc/Exception.hpp +88 -0
  18. opencc/clib/include/opencc/Export.hpp +40 -0
  19. opencc/clib/include/opencc/Lexicon.hpp +70 -0
  20. opencc/clib/include/opencc/MarisaDict.hpp +63 -0
  21. opencc/clib/include/opencc/MaxMatchSegmentation.hpp +43 -0
  22. opencc/clib/include/opencc/Optional.hpp +76 -0
  23. opencc/clib/include/opencc/PhraseExtract.hpp +195 -0
  24. opencc/clib/include/opencc/Segmentation.hpp +32 -0
  25. opencc/clib/include/opencc/Segments.hpp +118 -0
  26. opencc/clib/include/opencc/SerializableDict.hpp +77 -0
  27. opencc/clib/include/opencc/SerializedValues.hpp +52 -0
  28. opencc/clib/include/opencc/SimpleConverter.hpp +113 -0
  29. opencc/clib/include/opencc/TextDict.hpp +60 -0
  30. opencc/clib/include/opencc/UTF8StringSlice.hpp +246 -0
  31. opencc/clib/include/opencc/UTF8Util.hpp +291 -0
  32. opencc/clib/include/opencc/opencc.h +161 -0
  33. opencc/clib/include/opencc/opencc_config.h +21 -0
  34. opencc/clib/lib/cmake/opencc/OpenCCConfig.cmake +31 -0
  35. opencc/clib/lib/cmake/opencc/OpenCCConfigVersion.cmake +65 -0
  36. opencc/clib/lib/cmake/opencc/OpenCCTargets-release.cmake +29 -0
  37. opencc/clib/lib/cmake/opencc/OpenCCTargets.cmake +110 -0
  38. opencc/clib/lib/libmarisa.a +0 -0
  39. opencc/clib/lib/libopencc.a +0 -0
  40. opencc/clib/lib/pkgconfig/opencc.pc +11 -0
  41. opencc/clib/opencc_clib.cpython-38-x86_64-linux-gnu.so +0 -0
  42. opencc/clib/share/opencc/HKVariants.ocd2 +0 -0
  43. opencc/clib/share/opencc/HKVariantsRev.ocd2 +0 -0
  44. opencc/clib/share/opencc/HKVariantsRevPhrases.ocd2 +0 -0
  45. opencc/clib/share/opencc/JPShinjitaiCharacters.ocd2 +0 -0
  46. opencc/clib/share/opencc/JPShinjitaiPhrases.ocd2 +0 -0
  47. opencc/clib/share/opencc/JPVariants.ocd2 +0 -0
  48. opencc/clib/share/opencc/JPVariantsRev.ocd2 +0 -0
  49. opencc/clib/share/opencc/STCharacters.ocd2 +0 -0
  50. opencc/clib/share/opencc/STPhrases.ocd2 +0 -0
  51. opencc/clib/share/opencc/TSCharacters.ocd2 +0 -0
  52. opencc/clib/share/opencc/TSPhrases.ocd2 +0 -0
  53. opencc/clib/share/opencc/TWPhrases.ocd2 +0 -0
  54. opencc/clib/share/opencc/TWPhrasesRev.ocd2 +0 -0
  55. opencc/clib/share/opencc/TWVariants.ocd2 +0 -0
  56. opencc/clib/share/opencc/TWVariantsRev.ocd2 +0 -0
  57. opencc/clib/share/opencc/TWVariantsRevPhrases.ocd2 +0 -0
  58. opencc/clib/share/opencc/hk2s.json +33 -0
  59. opencc/clib/share/opencc/hk2t.json +22 -0
  60. opencc/clib/share/opencc/jp2t.json +25 -0
  61. opencc/clib/share/opencc/s2hk.json +27 -0
  62. opencc/clib/share/opencc/s2t.json +22 -0
  63. opencc/clib/share/opencc/s2tw.json +27 -0
  64. opencc/clib/share/opencc/s2twp.json +32 -0
  65. opencc/clib/share/opencc/t2hk.json +16 -0
  66. opencc/clib/share/opencc/t2jp.json +16 -0
  67. opencc/clib/share/opencc/t2s.json +22 -0
  68. opencc/clib/share/opencc/t2tw.json +16 -0
  69. opencc/clib/share/opencc/tw2s.json +33 -0
  70. opencc/clib/share/opencc/tw2sp.json +36 -0
  71. opencc/clib/share/opencc/tw2t.json +22 -0
  72. opencc/py.typed +0 -0
  73. opencc-1.2.0.dist-info/AUTHORS +12 -0
  74. opencc-1.2.0.dist-info/LICENSE +56 -0
  75. opencc-1.2.0.dist-info/METADATA +347 -0
  76. opencc-1.2.0.dist-info/RECORD +78 -0
  77. opencc-1.2.0.dist-info/WHEEL +5 -0
  78. opencc-1.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,246 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #include <cstring>
20
+
21
+ #include "Common.hpp"
22
+ #include "UTF8Util.hpp"
23
+
24
+ namespace opencc {
25
+
26
+ namespace internal {
27
+
28
/**
 * Computes an FNV-style hash (xor the byte in, then multiply by the prime)
 * over a byte range.
 *
 * @param text             Pointer to the first byte to hash.
 * @param byteLength       Number of bytes to hash; 0 yields FNV_offset_basis.
 * @param FNV_prime        Multiplier applied after every byte.
 * @param FNV_offset_basis Initial value of the hash.
 * @return The accumulated hash value.
 */
inline size_t FNVHash(const char* text, const size_t byteLength,
                      const size_t FNV_prime, const size_t FNV_offset_basis) {
  size_t hash = FNV_offset_basis;
  const char* const end = text + byteLength;
  for (const char* pstr = text; pstr < end; pstr++) {
    // Fold the byte in as an unsigned octet. Plain `char` may be signed, in
    // which case bytes >= 0x80 (every non-ASCII UTF-8 byte) would be
    // sign-extended and flip all upper bits of the hash, making the result
    // depend on the platform's char signedness.
    hash ^= static_cast<size_t>(static_cast<unsigned char>(*pstr));
    hash *= FNV_prime;
  }
  return hash;
}
37
+
38
+ template <int> size_t FNVHash(const char* text, const size_t byteLength);
39
+
40
+ template <>
41
+ inline size_t FNVHash<4>(const char* text, const size_t byteLength) {
42
+ return FNVHash(text, byteLength, 16777619UL, 2166136261UL);
43
+ }
44
+
45
+ #if SIZE_MAX == 0xffffffffffffffff
46
+ template <>
47
+ inline size_t FNVHash<8>(const char* text, const size_t byteLength) {
48
+ return FNVHash(text, byteLength, 1099511628211UL, 14695981039346656037UL);
49
+ }
50
+ #endif
51
+
52
+ } // namespace internal
53
+
54
+ template <typename LENGTH_TYPE> class UTF8StringSliceBase {
55
+ public:
56
+ typedef LENGTH_TYPE LengthType;
57
+
58
+ UTF8StringSliceBase(const char* _str)
59
+ : str(_str), utf8Length(static_cast<LengthType>(UTF8Util::Length(_str))),
60
+ byteLength(static_cast<LengthType>(strlen(_str))) {}
61
+
62
+ UTF8StringSliceBase(const char* _str, const LengthType _utf8Length)
63
+ : str(_str), utf8Length(_utf8Length) {
64
+ CalculateByteLength();
65
+ }
66
+
67
+ UTF8StringSliceBase(const char* _str, const LengthType _utf8Length,
68
+ const LengthType _byteLength)
69
+ : str(_str), utf8Length(_utf8Length), byteLength(_byteLength) {
70
+ CalculateByteLength();
71
+ }
72
+
73
+ LengthType UTF8Length() const { return utf8Length; }
74
+
75
+ LengthType ByteLength() const { return byteLength; }
76
+
77
+ UTF8StringSliceBase Left(const LengthType numberOfCharacters) const {
78
+ if (numberOfCharacters == UTF8Length()) {
79
+ return *this;
80
+ } else {
81
+ return UTF8StringSliceBase(str, numberOfCharacters);
82
+ }
83
+ }
84
+
85
+ UTF8StringSliceBase Right(const LengthType numberOfCharacters) const {
86
+ if (numberOfCharacters == UTF8Length()) {
87
+ return *this;
88
+ } else {
89
+ const char* pstr = str + byteLength;
90
+ for (size_t i = 0; i < numberOfCharacters; i++) {
91
+ pstr = UTF8Util::PrevChar(pstr);
92
+ }
93
+ return UTF8StringSliceBase(pstr, numberOfCharacters);
94
+ }
95
+ }
96
+
97
+ UTF8StringSliceBase SubString(const LengthType offset,
98
+ const LengthType numberOfCharacters) const {
99
+ if (offset == 0) {
100
+ return Left(numberOfCharacters);
101
+ } else {
102
+ const char* pstr = str;
103
+ for (size_t i = 0; i < offset; i++) {
104
+ pstr = UTF8Util::NextChar(pstr);
105
+ }
106
+ return UTF8StringSliceBase(pstr, numberOfCharacters);
107
+ }
108
+ }
109
+
110
+ std::string ToString() const { return std::string(str, str + byteLength); }
111
+
112
+ const char* CString() const { return str; }
113
+
114
+ LengthType CommonPrefixLength(const UTF8StringSliceBase& that) const {
115
+ if (str == that.str) {
116
+ return (std::min)(utf8Length, that.utf8Length);
117
+ } else {
118
+ const char* pstr1 = str;
119
+ const char* pstr2 = that.str;
120
+ for (size_t length = 0; length < utf8Length && length < that.utf8Length;
121
+ length++) {
122
+ size_t charLen1 = UTF8Util::NextCharLength(pstr1);
123
+ size_t charLen2 = UTF8Util::NextCharLength(pstr2);
124
+ if (charLen1 != charLen2 || strncmp(pstr1, pstr2, charLen1) != 0) {
125
+ return length;
126
+ }
127
+ pstr1 += charLen1;
128
+ pstr2 += charLen2;
129
+ }
130
+ return 0;
131
+ }
132
+ }
133
+
134
+ void MoveRight() {
135
+ if (utf8Length > 0) {
136
+ const size_t charLen = UTF8Util::NextCharLength(str);
137
+ str += charLen;
138
+ utf8Length--;
139
+ byteLength -= charLen;
140
+ }
141
+ }
142
+
143
+ void MoveLeft() {
144
+ if (utf8Length > 0) {
145
+ const size_t charLen = UTF8Util::PrevCharLength(str + byteLength);
146
+ utf8Length--;
147
+ byteLength -= charLen;
148
+ }
149
+ }
150
+
151
+ int ReverseCompare(const UTF8StringSliceBase& that) const {
152
+ const char* pstr1 = str + byteLength;
153
+ const char* pstr2 = that.str + that.byteLength;
154
+ const size_t length = (std::min)(utf8Length, that.utf8Length);
155
+ for (size_t i = 0; i < length; i++) {
156
+ const size_t charLen1 = UTF8Util::PrevCharLength(pstr1);
157
+ const size_t charLen2 = UTF8Util::PrevCharLength(pstr2);
158
+ pstr1 -= charLen1;
159
+ pstr2 -= charLen2;
160
+ const int cmp = strncmp(pstr1, pstr2, (std::min)(charLen1, charLen2));
161
+ if (cmp < 0) {
162
+ return -1;
163
+ } else if (cmp > 0) {
164
+ return 1;
165
+ } else if (charLen1 < charLen2) {
166
+ return -1;
167
+ } else if (charLen1 > charLen2) {
168
+ return 1;
169
+ }
170
+ }
171
+ if (utf8Length < that.utf8Length) {
172
+ return -1;
173
+ } else if (utf8Length > that.utf8Length) {
174
+ return 1;
175
+ } else {
176
+ return 0;
177
+ }
178
+ }
179
+
180
+ LengthType FindBytePosition(const UTF8StringSliceBase& pattern) const {
181
+ return static_cast<LengthType>(
182
+ ToString().find(pattern.str, 0, pattern.byteLength));
183
+ }
184
+
185
+ bool operator<(const UTF8StringSliceBase& that) const {
186
+ return Compare(that) < 0;
187
+ }
188
+
189
+ bool operator>(const UTF8StringSliceBase& that) const {
190
+ return Compare(that) > 0;
191
+ }
192
+
193
+ bool operator==(const UTF8StringSliceBase& that) const {
194
+ return (str == that.str && utf8Length == that.utf8Length) ||
195
+ Compare(that) == 0;
196
+ }
197
+
198
+ bool operator!=(const UTF8StringSliceBase& that) const {
199
+ return !this->operator==(that);
200
+ }
201
+
202
+ class Hasher {
203
+ public:
204
+ size_t operator()(const UTF8StringSliceBase& text) const {
205
+ return internal::FNVHash<sizeof(size_t)>(text.CString(),
206
+ text.ByteLength());
207
+ }
208
+ };
209
+
210
+ private:
211
+ inline int Compare(const UTF8StringSliceBase& that) const {
212
+ int cmp = strncmp(str, that.str, (std::min)(byteLength, that.byteLength));
213
+ if (cmp == 0) {
214
+ if (utf8Length < that.utf8Length) {
215
+ cmp = -1;
216
+ } else if (utf8Length > that.utf8Length) {
217
+ cmp = 1;
218
+ } else {
219
+ cmp = 0;
220
+ }
221
+ }
222
+ return cmp;
223
+ }
224
+
225
+ void CalculateByteLength() {
226
+ const char* pstr = str;
227
+ for (size_t i = 0; i < utf8Length; i++) {
228
+ pstr = UTF8Util::NextChar(pstr);
229
+ }
230
+ byteLength = static_cast<LengthType>(pstr - str);
231
+ }
232
+
233
+ const char* str;
234
+ LengthType utf8Length;
235
+ LengthType byteLength;
236
+ };
237
+
238
+ typedef UTF8StringSliceBase<size_t> UTF8StringSlice;
239
+
240
+ template <typename LENGTH_TYPE>
241
+ std::ostream& operator<<(::std::ostream& os,
242
+ const UTF8StringSliceBase<LENGTH_TYPE>& str) {
243
+ return os << str.ToString();
244
+ }
245
+
246
+ } // namespace opencc
@@ -0,0 +1,291 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2013 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #ifdef _MSC_VER
22
+ #ifndef NOMINMAX
23
+ #define NOMINMAX
24
+ #endif
25
+ #include <Windows.h>
26
+ #endif // _MSC_VER
27
+
28
+ #include <cstring>
29
+
30
+ #include "Common.hpp"
31
+ #include "Exception.hpp"
32
+
33
+ namespace opencc {
34
+ /**
35
+ * UTF8 std::string utilities
36
+ * @ingroup opencc_cpp_api
37
+ */
38
+ class OPENCC_EXPORT UTF8Util {
39
+ public:
40
+ /**
41
+ * Detect UTF8 BOM and skip it.
42
+ */
43
+ static void SkipUtf8Bom(FILE* fp);
44
+
45
+ /**
46
+ * Returns the length in byte for the next UTF8 character.
47
+ * On error returns 0.
48
+ */
49
+ static size_t NextCharLengthNoException(const char* str) {
50
+ char ch = *str;
51
+ if ((ch & 0xF0) == 0xE0) {
52
+ return 3;
53
+ } else if ((ch & 0x80) == 0x00) {
54
+ return 1;
55
+ } else if ((ch & 0xE0) == 0xC0) {
56
+ return 2;
57
+ } else if ((ch & 0xF8) == 0xF0) {
58
+ return 4;
59
+ } else if ((ch & 0xFC) == 0xF8) {
60
+ return 5;
61
+ } else if ((ch & 0xFE) == 0xFC) {
62
+ return 6;
63
+ }
64
+ return 0;
65
+ }
66
+
67
+ /**
68
+ * Returns the length in byte for the next UTF8 character.
69
+ */
70
+ static size_t NextCharLength(const char* str) {
71
+ size_t length = NextCharLengthNoException(str);
72
+ if (length == 0) {
73
+ throw InvalidUTF8(str);
74
+ }
75
+ return length;
76
+ }
77
+
78
+ /**
79
+ * Returns the length in byte for the previous UTF8 character.
80
+ */
81
+ static size_t PrevCharLength(const char* str) {
82
+ {
83
+ const size_t length = NextCharLengthNoException(str - 3);
84
+ if (length == 3) {
85
+ return length;
86
+ }
87
+ }
88
+ {
89
+ const size_t length = NextCharLengthNoException(str - 1);
90
+ if (length == 1) {
91
+ return length;
92
+ }
93
+ }
94
+ {
95
+ const size_t length = NextCharLengthNoException(str - 2);
96
+ if (length == 2) {
97
+ return length;
98
+ }
99
+ }
100
+ for (size_t i = 4; i <= 6; i++) {
101
+ const size_t length = NextCharLengthNoException(str - i);
102
+ if (length == i) {
103
+ return length;
104
+ }
105
+ }
106
+ throw InvalidUTF8(str);
107
+ }
108
+
109
+ /**
110
+ * Returns the char* pointer over the next UTF8 character.
111
+ */
112
+ static const char* NextChar(const char* str) {
113
+ return str + NextCharLength(str);
114
+ }
115
+
116
+ /**
117
+ * Move the char* pointer before the previous UTF8 character.
118
+ */
119
+ static const char* PrevChar(const char* str) {
120
+ return str - PrevCharLength(str);
121
+ }
122
+
123
+ /**
124
+ * Returns the UTF8 length of a valid UTF8 std::string.
125
+ */
126
+ static size_t Length(const char* str) {
127
+ size_t length = 0;
128
+ while (*str != '\0') {
129
+ str = NextChar(str);
130
+ length++;
131
+ }
132
+ return length;
133
+ }
134
+
135
+ /**
136
+ * Finds a character in the same line.
137
+ * @param str The text to be searched in.
138
+ * @param ch The character to find.
139
+ * @return The pointer that points to the found chacter in str or EOL/EOF.
140
+ */
141
+ static const char* FindNextInline(const char* str, const char ch) {
142
+ while (!IsLineEndingOrFileEnding(*str) && *str != ch) {
143
+ str = NextChar(str);
144
+ }
145
+ return str;
146
+ }
147
+
148
+ /**
149
+ * Returns true if the character is a line ending or end of file.
150
+ */
151
+ static bool IsLineEndingOrFileEnding(const char ch) {
152
+ return ch == '\0' || ch == '\n' || ch == '\r';
153
+ }
154
+
155
+ /**
156
+ * Copies a substring with given length to a new string.
157
+ */
158
+ static std::string FromSubstr(const char* str, size_t length) {
159
+ std::string newStr;
160
+ newStr.resize(length);
161
+ strncpy(const_cast<char*>(newStr.c_str()), str, length);
162
+ return newStr;
163
+ }
164
+
165
+ /**
166
+ * Returns true if the given std::string is longer or as long as the given
167
+ * length.
168
+ */
169
+ static bool NotShorterThan(const char* str, size_t byteLength) {
170
+ while (byteLength > 0) {
171
+ if (*str == '\0') {
172
+ return false;
173
+ }
174
+ byteLength--;
175
+ str++;
176
+ }
177
+ return true;
178
+ }
179
+
180
+ /**
181
+ * Truncates a std::string with a maximal length in byte.
182
+ * No UTF8 character will be broken.
183
+ */
184
+ static std::string TruncateUTF8(const char* str, size_t maxByteLength) {
185
+ std::string wordTrunc;
186
+ if (NotShorterThan(str, maxByteLength)) {
187
+ size_t len = 0;
188
+ const char* pStr = str;
189
+ for (;;) {
190
+ const size_t charLength = NextCharLength(pStr);
191
+ if (len + charLength > maxByteLength) {
192
+ break;
193
+ }
194
+ pStr += charLength;
195
+ len += charLength;
196
+ }
197
+ wordTrunc = FromSubstr(str, len);
198
+ } else {
199
+ wordTrunc = str;
200
+ }
201
+ return wordTrunc;
202
+ }
203
+
204
+ /**
205
+ * Replaces all patterns in a std::string in place.
206
+ */
207
+ static void ReplaceAll(std::string& str, const char* from, const char* to) {
208
+ std::string::size_type pos = 0;
209
+ std::string::size_type fromLen = strlen(from);
210
+ std::string::size_type toLen = strlen(to);
211
+ while ((pos = str.find(from, pos)) != std::string::npos) {
212
+ str.replace(pos, fromLen, to);
213
+ pos += toLen;
214
+ }
215
+ }
216
+
217
+ /**
218
+ * Joins a std::string vector in to a std::string with a separator.
219
+ */
220
+ static std::string Join(const std::vector<std::string>& strings,
221
+ const std::string& separator) {
222
+ std::ostringstream buffer;
223
+ bool first = true;
224
+ for (const auto& str : strings) {
225
+ if (!first) {
226
+ buffer << separator;
227
+ }
228
+ buffer << str;
229
+ first = false;
230
+ }
231
+ return buffer.str();
232
+ }
233
+
234
+ /**
235
+ * Joins a std::string vector in to a std::string.
236
+ */
237
+ static std::string Join(const std::vector<std::string>& strings) {
238
+ std::ostringstream buffer;
239
+ for (const auto& str : strings) {
240
+ buffer << str;
241
+ }
242
+ return buffer.str();
243
+ }
244
+
245
+ static void GetByteMap(const char* str, const size_t utf8Length,
246
+ std::vector<size_t>* byteMap) {
247
+ if (byteMap->size() < utf8Length) {
248
+ byteMap->resize(utf8Length);
249
+ }
250
+ const char* pstr = str;
251
+ for (size_t i = 0; i < utf8Length; i++) {
252
+ (*byteMap)[i] = pstr - str;
253
+ pstr = NextChar(pstr);
254
+ }
255
+ }
256
+
257
+ #ifdef _MSC_VER
258
+ static std::wstring GetPlatformString(const std::string& str) {
259
+ return U8ToU16(str);
260
+ }
261
+ #else
262
+ static std::string GetPlatformString(const std::string& str) { return str; }
263
+ #endif // _MSC_VER
264
+
265
+ #ifdef _MSC_VER
266
+ static std::string U16ToU8(const std::wstring& wstr) {
267
+ std::string ret;
268
+ int length = static_cast<int>(wstr.length());
269
+ int convcnt = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, NULL, 0,
270
+ NULL, NULL);
271
+ if (convcnt > 0) {
272
+ ret.resize(convcnt);
273
+ WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, &ret[0], convcnt,
274
+ NULL, NULL);
275
+ }
276
+ return ret;
277
+ }
278
+
279
+ static std::wstring U8ToU16(const std::string& str) {
280
+ std::wstring ret;
281
+ int length = static_cast<int>(str.length());
282
+ int convcnt = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, NULL, 0);
283
+ if (convcnt > 0) {
284
+ ret.resize(convcnt);
285
+ MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, &ret[0], convcnt);
286
+ }
287
+ return ret;
288
+ }
289
+ #endif // _MSC_VER
290
+ };
291
+ } // namespace opencc
@@ -0,0 +1,161 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #ifndef __OPENCC_H_
20
+ #define __OPENCC_H_
21
+
22
+ #ifdef __cplusplus
23
+
24
+ #include "Export.hpp"
25
+ #include "SimpleConverter.hpp"
26
+ #include <string>
27
+
28
+ extern "C" {
29
+ #else
30
+ #include <stddef.h>
31
+ #endif
32
+
33
+ #ifndef OPENCC_EXPORT
34
+ #define OPENCC_EXPORT
35
+ #endif
36
+
37
+ /**
38
+ * @defgroup opencc_c_api OpenCC C API
39
+ *
40
+ * API in C language
41
+ */
42
+
43
+ /**
44
+ * Filename of default Simplified to Traditional configuration
45
+ *
46
+ * @ingroup opencc_c_api
47
+ */
48
+ #define OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD "s2t.json"
49
+
50
+ /**
51
+ * Filename of default Traditional to Simplified configuration
52
+ *
53
+ * @ingroup opencc_c_api
54
+ */
55
+ #define OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP "t2s.json"
56
+
57
+ /**
58
+ * Type of opencc descriptor
59
+ *
60
+ * @ingroup opencc_c_api
61
+ */
62
+ typedef void* opencc_t;
63
+
64
+ /**
65
+ * Makes an instance of opencc
66
+ *
67
+ * @param configFileName Location of configuration file. If this is set to NULL,
68
+ * OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD will be loaded.
69
+ * @return A description pointer of the newly allocated instance of
70
+ * opencc. On error the return value will be (opencc_t) -1.
71
+ * @ingroup opencc_c_api
72
+ */
73
+ OPENCC_EXPORT opencc_t opencc_open(const char* configFileName);
74
+ #ifdef _MSC_VER
75
+ /**
76
+ * Makes an instance of opencc (wide char / Unicode)
77
+ *
78
+ * @param configFileName Location of configuration file. If this is set to NULL,
79
+ * OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD will be loaded.
80
+ * @return A description pointer of the newly allocated instance of
81
+ * opencc. On error the return value will be (opencc_t) -1.
82
+ * @ingroup opencc_c_api
83
+ */
84
+ OPENCC_EXPORT opencc_t opencc_open_w(const wchar_t* configFileName);
85
+ #endif /* _MSC_VER */
86
+
87
+ /**
88
+ * Destroys an instance of opencc
89
+ *
90
+ * @param opencc The description pointer.
91
+ * @return 0 on success or non-zero number on failure.
92
+ * @ingroup opencc_c_api
93
+ */
94
+ OPENCC_EXPORT int opencc_close(opencc_t opencc);
95
+
96
+ /**
97
+ * Converts a UTF-8 string
98
+ *
99
+ * @param opencc The opencc description pointer.
100
+ * @param input The UTF-8 encoded string.
101
+ * @param length The maximum length in byte to convert. If length is (size_t)-1,
102
+ * the whole string (terminated by '\0') will be converted.
103
+ * @param output The buffer to store converted text. You MUST make sure this
104
+ * buffer has sufficient space.
105
+ *
106
+ * @return The length of the converted string or (size_t)-1 on error.
107
+ *
108
+ * @ingroup opencc_c_api
109
+ */
110
+ OPENCC_EXPORT size_t opencc_convert_utf8_to_buffer(opencc_t opencc,
111
+ const char* input,
112
+ size_t length, char* output);
113
+
114
+ /**
115
+ * Converts a UTF-8 string
116
+ * This function returns an allocated C-style string, which stores
117
+ * the converted string.
118
+ * You MUST call opencc_convert_utf8_free() to release allocated memory.
119
+ *
120
+ * @param opencc The opencc description pointer.
121
+ * @param input The UTF-8 encoded string.
122
+ * @param length The maximum length in byte to convert. If length is (size_t)-1,
123
+ * the whole string (terminated by '\0') will be converted.
124
+ *
125
+ * @return The newly allocated UTF-8 string that stores text
126
+ * converted, or NULL on error.
127
+ * @ingroup opencc_c_api
128
+ */
129
+ OPENCC_EXPORT char* opencc_convert_utf8(opencc_t opencc, const char* input,
130
+ size_t length);
131
+
132
+ /**
133
+ * Releases allocated buffer by opencc_convert_utf8
134
+ *
135
+ * @param str Pointer to the string buffer allocated by
136
+ * opencc_convert_utf8.
137
+ *
138
+ * @ingroup opencc_c_api
139
+ */
140
+ OPENCC_EXPORT void opencc_convert_utf8_free(char* str);
141
+
142
+ /**
143
+ * Returns the last error message
144
+ *
145
+ * Note that this function is the only one which is NOT thread-safe.
146
+ *
147
+ * @ingroup opencc_c_api
148
+ */
149
+ OPENCC_EXPORT const char* opencc_error(void);
150
+
151
+ #ifdef __cplusplus
152
+ } // extern "C"
153
+ #endif
154
+
155
+ /**
156
+ * @defgroup opencc_cpp_api OpenCC C++ Comprehensive API
157
+ *
158
+ * Comprehensive API in C++ language
159
+ */
160
+
161
+ #endif
@@ -0,0 +1,21 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2021 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #define OPENCC_ENABLE_DARTS