nodejieba-plus 3.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/FUNDING.yml +12 -0
- package/.github/workflows/github_release.yml +61 -0
- package/.github/workflows/npm_publish.yml +24 -0
- package/.github/workflows/stale-issues.yml +24 -0
- package/.github/workflows/test.yml +42 -0
- package/.gitmodules +3 -0
- package/.npmignore +15 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +78 -0
- package/LICENSE +21 -0
- package/README.md +349 -0
- package/binding.gyp +63 -0
- package/index.js +77 -0
- package/lib/index.cpp +3 -0
- package/lib/nodejieba.cpp +218 -0
- package/lib/nodejieba.h +28 -0
- package/lib/utils.h +47 -0
- package/package.json +48 -0
- package/submodules/cppjieba/.github/workflows/cmake.yml +51 -0
- package/submodules/cppjieba/.github/workflows/stale-issues.yml +24 -0
- package/submodules/cppjieba/.gitmodules +3 -0
- package/submodules/cppjieba/CHANGELOG.md +305 -0
- package/submodules/cppjieba/CMakeLists.txt +42 -0
- package/submodules/cppjieba/LICENSE +20 -0
- package/submodules/cppjieba/README.md +280 -0
- package/submodules/cppjieba/deps/limonp/.github/workflows/cmake.yml +43 -0
- package/submodules/cppjieba/deps/limonp/.gitmodules +0 -0
- package/submodules/cppjieba/deps/limonp/CHANGELOG.md +160 -0
- package/submodules/cppjieba/deps/limonp/CMakeLists.txt +61 -0
- package/submodules/cppjieba/deps/limonp/LICENSE +20 -0
- package/submodules/cppjieba/deps/limonp/README.md +38 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/ArgvContext.hpp +70 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/Closure.hpp +206 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/Colors.hpp +31 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/Condition.hpp +38 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/Config.hpp +103 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/ForcePublic.hpp +7 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/LocalVector.hpp +139 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/Logging.hpp +90 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/NonCopyable.hpp +21 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/StdExtension.hpp +157 -0
- package/submodules/cppjieba/deps/limonp/include/limonp/StringUtil.hpp +386 -0
- package/submodules/cppjieba/deps/limonp/test/CMakeLists.txt +8 -0
- package/submodules/cppjieba/deps/limonp/test/demo.cpp +40 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/1.conf +5 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/StdExtension.data +3 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/dict.gbk +50 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/dict.utf8 +50 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/io_testfile +2 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/jieba.dict.0.1.utf8 +93 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/jieba.dict.0.utf8 +93 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/jieba.dict.1.utf8 +67 -0
- package/submodules/cppjieba/deps/limonp/test/testdata/jieba.dict.2.utf8 +64 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/CMakeLists.txt +30 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TArgvContext.cpp +16 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TCastFloat.cpp +19 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TClosure.cpp +85 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TColorPrint.cpp +20 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TConfig.cpp +17 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TLocalVector.cpp +41 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TLogging.cpp +12 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TStdExtension.cpp +95 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/TStringUtil.cpp +183 -0
- package/submodules/cppjieba/deps/limonp/test/unittest/gtest_main.cpp +39 -0
- package/submodules/cppjieba/dict/README.md +31 -0
- package/submodules/cppjieba/dict/hmm_model.utf8 +34 -0
- package/submodules/cppjieba/dict/idf.utf8 +258826 -0
- package/submodules/cppjieba/dict/jieba.dict.utf8 +348982 -0
- package/submodules/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
- package/submodules/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
- package/submodules/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
- package/submodules/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
- package/submodules/cppjieba/dict/stop_words.utf8 +1534 -0
- package/submodules/cppjieba/dict/user.dict.utf8 +4 -0
- package/submodules/cppjieba/include/cppjieba/DictTrie.hpp +381 -0
- package/submodules/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
- package/submodules/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
- package/submodules/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
- package/submodules/cppjieba/include/cppjieba/Jieba.hpp +169 -0
- package/submodules/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
- package/submodules/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
- package/submodules/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
- package/submodules/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
- package/submodules/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
- package/submodules/cppjieba/include/cppjieba/QuerySegment.hpp +89 -0
- package/submodules/cppjieba/include/cppjieba/SegmentBase.hpp +48 -0
- package/submodules/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
- package/submodules/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
- package/submodules/cppjieba/include/cppjieba/Trie.hpp +200 -0
- package/submodules/cppjieba/include/cppjieba/Unicode.hpp +231 -0
- package/submodules/cppjieba/test/CMakeLists.txt +4 -0
- package/submodules/cppjieba/test/load_test.cpp +54 -0
- package/submodules/cppjieba/test/testdata/curl.res +1 -0
- package/submodules/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
- package/submodules/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
- package/submodules/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
- package/submodules/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
- package/submodules/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
- package/submodules/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
- package/submodules/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
- package/submodules/cppjieba/test/testdata/load_test.urls +2 -0
- package/submodules/cppjieba/test/testdata/review.100 +100 -0
- package/submodules/cppjieba/test/testdata/review.100.res +200 -0
- package/submodules/cppjieba/test/testdata/server.conf +19 -0
- package/submodules/cppjieba/test/testdata/testlines.gbk +9 -0
- package/submodules/cppjieba/test/testdata/testlines.utf8 +8 -0
- package/submodules/cppjieba/test/testdata/userdict.2.utf8 +1 -0
- package/submodules/cppjieba/test/testdata/userdict.english +2 -0
- package/submodules/cppjieba/test/testdata/userdict.utf8 +8 -0
- package/submodules/cppjieba/test/testdata/weicheng.utf8 +247 -0
- package/submodules/cppjieba/test/unittest/CMakeLists.txt +33 -0
- package/submodules/cppjieba/test/unittest/gtest_main.cpp +39 -0
- package/submodules/cppjieba/test/unittest/jieba_test.cpp +166 -0
- package/submodules/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
- package/submodules/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
- package/submodules/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
- package/submodules/cppjieba/test/unittest/segments_test.cpp +256 -0
- package/submodules/cppjieba/test/unittest/textrank_test.cpp +86 -0
- package/submodules/cppjieba/test/unittest/trie_test.cpp +177 -0
- package/submodules/cppjieba/test/unittest/unicode_test.cpp +43 -0
- package/test/debug_split +0 -0
- package/test/debug_split2 +0 -0
- package/test/debug_split3 +0 -0
- package/test/load_dict_test.js +14 -0
- package/test/missing_binding_test.js +42 -0
- package/test/test.js +366 -0
- package/test/testdata/userdict.utf8 +1 -0
- package/tsconfig.json +59 -0
- package/types/index.d.ts +30 -0
- package/typescript_demo.ts +38 -0
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
/************************************
|
|
2
|
+
* file enc : ascii
|
|
3
|
+
* author : wuyanyi09@gmail.com
|
|
4
|
+
************************************/
|
|
5
|
+
#ifndef LIMONP_STR_FUNCTS_H
|
|
6
|
+
#define LIMONP_STR_FUNCTS_H
|
|
7
|
+
#include <fstream>
|
|
8
|
+
#include <iostream>
|
|
9
|
+
#include <string>
|
|
10
|
+
#include <vector>
|
|
11
|
+
#include <algorithm>
|
|
12
|
+
#include <cctype>
|
|
13
|
+
#include <map>
|
|
14
|
+
#include <cassert>
|
|
15
|
+
#include <ctime>
|
|
16
|
+
#include <stdint.h>
|
|
17
|
+
#include <stdio.h>
|
|
18
|
+
#include <stdarg.h>
|
|
19
|
+
#include <memory.h>
|
|
20
|
+
#include <functional>
|
|
21
|
+
#include <locale>
|
|
22
|
+
#include <sstream>
|
|
23
|
+
#include <sys/types.h>
|
|
24
|
+
#include <iterator>
|
|
25
|
+
#include <algorithm>
|
|
26
|
+
#include "StdExtension.hpp"
|
|
27
|
+
|
|
28
|
+
namespace limonp {
|
|
29
|
+
using namespace std;
|
|
30
|
+
// printf-style formatting into a std::string.
//
// fmt  : printf format string; the variadic arguments must match it.
// Returns the formatted text, or an empty string if formatting keeps failing
// (vsnprintf reports an error) until the internal size cap is reached.
inline string StringFormat(const char* fmt, ...) {
  // Upper bound on the scratch buffer so that a persistent vsnprintf error
  // cannot make the grow-and-retry loop run forever.
  const size_t kMaxSize = size_t(1) << 30;
  size_t size = 256;
  std::string str;
  while (size <= kMaxSize) {
    str.resize(size);
    va_list ap;
    va_start(ap, fmt);
    // Write through &str[0] (mutable, contiguous since C++11) instead of
    // casting away the constness of c_str().
    int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n > -1 && static_cast<size_t>(n) < size) {
      str.resize(static_cast<size_t>(n));
      return str;
    }
    if (n > -1) {
      // C99 behaviour: n is the exact length needed (without the NUL).
      size = static_cast<size_t>(n) + 1;
    } else {
      // Error return (or a pre-C99 runtime signalling truncation): grow and retry.
      size *= 2;
    }
  }
  str.clear();
  return str;
}
|
|
50
|
+
|
|
51
|
+
// Streams every element of [begin, end) into `res`, separated by `connector`.
// Elements only need an operator<< for output streams.
// An empty range leaves `res` untouched.
template<class T>
void Join(T begin, T end, string& res, const string& connector) {
  if (begin == end) {
    return;
  }
  stringstream joined;
  joined << *begin;
  for (begin++; begin != end; begin++) {
    joined << connector << *begin;
  }
  res = joined.str();
}
|
|
65
|
+
|
|
66
|
+
// Convenience overload: joins [begin, end) with `connector` and returns the
// result by value. An empty range yields an empty string.
template<class T>
string Join(T begin, T end, const string& connector) {
  string joined;
  Join(begin, end, joined, connector);
  return joined;
}
|
|
72
|
+
|
|
73
|
+
// Upper-cases `str` in place (per byte, current C locale) and returns it.
inline string& Upper(string& str) {
  // <cctype> functions require a value representable as unsigned char;
  // passing a negative char (e.g. a UTF-8 byte) is undefined behaviour.
  transform(str.begin(), str.end(), str.begin(),
            [](unsigned char ch) { return static_cast<char>(std::toupper(ch)); });
  return str;
}
|
|
77
|
+
|
|
78
|
+
// Lower-cases `str` in place (per byte, current C locale) and returns it.
inline string& Lower(string& str) {
  // <cctype> functions require a value representable as unsigned char;
  // passing a negative char (e.g. a UTF-8 byte) is undefined behaviour.
  transform(str.begin(), str.end(), str.begin(),
            [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
  return str;
}
|
|
82
|
+
|
|
83
|
+
// Whitespace test that is safe for arbitrary code points: anything above
// 0xff is reported as non-space instead of being handed to std::isspace,
// which is only defined for unsigned-char values.
inline bool IsSpace(unsigned c) {
  if (c > 0xff) {
    return false;
  }
  return std::isspace(static_cast<int>(c)) != 0;
}
|
|
87
|
+
|
|
88
|
+
// Removes leading whitespace (as classified by std::isspace) from `s`
// in place and returns `s`.
inline std::string& LTrim(std::string &s) {
  std::string::size_type skip = 0;
  while (skip < s.size() && std::isspace(static_cast<unsigned char>(s[skip]))) {
    ++skip;
  }
  s.erase(0, skip);
  return s;
}
|
|
94
|
+
|
|
95
|
+
// Removes trailing whitespace (as classified by std::isspace) from `s`
// in place and returns `s`.
inline std::string& RTrim(std::string &s) {
  std::string::size_type keep = s.size();
  while (keep > 0 && std::isspace(static_cast<unsigned char>(s[keep - 1]))) {
    --keep;
  }
  s.erase(keep);
  return s;
}
|
|
101
|
+
|
|
102
|
+
inline std::string& Trim(std::string &s) {
|
|
103
|
+
return LTrim(RTrim(s));
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Removes leading characters that are either whitespace or equal to `x`
// from `s` in place and returns `s`.
inline std::string& LTrim(std::string& s, char x) {
  // Compare as unsigned char on both sides: with a signed `char`, a
  // non-ASCII `x` is negative and would never equal the unsigned byte.
  const unsigned char extra = static_cast<unsigned char>(x);
  s.erase(s.begin(), std::find_if(s.begin(), s.end(),
      [extra](unsigned char c) { return !std::isspace(c) && c != extra; }));
  return s;
}
|
|
111
|
+
|
|
112
|
+
// Removes trailing characters that are either whitespace or equal to `x`
// from `s` in place and returns `s`.
inline std::string& RTrim(std::string& s, char x) {
  // Compare as unsigned char on both sides: with a signed `char`, a
  // non-ASCII `x` is negative and would never equal the unsigned byte.
  const unsigned char extra = static_cast<unsigned char>(x);
  s.erase(std::find_if(s.rbegin(), s.rend(),
      [extra](unsigned char c) { return !std::isspace(c) && c != extra; }).base(), s.end());
  return s;
}
|
|
117
|
+
|
|
118
|
+
inline std::string& Trim(std::string &s, char x) {
|
|
119
|
+
return LTrim(RTrim(s, x), x);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Splits `src` at every character contained in `pattern` (each character
// of `pattern` is a separate delimiter) and stores the pieces in `res`,
// which is cleared first.
//
// maxsplit : once `res` already holds this many pieces, the remainder of
//            `src` is appended as one final piece.
// Empty pieces between adjacent delimiters are kept; a delimiter at the very
// end of `src` does not produce a trailing empty piece.
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
  res.clear();
  for (size_t cursor = 0; cursor < src.size();) {
    const size_t hit = src.find_first_of(pattern, cursor);
    const bool lastPiece = (hit == string::npos) || (res.size() >= maxsplit);
    if (lastPiece) {
      res.push_back(src.substr(cursor));
      return;
    }
    res.push_back(src.substr(cursor, hit - cursor));
    cursor = hit + 1;
  }
}
|
|
140
|
+
|
|
141
|
+
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
|
|
142
|
+
vector<string> res;
|
|
143
|
+
Split(src, res, pattern, maxsplit);
|
|
144
|
+
return res;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// True when `str` begins with `prefix`. An empty prefix always matches.
inline bool StartsWith(const string& str, const string& prefix) {
  const size_t n = prefix.length();
  return n <= str.length() && str.compare(0, n, prefix) == 0;
}
|
|
153
|
+
|
|
154
|
+
// True when `str` ends with `suffix`. An empty suffix always matches.
inline bool EndsWith(const string& str, const string& suffix) {
  const size_t n = suffix.length();
  if (n > str.length()) {
    return false;
  }
  const size_t tailStart = str.length() - n;
  return str.compare(tailStart, n, suffix) == 0;
}
|
|
160
|
+
|
|
161
|
+
// True when the character `ch` occurs anywhere in `str`.
inline bool IsInStr(const string& str, char ch) {
  return std::find(str.begin(), str.end(), ch) != str.end();
}
|
|
164
|
+
|
|
165
|
+
// Packs two bytes into one 16-bit value: `high` becomes the upper byte and
// `low` the lower byte. Only the low 8 bits of each argument are used.
inline uint16_t TwocharToUint16(char high, char low) {
  const unsigned upper = static_cast<unsigned char>(high);
  const unsigned lower = static_cast<unsigned char>(low);
  return static_cast<uint16_t>((upper << 8) | lower);
}
|
|
168
|
+
|
|
169
|
+
// Decodes `len` bytes of UTF-8 starting at `str` into 16-bit code units
// appended to `vec` (which is cleared first). Only 1-, 2- and 3-byte
// sequences are supported, i.e. code points up to U+FFFF.
//
// Returns false when `str` is null, when a sequence is truncated, when a
// lead byte is not a valid 1/2/3-byte lead (including a stray continuation
// byte 0x80..0xBF), or when a continuation byte is not of the form 10xxxxxx.
// On failure `vec` holds the code units decoded before the error.
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if(!str) {
    return false;
  }
  vec.clear();
  size_t i = 0;
  while(i < len) {
    const uint8_t b0 = static_cast<uint8_t>(str[i]);
    if(b0 < 0x80) { // 0xxxxxxx
      vec.push_back(static_cast<uint16_t>(b0));
      i += 1;
    } else if(b0 >= 0xc0 && b0 <= 0xdf && i + 1 < len) { // 110xxxxx 10xxxxxx
      const uint8_t b1 = static_cast<uint8_t>(str[i + 1]);
      if((b1 & 0xc0) != 0x80) {
        return false;
      }
      vec.push_back(static_cast<uint16_t>(((b0 & 0x1f) << 6) | (b1 & 0x3f)));
      i += 2;
    } else if(b0 >= 0xe0 && b0 <= 0xef && i + 2 < len) { // 1110xxxx 10xxxxxx 10xxxxxx
      const uint8_t b1 = static_cast<uint8_t>(str[i + 1]);
      const uint8_t b2 = static_cast<uint8_t>(str[i + 2]);
      if((b1 & 0xc0) != 0x80 || (b2 & 0xc0) != 0x80) {
        return false;
      }
      vec.push_back(static_cast<uint16_t>(((b0 & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f)));
      i += 3;
    } else {
      // Stray continuation byte, 4-byte lead, invalid lead, or truncated input.
      return false;
    }
  }
  return true;
}
|
|
199
|
+
|
|
200
|
+
// std::string convenience overload: decodes all of `str` via the
// pointer/length overload and reports its result.
template <class Uint16Container>
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  const size_t length = str.size();
  return Utf8ToUnicode(bytes, length, vec);
}
|
|
204
|
+
|
|
205
|
+
// Decodes the UTF-8 text in `str` into 32-bit code points appended to `vec`
// (which is cleared first). Sequences of 1 to 4 bytes are supported.
//
// Returns false when a sequence is truncated, when a lead byte is invalid
// (a stray continuation byte 0x80..0xBF or anything above 0xF7), or when a
// continuation byte is not of the form 10xxxxxx. On failure `vec` holds the
// code points decoded before the error.
template <class Uint32Container>
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
  vec.clear();
  const size_t len = str.size();
  size_t i = 0;
  while(i < len) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    uint32_t tmp;
    size_t extra; // number of continuation bytes that must follow the lead
    if(lead < 0x80) {          // 0xxxxxxx : 7 payload bits
      tmp = lead;
      extra = 0;
    } else if(lead < 0xc0) {   // 10xxxxxx : continuation byte cannot start a sequence
      return false;
    } else if(lead <= 0xdf) {  // 110xxxxx : 5 payload bits
      tmp = lead & 0x1f;
      extra = 1;
    } else if(lead <= 0xef) {  // 1110xxxx : 4 payload bits
      tmp = lead & 0x0f;
      extra = 2;
    } else if(lead <= 0xf7) {  // 11110xxx : 3 payload bits
      tmp = lead & 0x07;
      extra = 3;
    } else {
      return false;
    }
    if(i + extra >= len) {
      return false; // truncated sequence
    }
    for(size_t k = 1; k <= extra; k++) {
      const uint8_t cont = static_cast<uint8_t>(str[i + k]);
      if((cont & 0xc0) != 0x80) {
        return false;
      }
      // Each continuation byte contributes 6 more payload bits.
      tmp = (tmp << 6) | (cont & 0x3f);
    }
    vec.push_back(tmp);
    i += extra + 1;
  }
  return true;
}
|
|
259
|
+
|
|
260
|
+
// Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`
// (which is cleared first). Values above 0xFFFF are written as 4-byte
// sequences; no range or surrogate validation is performed.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
  res.clear();
  uint32_t ui;
  while(begin != end) {
    ui = *begin;
    if(ui <= 0x7f) {
      res += char(ui);
    } else if(ui <= 0x7ff) {
      res += char(((ui >> 6) & 0x1f) | 0xc0);
      res += char((ui & 0x3f) | 0x80);
    } else if(ui <= 0xffff) {
      res += char(((ui >> 12) & 0x0f) | 0xe0);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    } else {
      // The 4-byte lead 11110xxx carries three payload bits (mask 0x07).
      // A 2-bit mask drops bit 20, which corrupts U+100000..U+10FFFF.
      res += char(((ui >> 18) & 0x07) | 0xf0);
      res += char(((ui >> 12) & 0x3f) | 0x80);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    }
    begin ++;
  }
}
|
|
284
|
+
|
|
285
|
+
// Encodes the 16-bit code units in [begin, end) as UTF-8 into `res`
// (which is cleared first). Each unit becomes a 1-, 2- or 3-byte sequence
// depending on its magnitude.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; begin++) {
    const uint16_t unit = *begin;
    if(unit > 0x7ff) {
      // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      res += char(0xe0 | ((unit >> 12) & 0x0f));
      res += char(0x80 | ((unit >> 6) & 0x3f));
      res += char(0x80 | (unit & 0x3f));
    } else if(unit > 0x7f) {
      // two bytes: 110xxxxx 10xxxxxx
      res += char(0xc0 | ((unit >> 6) & 0x1f));
      res += char(0x80 | (unit & 0x3f));
    } else {
      // plain ASCII
      res += char(unit);
    }
  }
}
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
// Converts `len` bytes of double-byte encoded text (named GBK here) at `str`
// into 16-bit units appended to `vec` (which is cleared first).
// A byte with the high bit clear becomes one unit; a byte with the high bit
// set is combined with the following byte as (lead << 8) | trail.
//
// Returns true for a null `str` (leaving `vec` empty) and false when a
// high-bit byte is the last byte of the input.
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if(!str) {
    return true;
  }
  for(size_t pos = 0; pos < len;) {
    const unsigned lead = static_cast<unsigned char>(str[pos]);
    if(lead < 0x80) {
      vec.push_back(uint16_t(lead));
      pos += 1;
      continue;
    }
    if(pos + 1 >= len) {
      // lead byte without its trailing byte
      return false;
    }
    const unsigned trail = static_cast<unsigned char>(str[pos + 1]);
    vec.push_back(static_cast<uint16_t>((lead << 8) | trail));
    pos += 2;
  }
  return true;
}
|
|
329
|
+
|
|
330
|
+
// std::string convenience overload: converts all of `str` via the
// pointer/length overload and reports its result.
template <class Uint16Container>
bool GBKTrans(const string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  const size_t length = str.size();
  return GBKTrans(bytes, length, vec);
}
|
|
334
|
+
|
|
335
|
+
// Inverse conversion: writes the 16-bit units in [begin, end) back as bytes
// into `res` (which is cleared first). A unit whose upper byte has the high
// bit set is emitted as two bytes (upper, lower); any other unit is emitted
// as its lower byte only.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; begin++) {
    const char upper = ((*begin) >> 8) & 0x00ff;
    const char lower = (*begin) & 0x00ff;
    if(upper & 0x80) {
      res += upper;
    }
    res += lower;
  }
}
|
|
353
|
+
|
|
354
|
+
/*
 * Formats the current local time with strftime.
 *
 * format  : strftime format string, e.g. "%Y-%m-%d %H:%M:%S"
 * timeStr : receives the formatted time. It is left empty when the format
 *           is empty, when the local time cannot be obtained, or when the
 *           output does not fit into the largest buffer tried.
 */
inline void GetTime(const string& format, string& timeStr) {
  string out;

  time_t timeNow;
  time(&timeNow);

  struct tm tmNow;
  bool haveTime;
#if defined(_WIN32) || defined(_WIN64)
  haveTime = (localtime_s(&tmNow, &timeNow) == 0);
#else
  haveTime = (localtime_r(&timeNow, &tmNow) != nullptr);
#endif

  if(haveTime && !format.empty()) {
    // strftime returns 0 when the buffer is too small, so grow and retry
    // instead of silently producing an empty string for long formats.
    for(size_t cap = 64; cap <= 4096; cap *= 2) {
      string buf(cap, '\0');
      // Write through &buf[0] rather than casting away c_str() constness.
      size_t len = strftime(&buf[0], buf.size(), format.c_str(), &tmNow);
      if(len > 0) {
        buf.resize(len);
        out.swap(buf);
        break;
      }
    }
  }

  // Assign last so that passing the same object as format and timeStr works.
  timeStr.swap(out);
}
|
|
377
|
+
|
|
378
|
+
inline string PathJoin(const string& path1, const string& path2) {
|
|
379
|
+
if(EndsWith(path1, "/")) {
|
|
380
|
+
return path1 + path2;
|
|
381
|
+
}
|
|
382
|
+
return path1 + "/" + path2;
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
}
|
|
386
|
+
#endif
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#include "limonp/StringUtil.hpp"
|
|
2
|
+
#include "limonp/Logging.hpp"
|
|
3
|
+
|
|
4
|
+
using namespace std;
|
|
5
|
+
|
|
6
|
+
#define print(x) std::cout << x << std::endl
|
|
7
|
+
|
|
8
|
+
int main(int argc, char** argv) {
|
|
9
|
+
string strname = "hello, world";
|
|
10
|
+
print(strname); //hello, world
|
|
11
|
+
map<string, int> mp;
|
|
12
|
+
mp["hello"] = 1;
|
|
13
|
+
mp["world"] = 2;
|
|
14
|
+
print(mp); // {"hello":1, "world":2}
|
|
15
|
+
|
|
16
|
+
string res;
|
|
17
|
+
res << mp;
|
|
18
|
+
print(res); // {"hello":1, "world":2}
|
|
19
|
+
|
|
20
|
+
string str;
|
|
21
|
+
str = limonp::StringFormat("%s, %s", "hello", "world");
|
|
22
|
+
print(str); //hello, world
|
|
23
|
+
|
|
24
|
+
const char * a[] = {"hello", "world"};
|
|
25
|
+
str = limonp::Join(a, a + sizeof(a)/sizeof(*a), ",");
|
|
26
|
+
print(str); //hello,world
|
|
27
|
+
|
|
28
|
+
str = "hello, world";
|
|
29
|
+
vector<string> buf;
|
|
30
|
+
limonp::Split(str, buf, ",");
|
|
31
|
+
print(buf); //["hello", " world"]
|
|
32
|
+
|
|
33
|
+
int arr[] = {1,10,100,1000};
|
|
34
|
+
vector<int> vec_i(arr, arr + sizeof(arr)/sizeof(arr[0]));
|
|
35
|
+
print(vec_i); //[1, 10, 100, 1000]
|
|
36
|
+
|
|
37
|
+
XLOG(INFO) << "hello, world"; //2014-04-05 20:52:37 demo.cpp:35 INFO hello, world
|
|
38
|
+
//In the same way, `LOG(DEBUG),LOG(WARNING),LOG(ERROR),LOG(FATAL)`.
|
|
39
|
+
return EXIT_SUCCESS;
|
|
40
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
AT&T 3 nz
|
|
2
|
+
B�� 3 n
|
|
3
|
+
c# 3 nz
|
|
4
|
+
C# 3 nz
|
|
5
|
+
c++ 3 nz
|
|
6
|
+
C++ 3 nz
|
|
7
|
+
T�� 4 n
|
|
8
|
+
һ 217830 m
|
|
9
|
+
һһ 1670 m
|
|
10
|
+
һһ�� 11 m
|
|
11
|
+
һһ�� 3 m
|
|
12
|
+
һһ�� 8 m
|
|
13
|
+
һһ�о� 34 i
|
|
14
|
+
һһ�� 9 m
|
|
15
|
+
һһ��Ӧ 43 l
|
|
16
|
+
һһ�� 2 m
|
|
17
|
+
һһ���� 4 l
|
|
18
|
+
һ�� 18 d
|
|
19
|
+
һ����ʶ 3 i
|
|
20
|
+
һ���� 3 m
|
|
21
|
+
һ����� 24 m
|
|
22
|
+
һ�� 22 m
|
|
23
|
+
һ�߰˲� 3 l
|
|
24
|
+
һ�� 442 m
|
|
25
|
+
һ��һǧ 4 m
|
|
26
|
+
һ��һǧ��ٶ�ʮ�� 2 m
|
|
27
|
+
һ��һǧ�˰ٰ�ʮ�� 2 m
|
|
28
|
+
һ��һǧ��� 2 m
|
|
29
|
+
һ��һǧ���ʮ��� 4 m
|
|
30
|
+
һ����ǧ 5 m
|
|
31
|
+
һ����ǧ�� 2 m
|
|
32
|
+
һ����ǧ�� 2 m
|
|
33
|
+
һ����ǧ� 2 m
|
|
34
|
+
һ���� 4 m
|
|
35
|
+
һ������ 4 m
|
|
36
|
+
һ����ǧ 8 m
|
|
37
|
+
һ����ǧ���һʮ�� 2 m
|
|
38
|
+
һ����ǧ��ٽ� 4 m
|
|
39
|
+
һ����ǧ���� 2 m
|
|
40
|
+
һ����ǧ�� 2 m
|
|
41
|
+
һ���� 124 m
|
|
42
|
+
һ������ 4 m
|
|
43
|
+
һ����ǧ 3 m
|
|
44
|
+
һ��� 62 m
|
|
45
|
+
һ���ǧ 2 m
|
|
46
|
+
һ���ǧ�� 2 m
|
|
47
|
+
һ��� 10 m
|
|
48
|
+
һ���ǧ 7 m
|
|
49
|
+
һ���ǧ�� 2 m
|
|
50
|
+
һ���ǧ��� 4 m
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
AT&T 3 nz
|
|
2
|
+
B超 3 n
|
|
3
|
+
c# 3 nz
|
|
4
|
+
C# 3 nz
|
|
5
|
+
c++ 3 nz
|
|
6
|
+
C++ 3 nz
|
|
7
|
+
T恤 4 n
|
|
8
|
+
一 217830 m
|
|
9
|
+
一一 1670 m
|
|
10
|
+
一一二 11 m
|
|
11
|
+
一一例 3 m
|
|
12
|
+
一一分 8 m
|
|
13
|
+
一一列举 34 i
|
|
14
|
+
一一对 9 m
|
|
15
|
+
一一对应 43 l
|
|
16
|
+
一一记 2 m
|
|
17
|
+
一一道来 4 l
|
|
18
|
+
一丁 18 d
|
|
19
|
+
一丁不识 3 i
|
|
20
|
+
一丁点 3 m
|
|
21
|
+
一丁点儿 24 m
|
|
22
|
+
一七 22 m
|
|
23
|
+
一七八不 3 l
|
|
24
|
+
一万 442 m
|
|
25
|
+
一万一千 4 m
|
|
26
|
+
一万一千五百二十颗 2 m
|
|
27
|
+
一万一千八百八十斤 2 m
|
|
28
|
+
一万一千多间 2 m
|
|
29
|
+
一万一千零九十五册 4 m
|
|
30
|
+
一万七千 5 m
|
|
31
|
+
一万七千余 2 m
|
|
32
|
+
一万七千多 2 m
|
|
33
|
+
一万七千多户 2 m
|
|
34
|
+
一万万 4 m
|
|
35
|
+
一万万两 4 m
|
|
36
|
+
一万三千 8 m
|
|
37
|
+
一万三千五百一十七 2 m
|
|
38
|
+
一万三千五百斤 4 m
|
|
39
|
+
一万三千余种 2 m
|
|
40
|
+
一万三千块 2 m
|
|
41
|
+
一万两 124 m
|
|
42
|
+
一万两万 4 m
|
|
43
|
+
一万两千 3 m
|
|
44
|
+
一万个 62 m
|
|
45
|
+
一万九千 2 m
|
|
46
|
+
一万九千余 2 m
|
|
47
|
+
一万二 10 m
|
|
48
|
+
一万二千 7 m
|
|
49
|
+
一万二千两 2 m
|
|
50
|
+
一万二千五百 4 m
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
龙鸣狮吼 3 nr
|
|
2
|
+
龙齐诺 2 nr
|
|
3
|
+
龙齿 3 n
|
|
4
|
+
龚 176 nr
|
|
5
|
+
龚世萍 2 nr
|
|
6
|
+
龚书铎 2 nr
|
|
7
|
+
龚二人 2 nr
|
|
8
|
+
龚云甫 3 nr
|
|
9
|
+
龚伟强 5 nr
|
|
10
|
+
龚先生 4 nr
|
|
11
|
+
龚光杰 44 nr
|
|
12
|
+
龚古尔 24 nr
|
|
13
|
+
龚子敬 2 nr
|
|
14
|
+
龚孝升 12 nr
|
|
15
|
+
龚学平 2 nr
|
|
16
|
+
龚完敬 5 nr
|
|
17
|
+
龚定庵 3 nr
|
|
18
|
+
龚定敬 2 nr
|
|
19
|
+
龚宝铨 5 nr
|
|
20
|
+
龚家村 3 nr
|
|
21
|
+
龚建国 29 nr
|
|
22
|
+
龚德俊 6 nr
|
|
23
|
+
龚心瀚 3 nr
|
|
24
|
+
龚志国 2 nr
|
|
25
|
+
龚意田 3 nr
|
|
26
|
+
龚慈恩 3 nr
|
|
27
|
+
龚施茜 3 nr
|
|
28
|
+
龚晓犁 2 nr
|
|
29
|
+
龚普洛 3 nr
|
|
30
|
+
龚智超 7 nr
|
|
31
|
+
龚松林 10 nr
|
|
32
|
+
龚永明 3 nr
|
|
33
|
+
龚永泉 5 nr
|
|
34
|
+
龚泽艺 256 nr
|
|
35
|
+
龚睿 8 nrfg
|
|
36
|
+
龚祖同 2 nr
|
|
37
|
+
龚秋婷 3 nr
|
|
38
|
+
龚老爷 2 nr
|
|
39
|
+
龚育之 19 nr
|
|
40
|
+
龚自珍 28 nr
|
|
41
|
+
龚蓓苾 3 nr
|
|
42
|
+
龚虹嘉 3 nr
|
|
43
|
+
龚诗嘉 3 nr
|
|
44
|
+
龛 223 ng
|
|
45
|
+
龜 2 zg
|
|
46
|
+
龟 903 ns
|
|
47
|
+
龟儿子 123 n
|
|
48
|
+
龟兆 3 nz
|
|
49
|
+
龟兹 215 ns
|
|
50
|
+
龟兹王 3 nrt
|
|
51
|
+
龟冷搘床 3 v
|
|
52
|
+
龟冷支床 3 n
|
|
53
|
+
龟卜 3 n
|
|
54
|
+
龟厌不告 3 l
|
|
55
|
+
龟壳 33 n
|
|
56
|
+
龟壳花 3 n
|
|
57
|
+
龟头 34 n
|
|
58
|
+
龟头炎 3 n
|
|
59
|
+
龟山 23 ns
|
|
60
|
+
龟山乡 3 ns
|
|
61
|
+
龟山岛 3 ns
|
|
62
|
+
龟年鹤寿 3 ns
|
|
63
|
+
龟年鹤算 3 l
|
|
64
|
+
龟文 3 nz
|
|
65
|
+
龟文写迹 3 n
|
|
66
|
+
龟文鸟迹 3 n
|
|
67
|
+
龟板 10 n
|
|
68
|
+
龟毛免角 3 n
|
|
69
|
+
龟毛兔角 3 n
|
|
70
|
+
龟溪 3 ns
|
|
71
|
+
龟玉 3 nz
|
|
72
|
+
龟王 3 nz
|
|
73
|
+
龟甲 92 ns
|
|
74
|
+
龟甲胶 3 nz
|
|
75
|
+
龟筮 3 n
|
|
76
|
+
龟纹 3 n
|
|
77
|
+
龟缩 29 v
|
|
78
|
+
龟肉 3 n
|
|
79
|
+
龟背 21 n
|
|
80
|
+
龟背竹 3 n
|
|
81
|
+
龟苓膏 3 n
|
|
82
|
+
龟苗 3 n
|
|
83
|
+
龟裂 34 v
|
|
84
|
+
龟足 5 v
|
|
85
|
+
龟鉴 2 n
|
|
86
|
+
龟镜 3 nz
|
|
87
|
+
龟鳖 3 n
|
|
88
|
+
龟鹤遐寿 3 l
|
|
89
|
+
龟龄鹤算 3 n
|
|
90
|
+
龟龙片甲 3 nz
|
|
91
|
+
龟龙麟凤 3 ns
|
|
92
|
+
龠 5 g
|
|
93
|
+
龢 732 zg
|