jieba-rb 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +19 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +85 -0
  8. data/Rakefile +15 -0
  9. data/ext/cppjieba/.gitignore +17 -0
  10. data/ext/cppjieba/.travis.yml +22 -0
  11. data/ext/cppjieba/CMakeLists.txt +28 -0
  12. data/ext/cppjieba/ChangeLog.md +236 -0
  13. data/ext/cppjieba/README.md +285 -0
  14. data/ext/cppjieba/README_EN.md +111 -0
  15. data/ext/cppjieba/appveyor.yml +32 -0
  16. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  17. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  18. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  42. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  45. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  46. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  56. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  57. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  58. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  59. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  60. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  61. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  62. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  63. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  64. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  65. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  66. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  67. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  68. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  69. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  70. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  71. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  72. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  73. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  74. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  75. data/ext/cppjieba/dict/README.md +31 -0
  76. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  77. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  78. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  79. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  80. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  83. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  84. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  85. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  86. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  87. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  88. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  89. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  90. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  91. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  92. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  93. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  94. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  95. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  96. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +24 -0
  98. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  99. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  100. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  101. data/ext/jieba/extconf.rb +28 -0
  102. data/ext/jieba/jieba.c +11 -0
  103. data/ext/jieba/jieba.h +11 -0
  104. data/ext/jieba/keyword.cc +92 -0
  105. data/ext/jieba/keyword.h +17 -0
  106. data/ext/jieba/segment.cc +107 -0
  107. data/ext/jieba/segment.h +17 -0
  108. data/ext/jieba/tagging.cc +76 -0
  109. data/ext/jieba/tagging.h +17 -0
  110. data/jieba_rb.gemspec +51 -0
  111. data/lib/jieba-rb.rb +66 -0
  112. data/lib/jieba_rb/version.rb +3 -0
  113. data/test/test_keyword.rb +17 -0
  114. data/test/test_segment.rb +32 -0
  115. data/test/test_tagging.rb +22 -0
  116. data/test/user.dict.utf8 +23 -0
  117. metadata +219 -0
@@ -0,0 +1,21 @@
1
+ /************************************
2
+ ************************************/
3
+ #ifndef LIMONP_NONCOPYABLE_H
4
+ #define LIMONP_NONCOPYABLE_H
5
+
6
+ namespace limonp {
7
+
8
// Base class for types that must not be copied.
//
// The copy constructor and copy assignment operator are declared private and
// never defined, so any attempt to copy a derived object is rejected.
// Construction and destruction are protected: the class can only be used as
// a base, never instantiated or deleted on its own.
class NonCopyable {
 private:
  // Declared only; there is deliberately no definition.
  NonCopyable(const NonCopyable& );
  const NonCopyable& operator=(const NonCopyable& );

 protected:
  NonCopyable() {}
  ~NonCopyable() {}
}; // class NonCopyable
18
+
19
+ } // namespace limonp
20
+
21
+ #endif // LIMONP_NONCOPYABLE_H
@@ -0,0 +1,159 @@
1
+ #ifndef LIMONP_STD_EXTEMSION_HPP
2
+ #define LIMONP_STD_EXTEMSION_HPP
3
+
4
+ #include <map>
5
+
6
// Select the hash-container headers.
//
// Any C++11-or-later compiler provides <unordered_map>/<unordered_set>, so the
// language check uses ">=": an "==" comparison would send C++14/17/20 builds
// down the legacy TR1 path, which does not exist on every standard library.
// MSVC is tested separately because it reports an old __cplusplus value by
// default; Apple toolchains always ship the standard headers.
#if defined(__APPLE__) || defined(_MSC_VER) || (__cplusplus >= 201103L)
#include <unordered_map>
#include <unordered_set>
#else
// Pre-C++11 fallback: expose the TR1 containers under the std:: names.
#include <tr1/unordered_map>
#include <tr1/unordered_set>
namespace std {
using std::tr1::unordered_map;
using std::tr1::unordered_set;
}

#endif
24
+
25
+ #include <set>
26
+ #include <string>
27
+ #include <vector>
28
+ #include <deque>
29
+ #include <fstream>
30
+ #include <sstream>
31
+
32
+ #define print(x) std::cout << x << std::endl
33
+
34
+ namespace std {
35
+
36
// Writes a vector as "[e0, e1, ...]". An empty vector is written as "[]".
template<typename T>
ostream& operator << (ostream& os, const vector<T>& v) {
  if(v.empty()) {
    os << "[]";
    return os;
  }
  typename vector<T>::const_iterator cur = v.begin();
  os << "[" << *cur;
  for(++cur; cur != v.end(); ++cur) {
    os << ", " << *cur;
  }
  os << "]";
  return os;
}

// Specialization for vectors of strings: every element is wrapped in double
// quotes, e.g. ["a", "b"]. An empty vector is still written as "[]".
template<>
inline ostream& operator << (ostream& os, const vector<string>& v) {
  if(v.empty()) {
    os << "[]";
    return os;
  }
  vector<string>::const_iterator cur = v.begin();
  os << "[\"" << *cur;
  for(++cur; cur != v.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  os << "\"]";
  return os;
}
61
+
62
// Writes a deque as ["e0", "e1", ...]. Elements of every type are wrapped in
// double quotes. An empty deque is written as "[]".
template<typename T>
ostream& operator << (ostream& os, const deque<T>& dq) {
  if(dq.empty()) {
    os << "[]";
    return os;
  }
  typename deque<T>::const_iterator cur = dq.begin();
  os << "[\"" << *cur;
  for(++cur; cur != dq.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  os << "\"]";
  return os;
}
74
+
75
+
76
// Writes a pair as "first:second".
template<class T1, class T2>
ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
  return os << pr.first << ":" << pr.second;
}
81
+
82
+
83
// Replaces the contents of `str` with the streamed text form of `obj`.
// The previous contents of `str` are discarded, not appended to.
// Returns `str` so the result can be used directly.
template<class T>
string& operator << (string& str, const T& obj) {
  stringstream formatter;
  formatter << obj;  // uses whichever ostream << overload exists for T
  str = formatter.str();
  return str;
}
89
+
90
// Writes a map as "{k0:v0, k1:v1, ...}" in the map's iteration order.
// An empty map is written as "{}".
template<class T1, class T2>
ostream& operator << (ostream& os, const map<T1, T2>& mp) {
  if(mp.empty()) {
    os << "{}";
    return os;
  }
  typedef typename map<T1, T2>::const_iterator EntryIter;
  EntryIter cur = mp.begin();
  os << '{';
  os << cur->first << ":" << cur->second;
  for(++cur; cur != mp.end(); ++cur) {
    os << ", ";
    os << cur->first << ":" << cur->second;
  }
  os << '}';
  return os;
}
107
// Writes an unordered_map as "{k0:v0, k1:v1, ...}". Entries appear in the
// container's own iteration order. An empty map is written as "{}".
template<class T1, class T2>
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
  if(mp.empty()) {
    os << "{}";
    return os;
  }
  typedef typename std::unordered_map<T1, T2>::const_iterator EntryIter;
  EntryIter cur = mp.begin();
  os << '{';
  os << cur->first << ":" << cur->second;
  for(++cur; cur != mp.end(); ++cur) {
    os << ", ";
    os << cur->first << ":" << cur->second;
  }
  os << '}';
  return os;
}
121
+
122
// Writes a set as "{e0, e1, ...}" in the set's iteration order.
// An empty set is written as "{}".
template<class T>
ostream& operator << (ostream& os, const set<T>& st) {
  if(st.empty()) {
    os << "{}";
    return os;
  }
  typename set<T>::const_iterator cur = st.begin();
  os << '{';
  os << *cur;
  for(++cur; cur != st.end(); ++cur) {
    os << ", " << *cur;
  }
  os << '}';
  return os;
}
139
+
140
// Returns true when `key` is found in `contain`.
// `contain` must provide find() and end() (e.g. set, map, unordered_map).
template<class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) {
  const bool found = (contain.find(key) != contain.end());
  return found;
}
144
+
145
// Replaces the contents of `s` with everything that can still be read from
// `ifs`, from its current position up to end of stream. Returns `s`.
template<class T>
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
  istreambuf_iterator<T> first(ifs);
  istreambuf_iterator<T> last;  // default-constructed == end of stream
  s.assign(first, last);
  return s;
}
149
+
150
// Copies every character of `s` into the stream buffer of `ofs`, bypassing
// formatted output. Returns `ofs`.
template<class T>
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
  copy(s.begin(), s.end(), ostreambuf_iterator<T>(ofs));
  return ofs;
}
156
+
157
+ } // namespace std
158
+
159
+ #endif
@@ -0,0 +1,365 @@
1
+ /************************************
2
+ * file enc : ascii
3
+ * author : wuyanyi09@gmail.com
4
+ ************************************/
5
+ #ifndef LIMONP_STR_FUNCTS_H
6
+ #define LIMONP_STR_FUNCTS_H
7
+ #include <fstream>
8
+ #include <iostream>
9
+ #include <string>
10
+ #include <vector>
11
+ #include <algorithm>
12
+ #include <cctype>
13
+ #include <map>
14
+ #include <stdint.h>
15
+ #include <stdio.h>
16
+ #include <stdarg.h>
17
+ #include <memory.h>
18
+ #include <functional>
19
+ #include <locale>
20
+ #include <sstream>
21
+ #include <sys/types.h>
22
+ #include <iterator>
23
+ #include <algorithm>
24
+ #include "StdExtension.hpp"
25
+
26
+ namespace limonp {
27
+ using namespace std;
28
// printf-style formatting into a std::string.
//
// fmt  - printf format string; the remaining arguments are its values.
// Returns the formatted text. The working buffer starts at 256 bytes and is
// grown until the output fits.
//
// If vsnprintf keeps reporting an error (negative return) the buffer is
// doubled up to a fixed ceiling, after which an empty string is returned
// instead of growing without bound.
inline string StringFormat(const char* fmt, ...) {
  const int kMaxBufferSize = 1 << 29;
  int size = 256;
  std::string str;
  va_list ap;
  while (true) {
    str.resize(size);
    va_start(ap, fmt);
    // Write through &str[0]: the pointer returned by c_str() is read-only.
    const int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n > -1 && n < size) {
      str.resize(n);
      return str;
    }
    if (n > -1) {
      // The implementation reported the exact length; +1 for the terminator.
      size = n + 1;
    } else {
      // Length unknown: double the buffer, but give up before overflowing.
      if (size > kMaxBufferSize) {
        str.clear();
        return str;
      }
      size *= 2;
    }
  }
}
48
+
49
// Joins the elements of [begin, end) into `res`, separated by `connector`.
//
// begin, end - range of elements; each must be streamable with operator<<.
// res        - receives the joined text. An empty range yields an empty
//              string, so stale content from the caller never survives.
// connector  - separator written between consecutive elements.
template<class T>
void Join(T begin, T end, string& res, const string& connector) {
  if(begin == end) {
    // Clear here rather than up front: `connector` may alias `res`.
    res.clear();
    return;
  }
  stringstream ss;
  ss << *begin;
  for(++begin; begin != end; ++begin) {
    ss << connector << *begin;
  }
  res = ss.str();
}
63
+
64
// Convenience form of Join that returns the joined text by value.
// Delegates to the four-argument overload that writes into an output string.
template<class T>
string Join(T begin, T end, const string& connector) {
  string joined;
  Join(begin, end, joined, connector);
  return joined;
}
70
+
71
// Upper-cases `str` in place, one byte at a time, and returns it.
//
// Each byte is converted to unsigned char before being passed to toupper:
// handing toupper a negative char (any byte >= 0x80 where char is signed,
// e.g. UTF-8 text) is undefined behaviour. The mapping itself follows the
// current C locale.
inline string& Upper(string& str) {
  for(string::iterator it = str.begin(); it != str.end(); ++it) {
    *it = static_cast<char>(std::toupper(static_cast<unsigned char>(*it)));
  }
  return str;
}
75
+
76
// Lower-cases `str` in place, one byte at a time, and returns it.
//
// Each byte is converted to unsigned char before being passed to tolower:
// handing tolower a negative char (any byte >= 0x80 where char is signed,
// e.g. UTF-8 text) is undefined behaviour. The mapping itself follows the
// current C locale.
inline string& Lower(string& str) {
  for(string::iterator it = str.begin(); it != str.end(); ++it) {
    *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
  }
  return str;
}
80
+
81
// Returns true when `c` is a whitespace character according to isspace.
// Values above 0xff are rejected up front because isspace is only defined
// for values representable as unsigned char (and EOF).
inline bool IsSpace(unsigned c) {
  return c <= 0xff && std::isspace(static_cast<int>(c)) != 0;
}

// Removes leading whitespace from `s` in place and returns `s`.
// Implemented as a plain loop: the std::ptr_fun / std::not1 adaptors used
// previously were removed from the standard library in C++17.
inline std::string& LTrim(std::string &s) {
  std::string::size_type skip = 0;
  // s[skip] is converted from char to unsigned by the IsSpace call, exactly
  // as the adaptor-based predicate did.
  while(skip < s.size() && IsSpace(s[skip])) {
    ++skip;
  }
  s.erase(0, skip);
  return s;
}

// Removes trailing whitespace from `s` in place and returns `s`.
inline std::string& RTrim(std::string &s) {
  std::string::size_type keep = s.size();
  while(keep > 0 && IsSpace(s[keep - 1])) {
    --keep;
  }
  s.erase(keep);
  return s;
}

// Removes whitespace from both ends of `s` in place and returns `s`.
inline std::string& Trim(std::string &s) {
  return LTrim(RTrim(s));
}
99
+
100
// Removes every leading occurrence of `x` from `s` in place and returns `s`.
// Uses std::string searching instead of std::bind2nd / std::not1, which were
// removed from the standard library in C++17.
inline std::string& LTrim(std::string & s, char x) {
  // find_first_not_of returns npos when the string is all `x` (or empty);
  // erase(0, npos) then removes everything.
  s.erase(0, s.find_first_not_of(x));
  return s;
}

// Removes every trailing occurrence of `x` from `s` in place and returns `s`.
inline std::string& RTrim(std::string & s, char x) {
  const std::string::size_type last = s.find_last_not_of(x);
  if(last == std::string::npos) {
    s.clear();  // nothing but `x` (or empty)
  } else {
    s.erase(last + 1);
  }
  return s;
}

// Removes `x` from both ends of `s` in place and returns `s`.
inline std::string& Trim(std::string &s, char x) {
  return LTrim(RTrim(s, x), x);
}
113
+
114
// Splits `src` at any character contained in `pattern`.
//
// src      - text to split.
// res      - cleared, then filled with the pieces in order.
// pattern  - set of delimiter characters (each one splits on its own).
// maxsplit - once `res` already holds this many pieces, the rest of `src`
//            is appended as one final piece.
//
// Consecutive delimiters produce empty pieces. A delimiter at the very end
// of `src` does not produce a trailing empty piece, and an empty `src`
// yields no pieces at all.
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
  res.clear();
  size_t cursor = 0;
  while(cursor < src.size()) {
    const size_t hit = src.find_first_of(pattern, cursor);
    const bool lastPiece = (hit == string::npos) || (res.size() >= maxsplit);
    if(lastPiece) {
      res.push_back(src.substr(cursor));
      break;
    }
    res.push_back(src.substr(cursor, hit - cursor));
    cursor = hit + 1;
  }
}
132
+
133
+ inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
134
+ vector<string> res;
135
+ Split(src, res, pattern, maxsplit);
136
+ return res;
137
+ }
138
+
139
// Returns true when `str` begins with `prefix`.
// An empty prefix always matches.
inline bool StartsWith(const string& str, const string& prefix) {
  const size_t n = prefix.length();
  return n <= str.length() && str.compare(0, n, prefix) == 0;
}
145
+
146
// Returns true when `str` ends with `suffix`.
// An empty suffix always matches.
inline bool EndsWith(const string& str, const string& suffix) {
  const size_t n = suffix.length();
  if(n > str.length()) {
    return false;
  }
  const size_t offset = str.length() - n;
  return str.compare(offset, n, suffix) == 0;
}
152
+
153
// Returns true when the character `ch` occurs anywhere in `str`.
inline bool IsInStr(const string& str, char ch) {
  return string::npos != str.find(ch);
}
156
+
157
// Packs two bytes into one 16-bit value: `high` becomes the upper eight
// bits and `low` the lower eight bits.
inline uint16_t TwocharToUint16(char high, char low) {
  const uint16_t upper = static_cast<uint8_t>(high);
  const uint16_t lower = static_cast<uint8_t>(low);
  return static_cast<uint16_t>((upper << 8) | lower);
}
160
+
161
// Decodes `len` bytes of UTF-8 at `str` into 16-bit code units.
//
// str - input bytes; a null pointer makes the call fail immediately
//       (in that case `vec` is left untouched).
// len - number of bytes to decode.
// vec - cleared, then receives one value per decoded character. Must provide
//       clear() and push_back().
//
// Returns false on the first malformed or unsupported sequence: a lead byte
// in the continuation range 0x80-0xBF, a trailing byte that is not of the
// form 10xxxxxx, a truncated sequence, or a lead byte >= 0xF0 (four-byte
// sequences do not fit in 16 bits). Values decoded before the failure stay
// in `vec`. Overlong encodings are not rejected.
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if(!str) {
    return false;
  }
  vec.clear();
  for(size_t i = 0; i < len;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    if(lead < 0x80) { // 0xxxxxxx
      vec.push_back(lead);
      i += 1;
    } else if(lead >= 0xc0 && lead <= 0xdf && i + 1 < len) { // 110xxxxx 10xxxxxx
      const uint8_t t1 = static_cast<uint8_t>(str[i + 1]);
      if((t1 & 0xc0) != 0x80) {
        return false;
      }
      vec.push_back(static_cast<uint16_t>(((lead & 0x1f) << 6) | (t1 & 0x3f)));
      i += 2;
    } else if(lead >= 0xe0 && lead <= 0xef && i + 2 < len) { // 1110xxxx 10xxxxxx 10xxxxxx
      const uint8_t t1 = static_cast<uint8_t>(str[i + 1]);
      const uint8_t t2 = static_cast<uint8_t>(str[i + 2]);
      if((t1 & 0xc0) != 0x80 || (t2 & 0xc0) != 0x80) {
        return false;
      }
      vec.push_back(static_cast<uint16_t>(((lead & 0x0f) << 12) | ((t1 & 0x3f) << 6) | (t2 & 0x3f)));
      i += 3;
    } else {
      // Stray continuation byte, truncated sequence, or lead byte >= 0xf0.
      return false;
    }
  }
  return true;
}
191
+
192
// std::string convenience form: decodes the whole of `str` by forwarding its
// buffer and size to the pointer/length overload, and returns that result.
template <class Uint16Container>
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  const size_t count = str.size();
  return Utf8ToUnicode(bytes, count, vec);
}
196
+
197
// Decodes the UTF-8 text in `str` into 32-bit code points.
//
// str - input bytes.
// vec - cleared, then receives one value per decoded character. Must provide
//       clear() and push_back().
//
// Sequences of one to four bytes are supported. Returns false on the first
// malformed sequence: a lead byte in the continuation range 0x80-0xBF, a
// lead byte above 0xF7, a trailing byte that is not of the form 10xxxxxx, or
// a sequence cut off by the end of the string. Values decoded before the
// failure stay in `vec`. Overlong encodings are not rejected.
template <class Uint32Container>
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
  vec.clear();
  const size_t len = str.size();
  for(size_t i = 0; i < len;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    uint32_t cp;
    size_t trailing; // number of 10xxxxxx bytes that must follow the lead
    if(lead < 0x80) { // 0xxxxxxx
      cp = lead;
      trailing = 0;
    } else if(lead >= 0xc0 && lead <= 0xdf) { // 110xxxxx
      cp = lead & 0x1f;
      trailing = 1;
    } else if(lead >= 0xe0 && lead <= 0xef) { // 1110xxxx
      cp = lead & 0x0f;
      trailing = 2;
    } else if(lead >= 0xf0 && lead <= 0xf7) { // 11110xxx
      cp = lead & 0x07;
      trailing = 3;
    } else {
      // 0x80-0xbf is a continuation byte; 0xf8-0xff never starts a sequence.
      return false;
    }
    if(i + trailing >= len) {
      return false; // sequence runs past the end of the input
    }
    for(size_t k = 1; k <= trailing; ++k) {
      const uint8_t cont = static_cast<uint8_t>(str[i + k]);
      if((cont & 0xc0) != 0x80) {
        return false;
      }
      cp = (cp << 6) | (cont & 0x3f); // each trailing byte adds six bits
    }
    vec.push_back(cp);
    i += trailing + 1;
  }
  return true;
}
251
+
252
// Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`.
//
// begin, end - range of values convertible to uint32_t.
// res        - cleared, then receives the encoded bytes.
//
// Values up to 0x7f take one byte, up to 0x7ff two, up to 0xffff three, and
// everything above that four. The four-byte lead keeps three payload bits
// (mask 0x07): a two-bit mask would drop bit 20 and mis-encode every code
// point from U+100000 upwards. Surrogates and values beyond U+10FFFF are not
// rejected.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const uint32_t ui = *begin;
    if(ui <= 0x7f) {
      res += char(ui);
    } else if(ui <= 0x7ff) {
      res += char(((ui >> 6) & 0x1f) | 0xc0);
      res += char((ui & 0x3f) | 0x80);
    } else if(ui <= 0xffff) {
      res += char(((ui >> 12) & 0x0f) | 0xe0);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    } else {
      res += char(((ui >> 18) & 0x07) | 0xf0);
      res += char(((ui >> 12) & 0x3f) | 0x80);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    }
  }
}
276
+
277
// Encodes the 16-bit values in [begin, end) as UTF-8 into `res`.
//
// begin, end - range of values convertible to uint16_t.
// res        - cleared, then receives the encoded bytes.
//
// Values below 0x80 take one byte, values below 0x800 two bytes, and all
// remaining values three bytes.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const uint16_t unit = *begin;
    if(unit < 0x80) {
      res += char(unit);
      continue;
    }
    if(unit < 0x800) {
      res += char(0xc0 | ((unit >> 6) & 0x1f));
      res += char(0x80 | (unit & 0x3f));
      continue;
    }
    res += char(0xe0 | ((unit >> 12) & 0x0f));
    res += char(0x80 | ((unit >> 6) & 0x3f));
    res += char(0x80 | (unit & 0x3f));
  }
}
296
+
297
+
298
+ template <class Uint16Container>
299
+ bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
300
+ vec.clear();
301
+ if(!str) {
302
+ return true;
303
+ }
304
+ size_t i = 0;
305
+ while(i < len) {
306
+ if(0 == (str[i] & 0x80)) {
307
+ vec.push_back(uint16_t(str[i]));
308
+ i++;
309
+ } else {
310
+ if(i + 1 < len) { //&& (str[i+1] & 0x80))
311
+ uint16_t tmp = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff));
312
+ vec.push_back(tmp);
313
+ i += 2;
314
+ } else {
315
+ return false;
316
+ }
317
+ }
318
+ }
319
+ return true;
320
+ }
321
+
322
// std::string convenience form: converts the whole of `str` by forwarding
// its buffer and size to the pointer/length overload, and returns that
// result.
template <class Uint16Container>
bool GBKTrans(const string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  const size_t count = str.size();
  return GBKTrans(bytes, count, vec);
}
326
+
327
// Converts the 16-bit values in [begin, end) back into bytes in `res`.
//
// res is cleared first. For each value the upper eight bits are examined:
// when their high bit is set, both the upper and the lower byte are
// appended; otherwise only the lower byte is appended.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const char upper = static_cast<char>(((*begin) >> 8) & 0x00ff);
    const char lower = static_cast<char>((*begin) & 0x00ff);
    if(upper & 0x80) {
      res += upper;
    }
    res += lower;
  }
}
345
+
346
/*
 * Formats the current local time with strftime.
 *
 * format  - strftime format string, for example "%Y-%m-%d %H:%M:%S".
 * timeStr - receives the formatted text. It is left empty when the result
 *           (including its terminator) does not fit in the 64-byte working
 *           buffer, or when the local time cannot be determined.
 *
 * The text is built in a local buffer and only then copied out. This avoids
 * writing through the read-only pointer returned by c_str(), and keeps the
 * call correct when `format` and `timeStr` are the same object.
 */
inline void GetTime(const string& format, string& timeStr) {
  time_t timeNow;
  time(&timeNow);
  const struct tm* const local = localtime(&timeNow);
  if(!local) {
    timeStr.clear();
    return;
  }
  char buffer[64];
  const size_t len = strftime(buffer, sizeof(buffer), format.c_str(), local);
  timeStr.assign(buffer, len);
}
356
+
357
// Concatenates two path fragments, inserting a single "/" between them
// unless `path1` already ends with one. An empty `path1` therefore yields
// "/" followed by `path2`.
inline string PathJoin(const string& path1, const string& path2) {
  const bool endsWithSlash = !path1.empty() && path1[path1.size() - 1] == '/';
  string joined(path1);
  if(!endsWithSlash) {
    joined += "/";
  }
  joined += path2;
  return joined;
}
363
+
364
+ }
365
+ #endif