cppjieba_rb 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +2 -2
  5. data/cppjieba_rb.gemspec +4 -4
  6. data/lib/cppjieba_rb/version.rb +1 -1
  7. metadata +17 -135
  8. data/ext/cppjieba/.gitignore +0 -17
  9. data/ext/cppjieba/.travis.yml +0 -21
  10. data/ext/cppjieba/CMakeLists.txt +0 -28
  11. data/ext/cppjieba/ChangeLog.md +0 -236
  12. data/ext/cppjieba/README.md +0 -292
  13. data/ext/cppjieba/README_EN.md +0 -113
  14. data/ext/cppjieba/appveyor.yml +0 -32
  15. data/ext/cppjieba/deps/CMakeLists.txt +0 -1
  16. data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
  17. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
  18. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
  28. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
  29. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
  41. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  42. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
  43. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
  44. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  45. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
  46. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
  47. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
  48. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
  49. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
  50. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
  51. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
  52. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
  53. data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
  54. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +0 -39
  55. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +0 -70
  56. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
  57. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
  58. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
  59. data/ext/cppjieba/deps/limonp/Closure.hpp +0 -206
  60. data/ext/cppjieba/deps/limonp/Colors.hpp +0 -31
  61. data/ext/cppjieba/deps/limonp/Condition.hpp +0 -38
  62. data/ext/cppjieba/deps/limonp/Config.hpp +0 -103
  63. data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
  64. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +0 -7
  65. data/ext/cppjieba/deps/limonp/LocalVector.hpp +0 -139
  66. data/ext/cppjieba/deps/limonp/Logging.hpp +0 -76
  67. data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
  68. data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
  69. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +0 -21
  70. data/ext/cppjieba/deps/limonp/StdExtension.hpp +0 -159
  71. data/ext/cppjieba/deps/limonp/StringUtil.hpp +0 -365
  72. data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
  73. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
  74. data/ext/cppjieba/dict/README.md +0 -31
  75. data/ext/cppjieba/dict/hmm_model.utf8 +0 -34
  76. data/ext/cppjieba/dict/idf.utf8 +0 -258826
  77. data/ext/cppjieba/dict/jieba.dict.utf8 +0 -348982
  78. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +0 -6653
  79. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +0 -166
  80. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +0 -259
  81. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +0 -5222
  82. data/ext/cppjieba/dict/stop_words.utf8 +0 -1534
  83. data/ext/cppjieba/dict/user.dict.utf8 +0 -4
  84. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +0 -277
  85. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +0 -93
  86. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +0 -129
  87. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +0 -190
  88. data/ext/cppjieba/include/cppjieba/Jieba.hpp +0 -130
  89. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +0 -153
  90. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +0 -137
  91. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +0 -109
  92. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +0 -77
  93. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +0 -54
  94. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +0 -90
  95. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +0 -46
  96. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +0 -23
  97. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +0 -190
  98. data/ext/cppjieba/include/cppjieba/Trie.hpp +0 -174
  99. data/ext/cppjieba/include/cppjieba/Unicode.hpp +0 -227
  100. data/ext/cppjieba/test/CMakeLists.txt +0 -5
  101. data/ext/cppjieba/test/demo.cpp +0 -80
  102. data/ext/cppjieba/test/load_test.cpp +0 -54
  103. data/ext/cppjieba/test/testdata/curl.res +0 -1
  104. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +0 -109750
  105. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +0 -34
  106. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +0 -348982
  107. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +0 -93
  108. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +0 -93
  109. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +0 -67
  110. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +0 -64
  111. data/ext/cppjieba/test/testdata/load_test.urls +0 -2
  112. data/ext/cppjieba/test/testdata/review.100 +0 -100
  113. data/ext/cppjieba/test/testdata/review.100.res +0 -200
  114. data/ext/cppjieba/test/testdata/server.conf +0 -19
  115. data/ext/cppjieba/test/testdata/testlines.gbk +0 -9
  116. data/ext/cppjieba/test/testdata/testlines.utf8 +0 -8
  117. data/ext/cppjieba/test/testdata/userdict.2.utf8 +0 -1
  118. data/ext/cppjieba/test/testdata/userdict.english +0 -2
  119. data/ext/cppjieba/test/testdata/userdict.utf8 +0 -8
  120. data/ext/cppjieba/test/testdata/weicheng.utf8 +0 -247
  121. data/ext/cppjieba/test/unittest/CMakeLists.txt +0 -24
  122. data/ext/cppjieba/test/unittest/gtest_main.cpp +0 -39
  123. data/ext/cppjieba/test/unittest/jieba_test.cpp +0 -133
  124. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +0 -79
  125. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +0 -41
  126. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +0 -43
  127. data/ext/cppjieba/test/unittest/segments_test.cpp +0 -256
  128. data/ext/cppjieba/test/unittest/textrank_test.cpp +0 -86
  129. data/ext/cppjieba/test/unittest/trie_test.cpp +0 -177
  130. data/ext/cppjieba/test/unittest/unicode_test.cpp +0 -43
@@ -1,21 +0,0 @@
1
- /************************************
2
- ************************************/
3
- #ifndef LIMONP_NONCOPYABLE_H
4
- #define LIMONP_NONCOPYABLE_H
5
-
6
- namespace limonp {
7
-
8
// Base class for types that must not be copied: the copy constructor and
// copy assignment operator are private and only declared here, so a derived
// class cannot be copy-constructed or copy-assigned.
class NonCopyable {
 private:
  // Declared without a body on purpose.
  NonCopyable(const NonCopyable& );
  const NonCopyable& operator=(const NonCopyable& );

 protected:
  // Construction and destruction are available to derived classes only.
  NonCopyable() {}
  ~NonCopyable() {}
}; // class NonCopyable
18
-
19
- } // namespace limonp
20
-
21
- #endif // LIMONP_NONCOPYABLE_H
@@ -1,159 +0,0 @@
1
- #ifndef LIMONP_STD_EXTEMSION_HPP
2
- #define LIMONP_STD_EXTEMSION_HPP
3
-
4
- #include <map>
5
-
6
- #ifdef __APPLE__
7
- #include <unordered_map>
8
- #include <unordered_set>
9
- #elif(__cplusplus == 201103L)
10
- #include <unordered_map>
11
- #include <unordered_set>
12
- #elif defined _MSC_VER
13
- #include <unordered_map>
14
- #include <unordered_set>
15
- #else
16
- #include <tr1/unordered_map>
17
- #include <tr1/unordered_set>
18
- namespace std {
19
- using std::tr1::unordered_map;
20
- using std::tr1::unordered_set;
21
- }
22
-
23
- #endif
24
-
25
- #include <set>
26
- #include <string>
27
- #include <vector>
28
- #include <deque>
29
- #include <fstream>
30
- #include <sstream>
31
-
32
- #define print(x) std::cout << x << std::endl
33
-
34
- namespace std {
35
-
36
// Streams a vector as "[e0, e1, ...]". An empty vector is written as "[]".
// Each element is written with its own operator<<.
template<typename T>
ostream& operator << (ostream& os, const vector<T>& v) {
  if(v.empty()) {
    return os << "[]";
  }
  typename vector<T>::const_iterator cur = v.begin();
  os << "[" << *cur;
  for(++cur; cur != v.end(); ++cur) {
    os << ", " << *cur;
  }
  os << "]";
  return os;
}
48
-
49
- template<>
50
- inline ostream& operator << (ostream& os, const vector<string>& v) {
51
- if(v.empty()) {
52
- return os << "[]";
53
- }
54
- os<<"[\""<<v[0];
55
- for(size_t i = 1; i < v.size(); i++) {
56
- os<<"\", \""<<v[i];
57
- }
58
- os<<"\"]";
59
- return os;
60
- }
61
-
62
// Streams a deque with every element wrapped in double quotes, e.g.
// ["1", "2"], regardless of the element type. An empty deque is "[]".
template<typename T>
ostream& operator << (ostream& os, const deque<T>& dq) {
  if(dq.empty()) {
    return os << "[]";
  }
  typename deque<T>::const_iterator cur = dq.begin();
  os << "[\"" << *cur;
  for(++cur; cur != dq.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  os << "\"]";
  return os;
}
74
-
75
-
76
// Streams a pair as "first:second".
template<class T1, class T2>
ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
  os << pr.first;
  os << ":";
  os << pr.second;
  return os;
}
81
-
82
-
83
// Replaces the contents of `str` with the textual form of `obj`, produced by
// streaming `obj` into a stringstream. Returns `str`.
template<class T>
string& operator << (string& str, const T& obj) {
  stringstream buffer;
  buffer << obj; // uses the ostream operator<< available for T
  str = buffer.str();
  return str;
}
89
-
90
// Streams a map as "{entry, entry, ...}" in iteration order. Each entry
// (*it, a key/value pair) is written with operator<<. An empty map is "{}".
template<class T1, class T2>
ostream& operator << (ostream& os, const map<T1, T2>& mp) {
  if(mp.empty()) {
    os << "{}";
    return os;
  }
  typename map<T1, T2>::const_iterator cur = mp.begin();
  os << '{';
  os << *cur;
  for(++cur; cur != mp.end(); ++cur) {
    os << ", " << *cur;
  }
  os << '}';
  return os;
}
107
// Streams an unordered_map as "{entry, entry, ...}" in the container's
// iteration order. Each entry (*it, a key/value pair) is written with
// operator<<. An empty map is "{}".
template<class T1, class T2>
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
  if(mp.empty()) {
    return os << "{}";
  }
  typename std::unordered_map<T1, T2>::const_iterator cur = mp.begin();
  os << '{';
  os << *cur;
  for(++cur; cur != mp.end(); ++cur) {
    os << ", " << *cur;
  }
  return os << '}';
}
121
-
122
// Streams a set as "{e0, e1, ...}" in iteration order. An empty set is "{}".
template<class T>
ostream& operator << (ostream& os, const set<T>& st) {
  if(st.empty()) {
    os << "{}";
    return os;
  }
  typename set<T>::const_iterator cur = st.begin();
  os << '{';
  os << *cur;
  for(++cur; cur != st.end(); ++cur) {
    os << ", " << *cur;
  }
  os << '}';
  return os;
}
139
-
140
// Returns true when `contain.find(key)` locates an element, i.e. the result
// differs from `contain.end()`. Works with any container exposing find()/end().
template<class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) {
  const bool found = (contain.find(key) != contain.end());
  return found;
}
144
-
145
// Replaces `s` with everything that can still be read from `ifs`, using
// stream-buffer iterators. Returns `s`.
template<class T>
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
  istreambuf_iterator<T> first(ifs);
  istreambuf_iterator<T> last;
  return s.assign(first, last);
}
149
-
150
// Writes every character of `s` to `ofs` through an ostreambuf_iterator and
// returns `ofs`.
template<class T>
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
  copy(s.begin(), s.end(), ostreambuf_iterator<T>(ofs));
  return ofs;
}
156
-
157
- } // namespace std
158
-
159
- #endif
@@ -1,365 +0,0 @@
1
- /************************************
2
- * file enc : ascii
3
- * author : wuyanyi09@gmail.com
4
- ************************************/
5
- #ifndef LIMONP_STR_FUNCTS_H
6
- #define LIMONP_STR_FUNCTS_H
7
- #include <fstream>
8
- #include <iostream>
9
- #include <string>
10
- #include <vector>
11
- #include <algorithm>
12
- #include <cctype>
13
- #include <map>
14
- #include <stdint.h>
15
- #include <stdio.h>
16
- #include <stdarg.h>
17
- #include <memory.h>
18
- #include <functional>
19
- #include <locale>
20
- #include <sstream>
21
- #include <sys/types.h>
22
- #include <iterator>
23
- #include <algorithm>
24
- #include "StdExtension.hpp"
25
-
26
- namespace limonp {
27
- using namespace std;
28
// Formats printf-style arguments into a std::string.
//
// fmt: printf-style format string; a null pointer yields an empty string.
// Returns the formatted text. The buffer starts at 256 bytes and is grown
// until vsnprintf reports that the output fits.
inline string StringFormat(const char* fmt, ...) {
  if (!fmt) {
    return string();
  }
  // Only applies to the "vsnprintf returned a negative value" path, so that
  // a persistent error cannot grow the buffer without bound.
  const size_t kErrorRetryLimit = static_cast<size_t>(1) << 24;
  size_t size = 256;
  string str;
  for (;;) {
    str.resize(size);
    va_list ap;
    va_start(ap, fmt);
    // Write into the string's own mutable storage instead of casting away
    // the const of c_str().
    const int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n > -1 && static_cast<size_t>(n) < size) {
      str.resize(static_cast<size_t>(n));
      return str;
    }
    if (n > -1) {
      // The required length is known: retry with exactly enough room.
      size = static_cast<size_t>(n) + 1;
    } else if (size > kErrorRetryLimit) {
      // Still failing after repeated doubling: give up.
      return string();
    } else {
      // Negative return: length unknown, so double the buffer and retry.
      size *= 2;
    }
  }
}
48
-
49
// Concatenates the elements of [begin, end) into `res`, separated by
// `connector`. Elements are converted with operator<< on a stringstream.
// When the range is empty, `res` is left unchanged.
template<class T>
void Join(T begin, T end, string& res, const string& connector) {
  if(begin == end) {
    return;
  }
  stringstream buffer;
  bool first = true;
  for(; begin != end; ++begin) {
    if(!first) {
      buffer << connector;
    }
    buffer << *begin;
    first = false;
  }
  res = buffer.str();
}

// Convenience overload returning the joined text; an empty range yields "".
template<class T>
string Join(T begin, T end, const string& connector) {
  string joined;
  Join(begin, end, joined, connector);
  return joined;
}
70
-
71
// Upper-cases `str` in place, byte by byte, and returns it.
// Each byte is converted to unsigned char before calling toupper: passing a
// negative char value (any byte >= 0x80 where char is signed) is undefined
// behaviour.
inline string& Upper(string& str) {
  for(string::size_type i = 0; i < str.size(); ++i) {
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    str[i] = static_cast<char>(toupper(byte));
  }
  return str;
}
75
-
76
// Lower-cases `str` in place, byte by byte, and returns it.
// Each byte is converted to unsigned char before calling tolower: passing a
// negative char value (any byte >= 0x80 where char is signed) is undefined
// behaviour.
inline string& Lower(string& str) {
  for(string::size_type i = 0; i < str.size(); ++i) {
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    str[i] = static_cast<char>(tolower(byte));
  }
  return str;
}
80
-
81
// Returns true when `c` is at most 0xff and std::isspace accepts it.
// Values above 0xff are rejected up front because handing a large integer
// to isspace crashed (see the original note).
inline bool IsSpace(unsigned c) {
  if(c > 0xff) {
    return false;
  }
  return std::isspace(static_cast<int>(c)) != 0;
}

// Removes leading characters for which IsSpace() is true; returns `s`.
// Written as a plain loop instead of std::not1/std::ptr_fun, which are
// deprecated and no longer available in newer C++ standards.
inline std::string& LTrim(std::string &s) {
  std::string::size_type keep_from = 0;
  while(keep_from < s.size() && IsSpace(s[keep_from])) {
    ++keep_from;
  }
  s.erase(0, keep_from);
  return s;
}

// Removes trailing characters for which IsSpace() is true; returns `s`.
inline std::string& RTrim(std::string &s) {
  std::string::size_type keep_len = s.size();
  while(keep_len > 0 && IsSpace(s[keep_len - 1])) {
    --keep_len;
  }
  s.erase(keep_len);
  return s;
}

// Trims both ends of `s` in place (right side first) and returns it.
inline std::string& Trim(std::string &s) {
  return LTrim(RTrim(s));
}
99
-
100
// Removes every leading occurrence of `x` from `s`; returns `s`.
// Uses std::string's own search instead of std::bind2nd/std::not1, which
// were removed from the standard library in C++17.
inline std::string& LTrim(std::string & s, char x) {
  // find_first_not_of returns npos when the string is all `x` (or empty);
  // erase(0, npos) then clears the whole string.
  s.erase(0, s.find_first_not_of(x));
  return s;
}

// Removes every trailing occurrence of `x` from `s`; returns `s`.
inline std::string& RTrim(std::string & s, char x) {
  const std::string::size_type last_kept = s.find_last_not_of(x);
  if(last_kept == std::string::npos) {
    s.clear();  // nothing but `x` (or already empty)
  } else {
    s.erase(last_kept + 1);
  }
  return s;
}

// Strips `x` from both ends of `s` in place (right side first); returns `s`.
inline std::string& Trim(std::string &s, char x) {
  return LTrim(RTrim(s, x), x);
}
113
-
114
// Splits `src` into `res` (cleared first). Every character of `pattern` is
// a separator, because the search uses find_first_of.
// Once `res` already holds `maxsplit` pieces, the rest of `src` is stored as
// one final piece. A separator at the very end of `src` does not produce a
// trailing empty piece; an empty `src` produces no pieces.
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
  res.clear();
  for(size_t pos = 0; pos < src.size(); ) {
    const size_t hit = src.find_first_of(pattern, pos);
    const bool last_piece = (hit == string::npos) || (res.size() >= maxsplit);
    if(last_piece) {
      res.push_back(src.substr(pos));
      break;
    }
    res.push_back(src.substr(pos, hit - pos));
    pos = hit + 1;
  }
}

// Convenience overload that returns the pieces by value.
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
  vector<string> pieces;
  Split(src, pieces, pattern, maxsplit);
  return pieces;
}
138
-
139
// Returns true when `str` begins with `prefix`. An empty prefix always
// matches; a prefix longer than `str` never does.
inline bool StartsWith(const string& str, const string& prefix) {
  const size_t n = prefix.length();
  return n <= str.length() && str.compare(0, n, prefix) == 0;
}
145
-
146
// Returns true when `str` ends with `suffix`. An empty suffix always
// matches; a suffix longer than `str` never does.
inline bool EndsWith(const string& str, const string& suffix) {
  const size_t n = suffix.length();
  if(n > str.length()) {
    return false;
  }
  const size_t tail_start = str.length() - n;
  return str.compare(tail_start, n, suffix) == 0;
}
152
-
153
// Returns true when the character `ch` occurs anywhere in `str`.
inline bool IsInStr(const string& str, char ch) {
  const string::size_type where = str.find(ch);
  return string::npos != where;
}
156
-
157
// Packs two bytes into a 16-bit value: `high` becomes bits 15..8 and `low`
// becomes bits 7..0. Only the low 8 bits of each argument are used.
inline uint16_t TwocharToUint16(char high, char low) {
  const uint16_t upper = static_cast<uint16_t>(uint16_t(high) & 0x00ff);
  const uint16_t lower = static_cast<uint16_t>(uint16_t(low) & 0x00ff);
  return static_cast<uint16_t>((upper << 8) | lower);
}
160
-
161
// Decodes UTF-8 sequences of one to three bytes from `str[0, len)` into
// 16-bit code units appended to `vec` (cleared first).
//
// Returns false when `str` is null (in which case `vec` is not touched), or
// when the input contains
//   - a lead byte that is not 0xxxxxxx, 110xxxxx or 1110xxxx (this includes
//     a stray continuation byte 10xxxxxx and any four-byte lead),
//   - a sequence cut off by the end of the input, or
//   - a continuation position whose byte is not of the form 10xxxxxx.
// On failure `vec` keeps the units decoded before the offending byte.
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if(!str) {
    return false;
  }
  vec.clear();
  size_t i = 0;
  while(i < len) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t trail;   // number of continuation bytes expected after `lead`
    uint16_t code;  // payload bits collected so far
    if(lead < 0x80) {                         // 0xxxxxxx
      trail = 0;
      code = lead;
    } else if(lead >= 0xc0 && lead <= 0xdf) { // 110xxxxx
      trail = 1;
      code = static_cast<uint16_t>(lead & 0x1f);
    } else if(lead >= 0xe0 && lead <= 0xef) { // 1110xxxx
      trail = 2;
      code = static_cast<uint16_t>(lead & 0x0f);
    } else {
      return false;
    }
    if(trail >= len - i) {  // sequence would run past the end of the input
      return false;
    }
    for(size_t k = 1; k <= trail; ++k) {
      const uint8_t cont = static_cast<uint8_t>(str[i + k]);
      if((cont & 0xc0) != 0x80) {
        return false;
      }
      code = static_cast<uint16_t>((code << 6) | (cont & 0x3f));
    }
    vec.push_back(code);
    i += trail + 1;
  }
  return true;
}

// Overload taking a std::string; decodes all `str.size()` bytes.
template <class Uint16Container>
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
  return Utf8ToUnicode(str.c_str(), str.size(), vec);
}
196
-
197
// Decodes UTF-8 sequences of one to four bytes from `str` into 32-bit code
// points appended to `vec` (cleared first).
//
// Returns false when the input contains
//   - a lead byte that does not start a 1-4 byte sequence (this includes a
//     stray continuation byte 10xxxxxx and bytes above 0xf7),
//   - a sequence cut off by the end of the string, or
//   - a continuation position whose byte is not of the form 10xxxxxx.
// On failure `vec` keeps the code points decoded before the offending byte.
template <class Uint32Container>
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
  vec.clear();
  const size_t len = str.size();
  size_t i = 0;
  while(i < len) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t trail;   // number of continuation bytes expected after `lead`
    uint32_t code;  // payload bits collected so far
    if(lead < 0x80) {                         // 0xxxxxxx, 7 bits
      trail = 0;
      code = lead;
    } else if(lead >= 0xc0 && lead <= 0xdf) { // 110xxxxx, 5 + 6 bits
      trail = 1;
      code = lead & 0x1f;
    } else if(lead >= 0xe0 && lead <= 0xef) { // 1110xxxx, 4 + 6 + 6 bits
      trail = 2;
      code = lead & 0x0f;
    } else if(lead >= 0xf0 && lead <= 0xf7) { // 11110xxx, 3 + 6 + 6 + 6 bits
      trail = 3;
      code = lead & 0x07;
    } else {
      return false;
    }
    if(trail >= len - i) {  // sequence would run past the end of the string
      return false;
    }
    for(size_t k = 1; k <= trail; ++k) {
      const uint8_t cont = static_cast<uint8_t>(str[i + k]);
      if((cont & 0xc0) != 0x80) {
        return false;
      }
      code = (code << 6) | (cont & 0x3f);
    }
    vec.push_back(code);
    i += trail + 1;
  }
  return true;
}
251
-
252
// Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`
// (cleared first), using one to four bytes per code point.
//
// The four-byte lead byte carries three payload bits (mask 0x07). The
// previous mask of 0x03 dropped bit 20, so code points at or above U+100000
// were written with the wrong lead byte.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const uint32_t ui = *begin;
    if(ui <= 0x7f) {
      res += char(ui);
    } else if(ui <= 0x7ff) {
      res += char(((ui >> 6) & 0x1f) | 0xc0);
      res += char((ui & 0x3f) | 0x80);
    } else if(ui <= 0xffff) {
      res += char(((ui >> 12) & 0x0f) | 0xe0);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    } else {
      res += char(((ui >> 18) & 0x07) | 0xf0);
      res += char(((ui >> 12) & 0x3f) | 0x80);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    }
  }
}
276
-
277
// Encodes the 16-bit code units in [begin, end) as UTF-8 into `res`
// (cleared first), using one, two or three bytes per unit.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const uint16_t unit = *begin;
    if(unit <= 0x7f) {
      // 0xxxxxxx
      res.push_back(char(unit));
      continue;
    }
    if(unit <= 0x7ff) {
      // 110xxxxx 10xxxxxx
      res.push_back(char(((unit >> 6) & 0x1f) | 0xc0));
      res.push_back(char((unit & 0x3f) | 0x80));
      continue;
    }
    // 1110xxxx 10xxxxxx 10xxxxxx
    res.push_back(char(((unit >> 12) & 0x0f) | 0xe0));
    res.push_back(char(((unit >> 6) & 0x3f) | 0x80));
    res.push_back(char((unit & 0x3f) | 0x80));
  }
}
296
-
297
-
298
// Packs the bytes of `str[0, len)` into 16-bit units appended to `vec`
// (cleared first): a byte with the high bit clear becomes one unit, while a
// byte with the high bit set is combined with the following byte as
// (first << 8) | second.
// Returns true for a null `str` (leaving `vec` empty) and false when a
// high-bit byte is the last byte of the input.
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if(!str) {
    return true;
  }
  for(size_t pos = 0; pos < len; ) {
    const uint16_t lead = static_cast<uint16_t>(static_cast<unsigned char>(str[pos]));
    if((lead & 0x80) == 0) {
      vec.push_back(lead);
      ++pos;
      continue;
    }
    if(pos + 1 >= len) {
      return false;  // second byte of the pair is missing
    }
    const uint16_t tail = static_cast<uint16_t>(static_cast<unsigned char>(str[pos + 1]));
    vec.push_back(static_cast<uint16_t>((lead << 8) | tail));
    pos += 2;
  }
  return true;
}

// Overload taking a std::string; processes all `str.size()` bytes.
template <class Uint16Container>
bool GBKTrans(const string& str, Uint16Container& vec) {
  return GBKTrans(str.c_str(), str.size(), vec);
}
326
-
327
// Unpacks the 16-bit units in [begin, end) into bytes appended to `res`
// (cleared first). A unit whose upper byte has the high bit set is written
// as two bytes (upper, then lower); any other unit is written as its lower
// byte only.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const char upper = static_cast<char>(((*begin) >> 8) & 0x00ff);
    const char lower = static_cast<char>((*begin) & 0x00ff);
    if(upper & 0x80) {
      res.push_back(upper);
    }
    res.push_back(lower);
  }
}
345
-
346
/*
 * Formats the current local time with strftime and stores it in timeStr.
 * format example: "%Y-%m-%d %H:%M:%S"
 *
 * The text is produced in a 64-byte buffer; when strftime reports 0 (for
 * example because the result does not fit) timeStr ends up empty. timeStr is
 * also cleared when localtime() fails.
 */
inline void GetTime(const string& format, string& timeStr) {
  time_t timeNow;
  time(&timeNow);
  const struct tm* local = localtime(&timeNow);
  if(!local) {
    // strftime must not be handed a null tm pointer.
    timeStr.clear();
    return;
  }
  // Format into a local buffer rather than writing through the const
  // pointer returned by timeStr.c_str().
  char buffer[64];
  const size_t len = strftime(buffer, sizeof(buffer), format.c_str(), local);
  timeStr.assign(buffer, len);
}
356
-
357
- inline string PathJoin(const string& path1, const string& path2) {
358
- if(EndsWith(path1, "/")) {
359
- return path1 + path2;
360
- }
361
- return path1 + "/" + path2;
362
- }
363
-
364
- }
365
- #endif