cppjieba_rb 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (130) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +2 -2
  5. data/cppjieba_rb.gemspec +4 -4
  6. data/lib/cppjieba_rb/version.rb +1 -1
  7. metadata +17 -135
  8. data/ext/cppjieba/.gitignore +0 -17
  9. data/ext/cppjieba/.travis.yml +0 -21
  10. data/ext/cppjieba/CMakeLists.txt +0 -28
  11. data/ext/cppjieba/ChangeLog.md +0 -236
  12. data/ext/cppjieba/README.md +0 -292
  13. data/ext/cppjieba/README_EN.md +0 -113
  14. data/ext/cppjieba/appveyor.yml +0 -32
  15. data/ext/cppjieba/deps/CMakeLists.txt +0 -1
  16. data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
  17. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
  18. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
  28. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
  29. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
  41. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  42. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
  43. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
  44. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  45. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
  46. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
  47. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
  48. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
  49. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
  50. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
  51. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
  52. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
  53. data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
  54. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +0 -39
  55. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +0 -70
  56. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
  57. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
  58. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
  59. data/ext/cppjieba/deps/limonp/Closure.hpp +0 -206
  60. data/ext/cppjieba/deps/limonp/Colors.hpp +0 -31
  61. data/ext/cppjieba/deps/limonp/Condition.hpp +0 -38
  62. data/ext/cppjieba/deps/limonp/Config.hpp +0 -103
  63. data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
  64. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +0 -7
  65. data/ext/cppjieba/deps/limonp/LocalVector.hpp +0 -139
  66. data/ext/cppjieba/deps/limonp/Logging.hpp +0 -76
  67. data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
  68. data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
  69. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +0 -21
  70. data/ext/cppjieba/deps/limonp/StdExtension.hpp +0 -159
  71. data/ext/cppjieba/deps/limonp/StringUtil.hpp +0 -365
  72. data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
  73. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
  74. data/ext/cppjieba/dict/README.md +0 -31
  75. data/ext/cppjieba/dict/hmm_model.utf8 +0 -34
  76. data/ext/cppjieba/dict/idf.utf8 +0 -258826
  77. data/ext/cppjieba/dict/jieba.dict.utf8 +0 -348982
  78. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +0 -6653
  79. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +0 -166
  80. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +0 -259
  81. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +0 -5222
  82. data/ext/cppjieba/dict/stop_words.utf8 +0 -1534
  83. data/ext/cppjieba/dict/user.dict.utf8 +0 -4
  84. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +0 -277
  85. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +0 -93
  86. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +0 -129
  87. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +0 -190
  88. data/ext/cppjieba/include/cppjieba/Jieba.hpp +0 -130
  89. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +0 -153
  90. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +0 -137
  91. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +0 -109
  92. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +0 -77
  93. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +0 -54
  94. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +0 -90
  95. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +0 -46
  96. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +0 -23
  97. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +0 -190
  98. data/ext/cppjieba/include/cppjieba/Trie.hpp +0 -174
  99. data/ext/cppjieba/include/cppjieba/Unicode.hpp +0 -227
  100. data/ext/cppjieba/test/CMakeLists.txt +0 -5
  101. data/ext/cppjieba/test/demo.cpp +0 -80
  102. data/ext/cppjieba/test/load_test.cpp +0 -54
  103. data/ext/cppjieba/test/testdata/curl.res +0 -1
  104. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +0 -109750
  105. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +0 -34
  106. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +0 -348982
  107. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +0 -93
  108. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +0 -93
  109. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +0 -67
  110. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +0 -64
  111. data/ext/cppjieba/test/testdata/load_test.urls +0 -2
  112. data/ext/cppjieba/test/testdata/review.100 +0 -100
  113. data/ext/cppjieba/test/testdata/review.100.res +0 -200
  114. data/ext/cppjieba/test/testdata/server.conf +0 -19
  115. data/ext/cppjieba/test/testdata/testlines.gbk +0 -9
  116. data/ext/cppjieba/test/testdata/testlines.utf8 +0 -8
  117. data/ext/cppjieba/test/testdata/userdict.2.utf8 +0 -1
  118. data/ext/cppjieba/test/testdata/userdict.english +0 -2
  119. data/ext/cppjieba/test/testdata/userdict.utf8 +0 -8
  120. data/ext/cppjieba/test/testdata/weicheng.utf8 +0 -247
  121. data/ext/cppjieba/test/unittest/CMakeLists.txt +0 -24
  122. data/ext/cppjieba/test/unittest/gtest_main.cpp +0 -39
  123. data/ext/cppjieba/test/unittest/jieba_test.cpp +0 -133
  124. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +0 -79
  125. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +0 -41
  126. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +0 -43
  127. data/ext/cppjieba/test/unittest/segments_test.cpp +0 -256
  128. data/ext/cppjieba/test/unittest/textrank_test.cpp +0 -86
  129. data/ext/cppjieba/test/unittest/trie_test.cpp +0 -177
  130. data/ext/cppjieba/test/unittest/unicode_test.cpp +0 -43
@@ -1,21 +0,0 @@
1
- /************************************
2
- ************************************/
3
- #ifndef LIMONP_NONCOPYABLE_H
4
- #define LIMONP_NONCOPYABLE_H
5
-
6
- namespace limonp {
7
-
8
// Base class for types that must not be copied. Derive from it to make the
// derived type's implicit copy constructor and copy assignment unusable.
class NonCopyable {
 private:
  // Copy operations are declared private and never defined, so any attempt
  // to copy fails at compile time (or at link time from inside the class).
  NonCopyable(const NonCopyable&);
  const NonCopyable& operator=(const NonCopyable&);

 protected:
  // Construction and destruction are only available to derived classes.
  NonCopyable() {}
  ~NonCopyable() {}
}; // class NonCopyable
18
-
19
- } // namespace limonp
20
-
21
- #endif // LIMONP_NONCOPYABLE_H
@@ -1,159 +0,0 @@
1
- #ifndef LIMONP_STD_EXTEMSION_HPP
2
- #define LIMONP_STD_EXTEMSION_HPP
3
-
4
- #include <map>
5
-
6
// Select the hash-container headers.
// Any C++11-or-later compiler provides <unordered_map>/<unordered_set>; the
// test is ">=" so that C++14/17/20 builds do not fall back to the TR1 headers.
// Apple and MSVC toolchains are special-cased exactly as before.
#if defined(__APPLE__) || defined(_MSC_VER) || (__cplusplus >= 201103L)
#include <unordered_map>
#include <unordered_set>
#else
// Pre-C++11 fallback: expose the TR1 containers under the std:: names used
// by the rest of this header.
#include <tr1/unordered_map>
#include <tr1/unordered_set>
namespace std {
using std::tr1::unordered_map;
using std::tr1::unordered_set;
}

#endif
24
-
25
- #include <set>
26
- #include <string>
27
- #include <vector>
28
- #include <deque>
29
- #include <fstream>
30
- #include <sstream>
31
-
32
- #define print(x) std::cout << x << std::endl
33
-
34
- namespace std {
35
-
36
// Streams a vector as "[a, b, c]"; an empty vector is written as "[]".
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v) {
  typedef typename std::vector<T>::const_iterator Iter;
  if (v.empty()) {
    return os << "[]";
  }
  Iter it = v.begin();
  os << "[" << *it;
  for (++it; it != v.end(); ++it) {
    os << ", " << *it;
  }
  os << "]";
  return os;
}

// Specialization for vectors of strings: each element is wrapped in double
// quotes, e.g. ["a", "b"]. An empty vector is still written as "[]".
template <>
inline std::ostream& operator<<(std::ostream& os, const std::vector<std::string>& v) {
  typedef std::vector<std::string>::const_iterator Iter;
  if (v.empty()) {
    return os << "[]";
  }
  Iter it = v.begin();
  os << "[\"" << *it;
  for (++it; it != v.end(); ++it) {
    os << "\", \"" << *it;
  }
  os << "\"]";
  return os;
}
61
-
62
// Streams a deque as ["a", "b"]; an empty deque is written as "[]".
// Note: every element is wrapped in double quotes regardless of T.
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::deque<T>& dq) {
  typedef typename std::deque<T>::const_iterator Iter;
  if (dq.empty()) {
    return os << "[]";
  }
  Iter it = dq.begin();
  os << "[\"" << *it;
  for (++it; it != dq.end(); ++it) {
    os << "\", \"" << *it;
  }
  os << "\"]";
  return os;
}
74
-
75
-
76
// Streams a pair as "first:second".
template <class T1, class T2>
std::ostream& operator<<(std::ostream& os, const std::pair<T1, T2>& entry) {
  os << entry.first << ":" << entry.second;
  return os;
}
81
-
82
-
83
// Replaces the contents of `str` with the streamed text form of `obj`
// (this assigns, it does not append) and returns `str`.
template <class T>
std::string& operator<<(std::string& str, const T& obj) {
  std::stringstream buffer;
  buffer << obj;  // uses whichever ostream operator<< applies to T
  str = buffer.str();
  return str;
}
89
-
90
// Streams a map as "{k1:v1, k2:v2}" in iteration order; an empty map is
// written as "{}".
template <class T1, class T2>
std::ostream& operator<<(std::ostream& os, const std::map<T1, T2>& mp) {
  typedef typename std::map<T1, T2>::const_iterator Iter;
  if (mp.empty()) {
    os << "{}";
    return os;
  }
  os << '{';
  for (Iter it = mp.begin(); it != mp.end(); ++it) {
    if (it != mp.begin()) {
      os << ", ";
    }
    // Same "first:second" form the pair operator<< of this header emits.
    os << it->first << ":" << it->second;
  }
  os << '}';
  return os;
}
107
// Streams an unordered_map as "{k1:v1, k2:v2}" in the container's iteration
// order; an empty map is written as "{}".
template <class T1, class T2>
std::ostream& operator<<(std::ostream& os, const std::unordered_map<T1, T2>& mp) {
  typedef typename std::unordered_map<T1, T2>::const_iterator Iter;
  if (mp.empty()) {
    return os << "{}";
  }
  os << '{';
  for (Iter it = mp.begin(); it != mp.end(); ++it) {
    if (it != mp.begin()) {
      os << ", ";
    }
    // Same "first:second" form the pair operator<< of this header emits.
    os << it->first << ":" << it->second;
  }
  return os << '}';
}
121
-
122
// Streams a set as "{a, b, c}" in iteration order; an empty set is written
// as "{}".
template <class T>
std::ostream& operator<<(std::ostream& os, const std::set<T>& st) {
  typedef typename std::set<T>::const_iterator Iter;
  if (st.empty()) {
    os << "{}";
    return os;
  }
  os << '{';
  for (Iter it = st.begin(); it != st.end(); ++it) {
    if (it != st.begin()) {
      os << ", ";
    }
    os << *it;
  }
  os << '}';
  return os;
}
139
-
140
// Returns true when `contain.find(key)` locates `key`, i.e. the result is
// not the container's end(). Works with any container exposing find()/end().
template <class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) {
  return contain.find(key) != contain.end();
}
144
-
145
// Replaces `s` with everything that can still be read from `ifs`
// (read through the stream's buffer until end of input) and returns `s`.
template <class T>
std::basic_string<T>& operator<<(std::basic_string<T>& s, std::ifstream& ifs) {
  std::istreambuf_iterator<T> first(ifs);
  std::istreambuf_iterator<T> last;  // default-constructed == end of stream
  s.assign(first, last);
  return s;
}
149
-
150
// Writes the characters of `s` to `ofs` through an ostreambuf_iterator
// (unformatted, character by character) and returns `ofs`.
template <class T>
std::ofstream& operator<<(std::ofstream& ofs, const std::basic_string<T>& s) {
  std::copy(s.begin(), s.end(), std::ostreambuf_iterator<T>(ofs));
  return ofs;
}
156
-
157
- } // namespace std
158
-
159
- #endif
@@ -1,365 +0,0 @@
1
- /************************************
2
- * file enc : ascii
3
- * author : wuyanyi09@gmail.com
4
- ************************************/
5
- #ifndef LIMONP_STR_FUNCTS_H
6
- #define LIMONP_STR_FUNCTS_H
7
- #include <fstream>
8
- #include <iostream>
9
- #include <string>
10
- #include <vector>
11
- #include <algorithm>
12
- #include <cctype>
13
- #include <map>
14
- #include <stdint.h>
15
- #include <stdio.h>
16
- #include <stdarg.h>
17
- #include <memory.h>
18
- #include <functional>
19
- #include <locale>
20
- #include <sstream>
21
- #include <sys/types.h>
22
- #include <iterator>
23
- #include <algorithm>
24
- #include "StdExtension.hpp"
25
-
26
- namespace limonp {
27
- using namespace std;
28
// printf-style formatting into a std::string.
//
// fmt / ... : a printf format string and its arguments.
// Returns the formatted text. The buffer starts at 256 bytes and grows
// until vsnprintf reports that the whole result fits.
inline std::string StringFormat(const char* fmt, ...) {
  int size = 256;
  std::string str;
  for (;;) {
    str.resize(size);
    va_list ap;
    va_start(ap, fmt);  // the argument list must be restarted for every attempt
    // Write through the string's own mutable storage; writing through the
    // pointer returned by c_str() is undefined behaviour.
    const int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n > -1 && n < size) {
      str.resize(n);
      return str;
    }
    // n >= size: n is the required length, so retry with room for the NUL.
    // n < 0: the length is unknown, so retry with a doubled buffer.
    size = (n > -1) ? n + 1 : size * 2;
  }
}
48
-
49
// Joins the elements of [begin, end) into `res`, separated by `connector`.
// Each element is formatted with operator<< on a stringstream.
// `res` is always overwritten: an empty range yields an empty string, so no
// stale content from the caller survives.
template <class T>
void Join(T begin, T end, std::string& res, const std::string& connector) {
  if (begin == end) {
    res.clear();
    return;
  }
  std::stringstream ss;
  ss << *begin;
  for (++begin; begin != end; ++begin) {
    ss << connector << *begin;
  }
  res = ss.str();
}

// Convenience overload returning the joined string by value.
template <class T>
std::string Join(T begin, T end, const std::string& connector) {
  std::string res;
  Join(begin, end, res, connector);
  return res;
}
70
-
71
// Upper-cases `str` in place, one byte at a time, and returns it.
// Each byte is converted to unsigned char before calling toupper: passing a
// negative char (for example a UTF-8 byte >= 0x80 where char is signed) to
// toupper is undefined behaviour.
inline std::string& Upper(std::string& str) {
  for (std::string::size_type i = 0; i < str.size(); ++i) {
    str[i] = static_cast<char>(std::toupper(static_cast<unsigned char>(str[i])));
  }
  return str;
}
75
-
76
// Lower-cases `str` in place, one byte at a time, and returns it.
// Each byte is converted to unsigned char before calling tolower: passing a
// negative char (for example a UTF-8 byte >= 0x80 where char is signed) to
// tolower is undefined behaviour.
inline std::string& Lower(std::string& str) {
  for (std::string::size_type i = 0; i < str.size(); ++i) {
    str[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(str[i])));
  }
  return str;
}
80
-
81
// Returns true when `c` is at most 0xff and std::isspace accepts it.
// Values above 0xff are rejected up front because isspace must not be called
// with an out-of-range argument.
inline bool IsSpace(unsigned c) {
  return c <= 0xff && std::isspace(static_cast<int>(c)) != 0;
}

// Removes leading whitespace (as decided by IsSpace) from `s` in place.
// Written as a plain loop: std::ptr_fun / std::not1, used previously, were
// removed from the standard library in C++17 / C++20.
inline std::string& LTrim(std::string& s) {
  std::string::size_type first = 0;
  // s[first] is converted char -> unsigned exactly as the old adaptor did.
  while (first < s.size() && IsSpace(s[first])) {
    ++first;
  }
  s.erase(0, first);
  return s;
}

// Removes trailing whitespace (as decided by IsSpace) from `s` in place.
inline std::string& RTrim(std::string& s) {
  std::string::size_type last = s.size();
  while (last > 0 && IsSpace(s[last - 1])) {
    --last;
  }
  s.erase(last);
  return s;
}

// Removes whitespace from both ends of `s` in place and returns `s`.
inline std::string& Trim(std::string& s) {
  RTrim(s);
  return LTrim(s);
}
99
-
100
// Removes every leading occurrence of `x` from `s` in place.
// Uses std::string searches instead of std::bind2nd / std::not1, which were
// removed from the standard library in C++17 / C++20.
inline std::string& LTrim(std::string& s, char x) {
  // npos (no other character found) makes erase() drop the whole string.
  s.erase(0, s.find_first_not_of(x));
  return s;
}

// Removes every trailing occurrence of `x` from `s` in place.
inline std::string& RTrim(std::string& s, char x) {
  const std::string::size_type last = s.find_last_not_of(x);
  s.erase(last == std::string::npos ? 0 : last + 1);
  return s;
}

// Removes `x` from both ends of `s` in place and returns `s`.
inline std::string& Trim(std::string& s, char x) {
  RTrim(s, x);
  return LTrim(s, x);
}
113
-
114
// Splits `src` into `res` at any character contained in `pattern`.
//   - `res` is cleared first; an empty `src` leaves it empty.
//   - Adjacent delimiters produce empty fields, but a delimiter at the very
//     end of `src` does not produce a trailing empty field.
//   - Once `res` already holds `maxsplit` fields, the remainder of `src` is
//     appended as one final field.
inline void Split(const std::string& src, std::vector<std::string>& res, const std::string& pattern, size_t maxsplit = std::string::npos) {
  res.clear();
  for (size_t pos = 0; pos < src.size();) {
    const size_t hit = src.find_first_of(pattern, pos);
    if (hit == std::string::npos || res.size() >= maxsplit) {
      res.push_back(src.substr(pos));
      return;
    }
    res.push_back(src.substr(pos, hit - pos));
    pos = hit + 1;
  }
}

// Convenience overload returning the fields by value.
inline std::vector<std::string> Split(const std::string& src, const std::string& pattern, size_t maxsplit = std::string::npos) {
  std::vector<std::string> fields;
  Split(src, fields, pattern, maxsplit);
  return fields;
}
138
-
139
// Returns true when `str` begins with `prefix` (an empty prefix always matches).
inline bool StartsWith(const std::string& str, const std::string& prefix) {
  const std::string::size_type n = prefix.length();
  return n <= str.length() && str.compare(0, n, prefix) == 0;
}
145
-
146
// Returns true when `str` ends with `suffix` (an empty suffix always matches).
inline bool EndsWith(const std::string& str, const std::string& suffix) {
  const std::string::size_type n = suffix.length();
  return n <= str.length() && str.compare(str.length() - n, n, suffix) == 0;
}
152
-
153
// Returns true when the character `ch` occurs anywhere in `str`.
inline bool IsInStr(const std::string& str, char ch) {
  return std::find(str.begin(), str.end(), ch) != str.end();
}
156
-
157
// Packs two bytes into one 16-bit value: `high` becomes bits 15..8 and `low`
// becomes bits 7..0. Only the low 8 bits of each argument are used.
inline uint16_t TwocharToUint16(char high, char low) {
  const uint16_t upper = static_cast<uint16_t>(static_cast<unsigned char>(high));
  const uint16_t lower = static_cast<uint16_t>(static_cast<unsigned char>(low));
  return static_cast<uint16_t>((upper << 8) | lower);
}
160
-
161
// Decodes `len` bytes of UTF-8 at `str` into 16-bit code units appended to
// `vec` (which is cleared first).
//
// Returns false, possibly leaving already-decoded values in `vec`, when:
//   - `str` is null (in that case `vec` is not touched),
//   - a lead byte is not 0xxxxxxx, 110xxxxx or 1110xxxx (continuation bytes
//     used as lead bytes and 4-byte sequences, which do not fit in 16 bits,
//     are rejected),
//   - a sequence is truncated, or a trailing byte is not of the form 10xxxxxx.
template <class Uint16Container>
bool Utf8ToUnicode(const char* const str, size_t len, Uint16Container& vec) {
  if (!str) {
    return false;
  }
  vec.clear();
  for (size_t i = 0; i < len;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t extra = 0;  // number of continuation bytes that must follow
    uint16_t cp = 0;   // payload bits collected so far
    if (lead < 0x80) {                           // 0xxxxxxx
      cp = lead;
    } else if (lead >= 0xc0 && lead <= 0xdf) {   // 110xxxxx
      extra = 1;
      cp = lead & 0x1f;
    } else if (lead >= 0xe0 && lead <= 0xef) {   // 1110xxxx
      extra = 2;
      cp = lead & 0x0f;
    } else {
      return false;
    }
    if (extra >= len - i) {  // sequence runs past the end of the input
      return false;
    }
    for (size_t k = 1; k <= extra; ++k) {
      const uint8_t next = static_cast<uint8_t>(str[i + k]);
      if ((next & 0xc0) != 0x80) {  // must be 10xxxxxx
        return false;
      }
      cp = static_cast<uint16_t>((cp << 6) | (next & 0x3f));
    }
    vec.push_back(cp);
    i += extra + 1;
  }
  return true;
}

// Convenience overload decoding the whole of `str`.
template <class Uint16Container>
bool Utf8ToUnicode(const std::string& str, Uint16Container& vec) {
  return Utf8ToUnicode(str.c_str(), str.size(), vec);
}
196
-
197
// Decodes the UTF-8 bytes of `str` into 32-bit code points appended to `vec`
// (which is cleared first). Sequences of one to four bytes are supported.
//
// Returns false, possibly leaving already-decoded values in `vec`, when:
//   - a lead byte is not 0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx
//     (continuation bytes used as lead bytes are rejected),
//   - a sequence is truncated, or a trailing byte is not of the form 10xxxxxx.
template <class Uint32Container>
bool Utf8ToUnicode32(const std::string& str, Uint32Container& vec) {
  vec.clear();
  const size_t size = str.size();
  for (size_t i = 0; i < size;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t extra = 0;  // number of continuation bytes that must follow
    uint32_t cp = 0;   // payload bits collected so far
    if (lead < 0x80) {                           // 0xxxxxxx: 7 bits
      cp = lead;
    } else if (lead >= 0xc0 && lead <= 0xdf) {   // 110xxxxx: 5 + 6 bits
      extra = 1;
      cp = lead & 0x1f;
    } else if (lead >= 0xe0 && lead <= 0xef) {   // 1110xxxx: 4 + 6 + 6 bits
      extra = 2;
      cp = lead & 0x0f;
    } else if (lead >= 0xf0 && lead <= 0xf7) {   // 11110xxx: 3 + 6 + 6 + 6 bits
      extra = 3;
      cp = lead & 0x07;
    } else {
      return false;
    }
    if (extra >= size - i) {  // sequence runs past the end of the input
      return false;
    }
    for (size_t k = 1; k <= extra; ++k) {
      const uint8_t next = static_cast<uint8_t>(str[i + k]);
      if ((next & 0xc0) != 0x80) {  // must be 10xxxxxx
        return false;
      }
      cp = (cp << 6) | (next & 0x3f);
    }
    vec.push_back(cp);
    i += extra + 1;
  }
  return true;
}
251
-
252
// Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`
// (which is cleared first). Values up to 0x7f, 0x7ff and 0xffff take one,
// two and three bytes; everything larger is written as a four-byte sequence.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const uint32_t ui = *begin;
    if (ui <= 0x7f) {
      res += char(ui);
    } else if (ui <= 0x7ff) {
      res += char(((ui >> 6) & 0x1f) | 0xc0);
      res += char((ui & 0x3f) | 0x80);
    } else if (ui <= 0xffff) {
      res += char(((ui >> 12) & 0x0f) | 0xe0);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    } else {
      // A four-byte lead byte (11110xxx) carries three payload bits, so the
      // mask must be 0x07; 0x03 dropped bit 20 and mis-encoded every code
      // point from U+100000 upwards.
      res += char(((ui >> 18) & 0x07) | 0xf0);
      res += char(((ui >> 12) & 0x3f) | 0x80);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    }
  }
}
276
-
277
// Encodes the 16-bit values in [begin, end) as UTF-8 into `res` (which is
// cleared first). Values up to 0x7f take one byte, up to 0x7ff two bytes,
// and everything else three bytes.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const uint16_t cp = *begin;
    if (cp <= 0x7f) {
      res.push_back(char(cp));
      continue;
    }
    if (cp <= 0x7ff) {
      res.push_back(char(0xc0 | ((cp >> 6) & 0x1f)));
      res.push_back(char(0x80 | (cp & 0x3f)));
      continue;
    }
    res.push_back(char(0xe0 | ((cp >> 12) & 0x0f)));
    res.push_back(char(0x80 | ((cp >> 6) & 0x3f)));
    res.push_back(char(0x80 | (cp & 0x3f)));
  }
}
296
-
297
-
298
// Converts `len` bytes at `str` into 16-bit units appended to `vec` (which is
// cleared first): a byte with the high bit clear becomes one unit; a byte
// with the high bit set is combined with the following byte as
// (first << 8) | second.
// Returns false when a high-bit byte is the last byte of the input.
// A null `str` is treated as empty input and returns true.
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if (!str) {
    return true;
  }
  for (size_t pos = 0; pos < len;) {
    const uint16_t lead = static_cast<uint8_t>(str[pos]);
    if ((lead & 0x80) == 0) {
      vec.push_back(lead);
      ++pos;
      continue;
    }
    if (pos + 1 >= len) {
      return false;
    }
    const uint16_t trail = static_cast<uint8_t>(str[pos + 1]);
    vec.push_back(static_cast<uint16_t>((lead << 8) | trail));
    pos += 2;
  }
  return true;
}

// Convenience overload converting the whole of `str`.
template <class Uint16Container>
bool GBKTrans(const std::string& str, Uint16Container& vec) {
  return GBKTrans(str.c_str(), str.size(), vec);
}
326
-
327
// Converts the 16-bit units in [begin, end) back into bytes in `res` (which
// is cleared first). When the upper byte of a unit has its high bit set, both
// bytes are written (upper first); otherwise only the lower byte is written.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const char upper = ((*begin) >> 8) & 0x00ff;
    const char lower = (*begin) & 0x00ff;
    if (upper & 0x80) {
      res += upper;
    }
    res += lower;
  }
}
345
-
346
/*
 * Formats the current local time into `timeStr` using the strftime pattern
 * `format`, e.g. "%Y-%m-%d %H:%M:%S".
 *
 * The result is limited to a 64-byte buffer; when strftime reports 0 (the
 * text does not fit, or is empty) `timeStr` ends up empty. `timeStr` is also
 * left empty if the local time cannot be obtained.
 */
inline void GetTime(const std::string& format, std::string& timeStr) {
  time_t timeNow;
  time(&timeNow);
  const struct tm* const local = localtime(&timeNow);
  if (!local) {
    timeStr.clear();
    return;
  }
  timeStr.resize(64);
  // Write through the string's own mutable storage; writing through the
  // pointer returned by c_str() is undefined behaviour.
  const size_t len = strftime(&timeStr[0], timeStr.size(), format.c_str(), local);
  timeStr.resize(len);
}
356
-
357
// Concatenates two path fragments, inserting a single "/" between them unless
// `path1` already ends with one. An empty `path1` yields "/" + path2.
inline std::string PathJoin(const std::string& path1, const std::string& path2) {
  const bool hasSlash = !path1.empty() && path1[path1.size() - 1] == '/';
  std::string joined(path1);
  if (!hasSlash) {
    joined += "/";
  }
  joined += path2;
  return joined;
}
363
-
364
- }
365
- #endif