cppjieba_rb 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +81 -0
  8. data/Rakefile +20 -0
  9. data/cppjieba_rb.gemspec +50 -0
  10. data/ext/cppjieba/.gitignore +17 -0
  11. data/ext/cppjieba/.travis.yml +22 -0
  12. data/ext/cppjieba/CMakeLists.txt +28 -0
  13. data/ext/cppjieba/ChangeLog.md +236 -0
  14. data/ext/cppjieba/README.md +285 -0
  15. data/ext/cppjieba/README_EN.md +111 -0
  16. data/ext/cppjieba/appveyor.yml +32 -0
  17. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  18. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  42. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  45. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  46. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  56. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  57. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  58. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  59. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  60. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  61. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  62. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  63. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  64. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  65. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  66. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  67. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  68. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  69. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  70. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  71. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  72. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  73. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  74. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  75. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  76. data/ext/cppjieba/dict/README.md +31 -0
  77. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  78. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  79. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  80. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  83. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  84. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  85. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  86. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  87. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  88. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  89. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  90. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  91. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  92. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  93. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  94. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  95. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  96. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  98. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  99. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  100. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  101. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  102. data/ext/cppjieba/test/CMakeLists.txt +5 -0
  103. data/ext/cppjieba/test/demo.cpp +80 -0
  104. data/ext/cppjieba/test/load_test.cpp +54 -0
  105. data/ext/cppjieba/test/testdata/curl.res +1 -0
  106. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  107. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  108. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  109. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  110. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  111. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  112. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  113. data/ext/cppjieba/test/testdata/load_test.urls +2 -0
  114. data/ext/cppjieba/test/testdata/review.100 +100 -0
  115. data/ext/cppjieba/test/testdata/review.100.res +200 -0
  116. data/ext/cppjieba/test/testdata/server.conf +19 -0
  117. data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
  118. data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
  119. data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  120. data/ext/cppjieba/test/testdata/userdict.english +2 -0
  121. data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
  122. data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
  123. data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
  124. data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
  125. data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
  126. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  127. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  128. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  129. data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
  130. data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
  131. data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
  132. data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
  133. data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
  134. data/ext/cppjieba_rb/extconf.rb +26 -0
  135. data/ext/cppjieba_rb/internal.cc +148 -0
  136. data/lib/cppjieba_rb/segment.rb +20 -0
  137. data/lib/cppjieba_rb/version.rb +3 -0
  138. data/lib/cppjieba_rb.rb +34 -0
  139. data/test/test_keyword.rb +17 -0
  140. data/test/test_segment.rb +24 -0
  141. data/test/test_tagging.rb +19 -0
  142. metadata +244 -0
@@ -0,0 +1,51 @@
1
+ #ifndef LIMONP_MUTEX_LOCK_HPP
2
+ #define LIMONP_MUTEX_LOCK_HPP
3
+
4
+ #include <pthread.h>
5
+ #include "NonCopyable.hpp"
6
+ #include "Logging.hpp"
7
+
8
+ namespace limonp {
9
+
10
+ class MutexLock: NonCopyable {
11
+ public:
12
+ MutexLock() {
13
+ XCHECK(!pthread_mutex_init(&mutex_, NULL));
14
+ }
15
+ ~MutexLock() {
16
+ XCHECK(!pthread_mutex_destroy(&mutex_));
17
+ }
18
+ pthread_mutex_t* GetPthreadMutex() {
19
+ return &mutex_;
20
+ }
21
+
22
+ private:
23
+ void Lock() {
24
+ XCHECK(!pthread_mutex_lock(&mutex_));
25
+ }
26
+ void Unlock() {
27
+ XCHECK(!pthread_mutex_unlock(&mutex_));
28
+ }
29
+ friend class MutexLockGuard;
30
+
31
+ pthread_mutex_t mutex_;
32
+ }; // class MutexLock
33
+
34
+ class MutexLockGuard: NonCopyable {
35
+ public:
36
+ explicit MutexLockGuard(MutexLock & mutex)
37
+ : mutex_(mutex) {
38
+ mutex_.Lock();
39
+ }
40
+ ~MutexLockGuard() {
41
+ mutex_.Unlock();
42
+ }
43
+ private:
44
+ MutexLock & mutex_;
45
+ }; // class MutexLockGuard
46
+
47
+ #define MutexLockGuard(x) XCHECK(false);
48
+
49
+ } // namespace limonp
50
+
51
+ #endif // LIMONP_MUTEX_LOCK_HPP
@@ -0,0 +1,21 @@
1
+ /************************************
2
+ ************************************/
3
+ #ifndef LIMONP_NONCOPYABLE_H
4
+ #define LIMONP_NONCOPYABLE_H
5
+
6
+ namespace limonp {
7
+
8
// Base class that disables copying for anything derived from it.
// The copy constructor and copy assignment are declared private and never
// defined, so an attempted copy fails at compile time (or at link time when
// attempted from inside the class itself).
class NonCopyable {
 private:
  // Declared only -- intentionally left without a definition.
  NonCopyable(const NonCopyable& );
  const NonCopyable& operator=(const NonCopyable& );

 protected:
  // Protected so the class can only be used as a base.
  NonCopyable() {
  }

  ~NonCopyable() {
  }
}; // class NonCopyable
18
+
19
+ } // namespace limonp
20
+
21
+ #endif // LIMONP_NONCOPYABLE_H
@@ -0,0 +1,159 @@
1
+ #ifndef LIMONP_STD_EXTEMSION_HPP
2
+ #define LIMONP_STD_EXTEMSION_HPP
3
+
4
+ #include <map>
5
+
6
// Pick the unordered containers.
// Any C++11-or-newer compiler ships <unordered_map>/<unordered_set>; the
// check must be ">=" rather than "==" so that C++14/17/20 builds do not fall
// through to the TR1 branch (which would inject std::tr1 names into std and
// clash with the real std::unordered_map). MSVC is listed explicitly because
// it reports an old __cplusplus value unless /Zc:__cplusplus is given.
#if defined(__APPLE__) || defined(_MSC_VER) || (__cplusplus >= 201103L)
#include <unordered_map>
#include <unordered_set>
#else
// Pre-C++11 toolchains: use the TR1 containers and expose them under std.
#include <tr1/unordered_map>
#include <tr1/unordered_set>
namespace std {
using std::tr1::unordered_map;
using std::tr1::unordered_set;
}

#endif
24
+
25
+ #include <set>
26
+ #include <string>
27
+ #include <vector>
28
+ #include <deque>
29
+ #include <fstream>
30
+ #include <sstream>
31
+
32
+ #define print(x) std::cout << x << std::endl
33
+
34
// Convenience stream operators for standard containers.
// NOTE(review): these overloads are added to namespace std so that
// argument-dependent lookup finds them; the C++ standard does not permit
// user code to add such declarations to std -- kept as-is because callers
// depend on it.
namespace std {

// Writes a vector as "[a, b, c]" ("[]" when empty).
template<typename T>
ostream& operator << (ostream& os, const vector<T>& v) {
  if (v.empty()) {
    return os << "[]";
  }
  typename vector<T>::const_iterator cur = v.begin();
  os << "[" << *cur;
  for (++cur; cur != v.end(); ++cur) {
    os << ", " << *cur;
  }
  return os << "]";
}

// String vectors are written with every element double-quoted:
// ["a", "b"].
template<>
inline ostream& operator << (ostream& os, const vector<string>& v) {
  if (v.empty()) {
    return os << "[]";
  }
  vector<string>::const_iterator cur = v.begin();
  os << "[\"" << *cur;
  for (++cur; cur != v.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  return os << "\"]";
}

// Writes a deque as ["a", "b"]; elements are quoted for every T.
template<typename T>
ostream& operator << (ostream& os, const deque<T>& dq) {
  if (dq.empty()) {
    return os << "[]";
  }
  typename deque<T>::const_iterator cur = dq.begin();
  os << "[\"" << *cur;
  for (++cur; cur != dq.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  return os << "\"]";
}


// Writes a pair as "first:second".
template<class T1, class T2>
ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
  return os << pr.first << ":" << pr.second;
}


// Replaces the content of str with the streamed text of obj and returns
// str. Note that this assigns; it does not append.
template<class T>
string& operator << (string& str, const T& obj) {
  stringstream buffer;
  buffer << obj; // resolved through the ostream overloads
  str = buffer.str();
  return str;
}

// Writes a map as "{k1:v1, k2:v2}" ("{}" when empty), in iteration order.
template<class T1, class T2>
ostream& operator << (ostream& os, const map<T1, T2>& mp) {
  if (mp.empty()) {
    return os << "{}";
  }
  typename map<T1, T2>::const_iterator cur = mp.begin();
  os << '{';
  os << *cur;
  for (++cur; cur != mp.end(); ++cur) {
    os << ", " << *cur;
  }
  return os << '}';
}

// Writes an unordered_map as "{k1:v1, k2:v2}" ("{}" when empty), in the
// container's iteration order.
template<class T1, class T2>
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
  if (mp.empty()) {
    return os << "{}";
  }
  typename std::unordered_map<T1, T2>::const_iterator cur = mp.begin();
  os << '{';
  os << *cur;
  for (++cur; cur != mp.end(); ++cur) {
    os << ", " << *cur;
  }
  return os << '}';
}

// Writes a set as "{a, b, c}" ("{}" when empty).
template<class T>
ostream& operator << (ostream& os, const set<T>& st) {
  if (st.empty()) {
    return os << "{}";
  }
  typename set<T>::const_iterator cur = st.begin();
  os << '{';
  os << *cur;
  for (++cur; cur != st.end(); ++cur) {
    os << ", " << *cur;
  }
  return os << '}';
}

// True when contain.find(key) locates the key. Works for any container
// exposing find() and end().
template<class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) {
  return contain.find(key) != contain.end();
}

// Replaces s with everything that can still be read from ifs.
template<class T>
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
  istreambuf_iterator<T> first(ifs);
  istreambuf_iterator<T> last;
  return s.assign(first, last);
}

// Writes the characters of s to ofs through its stream buffer
// (unformatted output).
template<class T>
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
  ostreambuf_iterator<T> sink(ofs);
  copy(s.begin(), s.end(), sink);
  return ofs;
}

} // namespace std
158
+
159
+ #endif
@@ -0,0 +1,365 @@
1
+ /************************************
2
+ * file enc : ascii
3
+ * author : wuyanyi09@gmail.com
4
+ ************************************/
5
+ #ifndef LIMONP_STR_FUNCTS_H
6
+ #define LIMONP_STR_FUNCTS_H
7
+ #include <fstream>
8
+ #include <iostream>
9
+ #include <string>
10
+ #include <vector>
11
+ #include <algorithm>
12
+ #include <cctype>
13
+ #include <map>
14
+ #include <stdint.h>
15
+ #include <stdio.h>
16
+ #include <stdarg.h>
17
+ #include <memory.h>
18
+ #include <functional>
19
+ #include <locale>
20
+ #include <sstream>
21
+ #include <sys/types.h>
22
+ #include <iterator>
23
+ #include <algorithm>
24
+ #include "StdExtension.hpp"
25
+
26
+ namespace limonp {
27
+ using namespace std;
28
// printf-style formatting into a std::string.
//
// fmt and the variadic arguments follow the vsnprintf contract. The text is
// formatted into a buffer that starts at 256 bytes and is grown until the
// result fits. Returns the formatted text; returns an empty string when
// vsnprintf keeps reporting an error and the buffer limit is reached.
inline std::string StringFormat(const char* fmt, ...) {
  // Upper bound for the retry loop so repeated failures cannot overflow
  // the int size or exhaust memory.
  const int kMaxSize = 1 << 30;
  int size = 256;
  std::string str;
  va_list ap;
  for (;;) {
    str.resize(size);
    // The argument list is consumed by vsnprintf, so restart it each pass.
    va_start(ap, fmt);
    // Write through &str[0]: the buffer returned by c_str() is const and
    // must not be modified.
    const int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n > -1 && n < size) {
      str.resize(n);
      return str;
    }
    if (n > -1) {
      // C99 behaviour: n is the length needed, excluding the terminator.
      size = n + 1;
    } else {
      // Error, or a pre-C99 library signalling truncation: try a bigger
      // buffer, but give up once the limit is reached.
      if (size >= kMaxSize) {
        str.clear();
        return str;
      }
      size *= 2;
    }
  }
}
48
+
49
// Streams every element of [begin, end) into res, separated by connector.
// res is always overwritten: an empty range yields an empty string, which
// matches the other out-parameter helpers in this file that clear their
// output first.
template<class T>
void Join(T begin, T end, std::string& res, const std::string& connector) {
  res.clear();
  if (begin == end) {
    return;
  }
  std::stringstream ss;
  ss << *begin;
  for (++begin; begin != end; ++begin) {
    ss << connector << *begin;
  }
  res = ss.str();
}

// Value-returning form of Join: returns the joined text, or an empty
// string for an empty range.
template<class T>
std::string Join(T begin, T end, const std::string& connector) {
  std::string res;
  Join(begin, end, res, connector);
  return res;
}
70
+
71
// Upper-cases str in place (per byte, current C locale) and returns it.
inline std::string& Upper(std::string& str) {
  for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
    // toupper requires a value representable as unsigned char; a plain
    // char may be negative for bytes >= 0x80 (e.g. UTF-8 text).
    const unsigned char byte = static_cast<unsigned char>(*it);
    *it = static_cast<char>(std::toupper(byte));
  }
  return str;
}
75
+
76
// Lower-cases str in place (per byte, current C locale) and returns it.
inline std::string& Lower(std::string& str) {
  for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
    // tolower requires a value representable as unsigned char; a plain
    // char may be negative for bytes >= 0x80 (e.g. UTF-8 text).
    const unsigned char byte = static_cast<unsigned char>(*it);
    *it = static_cast<char>(std::tolower(byte));
  }
  return str;
}
80
+
81
// True when c is at most 0xff and std::isspace accepts it.
// Values above 0xff are rejected up front because isspace is only defined
// for the unsigned-char range. A negative char converted to unsigned lands
// above 0xff, so bytes >= 0x80 are never treated as whitespace on
// platforms where char is signed.
inline bool IsSpace(unsigned c) {
  if (c > 0xff) {
    return false;
  }
  return std::isspace(static_cast<int>(c)) != 0;
}

// Removes leading whitespace (as defined by IsSpace) in place; returns s.
// Written as a plain loop: std::ptr_fun / std::not1 are no longer part of
// the standard library in current C++ versions.
inline std::string& LTrim(std::string &s) {
  std::string::size_type count = 0;
  while (count < s.size() && IsSpace(s[count])) {
    ++count;
  }
  s.erase(0, count);
  return s;
}

// Removes trailing whitespace (as defined by IsSpace) in place; returns s.
inline std::string& RTrim(std::string &s) {
  std::string::size_type keep = s.size();
  while (keep > 0 && IsSpace(s[keep - 1])) {
    --keep;
  }
  s.erase(keep);
  return s;
}

// Removes whitespace from both ends in place; returns s.
inline std::string& Trim(std::string &s) {
  return LTrim(RTrim(s));
}
99
+
100
// Removes every leading occurrence of x from s in place; returns s.
// Written as a plain loop: std::bind2nd / std::not1 are no longer part of
// the standard library in current C++ versions.
inline std::string& LTrim(std::string & s, char x) {
  std::string::size_type count = 0;
  while (count < s.size() && s[count] == x) {
    ++count;
  }
  s.erase(0, count);
  return s;
}

// Removes every trailing occurrence of x from s in place; returns s.
inline std::string& RTrim(std::string & s, char x) {
  std::string::size_type keep = s.size();
  while (keep > 0 && s[keep - 1] == x) {
    --keep;
  }
  s.erase(keep);
  return s;
}

// Removes x from both ends of s in place; returns s.
inline std::string& Trim(std::string &s, char x) {
  return LTrim(RTrim(s, x), x);
}
113
+
114
// Splits src at every character that appears in pattern (pattern is a SET
// of delimiter characters, not a substring) and stores the pieces in res.
//
// - res is cleared first.
// - Once res already holds maxsplit pieces, the rest of src is appended
//   as one final piece.
// - A delimiter at the very end of src does not produce a trailing empty
//   piece; an empty src produces no pieces.
inline void Split(const std::string& src, std::vector<std::string>& res, const std::string& pattern, size_t maxsplit = std::string::npos) {
  res.clear();
  const size_t total = src.size();
  for (size_t pos = 0; pos < total;) {
    const size_t hit = src.find_first_of(pattern, pos);
    const bool take_rest = (hit == std::string::npos) || (res.size() >= maxsplit);
    if (take_rest) {
      res.push_back(src.substr(pos));
      break;
    }
    res.push_back(src.substr(pos, hit - pos));
    pos = hit + 1;
  }
}

// Value-returning form of Split; same rules as the overload above.
inline std::vector<std::string> Split(const std::string& src, const std::string& pattern, size_t maxsplit = std::string::npos) {
  std::vector<std::string> pieces;
  Split(src, pieces, pattern, maxsplit);
  return pieces;
}
138
+
139
// True when str begins with prefix (an empty prefix always matches).
inline bool StartsWith(const std::string& str, const std::string& prefix) {
  const std::string::size_type n = prefix.length();
  return n <= str.length() && str.compare(0, n, prefix) == 0;
}
145
+
146
// True when str ends with suffix (an empty suffix always matches).
inline bool EndsWith(const std::string& str, const std::string& suffix) {
  const std::string::size_type n = suffix.length();
  const std::string::size_type total = str.length();
  return n <= total && str.compare(total - n, n, suffix) == 0;
}
152
+
153
// True when the character ch occurs anywhere in str.
inline bool IsInStr(const std::string& str, char ch) {
  return std::string::npos != str.find(ch);
}
156
+
157
// Packs two bytes into one 16-bit value: high becomes bits 15..8 and low
// becomes bits 7..0. Each byte is masked to 8 bits, so sign extension of
// a negative char cannot leak into the result.
inline uint16_t TwocharToUint16(char high, char low) {
  const uint16_t upper = uint16_t(high) & 0x00ff;
  const uint16_t lower = uint16_t(low) & 0x00ff;
  return static_cast<uint16_t>((upper << 8) | lower);
}
160
+
161
// Decodes UTF-8 text into 16-bit code units (1- to 3-byte sequences only).
//
// str / len : input bytes; a null str is rejected.
// vec       : receives one value per decoded character; cleared first.
// Returns false for a null pointer, a truncated sequence, a lead byte
// outside the 1..3-byte forms (this includes 4-byte sequences, which do
// not fit in 16 bits), a stray continuation byte in lead position, or a
// trailing byte that is not of the form 10xxxxxx. On failure vec keeps
// whatever was decoded before the bad byte.
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if (!str) {
    return false;
  }
  vec.clear();
  for (size_t i = 0; i < len;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t trail = 0;   // number of continuation bytes expected
    uint32_t value = 0; // payload bits collected so far
    if (lead < 0x80) {                        // 0xxxxxxx
      value = lead;
    } else if (lead >= 0xc0 && lead <= 0xdf) { // 110xxxxx
      trail = 1;
      value = lead & 0x1f;
    } else if (lead >= 0xe0 && lead <= 0xef) { // 1110xxxx
      trail = 2;
      value = lead & 0x0f;
    } else {
      // 10xxxxxx cannot start a character; 0xf0 and above need > 16 bits.
      return false;
    }
    if (trail >= len - i) {
      return false; // sequence runs past the end of the input
    }
    for (size_t k = 1; k <= trail; ++k) {
      const uint8_t next = static_cast<uint8_t>(str[i + k]);
      if ((next & 0xc0) != 0x80) {
        return false; // not a continuation byte
      }
      value = (value << 6) | (next & 0x3f);
    }
    vec.push_back(static_cast<uint16_t>(value));
    i += trail + 1;
  }
  return true;
}

// std::string convenience overload; same rules as the pointer/length form.
template <class Uint16Container>
bool Utf8ToUnicode(const std::string& str, Uint16Container& vec) {
  return Utf8ToUnicode(str.c_str(), str.size(), vec);
}
196
+
197
// Decodes UTF-8 text into 32-bit code points (1- to 4-byte sequences).
//
// vec receives one value per decoded character and is cleared first.
// Returns false for a truncated sequence, a lead byte above 0xf7, a stray
// continuation byte in lead position, or a trailing byte that is not of
// the form 10xxxxxx. On failure vec keeps whatever was decoded before the
// bad byte. Overlong forms and surrogate values are not rejected.
template <class Uint32Container>
bool Utf8ToUnicode32(const std::string& str, Uint32Container& vec) {
  vec.clear();
  const size_t len = str.size();
  for (size_t i = 0; i < len;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t trail = 0;   // number of continuation bytes expected
    uint32_t value = 0; // payload bits collected so far
    if (lead < 0x80) {                        // 0xxxxxxx, 7 bits
      value = lead;
    } else if (lead >= 0xc0 && lead <= 0xdf) { // 110xxxxx, 11 bits total
      trail = 1;
      value = lead & 0x1f;
    } else if (lead >= 0xe0 && lead <= 0xef) { // 1110xxxx, 16 bits total
      trail = 2;
      value = lead & 0x0f;
    } else if (lead >= 0xf0 && lead <= 0xf7) { // 11110xxx, 21 bits total
      trail = 3;
      value = lead & 0x07;
    } else {
      // 10xxxxxx cannot start a character; 0xf8 and above are invalid.
      return false;
    }
    if (trail >= len - i) {
      return false; // sequence runs past the end of the input
    }
    for (size_t k = 1; k <= trail; ++k) {
      const uint8_t next = static_cast<uint8_t>(str[i + k]);
      if ((next & 0xc0) != 0x80) {
        return false; // not a continuation byte
      }
      value = (value << 6) | (next & 0x3f);
    }
    vec.push_back(value);
    i += trail + 1;
  }
  return true;
}
251
+
252
// Encodes the 32-bit code points in [begin, end) as UTF-8 into res
// (res is cleared first).
//
// Values up to 0x7f / 0x7ff / 0xffff use 1 / 2 / 3 bytes; everything
// larger uses the 4-byte form. No range or surrogate validation is done.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const uint32_t cp = *begin;
    if (cp <= 0x7f) {
      res += char(cp);
    } else if (cp <= 0x7ff) {
      res += char(((cp >> 6) & 0x1f) | 0xc0);
      res += char((cp & 0x3f) | 0x80);
    } else if (cp <= 0xffff) {
      res += char(((cp >> 12) & 0x0f) | 0xe0);
      res += char(((cp >> 6) & 0x3f) | 0x80);
      res += char((cp & 0x3f) | 0x80);
    } else {
      // The 4-byte lead carries THREE payload bits (11110xxx). Masking
      // with 0x03 would drop bit 20 and corrupt U+100000..U+10FFFF.
      res += char(((cp >> 18) & 0x07) | 0xf0);
      res += char(((cp >> 12) & 0x3f) | 0x80);
      res += char(((cp >> 6) & 0x3f) | 0x80);
      res += char((cp & 0x3f) | 0x80);
    }
  }
}
276
+
277
// Encodes the 16-bit values in [begin, end) as UTF-8 into res (res is
// cleared first). Values up to 0x7f / 0x7ff use 1 / 2 bytes; everything
// else uses the 3-byte form. Each value is encoded on its own, so
// surrogate pairs are not combined.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const uint16_t unit = *begin;
    if (unit <= 0x7f) {
      res += char(unit);
      continue;
    }
    if (unit <= 0x7ff) {
      res += char(((unit >> 6) & 0x1f) | 0xc0);
      res += char((unit & 0x3f) | 0x80);
      continue;
    }
    res += char(((unit >> 12) & 0x0f) | 0xe0);
    res += char(((unit >> 6) & 0x3f) | 0x80);
    res += char((unit & 0x3f) | 0x80);
  }
}
296
+
297
+
298
// Splits double-byte (GBK-style) text into 16-bit units.
//
// A byte with the high bit clear becomes one unit by itself. A byte with
// the high bit set is combined with the following byte as
// (first << 8) | second; the second byte is not validated.
// vec is cleared first. A null str yields an empty vec and returns true.
// Returns false when a high-bit byte is the last byte of the input; vec
// then keeps what was decoded so far.
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if (!str) {
    return true;
  }
  for (size_t i = 0; i < len;) {
    const bool single_byte = (str[i] & 0x80) == 0;
    if (single_byte) {
      vec.push_back(uint16_t(str[i]));
      ++i;
      continue;
    }
    if (i + 1 >= len) {
      return false; // lead byte without its partner
    }
    const uint16_t first = uint16_t(str[i]) & 0x00ff;
    const uint16_t second = uint16_t(str[i + 1]) & 0x00ff;
    const uint16_t unit = static_cast<uint16_t>((first << 8) | second);
    vec.push_back(unit);
    i += 2;
  }
  return true;
}

// std::string convenience overload; same rules as the pointer/length form.
template <class Uint16Container>
bool GBKTrans(const std::string& str, Uint16Container& vec) {
  return GBKTrans(str.c_str(), str.size(), vec);
}
326
+
327
// Turns 16-bit units back into double-byte (GBK-style) text in res
// (res is cleared first). A unit whose upper byte has the high bit set is
// written as two bytes (upper, then lower); any other unit is written as
// its lower byte only.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const char upper = ((*begin) >> 8) & 0x00ff;
    const char lower = (*begin) & 0x00ff;
    if (upper & 0x80) {
      res += upper;
    }
    res += lower;
  }
}
345
+
346
/*
 * Formats the current local time with strftime.
 *
 * format  : strftime pattern, e.g. "%Y-%m-%d %H:%M:%S"
 * timeStr : receives the formatted text (previous content is replaced).
 *           It is left empty when the result does not fit in the 64-byte
 *           work buffer or the local time cannot be obtained.
 *
 * NOTE(review): localtime() returns a pointer to shared static storage,
 * so concurrent callers may interfere -- confirm whether this is called
 * from multiple threads.
 */
inline void GetTime(const std::string& format, std::string& timeStr) {
  time_t timeNow;
  time(&timeNow);
  const struct tm* local = localtime(&timeNow);
  if (local == NULL) {
    timeStr.clear();
    return;
  }
  // Format into a local buffer instead of writing through c_str(), whose
  // storage is const and must not be modified.
  char buffer[64];
  const size_t len = strftime(buffer, sizeof(buffer), format.c_str(), local);
  timeStr.assign(buffer, len);
}
356
+
357
// Concatenates two path fragments with exactly one "/" inserted between
// them unless path1 already ends in "/". path2 is appended verbatim, and
// an empty path1 yields "/" + path2.
inline std::string PathJoin(const std::string& path1, const std::string& path2) {
  const bool has_separator = !path1.empty() && path1[path1.size() - 1] == '/';
  std::string joined(path1);
  if (!has_separator) {
    joined += "/";
  }
  joined += path2;
  return joined;
}
363
+
364
+ }
365
+ #endif
@@ -0,0 +1,44 @@
1
+ #ifndef LIMONP_THREAD_HPP
2
+ #define LIMONP_THREAD_HPP
3
+
4
+ #include "Logging.hpp"
5
+ #include "NonCopyable.hpp"
6
+
7
+ namespace limonp {
8
+
9
+ class IThread: NonCopyable {
10
+ public:
11
+ IThread(): isStarted(false), isJoined(false) {
12
+ }
13
+ virtual ~IThread() {
14
+ if(isStarted && !isJoined) {
15
+ XCHECK(!pthread_detach(thread_));
16
+ }
17
+ };
18
+
19
+ virtual void Run() = 0;
20
+ void Start() {
21
+ XCHECK(!isStarted);
22
+ XCHECK(!pthread_create(&thread_, NULL, Worker, this));
23
+ isStarted = true;
24
+ }
25
+ void Join() {
26
+ XCHECK(!isJoined);
27
+ XCHECK(!pthread_join(thread_, NULL));
28
+ isJoined = true;
29
+ }
30
+ private:
31
+ static void * Worker(void * data) {
32
+ IThread * ptr = (IThread* ) data;
33
+ ptr->Run();
34
+ return NULL;
35
+ }
36
+
37
+ pthread_t thread_;
38
+ bool isStarted;
39
+ bool isJoined;
40
+ }; // class IThread
41
+
42
+ } // namespace limonp
43
+
44
+ #endif // LIMONP_THREAD_HPP