cppjieba_rb 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.gitmodules +3 -0
- data/.travis.yml +26 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +81 -0
- data/Rakefile +20 -0
- data/cppjieba_rb.gemspec +50 -0
- data/ext/cppjieba/.gitignore +17 -0
- data/ext/cppjieba/.travis.yml +22 -0
- data/ext/cppjieba/CMakeLists.txt +28 -0
- data/ext/cppjieba/ChangeLog.md +236 -0
- data/ext/cppjieba/README.md +285 -0
- data/ext/cppjieba/README_EN.md +111 -0
- data/ext/cppjieba/appveyor.yml +32 -0
- data/ext/cppjieba/deps/CMakeLists.txt +1 -0
- data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
- data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
- data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
- data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
- data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
- data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
- data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
- data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
- data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
- data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
- data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
- data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
- data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
- data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
- data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
- data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
- data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
- data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
- data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
- data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
- data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
- data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
- data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
- data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
- data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
- data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
- data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
- data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
- data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
- data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
- data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
- data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
- data/ext/cppjieba/dict/README.md +31 -0
- data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
- data/ext/cppjieba/dict/idf.utf8 +258826 -0
- data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
- data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
- data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
- data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
- data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
- data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
- data/ext/cppjieba/dict/user.dict.utf8 +4 -0
- data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
- data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
- data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
- data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
- data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
- data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
- data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
- data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
- data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
- data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
- data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
- data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
- data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
- data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
- data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
- data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
- data/ext/cppjieba/test/CMakeLists.txt +5 -0
- data/ext/cppjieba/test/demo.cpp +80 -0
- data/ext/cppjieba/test/load_test.cpp +54 -0
- data/ext/cppjieba/test/testdata/curl.res +1 -0
- data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
- data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
- data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
- data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
- data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
- data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
- data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
- data/ext/cppjieba/test/testdata/load_test.urls +2 -0
- data/ext/cppjieba/test/testdata/review.100 +100 -0
- data/ext/cppjieba/test/testdata/review.100.res +200 -0
- data/ext/cppjieba/test/testdata/server.conf +19 -0
- data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
- data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
- data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
- data/ext/cppjieba/test/testdata/userdict.english +2 -0
- data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
- data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
- data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
- data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
- data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
- data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
- data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
- data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
- data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
- data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
- data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
- data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
- data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
- data/ext/cppjieba_rb/extconf.rb +26 -0
- data/ext/cppjieba_rb/internal.cc +148 -0
- data/lib/cppjieba_rb/segment.rb +20 -0
- data/lib/cppjieba_rb/version.rb +3 -0
- data/lib/cppjieba_rb.rb +34 -0
- data/test/test_keyword.rb +17 -0
- data/test/test_segment.rb +24 -0
- data/test/test_tagging.rb +19 -0
- metadata +244 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#ifndef LIMONP_MUTEX_LOCK_HPP
|
|
2
|
+
#define LIMONP_MUTEX_LOCK_HPP
|
|
3
|
+
|
|
4
|
+
#include <pthread.h>
|
|
5
|
+
#include "NonCopyable.hpp"
|
|
6
|
+
#include "Logging.hpp"
|
|
7
|
+
|
|
8
|
+
namespace limonp {
|
|
9
|
+
|
|
10
|
+
class MutexLock: NonCopyable {
|
|
11
|
+
public:
|
|
12
|
+
MutexLock() {
|
|
13
|
+
XCHECK(!pthread_mutex_init(&mutex_, NULL));
|
|
14
|
+
}
|
|
15
|
+
~MutexLock() {
|
|
16
|
+
XCHECK(!pthread_mutex_destroy(&mutex_));
|
|
17
|
+
}
|
|
18
|
+
pthread_mutex_t* GetPthreadMutex() {
|
|
19
|
+
return &mutex_;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
private:
|
|
23
|
+
void Lock() {
|
|
24
|
+
XCHECK(!pthread_mutex_lock(&mutex_));
|
|
25
|
+
}
|
|
26
|
+
void Unlock() {
|
|
27
|
+
XCHECK(!pthread_mutex_unlock(&mutex_));
|
|
28
|
+
}
|
|
29
|
+
friend class MutexLockGuard;
|
|
30
|
+
|
|
31
|
+
pthread_mutex_t mutex_;
|
|
32
|
+
}; // class MutexLock
|
|
33
|
+
|
|
34
|
+
class MutexLockGuard: NonCopyable {
|
|
35
|
+
public:
|
|
36
|
+
explicit MutexLockGuard(MutexLock & mutex)
|
|
37
|
+
: mutex_(mutex) {
|
|
38
|
+
mutex_.Lock();
|
|
39
|
+
}
|
|
40
|
+
~MutexLockGuard() {
|
|
41
|
+
mutex_.Unlock();
|
|
42
|
+
}
|
|
43
|
+
private:
|
|
44
|
+
MutexLock & mutex_;
|
|
45
|
+
}; // class MutexLockGuard
|
|
46
|
+
|
|
47
|
+
// Function-like macro: any later `MutexLockGuard(expr)` (the name directly
// followed by parentheses) expands to a failing XCHECK, while a named
// declaration such as `MutexLockGuard guard(m);` is not affected.
// NOTE(review): presumably meant to catch accidental unnamed temporaries that
// would unlock immediately - confirm intent. Being a macro, it ignores the
// enclosing namespace and applies to all code after this header is included.
#define MutexLockGuard(x) XCHECK(false);
|
|
48
|
+
|
|
49
|
+
} // namespace limonp
|
|
50
|
+
|
|
51
|
+
#endif // LIMONP_MUTEX_LOCK_HPP
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/************************************
|
|
2
|
+
************************************/
|
|
3
|
+
#ifndef LIMONP_NONCOPYABLE_H
|
|
4
|
+
#define LIMONP_NONCOPYABLE_H
|
|
5
|
+
|
|
6
|
+
namespace limonp {
|
|
7
|
+
|
|
8
|
+
// Base class that disables copying for anything derived from it.
//
// The copy constructor and copy assignment operator are declared private and
// never defined, so copying a derived object fails at compile time (or at
// link time from inside a member/friend). Construction and destruction are
// protected: the class is only usable as a base.
class NonCopyable {
 private:
  // Declared only - intentionally left without a definition.
  NonCopyable(const NonCopyable& );
  const NonCopyable& operator=(const NonCopyable& );

 protected:
  NonCopyable() {}
  ~NonCopyable() {}
}; // class NonCopyable
|
|
18
|
+
|
|
19
|
+
} // namespace limonp
|
|
20
|
+
|
|
21
|
+
#endif // LIMONP_NONCOPYABLE_H
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
#ifndef LIMONP_STD_EXTEMSION_HPP
|
|
2
|
+
#define LIMONP_STD_EXTEMSION_HPP
|
|
3
|
+
|
|
4
|
+
#include <map>
|
|
5
|
+
|
|
6
|
+
// Select the hash-container headers.
//
// The standard <unordered_map>/<unordered_set> are used on Apple toolchains,
// on MSVC, and on any compiler that reports C++11 *or newer*. The previous
// test (`__cplusplus == 201103L`) matched C++11 only, so C++14/17/20 builds
// fell through to the TR1 branch below and had tr1 names injected into
// namespace std. Only genuinely pre-C++11 compilers take the TR1 fallback now.
#if defined(__APPLE__) || defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201103L)
#include <unordered_map>
#include <unordered_set>
#else
#include <tr1/unordered_map>
#include <tr1/unordered_set>
namespace std {
// Make the TR1 containers reachable under their C++11 names.
using std::tr1::unordered_map;
using std::tr1::unordered_set;
}

#endif
|
|
24
|
+
|
|
25
|
+
#include <set>
|
|
26
|
+
#include <string>
|
|
27
|
+
#include <vector>
|
|
28
|
+
#include <deque>
|
|
29
|
+
#include <fstream>
|
|
30
|
+
#include <sstream>
|
|
31
|
+
|
|
32
|
+
// Debug helper: writes `x` followed by std::endl to std::cout.
// This header does not include <iostream> itself, so the translation unit
// using print() must include it. The argument is pasted unparenthesized, so
// pass a simple expression.
#define print(x) std::cout << x << std::endl
|
|
33
|
+
|
|
34
|
+
namespace std {
|
|
35
|
+
|
|
36
|
+
// Streams a vector as "[e0, e1, ...]"; an empty vector prints "[]".
// Elements are written with their own operator<<.
template<typename T>
ostream& operator << (ostream& os, const vector<T>& v) {
  if(v.empty()) {
    return os << "[]";
  }
  typename vector<T>::const_iterator it = v.begin();
  os << "[" << *it;
  for(++it; it != v.end(); ++it) {
    os << ", " << *it;
  }
  os << "]";
  return os;
}

// Specialization for strings: every element is wrapped in double quotes,
// e.g. ["a", "b"]. Embedded quotes are not escaped.
template<>
inline ostream& operator << (ostream& os, const vector<string>& v) {
  if(v.empty()) {
    return os << "[]";
  }
  vector<string>::const_iterator it = v.begin();
  os << "[\"" << *it;
  for(++it; it != v.end(); ++it) {
    os << "\", \"" << *it;
  }
  os << "\"]";
  return os;
}
|
|
61
|
+
|
|
62
|
+
// Streams a deque as ["e0", "e1", ...]; an empty deque prints "[]".
// Note that every element is wrapped in double quotes regardless of T.
template<typename T>
ostream& operator << (ostream& os, const deque<T>& dq) {
  if(dq.empty()) {
    return os << "[]";
  }
  typename deque<T>::const_iterator it = dq.begin();
  os << "[\"" << *it;
  for(++it; it != dq.end(); ++it) {
    os << "\", \"" << *it;
  }
  os << "\"]";
  return os;
}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
// Streams a pair as "first:second".
template<class T1, class T2>
ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
  return os << pr.first << ":" << pr.second;
}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
// Formats `obj` through its ostream operator<< and stores the text in `str`.
// The previous content of `str` is REPLACED, not appended to.
// Returns `str` to allow chaining.
template<class T>
string& operator << (string& str, const T& obj) {
  stringstream formatter;
  formatter << obj;  // dispatches to ostream& operator<<(ostream&, const T&)
  str = formatter.str();
  return str;
}
|
|
89
|
+
|
|
90
|
+
// Streams a map as "{k0:v0, k1:v1, ...}" in key order; empty prints "{}".
// Each entry is written with the pair operator<< ("first:second").
template<class T1, class T2>
ostream& operator << (ostream& os, const map<T1, T2>& mp) {
  if(mp.empty()) {
    os << "{}";
    return os;
  }
  typename map<T1, T2>::const_iterator it = mp.begin();
  os << '{';
  os << *it;
  for(++it; it != mp.end(); ++it) {
    os << ", " << *it;
  }
  os << '}';
  return os;
}
|
|
107
|
+
// Streams an unordered_map as "{k0:v0, k1:v1, ...}"; empty prints "{}".
// Entries appear in the container's iteration order and are written with
// the pair operator<< ("first:second").
template<class T1, class T2>
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
  if(mp.empty()) {
    return os << "{}";
  }
  typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
  os << '{';
  os << *it;
  for(++it; it != mp.end(); ++it) {
    os << ", " << *it;
  }
  return os << '}';
}
|
|
121
|
+
|
|
122
|
+
// Streams a set as "{e0, e1, ...}" in sorted order; empty prints "{}".
template<class T>
ostream& operator << (ostream& os, const set<T>& st) {
  if(st.empty()) {
    os << "{}";
    return os;
  }
  typename set<T>::const_iterator it = st.begin();
  os << '{';
  os << *it;
  for(++it; it != st.end(); ++it) {
    os << ", " << *it;
  }
  os << '}';
  return os;
}
|
|
139
|
+
|
|
140
|
+
// Returns true when `key` is present in `contain`.
// Works with any container offering find(key) and end() (set, map,
// unordered_* ...).
template<class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) {
  const bool found = (contain.find(key) != contain.end());
  return found;
}
|
|
144
|
+
|
|
145
|
+
// Reads everything remaining in `ifs` into `s`, replacing its previous
// content. Returns `s`.
template<class T>
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
  istreambuf_iterator<T> first(ifs);
  istreambuf_iterator<T> last;  // default-constructed = end of stream
  return s.assign(first, last);
}
|
|
149
|
+
|
|
150
|
+
// Writes the characters of `s` to `ofs` through an ostreambuf_iterator
// (unformatted: no width/fill handling). Returns `ofs`.
template<class T>
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
  ostreambuf_iterator<T> sink(ofs);
  copy(s.begin(), s.end(), sink);
  return ofs;
}
|
|
156
|
+
|
|
157
|
+
} // namespace std
|
|
158
|
+
|
|
159
|
+
#endif
|
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
/************************************
|
|
2
|
+
* file enc : ascii
|
|
3
|
+
* author : wuyanyi09@gmail.com
|
|
4
|
+
************************************/
|
|
5
|
+
#ifndef LIMONP_STR_FUNCTS_H
|
|
6
|
+
#define LIMONP_STR_FUNCTS_H
|
|
7
|
+
#include <fstream>
|
|
8
|
+
#include <iostream>
|
|
9
|
+
#include <string>
|
|
10
|
+
#include <vector>
|
|
11
|
+
#include <algorithm>
|
|
12
|
+
#include <cctype>
|
|
13
|
+
#include <map>
|
|
14
|
+
#include <stdint.h>
|
|
15
|
+
#include <stdio.h>
|
|
16
|
+
#include <stdarg.h>
|
|
17
|
+
#include <memory.h>
|
|
18
|
+
#include <functional>
|
|
19
|
+
#include <locale>
|
|
20
|
+
#include <sstream>
|
|
21
|
+
#include <sys/types.h>
|
|
22
|
+
#include <iterator>
|
|
23
|
+
#include <algorithm>
|
|
24
|
+
#include "StdExtension.hpp"
|
|
25
|
+
|
|
26
|
+
namespace limonp {
|
|
27
|
+
using namespace std;
|
|
28
|
+
// printf-style formatting into a std::string.
//
// Starts with a 256-byte buffer and retries with a larger one until the
// formatted text fits. Returns the formatted string; if vsnprintf keeps
// reporting an error even with a very large buffer, an empty string is
// returned instead of growing without bound.
inline string StringFormat(const char* fmt, ...) {
  // Upper bound for the retry loop when vsnprintf reports failure.
  const int kMaxBufferSize = 1 << 30;
  int size = 256;
  std::string str;
  va_list ap;
  for (;;) {
    str.resize(size);
    va_start(ap, fmt);
    // Write through &str[0]: writing through c_str() is undefined behaviour.
    int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n > -1 && n < size) {
      str.resize(n);
      return str;
    }
    if (n > -1) {
      // C99 behaviour: n is the required length (without the terminator).
      size = n + 1;
    } else if (size <= kMaxBufferSize / 2) {
      // Legacy behaviour: -1 means "too small"; try a bigger buffer.
      size *= 2;
    } else {
      // Persistent failure (e.g. encoding error): give up.
      str.clear();
      return str;
    }
  }
}
|
|
48
|
+
|
|
49
|
+
// Concatenates the elements of [begin, end) into `res`, separated by
// `connector`. Elements are formatted with operator<<.
// For an empty range the function returns early and `res` is left untouched.
template<class T>
void Join(T begin, T end, string& res, const string& connector) {
  if(begin == end) {
    return;
  }
  stringstream joined;
  joined << *begin;
  for(++begin; begin != end; ++begin) {
    joined << connector << *begin;
  }
  res = joined.str();
}
|
|
63
|
+
|
|
64
|
+
// Convenience overload: returns the joined text instead of filling an
// out-parameter. An empty range yields an empty string.
template<class T>
string Join(T begin, T end, const string& connector) {
  string joined;
  Join(begin, end, joined, connector);
  return joined;
}
|
|
70
|
+
|
|
71
|
+
// Upper-cases `str` in place (per byte, current C locale) and returns it.
//
// Each byte is converted to unsigned char before calling toupper: passing a
// negative plain char (e.g. a UTF-8 lead/continuation byte) is undefined
// behaviour for the <cctype> functions.
inline string& Upper(string& str) {
  for (string::size_type i = 0; i < str.size(); ++i) {
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    str[i] = static_cast<char>(std::toupper(byte));
  }
  return str;
}
|
|
75
|
+
|
|
76
|
+
// Lower-cases `str` in place (per byte, current C locale) and returns it.
//
// Each byte is converted to unsigned char before calling tolower: passing a
// negative plain char (e.g. a UTF-8 lead/continuation byte) is undefined
// behaviour for the <cctype> functions.
inline string& Lower(string& str) {
  for (string::size_type i = 0; i < str.size(); ++i) {
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    str[i] = static_cast<char>(std::tolower(byte));
  }
  return str;
}
|
|
80
|
+
|
|
81
|
+
// Returns true when `c` is a whitespace character according to isspace().
// Values above 0xff are rejected up front because handing a large integer
// to isspace() can crash; this also means a negative plain char (which
// converts to a huge unsigned value) is never treated as whitespace.
inline bool IsSpace(unsigned c) {
  return c > 0xff ? false : std::isspace(c & 0xff);
}

// Removes leading whitespace (as defined by IsSpace) from `s` in place and
// returns `s`.
// Implemented with a plain loop: std::ptr_fun / std::not1 were deprecated in
// C++11 and later removed from the standard library.
inline std::string& LTrim(std::string &s) {
  std::string::size_type firstKept = 0;
  while (firstKept < s.size() && IsSpace(s[firstKept])) {
    ++firstKept;
  }
  s.erase(0, firstKept);
  return s;
}

// Removes trailing whitespace (as defined by IsSpace) from `s` in place and
// returns `s`.
inline std::string& RTrim(std::string &s) {
  std::string::size_type keptLength = s.size();
  while (keptLength > 0 && IsSpace(s[keptLength - 1])) {
    --keptLength;
  }
  s.erase(keptLength);
  return s;
}

// Removes whitespace from both ends of `s` in place and returns `s`.
inline std::string& Trim(std::string &s) {
  return LTrim(RTrim(s));
}
|
|
99
|
+
|
|
100
|
+
// Removes every leading occurrence of the character `x` from `s` in place
// and returns `s`. A string consisting only of `x` becomes empty.
// Uses find_first_not_of instead of std::bind2nd / std::not1, which were
// deprecated in C++11 and later removed from the standard library.
inline std::string& LTrim(std::string & s, char x) {
  // npos (nothing but `x`) erases the whole string.
  s.erase(0, s.find_first_not_of(x));
  return s;
}
|
|
104
|
+
|
|
105
|
+
// Removes every trailing occurrence of the character `x` from `s` in place
// and returns `s`. A string consisting only of `x` becomes empty.
// Uses find_last_not_of instead of std::bind2nd / std::not1, which were
// deprecated in C++11 and later removed from the standard library.
inline std::string& RTrim(std::string & s, char x) {
  const std::string::size_type lastKept = s.find_last_not_of(x);
  // npos + 1 wraps to 0, i.e. erase everything when only `x` is present.
  s.erase(lastKept + 1);
  return s;
}
|
|
109
|
+
|
|
110
|
+
inline std::string& Trim(std::string &s, char x) {
|
|
111
|
+
return LTrim(RTrim(s, x), x);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Splits `src` at any character contained in `pattern` and stores the
// pieces in `res` (cleared first).
//
// - Each delimiter character separates two pieces, so consecutive or leading
//   delimiters produce empty pieces; a trailing delimiter does not add a
//   final empty piece.
// - Once `maxsplit` pieces have been produced, the remainder of `src` is
//   appended as one last piece.
// - An empty `src` yields an empty `res`.
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
  res.clear();
  size_t pieceBegin = 0;
  while(pieceBegin < src.size()) {
    const size_t delimPos = src.find_first_of(pattern, pieceBegin);
    const bool isLastPiece = (delimPos == string::npos) || (res.size() >= maxsplit);
    if(isLastPiece) {
      res.push_back(src.substr(pieceBegin));
      return;
    }
    res.push_back(src.substr(pieceBegin, delimPos - pieceBegin));
    pieceBegin = delimPos + 1;
  }
}
|
|
132
|
+
|
|
133
|
+
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
|
|
134
|
+
vector<string> res;
|
|
135
|
+
Split(src, res, pattern, maxsplit);
|
|
136
|
+
return res;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// Returns true when `str` begins with `prefix`.
// An empty prefix matches every string.
inline bool StartsWith(const string& str, const string& prefix) {
  const size_t prefixLen = prefix.length();
  return prefixLen <= str.length() && str.compare(0, prefixLen, prefix) == 0;
}
|
|
145
|
+
|
|
146
|
+
// Returns true when `str` ends with `suffix`.
// An empty suffix matches every string.
inline bool EndsWith(const string& str, const string& suffix) {
  const size_t suffixLen = suffix.length();
  const size_t strLen = str.length();
  return suffixLen <= strLen && str.compare(strLen - suffixLen, suffixLen, suffix) == 0;
}
|
|
152
|
+
|
|
153
|
+
// Returns true when the character `ch` occurs anywhere in `str`.
inline bool IsInStr(const string& str, char ch) {
  const string::size_type hit = str.find(ch);
  return string::npos != hit;
}
|
|
156
|
+
|
|
157
|
+
// Packs two bytes into a 16-bit value: `high` becomes bits 15..8 and `low`
// bits 7..0. Only the low 8 bits of each argument are used, so the sign of
// plain char does not leak into the result.
inline uint16_t TwocharToUint16(char high, char low) {
  const uint16_t upper = uint16_t(high) & 0x00ff;
  const uint16_t lower = uint16_t(low) & 0x00ff;
  return ((upper << 8) | lower);
}
|
|
160
|
+
|
|
161
|
+
// Decodes `len` bytes of UTF-8 starting at `str` into 16-bit code units
// appended to `vec` (cleared first). Only 1-, 2- and 3-byte sequences
// (code points up to U+FFFF) are supported.
//
// Returns false when `str` is NULL, when a sequence is truncated, when a
// 4-byte (or longer) lead byte is met, or when a continuation byte
// (0x80..0xBF) appears where a lead byte is expected. The last case used to
// be decoded as a bogus 2-byte sequence. On failure `vec` keeps whatever was
// decoded before the offending byte.
// Continuation bytes themselves are not validated beyond masking.
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if(!str) {
    return false;
  }
  vec.clear();
  for(size_t i = 0; i < len;) {
    const uint8_t lead = (uint8_t)str[i];
    if(!(lead & 0x80)) { // 0xxxxxxx
      vec.push_back(uint16_t(lead));
      i++;
    } else if(lead < 0xc0) { // 10xxxxxx: continuation byte, not a lead
      return false;
    } else if(lead <= 0xdf && i + 1 < len) { // 110xxxxx 10xxxxxx
      const uint16_t cp = uint16_t(((lead & 0x1f) << 6)
                                   | ((uint8_t)str[i+1] & 0x3f));
      vec.push_back(cp);
      i += 2;
    } else if(lead <= 0xef && i + 2 < len) { // 1110xxxx 10xxxxxx 10xxxxxx
      const uint16_t cp = uint16_t(((lead & 0x0f) << 12)
                                   | (((uint8_t)str[i+1] & 0x3f) << 6)
                                   | ((uint8_t)str[i+2] & 0x3f));
      vec.push_back(cp);
      i += 3;
    } else {
      return false;
    }
  }
  return true;
}
|
|
191
|
+
|
|
192
|
+
// std::string convenience overload: decodes the whole of `str` (all
// str.size() bytes) by forwarding to the pointer/length overload.
template <class Uint16Container>
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  const size_t byteCount = str.size();
  return Utf8ToUnicode(bytes, byteCount, vec);
}
|
|
196
|
+
|
|
197
|
+
// Decodes the UTF-8 bytes of `str` into 32-bit code points appended to
// `vec` (cleared first). Supports 1- to 4-byte sequences.
//
// Returns false when a sequence is truncated, when the lead byte is above
// 0xF7, or when a continuation byte (0x80..0xBF) appears where a lead byte
// is expected. The last case used to be decoded as a bogus 2-byte sequence.
// On failure `vec` keeps whatever was decoded before the offending byte.
// Continuation bytes themselves are not validated beyond masking.
template <class Uint32Container>
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
  uint32_t tmp;
  vec.clear();
  const size_t size = str.size();
  for(size_t i = 0; i < size;) {
    const uint8_t lead = (uint8_t)str[i];
    if(!(lead & 0x80)) { // 0xxxxxxx
      // 7 payload bits
      tmp = lead & 0x7f;
      i++;
    } else if(lead < 0xc0) { // 10xxxxxx: continuation byte, not a lead
      return false;
    } else if(lead <= 0xdf && i + 1 < size) { // 110xxxxx
      // 5 + 6 = 11 payload bits
      tmp = lead & 0x1f;
      tmp = (tmp << 6) | ((uint8_t)(str[i+1]) & 0x3f);
      i += 2;
    } else if(lead <= 0xef && i + 2 < size) { // 1110xxxx
      // 4 + 6 + 6 = 16 payload bits
      tmp = lead & 0x0f;
      tmp = (tmp << 6) | ((uint8_t)(str[i+1]) & 0x3f);
      tmp = (tmp << 6) | ((uint8_t)(str[i+2]) & 0x3f);
      i += 3;
    } else if(lead <= 0xf7 && i + 3 < size) { // 11110xxx
      // 3 + 6 + 6 + 6 = 21 payload bits
      tmp = lead & 0x07;
      tmp = (tmp << 6) | ((uint8_t)(str[i+1]) & 0x3f);
      tmp = (tmp << 6) | ((uint8_t)(str[i+2]) & 0x3f);
      tmp = (tmp << 6) | ((uint8_t)(str[i+3]) & 0x3f);
      i += 4;
    } else {
      return false;
    }
    vec.push_back(tmp);
  }
  return true;
}
|
|
251
|
+
|
|
252
|
+
// Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`
// (cleared first), using 1 to 4 bytes per code point.
//
// The lead byte of a 4-byte sequence carries three payload bits
// (11110xxx). The mask for it was 0x03, which dropped the top bit and
// mis-encoded every code point at or above U+100000; it is 0x07 now.
// Values are not range-checked: anything above 0xFFFF takes the 4-byte form.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const uint32_t ui = *begin;
    if(ui <= 0x7f) {
      res += char(ui);
    } else if(ui <= 0x7ff) {
      res += char(((ui >> 6) & 0x1f) | 0xc0);
      res += char((ui & 0x3f) | 0x80);
    } else if(ui <= 0xffff) {
      res += char(((ui >> 12) & 0x0f) | 0xe0);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    } else {
      res += char(((ui >> 18) & 0x07) | 0xf0);
      res += char(((ui >> 12) & 0x3f) | 0x80);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    }
  }
}
|
|
276
|
+
|
|
277
|
+
// Encodes the 16-bit code units in [begin, end) as UTF-8 into `res`
// (cleared first). Each unit becomes 1, 2 or 3 bytes depending on its
// value; units are encoded independently of each other.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const uint16_t unit = *begin;
    if(unit > 0x7ff) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      res += char(((unit >> 12) & 0x0f) | 0xe0);
      res += char(((unit >> 6) & 0x3f) | 0x80);
      res += char((unit & 0x3f) | 0x80);
    } else if(unit > 0x7f) {
      // 110xxxxx 10xxxxxx
      res += char(((unit >> 6) & 0x1f) | 0xc0);
      res += char((unit & 0x3f) | 0x80);
    } else {
      // plain ASCII
      res += char(unit);
    }
  }
}
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
// Packs `len` bytes of double-byte (GBK-style) text into 16-bit units
// appended to `vec` (cleared first).
//
// A byte with the high bit clear becomes one unit on its own; a byte with
// the high bit set is combined with the following byte as (first << 8) |
// second. Returns false if such a lead byte is the last byte of the input.
// A NULL `str` is treated as empty input and returns true.
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if(!str) {
    return true;
  }
  for(size_t i = 0; i < len;) {
    if(str[i] & 0x80) {
      // Double-byte character: needs one more byte.
      if(i + 1 >= len) {
        return false;
      }
      const uint16_t packed = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff));
      vec.push_back(packed);
      i += 2;
    } else {
      // Single-byte (ASCII) character.
      vec.push_back(uint16_t(str[i]));
      i++;
    }
  }
  return true;
}
|
|
321
|
+
|
|
322
|
+
// std::string convenience overload: packs all str.size() bytes of `str` by
// forwarding to the pointer/length overload.
template <class Uint16Container>
bool GBKTrans(const string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  const size_t byteCount = str.size();
  return GBKTrans(bytes, byteCount, vec);
}
|
|
326
|
+
|
|
327
|
+
// Inverse of the packing overload: expands the 16-bit units in [begin, end)
// back into bytes appended to `res` (cleared first).
//
// A unit whose upper byte has the high bit set is written as two bytes
// (upper, then lower); any other unit contributes only its lower byte.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const char upper = ((*begin)>>8) & 0x00ff;
    const char lower = (*begin) & 0x00ff;
    if(upper & 0x80) {
      res += upper;
    }
    res += lower;
  }
}
|
|
345
|
+
|
|
346
|
+
/*
|
|
347
|
+
* format example: "%Y-%m-%d %H:%M:%S"
|
|
348
|
+
*/
|
|
349
|
+
// Formats the current local time with strftime() using `format` and stores
// the result in `timeStr`.
//
// The result is limited to 63 characters; if the formatted text does not
// fit, or the local time cannot be obtained, `timeStr` is left empty.
// Formatting goes through a local buffer rather than writing through
// timeStr.c_str(), which is undefined behaviour.
// NOTE(review): localtime() uses static storage, so this is not safe to call
// concurrently from several threads - confirm whether callers need that.
inline void GetTime(const string& format, string& timeStr) {
  time_t timeNow;
  time(&timeNow);
  const struct tm* localNow = localtime(&timeNow);
  if(!localNow) {
    timeStr.clear();
    return;
  }
  char buffer[64];
  const size_t len = strftime(buffer, sizeof(buffer), format.c_str(), localNow);
  timeStr.assign(buffer, len);
}
|
|
356
|
+
|
|
357
|
+
inline string PathJoin(const string& path1, const string& path2) {
|
|
358
|
+
if(EndsWith(path1, "/")) {
|
|
359
|
+
return path1 + path2;
|
|
360
|
+
}
|
|
361
|
+
return path1 + "/" + path2;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
}
|
|
365
|
+
#endif
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#ifndef LIMONP_THREAD_HPP
|
|
2
|
+
#define LIMONP_THREAD_HPP
|
|
3
|
+
|
|
4
|
+
#include "Logging.hpp"
|
|
5
|
+
#include "NonCopyable.hpp"
|
|
6
|
+
|
|
7
|
+
namespace limonp {
|
|
8
|
+
|
|
9
|
+
class IThread: NonCopyable {
|
|
10
|
+
public:
|
|
11
|
+
IThread(): isStarted(false), isJoined(false) {
|
|
12
|
+
}
|
|
13
|
+
virtual ~IThread() {
|
|
14
|
+
if(isStarted && !isJoined) {
|
|
15
|
+
XCHECK(!pthread_detach(thread_));
|
|
16
|
+
}
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
virtual void Run() = 0;
|
|
20
|
+
void Start() {
|
|
21
|
+
XCHECK(!isStarted);
|
|
22
|
+
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
|
|
23
|
+
isStarted = true;
|
|
24
|
+
}
|
|
25
|
+
void Join() {
|
|
26
|
+
XCHECK(!isJoined);
|
|
27
|
+
XCHECK(!pthread_join(thread_, NULL));
|
|
28
|
+
isJoined = true;
|
|
29
|
+
}
|
|
30
|
+
private:
|
|
31
|
+
static void * Worker(void * data) {
|
|
32
|
+
IThread * ptr = (IThread* ) data;
|
|
33
|
+
ptr->Run();
|
|
34
|
+
return NULL;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
pthread_t thread_;
|
|
38
|
+
bool isStarted;
|
|
39
|
+
bool isJoined;
|
|
40
|
+
}; // class IThread
|
|
41
|
+
|
|
42
|
+
} // namespace limonp
|
|
43
|
+
|
|
44
|
+
#endif // LIMONP_THREAD_HPP
|