cppjieba_rb 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.gitmodules +3 -0
- data/.travis.yml +26 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +81 -0
- data/Rakefile +20 -0
- data/cppjieba_rb.gemspec +50 -0
- data/ext/cppjieba/.gitignore +17 -0
- data/ext/cppjieba/.travis.yml +22 -0
- data/ext/cppjieba/CMakeLists.txt +28 -0
- data/ext/cppjieba/ChangeLog.md +236 -0
- data/ext/cppjieba/README.md +285 -0
- data/ext/cppjieba/README_EN.md +111 -0
- data/ext/cppjieba/appveyor.yml +32 -0
- data/ext/cppjieba/deps/CMakeLists.txt +1 -0
- data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
- data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
- data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
- data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
- data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
- data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
- data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
- data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
- data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
- data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
- data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
- data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
- data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
- data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
- data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
- data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
- data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
- data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
- data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
- data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
- data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
- data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
- data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
- data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
- data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
- data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
- data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
- data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
- data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
- data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
- data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
- data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
- data/ext/cppjieba/dict/README.md +31 -0
- data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
- data/ext/cppjieba/dict/idf.utf8 +258826 -0
- data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
- data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
- data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
- data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
- data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
- data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
- data/ext/cppjieba/dict/user.dict.utf8 +4 -0
- data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
- data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
- data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
- data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
- data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
- data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
- data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
- data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
- data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
- data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
- data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
- data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
- data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
- data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
- data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
- data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
- data/ext/cppjieba/test/CMakeLists.txt +5 -0
- data/ext/cppjieba/test/demo.cpp +80 -0
- data/ext/cppjieba/test/load_test.cpp +54 -0
- data/ext/cppjieba/test/testdata/curl.res +1 -0
- data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
- data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
- data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
- data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
- data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
- data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
- data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
- data/ext/cppjieba/test/testdata/load_test.urls +2 -0
- data/ext/cppjieba/test/testdata/review.100 +100 -0
- data/ext/cppjieba/test/testdata/review.100.res +200 -0
- data/ext/cppjieba/test/testdata/server.conf +19 -0
- data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
- data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
- data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
- data/ext/cppjieba/test/testdata/userdict.english +2 -0
- data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
- data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
- data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
- data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
- data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
- data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
- data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
- data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
- data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
- data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
- data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
- data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
- data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
- data/ext/cppjieba_rb/extconf.rb +26 -0
- data/ext/cppjieba_rb/internal.cc +148 -0
- data/lib/cppjieba_rb/segment.rb +20 -0
- data/lib/cppjieba_rb/version.rb +3 -0
- data/lib/cppjieba_rb.rb +34 -0
- data/test/test_keyword.rb +17 -0
- data/test/test_segment.rb +24 -0
- data/test/test_tagging.rb +19 -0
- metadata +244 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
#ifndef LIMONP_MUTEX_LOCK_HPP
|
2
|
+
#define LIMONP_MUTEX_LOCK_HPP
|
3
|
+
|
4
|
+
#include <pthread.h>
|
5
|
+
#include "NonCopyable.hpp"
|
6
|
+
#include "Logging.hpp"
|
7
|
+
|
8
|
+
namespace limonp {
|
9
|
+
|
10
|
+
class MutexLock: NonCopyable {
|
11
|
+
public:
|
12
|
+
MutexLock() {
|
13
|
+
XCHECK(!pthread_mutex_init(&mutex_, NULL));
|
14
|
+
}
|
15
|
+
~MutexLock() {
|
16
|
+
XCHECK(!pthread_mutex_destroy(&mutex_));
|
17
|
+
}
|
18
|
+
pthread_mutex_t* GetPthreadMutex() {
|
19
|
+
return &mutex_;
|
20
|
+
}
|
21
|
+
|
22
|
+
private:
|
23
|
+
void Lock() {
|
24
|
+
XCHECK(!pthread_mutex_lock(&mutex_));
|
25
|
+
}
|
26
|
+
void Unlock() {
|
27
|
+
XCHECK(!pthread_mutex_unlock(&mutex_));
|
28
|
+
}
|
29
|
+
friend class MutexLockGuard;
|
30
|
+
|
31
|
+
pthread_mutex_t mutex_;
|
32
|
+
}; // class MutexLock
|
33
|
+
|
34
|
+
class MutexLockGuard: NonCopyable {
|
35
|
+
public:
|
36
|
+
explicit MutexLockGuard(MutexLock & mutex)
|
37
|
+
: mutex_(mutex) {
|
38
|
+
mutex_.Lock();
|
39
|
+
}
|
40
|
+
~MutexLockGuard() {
|
41
|
+
mutex_.Unlock();
|
42
|
+
}
|
43
|
+
private:
|
44
|
+
MutexLock & mutex_;
|
45
|
+
}; // class MutexLockGuard
|
46
|
+
|
47
|
+
#define MutexLockGuard(x) XCHECK(false);
|
48
|
+
|
49
|
+
} // namespace limonp
|
50
|
+
|
51
|
+
#endif // LIMONP_MUTEX_LOCK_HPP
|
@@ -0,0 +1,21 @@
|
|
1
|
+
/************************************
|
2
|
+
************************************/
|
3
|
+
#ifndef LIMONP_NONCOPYABLE_H
|
4
|
+
#define LIMONP_NONCOPYABLE_H
|
5
|
+
|
6
|
+
namespace limonp {
|
7
|
+
|
8
|
+
// Base class that makes derived types non-copyable: the copy constructor and
// copy assignment operator are declared private and are not defined here.
// Construction and destruction are protected, so the class is only usable as
// a base.
class NonCopyable {
 private:
  NonCopyable(const NonCopyable& );
  const NonCopyable& operator=(const NonCopyable& );

 protected:
  NonCopyable() {}
  ~NonCopyable() {}
}; // class NonCopyable
|
18
|
+
|
19
|
+
} // namespace limonp
|
20
|
+
|
21
|
+
#endif // LIMONP_NONCOPYABLE_H
|
@@ -0,0 +1,159 @@
|
|
1
|
+
#ifndef LIMONP_STD_EXTEMSION_HPP
|
2
|
+
#define LIMONP_STD_EXTEMSION_HPP
|
3
|
+
|
4
|
+
#include <map>
|
5
|
+
|
6
|
+
// Pick the hash-container headers.
// Apple and MSVC toolchains, and any compiler reporting C++11 or newer, use
// the standard <unordered_map>/<unordered_set>. The check is `>=` rather than
// `==` so that C++14/17/20 builds do not fall through to the TR1 branch.
// Only pre-C++11 compilers use TR1, whose containers are re-exported into
// namespace std so the rest of the code can spell std::unordered_map.
#if defined(__APPLE__) || (__cplusplus >= 201103L) || defined(_MSC_VER)
#include <unordered_map>
#include <unordered_set>
#else
#include <tr1/unordered_map>
#include <tr1/unordered_set>
namespace std {
using std::tr1::unordered_map;
using std::tr1::unordered_set;
}

#endif
|
24
|
+
|
25
|
+
#include <set>
|
26
|
+
#include <string>
|
27
|
+
#include <vector>
|
28
|
+
#include <deque>
|
29
|
+
#include <fstream>
|
30
|
+
#include <sstream>
|
31
|
+
|
32
|
+
#define print(x) std::cout << x << std::endl
|
33
|
+
|
34
|
+
namespace std {
|
35
|
+
|
36
|
+
// Streams a vector as "[e0, e1, ...]"; an empty vector is written as "[]".
template<typename T>
std::ostream& operator << (std::ostream& os, const std::vector<T>& v) {
  typename std::vector<T>::const_iterator cur = v.begin();
  if (cur == v.end()) {
    return os << "[]";
  }
  os << "[" << *cur;
  for (++cur; cur != v.end(); ++cur) {
    os << ", " << *cur;
  }
  os << "]";
  return os;
}

// Specialization for vectors of strings: every element is wrapped in double
// quotes, e.g. ["a", "b"]. An empty vector is still written as "[]".
template<>
inline std::ostream& operator << (std::ostream& os, const std::vector<std::string>& v) {
  std::vector<std::string>::const_iterator cur = v.begin();
  if (cur == v.end()) {
    return os << "[]";
  }
  os << "[\"" << *cur;
  for (++cur; cur != v.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  os << "\"]";
  return os;
}
|
61
|
+
|
62
|
+
// Streams a deque as ["e0", "e1", ...]: every element is wrapped in double
// quotes regardless of T. An empty deque is written as "[]".
template<typename T>
std::ostream& operator << (std::ostream& os, const std::deque<T>& dq) {
  typename std::deque<T>::const_iterator cur = dq.begin();
  if (cur == dq.end()) {
    return os << "[]";
  }
  os << "[\"" << *cur;
  for (++cur; cur != dq.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  os << "\"]";
  return os;
}
|
74
|
+
|
75
|
+
|
76
|
+
// Streams a pair as "first:second".
template<class T1, class T2>
std::ostream& operator << (std::ostream& os, const std::pair<T1, T2>& pr) {
  os << pr.first;
  os << ":";
  os << pr.second;
  return os;
}
|
81
|
+
|
82
|
+
|
83
|
+
// Formats `obj` through a stringstream and ASSIGNS the text to `str`
// (previous contents are replaced, not appended to). Returns `str`.
template<class T>
std::string& operator << (std::string& str, const T& obj) {
  std::stringstream formatter;
  formatter << obj;  // dispatches to the ostream operator<< for T
  str = formatter.str();
  return str;
}
|
89
|
+
|
90
|
+
// Streams a map as "{e0, e1, ...}" in iteration order; an empty map is
// written as "{}". Each entry is written with `os << *it`, i.e. through
// whichever operator<< is available for the map's value_type (a std::pair).
template<class T1, class T2>
std::ostream& operator << (std::ostream& os, const std::map<T1, T2>& mp) {
  typedef typename std::map<T1, T2>::const_iterator EntryIter;
  EntryIter cur = mp.begin();
  const EntryIter last = mp.end();
  if (cur == last) {
    return os << "{}";
  }
  os << '{';
  os << *cur;
  for (++cur; cur != last; ++cur) {
    os << ", " << *cur;
  }
  os << '}';
  return os;
}
|
107
|
+
// Streams an unordered_map as "{e0, e1, ...}" in the container's iteration
// order; an empty container is written as "{}". Each entry is written with
// `os << *it`, i.e. through whichever operator<< is available for the
// container's value_type (a std::pair).
template<class T1, class T2>
std::ostream& operator << (std::ostream& os, const std::unordered_map<T1, T2>& mp) {
  typedef typename std::unordered_map<T1, T2>::const_iterator EntryIter;
  EntryIter cur = mp.begin();
  const EntryIter last = mp.end();
  if (cur == last) {
    os << "{}";
    return os;
  }
  os << '{';
  os << *cur;
  for (++cur; cur != last; ++cur) {
    os << ", " << *cur;
  }
  os << '}';
  return os;
}
|
121
|
+
|
122
|
+
// Streams a set as "{e0, e1, ...}" in iteration order; an empty set is
// written as "{}".
template<class T>
std::ostream& operator << (std::ostream& os, const std::set<T>& st) {
  typedef typename std::set<T>::const_iterator ElemIter;
  ElemIter cur = st.begin();
  const ElemIter last = st.end();
  if (cur == last) {
    return os << "{}";
  }
  os << '{';
  os << *cur;
  for (++cur; cur != last; ++cur) {
    os << ", " << *cur;
  }
  os << '}';
  return os;
}
|
139
|
+
|
140
|
+
// Returns true when `contain.find(key)` locates an element, i.e. when the
// result differs from `contain.end()`. Works with any container exposing
// find()/end() (set, map, unordered_map, ...).
template<class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) {
  typename ContainType::const_iterator pos = contain.find(key);
  return pos != contain.end();
}
|
144
|
+
|
145
|
+
// Replaces the contents of `s` with everything that can still be read from
// `ifs` (from its current position up to end of stream) and returns `s`.
template<class T>
std::basic_string<T> & operator << (std::basic_string<T> & s, std::ifstream & ifs) {
  std::istreambuf_iterator<T> first(ifs);
  std::istreambuf_iterator<T> last;  // default-constructed == end of stream
  s.assign(first, last);
  return s;
}
|
149
|
+
|
150
|
+
// Writes every character of `s` to `ofs` through an ostreambuf_iterator
// (i.e. directly into the stream buffer) and returns `ofs`.
template<class T>
std::ofstream & operator << (std::ofstream & ofs, const std::basic_string<T>& s) {
  std::ostreambuf_iterator<T> sink(ofs);
  for (typename std::basic_string<T>::const_iterator ch = s.begin(); ch != s.end(); ++ch) {
    *sink = *ch;
    ++sink;
  }
  return ofs;
}
|
156
|
+
|
157
|
+
} // namespace std
|
158
|
+
|
159
|
+
#endif
|
@@ -0,0 +1,365 @@
|
|
1
|
+
/************************************
|
2
|
+
* file enc : ascii
|
3
|
+
* author : wuyanyi09@gmail.com
|
4
|
+
************************************/
|
5
|
+
#ifndef LIMONP_STR_FUNCTS_H
|
6
|
+
#define LIMONP_STR_FUNCTS_H
|
7
|
+
#include <fstream>
|
8
|
+
#include <iostream>
|
9
|
+
#include <string>
|
10
|
+
#include <vector>
|
11
|
+
#include <algorithm>
|
12
|
+
#include <cctype>
|
13
|
+
#include <map>
|
14
|
+
#include <stdint.h>
|
15
|
+
#include <stdio.h>
|
16
|
+
#include <stdarg.h>
|
17
|
+
#include <memory.h>
|
18
|
+
#include <functional>
|
19
|
+
#include <locale>
|
20
|
+
#include <sstream>
|
21
|
+
#include <sys/types.h>
|
22
|
+
#include <iterator>
|
23
|
+
#include <algorithm>
|
24
|
+
#include "StdExtension.hpp"
|
25
|
+
|
26
|
+
namespace limonp {
|
27
|
+
using namespace std;
|
28
|
+
// printf-style formatting into a std::string.
//
// fmt / ... : a printf format string and its arguments.
// Returns the formatted text. The buffer starts at 256 bytes and is grown
// until the output fits. If vsnprintf keeps reporting failure (negative
// return) even with a 16 MiB buffer, an empty string is returned instead of
// growing without bound.
inline std::string StringFormat(const char* fmt, ...) {
  // Upper bound for the "negative result => double the buffer" retry path.
  const size_t kMaxBufferSize = static_cast<size_t>(1) << 24;
  size_t size = 256;
  std::string str;
  while (true) {
    str.resize(size);
    va_list ap;
    va_start(ap, fmt);
    // Write through &str[0] (mutable storage) rather than casting away the
    // constness of c_str().
    const int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n >= 0 && static_cast<size_t>(n) < size) {
      str.resize(static_cast<size_t>(n));
      return str;
    }
    if (n >= 0) {
      // C99 behaviour: n is the required length, so one more pass suffices.
      size = static_cast<size_t>(n) + 1;
    } else {
      // Either a pre-C99 "truncated" report or a genuine formatting error;
      // retry with a bigger buffer but stop at the cap.
      if (size >= kMaxBufferSize) {
        return std::string();
      }
      size *= 2;
    }
  }
}
|
48
|
+
|
49
|
+
// Concatenates the elements of [begin, end), each formatted with operator<<,
// separated by `connector`, and stores the text in `res`.
// `res` is always overwritten: an empty range now yields an empty string
// instead of leaving whatever `res` held before the call.
template<class T>
void Join(T begin, T end, std::string& res, const std::string& connector) {
  if (begin == end) {
    res.clear();
    return;
  }
  std::stringstream ss;
  ss << *begin;
  for (++begin; begin != end; ++begin) {
    ss << connector << *begin;
  }
  res = ss.str();
}

// Convenience overload that returns the joined text by value.
template<class T>
std::string Join(T begin, T end, const std::string& connector) {
  std::string res;
  Join(begin, end, res, connector);
  return res;
}
|
70
|
+
|
71
|
+
// Upper-cases `str` in place, byte by byte, using the C `toupper`, and
// returns `str`.
// Each byte is converted to unsigned char first: passing a negative char
// (any byte >= 0x80 where char is signed, e.g. UTF-8 text) to toupper is
// undefined behaviour.
inline std::string& Upper(std::string& str) {
  for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
    const int ch = static_cast<unsigned char>(*it);
    *it = static_cast<char>(std::toupper(ch));
  }
  return str;
}
|
75
|
+
|
76
|
+
// Lower-cases `str` in place, byte by byte, using the C `tolower`, and
// returns `str`.
// Each byte is converted to unsigned char first: passing a negative char
// (any byte >= 0x80 where char is signed, e.g. UTF-8 text) to tolower is
// undefined behaviour.
inline std::string& Lower(std::string& str) {
  for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
    const int ch = static_cast<unsigned char>(*it);
    *it = static_cast<char>(std::tolower(ch));
  }
  return str;
}
|
80
|
+
|
81
|
+
// Returns true when `c` is a whitespace character according to isspace.
// Values above 0xff are reported as non-space without calling isspace,
// because handing isspace a large integer can crash.
inline bool IsSpace(unsigned c) {
  if (c > 0xff) {
    return false;
  }
  return std::isspace(static_cast<int>(c & 0xff)) != 0;
}

// Removes leading whitespace from `s` in place and returns `s`.
// Implemented with a plain loop: std::ptr_fun / std::not1 are deprecated and
// were removed from the standard (ptr_fun in C++17, not1 in C++20).
// Each char is converted to unsigned before the IsSpace test, so a negative
// char becomes a value above 0xff and is treated as non-space.
inline std::string& LTrim(std::string &s) {
  std::string::size_type skip = 0;
  while (skip < s.size() && IsSpace(static_cast<unsigned>(s[skip]))) {
    ++skip;
  }
  s.erase(0, skip);
  return s;
}

// Removes trailing whitespace from `s` in place and returns `s`.
inline std::string& RTrim(std::string &s) {
  std::string::size_type keep = s.size();
  while (keep > 0 && IsSpace(static_cast<unsigned>(s[keep - 1]))) {
    --keep;
  }
  s.erase(keep);
  return s;
}

// Removes whitespace from both ends of `s` in place and returns `s`.
inline std::string& Trim(std::string &s) {
  return LTrim(RTrim(s));
}
|
99
|
+
|
100
|
+
// Removes every leading occurrence of `x` from `s` in place and returns `s`.
// Uses std::string's own search instead of std::bind2nd / std::not1, which
// are deprecated and were removed from the standard (bind2nd in C++17,
// not1 in C++20).
inline std::string& LTrim(std::string & s, char x) {
  // find_first_not_of yields npos when the string consists only of `x`;
  // erase(0, npos) then clears the whole string.
  s.erase(0, s.find_first_not_of(x));
  return s;
}

// Removes every trailing occurrence of `x` from `s` in place and returns `s`.
inline std::string& RTrim(std::string & s, char x) {
  const std::string::size_type last = s.find_last_not_of(x);
  // npos means nothing but `x` (or an empty string): erase from index 0.
  s.erase(last == std::string::npos ? 0 : last + 1);
  return s;
}

// Removes `x` from both ends of `s` in place and returns `s`.
inline std::string& Trim(std::string &s, char x) {
  return LTrim(RTrim(s, x), x);
}
|
113
|
+
|
114
|
+
// Splits `src` at every character that appears in `pattern` (the pattern is
// a SET of delimiter characters, matched with find_first_of) and stores the
// pieces in `res`, which is cleared first.
// Once `res` already holds `maxsplit` pieces, the remainder of `src` is
// appended as one final piece. A delimiter at the very end of `src` does not
// produce a trailing empty piece, and an empty `src` yields no pieces.
inline void Split(const std::string& src, std::vector<std::string>& res, const std::string& pattern, size_t maxsplit = std::string::npos) {
  res.clear();
  std::string::size_type cursor = 0;
  while (cursor < src.size()) {
    const std::string::size_type hit = src.find_first_of(pattern, cursor);
    const bool takeRest = (hit == std::string::npos) || (res.size() >= maxsplit);
    if (takeRest) {
      res.push_back(src.substr(cursor));
      return;
    }
    res.push_back(src.substr(cursor, hit - cursor));
    cursor = hit + 1;
  }
}

// Convenience overload that returns the pieces by value.
inline std::vector<std::string> Split(const std::string& src, const std::string& pattern, size_t maxsplit = std::string::npos) {
  std::vector<std::string> pieces;
  Split(src, pieces, pattern, maxsplit);
  return pieces;
}
|
138
|
+
|
139
|
+
// Returns true when `str` begins with `prefix`. An empty prefix always
// matches; a prefix longer than `str` never does.
inline bool StartsWith(const std::string& str, const std::string& prefix) {
  const std::string::size_type n = prefix.length();
  return n <= str.length() && str.compare(0, n, prefix) == 0;
}
|
145
|
+
|
146
|
+
// Returns true when `str` ends with `suffix`. An empty suffix always
// matches; a suffix longer than `str` never does.
inline bool EndsWith(const std::string& str, const std::string& suffix) {
  const std::string::size_type n = suffix.length();
  if (n > str.length()) {
    return false;
  }
  const std::string::size_type offset = str.length() - n;
  return str.compare(offset, n, suffix) == 0;
}
|
152
|
+
|
153
|
+
// Returns true when the character `ch` occurs anywhere in `str`.
inline bool IsInStr(const std::string& str, char ch) {
  const std::string::size_type pos = str.find(ch);
  return std::string::npos != pos;
}
|
156
|
+
|
157
|
+
// Packs two bytes into one 16-bit value: `high` supplies bits 15..8 and
// `low` supplies bits 7..0. Only the low 8 bits of each char are used, so
// negative chars do not sign-extend into the result.
inline uint16_t TwocharToUint16(char high, char low) {
  const unsigned upper = static_cast<uint8_t>(high);
  const unsigned lower = static_cast<uint8_t>(low);
  return static_cast<uint16_t>((upper << 8) | lower);
}
|
160
|
+
|
161
|
+
// Decodes `len` bytes of UTF-8 starting at `str` into 16-bit code units
// appended to `vec` (which is cleared first).
//
// Returns false when `str` is null (`vec` is left untouched in that case),
// when a sequence is truncated, when a lead byte is not a valid 1-, 2- or
// 3-byte lead (so 4-byte sequences, which do not fit in 16 bits, and stray
// continuation bytes 0x80..0xBF are rejected), or when a trailing byte is
// not of the form 10xxxxxx. On failure `vec` holds the units decoded so far.
// NOTE(review): overlong encodings and surrogate values are not rejected.
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if (!str) {
    return false;
  }
  vec.clear();
  for (size_t i = 0; i < len;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t extra;   // number of continuation bytes that must follow
    uint32_t code;  // payload bits gathered so far
    if (lead < 0x80) {                        // 0xxxxxxx
      extra = 0;
      code = lead;
    } else if (lead >= 0xc0 && lead <= 0xdf) { // 110xxxxx
      extra = 1;
      code = lead & 0x1f;
    } else if (lead >= 0xe0 && lead <= 0xef) { // 1110xxxx
      extra = 2;
      code = lead & 0x0f;
    } else {
      return false;
    }
    if (extra >= len - i) {  // sequence runs past the end of the input
      return false;
    }
    for (size_t k = 1; k <= extra; ++k) {
      const uint8_t next = static_cast<uint8_t>(str[i + k]);
      if ((next & 0xc0) != 0x80) {  // not a 10xxxxxx continuation byte
        return false;
      }
      code = (code << 6) | (next & 0x3f);
    }
    vec.push_back(static_cast<uint16_t>(code));
    i += extra + 1;
  }
  return true;
}

// Convenience overload decoding the whole of `str`.
template <class Uint16Container>
bool Utf8ToUnicode(const std::string& str, Uint16Container& vec) {
  return Utf8ToUnicode(str.c_str(), str.size(), vec);
}
|
196
|
+
|
197
|
+
// Decodes the UTF-8 text in `str` into 32-bit code points appended to `vec`
// (which is cleared first). Handles 1- to 4-byte sequences.
//
// Returns false when a sequence is truncated, when a lead byte is not a
// valid lead (stray continuation bytes 0x80..0xBF and bytes above 0xF7 are
// rejected), or when a trailing byte is not of the form 10xxxxxx. On failure
// `vec` holds the code points decoded so far.
// NOTE(review): overlong encodings, surrogates and values above U+10FFFF are
// not rejected.
template <class Uint32Container>
bool Utf8ToUnicode32(const std::string& str, Uint32Container& vec) {
  vec.clear();
  const size_t len = str.size();
  for (size_t i = 0; i < len;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t extra;   // number of continuation bytes that must follow
    uint32_t code;  // payload bits gathered so far
    if (lead < 0x80) {                        // 0xxxxxxx: 7 bits
      extra = 0;
      code = lead;
    } else if (lead >= 0xc0 && lead <= 0xdf) { // 110xxxxx: 5 + 6 bits
      extra = 1;
      code = lead & 0x1f;
    } else if (lead >= 0xe0 && lead <= 0xef) { // 1110xxxx: 4 + 6 + 6 bits
      extra = 2;
      code = lead & 0x0f;
    } else if (lead >= 0xf0 && lead <= 0xf7) { // 11110xxx: 3 + 6 + 6 + 6 bits
      extra = 3;
      code = lead & 0x07;
    } else {
      return false;
    }
    if (extra >= len - i) {  // sequence runs past the end of the input
      return false;
    }
    for (size_t k = 1; k <= extra; ++k) {
      const uint8_t next = static_cast<uint8_t>(str[i + k]);
      if ((next & 0xc0) != 0x80) {  // not a 10xxxxxx continuation byte
        return false;
      }
      code = (code << 6) | (next & 0x3f);
    }
    vec.push_back(code);
    i += extra + 1;
  }
  return true;
}
|
251
|
+
|
252
|
+
// Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`
// (which is cleared first).
//
// Values up to 0x7f take one byte, up to 0x7ff two, up to 0xffff three and
// everything above four. The 4-byte lead carries three payload bits
// (11110xxx), so it is masked with 0x07; the previous 0x03 mask dropped
// bit 20 and corrupted every code point from U+100000 upwards.
// NOTE(review): values above U+10FFFF are not rejected; their bits above
// bit 20 are discarded.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const uint32_t ui = *begin;
    if (ui <= 0x7f) {
      res += static_cast<char>(ui);
    } else if (ui <= 0x7ff) {
      res += static_cast<char>(((ui >> 6) & 0x1f) | 0xc0);
      res += static_cast<char>((ui & 0x3f) | 0x80);
    } else if (ui <= 0xffff) {
      res += static_cast<char>(((ui >> 12) & 0x0f) | 0xe0);
      res += static_cast<char>(((ui >> 6) & 0x3f) | 0x80);
      res += static_cast<char>((ui & 0x3f) | 0x80);
    } else {
      res += static_cast<char>(((ui >> 18) & 0x07) | 0xf0);
      res += static_cast<char>(((ui >> 12) & 0x3f) | 0x80);
      res += static_cast<char>(((ui >> 6) & 0x3f) | 0x80);
      res += static_cast<char>((ui & 0x3f) | 0x80);
    }
  }
}
|
276
|
+
|
277
|
+
// Encodes the 16-bit values in [begin, end) as UTF-8 into `res` (which is
// cleared first): values up to 0x7f take one byte, up to 0x7ff two bytes,
// everything else three bytes.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const uint16_t unit = *begin;
    if (unit <= 0x7f) {
      res += char(unit);
      continue;
    }
    if (unit <= 0x7ff) {
      res += char(((unit >> 6) & 0x1f) | 0xc0);
    } else {
      res += char(((unit >> 12) & 0x0f) | 0xe0);
      res += char(((unit >> 6) & 0x3f) | 0x80);
    }
    // Both multi-byte forms end with the low six bits.
    res += char((unit & 0x3f) | 0x80);
  }
}
|
296
|
+
|
297
|
+
|
298
|
+
// Converts `len` bytes of double-byte text at `str` into 16-bit units
// appended to `vec` (which is cleared first).
// A byte with the high bit clear becomes one unit on its own; a byte with
// the high bit set is combined with the following byte as (first << 8) |
// second. Returns false when such a lead byte is the last byte of the input;
// returns true otherwise, including when `str` is null.
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if (!str) {
    return true;
  }
  for (size_t pos = 0; pos < len;) {
    if ((str[pos] & 0x80) == 0) {
      vec.push_back(uint16_t(str[pos]));
      ++pos;
      continue;
    }
    if (pos + 1 >= len) {  // lead byte without its second byte
      return false;
    }
    const unsigned first = static_cast<uint8_t>(str[pos]);
    const unsigned second = static_cast<uint8_t>(str[pos + 1]);
    vec.push_back(static_cast<uint16_t>((first << 8) | second));
    pos += 2;
  }
  return true;
}

// Convenience overload converting the whole of `str`.
template <class Uint16Container>
bool GBKTrans(const std::string& str, Uint16Container& vec) {
  return GBKTrans(str.c_str(), str.size(), vec);
}
|
326
|
+
|
327
|
+
// Converts the 16-bit units in [begin, end) back into bytes appended to
// `res` (which is cleared first). A unit whose upper byte has the high bit
// set is written as two bytes (upper, then lower); any other unit is written
// as its lower byte only.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const char upper = static_cast<char>(((*begin) >> 8) & 0x00ff);
    const char lower = static_cast<char>((*begin) & 0x00ff);
    if (upper & 0x80) {
      res += upper;
    }
    res += lower;
  }
}
|
345
|
+
|
346
|
+
/*
 * Formats the current local time with strftime and stores it in timeStr.
 *
 * format example: "%Y-%m-%d %H:%M:%S"
 *
 * The result is built in a 64-byte stack buffer and then copied, instead of
 * writing through a cast-away-const c_str() pointer. timeStr ends up empty
 * when the formatted text (plus terminator) does not fit in 64 bytes, when
 * the format produces no output, or when localtime() fails.
 */
inline void GetTime(const std::string& format, std::string& timeStr) {
  time_t timeNow;
  time(&timeNow);
  const struct tm* const local = localtime(&timeNow);
  if (!local) {
    // strftime must not be handed a null tm pointer.
    timeStr.clear();
    return;
  }
  char buffer[64];
  const size_t len = strftime(buffer, sizeof(buffer), format.c_str(), local);
  timeStr.assign(buffer, len);
}
|
356
|
+
|
357
|
+
// Concatenates two path fragments, inserting a single "/" between them
// unless `path1` already ends with "/". An empty `path1` therefore yields
// "/" + path2.
inline std::string PathJoin(const std::string& path1, const std::string& path2) {
  const bool hasTrailingSlash = !path1.empty() && path1[path1.size() - 1] == '/';
  std::string joined(path1);
  if (!hasTrailingSlash) {
    joined += "/";
  }
  joined += path2;
  return joined;
}
|
363
|
+
|
364
|
+
}
|
365
|
+
#endif
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#ifndef LIMONP_THREAD_HPP
|
2
|
+
#define LIMONP_THREAD_HPP
|
3
|
+
|
4
|
+
#include "Logging.hpp"
|
5
|
+
#include "NonCopyable.hpp"
|
6
|
+
|
7
|
+
namespace limonp {
|
8
|
+
|
9
|
+
class IThread: NonCopyable {
|
10
|
+
public:
|
11
|
+
IThread(): isStarted(false), isJoined(false) {
|
12
|
+
}
|
13
|
+
virtual ~IThread() {
|
14
|
+
if(isStarted && !isJoined) {
|
15
|
+
XCHECK(!pthread_detach(thread_));
|
16
|
+
}
|
17
|
+
};
|
18
|
+
|
19
|
+
virtual void Run() = 0;
|
20
|
+
void Start() {
|
21
|
+
XCHECK(!isStarted);
|
22
|
+
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
|
23
|
+
isStarted = true;
|
24
|
+
}
|
25
|
+
void Join() {
|
26
|
+
XCHECK(!isJoined);
|
27
|
+
XCHECK(!pthread_join(thread_, NULL));
|
28
|
+
isJoined = true;
|
29
|
+
}
|
30
|
+
private:
|
31
|
+
static void * Worker(void * data) {
|
32
|
+
IThread * ptr = (IThread* ) data;
|
33
|
+
ptr->Run();
|
34
|
+
return NULL;
|
35
|
+
}
|
36
|
+
|
37
|
+
pthread_t thread_;
|
38
|
+
bool isStarted;
|
39
|
+
bool isJoined;
|
40
|
+
}; // class IThread
|
41
|
+
|
42
|
+
} // namespace limonp
|
43
|
+
|
44
|
+
#endif // LIMONP_THREAD_HPP
|