cppjieba_rb 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +21 -0
- data/.github/workflows/linting.yml +30 -0
- data/.github/workflows/release.yml +42 -0
- data/.github/workflows/tests.yml +47 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +45 -0
- data/.ruby-version +1 -0
- data/.yamllint +35 -0
- data/CHANGELOG.md +17 -0
- data/Gemfile +11 -0
- data/README.md +5 -5
- data/Rakefile +16 -7
- data/cppjieba_rb.gemspec +46 -33
- data/ext/cppjieba/.github/workflows/cmake.yml +52 -0
- data/ext/cppjieba/.github/workflows/stale-issues.yml +24 -0
- data/ext/cppjieba/.gitmodules +3 -0
- data/ext/cppjieba/{ChangeLog.md → CHANGELOG.md} +50 -1
- data/ext/cppjieba/CMakeLists.txt +11 -14
- data/ext/cppjieba/LICENSE +20 -0
- data/ext/cppjieba/README.md +9 -18
- data/ext/cppjieba/deps/limonp/.github/workflows/cmake.yml +43 -0
- data/ext/cppjieba/deps/limonp/.gitignore +9 -0
- data/ext/cppjieba/deps/limonp/CHANGELOG.md +160 -0
- data/ext/cppjieba/deps/limonp/CMakeLists.txt +61 -0
- data/ext/cppjieba/deps/limonp/LICENSE +20 -0
- data/ext/cppjieba/deps/limonp/README.md +38 -0
- data/ext/cppjieba/deps/limonp/{LocalVector.hpp → include/limonp/LocalVector.hpp} +3 -3
- data/ext/cppjieba/deps/limonp/{Logging.hpp → include/limonp/Logging.hpp} +17 -3
- data/ext/cppjieba/deps/limonp/{StringUtil.hpp → include/limonp/StringUtil.hpp} +31 -10
- data/ext/cppjieba/deps/limonp/test/CMakeLists.txt +8 -0
- data/ext/cppjieba/deps/limonp/test/demo.cpp +40 -0
- data/ext/cppjieba/deps/limonp/test/testdata/1.conf +5 -0
- data/ext/cppjieba/deps/limonp/test/testdata/StdExtension.data +3 -0
- data/ext/cppjieba/deps/limonp/test/testdata/dict.gbk +50 -0
- data/ext/cppjieba/deps/limonp/test/testdata/dict.utf8 +50 -0
- data/ext/cppjieba/deps/limonp/test/testdata/io_testfile +2 -0
- data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.1.utf8 +93 -0
- data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.utf8 +93 -0
- data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.1.utf8 +67 -0
- data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.2.utf8 +64 -0
- data/ext/cppjieba/deps/limonp/test/unittest/CMakeLists.txt +30 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TArgvContext.cpp +16 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TCastFloat.cpp +19 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TClosure.cpp +85 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TColorPrint.cpp +20 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TConfig.cpp +17 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TLocalVector.cpp +41 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TLogging.cpp +12 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TStdExtension.cpp +95 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TStringUtil.cpp +183 -0
- data/ext/cppjieba/include/cppjieba/DictTrie.hpp +9 -0
- data/ext/cppjieba/include/cppjieba/Jieba.hpp +4 -0
- data/ext/cppjieba/include/cppjieba/Trie.hpp +27 -1
- data/ext/cppjieba/test/CMakeLists.txt +4 -3
- data/ext/cppjieba/test/unittest/CMakeLists.txt +16 -7
- data/ext/cppjieba_rb/extconf.rb +11 -6
- data/lib/cppjieba_rb/segment.rb +4 -1
- data/lib/cppjieba_rb/version.rb +3 -1
- data/lib/cppjieba_rb.rb +12 -5
- data/test/test_keyword.rb +8 -8
- data/test/test_segment.rb +14 -10
- data/test/test_stop_word_filter.rb +5 -3
- data/test/test_tagging.rb +5 -2
- metadata +63 -140
- data/.travis.yml +0 -30
- data/ext/cppjieba/.travis.yml +0 -21
- data/ext/cppjieba/README_EN.md +0 -115
- data/ext/cppjieba/appveyor.yml +0 -32
- data/ext/cppjieba/deps/CMakeLists.txt +0 -1
- data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
- data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
- data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
- data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
- data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
- data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
- data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
- data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
- data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
- data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
- data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
- data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
- data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
- data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
- data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
- data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
- data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
- data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
- data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
- data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
- data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
- data/ext/cppjieba/test/demo.cpp +0 -80
- /data/ext/cppjieba/deps/{gtest/src/.deps/.dirstamp → limonp/.gitmodules} +0 -0
- /data/ext/cppjieba/deps/limonp/{ArgvContext.hpp → include/limonp/ArgvContext.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{Closure.hpp → include/limonp/Closure.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{Colors.hpp → include/limonp/Colors.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{Condition.hpp → include/limonp/Condition.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{Config.hpp → include/limonp/Config.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{ForcePublic.hpp → include/limonp/ForcePublic.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{NonCopyable.hpp → include/limonp/NonCopyable.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{StdExtension.hpp → include/limonp/StdExtension.hpp} +0 -0
- /data/ext/cppjieba/deps/{gtest/src/gtest_main.cc → limonp/test/unittest/gtest_main.cpp} +0 -0
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
#ifndef LIMONP_THREAD_HPP
|
|
2
|
-
#define LIMONP_THREAD_HPP
|
|
3
|
-
|
|
4
|
-
#include "Logging.hpp"
|
|
5
|
-
#include "NonCopyable.hpp"
|
|
6
|
-
|
|
7
|
-
namespace limonp {
|
|
8
|
-
|
|
9
|
-
class IThread: NonCopyable {
|
|
10
|
-
public:
|
|
11
|
-
IThread(): isStarted(false), isJoined(false) {
|
|
12
|
-
}
|
|
13
|
-
virtual ~IThread() {
|
|
14
|
-
if(isStarted && !isJoined) {
|
|
15
|
-
XCHECK(!pthread_detach(thread_));
|
|
16
|
-
}
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
virtual void Run() = 0;
|
|
20
|
-
void Start() {
|
|
21
|
-
XCHECK(!isStarted);
|
|
22
|
-
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
|
|
23
|
-
isStarted = true;
|
|
24
|
-
}
|
|
25
|
-
void Join() {
|
|
26
|
-
XCHECK(!isJoined);
|
|
27
|
-
XCHECK(!pthread_join(thread_, NULL));
|
|
28
|
-
isJoined = true;
|
|
29
|
-
}
|
|
30
|
-
private:
|
|
31
|
-
static void * Worker(void * data) {
|
|
32
|
-
IThread * ptr = (IThread* ) data;
|
|
33
|
-
ptr->Run();
|
|
34
|
-
return NULL;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
pthread_t thread_;
|
|
38
|
-
bool isStarted;
|
|
39
|
-
bool isJoined;
|
|
40
|
-
}; // class IThread
|
|
41
|
-
|
|
42
|
-
} // namespace limonp
|
|
43
|
-
|
|
44
|
-
#endif // LIMONP_THREAD_HPP
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
#ifndef LIMONP_THREAD_POOL_HPP
|
|
2
|
-
#define LIMONP_THREAD_POOL_HPP
|
|
3
|
-
|
|
4
|
-
#include "Thread.hpp"
|
|
5
|
-
#include "BlockingQueue.hpp"
|
|
6
|
-
#include "BoundedBlockingQueue.hpp"
|
|
7
|
-
#include "Closure.hpp"
|
|
8
|
-
|
|
9
|
-
namespace limonp {
|
|
10
|
-
|
|
11
|
-
using namespace std;
|
|
12
|
-
|
|
13
|
-
//class ThreadPool;
|
|
14
|
-
class ThreadPool: NonCopyable {
|
|
15
|
-
public:
|
|
16
|
-
class Worker: public IThread {
|
|
17
|
-
public:
|
|
18
|
-
Worker(ThreadPool* pool): ptThreadPool_(pool) {
|
|
19
|
-
assert(ptThreadPool_);
|
|
20
|
-
}
|
|
21
|
-
virtual ~Worker() {
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
virtual void Run() {
|
|
25
|
-
while (true) {
|
|
26
|
-
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
|
|
27
|
-
if (closure == NULL) {
|
|
28
|
-
break;
|
|
29
|
-
}
|
|
30
|
-
try {
|
|
31
|
-
closure->Run();
|
|
32
|
-
} catch(std::exception& e) {
|
|
33
|
-
XLOG(ERROR) << e.what();
|
|
34
|
-
} catch(...) {
|
|
35
|
-
XLOG(ERROR) << " unknown exception.";
|
|
36
|
-
}
|
|
37
|
-
delete closure;
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
private:
|
|
41
|
-
ThreadPool * ptThreadPool_;
|
|
42
|
-
}; // class Worker
|
|
43
|
-
|
|
44
|
-
ThreadPool(size_t thread_num)
|
|
45
|
-
: threads_(thread_num),
|
|
46
|
-
queue_(thread_num) {
|
|
47
|
-
assert(thread_num);
|
|
48
|
-
for(size_t i = 0; i < threads_.size(); i ++) {
|
|
49
|
-
threads_[i] = new Worker(this);
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
~ThreadPool() {
|
|
53
|
-
Stop();
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
void Start() {
|
|
57
|
-
for(size_t i = 0; i < threads_.size(); i++) {
|
|
58
|
-
threads_[i]->Start();
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
void Stop() {
|
|
62
|
-
for(size_t i = 0; i < threads_.size(); i ++) {
|
|
63
|
-
queue_.Push(NULL);
|
|
64
|
-
}
|
|
65
|
-
for(size_t i = 0; i < threads_.size(); i ++) {
|
|
66
|
-
threads_[i]->Join();
|
|
67
|
-
delete threads_[i];
|
|
68
|
-
}
|
|
69
|
-
threads_.clear();
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
void Add(ClosureInterface* task) {
|
|
73
|
-
assert(task);
|
|
74
|
-
queue_.Push(task);
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
private:
|
|
78
|
-
friend class Worker;
|
|
79
|
-
|
|
80
|
-
vector<IThread*> threads_;
|
|
81
|
-
BoundedBlockingQueue<ClosureInterface*> queue_;
|
|
82
|
-
}; // class ThreadPool
|
|
83
|
-
|
|
84
|
-
} // namespace limonp
|
|
85
|
-
|
|
86
|
-
#endif // LIMONP_THREAD_POOL_HPP
|
data/ext/cppjieba/test/demo.cpp
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
#include "cppjieba/Jieba.hpp"
|
|
2
|
-
|
|
3
|
-
using namespace std;
|
|
4
|
-
|
|
5
|
-
const char* const DICT_PATH = "../dict/jieba.dict.utf8";
|
|
6
|
-
const char* const HMM_PATH = "../dict/hmm_model.utf8";
|
|
7
|
-
const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
|
|
8
|
-
const char* const IDF_PATH = "../dict/idf.utf8";
|
|
9
|
-
const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
|
|
10
|
-
|
|
11
|
-
int main(int argc, char** argv) {
|
|
12
|
-
cppjieba::Jieba jieba(DICT_PATH,
|
|
13
|
-
HMM_PATH,
|
|
14
|
-
USER_DICT_PATH,
|
|
15
|
-
IDF_PATH,
|
|
16
|
-
STOP_WORD_PATH);
|
|
17
|
-
vector<string> words;
|
|
18
|
-
vector<cppjieba::Word> jiebawords;
|
|
19
|
-
string s;
|
|
20
|
-
string result;
|
|
21
|
-
|
|
22
|
-
s = "他来到了网易杭研大厦";
|
|
23
|
-
cout << s << endl;
|
|
24
|
-
cout << "[demo] Cut With HMM" << endl;
|
|
25
|
-
jieba.Cut(s, words, true);
|
|
26
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
|
27
|
-
|
|
28
|
-
cout << "[demo] Cut Without HMM " << endl;
|
|
29
|
-
jieba.Cut(s, words, false);
|
|
30
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
|
31
|
-
|
|
32
|
-
s = "我来到北京清华大学";
|
|
33
|
-
cout << s << endl;
|
|
34
|
-
cout << "[demo] CutAll" << endl;
|
|
35
|
-
jieba.CutAll(s, words);
|
|
36
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
|
37
|
-
|
|
38
|
-
s = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
|
|
39
|
-
cout << s << endl;
|
|
40
|
-
cout << "[demo] CutForSearch" << endl;
|
|
41
|
-
jieba.CutForSearch(s, words);
|
|
42
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
|
43
|
-
|
|
44
|
-
cout << "[demo] Insert User Word" << endl;
|
|
45
|
-
jieba.Cut("男默女泪", words);
|
|
46
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
|
47
|
-
jieba.InsertUserWord("男默女泪");
|
|
48
|
-
jieba.Cut("男默女泪", words);
|
|
49
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
|
50
|
-
|
|
51
|
-
cout << "[demo] CutForSearch Word With Offset" << endl;
|
|
52
|
-
jieba.CutForSearch(s, jiebawords, true);
|
|
53
|
-
cout << jiebawords << endl;
|
|
54
|
-
|
|
55
|
-
cout << "[demo] Lookup Tag for Single Token" << endl;
|
|
56
|
-
const int DemoTokenMaxLen = 32;
|
|
57
|
-
char DemoTokens[][DemoTokenMaxLen] = {"拖拉机", "CEO", "123", "。"};
|
|
58
|
-
vector<pair<string, string> > LookupTagres(sizeof(DemoTokens) / DemoTokenMaxLen);
|
|
59
|
-
vector<pair<string, string> >::iterator it;
|
|
60
|
-
for (it = LookupTagres.begin(); it != LookupTagres.end(); it++) {
|
|
61
|
-
it->first = DemoTokens[it - LookupTagres.begin()];
|
|
62
|
-
it->second = jieba.LookupTag(it->first);
|
|
63
|
-
}
|
|
64
|
-
cout << LookupTagres << endl;
|
|
65
|
-
|
|
66
|
-
cout << "[demo] Tagging" << endl;
|
|
67
|
-
vector<pair<string, string> > tagres;
|
|
68
|
-
s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
|
|
69
|
-
jieba.Tag(s, tagres);
|
|
70
|
-
cout << s << endl;
|
|
71
|
-
cout << tagres << endl;
|
|
72
|
-
|
|
73
|
-
cout << "[demo] Keyword Extraction" << endl;
|
|
74
|
-
const size_t topk = 5;
|
|
75
|
-
vector<cppjieba::KeywordExtractor::Word> keywordres;
|
|
76
|
-
jieba.extractor.Extract(s, keywordres, topk);
|
|
77
|
-
cout << s << endl;
|
|
78
|
-
cout << keywordres << endl;
|
|
79
|
-
return EXIT_SUCCESS;
|
|
80
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|