cppjieba_rb 0.4.1 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.editorconfig +21 -0
- data/.github/workflows/linting.yml +30 -0
- data/.github/workflows/release.yml +42 -0
- data/.github/workflows/tests.yml +47 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +45 -0
- data/.ruby-version +1 -0
- data/.yamllint +35 -0
- data/CHANGELOG.md +17 -0
- data/Gemfile +11 -0
- data/README.md +5 -5
- data/Rakefile +16 -7
- data/cppjieba_rb.gemspec +46 -33
- data/ext/cppjieba/.github/workflows/cmake.yml +52 -0
- data/ext/cppjieba/.github/workflows/stale-issues.yml +24 -0
- data/ext/cppjieba/.gitmodules +3 -0
- data/ext/cppjieba/{ChangeLog.md → CHANGELOG.md} +50 -1
- data/ext/cppjieba/CMakeLists.txt +11 -14
- data/ext/cppjieba/LICENSE +20 -0
- data/ext/cppjieba/README.md +9 -18
- data/ext/cppjieba/deps/limonp/.github/workflows/cmake.yml +43 -0
- data/ext/cppjieba/deps/limonp/.gitignore +9 -0
- data/ext/cppjieba/deps/limonp/CHANGELOG.md +160 -0
- data/ext/cppjieba/deps/limonp/CMakeLists.txt +61 -0
- data/ext/cppjieba/deps/limonp/LICENSE +20 -0
- data/ext/cppjieba/deps/limonp/README.md +38 -0
- data/ext/cppjieba/deps/limonp/{LocalVector.hpp → include/limonp/LocalVector.hpp} +3 -3
- data/ext/cppjieba/deps/limonp/{Logging.hpp → include/limonp/Logging.hpp} +17 -3
- data/ext/cppjieba/deps/limonp/{StringUtil.hpp → include/limonp/StringUtil.hpp} +31 -10
- data/ext/cppjieba/deps/limonp/test/CMakeLists.txt +8 -0
- data/ext/cppjieba/deps/limonp/test/demo.cpp +40 -0
- data/ext/cppjieba/deps/limonp/test/testdata/1.conf +5 -0
- data/ext/cppjieba/deps/limonp/test/testdata/StdExtension.data +3 -0
- data/ext/cppjieba/deps/limonp/test/testdata/dict.gbk +50 -0
- data/ext/cppjieba/deps/limonp/test/testdata/dict.utf8 +50 -0
- data/ext/cppjieba/deps/limonp/test/testdata/io_testfile +2 -0
- data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.1.utf8 +93 -0
- data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.utf8 +93 -0
- data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.1.utf8 +67 -0
- data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.2.utf8 +64 -0
- data/ext/cppjieba/deps/limonp/test/unittest/CMakeLists.txt +30 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TArgvContext.cpp +16 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TCastFloat.cpp +19 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TClosure.cpp +85 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TColorPrint.cpp +20 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TConfig.cpp +17 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TLocalVector.cpp +41 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TLogging.cpp +12 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TStdExtension.cpp +95 -0
- data/ext/cppjieba/deps/limonp/test/unittest/TStringUtil.cpp +183 -0
- data/ext/cppjieba/include/cppjieba/DictTrie.hpp +9 -0
- data/ext/cppjieba/include/cppjieba/Jieba.hpp +4 -0
- data/ext/cppjieba/include/cppjieba/Trie.hpp +27 -1
- data/ext/cppjieba/test/CMakeLists.txt +4 -3
- data/ext/cppjieba/test/unittest/CMakeLists.txt +16 -7
- data/ext/cppjieba_rb/extconf.rb +11 -6
- data/ext/cppjieba_rb/internal.cc +1 -1
- data/lib/cppjieba_rb/segment.rb +4 -1
- data/lib/cppjieba_rb/version.rb +3 -1
- data/lib/cppjieba_rb.rb +12 -5
- data/test/test_keyword.rb +8 -8
- data/test/test_segment.rb +14 -10
- data/test/test_stop_word_filter.rb +5 -3
- data/test/test_tagging.rb +5 -2
- metadata +63 -140
- data/.travis.yml +0 -30
- data/ext/cppjieba/.travis.yml +0 -21
- data/ext/cppjieba/README_EN.md +0 -115
- data/ext/cppjieba/appveyor.yml +0 -32
- data/ext/cppjieba/deps/CMakeLists.txt +0 -1
- data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
- data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
- data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
- data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
- data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
- data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
- data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
- data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
- data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
- data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
- data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
- data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
- data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
- data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
- data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
- data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
- data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
- data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
- data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
- data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
- data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
- data/ext/cppjieba/test/demo.cpp +0 -80
- /data/ext/cppjieba/deps/{gtest/src/.deps/.dirstamp → limonp/.gitmodules} +0 -0
- /data/ext/cppjieba/deps/limonp/{ArgvContext.hpp → include/limonp/ArgvContext.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{Closure.hpp → include/limonp/Closure.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{Colors.hpp → include/limonp/Colors.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{Condition.hpp → include/limonp/Condition.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{Config.hpp → include/limonp/Config.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{ForcePublic.hpp → include/limonp/ForcePublic.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{NonCopyable.hpp → include/limonp/NonCopyable.hpp} +0 -0
- /data/ext/cppjieba/deps/limonp/{StdExtension.hpp → include/limonp/StdExtension.hpp} +0 -0
- /data/ext/cppjieba/deps/{gtest/src/gtest_main.cc → limonp/test/unittest/gtest_main.cpp} +0 -0
@@ -1,44 +0,0 @@
|
|
1
|
-
#ifndef LIMONP_THREAD_HPP
|
2
|
-
#define LIMONP_THREAD_HPP
|
3
|
-
|
4
|
-
#include "Logging.hpp"
|
5
|
-
#include "NonCopyable.hpp"
|
6
|
-
|
7
|
-
namespace limonp {
|
8
|
-
|
9
|
-
class IThread: NonCopyable {
|
10
|
-
public:
|
11
|
-
IThread(): isStarted(false), isJoined(false) {
|
12
|
-
}
|
13
|
-
virtual ~IThread() {
|
14
|
-
if(isStarted && !isJoined) {
|
15
|
-
XCHECK(!pthread_detach(thread_));
|
16
|
-
}
|
17
|
-
};
|
18
|
-
|
19
|
-
virtual void Run() = 0;
|
20
|
-
void Start() {
|
21
|
-
XCHECK(!isStarted);
|
22
|
-
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
|
23
|
-
isStarted = true;
|
24
|
-
}
|
25
|
-
void Join() {
|
26
|
-
XCHECK(!isJoined);
|
27
|
-
XCHECK(!pthread_join(thread_, NULL));
|
28
|
-
isJoined = true;
|
29
|
-
}
|
30
|
-
private:
|
31
|
-
static void * Worker(void * data) {
|
32
|
-
IThread * ptr = (IThread* ) data;
|
33
|
-
ptr->Run();
|
34
|
-
return NULL;
|
35
|
-
}
|
36
|
-
|
37
|
-
pthread_t thread_;
|
38
|
-
bool isStarted;
|
39
|
-
bool isJoined;
|
40
|
-
}; // class IThread
|
41
|
-
|
42
|
-
} // namespace limonp
|
43
|
-
|
44
|
-
#endif // LIMONP_THREAD_HPP
|
@@ -1,86 +0,0 @@
|
|
1
|
-
#ifndef LIMONP_THREAD_POOL_HPP
|
2
|
-
#define LIMONP_THREAD_POOL_HPP
|
3
|
-
|
4
|
-
#include "Thread.hpp"
|
5
|
-
#include "BlockingQueue.hpp"
|
6
|
-
#include "BoundedBlockingQueue.hpp"
|
7
|
-
#include "Closure.hpp"
|
8
|
-
|
9
|
-
namespace limonp {
|
10
|
-
|
11
|
-
using namespace std;
|
12
|
-
|
13
|
-
//class ThreadPool;
|
14
|
-
class ThreadPool: NonCopyable {
|
15
|
-
public:
|
16
|
-
class Worker: public IThread {
|
17
|
-
public:
|
18
|
-
Worker(ThreadPool* pool): ptThreadPool_(pool) {
|
19
|
-
assert(ptThreadPool_);
|
20
|
-
}
|
21
|
-
virtual ~Worker() {
|
22
|
-
}
|
23
|
-
|
24
|
-
virtual void Run() {
|
25
|
-
while (true) {
|
26
|
-
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
|
27
|
-
if (closure == NULL) {
|
28
|
-
break;
|
29
|
-
}
|
30
|
-
try {
|
31
|
-
closure->Run();
|
32
|
-
} catch(std::exception& e) {
|
33
|
-
XLOG(ERROR) << e.what();
|
34
|
-
} catch(...) {
|
35
|
-
XLOG(ERROR) << " unknown exception.";
|
36
|
-
}
|
37
|
-
delete closure;
|
38
|
-
}
|
39
|
-
}
|
40
|
-
private:
|
41
|
-
ThreadPool * ptThreadPool_;
|
42
|
-
}; // class Worker
|
43
|
-
|
44
|
-
ThreadPool(size_t thread_num)
|
45
|
-
: threads_(thread_num),
|
46
|
-
queue_(thread_num) {
|
47
|
-
assert(thread_num);
|
48
|
-
for(size_t i = 0; i < threads_.size(); i ++) {
|
49
|
-
threads_[i] = new Worker(this);
|
50
|
-
}
|
51
|
-
}
|
52
|
-
~ThreadPool() {
|
53
|
-
Stop();
|
54
|
-
}
|
55
|
-
|
56
|
-
void Start() {
|
57
|
-
for(size_t i = 0; i < threads_.size(); i++) {
|
58
|
-
threads_[i]->Start();
|
59
|
-
}
|
60
|
-
}
|
61
|
-
void Stop() {
|
62
|
-
for(size_t i = 0; i < threads_.size(); i ++) {
|
63
|
-
queue_.Push(NULL);
|
64
|
-
}
|
65
|
-
for(size_t i = 0; i < threads_.size(); i ++) {
|
66
|
-
threads_[i]->Join();
|
67
|
-
delete threads_[i];
|
68
|
-
}
|
69
|
-
threads_.clear();
|
70
|
-
}
|
71
|
-
|
72
|
-
void Add(ClosureInterface* task) {
|
73
|
-
assert(task);
|
74
|
-
queue_.Push(task);
|
75
|
-
}
|
76
|
-
|
77
|
-
private:
|
78
|
-
friend class Worker;
|
79
|
-
|
80
|
-
vector<IThread*> threads_;
|
81
|
-
BoundedBlockingQueue<ClosureInterface*> queue_;
|
82
|
-
}; // class ThreadPool
|
83
|
-
|
84
|
-
} // namespace limonp
|
85
|
-
|
86
|
-
#endif // LIMONP_THREAD_POOL_HPP
|
data/ext/cppjieba/test/demo.cpp
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
#include "cppjieba/Jieba.hpp"
|
2
|
-
|
3
|
-
using namespace std;
|
4
|
-
|
5
|
-
const char* const DICT_PATH = "../dict/jieba.dict.utf8";
|
6
|
-
const char* const HMM_PATH = "../dict/hmm_model.utf8";
|
7
|
-
const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
|
8
|
-
const char* const IDF_PATH = "../dict/idf.utf8";
|
9
|
-
const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
|
10
|
-
|
11
|
-
int main(int argc, char** argv) {
|
12
|
-
cppjieba::Jieba jieba(DICT_PATH,
|
13
|
-
HMM_PATH,
|
14
|
-
USER_DICT_PATH,
|
15
|
-
IDF_PATH,
|
16
|
-
STOP_WORD_PATH);
|
17
|
-
vector<string> words;
|
18
|
-
vector<cppjieba::Word> jiebawords;
|
19
|
-
string s;
|
20
|
-
string result;
|
21
|
-
|
22
|
-
s = "他来到了网易杭研大厦";
|
23
|
-
cout << s << endl;
|
24
|
-
cout << "[demo] Cut With HMM" << endl;
|
25
|
-
jieba.Cut(s, words, true);
|
26
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
27
|
-
|
28
|
-
cout << "[demo] Cut Without HMM " << endl;
|
29
|
-
jieba.Cut(s, words, false);
|
30
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
31
|
-
|
32
|
-
s = "我来到北京清华大学";
|
33
|
-
cout << s << endl;
|
34
|
-
cout << "[demo] CutAll" << endl;
|
35
|
-
jieba.CutAll(s, words);
|
36
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
37
|
-
|
38
|
-
s = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
|
39
|
-
cout << s << endl;
|
40
|
-
cout << "[demo] CutForSearch" << endl;
|
41
|
-
jieba.CutForSearch(s, words);
|
42
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
43
|
-
|
44
|
-
cout << "[demo] Insert User Word" << endl;
|
45
|
-
jieba.Cut("男默女泪", words);
|
46
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
47
|
-
jieba.InsertUserWord("男默女泪");
|
48
|
-
jieba.Cut("男默女泪", words);
|
49
|
-
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
50
|
-
|
51
|
-
cout << "[demo] CutForSearch Word With Offset" << endl;
|
52
|
-
jieba.CutForSearch(s, jiebawords, true);
|
53
|
-
cout << jiebawords << endl;
|
54
|
-
|
55
|
-
cout << "[demo] Lookup Tag for Single Token" << endl;
|
56
|
-
const int DemoTokenMaxLen = 32;
|
57
|
-
char DemoTokens[][DemoTokenMaxLen] = {"拖拉机", "CEO", "123", "。"};
|
58
|
-
vector<pair<string, string> > LookupTagres(sizeof(DemoTokens) / DemoTokenMaxLen);
|
59
|
-
vector<pair<string, string> >::iterator it;
|
60
|
-
for (it = LookupTagres.begin(); it != LookupTagres.end(); it++) {
|
61
|
-
it->first = DemoTokens[it - LookupTagres.begin()];
|
62
|
-
it->second = jieba.LookupTag(it->first);
|
63
|
-
}
|
64
|
-
cout << LookupTagres << endl;
|
65
|
-
|
66
|
-
cout << "[demo] Tagging" << endl;
|
67
|
-
vector<pair<string, string> > tagres;
|
68
|
-
s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
|
69
|
-
jieba.Tag(s, tagres);
|
70
|
-
cout << s << endl;
|
71
|
-
cout << tagres << endl;
|
72
|
-
|
73
|
-
cout << "[demo] Keyword Extraction" << endl;
|
74
|
-
const size_t topk = 5;
|
75
|
-
vector<cppjieba::KeywordExtractor::Word> keywordres;
|
76
|
-
jieba.extractor.Extract(s, keywordres, topk);
|
77
|
-
cout << s << endl;
|
78
|
-
cout << keywordres << endl;
|
79
|
-
return EXIT_SUCCESS;
|
80
|
-
}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|