cppjieba_rb 0.4.1 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (127) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +21 -0
  3. data/.github/workflows/linting.yml +30 -0
  4. data/.github/workflows/release.yml +42 -0
  5. data/.github/workflows/tests.yml +47 -0
  6. data/.gitignore +1 -0
  7. data/.rubocop.yml +45 -0
  8. data/.ruby-version +1 -0
  9. data/.yamllint +35 -0
  10. data/CHANGELOG.md +17 -0
  11. data/Gemfile +11 -0
  12. data/README.md +5 -5
  13. data/Rakefile +16 -7
  14. data/cppjieba_rb.gemspec +46 -33
  15. data/ext/cppjieba/.github/workflows/cmake.yml +52 -0
  16. data/ext/cppjieba/.github/workflows/stale-issues.yml +24 -0
  17. data/ext/cppjieba/.gitmodules +3 -0
  18. data/ext/cppjieba/{ChangeLog.md → CHANGELOG.md} +50 -1
  19. data/ext/cppjieba/CMakeLists.txt +11 -14
  20. data/ext/cppjieba/LICENSE +20 -0
  21. data/ext/cppjieba/README.md +9 -18
  22. data/ext/cppjieba/deps/limonp/.github/workflows/cmake.yml +43 -0
  23. data/ext/cppjieba/deps/limonp/.gitignore +9 -0
  24. data/ext/cppjieba/deps/limonp/CHANGELOG.md +160 -0
  25. data/ext/cppjieba/deps/limonp/CMakeLists.txt +61 -0
  26. data/ext/cppjieba/deps/limonp/LICENSE +20 -0
  27. data/ext/cppjieba/deps/limonp/README.md +38 -0
  28. data/ext/cppjieba/deps/limonp/{LocalVector.hpp → include/limonp/LocalVector.hpp} +3 -3
  29. data/ext/cppjieba/deps/limonp/{Logging.hpp → include/limonp/Logging.hpp} +17 -3
  30. data/ext/cppjieba/deps/limonp/{StringUtil.hpp → include/limonp/StringUtil.hpp} +31 -10
  31. data/ext/cppjieba/deps/limonp/test/CMakeLists.txt +8 -0
  32. data/ext/cppjieba/deps/limonp/test/demo.cpp +40 -0
  33. data/ext/cppjieba/deps/limonp/test/testdata/1.conf +5 -0
  34. data/ext/cppjieba/deps/limonp/test/testdata/StdExtension.data +3 -0
  35. data/ext/cppjieba/deps/limonp/test/testdata/dict.gbk +50 -0
  36. data/ext/cppjieba/deps/limonp/test/testdata/dict.utf8 +50 -0
  37. data/ext/cppjieba/deps/limonp/test/testdata/io_testfile +2 -0
  38. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.1.utf8 +93 -0
  39. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.utf8 +93 -0
  40. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.1.utf8 +67 -0
  41. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.2.utf8 +64 -0
  42. data/ext/cppjieba/deps/limonp/test/unittest/CMakeLists.txt +30 -0
  43. data/ext/cppjieba/deps/limonp/test/unittest/TArgvContext.cpp +16 -0
  44. data/ext/cppjieba/deps/limonp/test/unittest/TCastFloat.cpp +19 -0
  45. data/ext/cppjieba/deps/limonp/test/unittest/TClosure.cpp +85 -0
  46. data/ext/cppjieba/deps/limonp/test/unittest/TColorPrint.cpp +20 -0
  47. data/ext/cppjieba/deps/limonp/test/unittest/TConfig.cpp +17 -0
  48. data/ext/cppjieba/deps/limonp/test/unittest/TLocalVector.cpp +41 -0
  49. data/ext/cppjieba/deps/limonp/test/unittest/TLogging.cpp +12 -0
  50. data/ext/cppjieba/deps/limonp/test/unittest/TStdExtension.cpp +95 -0
  51. data/ext/cppjieba/deps/limonp/test/unittest/TStringUtil.cpp +183 -0
  52. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +9 -0
  53. data/ext/cppjieba/include/cppjieba/Jieba.hpp +4 -0
  54. data/ext/cppjieba/include/cppjieba/Trie.hpp +27 -1
  55. data/ext/cppjieba/test/CMakeLists.txt +4 -3
  56. data/ext/cppjieba/test/unittest/CMakeLists.txt +16 -7
  57. data/ext/cppjieba_rb/extconf.rb +11 -6
  58. data/ext/cppjieba_rb/internal.cc +1 -1
  59. data/lib/cppjieba_rb/segment.rb +4 -1
  60. data/lib/cppjieba_rb/version.rb +3 -1
  61. data/lib/cppjieba_rb.rb +12 -5
  62. data/test/test_keyword.rb +8 -8
  63. data/test/test_segment.rb +14 -10
  64. data/test/test_stop_word_filter.rb +5 -3
  65. data/test/test_tagging.rb +5 -2
  66. metadata +63 -140
  67. data/.travis.yml +0 -30
  68. data/ext/cppjieba/.travis.yml +0 -21
  69. data/ext/cppjieba/README_EN.md +0 -115
  70. data/ext/cppjieba/appveyor.yml +0 -32
  71. data/ext/cppjieba/deps/CMakeLists.txt +0 -1
  72. data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
  73. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
  74. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
  75. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
  76. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
  77. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
  78. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
  79. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
  80. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
  81. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
  82. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
  83. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
  84. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
  85. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
  86. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
  87. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
  88. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
  89. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
  90. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
  91. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
  92. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
  93. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
  94. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
  95. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
  96. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
  97. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
  98. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
  99. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  100. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
  101. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
  102. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
  103. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
  104. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
  105. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
  106. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
  107. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
  108. data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
  109. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
  110. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
  111. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
  112. data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
  113. data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
  114. data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
  115. data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
  116. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
  117. data/ext/cppjieba/test/demo.cpp +0 -80
  118. /data/ext/cppjieba/deps/{gtest/src/.deps/.dirstamp → limonp/.gitmodules} +0 -0
  119. /data/ext/cppjieba/deps/limonp/{ArgvContext.hpp → include/limonp/ArgvContext.hpp} +0 -0
  120. /data/ext/cppjieba/deps/limonp/{Closure.hpp → include/limonp/Closure.hpp} +0 -0
  121. /data/ext/cppjieba/deps/limonp/{Colors.hpp → include/limonp/Colors.hpp} +0 -0
  122. /data/ext/cppjieba/deps/limonp/{Condition.hpp → include/limonp/Condition.hpp} +0 -0
  123. /data/ext/cppjieba/deps/limonp/{Config.hpp → include/limonp/Config.hpp} +0 -0
  124. /data/ext/cppjieba/deps/limonp/{ForcePublic.hpp → include/limonp/ForcePublic.hpp} +0 -0
  125. /data/ext/cppjieba/deps/limonp/{NonCopyable.hpp → include/limonp/NonCopyable.hpp} +0 -0
  126. /data/ext/cppjieba/deps/limonp/{StdExtension.hpp → include/limonp/StdExtension.hpp} +0 -0
  127. /data/ext/cppjieba/deps/{gtest/src/gtest_main.cc → limonp/test/unittest/gtest_main.cpp} +0 -0
@@ -1,44 +0,0 @@
1
- #ifndef LIMONP_THREAD_HPP
2
- #define LIMONP_THREAD_HPP
3
-
4
- #include "Logging.hpp"
5
- #include "NonCopyable.hpp"
6
-
7
- namespace limonp {
8
-
9
- class IThread: NonCopyable {
10
- public:
11
- IThread(): isStarted(false), isJoined(false) {
12
- }
13
- virtual ~IThread() {
14
- if(isStarted && !isJoined) {
15
- XCHECK(!pthread_detach(thread_));
16
- }
17
- };
18
-
19
- virtual void Run() = 0;
20
- void Start() {
21
- XCHECK(!isStarted);
22
- XCHECK(!pthread_create(&thread_, NULL, Worker, this));
23
- isStarted = true;
24
- }
25
- void Join() {
26
- XCHECK(!isJoined);
27
- XCHECK(!pthread_join(thread_, NULL));
28
- isJoined = true;
29
- }
30
- private:
31
- static void * Worker(void * data) {
32
- IThread * ptr = (IThread* ) data;
33
- ptr->Run();
34
- return NULL;
35
- }
36
-
37
- pthread_t thread_;
38
- bool isStarted;
39
- bool isJoined;
40
- }; // class IThread
41
-
42
- } // namespace limonp
43
-
44
- #endif // LIMONP_THREAD_HPP
@@ -1,86 +0,0 @@
1
- #ifndef LIMONP_THREAD_POOL_HPP
2
- #define LIMONP_THREAD_POOL_HPP
3
-
4
- #include "Thread.hpp"
5
- #include "BlockingQueue.hpp"
6
- #include "BoundedBlockingQueue.hpp"
7
- #include "Closure.hpp"
8
-
9
- namespace limonp {
10
-
11
- using namespace std;
12
-
13
- //class ThreadPool;
14
- class ThreadPool: NonCopyable {
15
- public:
16
- class Worker: public IThread {
17
- public:
18
- Worker(ThreadPool* pool): ptThreadPool_(pool) {
19
- assert(ptThreadPool_);
20
- }
21
- virtual ~Worker() {
22
- }
23
-
24
- virtual void Run() {
25
- while (true) {
26
- ClosureInterface* closure = ptThreadPool_->queue_.Pop();
27
- if (closure == NULL) {
28
- break;
29
- }
30
- try {
31
- closure->Run();
32
- } catch(std::exception& e) {
33
- XLOG(ERROR) << e.what();
34
- } catch(...) {
35
- XLOG(ERROR) << " unknown exception.";
36
- }
37
- delete closure;
38
- }
39
- }
40
- private:
41
- ThreadPool * ptThreadPool_;
42
- }; // class Worker
43
-
44
- ThreadPool(size_t thread_num)
45
- : threads_(thread_num),
46
- queue_(thread_num) {
47
- assert(thread_num);
48
- for(size_t i = 0; i < threads_.size(); i ++) {
49
- threads_[i] = new Worker(this);
50
- }
51
- }
52
- ~ThreadPool() {
53
- Stop();
54
- }
55
-
56
- void Start() {
57
- for(size_t i = 0; i < threads_.size(); i++) {
58
- threads_[i]->Start();
59
- }
60
- }
61
- void Stop() {
62
- for(size_t i = 0; i < threads_.size(); i ++) {
63
- queue_.Push(NULL);
64
- }
65
- for(size_t i = 0; i < threads_.size(); i ++) {
66
- threads_[i]->Join();
67
- delete threads_[i];
68
- }
69
- threads_.clear();
70
- }
71
-
72
- void Add(ClosureInterface* task) {
73
- assert(task);
74
- queue_.Push(task);
75
- }
76
-
77
- private:
78
- friend class Worker;
79
-
80
- vector<IThread*> threads_;
81
- BoundedBlockingQueue<ClosureInterface*> queue_;
82
- }; // class ThreadPool
83
-
84
- } // namespace limonp
85
-
86
- #endif // LIMONP_THREAD_POOL_HPP
@@ -1,80 +0,0 @@
1
- #include "cppjieba/Jieba.hpp"
2
-
3
- using namespace std;
4
-
5
- const char* const DICT_PATH = "../dict/jieba.dict.utf8";
6
- const char* const HMM_PATH = "../dict/hmm_model.utf8";
7
- const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
8
- const char* const IDF_PATH = "../dict/idf.utf8";
9
- const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
10
-
11
- int main(int argc, char** argv) {
12
- cppjieba::Jieba jieba(DICT_PATH,
13
- HMM_PATH,
14
- USER_DICT_PATH,
15
- IDF_PATH,
16
- STOP_WORD_PATH);
17
- vector<string> words;
18
- vector<cppjieba::Word> jiebawords;
19
- string s;
20
- string result;
21
-
22
- s = "他来到了网易杭研大厦";
23
- cout << s << endl;
24
- cout << "[demo] Cut With HMM" << endl;
25
- jieba.Cut(s, words, true);
26
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
27
-
28
- cout << "[demo] Cut Without HMM " << endl;
29
- jieba.Cut(s, words, false);
30
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
31
-
32
- s = "我来到北京清华大学";
33
- cout << s << endl;
34
- cout << "[demo] CutAll" << endl;
35
- jieba.CutAll(s, words);
36
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
37
-
38
- s = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
39
- cout << s << endl;
40
- cout << "[demo] CutForSearch" << endl;
41
- jieba.CutForSearch(s, words);
42
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
43
-
44
- cout << "[demo] Insert User Word" << endl;
45
- jieba.Cut("男默女泪", words);
46
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
47
- jieba.InsertUserWord("男默女泪");
48
- jieba.Cut("男默女泪", words);
49
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
50
-
51
- cout << "[demo] CutForSearch Word With Offset" << endl;
52
- jieba.CutForSearch(s, jiebawords, true);
53
- cout << jiebawords << endl;
54
-
55
- cout << "[demo] Lookup Tag for Single Token" << endl;
56
- const int DemoTokenMaxLen = 32;
57
- char DemoTokens[][DemoTokenMaxLen] = {"拖拉机", "CEO", "123", "。"};
58
- vector<pair<string, string> > LookupTagres(sizeof(DemoTokens) / DemoTokenMaxLen);
59
- vector<pair<string, string> >::iterator it;
60
- for (it = LookupTagres.begin(); it != LookupTagres.end(); it++) {
61
- it->first = DemoTokens[it - LookupTagres.begin()];
62
- it->second = jieba.LookupTag(it->first);
63
- }
64
- cout << LookupTagres << endl;
65
-
66
- cout << "[demo] Tagging" << endl;
67
- vector<pair<string, string> > tagres;
68
- s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
69
- jieba.Tag(s, tagres);
70
- cout << s << endl;
71
- cout << tagres << endl;
72
-
73
- cout << "[demo] Keyword Extraction" << endl;
74
- const size_t topk = 5;
75
- vector<cppjieba::KeywordExtractor::Word> keywordres;
76
- jieba.extractor.Extract(s, keywordres, topk);
77
- cout << s << endl;
78
- cout << keywordres << endl;
79
- return EXIT_SUCCESS;
80
- }