cppjieba_rb 0.4.1 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +21 -0
  3. data/.github/workflows/linting.yml +30 -0
  4. data/.github/workflows/release.yml +42 -0
  5. data/.github/workflows/tests.yml +47 -0
  6. data/.gitignore +1 -0
  7. data/.rubocop.yml +45 -0
  8. data/.ruby-version +1 -0
  9. data/.yamllint +35 -0
  10. data/CHANGELOG.md +17 -0
  11. data/Gemfile +11 -0
  12. data/README.md +5 -5
  13. data/Rakefile +16 -7
  14. data/cppjieba_rb.gemspec +46 -33
  15. data/ext/cppjieba/.github/workflows/cmake.yml +52 -0
  16. data/ext/cppjieba/.github/workflows/stale-issues.yml +24 -0
  17. data/ext/cppjieba/.gitmodules +3 -0
  18. data/ext/cppjieba/{ChangeLog.md → CHANGELOG.md} +50 -1
  19. data/ext/cppjieba/CMakeLists.txt +11 -14
  20. data/ext/cppjieba/LICENSE +20 -0
  21. data/ext/cppjieba/README.md +9 -18
  22. data/ext/cppjieba/deps/limonp/.github/workflows/cmake.yml +43 -0
  23. data/ext/cppjieba/deps/limonp/.gitignore +9 -0
  24. data/ext/cppjieba/deps/limonp/CHANGELOG.md +160 -0
  25. data/ext/cppjieba/deps/limonp/CMakeLists.txt +61 -0
  26. data/ext/cppjieba/deps/limonp/LICENSE +20 -0
  27. data/ext/cppjieba/deps/limonp/README.md +38 -0
  28. data/ext/cppjieba/deps/limonp/{LocalVector.hpp → include/limonp/LocalVector.hpp} +3 -3
  29. data/ext/cppjieba/deps/limonp/{Logging.hpp → include/limonp/Logging.hpp} +17 -3
  30. data/ext/cppjieba/deps/limonp/{StringUtil.hpp → include/limonp/StringUtil.hpp} +31 -10
  31. data/ext/cppjieba/deps/limonp/test/CMakeLists.txt +8 -0
  32. data/ext/cppjieba/deps/limonp/test/demo.cpp +40 -0
  33. data/ext/cppjieba/deps/limonp/test/testdata/1.conf +5 -0
  34. data/ext/cppjieba/deps/limonp/test/testdata/StdExtension.data +3 -0
  35. data/ext/cppjieba/deps/limonp/test/testdata/dict.gbk +50 -0
  36. data/ext/cppjieba/deps/limonp/test/testdata/dict.utf8 +50 -0
  37. data/ext/cppjieba/deps/limonp/test/testdata/io_testfile +2 -0
  38. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.1.utf8 +93 -0
  39. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.utf8 +93 -0
  40. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.1.utf8 +67 -0
  41. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.2.utf8 +64 -0
  42. data/ext/cppjieba/deps/limonp/test/unittest/CMakeLists.txt +30 -0
  43. data/ext/cppjieba/deps/limonp/test/unittest/TArgvContext.cpp +16 -0
  44. data/ext/cppjieba/deps/limonp/test/unittest/TCastFloat.cpp +19 -0
  45. data/ext/cppjieba/deps/limonp/test/unittest/TClosure.cpp +85 -0
  46. data/ext/cppjieba/deps/limonp/test/unittest/TColorPrint.cpp +20 -0
  47. data/ext/cppjieba/deps/limonp/test/unittest/TConfig.cpp +17 -0
  48. data/ext/cppjieba/deps/limonp/test/unittest/TLocalVector.cpp +41 -0
  49. data/ext/cppjieba/deps/limonp/test/unittest/TLogging.cpp +12 -0
  50. data/ext/cppjieba/deps/limonp/test/unittest/TStdExtension.cpp +95 -0
  51. data/ext/cppjieba/deps/limonp/test/unittest/TStringUtil.cpp +183 -0
  52. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +9 -0
  53. data/ext/cppjieba/include/cppjieba/Jieba.hpp +4 -0
  54. data/ext/cppjieba/include/cppjieba/Trie.hpp +27 -1
  55. data/ext/cppjieba/test/CMakeLists.txt +4 -3
  56. data/ext/cppjieba/test/unittest/CMakeLists.txt +16 -7
  57. data/ext/cppjieba_rb/extconf.rb +11 -6
  58. data/ext/cppjieba_rb/internal.cc +1 -1
  59. data/lib/cppjieba_rb/segment.rb +4 -1
  60. data/lib/cppjieba_rb/version.rb +3 -1
  61. data/lib/cppjieba_rb.rb +12 -5
  62. data/test/test_keyword.rb +8 -8
  63. data/test/test_segment.rb +14 -10
  64. data/test/test_stop_word_filter.rb +5 -3
  65. data/test/test_tagging.rb +5 -2
  66. metadata +63 -140
  67. data/.travis.yml +0 -30
  68. data/ext/cppjieba/.travis.yml +0 -21
  69. data/ext/cppjieba/README_EN.md +0 -115
  70. data/ext/cppjieba/appveyor.yml +0 -32
  71. data/ext/cppjieba/deps/CMakeLists.txt +0 -1
  72. data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
  73. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
  74. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
  75. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
  76. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
  77. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
  78. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
  79. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
  80. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
  81. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
  82. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
  83. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
  84. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
  85. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
  86. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
  87. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
  88. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
  89. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
  90. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
  91. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
  92. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
  93. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
  94. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
  95. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
  96. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
  97. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
  98. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
  99. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  100. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
  101. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
  102. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
  103. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
  104. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
  105. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
  106. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
  107. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
  108. data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
  109. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
  110. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
  111. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
  112. data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
  113. data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
  114. data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
  115. data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
  116. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
  117. data/ext/cppjieba/test/demo.cpp +0 -80
  118. /data/ext/cppjieba/deps/{gtest/src/.deps/.dirstamp → limonp/.gitmodules} +0 -0
  119. /data/ext/cppjieba/deps/limonp/{ArgvContext.hpp → include/limonp/ArgvContext.hpp} +0 -0
  120. /data/ext/cppjieba/deps/limonp/{Closure.hpp → include/limonp/Closure.hpp} +0 -0
  121. /data/ext/cppjieba/deps/limonp/{Colors.hpp → include/limonp/Colors.hpp} +0 -0
  122. /data/ext/cppjieba/deps/limonp/{Condition.hpp → include/limonp/Condition.hpp} +0 -0
  123. /data/ext/cppjieba/deps/limonp/{Config.hpp → include/limonp/Config.hpp} +0 -0
  124. /data/ext/cppjieba/deps/limonp/{ForcePublic.hpp → include/limonp/ForcePublic.hpp} +0 -0
  125. /data/ext/cppjieba/deps/limonp/{NonCopyable.hpp → include/limonp/NonCopyable.hpp} +0 -0
  126. /data/ext/cppjieba/deps/limonp/{StdExtension.hpp → include/limonp/StdExtension.hpp} +0 -0
  127. /data/ext/cppjieba/deps/{gtest/src/gtest_main.cc → limonp/test/unittest/gtest_main.cpp} +0 -0
@@ -1,44 +0,0 @@
1
- #ifndef LIMONP_THREAD_HPP
2
- #define LIMONP_THREAD_HPP
3
-
4
- #include "Logging.hpp"
5
- #include "NonCopyable.hpp"
6
-
7
- namespace limonp {
8
-
9
- class IThread: NonCopyable {
10
- public:
11
- IThread(): isStarted(false), isJoined(false) {
12
- }
13
- virtual ~IThread() {
14
- if(isStarted && !isJoined) {
15
- XCHECK(!pthread_detach(thread_));
16
- }
17
- };
18
-
19
- virtual void Run() = 0;
20
- void Start() {
21
- XCHECK(!isStarted);
22
- XCHECK(!pthread_create(&thread_, NULL, Worker, this));
23
- isStarted = true;
24
- }
25
- void Join() {
26
- XCHECK(!isJoined);
27
- XCHECK(!pthread_join(thread_, NULL));
28
- isJoined = true;
29
- }
30
- private:
31
- static void * Worker(void * data) {
32
- IThread * ptr = (IThread* ) data;
33
- ptr->Run();
34
- return NULL;
35
- }
36
-
37
- pthread_t thread_;
38
- bool isStarted;
39
- bool isJoined;
40
- }; // class IThread
41
-
42
- } // namespace limonp
43
-
44
- #endif // LIMONP_THREAD_HPP
@@ -1,86 +0,0 @@
1
- #ifndef LIMONP_THREAD_POOL_HPP
2
- #define LIMONP_THREAD_POOL_HPP
3
-
4
- #include "Thread.hpp"
5
- #include "BlockingQueue.hpp"
6
- #include "BoundedBlockingQueue.hpp"
7
- #include "Closure.hpp"
8
-
9
- namespace limonp {
10
-
11
- using namespace std;
12
-
13
- //class ThreadPool;
14
- class ThreadPool: NonCopyable {
15
- public:
16
- class Worker: public IThread {
17
- public:
18
- Worker(ThreadPool* pool): ptThreadPool_(pool) {
19
- assert(ptThreadPool_);
20
- }
21
- virtual ~Worker() {
22
- }
23
-
24
- virtual void Run() {
25
- while (true) {
26
- ClosureInterface* closure = ptThreadPool_->queue_.Pop();
27
- if (closure == NULL) {
28
- break;
29
- }
30
- try {
31
- closure->Run();
32
- } catch(std::exception& e) {
33
- XLOG(ERROR) << e.what();
34
- } catch(...) {
35
- XLOG(ERROR) << " unknown exception.";
36
- }
37
- delete closure;
38
- }
39
- }
40
- private:
41
- ThreadPool * ptThreadPool_;
42
- }; // class Worker
43
-
44
- ThreadPool(size_t thread_num)
45
- : threads_(thread_num),
46
- queue_(thread_num) {
47
- assert(thread_num);
48
- for(size_t i = 0; i < threads_.size(); i ++) {
49
- threads_[i] = new Worker(this);
50
- }
51
- }
52
- ~ThreadPool() {
53
- Stop();
54
- }
55
-
56
- void Start() {
57
- for(size_t i = 0; i < threads_.size(); i++) {
58
- threads_[i]->Start();
59
- }
60
- }
61
- void Stop() {
62
- for(size_t i = 0; i < threads_.size(); i ++) {
63
- queue_.Push(NULL);
64
- }
65
- for(size_t i = 0; i < threads_.size(); i ++) {
66
- threads_[i]->Join();
67
- delete threads_[i];
68
- }
69
- threads_.clear();
70
- }
71
-
72
- void Add(ClosureInterface* task) {
73
- assert(task);
74
- queue_.Push(task);
75
- }
76
-
77
- private:
78
- friend class Worker;
79
-
80
- vector<IThread*> threads_;
81
- BoundedBlockingQueue<ClosureInterface*> queue_;
82
- }; // class ThreadPool
83
-
84
- } // namespace limonp
85
-
86
- #endif // LIMONP_THREAD_POOL_HPP
@@ -1,80 +0,0 @@
1
- #include "cppjieba/Jieba.hpp"
2
-
3
- using namespace std;
4
-
5
- const char* const DICT_PATH = "../dict/jieba.dict.utf8";
6
- const char* const HMM_PATH = "../dict/hmm_model.utf8";
7
- const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
8
- const char* const IDF_PATH = "../dict/idf.utf8";
9
- const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
10
-
11
- int main(int argc, char** argv) {
12
- cppjieba::Jieba jieba(DICT_PATH,
13
- HMM_PATH,
14
- USER_DICT_PATH,
15
- IDF_PATH,
16
- STOP_WORD_PATH);
17
- vector<string> words;
18
- vector<cppjieba::Word> jiebawords;
19
- string s;
20
- string result;
21
-
22
- s = "他来到了网易杭研大厦";
23
- cout << s << endl;
24
- cout << "[demo] Cut With HMM" << endl;
25
- jieba.Cut(s, words, true);
26
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
27
-
28
- cout << "[demo] Cut Without HMM " << endl;
29
- jieba.Cut(s, words, false);
30
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
31
-
32
- s = "我来到北京清华大学";
33
- cout << s << endl;
34
- cout << "[demo] CutAll" << endl;
35
- jieba.CutAll(s, words);
36
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
37
-
38
- s = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
39
- cout << s << endl;
40
- cout << "[demo] CutForSearch" << endl;
41
- jieba.CutForSearch(s, words);
42
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
43
-
44
- cout << "[demo] Insert User Word" << endl;
45
- jieba.Cut("男默女泪", words);
46
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
47
- jieba.InsertUserWord("男默女泪");
48
- jieba.Cut("男默女泪", words);
49
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
50
-
51
- cout << "[demo] CutForSearch Word With Offset" << endl;
52
- jieba.CutForSearch(s, jiebawords, true);
53
- cout << jiebawords << endl;
54
-
55
- cout << "[demo] Lookup Tag for Single Token" << endl;
56
- const int DemoTokenMaxLen = 32;
57
- char DemoTokens[][DemoTokenMaxLen] = {"拖拉机", "CEO", "123", "。"};
58
- vector<pair<string, string> > LookupTagres(sizeof(DemoTokens) / DemoTokenMaxLen);
59
- vector<pair<string, string> >::iterator it;
60
- for (it = LookupTagres.begin(); it != LookupTagres.end(); it++) {
61
- it->first = DemoTokens[it - LookupTagres.begin()];
62
- it->second = jieba.LookupTag(it->first);
63
- }
64
- cout << LookupTagres << endl;
65
-
66
- cout << "[demo] Tagging" << endl;
67
- vector<pair<string, string> > tagres;
68
- s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
69
- jieba.Tag(s, tagres);
70
- cout << s << endl;
71
- cout << tagres << endl;
72
-
73
- cout << "[demo] Keyword Extraction" << endl;
74
- const size_t topk = 5;
75
- vector<cppjieba::KeywordExtractor::Word> keywordres;
76
- jieba.extractor.Extract(s, keywordres, topk);
77
- cout << s << endl;
78
- cout << keywordres << endl;
79
- return EXIT_SUCCESS;
80
- }