cppjieba_rb 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +21 -0
  3. data/.github/workflows/linting.yml +30 -0
  4. data/.github/workflows/release.yml +42 -0
  5. data/.github/workflows/tests.yml +47 -0
  6. data/.gitignore +1 -0
  7. data/.rubocop.yml +45 -0
  8. data/.ruby-version +1 -0
  9. data/.yamllint +35 -0
  10. data/CHANGELOG.md +17 -0
  11. data/Gemfile +11 -0
  12. data/README.md +5 -5
  13. data/Rakefile +16 -7
  14. data/cppjieba_rb.gemspec +46 -33
  15. data/ext/cppjieba/.github/workflows/cmake.yml +52 -0
  16. data/ext/cppjieba/.github/workflows/stale-issues.yml +24 -0
  17. data/ext/cppjieba/.gitmodules +3 -0
  18. data/ext/cppjieba/{ChangeLog.md → CHANGELOG.md} +50 -1
  19. data/ext/cppjieba/CMakeLists.txt +11 -14
  20. data/ext/cppjieba/LICENSE +20 -0
  21. data/ext/cppjieba/README.md +9 -18
  22. data/ext/cppjieba/deps/limonp/.github/workflows/cmake.yml +43 -0
  23. data/ext/cppjieba/deps/limonp/.gitignore +9 -0
  24. data/ext/cppjieba/deps/limonp/CHANGELOG.md +160 -0
  25. data/ext/cppjieba/deps/limonp/CMakeLists.txt +61 -0
  26. data/ext/cppjieba/deps/limonp/LICENSE +20 -0
  27. data/ext/cppjieba/deps/limonp/README.md +38 -0
  28. data/ext/cppjieba/deps/limonp/{LocalVector.hpp → include/limonp/LocalVector.hpp} +3 -3
  29. data/ext/cppjieba/deps/limonp/{Logging.hpp → include/limonp/Logging.hpp} +17 -3
  30. data/ext/cppjieba/deps/limonp/{StringUtil.hpp → include/limonp/StringUtil.hpp} +31 -10
  31. data/ext/cppjieba/deps/limonp/test/CMakeLists.txt +8 -0
  32. data/ext/cppjieba/deps/limonp/test/demo.cpp +40 -0
  33. data/ext/cppjieba/deps/limonp/test/testdata/1.conf +5 -0
  34. data/ext/cppjieba/deps/limonp/test/testdata/StdExtension.data +3 -0
  35. data/ext/cppjieba/deps/limonp/test/testdata/dict.gbk +50 -0
  36. data/ext/cppjieba/deps/limonp/test/testdata/dict.utf8 +50 -0
  37. data/ext/cppjieba/deps/limonp/test/testdata/io_testfile +2 -0
  38. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.1.utf8 +93 -0
  39. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.0.utf8 +93 -0
  40. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.1.utf8 +67 -0
  41. data/ext/cppjieba/deps/limonp/test/testdata/jieba.dict.2.utf8 +64 -0
  42. data/ext/cppjieba/deps/limonp/test/unittest/CMakeLists.txt +30 -0
  43. data/ext/cppjieba/deps/limonp/test/unittest/TArgvContext.cpp +16 -0
  44. data/ext/cppjieba/deps/limonp/test/unittest/TCastFloat.cpp +19 -0
  45. data/ext/cppjieba/deps/limonp/test/unittest/TClosure.cpp +85 -0
  46. data/ext/cppjieba/deps/limonp/test/unittest/TColorPrint.cpp +20 -0
  47. data/ext/cppjieba/deps/limonp/test/unittest/TConfig.cpp +17 -0
  48. data/ext/cppjieba/deps/limonp/test/unittest/TLocalVector.cpp +41 -0
  49. data/ext/cppjieba/deps/limonp/test/unittest/TLogging.cpp +12 -0
  50. data/ext/cppjieba/deps/limonp/test/unittest/TStdExtension.cpp +95 -0
  51. data/ext/cppjieba/deps/limonp/test/unittest/TStringUtil.cpp +183 -0
  52. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +9 -0
  53. data/ext/cppjieba/include/cppjieba/Jieba.hpp +4 -0
  54. data/ext/cppjieba/include/cppjieba/Trie.hpp +27 -1
  55. data/ext/cppjieba/test/CMakeLists.txt +4 -3
  56. data/ext/cppjieba/test/unittest/CMakeLists.txt +16 -7
  57. data/ext/cppjieba_rb/extconf.rb +11 -6
  58. data/lib/cppjieba_rb/segment.rb +4 -1
  59. data/lib/cppjieba_rb/version.rb +3 -1
  60. data/lib/cppjieba_rb.rb +12 -5
  61. data/test/test_keyword.rb +8 -8
  62. data/test/test_segment.rb +14 -10
  63. data/test/test_stop_word_filter.rb +5 -3
  64. data/test/test_tagging.rb +5 -2
  65. metadata +63 -140
  66. data/.travis.yml +0 -30
  67. data/ext/cppjieba/.travis.yml +0 -21
  68. data/ext/cppjieba/README_EN.md +0 -115
  69. data/ext/cppjieba/appveyor.yml +0 -32
  70. data/ext/cppjieba/deps/CMakeLists.txt +0 -1
  71. data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
  72. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
  73. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
  74. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
  75. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
  76. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
  77. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
  78. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
  79. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
  80. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
  81. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
  82. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
  83. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
  84. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
  85. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
  86. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
  87. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
  88. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
  89. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
  90. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
  91. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
  92. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
  93. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
  94. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
  95. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
  96. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
  97. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
  98. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  99. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
  100. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
  101. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
  102. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
  103. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
  104. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
  105. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
  106. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
  107. data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
  108. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
  109. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
  110. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
  111. data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
  112. data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
  113. data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
  114. data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
  115. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
  116. data/ext/cppjieba/test/demo.cpp +0 -80
  117. /data/ext/cppjieba/deps/{gtest/src/.deps/.dirstamp → limonp/.gitmodules} +0 -0
  118. /data/ext/cppjieba/deps/limonp/{ArgvContext.hpp → include/limonp/ArgvContext.hpp} +0 -0
  119. /data/ext/cppjieba/deps/limonp/{Closure.hpp → include/limonp/Closure.hpp} +0 -0
  120. /data/ext/cppjieba/deps/limonp/{Colors.hpp → include/limonp/Colors.hpp} +0 -0
  121. /data/ext/cppjieba/deps/limonp/{Condition.hpp → include/limonp/Condition.hpp} +0 -0
  122. /data/ext/cppjieba/deps/limonp/{Config.hpp → include/limonp/Config.hpp} +0 -0
  123. /data/ext/cppjieba/deps/limonp/{ForcePublic.hpp → include/limonp/ForcePublic.hpp} +0 -0
  124. /data/ext/cppjieba/deps/limonp/{NonCopyable.hpp → include/limonp/NonCopyable.hpp} +0 -0
  125. /data/ext/cppjieba/deps/limonp/{StdExtension.hpp → include/limonp/StdExtension.hpp} +0 -0
  126. /data/ext/cppjieba/deps/{gtest/src/gtest_main.cc → limonp/test/unittest/gtest_main.cpp} +0 -0
@@ -1,44 +0,0 @@
1
- #ifndef LIMONP_THREAD_HPP
2
- #define LIMONP_THREAD_HPP
3
-
4
- #include "Logging.hpp"
5
- #include "NonCopyable.hpp"
6
-
7
- namespace limonp {
8
-
9
- class IThread: NonCopyable {
10
- public:
11
- IThread(): isStarted(false), isJoined(false) {
12
- }
13
- virtual ~IThread() {
14
- if(isStarted && !isJoined) {
15
- XCHECK(!pthread_detach(thread_));
16
- }
17
- };
18
-
19
- virtual void Run() = 0;
20
- void Start() {
21
- XCHECK(!isStarted);
22
- XCHECK(!pthread_create(&thread_, NULL, Worker, this));
23
- isStarted = true;
24
- }
25
- void Join() {
26
- XCHECK(!isJoined);
27
- XCHECK(!pthread_join(thread_, NULL));
28
- isJoined = true;
29
- }
30
- private:
31
- static void * Worker(void * data) {
32
- IThread * ptr = (IThread* ) data;
33
- ptr->Run();
34
- return NULL;
35
- }
36
-
37
- pthread_t thread_;
38
- bool isStarted;
39
- bool isJoined;
40
- }; // class IThread
41
-
42
- } // namespace limonp
43
-
44
- #endif // LIMONP_THREAD_HPP
@@ -1,86 +0,0 @@
1
- #ifndef LIMONP_THREAD_POOL_HPP
2
- #define LIMONP_THREAD_POOL_HPP
3
-
4
- #include "Thread.hpp"
5
- #include "BlockingQueue.hpp"
6
- #include "BoundedBlockingQueue.hpp"
7
- #include "Closure.hpp"
8
-
9
- namespace limonp {
10
-
11
- using namespace std;
12
-
13
- //class ThreadPool;
14
- class ThreadPool: NonCopyable {
15
- public:
16
- class Worker: public IThread {
17
- public:
18
- Worker(ThreadPool* pool): ptThreadPool_(pool) {
19
- assert(ptThreadPool_);
20
- }
21
- virtual ~Worker() {
22
- }
23
-
24
- virtual void Run() {
25
- while (true) {
26
- ClosureInterface* closure = ptThreadPool_->queue_.Pop();
27
- if (closure == NULL) {
28
- break;
29
- }
30
- try {
31
- closure->Run();
32
- } catch(std::exception& e) {
33
- XLOG(ERROR) << e.what();
34
- } catch(...) {
35
- XLOG(ERROR) << " unknown exception.";
36
- }
37
- delete closure;
38
- }
39
- }
40
- private:
41
- ThreadPool * ptThreadPool_;
42
- }; // class Worker
43
-
44
- ThreadPool(size_t thread_num)
45
- : threads_(thread_num),
46
- queue_(thread_num) {
47
- assert(thread_num);
48
- for(size_t i = 0; i < threads_.size(); i ++) {
49
- threads_[i] = new Worker(this);
50
- }
51
- }
52
- ~ThreadPool() {
53
- Stop();
54
- }
55
-
56
- void Start() {
57
- for(size_t i = 0; i < threads_.size(); i++) {
58
- threads_[i]->Start();
59
- }
60
- }
61
- void Stop() {
62
- for(size_t i = 0; i < threads_.size(); i ++) {
63
- queue_.Push(NULL);
64
- }
65
- for(size_t i = 0; i < threads_.size(); i ++) {
66
- threads_[i]->Join();
67
- delete threads_[i];
68
- }
69
- threads_.clear();
70
- }
71
-
72
- void Add(ClosureInterface* task) {
73
- assert(task);
74
- queue_.Push(task);
75
- }
76
-
77
- private:
78
- friend class Worker;
79
-
80
- vector<IThread*> threads_;
81
- BoundedBlockingQueue<ClosureInterface*> queue_;
82
- }; // class ThreadPool
83
-
84
- } // namespace limonp
85
-
86
- #endif // LIMONP_THREAD_POOL_HPP
@@ -1,80 +0,0 @@
1
- #include "cppjieba/Jieba.hpp"
2
-
3
- using namespace std;
4
-
5
- const char* const DICT_PATH = "../dict/jieba.dict.utf8";
6
- const char* const HMM_PATH = "../dict/hmm_model.utf8";
7
- const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
8
- const char* const IDF_PATH = "../dict/idf.utf8";
9
- const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
10
-
11
- int main(int argc, char** argv) {
12
- cppjieba::Jieba jieba(DICT_PATH,
13
- HMM_PATH,
14
- USER_DICT_PATH,
15
- IDF_PATH,
16
- STOP_WORD_PATH);
17
- vector<string> words;
18
- vector<cppjieba::Word> jiebawords;
19
- string s;
20
- string result;
21
-
22
- s = "他来到了网易杭研大厦";
23
- cout << s << endl;
24
- cout << "[demo] Cut With HMM" << endl;
25
- jieba.Cut(s, words, true);
26
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
27
-
28
- cout << "[demo] Cut Without HMM " << endl;
29
- jieba.Cut(s, words, false);
30
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
31
-
32
- s = "我来到北京清华大学";
33
- cout << s << endl;
34
- cout << "[demo] CutAll" << endl;
35
- jieba.CutAll(s, words);
36
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
37
-
38
- s = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
39
- cout << s << endl;
40
- cout << "[demo] CutForSearch" << endl;
41
- jieba.CutForSearch(s, words);
42
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
43
-
44
- cout << "[demo] Insert User Word" << endl;
45
- jieba.Cut("男默女泪", words);
46
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
47
- jieba.InsertUserWord("男默女泪");
48
- jieba.Cut("男默女泪", words);
49
- cout << limonp::Join(words.begin(), words.end(), "/") << endl;
50
-
51
- cout << "[demo] CutForSearch Word With Offset" << endl;
52
- jieba.CutForSearch(s, jiebawords, true);
53
- cout << jiebawords << endl;
54
-
55
- cout << "[demo] Lookup Tag for Single Token" << endl;
56
- const int DemoTokenMaxLen = 32;
57
- char DemoTokens[][DemoTokenMaxLen] = {"拖拉机", "CEO", "123", "。"};
58
- vector<pair<string, string> > LookupTagres(sizeof(DemoTokens) / DemoTokenMaxLen);
59
- vector<pair<string, string> >::iterator it;
60
- for (it = LookupTagres.begin(); it != LookupTagres.end(); it++) {
61
- it->first = DemoTokens[it - LookupTagres.begin()];
62
- it->second = jieba.LookupTag(it->first);
63
- }
64
- cout << LookupTagres << endl;
65
-
66
- cout << "[demo] Tagging" << endl;
67
- vector<pair<string, string> > tagres;
68
- s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
69
- jieba.Tag(s, tagres);
70
- cout << s << endl;
71
- cout << tagres << endl;
72
-
73
- cout << "[demo] Keyword Extraction" << endl;
74
- const size_t topk = 5;
75
- vector<cppjieba::KeywordExtractor::Word> keywordres;
76
- jieba.extractor.Extract(s, keywordres, topk);
77
- cout << s << endl;
78
- cout << keywordres << endl;
79
- return EXIT_SUCCESS;
80
- }