cppjieba_rb 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +81 -0
  8. data/Rakefile +20 -0
  9. data/cppjieba_rb.gemspec +50 -0
  10. data/ext/cppjieba/.gitignore +17 -0
  11. data/ext/cppjieba/.travis.yml +22 -0
  12. data/ext/cppjieba/CMakeLists.txt +28 -0
  13. data/ext/cppjieba/ChangeLog.md +236 -0
  14. data/ext/cppjieba/README.md +285 -0
  15. data/ext/cppjieba/README_EN.md +111 -0
  16. data/ext/cppjieba/appveyor.yml +32 -0
  17. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  18. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  42. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  45. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  46. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  56. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  57. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  58. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  59. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  60. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  61. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  62. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  63. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  64. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  65. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  66. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  67. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  68. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  69. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  70. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  71. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  72. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  73. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  74. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  75. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  76. data/ext/cppjieba/dict/README.md +31 -0
  77. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  78. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  79. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  80. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  83. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  84. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  85. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  86. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  87. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  88. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  89. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  90. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  91. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  92. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  93. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  94. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  95. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  96. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  98. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  99. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  100. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  101. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  102. data/ext/cppjieba/test/CMakeLists.txt +5 -0
  103. data/ext/cppjieba/test/demo.cpp +80 -0
  104. data/ext/cppjieba/test/load_test.cpp +54 -0
  105. data/ext/cppjieba/test/testdata/curl.res +1 -0
  106. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  107. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  108. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  109. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  110. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  111. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  112. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  113. data/ext/cppjieba/test/testdata/load_test.urls +2 -0
  114. data/ext/cppjieba/test/testdata/review.100 +100 -0
  115. data/ext/cppjieba/test/testdata/review.100.res +200 -0
  116. data/ext/cppjieba/test/testdata/server.conf +19 -0
  117. data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
  118. data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
  119. data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  120. data/ext/cppjieba/test/testdata/userdict.english +2 -0
  121. data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
  122. data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
  123. data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
  124. data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
  125. data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
  126. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  127. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  128. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  129. data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
  130. data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
  131. data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
  132. data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
  133. data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
  134. data/ext/cppjieba_rb/extconf.rb +26 -0
  135. data/ext/cppjieba_rb/internal.cc +148 -0
  136. data/lib/cppjieba_rb/segment.rb +20 -0
  137. data/lib/cppjieba_rb/version.rb +3 -0
  138. data/lib/cppjieba_rb.rb +34 -0
  139. data/test/test_keyword.rb +17 -0
  140. data/test/test_segment.rb +24 -0
  141. data/test/test_tagging.rb +19 -0
  142. metadata +244 -0
@@ -0,0 +1,233 @@
1
+ // Copyright 2003 Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ //
30
+ // Authors: Dan Egnor (egnor@google.com)
31
+ //
32
+ // A "smart" pointer type with reference tracking. Every pointer to a
33
+ // particular object is kept on a circular linked list. When the last pointer
34
+ // to an object is destroyed or reassigned, the object is deleted.
35
+ //
36
+ // Used properly, this deletes the object when the last reference goes away.
37
+ // There are several caveats:
38
+ // - Like all reference counting schemes, cycles lead to leaks.
39
+ // - Each smart pointer is actually two pointers (twice the size of a raw pointer, e.g. 8 bytes instead of 4 on a 32-bit target).
40
+ // - Every time a pointer is assigned, the entire list of pointers to that
41
+ // object is traversed. This class is therefore NOT SUITABLE when there
42
+ // will often be more than two or three pointers to a particular object.
43
+ // - References are only tracked as long as linked_ptr<> objects are copied.
44
+ // If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
45
+ // will happen (double deletion).
46
+ //
47
+ // A good use of this class is storing object references in STL containers.
48
+ // You can safely put linked_ptr<> in a vector<>.
49
+ // Other uses may not be as good.
50
+ //
51
+ // Note: If you use an incomplete type with linked_ptr<>, the class
52
+ // *containing* linked_ptr<> must have a constructor and destructor (even
53
+ // if they do nothing!).
54
+ //
55
+ // Bill Gibbons suggested we use something like this.
56
+ //
57
+ // Thread Safety:
58
+ // Unlike other linked_ptr implementations, in this implementation
59
+ // a linked_ptr object is thread-safe in the sense that:
60
+ // - it's safe to copy linked_ptr objects concurrently,
61
+ // - it's safe to copy *from* a linked_ptr and read its underlying
62
+ // raw pointer (e.g. via get()) concurrently, and
63
+ // - it's safe to write to two linked_ptrs that point to the same
64
+ // shared object concurrently.
65
+ // TODO(wan@google.com): rename this to safe_linked_ptr to avoid
66
+ // confusion with normal linked_ptr.
67
+
68
+ #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
69
+ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
70
+
71
+ #include <stdlib.h>
72
+ #include <assert.h>
73
+
74
+ #include "gtest/internal/gtest-port.h"
75
+
76
+ namespace testing {
77
+ namespace internal {
78
+
79
+ // Protects the circle links of all linked_ptr objects (locked in join() and depart()).
80
+ GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
81
+
82
+ // This is used internally by all instances of linked_ptr<>. It needs to be
83
+ // a non-template class because different types of linked_ptr<> can refer to
84
+ // the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
85
+ // So, it needs to be possible for different types of linked_ptr to participate
86
+ // in the same circular linked list, so we need a single class type here.
87
+ // Each instance is one node of that list and stores only the link to its successor.
88
+ // DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr<T>.
89
+ class linked_ptr_internal {
90
+ public:
91
+ // Create a new circle that includes only this instance. No lock is taken here: only this node's own next_ is written.
92
+ void join_new() {
93
+ next_ = this;  // a one-member circle points back at itself
94
+ }
95
+
96
+ // Many linked_ptr operations may change p.link_ for some linked_ptr
97
+ // variable p in the same circle as this object. Therefore we need
98
+ // to prevent two such operations from occurring concurrently.
99
+ //
100
+ // Note that different types of linked_ptr objects can coexist in a
101
+ // circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
102
+ // linked_ptr<Derived2>). Therefore we must use a single mutex to
103
+ // protect all linked_ptr objects. This can create serious
104
+ // contention in production code, but is acceptable in a testing
105
+ // framework.
106
+
107
+ // Join the existing circle that ptr belongs to, inserting this node immediately before ptr.
108
+ // L < g_linked_ptr_mutex (lock annotation; a MutexLock on g_linked_ptr_mutex is constructed on entry)
109
+ void join(linked_ptr_internal const* ptr) {
110
+ MutexLock lock(&g_linked_ptr_mutex);
111
+
112
+ linked_ptr_internal const* p = ptr;
113
+ while (p->next_ != ptr) p = p->next_;  // walk the whole circle to find ptr's predecessor
114
+ p->next_ = this;  // writable through a const pointer because next_ is mutable
115
+ next_ = ptr;  // close the circle again: predecessor -> this -> ptr
116
+ }
117
+
118
+ // Leave whatever circle we're part of. Returns true if we were the
119
+ // last member of the circle. Once this is done, you can join() another.
120
+ // L < g_linked_ptr_mutex (lock annotation; a MutexLock on g_linked_ptr_mutex is constructed on entry)
121
+ bool depart() {
122
+ MutexLock lock(&g_linked_ptr_mutex);
123
+
124
+ if (next_ == this) return true;  // sole member: there is nothing to unlink
125
+ linked_ptr_internal const* p = next_;
126
+ while (p->next_ != this) p = p->next_;  // find this node's predecessor
127
+ p->next_ = next_;  // bypass this node; our own next_ is left unchanged
128
+ return false;
129
+ }
130
+
131
+ private:
132
+ mutable linked_ptr_internal const* next_;  // successor in the circular list; mutable so const nodes can be relinked
133
+ };
134
+ // Owning pointer to T: every copy joins the same circular list through link_, and the copy that leaves the list last deletes the object.
135
+ template <typename T>
136
+ class linked_ptr {
137
+ public:
138
+ typedef T element_type;
139
+
140
+ // Take over ownership of a raw pointer. This should happen as soon as
141
+ // possible after the object is created.
142
+ explicit linked_ptr(T* ptr = NULL) { capture(ptr); }
143
+ ~linked_ptr() { depart(); }  // deletes the object if this was the last pointer in its circle
144
+
145
+ // Copy an existing linked_ptr<>, adding ourselves to the list of references.
146
+ template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
147
+ linked_ptr(linked_ptr const& ptr) { // NOLINT
148
+ assert(&ptr != this);  // guards against self-initialization such as `linked_ptr<T> p(p);`
149
+ copy(&ptr);
150
+ }
151
+
152
+ // Assignment releases the old value and acquires the new.
153
+ template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
154
+ depart();
155
+ copy(&ptr);
156
+ return *this;
157
+ }
158
+
159
+ linked_ptr& operator=(linked_ptr const& ptr) {
160
+ if (&ptr != this) {  // self-assignment must skip depart(), which could delete the object we are about to copy
161
+ depart();
162
+ copy(&ptr);
163
+ }
164
+ return *this;
165
+ }
166
+
167
+ // Smart pointer members.
168
+ void reset(T* ptr = NULL) {
169
+ depart();  // release (and possibly delete) the current object first
170
+ capture(ptr);  // then start a fresh one-member circle for ptr
171
+ }
172
+ T* get() const { return value_; }
173
+ T* operator->() const { return value_; }
174
+ T& operator*() const { return *value_; }  // no null check is performed
175
+ // Comparisons look only at the stored raw pointer value.
176
+ bool operator==(T* p) const { return value_ == p; }
177
+ bool operator!=(T* p) const { return value_ != p; }
178
+ template <typename U>
179
+ bool operator==(linked_ptr<U> const& ptr) const {
180
+ return value_ == ptr.get();
181
+ }
182
+ template <typename U>
183
+ bool operator!=(linked_ptr<U> const& ptr) const {
184
+ return value_ != ptr.get();
185
+ }
186
+
187
+ private:
188
+ template <typename U>
189
+ friend class linked_ptr;  // lets copy() reach link_ of a linked_ptr<U> for any U
190
+
191
+ T* value_;  // the managed object, or NULL
192
+ linked_ptr_internal link_;  // this pointer's node in the circle of co-owners
193
+ // Leave the current circle, deleting the object if we were its last member.
194
+ void depart() {
195
+ if (link_.depart()) delete value_;
196
+ }
197
+ // Store ptr and start a one-member circle containing only this pointer.
198
+ void capture(T* ptr) {
199
+ value_ = ptr;
200
+ link_.join_new();
201
+ }
202
+ // Share ownership with *ptr: copy its raw pointer and join its circle; a null pointer gets its own circle instead.
203
+ template <typename U> void copy(linked_ptr<U> const* ptr) {
204
+ value_ = ptr->get();
205
+ if (value_)
206
+ link_.join(&ptr->link_);
207
+ else
208
+ link_.join_new();
209
+ }
210
+ };
211
+ // Equality with the raw pointer on the left-hand side; compares ptr against x.get().
212
+ template<typename T> inline
213
+ bool operator==(T* ptr, const linked_ptr<T>& x) {
214
+ return ptr == x.get();
215
+ }
216
+ // Inequality with the raw pointer on the left-hand side; compares ptr against x.get().
217
+ template<typename T> inline
218
+ bool operator!=(T* ptr, const linked_ptr<T>& x) {
219
+ return ptr != x.get();
220
+ }
221
+
222
+ // Wraps a raw T* in a linked_ptr<T>, deducing T from the argument so callers need not spell it out.
223
+ // Doing e.g. make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter notation
224
+ // for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg))
225
+ template <typename T>
226
+ linked_ptr<T> make_linked_ptr(T* ptr) {
227
+ return linked_ptr<T>(ptr);  // uses the explicit raw-pointer constructor, which takes ownership of ptr
228
+ }
229
+
230
+ } // namespace internal
231
+ } // namespace testing
232
+
233
+ #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_