cppjieba_rb 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +81 -0
  8. data/Rakefile +20 -0
  9. data/cppjieba_rb.gemspec +50 -0
  10. data/ext/cppjieba/.gitignore +17 -0
  11. data/ext/cppjieba/.travis.yml +22 -0
  12. data/ext/cppjieba/CMakeLists.txt +28 -0
  13. data/ext/cppjieba/ChangeLog.md +236 -0
  14. data/ext/cppjieba/README.md +285 -0
  15. data/ext/cppjieba/README_EN.md +111 -0
  16. data/ext/cppjieba/appveyor.yml +32 -0
  17. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  18. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  42. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  45. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  46. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  56. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  57. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  58. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  59. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  60. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  61. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  62. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  63. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  64. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  65. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  66. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  67. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  68. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  69. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  70. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  71. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  72. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  73. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  74. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  75. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  76. data/ext/cppjieba/dict/README.md +31 -0
  77. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  78. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  79. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  80. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  83. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  84. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  85. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  86. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  87. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  88. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  89. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  90. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  91. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  92. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  93. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  94. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  95. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  96. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  98. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  99. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  100. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  101. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  102. data/ext/cppjieba/test/CMakeLists.txt +5 -0
  103. data/ext/cppjieba/test/demo.cpp +80 -0
  104. data/ext/cppjieba/test/load_test.cpp +54 -0
  105. data/ext/cppjieba/test/testdata/curl.res +1 -0
  106. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  107. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  108. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  109. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  110. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  111. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  112. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  113. data/ext/cppjieba/test/testdata/load_test.urls +2 -0
  114. data/ext/cppjieba/test/testdata/review.100 +100 -0
  115. data/ext/cppjieba/test/testdata/review.100.res +200 -0
  116. data/ext/cppjieba/test/testdata/server.conf +19 -0
  117. data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
  118. data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
  119. data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  120. data/ext/cppjieba/test/testdata/userdict.english +2 -0
  121. data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
  122. data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
  123. data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
  124. data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
  125. data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
  126. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  127. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  128. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  129. data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
  130. data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
  131. data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
  132. data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
  133. data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
  134. data/ext/cppjieba_rb/extconf.rb +26 -0
  135. data/ext/cppjieba_rb/internal.cc +148 -0
  136. data/lib/cppjieba_rb/segment.rb +20 -0
  137. data/lib/cppjieba_rb/version.rb +3 -0
  138. data/lib/cppjieba_rb.rb +34 -0
  139. data/test/test_keyword.rb +17 -0
  140. data/test/test_segment.rb +24 -0
  141. data/test/test_tagging.rb +19 -0
  142. metadata +244 -0
@@ -0,0 +1,233 @@
1
+ // Copyright 2003 Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ //
30
+ // Authors: Dan Egnor (egnor@google.com)
31
+ //
32
+ // A "smart" pointer type with reference tracking. Every pointer to a
33
+ // particular object is kept on a circular linked list. When the last pointer
34
+ // to an object is destroyed or reassigned, the object is deleted.
35
+ //
36
+ // Used properly, this deletes the object when the last reference goes away.
37
+ // There are several caveats:
38
+ // - Like all reference counting schemes, cycles lead to leaks.
39
+ // - Each smart pointer is actually two pointers (e.g. 8 bytes instead of 4 on a 32-bit target).
40
+ // - Every time a pointer is assigned, the entire list of pointers to that
41
+ // object is traversed. This class is therefore NOT SUITABLE when there
42
+ // will often be more than two or three pointers to a particular object.
43
+ // - References are only tracked as long as linked_ptr<> objects are copied.
44
+ // If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
45
+ // will happen (double deletion).
46
+ //
47
+ // A good use of this class is storing object references in STL containers.
48
+ // You can safely put linked_ptr<> in a vector<>.
49
+ // Other uses may not be as good.
50
+ //
51
+ // Note: If you use an incomplete type with linked_ptr<>, the class
52
+ // *containing* linked_ptr<> must have a constructor and destructor (even
53
+ // if they do nothing!).
54
+ //
55
+ // Bill Gibbons suggested we use something like this.
56
+ //
57
+ // Thread Safety:
58
+ // Unlike other linked_ptr implementations, in this implementation
59
+ // a linked_ptr object is thread-safe in the sense that:
60
+ // - it's safe to copy linked_ptr objects concurrently,
61
+ // - it's safe to copy *from* a linked_ptr and read its underlying
62
+ // raw pointer (e.g. via get()) concurrently, and
63
+ // - it's safe to write to two linked_ptrs that point to the same
64
+ // shared object concurrently.
65
+ // TODO(wan@google.com): rename this to safe_linked_ptr to avoid
66
+ // confusion with normal linked_ptr.
67
+
68
+ #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
69
+ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
70
+
71
+ #include <stdlib.h>
72
+ #include <assert.h>
73
+
74
+ #include "gtest/internal/gtest-port.h"
75
+
76
+ namespace testing {
77
+ namespace internal {
78
+
79
+ // Protects copying of all linked_ptr objects.
80
+ GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
81
+
82
+ // This is used internally by all instances of linked_ptr<>. It needs to be
83
+ // a non-template class because different types of linked_ptr<> can refer to
84
+ // the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
85
+ // So, it needs to be possible for different types of linked_ptr to participate
86
+ // in the same circular linked list, so we need a single class type here.
87
+ //
88
+ // DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr<T>.
89
+ class linked_ptr_internal {
90
+ public:
91
+ // Create a new circle that includes only this instance.
92
+ void join_new() {
93
+ next_ = this;
94
+ }
95
+
96
+ // Many linked_ptr operations may change p.link_ for some linked_ptr
97
+ // variable p in the same circle as this object. Therefore we need
98
+ // to prevent two such operations from occurring concurrently.
99
+ //
100
+ // Note that different types of linked_ptr objects can coexist in a
101
+ // circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
102
+ // linked_ptr<Derived2>). Therefore we must use a single mutex to
103
+ // protect all linked_ptr objects. This can create serious
104
+ // contention in production code, but is acceptable in a testing
105
+ // framework.
106
+
107
+ // Join an existing circle.
108
+ // L < g_linked_ptr_mutex
109
+ void join(linked_ptr_internal const* ptr) {
110
+ MutexLock lock(&g_linked_ptr_mutex);
111
+
112
+ linked_ptr_internal const* p = ptr;
113
+ while (p->next_ != ptr) p = p->next_;
114
+ p->next_ = this;
115
+ next_ = ptr;
116
+ }
117
+
118
+ // Leave whatever circle we're part of. Returns true if we were the
119
+ // last member of the circle. Once this is done, you can join() another.
120
+ // L < g_linked_ptr_mutex
121
+ bool depart() {
122
+ MutexLock lock(&g_linked_ptr_mutex);
123
+
124
+ if (next_ == this) return true;
125
+ linked_ptr_internal const* p = next_;
126
+ while (p->next_ != this) p = p->next_;
127
+ p->next_ = next_;
128
+ return false;
129
+ }
130
+
131
+ private:
132
+ mutable linked_ptr_internal const* next_;
133
+ };
134
+
135
+ template <typename T>
136
+ class linked_ptr {
137
+ public:
138
+ typedef T element_type;
139
+
140
+ // Take over ownership of a raw pointer. This should happen as soon as
141
+ // possible after the object is created.
142
+ explicit linked_ptr(T* ptr = NULL) { capture(ptr); }
143
+ ~linked_ptr() { depart(); }
144
+
145
+ // Copy an existing linked_ptr<>, adding ourselves to the list of references.
146
+ template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
147
+ linked_ptr(linked_ptr const& ptr) { // NOLINT
148
+ assert(&ptr != this);
149
+ copy(&ptr);
150
+ }
151
+
152
+ // Assignment releases the old value and acquires the new.
153
+ template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
154
+ depart();
155
+ copy(&ptr);
156
+ return *this;
157
+ }
158
+
159
+ linked_ptr& operator=(linked_ptr const& ptr) {
160
+ if (&ptr != this) {
161
+ depart();
162
+ copy(&ptr);
163
+ }
164
+ return *this;
165
+ }
166
+
167
+ // Smart pointer members.
168
+ void reset(T* ptr = NULL) {
169
+ depart();
170
+ capture(ptr);
171
+ }
172
+ T* get() const { return value_; }
173
+ T* operator->() const { return value_; }
174
+ T& operator*() const { return *value_; }
175
+
176
+ bool operator==(T* p) const { return value_ == p; }
177
+ bool operator!=(T* p) const { return value_ != p; }
178
+ template <typename U>
179
+ bool operator==(linked_ptr<U> const& ptr) const {
180
+ return value_ == ptr.get();
181
+ }
182
+ template <typename U>
183
+ bool operator!=(linked_ptr<U> const& ptr) const {
184
+ return value_ != ptr.get();
185
+ }
186
+
187
+ private:
188
+ template <typename U>
189
+ friend class linked_ptr;
190
+
191
+ T* value_;
192
+ linked_ptr_internal link_;
193
+
194
+ void depart() {
195
+ if (link_.depart()) delete value_;
196
+ }
197
+
198
+ void capture(T* ptr) {
199
+ value_ = ptr;
200
+ link_.join_new();
201
+ }
202
+
203
+ template <typename U> void copy(linked_ptr<U> const* ptr) {
204
+ value_ = ptr->get();
205
+ if (value_)
206
+ link_.join(&ptr->link_);
207
+ else
208
+ link_.join_new();
209
+ }
210
+ };
211
+
212
+ template<typename T> inline
213
+ bool operator==(T* ptr, const linked_ptr<T>& x) {
214
+ return ptr == x.get();
215
+ }
216
+
217
+ template<typename T> inline
218
+ bool operator!=(T* ptr, const linked_ptr<T>& x) {
219
+ return ptr != x.get();
220
+ }
221
+
222
+ // A function to convert T* into linked_ptr<T>
223
+ // Doing e.g. make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter notation
224
+ // for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg))
225
+ template <typename T>
226
+ linked_ptr<T> make_linked_ptr(T* ptr) {
227
+ return linked_ptr<T>(ptr);
228
+ }
229
+
230
+ } // namespace internal
231
+ } // namespace testing
232
+
233
+ #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_