cppjieba_rb 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.gitmodules +3 -0
- data/.travis.yml +26 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +81 -0
- data/Rakefile +20 -0
- data/cppjieba_rb.gemspec +50 -0
- data/ext/cppjieba/.gitignore +17 -0
- data/ext/cppjieba/.travis.yml +22 -0
- data/ext/cppjieba/CMakeLists.txt +28 -0
- data/ext/cppjieba/ChangeLog.md +236 -0
- data/ext/cppjieba/README.md +285 -0
- data/ext/cppjieba/README_EN.md +111 -0
- data/ext/cppjieba/appveyor.yml +32 -0
- data/ext/cppjieba/deps/CMakeLists.txt +1 -0
- data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
- data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
- data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
- data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
- data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
- data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
- data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
- data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
- data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
- data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
- data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
- data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
- data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
- data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
- data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
- data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
- data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
- data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
- data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
- data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
- data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
- data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
- data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
- data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
- data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
- data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
- data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
- data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
- data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
- data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
- data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
- data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
- data/ext/cppjieba/dict/README.md +31 -0
- data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
- data/ext/cppjieba/dict/idf.utf8 +258826 -0
- data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
- data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
- data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
- data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
- data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
- data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
- data/ext/cppjieba/dict/user.dict.utf8 +4 -0
- data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
- data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
- data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
- data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
- data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
- data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
- data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
- data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
- data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
- data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
- data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
- data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
- data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
- data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
- data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
- data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
- data/ext/cppjieba/test/CMakeLists.txt +5 -0
- data/ext/cppjieba/test/demo.cpp +80 -0
- data/ext/cppjieba/test/load_test.cpp +54 -0
- data/ext/cppjieba/test/testdata/curl.res +1 -0
- data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
- data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
- data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
- data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
- data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
- data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
- data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
- data/ext/cppjieba/test/testdata/load_test.urls +2 -0
- data/ext/cppjieba/test/testdata/review.100 +100 -0
- data/ext/cppjieba/test/testdata/review.100.res +200 -0
- data/ext/cppjieba/test/testdata/server.conf +19 -0
- data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
- data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
- data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
- data/ext/cppjieba/test/testdata/userdict.english +2 -0
- data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
- data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
- data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
- data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
- data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
- data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
- data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
- data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
- data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
- data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
- data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
- data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
- data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
- data/ext/cppjieba_rb/extconf.rb +26 -0
- data/ext/cppjieba_rb/internal.cc +148 -0
- data/lib/cppjieba_rb/segment.rb +20 -0
- data/lib/cppjieba_rb/version.rb +3 -0
- data/lib/cppjieba_rb.rb +34 -0
- data/test/test_keyword.rb +17 -0
- data/test/test_segment.rb +24 -0
- data/test/test_tagging.rb +19 -0
- metadata +244 -0
@@ -0,0 +1,233 @@
|
|
1
|
+
// Copyright 2003 Google Inc.
|
2
|
+
// All rights reserved.
|
3
|
+
//
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
5
|
+
// modification, are permitted provided that the following conditions are
|
6
|
+
// met:
|
7
|
+
//
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
12
|
+
// in the documentation and/or other materials provided with the
|
13
|
+
// distribution.
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
16
|
+
// this software without specific prior written permission.
|
17
|
+
//
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
//
|
30
|
+
// Authors: Dan Egnor (egnor@google.com)
|
31
|
+
//
|
32
|
+
// A "smart" pointer type with reference tracking. Every pointer to a
|
33
|
+
// particular object is kept on a circular linked list. When the last pointer
|
34
|
+
// to an object is destroyed or reassigned, the object is deleted.
|
35
|
+
//
|
36
|
+
// Used properly, this deletes the object when the last reference goes away.
|
37
|
+
// There are several caveats:
|
38
|
+
// - Like all reference counting schemes, cycles lead to leaks.
|
39
|
+
// - Each smart pointer is actually two pointers, i.e. twice the size of a raw pointer (8 bytes instead of 4 on a 32-bit platform).
|
40
|
+
// - Every time a pointer is assigned, the entire list of pointers to that
|
41
|
+
// object is traversed. This class is therefore NOT SUITABLE when there
|
42
|
+
// will often be more than two or three pointers to a particular object.
|
43
|
+
// - References are only tracked as long as linked_ptr<> objects are copied.
|
44
|
+
// If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
|
45
|
+
// will happen (double deletion).
|
46
|
+
//
|
47
|
+
// A good use of this class is storing object references in STL containers.
|
48
|
+
// You can safely put linked_ptr<> in a vector<>.
|
49
|
+
// Other uses may not be as good.
|
50
|
+
//
|
51
|
+
// Note: If you use an incomplete type with linked_ptr<>, the class
|
52
|
+
// *containing* linked_ptr<> must have a constructor and destructor (even
|
53
|
+
// if they do nothing!).
|
54
|
+
//
|
55
|
+
// Bill Gibbons suggested we use something like this.
|
56
|
+
//
|
57
|
+
// Thread Safety:
|
58
|
+
// Unlike other linked_ptr implementations, in this implementation
|
59
|
+
// a linked_ptr object is thread-safe in the sense that:
|
60
|
+
// - it's safe to copy linked_ptr objects concurrently,
|
61
|
+
// - it's safe to copy *from* a linked_ptr and read its underlying
|
62
|
+
// raw pointer (e.g. via get()) concurrently, and
|
63
|
+
// - it's safe to write to two linked_ptrs that point to the same
|
64
|
+
// shared object concurrently.
|
65
|
+
// TODO(wan@google.com): rename this to safe_linked_ptr to avoid
|
66
|
+
// confusion with normal linked_ptr.
|
67
|
+
|
68
|
+
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
|
69
|
+
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
|
70
|
+
|
71
|
+
#include <stdlib.h>
|
72
|
+
#include <assert.h>
|
73
|
+
|
74
|
+
#include "gtest/internal/gtest-port.h"
|
75
|
+
|
76
|
+
namespace testing {
|
77
|
+
namespace internal {
|
78
|
+
|
79
|
+
// Protects copying of all linked_ptr objects.
|
80
|
+
GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
|
81
|
+
|
82
|
+
// This is used internally by all instances of linked_ptr<>. It needs to be
|
83
|
+
// a non-template class because different types of linked_ptr<> can refer to
|
84
|
+
// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
|
85
|
+
// So, it needs to be possible for different types of linked_ptr to participate
|
86
|
+
// in the same circular linked list, so we need a single class type here.
|
87
|
+
//
|
88
|
+
// DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr<T>.
|
89
|
+
class linked_ptr_internal {
|
90
|
+
public:
|
91
|
+
// Create a new circle that includes only this instance.
|
92
|
+
void join_new() {
|
93
|
+
next_ = this;
|
94
|
+
}
|
95
|
+
|
96
|
+
// Many linked_ptr operations may change p.link_ for some linked_ptr
|
97
|
+
// variable p in the same circle as this object. Therefore we need
|
98
|
+
// to prevent two such operations from occurring concurrently.
|
99
|
+
//
|
100
|
+
// Note that different types of linked_ptr objects can coexist in a
|
101
|
+
// circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
|
102
|
+
// linked_ptr<Derived2>). Therefore we must use a single mutex to
|
103
|
+
// protect all linked_ptr objects. This can create serious
|
104
|
+
// contention in production code, but is acceptable in a testing
|
105
|
+
// framework.
|
106
|
+
|
107
|
+
// Join an existing circle.
|
108
|
+
// L < g_linked_ptr_mutex
|
109
|
+
void join(linked_ptr_internal const* ptr) {
|
110
|
+
MutexLock lock(&g_linked_ptr_mutex);
|
111
|
+
|
112
|
+
linked_ptr_internal const* p = ptr;
|
113
|
+
while (p->next_ != ptr) p = p->next_;
|
114
|
+
p->next_ = this;
|
115
|
+
next_ = ptr;
|
116
|
+
}
|
117
|
+
|
118
|
+
// Leave whatever circle we're part of. Returns true if we were the
|
119
|
+
// last member of the circle. Once this is done, you can join() another.
|
120
|
+
// L < g_linked_ptr_mutex
|
121
|
+
bool depart() {
|
122
|
+
MutexLock lock(&g_linked_ptr_mutex);
|
123
|
+
|
124
|
+
if (next_ == this) return true;
|
125
|
+
linked_ptr_internal const* p = next_;
|
126
|
+
while (p->next_ != this) p = p->next_;
|
127
|
+
p->next_ = next_;
|
128
|
+
return false;
|
129
|
+
}
|
130
|
+
|
131
|
+
private:
|
132
|
+
mutable linked_ptr_internal const* next_;
|
133
|
+
};
|
134
|
+
|
135
|
+
template <typename T>
|
136
|
+
class linked_ptr {
|
137
|
+
public:
|
138
|
+
typedef T element_type;
|
139
|
+
|
140
|
+
// Take over ownership of a raw pointer. This should happen as soon as
|
141
|
+
// possible after the object is created.
|
142
|
+
explicit linked_ptr(T* ptr = NULL) { capture(ptr); }
|
143
|
+
~linked_ptr() { depart(); }
|
144
|
+
|
145
|
+
// Copy an existing linked_ptr<>, adding ourselves to the list of references.
|
146
|
+
template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
|
147
|
+
linked_ptr(linked_ptr const& ptr) { // NOLINT
|
148
|
+
assert(&ptr != this);
|
149
|
+
copy(&ptr);
|
150
|
+
}
|
151
|
+
|
152
|
+
// Assignment releases the old value and acquires the new.
|
153
|
+
template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
|
154
|
+
depart();
|
155
|
+
copy(&ptr);
|
156
|
+
return *this;
|
157
|
+
}
|
158
|
+
|
159
|
+
linked_ptr& operator=(linked_ptr const& ptr) {
|
160
|
+
if (&ptr != this) {
|
161
|
+
depart();
|
162
|
+
copy(&ptr);
|
163
|
+
}
|
164
|
+
return *this;
|
165
|
+
}
|
166
|
+
|
167
|
+
// Smart pointer members.
|
168
|
+
void reset(T* ptr = NULL) {
|
169
|
+
depart();
|
170
|
+
capture(ptr);
|
171
|
+
}
|
172
|
+
T* get() const { return value_; }
|
173
|
+
T* operator->() const { return value_; }
|
174
|
+
T& operator*() const { return *value_; }
|
175
|
+
|
176
|
+
bool operator==(T* p) const { return value_ == p; }
|
177
|
+
bool operator!=(T* p) const { return value_ != p; }
|
178
|
+
template <typename U>
|
179
|
+
bool operator==(linked_ptr<U> const& ptr) const {
|
180
|
+
return value_ == ptr.get();
|
181
|
+
}
|
182
|
+
template <typename U>
|
183
|
+
bool operator!=(linked_ptr<U> const& ptr) const {
|
184
|
+
return value_ != ptr.get();
|
185
|
+
}
|
186
|
+
|
187
|
+
private:
|
188
|
+
template <typename U>
|
189
|
+
friend class linked_ptr;
|
190
|
+
|
191
|
+
T* value_;
|
192
|
+
linked_ptr_internal link_;
|
193
|
+
|
194
|
+
void depart() {
|
195
|
+
if (link_.depart()) delete value_;
|
196
|
+
}
|
197
|
+
|
198
|
+
void capture(T* ptr) {
|
199
|
+
value_ = ptr;
|
200
|
+
link_.join_new();
|
201
|
+
}
|
202
|
+
|
203
|
+
template <typename U> void copy(linked_ptr<U> const* ptr) {
|
204
|
+
value_ = ptr->get();
|
205
|
+
if (value_)
|
206
|
+
link_.join(&ptr->link_);
|
207
|
+
else
|
208
|
+
link_.join_new();
|
209
|
+
}
|
210
|
+
};
|
211
|
+
|
212
|
+
template<typename T> inline
|
213
|
+
bool operator==(T* ptr, const linked_ptr<T>& x) {
|
214
|
+
return ptr == x.get();
|
215
|
+
}
|
216
|
+
|
217
|
+
template<typename T> inline
|
218
|
+
bool operator!=(T* ptr, const linked_ptr<T>& x) {
|
219
|
+
return ptr != x.get();
|
220
|
+
}
|
221
|
+
|
222
|
+
// A function to convert T* into linked_ptr<T>
|
223
|
+
// Doing e.g. make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter notation
|
224
|
+
// for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg))
|
225
|
+
template <typename T>
|
226
|
+
linked_ptr<T> make_linked_ptr(T* ptr) {
|
227
|
+
return linked_ptr<T>(ptr);
|
228
|
+
}
|
229
|
+
|
230
|
+
} // namespace internal
|
231
|
+
} // namespace testing
|
232
|
+
|
233
|
+
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
|