cppjieba_rb 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +81 -0
  8. data/Rakefile +20 -0
  9. data/cppjieba_rb.gemspec +50 -0
  10. data/ext/cppjieba/.gitignore +17 -0
  11. data/ext/cppjieba/.travis.yml +22 -0
  12. data/ext/cppjieba/CMakeLists.txt +28 -0
  13. data/ext/cppjieba/ChangeLog.md +236 -0
  14. data/ext/cppjieba/README.md +285 -0
  15. data/ext/cppjieba/README_EN.md +111 -0
  16. data/ext/cppjieba/appveyor.yml +32 -0
  17. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  18. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  42. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  45. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  46. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  56. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  57. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  58. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  59. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  60. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  61. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  62. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  63. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  64. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  65. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  66. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  67. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  68. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  69. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  70. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  71. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  72. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  73. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  74. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  75. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  76. data/ext/cppjieba/dict/README.md +31 -0
  77. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  78. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  79. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  80. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  83. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  84. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  85. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  86. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  87. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  88. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  89. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  90. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  91. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  92. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  93. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  94. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  95. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  96. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  98. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  99. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  100. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  101. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  102. data/ext/cppjieba/test/CMakeLists.txt +5 -0
  103. data/ext/cppjieba/test/demo.cpp +80 -0
  104. data/ext/cppjieba/test/load_test.cpp +54 -0
  105. data/ext/cppjieba/test/testdata/curl.res +1 -0
  106. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  107. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  108. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  109. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  110. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  111. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  112. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  113. data/ext/cppjieba/test/testdata/load_test.urls +2 -0
  114. data/ext/cppjieba/test/testdata/review.100 +100 -0
  115. data/ext/cppjieba/test/testdata/review.100.res +200 -0
  116. data/ext/cppjieba/test/testdata/server.conf +19 -0
  117. data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
  118. data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
  119. data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  120. data/ext/cppjieba/test/testdata/userdict.english +2 -0
  121. data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
  122. data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
  123. data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
  124. data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
  125. data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
  126. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  127. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  128. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  129. data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
  130. data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
  131. data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
  132. data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
  133. data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
  134. data/ext/cppjieba_rb/extconf.rb +26 -0
  135. data/ext/cppjieba_rb/internal.cc +148 -0
  136. data/lib/cppjieba_rb/segment.rb +20 -0
  137. data/lib/cppjieba_rb/version.rb +3 -0
  138. data/lib/cppjieba_rb.rb +34 -0
  139. data/test/test_keyword.rb +17 -0
  140. data/test/test_segment.rb +24 -0
  141. data/test/test_tagging.rb +19 -0
  142. metadata +244 -0
@@ -0,0 +1,259 @@
1
+ #初始状态的概率
2
+ #格式
3
+ #状态:概率
4
+ B,a:-4.7623052146
5
+ B,ad:-6.68006603678
6
+ B,ag:-3.14e+100
7
+ B,an:-8.69708322302
8
+ B,b:-5.01837436211
9
+ B,bg:-3.14e+100
10
+ B,c:-3.42388018495
11
+ B,d:-3.97504752976
12
+ B,df:-8.88897423083
13
+ B,dg:-3.14e+100
14
+ B,e:-8.56355183039
15
+ B,en:-3.14e+100
16
+ B,f:-5.49163041848
17
+ B,g:-3.14e+100
18
+ B,h:-13.53336513
19
+ B,i:-6.11578472756
20
+ B,in:-3.14e+100
21
+ B,j:-5.05761912847
22
+ B,jn:-3.14e+100
23
+ B,k:-3.14e+100
24
+ B,l:-4.90588358466
25
+ B,ln:-3.14e+100
26
+ B,m:-3.6524299819
27
+ B,mg:-3.14e+100
28
+ B,mq:-6.7869530014
29
+ B,n:-1.69662577975
30
+ B,ng:-3.14e+100
31
+ B,nr:-2.23104959138
32
+ B,nrfg:-5.87372217541
33
+ B,nrt:-4.98564273352
34
+ B,ns:-2.8228438315
35
+ B,nt:-4.84609166818
36
+ B,nz:-3.94698846058
37
+ B,o:-8.43349870215
38
+ B,p:-4.20098413209
39
+ B,q:-6.99812385896
40
+ B,qe:-3.14e+100
41
+ B,qg:-3.14e+100
42
+ B,r:-3.40981877908
43
+ B,rg:-3.14e+100
44
+ B,rr:-12.4347528413
45
+ B,rz:-7.94611647157
46
+ B,s:-5.52267359084
47
+ B,t:-3.36474790945
48
+ B,tg:-3.14e+100
49
+ B,u:-9.1639172775
50
+ B,ud:-3.14e+100
51
+ B,ug:-3.14e+100
52
+ B,uj:-3.14e+100
53
+ B,ul:-3.14e+100
54
+ B,uv:-3.14e+100
55
+ B,uz:-3.14e+100
56
+ B,v:-2.67405848743
57
+ B,vd:-9.04472876024
58
+ B,vg:-3.14e+100
59
+ B,vi:-12.4347528413
60
+ B,vn:-4.33156108902
61
+ B,vq:-12.1470707689
62
+ B,w:-3.14e+100
63
+ B,x:-3.14e+100
64
+ B,y:-9.84448567586
65
+ B,yg:-3.14e+100
66
+ B,z:-7.04568111149
67
+ B,zg:-3.14e+100
68
+ E,a:-3.14e+100
69
+ E,ad:-3.14e+100
70
+ E,ag:-3.14e+100
71
+ E,an:-3.14e+100
72
+ E,b:-3.14e+100
73
+ E,bg:-3.14e+100
74
+ E,c:-3.14e+100
75
+ E,d:-3.14e+100
76
+ E,df:-3.14e+100
77
+ E,dg:-3.14e+100
78
+ E,e:-3.14e+100
79
+ E,en:-3.14e+100
80
+ E,f:-3.14e+100
81
+ E,g:-3.14e+100
82
+ E,h:-3.14e+100
83
+ E,i:-3.14e+100
84
+ E,in:-3.14e+100
85
+ E,j:-3.14e+100
86
+ E,jn:-3.14e+100
87
+ E,k:-3.14e+100
88
+ E,l:-3.14e+100
89
+ E,ln:-3.14e+100
90
+ E,m:-3.14e+100
91
+ E,mg:-3.14e+100
92
+ E,mq:-3.14e+100
93
+ E,n:-3.14e+100
94
+ E,ng:-3.14e+100
95
+ E,nr:-3.14e+100
96
+ E,nrfg:-3.14e+100
97
+ E,nrt:-3.14e+100
98
+ E,ns:-3.14e+100
99
+ E,nt:-3.14e+100
100
+ E,nz:-3.14e+100
101
+ E,o:-3.14e+100
102
+ E,p:-3.14e+100
103
+ E,q:-3.14e+100
104
+ E,qe:-3.14e+100
105
+ E,qg:-3.14e+100
106
+ E,r:-3.14e+100
107
+ E,rg:-3.14e+100
108
+ E,rr:-3.14e+100
109
+ E,rz:-3.14e+100
110
+ E,s:-3.14e+100
111
+ E,t:-3.14e+100
112
+ E,tg:-3.14e+100
113
+ E,u:-3.14e+100
114
+ E,ud:-3.14e+100
115
+ E,ug:-3.14e+100
116
+ E,uj:-3.14e+100
117
+ E,ul:-3.14e+100
118
+ E,uv:-3.14e+100
119
+ E,uz:-3.14e+100
120
+ E,v:-3.14e+100
121
+ E,vd:-3.14e+100
122
+ E,vg:-3.14e+100
123
+ E,vi:-3.14e+100
124
+ E,vn:-3.14e+100
125
+ E,vq:-3.14e+100
126
+ E,w:-3.14e+100
127
+ E,x:-3.14e+100
128
+ E,y:-3.14e+100
129
+ E,yg:-3.14e+100
130
+ E,z:-3.14e+100
131
+ E,zg:-3.14e+100
132
+ M,a:-3.14e+100
133
+ M,ad:-3.14e+100
134
+ M,ag:-3.14e+100
135
+ M,an:-3.14e+100
136
+ M,b:-3.14e+100
137
+ M,bg:-3.14e+100
138
+ M,c:-3.14e+100
139
+ M,d:-3.14e+100
140
+ M,df:-3.14e+100
141
+ M,dg:-3.14e+100
142
+ M,e:-3.14e+100
143
+ M,en:-3.14e+100
144
+ M,f:-3.14e+100
145
+ M,g:-3.14e+100
146
+ M,h:-3.14e+100
147
+ M,i:-3.14e+100
148
+ M,in:-3.14e+100
149
+ M,j:-3.14e+100
150
+ M,jn:-3.14e+100
151
+ M,k:-3.14e+100
152
+ M,l:-3.14e+100
153
+ M,ln:-3.14e+100
154
+ M,m:-3.14e+100
155
+ M,mg:-3.14e+100
156
+ M,mq:-3.14e+100
157
+ M,n:-3.14e+100
158
+ M,ng:-3.14e+100
159
+ M,nr:-3.14e+100
160
+ M,nrfg:-3.14e+100
161
+ M,nrt:-3.14e+100
162
+ M,ns:-3.14e+100
163
+ M,nt:-3.14e+100
164
+ M,nz:-3.14e+100
165
+ M,o:-3.14e+100
166
+ M,p:-3.14e+100
167
+ M,q:-3.14e+100
168
+ M,qe:-3.14e+100
169
+ M,qg:-3.14e+100
170
+ M,r:-3.14e+100
171
+ M,rg:-3.14e+100
172
+ M,rr:-3.14e+100
173
+ M,rz:-3.14e+100
174
+ M,s:-3.14e+100
175
+ M,t:-3.14e+100
176
+ M,tg:-3.14e+100
177
+ M,u:-3.14e+100
178
+ M,ud:-3.14e+100
179
+ M,ug:-3.14e+100
180
+ M,uj:-3.14e+100
181
+ M,ul:-3.14e+100
182
+ M,uv:-3.14e+100
183
+ M,uz:-3.14e+100
184
+ M,v:-3.14e+100
185
+ M,vd:-3.14e+100
186
+ M,vg:-3.14e+100
187
+ M,vi:-3.14e+100
188
+ M,vn:-3.14e+100
189
+ M,vq:-3.14e+100
190
+ M,w:-3.14e+100
191
+ M,x:-3.14e+100
192
+ M,y:-3.14e+100
193
+ M,yg:-3.14e+100
194
+ M,z:-3.14e+100
195
+ M,zg:-3.14e+100
196
+ S,a:-3.90253968313
197
+ S,ad:-11.0484584802
198
+ S,ag:-6.95411391796
199
+ S,an:-12.8402179494
200
+ S,b:-6.47288876397
201
+ S,bg:-3.14e+100
202
+ S,c:-4.78696679586
203
+ S,d:-3.90391976418
204
+ S,df:-3.14e+100
205
+ S,dg:-8.9483976513
206
+ S,e:-5.94251300628
207
+ S,en:-3.14e+100
208
+ S,f:-5.19482024998
209
+ S,g:-6.50782681533
210
+ S,h:-8.65056320738
211
+ S,i:-3.14e+100
212
+ S,in:-3.14e+100
213
+ S,j:-4.91199211964
214
+ S,jn:-3.14e+100
215
+ S,k:-6.94032059583
216
+ S,l:-3.14e+100
217
+ S,ln:-3.14e+100
218
+ S,m:-3.26920065212
219
+ S,mg:-10.8253149289
220
+ S,mq:-3.14e+100
221
+ S,n:-3.85514838976
222
+ S,ng:-4.9134348611
223
+ S,nr:-4.48366310396
224
+ S,nrfg:-3.14e+100
225
+ S,nrt:-3.14e+100
226
+ S,ns:-3.14e+100
227
+ S,nt:-12.1470707689
228
+ S,nz:-3.14e+100
229
+ S,o:-8.46446092775
230
+ S,p:-2.98684018136
231
+ S,q:-4.88865861826
232
+ S,qe:-3.14e+100
233
+ S,qg:-3.14e+100
234
+ S,r:-2.76353367841
235
+ S,rg:-10.2752685919
236
+ S,rr:-3.14e+100
237
+ S,rz:-3.14e+100
238
+ S,s:-3.14e+100
239
+ S,t:-3.14e+100
240
+ S,tg:-6.27284253188
241
+ S,u:-6.94032059583
242
+ S,ud:-7.72823016105
243
+ S,ug:-7.53940370266
244
+ S,uj:-6.85251045118
245
+ S,ul:-8.41537131755
246
+ S,uv:-8.15808672229
247
+ S,uz:-9.29925862537
248
+ S,v:-3.05329230341
249
+ S,vd:-3.14e+100
250
+ S,vg:-5.94301818437
251
+ S,vi:-3.14e+100
252
+ S,vn:-11.4539235883
253
+ S,vq:-3.14e+100
254
+ S,w:-3.14e+100
255
+ S,x:-8.42741965607
256
+ S,y:-6.19707946995
257
+ S,yg:-13.53336513
258
+ S,z:-3.14e+100
259
+ S,zg:-3.14e+100