cppjieba_rb 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +2 -2
  5. data/cppjieba_rb.gemspec +4 -4
  6. data/lib/cppjieba_rb/version.rb +1 -1
  7. metadata +17 -135
  8. data/ext/cppjieba/.gitignore +0 -17
  9. data/ext/cppjieba/.travis.yml +0 -21
  10. data/ext/cppjieba/CMakeLists.txt +0 -28
  11. data/ext/cppjieba/ChangeLog.md +0 -236
  12. data/ext/cppjieba/README.md +0 -292
  13. data/ext/cppjieba/README_EN.md +0 -113
  14. data/ext/cppjieba/appveyor.yml +0 -32
  15. data/ext/cppjieba/deps/CMakeLists.txt +0 -1
  16. data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
  17. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
  18. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
  28. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
  29. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
  41. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  42. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
  43. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
  44. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  45. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
  46. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
  47. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
  48. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
  49. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
  50. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
  51. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
  52. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
  53. data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
  54. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +0 -39
  55. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +0 -70
  56. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
  57. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
  58. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
  59. data/ext/cppjieba/deps/limonp/Closure.hpp +0 -206
  60. data/ext/cppjieba/deps/limonp/Colors.hpp +0 -31
  61. data/ext/cppjieba/deps/limonp/Condition.hpp +0 -38
  62. data/ext/cppjieba/deps/limonp/Config.hpp +0 -103
  63. data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
  64. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +0 -7
  65. data/ext/cppjieba/deps/limonp/LocalVector.hpp +0 -139
  66. data/ext/cppjieba/deps/limonp/Logging.hpp +0 -76
  67. data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
  68. data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
  69. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +0 -21
  70. data/ext/cppjieba/deps/limonp/StdExtension.hpp +0 -159
  71. data/ext/cppjieba/deps/limonp/StringUtil.hpp +0 -365
  72. data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
  73. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
  74. data/ext/cppjieba/dict/README.md +0 -31
  75. data/ext/cppjieba/dict/hmm_model.utf8 +0 -34
  76. data/ext/cppjieba/dict/idf.utf8 +0 -258826
  77. data/ext/cppjieba/dict/jieba.dict.utf8 +0 -348982
  78. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +0 -6653
  79. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +0 -166
  80. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +0 -259
  81. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +0 -5222
  82. data/ext/cppjieba/dict/stop_words.utf8 +0 -1534
  83. data/ext/cppjieba/dict/user.dict.utf8 +0 -4
  84. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +0 -277
  85. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +0 -93
  86. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +0 -129
  87. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +0 -190
  88. data/ext/cppjieba/include/cppjieba/Jieba.hpp +0 -130
  89. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +0 -153
  90. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +0 -137
  91. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +0 -109
  92. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +0 -77
  93. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +0 -54
  94. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +0 -90
  95. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +0 -46
  96. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +0 -23
  97. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +0 -190
  98. data/ext/cppjieba/include/cppjieba/Trie.hpp +0 -174
  99. data/ext/cppjieba/include/cppjieba/Unicode.hpp +0 -227
  100. data/ext/cppjieba/test/CMakeLists.txt +0 -5
  101. data/ext/cppjieba/test/demo.cpp +0 -80
  102. data/ext/cppjieba/test/load_test.cpp +0 -54
  103. data/ext/cppjieba/test/testdata/curl.res +0 -1
  104. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +0 -109750
  105. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +0 -34
  106. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +0 -348982
  107. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +0 -93
  108. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +0 -93
  109. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +0 -67
  110. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +0 -64
  111. data/ext/cppjieba/test/testdata/load_test.urls +0 -2
  112. data/ext/cppjieba/test/testdata/review.100 +0 -100
  113. data/ext/cppjieba/test/testdata/review.100.res +0 -200
  114. data/ext/cppjieba/test/testdata/server.conf +0 -19
  115. data/ext/cppjieba/test/testdata/testlines.gbk +0 -9
  116. data/ext/cppjieba/test/testdata/testlines.utf8 +0 -8
  117. data/ext/cppjieba/test/testdata/userdict.2.utf8 +0 -1
  118. data/ext/cppjieba/test/testdata/userdict.english +0 -2
  119. data/ext/cppjieba/test/testdata/userdict.utf8 +0 -8
  120. data/ext/cppjieba/test/testdata/weicheng.utf8 +0 -247
  121. data/ext/cppjieba/test/unittest/CMakeLists.txt +0 -24
  122. data/ext/cppjieba/test/unittest/gtest_main.cpp +0 -39
  123. data/ext/cppjieba/test/unittest/jieba_test.cpp +0 -133
  124. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +0 -79
  125. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +0 -41
  126. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +0 -43
  127. data/ext/cppjieba/test/unittest/segments_test.cpp +0 -256
  128. data/ext/cppjieba/test/unittest/textrank_test.cpp +0 -86
  129. data/ext/cppjieba/test/unittest/trie_test.cpp +0 -177
  130. data/ext/cppjieba/test/unittest/unicode_test.cpp +0 -43
@@ -1,292 +0,0 @@
1
- # CppJieba [English](README_EN.md)
2
-
3
- [![Build Status](https://travis-ci.org/yanyiwu/cppjieba.png?branch=master)](https://travis-ci.org/yanyiwu/cppjieba)
4
- [![Author](https://img.shields.io/badge/author-@yanyiwu-blue.svg?style=flat)](http://yanyiwu.com/)
5
- [![Platform](https://img.shields.io/badge/platform-Linux,%20OS%20X,%20Windows-green.svg?style=flat)](https://github.com/yanyiwu/cppjieba)
6
- [![Performance](https://img.shields.io/badge/performance-excellent-brightgreen.svg?style=flat)](http://yanyiwu.com/work/2015/06/14/jieba-series-performance-test.html)
7
- [![License](https://img.shields.io/badge/license-MIT-yellow.svg?style=flat)](http://yanyiwu.mit-license.org)
8
- [![Build status](https://ci.appveyor.com/api/projects/status/wl30fjnm2rhft6ta/branch/master?svg=true)](https://ci.appveyor.com/project/yanyiwu/cppjieba/branch/master)
9
-
10
- [![logo](http://7viirv.com1.z0.glb.clouddn.com/CppJiebaLogo-v1.png)](https://github.com/yanyiwu/cppjieba)
11
-
12
- ## 简介
13
-
14
- CppJieba是"结巴(Jieba)"中文分词的C++版本
15
-
16
- ## 特性
17
-
18
- + 源代码都写进头文件`include/cppjieba/*.hpp`里,`include`即可使用。
19
- + 支持`utf8`编码。
20
- + 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。
21
- + 支持加载自定义用户词典,多路径时支持分隔符'|'或者';'分隔。
22
- + 支持 `Linux` , `Mac OSX`, `Windows` 操作系统。
23
-
24
- ## 用法
25
-
26
- ### 依赖软件
27
-
28
- * `g++ (version >= 4.1 is recommended) or clang++`;
29
- * `cmake (version >= 2.6 is recommended)`;
30
-
31
- ### 下载和编译
32
-
33
- ```sh
34
- git clone --depth=10 --branch=master git://github.com/yanyiwu/cppjieba.git
35
- cd cppjieba
36
- mkdir build
37
- cd build
38
- cmake ..
39
- make
40
- ```
41
-
42
- 有兴趣的可以跑跑测试(可选):
43
-
44
- ```
45
- make test
46
- ```
47
-
48
- ## Demo
49
-
50
- ```
51
- ./demo
52
- ```
53
-
54
- 结果示例:
55
-
56
- ```
57
- [demo] Cut With HMM
58
- 他/来到/了/网易/杭研/大厦
59
- [demo] Cut Without HMM
60
- 他/来到/了/网易/杭/研/大厦
61
- 我来到北京清华大学
62
- [demo] CutAll
63
- 我/来到/北京/清华/清华大学/华大/大学
64
- 小明硕士毕业于中国科学院计算所,后在日本京都大学深造
65
- [demo] CutForSearch
66
- 小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/,/后/在/日本/京都/大学/日本京都大学/深造
67
- [demo] Insert User Word
68
- 男默/女泪
69
- 男默女泪
70
- [demo] CutForSearch Word With Offset
71
- [{"word": "小明", "offset": 0}, {"word": "硕士", "offset": 6}, {"word": "毕业", "offset": 12}, {"word": "于", "offset": 18}, {"word": "中国", "offset": 21}, {"word": "科学", "offset": 27}, {"word": "学院", "offset": 30}, {"word": "科学院", "offset": 27}, {"word": "中国科学院", "offset": 21}, {"word": "计算", "offset": 36}, {"word": "计算所", "offset": 36}, {"word": ",", "offset": 45}, {"word": "后", "offset": 48}, {"word": "在", "offset": 51}, {"word": "日本", "offset": 54}, {"word": "京都", "offset": 60}, {"word": "大学", "offset": 66}, {"word": "日本京都大学", "offset": 54}, {"word": "深造", "offset": 72}]
72
- [demo] Tagging
73
- 我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
74
- [我:r, 是:v, 拖拉机:n, 学院:n, 手扶拖拉机:n, 专业:n, 的:uj, 。:x, 不用:v, 多久:m, ,:x, 我:r, 就:d, 会:v, 升职:v, 加薪:nr, ,:x, 当上:t, CEO:eng, ,:x, 走上:v, 人生:n, 巅峰:n, 。:x]
75
- [demo] Keyword Extraction
76
- 我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
77
- [{"word": "CEO", "offset": [93], "weight": 11.7392}, {"word": "升职", "offset": [72], "weight": 10.8562}, {"word": "加薪", "offset": [78], "weight": 10.6426}, {"word": "手扶拖拉机", "offset": [21], "weight": 10.0089}, {"word": "巅峰", "offset": [111], "weight": 9.49396}]
78
- ```
79
-
80
- 详细请看 `test/demo.cpp`.
81
-
82
- ### 分词结果示例
83
-
84
- **MPSegment**
85
-
86
- Output:
87
- ```
88
- 我来到北京清华大学
89
- 我/来到/北京/清华大学
90
-
91
- 他来到了网易杭研大厦
92
- 他/来到/了/网易/杭/研/大厦
93
-
94
- 小明硕士毕业于中国科学院计算所,后在日本京都大学深造
95
- 小/明/硕士/毕业/于/中国科学院/计算所/,/后/在/日本京都大学/深造
96
-
97
- ```
98
-
99
- **HMMSegment**
100
-
101
- ```
102
- 我来到北京清华大学
103
- 我来/到/北京/清华大学
104
-
105
- 他来到了网易杭研大厦
106
- 他来/到/了/网易/杭/研大厦
107
-
108
- 小明硕士毕业于中国科学院计算所,后在日本京都大学深造
109
- 小明/硕士/毕业于/中国/科学院/计算所/,/后/在/日/本/京/都/大/学/深/造
110
-
111
- ```
112
-
113
- **MixSegment**
114
-
115
- ```
116
- 我来到北京清华大学
117
- 我/来到/北京/清华大学
118
-
119
- 他来到了网易杭研大厦
120
- 他/来到/了/网易/杭研/大厦
121
-
122
- 小明硕士毕业于中国科学院计算所,后在日本京都大学深造
123
- 小明/硕士/毕业/于/中国科学院/计算所/,/后/在/日本京都大学/深造
124
-
125
- ```
126
-
127
- **FullSegment**
128
-
129
- ```
130
- 我来到北京清华大学
131
- 我/来到/北京/清华/清华大学/华大/大学
132
-
133
- 他来到了网易杭研大厦
134
- 他/来到/了/网易/杭/研/大厦
135
-
136
- 小明硕士毕业于中国科学院计算所,后在日本京都大学深造
137
- 小/明/硕士/毕业/于/中国/中国科学院/科学/科学院/学院/计算/计算所/,/后/在/日本/日本京都大学/京都/京都大学/大学/深造
138
-
139
- ```
140
-
141
- **QuerySegment**
142
-
143
- ```
144
- 我来到北京清华大学
145
- 我/来到/北京/清华/清华大学/华大/大学
146
-
147
- 他来到了网易杭研大厦
148
- 他/来到/了/网易/杭研/大厦
149
-
150
- 小明硕士毕业于中国科学院计算所,后在日本京都大学深造
151
- 小明/硕士/毕业/于/中国/中国科学院/科学/科学院/学院/计算所/,/后/在/中国/中国科学院/科学/科学院/学院/日本/日本京都大学/京都/京都大学/大学/深造
152
-
153
- ```
154
-
155
- 以上依次是MP,HMM,Mix,Full,Query五种方法的效果。
156
-
157
- 可以看出效果最好的是Mix,也就是融合MP和HMM的切词算法。既可以准确切出词典已有的词,又可以切出像"杭研"这样的未登录词。
158
-
159
- Full方法切出所有字典里的词语。
160
-
161
- Query方法先使用Mix方法切词,对于切出来的较长的词再使用Full方法。
162
-
163
- ### 自定义用户词典
164
-
165
- 自定义词典示例请看`dict/user.dict.utf8`。
166
-
167
- 没有使用自定义用户词典时的结果:
168
-
169
- ```
170
- 令狐冲/是/云/计算/行业/的/专家
171
- ```
172
-
173
- 使用自定义用户词典时的结果:
174
-
175
- ```
176
- 令狐冲/是/云计算/行业/的/专家
177
- ```
178
-
179
- ### 关键词抽取
180
-
181
- ```
182
- 我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
183
- ["CEO:11.7392", "升职:10.8562", "加薪:10.6426", "手扶拖拉机:10.0089", "巅峰:9.49396"]
184
- ```
185
-
186
- 详细请见 `test/demo.cpp`.
187
-
188
- ### 词性标注
189
-
190
- ```
191
- 我是蓝翔技工拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上总经理,出任CEO,迎娶白富美,走上人生巅峰。
192
- ["我:r", "是:v", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ",:x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ",:x", "当上:t", "CEO:eng", ",:x", "走上:v", "人生:n", "巅峰:n", "。:x"]
193
- ```
194
-
195
- 详细请看 `test/demo.cpp`.
196
-
197
- 支持自定义词性。
198
- 比如在(`dict/user.dict.utf8`)增加一行
199
-
200
- ```
201
- 蓝翔 nz
202
- ```
203
-
204
- 结果如下:
205
-
206
- ```
207
- ["我:r", "是:v", "蓝翔:nz", "技工:n", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ",:x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ",:x", "当:t", "上:f", "总经理:n", ",:x", "出任:v", "CEO:eng", ",:x", "迎娶:v", "白富美:x", ",:x", "走上:v", "人生:n", "巅峰:n", "。:x"]
208
- ```
209
-
210
- ## 其它词典资料分享
211
-
212
- + [dict.367W.utf8] iLife(562193561 at qq.com)
213
-
214
- ## 应用
215
-
216
- + [GoJieba] go语言版本的结巴中文分词。
217
- + [NodeJieba] Node.js 版本的结巴中文分词。
218
- + [simhash] 中文文档的相似度计算
219
- + [exjieba] Erlang 版本的结巴中文分词。
220
- + [jiebaR] R语言版本的结巴中文分词。
221
- + [cjieba] C语言版本的结巴分词。
222
- + [jieba_rb] Ruby 版本的结巴分词。
223
- + [iosjieba] iOS 版本的结巴分词。
224
- + [SqlJieba] MySQL 全文索引的结巴中文分词插件。
225
- + [pg_jieba] PostgreSQL 数据库的分词插件。
226
- + [gitbook-plugin-search-pro] 支持中文搜索的 gitbook 插件。
227
- + [ngx_http_cppjieba_module] Nginx 分词插件。
228
- + [cppjiebapy] 由 [jannson] 开发的供 python 模块调用的项目 [cppjiebapy], 相关讨论 [cppjiebapy_discussion] .
229
- + [cppjieba-py] 由 [bung87] 基于 pybind11 封装的 python 模块,使用体验上接近于原jieba。
230
- + [KeywordServer] 50行搭建一个中文关键词抽取服务。
231
- + [cppjieba-server] CppJieba HTTP 服务器。
232
- + [phpjieba] php版本的结巴分词扩展。
233
- + [perl5-jieba] Perl版本的结巴分词扩展。
234
-
235
- ## 线上演示
236
-
237
- [Web-Demo](http://cppjieba-webdemo.herokuapp.com/)
238
- (建议使用chrome打开)
239
-
240
- ## 性能评测
241
-
242
- [Jieba中文分词系列性能评测]
243
-
244
- ## 客服
245
-
246
- + Email: `i@yanyiwu.com`
247
- + QQ: 64162451
248
- + WeChat: ![image](http://7viirv.com1.z0.glb.clouddn.com/5a7d1b5c0d_yanyiwu_personal_qrcodes.jpg)
249
-
250
- ## 鸣谢
251
-
252
- "结巴"中文分词作者: [SunJunyi](https://github.com/fxsjy)
253
-
254
- ## 许可证
255
-
256
- [MIT](http://yanyiwu.mit-license.org)
257
-
258
- ## 作者
259
-
260
- - [yanyiwu](http://yanyiwu.com)
261
- - [aholic](https://github.com/aholic)
262
-
263
- [GoJieba]:https://github.com/yanyiwu/gojieba
264
- [CppJieba]:https://github.com/yanyiwu/cppjieba
265
- [jannson]:https://github.com/jannson
266
- [cppjiebapy]:https://github.com/jannson/cppjiebapy
267
- [bung87]:https://github.com/bung87
268
- [cppjieba-py]:https://github.com/bung87/cppjieba-py
269
- [cppjiebapy_discussion]:https://github.com/yanyiwu/cppjieba/issues/1
270
- [NodeJieba]:https://github.com/yanyiwu/nodejieba
271
- [jiebaR]:https://github.com/qinwf/jiebaR
272
- [simhash]:https://github.com/yanyiwu/simhash
273
- [代码详解]:https://github.com/yanyiwu/cppjieba/wiki/CppJieba%E4%BB%A3%E7%A0%81%E8%AF%A6%E8%A7%A3
274
- [issue25]:https://github.com/yanyiwu/cppjieba/issues/25
275
- [exjieba]:https://github.com/falood/exjieba
276
- [KeywordServer]:https://github.com/yanyiwu/keyword_server
277
- [ngx_http_cppjieba_module]:https://github.com/yanyiwu/ngx_http_cppjieba_module
278
- [dict.367W.utf8]:https://github.com/qinwf/BigDict
279
- [cjieba]:http://github.com/yanyiwu/cjieba
280
- [jieba_rb]:https://github.com/altkatz/jieba_rb
281
- [iosjieba]:https://github.com/yanyiwu/iosjieba
282
- [SqlJieba]:https://github.com/yanyiwu/sqljieba
283
- [Jieba中文分词系列性能评测]:http://yanyiwu.com/work/2015/06/14/jieba-series-performance-test.html
284
- [pg_jieba]:https://github.com/jaiminpan/pg_jieba
285
- [gitbook-plugin-search-pro]:https://plugins.gitbook.com/plugin/search-pro
286
- [cppjieba-server]:https://github.com/yanyiwu/cppjieba-server
287
- [phpjieba]:https://github.com/jonnywang/phpjieba
288
- [perl5-jieba]:https://metacpan.org/pod/distribution/Lingua-ZH-Jieba/lib/Lingua/ZH/Jieba.pod
289
-
290
-
291
- [![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/yanyiwu/cppjieba/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
292
-
@@ -1,113 +0,0 @@
1
- # CppJieba [简体中文](README.md)
2
-
3
- [![Build Status](https://travis-ci.org/yanyiwu/cppjieba.png?branch=master)](https://travis-ci.org/yanyiwu/cppjieba)
4
- [![Author](https://img.shields.io/badge/author-@yanyiwu-blue.svg?style=flat)](http://yanyiwu.com/)
5
- [![Platform](https://img.shields.io/badge/platform-Linux,%20OS%20X,%20Windows-green.svg?style=flat)](https://github.com/yanyiwu/cppjieba)
6
- [![Performance](https://img.shields.io/badge/performance-excellent-brightgreen.svg?style=flat)](http://yanyiwu.com/work/2015/06/14/jieba-series-performance-test.html)
7
- [![License](https://img.shields.io/badge/license-MIT-yellow.svg?style=flat)](http://yanyiwu.mit-license.org)
8
- [![Build status](https://ci.appveyor.com/api/projects/status/wl30fjnm2rhft6ta/branch/master?svg=true)](https://ci.appveyor.com/project/yanyiwu/cppjieba/branch/master)
9
-
10
- [![logo](http://7viirv.com1.z0.glb.clouddn.com/CppJiebaLogo-v1.png)](https://github.com/yanyiwu/cppjieba)
11
-
12
- ## Introduction
13
-
14
- The C++ implementation of the Jieba Chinese word segmentation library.
15
-
16
- ## Usage
17
-
18
- ### Dependencies
19
-
20
- + `g++ (version >= 4.1 is recommended) or clang++`;
21
- + `cmake (version >= 2.6 is recommended)`;
22
-
23
- ### Download & Compile
24
-
25
- ```sh
26
- git clone --depth=10 --branch=master git://github.com/yanyiwu/cppjieba.git
27
- cd cppjieba
28
- mkdir build
29
- cd build
30
- cmake ..
31
- make
32
- ```
33
-
34
- ### Unit Testing
35
-
36
- ```
37
- make test
38
- ```
39
-
40
- ## Demo
41
-
42
- ```
43
- ./demo
44
- ```
45
-
46
- Output:
47
-
48
- ```
49
- [demo] Cut With HMM
50
- 他/来到/了/网易/杭研/大厦
51
- [demo] Cut Without HMM
52
- 他/来到/了/网易/杭/研/大厦
53
- 我来到北京清华大学
54
- [demo] CutAll
55
- 我/来到/北京/清华/清华大学/华大/大学
56
- 小明硕士毕业于中国科学院计算所,后在日本京都大学深造
57
- [demo] CutForSearch
58
- 小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/,/后/在/日本/京都/大学/日本京都大学/深造
59
- [demo] Insert User Word
60
- 男默/女泪
61
- 男默女泪
62
- [demo] CutForSearch Word With Offset
63
- [{"word": "小明", "offset": 0}, {"word": "硕士", "offset": 6}, {"word": "毕业", "offset": 12}, {"word": "于", "offset": 18}, {"word": "中国", "offset": 21}, {"word": "科学", "offset": 27}, {"word": "学院", "offset": 30}, {"word": "科学院", "offset": 27}, {"word": "中国科学院", "offset": 21}, {"word": "计算", "offset": 36}, {"word": "计算所", "offset": 36}, {"word": ",", "offset": 45}, {"word": "后", "offset": 48}, {"word": "在", "offset": 51}, {"word": "日本", "offset": 54}, {"word": "京都", "offset": 60}, {"word": "大学", "offset": 66}, {"word": "日本京都大学", "offset": 54}, {"word": "深造", "offset": 72}]
64
- [demo] Tagging
65
- 我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
66
- [我:r, 是:v, 拖拉机:n, 学院:n, 手扶拖拉机:n, 专业:n, 的:uj, 。:x, 不用:v, 多久:m, ,:x, 我:r, 就:d, 会:v, 升职:v, 加薪:nr, ,:x, 当上:t, CEO:eng, ,:x, 走上:v, 人生:n, 巅峰:n, 。:x]
67
- [demo] Keyword Extraction
68
- 我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
69
- [{"word": "CEO", "offset": [93], "weight": 11.7392}, {"word": "升职", "offset": [72], "weight": 10.8562}, {"word": "加薪", "offset": [78], "weight": 10.6426}, {"word": "手扶拖拉机", "offset": [21], "weight": 10.0089}, {"word": "巅峰", "offset": [111], "weight": 9.49396}]
70
- ```
71
-
72
- Please see details in `test/demo.cpp`.
73
-
74
- ## Cases
75
-
76
- + [GoJieba]
77
- + [NodeJieba]
78
- + [simhash]
79
- + [exjieba]
80
- + [jiebaR]
81
- + [cjieba]
82
- + [jieba_rb]
83
- + [iosjieba]
84
- + [SqlJieba]
85
- + [pg_jieba]
86
- + [ngx_http_cppjieba_module]
87
- + [gitbook-plugin-search-pro]
88
- + [cppjieba-server]
89
- + [perl5-jieba]
90
-
91
- ## Contact
92
-
93
- + Email: `i@yanyiwu.com`
94
- + QQ: 64162451
95
- + WeChat: ![image](http://7viirv.com1.z0.glb.clouddn.com/5a7d1b5c0d_yanyiwu_personal_qrcodes.jpg)
96
-
97
- [GoJieba]:https://github.com/yanyiwu/gojieba
98
- [CppJieba]:https://github.com/yanyiwu/cppjieba
99
- [jannson]:https://github.com/jannson
100
- [cppjiebapy]:https://github.com/jannson/cppjiebapy
101
- [cppjiebapy_discussion]:https://github.com/yanyiwu/cppjieba/issues/1
102
- [NodeJieba]:https://github.com/yanyiwu/nodejieba
103
- [jiebaR]:https://github.com/qinwf/jiebaR
104
- [simhash]:https://github.com/yanyiwu/simhash
105
- [exjieba]:https://github.com/falood/exjieba
106
- [cjieba]:http://github.com/yanyiwu/cjieba
107
- [jieba_rb]:https://github.com/altkatz/jieba_rb
108
- [iosjieba]:https://github.com/yanyiwu/iosjieba
109
- [SqlJieba]:https://github.com/yanyiwu/sqljieba
110
- [pg_jieba]:https://github.com/jaiminpan/pg_jieba
111
- [gitbook-plugin-search-pro]:https://plugins.gitbook.com/plugin/search-pro
112
- [cppjieba-server]:https://github.com/yanyiwu/cppjieba-server
113
- [perl5-jieba]:https://metacpan.org/pod/distribution/Lingua-ZH-Jieba/lib/Lingua/ZH/Jieba.pod
@@ -1,32 +0,0 @@
1
- os: Visual Studio 2015
2
-
3
- platform: x64
4
-
5
- # clone directory
6
- clone_folder: c:\projects\cppjieba
7
-
8
- # scripts to run before build
9
- before_build:
10
- - echo Running cmake...
11
- - cd c:\projects\cppjieba
12
- - cmake .
13
-
14
- build:
15
- project: ALL_BUILD.vcxproj # path to Visual Studio solution or project
16
-
17
- # scripts to run after build
18
- after_build:
19
- - cd Debug
20
- - demo.exe
21
- - load_test.exe
22
- - cd ..
23
- - COPY .\test\Debug\test.run.exe .\test\test.run.exe
24
- - cd test
25
- - test.run.exe
26
- - cd ..
27
- - 7z a c:\projects\all.zip * -tzip
28
- - cd c:\projects
29
-
30
- artifacts:
31
- - path: all.zip
32
- name: all.zip
@@ -1 +0,0 @@
1
- ADD_SUBDIRECTORY(gtest)
@@ -1,5 +0,0 @@
1
- INCLUDE_DIRECTORIES(./ include)
2
- ADD_LIBRARY(gtest STATIC src/gtest-all.cc)
3
- if(NOT MSVC)
4
- TARGET_LINK_LIBRARIES(gtest pthread)
5
- endif()