jieba-rb 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.gitmodules +3 -0
- data/.travis.yml +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +85 -0
- data/Rakefile +15 -0
- data/ext/cppjieba/.gitignore +17 -0
- data/ext/cppjieba/.travis.yml +22 -0
- data/ext/cppjieba/CMakeLists.txt +28 -0
- data/ext/cppjieba/ChangeLog.md +236 -0
- data/ext/cppjieba/README.md +285 -0
- data/ext/cppjieba/README_EN.md +111 -0
- data/ext/cppjieba/appveyor.yml +32 -0
- data/ext/cppjieba/deps/CMakeLists.txt +1 -0
- data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
- data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
- data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
- data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
- data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
- data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
- data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
- data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
- data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
- data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
- data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
- data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
- data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
- data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
- data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
- data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
- data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
- data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
- data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
- data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
- data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
- data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
- data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
- data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
- data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
- data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
- data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
- data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
- data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
- data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
- data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
- data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
- data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
- data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
- data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
- data/ext/cppjieba/dict/README.md +31 -0
- data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
- data/ext/cppjieba/dict/idf.utf8 +258826 -0
- data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
- data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
- data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
- data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
- data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
- data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
- data/ext/cppjieba/dict/user.dict.utf8 +4 -0
- data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
- data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
- data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
- data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
- data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
- data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
- data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
- data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
- data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
- data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
- data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
- data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
- data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +24 -0
- data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
- data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
- data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
- data/ext/jieba/extconf.rb +28 -0
- data/ext/jieba/jieba.c +11 -0
- data/ext/jieba/jieba.h +11 -0
- data/ext/jieba/keyword.cc +92 -0
- data/ext/jieba/keyword.h +17 -0
- data/ext/jieba/segment.cc +107 -0
- data/ext/jieba/segment.h +17 -0
- data/ext/jieba/tagging.cc +76 -0
- data/ext/jieba/tagging.h +17 -0
- data/jieba_rb.gemspec +51 -0
- data/lib/jieba-rb.rb +66 -0
- data/lib/jieba_rb/version.rb +3 -0
- data/test/test_keyword.rb +17 -0
- data/test/test_segment.rb +32 -0
- data/test/test_tagging.rb +22 -0
- data/test/user.dict.utf8 +23 -0
- metadata +219 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
# CppJieba [English](README_EN.md)
|
|
2
|
+
|
|
3
|
+
[](https://travis-ci.org/yanyiwu/cppjieba)
|
|
4
|
+
[](http://yanyiwu.com/)
|
|
5
|
+
[](https://github.com/yanyiwu/cppjieba)
|
|
6
|
+
[](http://yanyiwu.com/work/2015/06/14/jieba-series-performance-test.html)
|
|
7
|
+
[](http://yanyiwu.mit-license.org)
|
|
8
|
+
[](https://ci.appveyor.com/project/yanyiwu/cppjieba/branch/master)
|
|
9
|
+
|
|
10
|
+
[](https://github.com/yanyiwu/cppjieba)
|
|
11
|
+
|
|
12
|
+
## 简介
|
|
13
|
+
|
|
14
|
+
CppJieba是"结巴(Jieba)"中文分词的C++版本
|
|
15
|
+
|
|
16
|
+
## 特性
|
|
17
|
+
|
|
18
|
+
+ 源代码都写进头文件`include/cppjieba/*.hpp`里,`include`即可使用。
|
|
19
|
+
+ 支持`utf8`编码。
|
|
20
|
+
+ 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。
|
|
21
|
+
+ 支持加载自定义用户词典,多路径时支持使用分隔符'|'或者';'分隔。
|
|
22
|
+
+ 支持 `Linux` , `Mac OSX`, `Windows` 操作系统。
|
|
23
|
+
|
|
24
|
+
## 用法
|
|
25
|
+
|
|
26
|
+
### 依赖软件
|
|
27
|
+
|
|
28
|
+
* `g++ (version >= 4.1 is recommended) or clang++`;
|
|
29
|
+
* `cmake (version >= 2.6 is recommended)`;
|
|
30
|
+
|
|
31
|
+
### 下载和编译
|
|
32
|
+
|
|
33
|
+
```sh
|
|
34
|
+
git clone --depth=10 --branch=master https://github.com/yanyiwu/cppjieba.git
|
|
35
|
+
cd cppjieba
|
|
36
|
+
mkdir build
|
|
37
|
+
cd build
|
|
38
|
+
cmake ..
|
|
39
|
+
make
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
有兴趣的可以跑跑测试(可选):
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
make test
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Demo
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
./demo
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
结果示例:
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
[demo] Cut With HMM
|
|
58
|
+
他/来到/了/网易/杭研/大厦
|
|
59
|
+
[demo] Cut Without HMM
|
|
60
|
+
他/来到/了/网易/杭/研/大厦
|
|
61
|
+
我来到北京清华大学
|
|
62
|
+
[demo] CutAll
|
|
63
|
+
我/来到/北京/清华/清华大学/华大/大学
|
|
64
|
+
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
|
65
|
+
[demo] CutForSearch
|
|
66
|
+
小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/,/后/在/日本/京都/大学/日本京都大学/深造
|
|
67
|
+
[demo] Insert User Word
|
|
68
|
+
男默/女泪
|
|
69
|
+
男默女泪
|
|
70
|
+
[demo] CutForSearch Word With Offset
|
|
71
|
+
[{"word": "小明", "offset": 0}, {"word": "硕士", "offset": 6}, {"word": "毕业", "offset": 12}, {"word": "于", "offset": 18}, {"word": "中国", "offset": 21}, {"word": "科学", "offset": 27}, {"word": "学院", "offset": 30}, {"word": "科学院", "offset": 27}, {"word": "中国科学院", "offset": 21}, {"word": "计算", "offset": 36}, {"word": "计算所", "offset": 36}, {"word": ",", "offset": 45}, {"word": "后", "offset": 48}, {"word": "在", "offset": 51}, {"word": "日本", "offset": 54}, {"word": "京都", "offset": 60}, {"word": "大学", "offset": 66}, {"word": "日本京都大学", "offset": 54}, {"word": "深造", "offset": 72}]
|
|
72
|
+
[demo] Tagging
|
|
73
|
+
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
|
74
|
+
[我:r, 是:v, 拖拉机:n, 学院:n, 手扶拖拉机:n, 专业:n, 的:uj, 。:x, 不用:v, 多久:m, ,:x, 我:r, 就:d, 会:v, 升职:v, 加薪:nr, ,:x, 当上:t, CEO:eng, ,:x, 走上:v, 人生:n, 巅峰:n, 。:x]
|
|
75
|
+
[demo] Keyword Extraction
|
|
76
|
+
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
|
77
|
+
[{"word": "CEO", "offset": [93], "weight": 11.7392}, {"word": "升职", "offset": [72], "weight": 10.8562}, {"word": "加薪", "offset": [78], "weight": 10.6426}, {"word": "手扶拖拉机", "offset": [21], "weight": 10.0089}, {"word": "巅峰", "offset": [111], "weight": 9.49396}]
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
详细请看 `test/demo.cpp`.
|
|
81
|
+
|
|
82
|
+
### 分词结果示例
|
|
83
|
+
|
|
84
|
+
**MPSegment**
|
|
85
|
+
|
|
86
|
+
Output:
|
|
87
|
+
```
|
|
88
|
+
我来到北京清华大学
|
|
89
|
+
我/来到/北京/清华大学
|
|
90
|
+
|
|
91
|
+
他来到了网易杭研大厦
|
|
92
|
+
他/来到/了/网易/杭/研/大厦
|
|
93
|
+
|
|
94
|
+
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
|
95
|
+
小/明/硕士/毕业/于/中国科学院/计算所/,/后/在/日本京都大学/深造
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
**HMMSegment**
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
我来到北京清华大学
|
|
103
|
+
我来/到/北京/清华大学
|
|
104
|
+
|
|
105
|
+
他来到了网易杭研大厦
|
|
106
|
+
他来/到/了/网易/杭/研大厦
|
|
107
|
+
|
|
108
|
+
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
|
109
|
+
小明/硕士/毕业于/中国/科学院/计算所/,/后/在/日/本/京/都/大/学/深/造
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**MixSegment**
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
我来到北京清华大学
|
|
117
|
+
我/来到/北京/清华大学
|
|
118
|
+
|
|
119
|
+
他来到了网易杭研大厦
|
|
120
|
+
他/来到/了/网易/杭研/大厦
|
|
121
|
+
|
|
122
|
+
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
|
123
|
+
小明/硕士/毕业/于/中国科学院/计算所/,/后/在/日本京都大学/深造
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
**FullSegment**
|
|
128
|
+
|
|
129
|
+
```
|
|
130
|
+
我来到北京清华大学
|
|
131
|
+
我/来到/北京/清华/清华大学/华大/大学
|
|
132
|
+
|
|
133
|
+
他来到了网易杭研大厦
|
|
134
|
+
他/来到/了/网易/杭/研/大厦
|
|
135
|
+
|
|
136
|
+
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
|
137
|
+
小/明/硕士/毕业/于/中国/中国科学院/科学/科学院/学院/计算/计算所/,/后/在/日本/日本京都大学/京都/京都大学/大学/深造
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**QuerySegment**
|
|
142
|
+
|
|
143
|
+
```
|
|
144
|
+
我来到北京清华大学
|
|
145
|
+
我/来到/北京/清华/清华大学/华大/大学
|
|
146
|
+
|
|
147
|
+
他来到了网易杭研大厦
|
|
148
|
+
他/来到/了/网易/杭研/大厦
|
|
149
|
+
|
|
150
|
+
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
|
151
|
+
小明/硕士/毕业/于/中国/中国科学院/科学/科学院/学院/计算所/,/后/在/中国/中国科学院/科学/科学院/学院/日本/日本京都大学/京都/京都大学/大学/深造
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
以上依次是MP,HMM,Mix三种方法的效果。
|
|
156
|
+
|
|
157
|
+
可以看出效果最好的是Mix,也就是融合MP和HMM的切词算法。既可以准确切出词典已有的词,又可以切出像"杭研"这样的未登录词。
|
|
158
|
+
|
|
159
|
+
Full方法切出所有字典里的词语。
|
|
160
|
+
|
|
161
|
+
Query方法先使用Mix方法切词,对于切出来的较长的词再使用Full方法。
|
|
162
|
+
|
|
163
|
+
### 自定义用户词典
|
|
164
|
+
|
|
165
|
+
自定义词典示例请看`dict/user.dict.utf8`。
|
|
166
|
+
|
|
167
|
+
没有使用自定义用户词典时的结果:
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
令狐冲/是/云/计算/行业/的/专家
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
使用自定义用户词典时的结果:
|
|
174
|
+
|
|
175
|
+
```
|
|
176
|
+
令狐冲/是/云计算/行业/的/专家
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### 关键词抽取
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
|
183
|
+
["CEO:11.7392", "升职:10.8562", "加薪:10.6426", "手扶拖拉机:10.0089", "巅峰:9.49396"]
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
详细请见 `test/demo.cpp`.
|
|
187
|
+
|
|
188
|
+
### 词性标注
|
|
189
|
+
|
|
190
|
+
```
|
|
191
|
+
我是蓝翔技工拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上总经理,出任CEO,迎娶白富美,走上人生巅峰。
|
|
192
|
+
["我:r", "是:v", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ",:x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ",:x", "当上:t", "CEO:eng", ",:x", "走上:v", "人生:n", "巅峰:n", "。:x"]
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
详细请看 `test/demo.cpp`.
|
|
196
|
+
|
|
197
|
+
支持自定义词性。
|
|
198
|
+
比如在(`dict/user.dict.utf8`)增加一行
|
|
199
|
+
|
|
200
|
+
```
|
|
201
|
+
蓝翔 nz
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
结果如下:
|
|
205
|
+
|
|
206
|
+
```
|
|
207
|
+
["我:r", "是:v", "蓝翔:nz", "技工:n", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ",:x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ",:x", "当:t", "上:f", "总经理:n", ",:x", "出任:v", "CEO:eng", ",:x", "迎娶:v", "白富美:x", ",:x", "走上:v", "人生:n", "巅峰:n", "。:x"]
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
## 其它词典资料分享
|
|
211
|
+
|
|
212
|
+
+ [dict.367W.utf8] iLife(562193561 at qq.com)
|
|
213
|
+
|
|
214
|
+
## 应用
|
|
215
|
+
|
|
216
|
+
+ [GoJieba] go语言版本的结巴中文分词。
|
|
217
|
+
+ [NodeJieba] Node.js 版本的结巴中文分词。
|
|
218
|
+
+ [simhash] 中文文档的相似度计算
|
|
219
|
+
+ [exjieba] Erlang 版本的结巴中文分词。
|
|
220
|
+
+ [jiebaR] R语言版本的结巴中文分词。
|
|
221
|
+
+ [cjieba] C语言版本的结巴分词。
|
|
222
|
+
+ [jieba_rb] Ruby 版本的结巴分词。
|
|
223
|
+
+ [iosjieba] iOS 版本的结巴分词。
|
|
224
|
+
+ [SqlJieba] MySQL 全文索引的结巴中文分词插件。
|
|
225
|
+
+ [pg_jieba] PostgreSQL 数据库的分词插件。
|
|
226
|
+
+ [gitbook-plugin-search-pro] 支持中文搜索的 gitbook 插件。
|
|
227
|
+
+ [ngx_http_cppjieba_module] Nginx 分词插件。
|
|
228
|
+
+ [cppjiebapy] 由 [jannson] 开发的供 python 模块调用的项目 [cppjiebapy], 相关讨论 [cppjiebapy_discussion] .
|
|
229
|
+
+ [KeywordServer] 50行搭建一个中文关键词抽取服务。
|
|
230
|
+
+ [cppjieba-server] CppJieba HTTP 服务器。
|
|
231
|
+
|
|
232
|
+
## 线上演示
|
|
233
|
+
|
|
234
|
+
[Web-Demo](http://cppjieba-webdemo.herokuapp.com/)
|
|
235
|
+
(建议使用chrome打开)
|
|
236
|
+
|
|
237
|
+
## 性能评测
|
|
238
|
+
|
|
239
|
+
[Jieba中文分词系列性能评测]
|
|
240
|
+
|
|
241
|
+
## 客服
|
|
242
|
+
|
|
243
|
+
+ Email: `i@yanyiwu.com`
|
|
244
|
+
+ QQ: 64162451
|
|
245
|
+
+ WeChat: 
|
|
246
|
+
|
|
247
|
+
## 鸣谢
|
|
248
|
+
|
|
249
|
+
"结巴"中文分词作者: [SunJunyi](https://github.com/fxsjy)
|
|
250
|
+
|
|
251
|
+
## 许可证
|
|
252
|
+
|
|
253
|
+
[MIT](http://yanyiwu.mit-license.org)
|
|
254
|
+
|
|
255
|
+
## 作者
|
|
256
|
+
|
|
257
|
+
- [yanyiwu](http://yanyiwu.com)
|
|
258
|
+
- [aholic](https://github.com/aholic)
|
|
259
|
+
|
|
260
|
+
[GoJieba]:https://github.com/yanyiwu/gojieba
|
|
261
|
+
[CppJieba]:https://github.com/yanyiwu/cppjieba
|
|
262
|
+
[jannson]:https://github.com/jannson
|
|
263
|
+
[cppjiebapy]:https://github.com/jannson/cppjiebapy
|
|
264
|
+
[cppjiebapy_discussion]:https://github.com/yanyiwu/cppjieba/issues/1
|
|
265
|
+
[NodeJieba]:https://github.com/yanyiwu/nodejieba
|
|
266
|
+
[jiebaR]:https://github.com/qinwf/jiebaR
|
|
267
|
+
[simhash]:https://github.com/yanyiwu/simhash
|
|
268
|
+
[代码详解]:https://github.com/yanyiwu/cppjieba/wiki/CppJieba%E4%BB%A3%E7%A0%81%E8%AF%A6%E8%A7%A3
|
|
269
|
+
[issue25]:https://github.com/yanyiwu/cppjieba/issues/25
|
|
270
|
+
[exjieba]:https://github.com/falood/exjieba
|
|
271
|
+
[KeywordServer]:https://github.com/yanyiwu/keyword_server
|
|
272
|
+
[ngx_http_cppjieba_module]:https://github.com/yanyiwu/ngx_http_cppjieba_module
|
|
273
|
+
[dict.367W.utf8]:https://github.com/qinwf/BigDict
|
|
274
|
+
[cjieba]:http://github.com/yanyiwu/cjieba
|
|
275
|
+
[jieba_rb]:https://github.com/altkatz/jieba_rb
|
|
276
|
+
[iosjieba]:https://github.com/yanyiwu/iosjieba
|
|
277
|
+
[SqlJieba]:https://github.com/yanyiwu/sqljieba
|
|
278
|
+
[Jieba中文分词系列性能评测]:http://yanyiwu.com/work/2015/06/14/jieba-series-performance-test.html
|
|
279
|
+
[pg_jieba]:https://github.com/jaiminpan/pg_jieba
|
|
280
|
+
[gitbook-plugin-search-pro]:https://plugins.gitbook.com/plugin/search-pro
|
|
281
|
+
[cppjieba-server]:https://github.com/yanyiwu/cppjieba-server
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
[](https://bitdeli.com/free "Bitdeli Badge")
|
|
285
|
+
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# CppJieba [简体中文](README.md)
|
|
2
|
+
|
|
3
|
+
[](https://travis-ci.org/yanyiwu/cppjieba)
|
|
4
|
+
[](http://yanyiwu.com/)
|
|
5
|
+
[](https://github.com/yanyiwu/cppjieba)
|
|
6
|
+
[](http://yanyiwu.com/work/2015/06/14/jieba-series-performance-test.html)
|
|
7
|
+
[](http://yanyiwu.mit-license.org)
|
|
8
|
+
[](https://ci.appveyor.com/project/yanyiwu/cppjieba/branch/master)
|
|
9
|
+
|
|
10
|
+
[](https://github.com/yanyiwu/cppjieba)
|
|
11
|
+
|
|
12
|
+
## Introduction
|
|
13
|
+
|
|
14
|
+
CppJieba is the C++ implementation of the Jieba Chinese word segmentation library.
|
|
15
|
+
|
|
16
|
+
## Usage
|
|
17
|
+
|
|
18
|
+
### Dependencies
|
|
19
|
+
|
|
20
|
+
+ `g++ (version >= 4.1 is recommended) or clang++`;
|
|
21
|
+
+ `cmake (version >= 2.6 is recommended)`;
|
|
22
|
+
|
|
23
|
+
### Download & Compile
|
|
24
|
+
|
|
25
|
+
```sh
|
|
26
|
+
git clone --depth=10 --branch=master https://github.com/yanyiwu/cppjieba.git
|
|
27
|
+
cd cppjieba
|
|
28
|
+
mkdir build
|
|
29
|
+
cd build
|
|
30
|
+
cmake ..
|
|
31
|
+
make
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Unit Testing
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
make test
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Demo
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
./demo
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Output:
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
[demo] Cut With HMM
|
|
50
|
+
他/来到/了/网易/杭研/大厦
|
|
51
|
+
[demo] Cut Without HMM
|
|
52
|
+
他/来到/了/网易/杭/研/大厦
|
|
53
|
+
我来到北京清华大学
|
|
54
|
+
[demo] CutAll
|
|
55
|
+
我/来到/北京/清华/清华大学/华大/大学
|
|
56
|
+
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
|
57
|
+
[demo] CutForSearch
|
|
58
|
+
小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/,/后/在/日本/京都/大学/日本京都大学/深造
|
|
59
|
+
[demo] Insert User Word
|
|
60
|
+
男默/女泪
|
|
61
|
+
男默女泪
|
|
62
|
+
[demo] CutForSearch Word With Offset
|
|
63
|
+
[{"word": "小明", "offset": 0}, {"word": "硕士", "offset": 6}, {"word": "毕业", "offset": 12}, {"word": "于", "offset": 18}, {"word": "中国", "offset": 21}, {"word": "科学", "offset": 27}, {"word": "学院", "offset": 30}, {"word": "科学院", "offset": 27}, {"word": "中国科学院", "offset": 21}, {"word": "计算", "offset": 36}, {"word": "计算所", "offset": 36}, {"word": ",", "offset": 45}, {"word": "后", "offset": 48}, {"word": "在", "offset": 51}, {"word": "日本", "offset": 54}, {"word": "京都", "offset": 60}, {"word": "大学", "offset": 66}, {"word": "日本京都大学", "offset": 54}, {"word": "深造", "offset": 72}]
|
|
64
|
+
[demo] Tagging
|
|
65
|
+
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
|
66
|
+
[我:r, 是:v, 拖拉机:n, 学院:n, 手扶拖拉机:n, 专业:n, 的:uj, 。:x, 不用:v, 多久:m, ,:x, 我:r, 就:d, 会:v, 升职:v, 加薪:nr, ,:x, 当上:t, CEO:eng, ,:x, 走上:v, 人生:n, 巅峰:n, 。:x]
|
|
67
|
+
[demo] Keyword Extraction
|
|
68
|
+
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
|
69
|
+
[{"word": "CEO", "offset": [93], "weight": 11.7392}, {"word": "升职", "offset": [72], "weight": 10.8562}, {"word": "加薪", "offset": [78], "weight": 10.6426}, {"word": "手扶拖拉机", "offset": [21], "weight": 10.0089}, {"word": "巅峰", "offset": [111], "weight": 9.49396}]
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Please see details in `test/demo.cpp`.
|
|
73
|
+
|
|
74
|
+
## Cases
|
|
75
|
+
|
|
76
|
+
+ [GoJieba]
|
|
77
|
+
+ [NodeJieba]
|
|
78
|
+
+ [simhash]
|
|
79
|
+
+ [exjieba]
|
|
80
|
+
+ [jiebaR]
|
|
81
|
+
+ [cjieba]
|
|
82
|
+
+ [jieba_rb]
|
|
83
|
+
+ [iosjieba]
|
|
84
|
+
+ [SqlJieba]
|
|
85
|
+
+ [pg_jieba]
|
|
86
|
+
+ [ngx_http_cppjieba_module]
|
|
87
|
+
+ [gitbook-plugin-search-pro]
|
|
88
|
+
+ [cppjieba-server]
|
|
89
|
+
|
|
90
|
+
## Contact
|
|
91
|
+
|
|
92
|
+
+ Email: `i@yanyiwu.com`
|
|
93
|
+
+ QQ: 64162451
|
|
94
|
+
+ WeChat: 
|
|
95
|
+
|
|
96
|
+
[GoJieba]:https://github.com/yanyiwu/gojieba
|
|
97
|
+
[CppJieba]:https://github.com/yanyiwu/cppjieba
|
|
98
|
+
[jannson]:https://github.com/jannson
|
|
99
|
+
[cppjiebapy]:https://github.com/jannson/cppjiebapy
|
|
100
|
+
[cppjiebapy_discussion]:https://github.com/yanyiwu/cppjieba/issues/1
|
|
101
|
+
[NodeJieba]:https://github.com/yanyiwu/nodejieba
|
|
102
|
+
[jiebaR]:https://github.com/qinwf/jiebaR
|
|
103
|
+
[simhash]:https://github.com/yanyiwu/simhash
|
|
104
|
+
[exjieba]:https://github.com/falood/exjieba
|
|
105
|
+
[cjieba]:http://github.com/yanyiwu/cjieba
|
|
106
|
+
[jieba_rb]:https://github.com/altkatz/jieba_rb
|
|
107
|
+
[iosjieba]:https://github.com/yanyiwu/iosjieba
|
|
108
|
+
[SqlJieba]:https://github.com/yanyiwu/sqljieba
|
|
109
|
+
[pg_jieba]:https://github.com/jaiminpan/pg_jieba
|
|
110
|
+
[gitbook-plugin-search-pro]:https://plugins.gitbook.com/plugin/search-pro
|
|
111
|
+
[cppjieba-server]:https://github.com/yanyiwu/cppjieba-server
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
os: Visual Studio 2015
|
|
2
|
+
|
|
3
|
+
platform: x64
|
|
4
|
+
|
|
5
|
+
# clone directory
|
|
6
|
+
clone_folder: c:\projects\cppjieba
|
|
7
|
+
|
|
8
|
+
# scripts to run before build
|
|
9
|
+
before_build:
|
|
10
|
+
- echo Running cmake...
|
|
11
|
+
- cd c:\projects\cppjieba
|
|
12
|
+
- cmake .
|
|
13
|
+
|
|
14
|
+
build:
|
|
15
|
+
project: ALL_BUILD.vcxproj # path to Visual Studio solution or project
|
|
16
|
+
|
|
17
|
+
# scripts to run after build
|
|
18
|
+
after_build:
|
|
19
|
+
- cd Debug
|
|
20
|
+
- demo.exe
|
|
21
|
+
- load_test.exe
|
|
22
|
+
- cd ..
|
|
23
|
+
- COPY .\test\Debug\test.run.exe .\test\test.run.exe
|
|
24
|
+
- cd test
|
|
25
|
+
- test.run.exe
|
|
26
|
+
- cd ..
|
|
27
|
+
- 7z a c:\projects\all.zip * -tzip
|
|
28
|
+
- cd c:\projects
|
|
29
|
+
|
|
30
|
+
artifacts:
|
|
31
|
+
- path: all.zip
|
|
32
|
+
name: all.zip
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ADD_SUBDIRECTORY(gtest)
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
// Copyright 2005, Google Inc.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
|
5
|
+
// modification, are permitted provided that the following conditions are
|
|
6
|
+
// met:
|
|
7
|
+
//
|
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
|
12
|
+
// in the documentation and/or other materials provided with the
|
|
13
|
+
// distribution.
|
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
|
16
|
+
// this software without specific prior written permission.
|
|
17
|
+
//
|
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
|
+
//
|
|
30
|
+
// Author: wan@google.com (Zhanyong Wan)
|
|
31
|
+
//
|
|
32
|
+
// The Google C++ Testing Framework (Google Test)
|
|
33
|
+
//
|
|
34
|
+
// This header file defines the public API for death tests. It is
|
|
35
|
+
// #included by gtest.h so a user doesn't need to include this
|
|
36
|
+
// directly.
|
|
37
|
+
|
|
38
|
+
#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
|
|
39
|
+
#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
|
|
40
|
+
|
|
41
|
+
#include "gtest/internal/gtest-death-test-internal.h"
|
|
42
|
+
|
|
43
|
+
namespace testing {
|
|
44
|
+
|
|
45
|
+
// This flag controls the style of death tests. Valid values are "threadsafe",
|
|
46
|
+
// meaning that the death test child process will re-execute the test binary
|
|
47
|
+
// from the start, running only a single death test, or "fast",
|
|
48
|
+
// meaning that the child process will execute the test logic immediately
|
|
49
|
+
// after forking.
|
|
50
|
+
GTEST_DECLARE_string_(death_test_style);
|
|
51
|
+
|
|
52
|
+
#if GTEST_HAS_DEATH_TEST
|
|
53
|
+
|
|
54
|
+
// The following macros are useful for writing death tests.
|
|
55
|
+
|
|
56
|
+
// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
|
|
57
|
+
// executed:
|
|
58
|
+
//
|
|
59
|
+
// 1. It generates a warning if there is more than one active
|
|
60
|
+
// thread. This is because it's safe to fork() or clone() only
|
|
61
|
+
// when there is a single thread.
|
|
62
|
+
//
|
|
63
|
+
// 2. The parent process clone()s a sub-process and runs the death
|
|
64
|
+
// test in it; the sub-process exits with code 0 at the end of the
|
|
65
|
+
// death test, if it hasn't exited already.
|
|
66
|
+
//
|
|
67
|
+
// 3. The parent process waits for the sub-process to terminate.
|
|
68
|
+
//
|
|
69
|
+
// 4. The parent process checks the exit code and error message of
|
|
70
|
+
// the sub-process.
|
|
71
|
+
//
|
|
72
|
+
// Examples:
|
|
73
|
+
//
|
|
74
|
+
// ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
|
|
75
|
+
// for (int i = 0; i < 5; i++) {
|
|
76
|
+
// EXPECT_DEATH(server.ProcessRequest(i),
|
|
77
|
+
// "Invalid request .* in ProcessRequest()")
|
|
78
|
+
// << "Failed to die on request " << i;
|
|
79
|
+
// }
|
|
80
|
+
//
|
|
81
|
+
// ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
|
|
82
|
+
//
|
|
83
|
+
// bool KilledBySIGHUP(int exit_code) {
|
|
84
|
+
// return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
|
|
85
|
+
// }
|
|
86
|
+
//
|
|
87
|
+
// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
|
|
88
|
+
//
|
|
89
|
+
// On the regular expressions used in death tests:
|
|
90
|
+
//
|
|
91
|
+
// On POSIX-compliant systems (*nix), we use the <regex.h> library,
|
|
92
|
+
// which uses the POSIX extended regex syntax.
|
|
93
|
+
//
|
|
94
|
+
// On other platforms (e.g. Windows), we only support a simple regex
|
|
95
|
+
// syntax implemented as part of Google Test. This limited
|
|
96
|
+
// implementation should be enough most of the time when writing
|
|
97
|
+
// death tests; though it lacks many features you can find in PCRE
|
|
98
|
+
// or POSIX extended regex syntax. For example, we don't support
|
|
99
|
+
// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
|
|
100
|
+
// repetition count ("x{5,7}"), among others.
|
|
101
|
+
//
|
|
102
|
+
// Below is the syntax that we do support. We chose it to be a
|
|
103
|
+
// subset of both PCRE and POSIX extended regex, so it's easy to
|
|
104
|
+
// learn wherever you come from. In the following: 'A' denotes a
|
|
105
|
+
// literal character, period (.), or a single \\ escape sequence;
|
|
106
|
+
// 'x' and 'y' denote regular expressions; 'm' and 'n' are for
|
|
107
|
+
// natural numbers.
|
|
108
|
+
//
|
|
109
|
+
// c matches any literal character c
|
|
110
|
+
// \\d matches any decimal digit
|
|
111
|
+
// \\D matches any character that's not a decimal digit
|
|
112
|
+
// \\f matches \f
|
|
113
|
+
// \\n matches \n
|
|
114
|
+
// \\r matches \r
|
|
115
|
+
// \\s matches any ASCII whitespace, including \n
|
|
116
|
+
// \\S matches any character that's not a whitespace
|
|
117
|
+
// \\t matches \t
|
|
118
|
+
// \\v matches \v
|
|
119
|
+
// \\w matches any letter, _, or decimal digit
|
|
120
|
+
// \\W matches any character that \\w doesn't match
|
|
121
|
+
// \\c matches any literal character c, which must be a punctuation
|
|
122
|
+
// . matches any single character except \n
|
|
123
|
+
// A? matches 0 or 1 occurrences of A
|
|
124
|
+
// A* matches 0 or many occurrences of A
|
|
125
|
+
// A+ matches 1 or many occurrences of A
|
|
126
|
+
// ^ matches the beginning of a string (not that of each line)
|
|
127
|
+
// $ matches the end of a string (not that of each line)
|
|
128
|
+
// xy matches x followed by y
|
|
129
|
+
//
|
|
130
|
+
// If you accidentally use PCRE or POSIX extended regex features
|
|
131
|
+
// not implemented by us, you will get a run-time failure. In that
|
|
132
|
+
// case, please try to rewrite your regular expression within the
|
|
133
|
+
// above syntax.
|
|
134
|
+
//
|
|
135
|
+
// This implementation is *not* meant to be as highly tuned or robust
|
|
136
|
+
// as a compiled regex library, but should perform well enough for a
|
|
137
|
+
// death test, which already incurs significant overhead by launching
|
|
138
|
+
// a child process.
|
|
139
|
+
//
|
|
140
|
+
// Known caveats:
|
|
141
|
+
//
|
|
142
|
+
// A "threadsafe" style death test obtains the path to the test
|
|
143
|
+
// program from argv[0] and re-executes it in the sub-process. For
|
|
144
|
+
// simplicity, the current implementation doesn't search the PATH
|
|
145
|
+
// when launching the sub-process. This means that the user must
|
|
146
|
+
// invoke the test program via a path that contains at least one
|
|
147
|
+
// path separator (e.g. path/to/foo_test and
|
|
148
|
+
// /absolute/path/to/bar_test are fine, but foo_test is not). This
|
|
149
|
+
// is rarely a problem as people usually don't put the test binary
|
|
150
|
+
// directory in PATH.
|
|
151
|
+
//
|
|
152
|
+
// TODO(wan@google.com): make thread-safe death tests search the PATH.
|
|
153
|
+
|
|
154
|
+
// Asserts that a given statement causes the program to exit, with an
|
|
155
|
+
// integer exit status that satisfies predicate, and emitting error output
|
|
156
|
+
// that matches regex.
|
|
157
|
+
# define ASSERT_EXIT(statement, predicate, regex) \
|
|
158
|
+
GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)
|
|
159
|
+
|
|
160
|
+
// Like ASSERT_EXIT, but continues on to successive tests in the
|
|
161
|
+
// test case, if any:
|
|
162
|
+
# define EXPECT_EXIT(statement, predicate, regex) \
|
|
163
|
+
GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)
|
|
164
|
+
|
|
165
|
+
// Asserts that a given statement causes the program to exit, either by
|
|
166
|
+
// explicitly exiting with a nonzero exit code or being killed by a
|
|
167
|
+
// signal, and emitting error output that matches regex.
|
|
168
|
+
# define ASSERT_DEATH(statement, regex) \
|
|
169
|
+
ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
|
|
170
|
+
|
|
171
|
+
// Like ASSERT_DEATH, but continues on to successive tests in the
|
|
172
|
+
// test case, if any:
|
|
173
|
+
# define EXPECT_DEATH(statement, regex) \
|
|
174
|
+
EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
|
|
175
|
+
|
|
176
|
+
// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
|
|
177
|
+
|
|
178
|
+
// Tests that an exit code describes a normal exit with a given exit code.
|
|
179
|
+
class GTEST_API_ ExitedWithCode {
|
|
180
|
+
public:
|
|
181
|
+
explicit ExitedWithCode(int exit_code);
|
|
182
|
+
bool operator()(int exit_status) const;
|
|
183
|
+
private:
|
|
184
|
+
// No implementation - assignment is unsupported.
|
|
185
|
+
void operator=(const ExitedWithCode& other);
|
|
186
|
+
|
|
187
|
+
const int exit_code_;
|
|
188
|
+
};
|
|
189
|
+
|
|
190
|
+
# if !GTEST_OS_WINDOWS
|
|
191
|
+
// Tests that an exit code describes an exit due to termination by a
|
|
192
|
+
// given signal.
|
|
193
|
+
class GTEST_API_ KilledBySignal {
|
|
194
|
+
public:
|
|
195
|
+
explicit KilledBySignal(int signum);
|
|
196
|
+
bool operator()(int exit_status) const;
|
|
197
|
+
private:
|
|
198
|
+
const int signum_;
|
|
199
|
+
};
|
|
200
|
+
# endif // !GTEST_OS_WINDOWS
|
|
201
|
+
|
|
202
|
+
// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
|
|
203
|
+
// The death testing framework causes this to have interesting semantics,
|
|
204
|
+
// since the side effects of the call are only visible in opt mode, and not
|
|
205
|
+
// in debug mode.
|
|
206
|
+
//
|
|
207
|
+
// In practice, this can be used to test functions that utilize the
|
|
208
|
+
// LOG(DFATAL) macro using the following style:
|
|
209
|
+
//
|
|
210
|
+
// int DieInDebugOr12(int* sideeffect) {
|
|
211
|
+
// if (sideeffect) {
|
|
212
|
+
// *sideeffect = 12;
|
|
213
|
+
// }
|
|
214
|
+
// LOG(DFATAL) << "death";
|
|
215
|
+
// return 12;
|
|
216
|
+
// }
|
|
217
|
+
//
|
|
218
|
+
// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
|
|
219
|
+
// int sideeffect = 0;
|
|
220
|
+
// // Only asserts in dbg.
|
|
221
|
+
// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
|
|
222
|
+
//
|
|
223
|
+
// #ifdef NDEBUG
|
|
224
|
+
// // opt-mode has sideeffect visible.
|
|
225
|
+
// EXPECT_EQ(12, sideeffect);
|
|
226
|
+
// #else
|
|
227
|
+
// // dbg-mode no visible sideeffect.
|
|
228
|
+
// EXPECT_EQ(0, sideeffect);
|
|
229
|
+
// #endif
|
|
230
|
+
// }
|
|
231
|
+
//
|
|
232
|
+
// This will assert that DieInDebugOr12() crashes in debug
|
|
233
|
+
// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
|
|
234
|
+
// appropriate fallback value (12 in this case) in opt mode. If you
|
|
235
|
+
// need to test that a function has appropriate side-effects in opt
|
|
236
|
+
// mode, include assertions against the side-effects. A general
|
|
237
|
+
// pattern for this is:
|
|
238
|
+
//
|
|
239
|
+
// EXPECT_DEBUG_DEATH({
|
|
240
|
+
// // Side-effects here will have an effect after this statement in
|
|
241
|
+
// // opt mode, but none in debug mode.
|
|
242
|
+
// EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
|
|
243
|
+
// }, "death");
|
|
244
|
+
//
|
|
245
|
+
# ifdef NDEBUG
|
|
246
|
+
|
|
247
|
+
# define EXPECT_DEBUG_DEATH(statement, regex) \
|
|
248
|
+
do { statement; } while (::testing::internal::AlwaysFalse())
|
|
249
|
+
|
|
250
|
+
# define ASSERT_DEBUG_DEATH(statement, regex) \
|
|
251
|
+
do { statement; } while (::testing::internal::AlwaysFalse())
|
|
252
|
+
|
|
253
|
+
# else
|
|
254
|
+
|
|
255
|
+
# define EXPECT_DEBUG_DEATH(statement, regex) \
|
|
256
|
+
EXPECT_DEATH(statement, regex)
|
|
257
|
+
|
|
258
|
+
# define ASSERT_DEBUG_DEATH(statement, regex) \
|
|
259
|
+
ASSERT_DEATH(statement, regex)
|
|
260
|
+
|
|
261
|
+
# endif // NDEBUG for EXPECT_DEBUG_DEATH
|
|
262
|
+
#endif // GTEST_HAS_DEATH_TEST
|
|
263
|
+
|
|
264
|
+
// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
|
|
265
|
+
// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
|
|
266
|
+
// death tests are supported; otherwise they just issue a warning. This is
|
|
267
|
+
// useful when you are combining death test assertions with normal test
|
|
268
|
+
// assertions in one test.
|
|
269
|
+
#if GTEST_HAS_DEATH_TEST
|
|
270
|
+
# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
|
|
271
|
+
EXPECT_DEATH(statement, regex)
|
|
272
|
+
# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
|
|
273
|
+
ASSERT_DEATH(statement, regex)
|
|
274
|
+
#else
|
|
275
|
+
# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
|
|
276
|
+
GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, )
|
|
277
|
+
# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
|
|
278
|
+
GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return)
|
|
279
|
+
#endif
|
|
280
|
+
|
|
281
|
+
} // namespace testing
|
|
282
|
+
|
|
283
|
+
#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
|