cppjieba_rb 0.3.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +3 -0
- data/README.md +1 -1
- data/Rakefile +2 -2
- data/cppjieba_rb.gemspec +5 -5
- data/ext/cppjieba/ChangeLog.md +13 -0
- data/ext/cppjieba/README.md +14 -17
- data/ext/cppjieba/README_EN.md +2 -0
- data/ext/cppjieba/deps/limonp/StdExtension.hpp +1 -3
- data/ext/cppjieba/deps/limonp/StringUtil.hpp +1 -1
- data/ext/cppjieba/include/cppjieba/FullSegment.hpp +1 -1
- data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +0 -1
- data/ext/cppjieba/include/cppjieba/Unicode.hpp +2 -2
- data/ext/cppjieba_rb/cppjieba_rb.c +1 -0
- data/ext/cppjieba_rb/internal.cc +1 -1
- data/lib/cppjieba_rb/version.rb +1 -1
- metadata +24 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0307422c11b4c5b703e3017990e706e242e56f062eca887a0445e3b5b3c20bcc
|
4
|
+
data.tar.gz: 2b0fb44719d6a0614ceadb99bfd7a49b7a814dc81acd2900897230f6167e7ec1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b642ca8a5afc99e45fb18bcafd06bf4d24a6ea6c5b7e5b5dffa2a76e61418e0f170cf7a8a61c911c8ab035ca71f810100eafbdf7390347d0466f9f7b39f62d99
|
7
|
+
data.tar.gz: dcbf422afc0d0c85e5db5ed25c90dedff46767d9a6f3c52250f256584ef082437718da0c921c1291e2c1623944529ebd11c80b8e6247c84ee27b2febffaea8ef
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/cppjieba_rb.svg)](http://badge.fury.io/rb/cppjieba_rb)
|
4
4
|
|
5
|
-
[![Build Status](https://travis-ci.
|
5
|
+
[![Build Status](https://travis-ci.com/erickguan/cppjieba_rb.svg?branch=master)](https://travis-ci.com/erickguan/cppjieba_rb)
|
6
6
|
|
7
7
|
[![Patreon](https://img.shields.io/badge/back_on-patreon-red.svg)](https://www.patreon.com/fantasticfears)
|
8
8
|
|
data/Rakefile
CHANGED
@@ -2,8 +2,8 @@ require "bundler/gem_tasks"
|
|
2
2
|
require 'rake/testtask'
|
3
3
|
require 'rake/extensiontask'
|
4
4
|
|
5
|
-
gem = Gem::Specification.load(File.dirname(__FILE__) + '/
|
6
|
-
Rake::ExtensionTask.new(
|
5
|
+
gem = Gem::Specification.load(File.dirname(__FILE__) + '/cppjieba_rb.gemspec')
|
6
|
+
Rake::ExtensionTask.new("cppjieba_rb", gem) do |ext|
|
7
7
|
ext.lib_dir = File.join('lib', 'cppjieba_rb')
|
8
8
|
end
|
9
9
|
|
data/cppjieba_rb.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ['Erick Guan']
|
10
10
|
spec.email = ['fantasticfears@gmail.com']
|
11
11
|
spec.summary = 'cppjieba binding for ruby'
|
12
|
-
spec.description = 'cppjieba binding for ruby'
|
12
|
+
spec.description = 'cppjieba binding for ruby. Mainly used by Discourse.'
|
13
13
|
spec.homepage = 'https://github.com/fantasticfears/cppjieba_rb'
|
14
14
|
spec.required_ruby_version = '>=2.3.0'
|
15
15
|
spec.license = 'MIT'
|
@@ -43,8 +43,8 @@ Gem::Specification.new do |spec|
|
|
43
43
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
44
44
|
spec.require_paths = ['lib']
|
45
45
|
|
46
|
-
spec.add_development_dependency 'bundler', '~>
|
47
|
-
spec.add_development_dependency 'rake'
|
48
|
-
spec.add_development_dependency 'rake-compiler'
|
49
|
-
spec.add_development_dependency 'minitest'
|
46
|
+
spec.add_development_dependency 'bundler', '~> 2.2', '>= 2.2.10'
|
47
|
+
spec.add_development_dependency 'rake', '~> 13'
|
48
|
+
spec.add_development_dependency 'rake-compiler', '~> 1.1'
|
49
|
+
spec.add_development_dependency 'minitest', '~> 5.14'
|
50
50
|
end
|
data/ext/cppjieba/ChangeLog.md
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
# CppJieba ChangeLog
|
2
2
|
|
3
|
+
## v5.0.3
|
4
|
+
|
5
|
+
+ Upgrade [limonp](https://github.com/yanyiwu/limonp) -> v0.6.3
|
6
|
+
|
7
|
+
## v5.0.2
|
8
|
+
|
9
|
+
+ Upgrade [limonp](https://github.com/yanyiwu/limonp) -> v0.6.1
|
10
|
+
|
11
|
+
## v5.0.1
|
12
|
+
|
13
|
+
+ Make Compiler Happier.
|
14
|
+
+ Add PHP, DLang Links.
|
15
|
+
|
3
16
|
## v5.0.0
|
4
17
|
|
5
18
|
+ Notice(**api changed**) : Jieba class 3 arguments -> 5 arguments, and use KeywordExtractor in Jieba
|
data/ext/cppjieba/README.md
CHANGED
@@ -2,12 +2,15 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/yanyiwu/cppjieba.png?branch=master)](https://travis-ci.org/yanyiwu/cppjieba)
|
4
4
|
[![Author](https://img.shields.io/badge/author-@yanyiwu-blue.svg?style=flat)](http://yanyiwu.com/)
|
5
|
+
[![Donate](https://img.shields.io/badge/donate-eos_gitdeveloper-orange.svg)](https://eosflare.io/account/gitdeveloper)
|
5
6
|
[![Platform](https://img.shields.io/badge/platform-Linux,%20OS%20X,%20Windows-green.svg?style=flat)](https://github.com/yanyiwu/cppjieba)
|
6
7
|
[![Performance](https://img.shields.io/badge/performance-excellent-brightgreen.svg?style=flat)](http://yanyiwu.com/work/2015/06/14/jieba-series-performance-test.html)
|
8
|
+
[![Tag](https://img.shields.io/github/v/tag/yanyiwu/cppjieba.svg)](https://github.com/yanyiwu/cppjieba/releases)
|
7
9
|
[![License](https://img.shields.io/badge/license-MIT-yellow.svg?style=flat)](http://yanyiwu.mit-license.org)
|
8
10
|
[![Build status](https://ci.appveyor.com/api/projects/status/wl30fjnm2rhft6ta/branch/master?svg=true)](https://ci.appveyor.com/project/yanyiwu/cppjieba/branch/master)
|
9
11
|
|
10
|
-
|
12
|
+
|
13
|
+
[![logo](http://images.yanyiwu.com/CppJiebaLogo-v1.png)](https://github.com/yanyiwu/cppjieba)
|
11
14
|
|
12
15
|
## 简介
|
13
16
|
|
@@ -223,6 +226,7 @@ Query方法先使用Mix方法切词,对于切出来的较长的词再使用Ful
|
|
223
226
|
+ [iosjieba] iOS 版本的结巴分词。
|
224
227
|
+ [SqlJieba] MySQL 全文索引的结巴中文分词插件。
|
225
228
|
+ [pg_jieba] PostgreSQL 数据库的分词插件。
|
229
|
+
+ [simple] SQLite3 FTS5 数据库的分词插件。
|
226
230
|
+ [gitbook-plugin-search-pro] 支持中文搜索的 gitbook 插件。
|
227
231
|
+ [ngx_http_cppjieba_module] Nginx 分词插件。
|
228
232
|
+ [cppjiebapy] 由 [jannson] 开发的供 python 模块调用的项目 [cppjiebapy], 相关讨论 [cppjiebapy_discussion] .
|
@@ -231,6 +235,7 @@ Query方法先使用Mix方法切词,对于切出来的较长的词再使用Ful
|
|
231
235
|
+ [cppjieba-server] CppJieba HTTP 服务器。
|
232
236
|
+ [phpjieba] php版本的结巴分词扩展。
|
233
237
|
+ [perl5-jieba] Perl版本的结巴分词扩展。
|
238
|
+
+ [jieba-dlang] D 语言的结巴分词 Deimos Bindings。
|
234
239
|
|
235
240
|
## 线上演示
|
236
241
|
|
@@ -241,24 +246,16 @@ Query方法先使用Mix方法切词,对于切出来的较长的词再使用Ful
|
|
241
246
|
|
242
247
|
[Jieba中文分词系列性能评测]
|
243
248
|
|
244
|
-
##
|
245
|
-
|
246
|
-
+ Email: `i@yanyiwu.com`
|
247
|
-
+ QQ: 64162451
|
248
|
-
+ WeChat: ![image](http://7viirv.com1.z0.glb.clouddn.com/5a7d1b5c0d_yanyiwu_personal_qrcodes.jpg)
|
249
|
-
|
250
|
-
## 鸣谢
|
249
|
+
## Sponsorship
|
251
250
|
|
252
|
-
|
251
|
+
[![sponsorship](http://images.gitads.io/cppjieba)](https://tracking.gitads.io/?campaign=gitads&repo=cppjieba&redirect=gitads.io)
|
253
252
|
|
254
|
-
##
|
253
|
+
## Contributors
|
255
254
|
|
256
|
-
|
255
|
+
### Code Contributors
|
257
256
|
|
258
|
-
|
259
|
-
|
260
|
-
- [yanyiwu](yanyiwu.com)
|
261
|
-
- [aholic](https://github.com/aholic)
|
257
|
+
This project exists thanks to all the people who contribute.
|
258
|
+
<a href="https://github.com/yanyiwu/cppjieba/graphs/contributors"><img src="https://opencollective.com/cppjieba/contributors.svg?width=890&button=false" /></a>
|
262
259
|
|
263
260
|
[GoJieba]:https://github.com/yanyiwu/gojieba
|
264
261
|
[CppJieba]:https://github.com/yanyiwu/cppjieba
|
@@ -286,7 +283,7 @@ Query方法先使用Mix方法切词,对于切出来的较长的词再使用Ful
|
|
286
283
|
[cppjieba-server]:https://github.com/yanyiwu/cppjieba-server
|
287
284
|
[phpjieba]:https://github.com/jonnywang/phpjieba
|
288
285
|
[perl5-jieba]:https://metacpan.org/pod/distribution/Lingua-ZH-Jieba/lib/Lingua/ZH/Jieba.pod
|
286
|
+
[jieba-dlang]:https://github.com/shove70/jieba
|
287
|
+
[simple]:https://github.com/wangfenjin/simple
|
289
288
|
|
290
289
|
|
291
|
-
[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/yanyiwu/cppjieba/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
|
292
|
-
|
data/ext/cppjieba/README_EN.md
CHANGED
@@ -87,6 +87,7 @@ Please see details in `test/demo.cpp`.
|
|
87
87
|
+ [gitbook-plugin-search-pro]
|
88
88
|
+ [cppjieba-server]
|
89
89
|
+ [perl5-jieba]
|
90
|
+
+ [jieba-dlang]
|
90
91
|
|
91
92
|
## Contact
|
92
93
|
|
@@ -111,3 +112,4 @@ Please see details in `test/demo.cpp`.
|
|
111
112
|
[gitbook-plugin-search-pro]:https://plugins.gitbook.com/plugin/search-pro
|
112
113
|
[cppjieba-server]:https://github.com/yanyiwu/cppjieba-server
|
113
114
|
[perl5-jieba]:https://metacpan.org/pod/distribution/Lingua-ZH-Jieba/lib/Lingua/ZH/Jieba.pod
|
115
|
+
[jieba-dlang]:https://github.com/shove70/jieba
|
@@ -6,7 +6,7 @@
|
|
6
6
|
#ifdef __APPLE__
|
7
7
|
#include <unordered_map>
|
8
8
|
#include <unordered_set>
|
9
|
-
#elif(__cplusplus
|
9
|
+
#elif(__cplusplus >= 201103L)
|
10
10
|
#include <unordered_map>
|
11
11
|
#include <unordered_set>
|
12
12
|
#elif defined _MSC_VER
|
@@ -29,8 +29,6 @@ using std::tr1::unordered_set;
|
|
29
29
|
#include <fstream>
|
30
30
|
#include <sstream>
|
31
31
|
|
32
|
-
#define print(x) std::cout << x << std::endl
|
33
|
-
|
34
32
|
namespace std {
|
35
33
|
|
36
34
|
template<typename T>
|
@@ -80,7 +80,7 @@ inline string& Lower(string& str) {
|
|
80
80
|
|
81
81
|
inline bool IsSpace(unsigned c) {
|
82
82
|
// when passing large int as the argument of isspace, it core dump, so here need a type cast.
|
83
|
-
return c > 0xff ? false : std::isspace(c & 0xff);
|
83
|
+
return c > 0xff ? false : std::isspace(c & 0xff) != 0;
|
84
84
|
}
|
85
85
|
|
86
86
|
inline std::string& LTrim(std::string &s) {
|
@@ -48,7 +48,7 @@ class FullSegment: public SegmentBase {
|
|
48
48
|
void Cut(RuneStrArray::const_iterator begin,
|
49
49
|
RuneStrArray::const_iterator end,
|
50
50
|
vector<WordRange>& res) const {
|
51
|
-
//
|
51
|
+
// result of searching in trie tree
|
52
52
|
LocalVector<pair<size_t, const DictUnit*> > tRes;
|
53
53
|
|
54
54
|
// max index of res's words
|
@@ -38,10 +38,10 @@ struct RuneStr {
|
|
38
38
|
uint32_t len;
|
39
39
|
uint32_t unicode_offset;
|
40
40
|
uint32_t unicode_length;
|
41
|
-
RuneStr(): rune(0), offset(0), len(0) {
|
41
|
+
RuneStr(): rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) {
|
42
42
|
}
|
43
43
|
RuneStr(Rune r, uint32_t o, uint32_t l)
|
44
|
-
: rune(r), offset(o), len(l) {
|
44
|
+
: rune(r), offset(o), len(l), unicode_offset(0), unicode_length(0) {
|
45
45
|
}
|
46
46
|
RuneStr(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
|
47
47
|
: rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
|
data/ext/cppjieba_rb/internal.cc
CHANGED
@@ -163,7 +163,7 @@ void Init_internal()
|
|
163
163
|
rb_sFull = rb_intern("full");
|
164
164
|
u8_enc = rb_utf8_encoding();
|
165
165
|
|
166
|
-
rb_cCppjiebaRb_Internal = rb_define_class_under(rb_mCppjiebaRb, "Internal",
|
166
|
+
rb_cCppjiebaRb_Internal = rb_define_class_under(rb_mCppjiebaRb, "Internal", rb_cObject);
|
167
167
|
rb_define_alloc_func(rb_cCppjiebaRb_Internal, internal_alloc);
|
168
168
|
rb_define_method(rb_cCppjiebaRb_Internal, "initialize", (ruby_method*) &internal_initialize, 5);
|
169
169
|
rb_define_method(rb_cCppjiebaRb_Internal, "extract_keyword", (ruby_method*) &internal_extract_keyword, 2);
|
data/lib/cppjieba_rb/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cppjieba_rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erick Guan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,57 +16,63 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '2.2'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.2.10
|
20
23
|
type: :development
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
25
28
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
29
|
+
version: '2.2'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.2.10
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: rake
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
|
-
- - "
|
37
|
+
- - "~>"
|
32
38
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
39
|
+
version: '13'
|
34
40
|
type: :development
|
35
41
|
prerelease: false
|
36
42
|
version_requirements: !ruby/object:Gem::Requirement
|
37
43
|
requirements:
|
38
|
-
- - "
|
44
|
+
- - "~>"
|
39
45
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
46
|
+
version: '13'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
48
|
name: rake-compiler
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
44
50
|
requirements:
|
45
|
-
- - "
|
51
|
+
- - "~>"
|
46
52
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
53
|
+
version: '1.1'
|
48
54
|
type: :development
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
51
57
|
requirements:
|
52
|
-
- - "
|
58
|
+
- - "~>"
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
60
|
+
version: '1.1'
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: minitest
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
58
64
|
requirements:
|
59
|
-
- - "
|
65
|
+
- - "~>"
|
60
66
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
67
|
+
version: '5.14'
|
62
68
|
type: :development
|
63
69
|
prerelease: false
|
64
70
|
version_requirements: !ruby/object:Gem::Requirement
|
65
71
|
requirements:
|
66
|
-
- - "
|
72
|
+
- - "~>"
|
67
73
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
69
|
-
description: cppjieba binding for ruby
|
74
|
+
version: '5.14'
|
75
|
+
description: cppjieba binding for ruby. Mainly used by Discourse.
|
70
76
|
email:
|
71
77
|
- fantasticfears@gmail.com
|
72
78
|
executables: []
|
@@ -234,8 +240,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
234
240
|
- !ruby/object:Gem::Version
|
235
241
|
version: '0'
|
236
242
|
requirements: []
|
237
|
-
|
238
|
-
rubygems_version: 2.6.14
|
243
|
+
rubygems_version: 3.0.3
|
239
244
|
signing_key:
|
240
245
|
specification_version: 4
|
241
246
|
summary: cppjieba binding for ruby
|