cppjieba_rb 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/ext/cppjieba_rb/internal.cc +26 -0
- data/lib/cppjieba_rb.rb +4 -0
- data/lib/cppjieba_rb/version.rb +1 -1
- data/test/test_stop_word_filter.rb +10 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4ab9b4adf8e5ddccbe36570f3492b4ae438c7b6a
|
4
|
+
data.tar.gz: 72987214a678eaf3505f804bd7c9db86f94c6178
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cd2f4cdd703464e0672ea034ae5bc875d4754ce3e45d859b66d2cf65759e32bbca499221ef4ca70239bcf3606eafeee13b9c6723d5a626a8aa2bf06fdfc970e
|
7
|
+
data.tar.gz: 796eacaca074e602a080a94a2532e4f051e2b9e874ee6412cf45b683522afc36f2ba219be6a375c728ce6f3eb9b225c7fff783643cac3732dd4992bf263a1e4a
|
data/README.md
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
|
5
5
|
[Build Status](https://travis-ci.org/fantasticfears/cppjieba_rb)
|
6
6
|
|
7
|
+
+[Patreon](https://www.patreon.com/fantasticfears)
|
8
|
+
|
7
9
|
Ruby bindings for [Cppjieba](https://github.com/yanyiwu/cppjieba). C++11 required. (gcc 4.8+)
|
8
10
|
|
9
11
|
## Installation
|
data/ext/cppjieba_rb/internal.cc
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
#include <ruby.h>
|
2
2
|
#include <ruby/encoding.h>
|
3
|
+
|
4
|
+
#include <string>
|
5
|
+
#include <iostream>
|
6
|
+
#include <unordered_set>
|
7
|
+
|
3
8
|
#include "cppjieba/Jieba.hpp"
|
4
9
|
|
5
10
|
#define GET_CPPJIEBA(_data) jieba_cpp_data* _data; \
|
@@ -7,6 +12,7 @@
|
|
7
12
|
|
8
13
|
typedef struct {
|
9
14
|
cppjieba::Jieba* jieba;
|
15
|
+
std::unordered_set<std::string>* stop_words;
|
10
16
|
} jieba_cpp_data;
|
11
17
|
|
12
18
|
// make compiler happy
|
@@ -26,6 +32,8 @@ static void jieba_cpp_free(void* _this)
|
|
26
32
|
jieba_cpp_data* data = static_cast<jieba_cpp_data*>(_this);
|
27
33
|
delete data->jieba;
|
28
34
|
data->jieba = nullptr;
|
35
|
+
delete data->stop_words;
|
36
|
+
data->stop_words = nullptr;
|
29
37
|
}
|
30
38
|
|
31
39
|
static size_t jieba_cpp_memsize(const void* _)
|
@@ -61,6 +69,13 @@ VALUE internal_initialize(VALUE self,
|
|
61
69
|
StringValueCStr(user_dict_path),
|
62
70
|
StringValueCStr(idf_path),
|
63
71
|
StringValueCStr(stop_word_path));
|
72
|
+
data->stop_words = new std::unordered_set<std::string>();
|
73
|
+
std::ifstream ifs(StringValueCStr(stop_word_path));
|
74
|
+
std::string line;
|
75
|
+
while (getline(ifs, line)) {
|
76
|
+
data->stop_words->insert(line);
|
77
|
+
}
|
78
|
+
assert(data->stop_words->size());
|
64
79
|
}
|
65
80
|
|
66
81
|
VALUE internal_extract_keyword(VALUE self, VALUE text_rbs, VALUE topN)
|
@@ -127,6 +142,16 @@ static VALUE internal_segment_tag(VALUE self, VALUE text_rbs)
|
|
127
142
|
return result;
|
128
143
|
}
|
129
144
|
|
145
|
+
static VALUE internal_stop_word(VALUE self, VALUE word)
|
146
|
+
{
|
147
|
+
std::string test(StringValueCStr(word));
|
148
|
+
GET_CPPJIEBA(data);
|
149
|
+
if (data->stop_words->find(test) != data->stop_words->end()) {
|
150
|
+
return Qtrue;
|
151
|
+
} else {
|
152
|
+
return Qfalse;
|
153
|
+
}
|
154
|
+
}
|
130
155
|
|
131
156
|
void Init_internal()
|
132
157
|
{
|
@@ -143,6 +168,7 @@ void Init_internal()
|
|
143
168
|
rb_define_method(rb_cCppjiebaRb_Internal, "extract_keyword", (ruby_method*) &internal_extract_keyword, 2);
|
144
169
|
rb_define_method(rb_cCppjiebaRb_Internal, "segment", (ruby_method*) &internal_segment, 4);
|
145
170
|
rb_define_method(rb_cCppjiebaRb_Internal, "segment_tag", (ruby_method*) &internal_segment_tag, 1);
|
171
|
+
rb_define_method(rb_cCppjiebaRb_Internal, "stop_word?", (ruby_method*) &internal_stop_word, 1);
|
146
172
|
}
|
147
173
|
|
148
174
|
}
|
data/lib/cppjieba_rb.rb
CHANGED
data/lib/cppjieba_rb/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cppjieba_rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erick Guan
|
@@ -213,6 +213,7 @@ files:
|
|
213
213
|
- lib/cppjieba_rb/version.rb
|
214
214
|
- test/test_keyword.rb
|
215
215
|
- test/test_segment.rb
|
216
|
+
- test/test_stop_word_filter.rb
|
216
217
|
- test/test_tagging.rb
|
217
218
|
homepage: https://github.com/fantasticfears/cppjieba_rb
|
218
219
|
licenses:
|
@@ -241,4 +242,5 @@ summary: cppjieba binding for ruby
|
|
241
242
|
test_files:
|
242
243
|
- test/test_keyword.rb
|
243
244
|
- test/test_segment.rb
|
245
|
+
- test/test_stop_word_filter.rb
|
244
246
|
- test/test_tagging.rb
|