cppjieba_rb 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/ext/cppjieba_rb/internal.cc +26 -0
- data/lib/cppjieba_rb.rb +4 -0
- data/lib/cppjieba_rb/version.rb +1 -1
- data/test/test_stop_word_filter.rb +10 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4ab9b4adf8e5ddccbe36570f3492b4ae438c7b6a
|
4
|
+
data.tar.gz: 72987214a678eaf3505f804bd7c9db86f94c6178
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cd2f4cdd703464e0672ea034ae5bc875d4754ce3e45d859b66d2cf65759e32bbca499221ef4ca70239bcf3606eafeee13b9c6723d5a626a8aa2bf06fdfc970e
|
7
|
+
data.tar.gz: 796eacaca074e602a080a94a2532e4f051e2b9e874ee6412cf45b683522afc36f2ba219be6a375c728ce6f3eb9b225c7fff783643cac3732dd4992bf263a1e4a
|
data/README.md
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
|
5
5
|
[![Build Status](https://travis-ci.org/fantasticfears/cppjieba_rb.png?branch=master)](https://travis-ci.org/fantasticfears/cppjieba_rb)
|
6
6
|
|
7
|
+
+[![Patreon](https://img.shields.io/badge/back_on-patreon-red.svg)](https://www.patreon.com/fantasticfears)
|
8
|
+
|
7
9
|
Ruby bindings for [Cppjieba](https://github.com/yanyiwu/cppjieba). C++11 required. (gcc 4.8+)
|
8
10
|
|
9
11
|
## Installation
|
data/ext/cppjieba_rb/internal.cc
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
#include <ruby.h>
|
2
2
|
#include <ruby/encoding.h>
|
3
|
+
|
4
|
+
#include <string>
|
5
|
+
#include <iostream>
|
6
|
+
#include <unordered_set>
|
7
|
+
|
3
8
|
#include "cppjieba/Jieba.hpp"
|
4
9
|
|
5
10
|
#define GET_CPPJIEBA(_data) jieba_cpp_data* _data; \
|
@@ -7,6 +12,7 @@
|
|
7
12
|
|
8
13
|
typedef struct {
|
9
14
|
cppjieba::Jieba* jieba;
|
15
|
+
std::unordered_set<std::string>* stop_words;
|
10
16
|
} jieba_cpp_data;
|
11
17
|
|
12
18
|
// make compiler happy
|
@@ -26,6 +32,8 @@ static void jieba_cpp_free(void* _this)
|
|
26
32
|
jieba_cpp_data* data = static_cast<jieba_cpp_data*>(_this);
|
27
33
|
delete data->jieba;
|
28
34
|
data->jieba = nullptr;
|
35
|
+
delete data->stop_words;
|
36
|
+
data->stop_words = nullptr;
|
29
37
|
}
|
30
38
|
|
31
39
|
static size_t jieba_cpp_memsize(const void* _)
|
@@ -61,6 +69,13 @@ VALUE internal_initialize(VALUE self,
|
|
61
69
|
StringValueCStr(user_dict_path),
|
62
70
|
StringValueCStr(idf_path),
|
63
71
|
StringValueCStr(stop_word_path));
|
72
|
+
data->stop_words = new std::unordered_set<std::string>();
|
73
|
+
std::ifstream ifs(StringValueCStr(stop_word_path));
|
74
|
+
std::string line;
|
75
|
+
while (getline(ifs, line)) {
|
76
|
+
data->stop_words->insert(line);
|
77
|
+
}
|
78
|
+
assert(data->stop_words->size());
|
64
79
|
}
|
65
80
|
|
66
81
|
VALUE internal_extract_keyword(VALUE self, VALUE text_rbs, VALUE topN)
|
@@ -127,6 +142,16 @@ static VALUE internal_segment_tag(VALUE self, VALUE text_rbs)
|
|
127
142
|
return result;
|
128
143
|
}
|
129
144
|
|
145
|
+
static VALUE internal_stop_word(VALUE self, VALUE word)
|
146
|
+
{
|
147
|
+
std::string test(StringValueCStr(word));
|
148
|
+
GET_CPPJIEBA(data);
|
149
|
+
if (data->stop_words->find(test) != data->stop_words->end()) {
|
150
|
+
return Qtrue;
|
151
|
+
} else {
|
152
|
+
return Qfalse;
|
153
|
+
}
|
154
|
+
}
|
130
155
|
|
131
156
|
void Init_internal()
|
132
157
|
{
|
@@ -143,6 +168,7 @@ void Init_internal()
|
|
143
168
|
rb_define_method(rb_cCppjiebaRb_Internal, "extract_keyword", (ruby_method*) &internal_extract_keyword, 2);
|
144
169
|
rb_define_method(rb_cCppjiebaRb_Internal, "segment", (ruby_method*) &internal_segment, 4);
|
145
170
|
rb_define_method(rb_cCppjiebaRb_Internal, "segment_tag", (ruby_method*) &internal_segment_tag, 1);
|
171
|
+
rb_define_method(rb_cCppjiebaRb_Internal, "stop_word?", (ruby_method*) &internal_stop_word, 1);
|
146
172
|
}
|
147
173
|
|
148
174
|
}
|
data/lib/cppjieba_rb.rb
CHANGED
data/lib/cppjieba_rb/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cppjieba_rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erick Guan
|
@@ -213,6 +213,7 @@ files:
|
|
213
213
|
- lib/cppjieba_rb/version.rb
|
214
214
|
- test/test_keyword.rb
|
215
215
|
- test/test_segment.rb
|
216
|
+
- test/test_stop_word_filter.rb
|
216
217
|
- test/test_tagging.rb
|
217
218
|
homepage: https://github.com/fantasticfears/cppjieba_rb
|
218
219
|
licenses:
|
@@ -241,4 +242,5 @@ summary: cppjieba binding for ruby
|
|
241
242
|
test_files:
|
242
243
|
- test/test_keyword.rb
|
243
244
|
- test/test_segment.rb
|
245
|
+
- test/test_stop_word_filter.rb
|
244
246
|
- test/test_tagging.rb
|