cppjieba_rb 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9761a257960334fdb5e3cad3dd80be9f669b7f65
4
- data.tar.gz: e62980a6590c4e2d141b5583af34737152dc4343
3
+ metadata.gz: 4ab9b4adf8e5ddccbe36570f3492b4ae438c7b6a
4
+ data.tar.gz: 72987214a678eaf3505f804bd7c9db86f94c6178
5
5
  SHA512:
6
- metadata.gz: b631d51e9ad4f79ef34baed3c2149a872b0a5e4a512c4d2bd8269c1df6d86d1a870e9824483855c881a2844250096bd2017ec34068af91b8629acc4910aa347d
7
- data.tar.gz: 29f1619beb0cca01ded498afd16d6fd081ca308d1d371b01998557fddf57ef870c31c5b51c0cda609f8e28a083a0ab945d08c98906153b11119f881946064acb
6
+ metadata.gz: 1cd2f4cdd703464e0672ea034ae5bc875d4754ce3e45d859b66d2cf65759e32bbca499221ef4ca70239bcf3606eafeee13b9c6723d5a626a8aa2bf06fdfc970e
7
+ data.tar.gz: 796eacaca074e602a080a94a2532e4f051e2b9e874ee6412cf45b683522afc36f2ba219be6a375c728ce6f3eb9b225c7fff783643cac3732dd4992bf263a1e4a
data/README.md CHANGED
@@ -4,6 +4,8 @@
4
4
 
5
5
  [![Build Status](https://travis-ci.org/fantasticfears/cppjieba_rb.png?branch=master)](https://travis-ci.org/fantasticfears/cppjieba_rb)
6
6
 
7
+ [![Patreon](https://img.shields.io/badge/back_on-patreon-red.svg)](https://www.patreon.com/fantasticfears)
8
+
7
9
  Ruby bindings for [Cppjieba](https://github.com/yanyiwu/cppjieba). C++11 required. (gcc 4.8+)
8
10
 
9
11
  ## Installation
@@ -1,5 +1,10 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/encoding.h>
3
+
4
+ #include <string>
5
+ #include <iostream>
6
+ #include <unordered_set>
7
+
3
8
  #include "cppjieba/Jieba.hpp"
4
9
 
5
10
  #define GET_CPPJIEBA(_data) jieba_cpp_data* _data; \
@@ -7,6 +12,7 @@
7
12
 
8
13
  typedef struct {
  // Native state carried by the extension object.
  // `jieba` is the segmenter instance; `stop_words` holds the stop-word
  // lines loaded in internal_initialize and queried by internal_stop_word.
  // stop_words is allocated with `new` in internal_initialize, and both
  // pointers are released with `delete` in jieba_cpp_free.
9
14
  cppjieba::Jieba* jieba;
15
+ std::unordered_set<std::string>* stop_words;
10
16
  } jieba_cpp_data;
11
17
 
12
18
  // make compiler happy
@@ -26,6 +32,8 @@ static void jieba_cpp_free(void* _this)
26
32
  jieba_cpp_data* data = static_cast<jieba_cpp_data*>(_this);
27
33
  delete data->jieba;
28
34
  data->jieba = nullptr;
35
+ delete data->stop_words;
36
+ data->stop_words = nullptr;
29
37
  }
30
38
 
31
39
  static size_t jieba_cpp_memsize(const void* _)
@@ -61,6 +69,13 @@ VALUE internal_initialize(VALUE self,
61
69
  StringValueCStr(user_dict_path),
62
70
  StringValueCStr(idf_path),
63
71
  StringValueCStr(stop_word_path));
72
+ data->stop_words = new std::unordered_set<std::string>();
73
+ std::ifstream ifs(StringValueCStr(stop_word_path));
74
+ std::string line;
75
+ while (getline(ifs, line)) {
76
+ data->stop_words->insert(line);
77
+ }
78
+ assert(data->stop_words->size());
64
79
  }
65
80
 
66
81
  VALUE internal_extract_keyword(VALUE self, VALUE text_rbs, VALUE topN)
@@ -127,6 +142,16 @@ static VALUE internal_segment_tag(VALUE self, VALUE text_rbs)
127
142
  return result;
128
143
  }
129
144
 
145
+ static VALUE internal_stop_word(VALUE self, VALUE word)
146
+ {
// Ruby-visible predicate, registered as "stop_word?" (arity 1) in Init_internal.
// Returns Qtrue when `word` is an exact match for an entry of
// data->stop_words, Qfalse otherwise. Entries are the lines that
// internal_initialize read from the stop-word file with getline.
// NOTE(review): `test` is already constructed when GET_CPPJIEBA runs; if that
// macro can raise a Ruby exception (its full definition is not visible here),
// the std::string destructor would be skipped — consider fetching `data` first.
// NOTE(review): data->stop_words is dereferenced without a null check;
// presumably it is always set by internal_initialize — confirm.
147
+ std::string test(StringValueCStr(word));
148
+ GET_CPPJIEBA(data);
149
+ if (data->stop_words->find(test) != data->stop_words->end()) {
150
+ return Qtrue;
151
+ } else {
152
+ return Qfalse;
153
+ }
154
+ }
130
155
 
131
156
  void Init_internal()
132
157
  {
@@ -143,6 +168,7 @@ void Init_internal()
143
168
  rb_define_method(rb_cCppjiebaRb_Internal, "extract_keyword", (ruby_method*) &internal_extract_keyword, 2);
144
169
  rb_define_method(rb_cCppjiebaRb_Internal, "segment", (ruby_method*) &internal_segment, 4);
145
170
  rb_define_method(rb_cCppjiebaRb_Internal, "segment_tag", (ruby_method*) &internal_segment_tag, 1);
171
+ rb_define_method(rb_cCppjiebaRb_Internal, "stop_word?", (ruby_method*) &internal_stop_word, 1);
146
172
  }
147
173
 
148
174
  }
data/lib/cppjieba_rb.rb CHANGED
@@ -22,6 +22,10 @@ module CppjiebaRb
22
22
  internal.segment_tag(str)
23
23
  end
24
24
 
25
+ def self.filter_stop_word(arr)
# Returns the elements of arr for which internal.stop_word? is false,
# i.e. arr with the stop words rejected. arr itself is not modified
# (reject, not reject!).
# Presumably arr is an Array of String words — confirm against callers.
26
+ arr.reject { |w| internal.stop_word?(w) }
27
+ end
28
+
25
29
  class << self
26
30
  def internal
27
31
  @backend ||= CppjiebaRb::Internal.new(DICT_PATH,
@@ -1,3 +1,3 @@
1
1
  module CppjiebaRb
2
- VERSION = '0.2.3'
2
+ VERSION = '0.3.0'
3
3
  end
@@ -0,0 +1,10 @@
1
+ # coding: utf-8
2
+ require 'minitest/autorun'
3
+ require 'cppjieba_rb'
4
+
5
+ class JiebaTest < Minitest::Test
# Exercises CppjiebaRb.filter_stop_word on a fixed six-word list.
6
+ def test_filter
# Expects 是 and 的 to be removed and the remaining four words to be
# returned in their original order.
7
+ words = CppjiebaRb.filter_stop_word %w(令狐冲 是 云计算 行业 的 专家)
8
+ assert_equal %w(令狐冲 云计算 行业 专家), words
9
+ end
10
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cppjieba_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erick Guan
@@ -213,6 +213,7 @@ files:
213
213
  - lib/cppjieba_rb/version.rb
214
214
  - test/test_keyword.rb
215
215
  - test/test_segment.rb
216
+ - test/test_stop_word_filter.rb
216
217
  - test/test_tagging.rb
217
218
  homepage: https://github.com/fantasticfears/cppjieba_rb
218
219
  licenses:
@@ -241,4 +242,5 @@ summary: cppjieba binding for ruby
241
242
  test_files:
242
243
  - test/test_keyword.rb
243
244
  - test/test_segment.rb
245
+ - test/test_stop_word_filter.rb
244
246
  - test/test_tagging.rb