cppjieba_rb 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9761a257960334fdb5e3cad3dd80be9f669b7f65
4
- data.tar.gz: e62980a6590c4e2d141b5583af34737152dc4343
3
+ metadata.gz: 4ab9b4adf8e5ddccbe36570f3492b4ae438c7b6a
4
+ data.tar.gz: 72987214a678eaf3505f804bd7c9db86f94c6178
5
5
  SHA512:
6
- metadata.gz: b631d51e9ad4f79ef34baed3c2149a872b0a5e4a512c4d2bd8269c1df6d86d1a870e9824483855c881a2844250096bd2017ec34068af91b8629acc4910aa347d
7
- data.tar.gz: 29f1619beb0cca01ded498afd16d6fd081ca308d1d371b01998557fddf57ef870c31c5b51c0cda609f8e28a083a0ab945d08c98906153b11119f881946064acb
6
+ metadata.gz: 1cd2f4cdd703464e0672ea034ae5bc875d4754ce3e45d859b66d2cf65759e32bbca499221ef4ca70239bcf3606eafeee13b9c6723d5a626a8aa2bf06fdfc970e
7
+ data.tar.gz: 796eacaca074e602a080a94a2532e4f051e2b9e874ee6412cf45b683522afc36f2ba219be6a375c728ce6f3eb9b225c7fff783643cac3732dd4992bf263a1e4a
data/README.md CHANGED
@@ -4,6 +4,8 @@
4
4
 
5
5
  [![Build Status](https://travis-ci.org/fantasticfears/cppjieba_rb.png?branch=master)](https://travis-ci.org/fantasticfears/cppjieba_rb)
6
6
 
7
+ [![Patreon](https://img.shields.io/badge/back_on-patreon-red.svg)](https://www.patreon.com/fantasticfears)
8
+
7
9
  Ruby bindings for [Cppjieba](https://github.com/yanyiwu/cppjieba). C++11 required. (gcc 4.8+)
8
10
 
9
11
  ## Installation
@@ -1,5 +1,10 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/encoding.h>
3
+
4
+ #include <string>
5
+ #include <iostream>
6
+ #include <unordered_set>
7
+
3
8
  #include "cppjieba/Jieba.hpp"
4
9
 
5
10
  #define GET_CPPJIEBA(_data) jieba_cpp_data* _data; \
@@ -7,6 +12,7 @@
7
12
 
8
13
  typedef struct { // native state reached through GET_CPPJIEBA
9
14
  cppjieba::Jieba* jieba; // released with delete in jieba_cpp_free
15
+ std::unordered_set<std::string>* stop_words; // one entry per line of the stop-word file (filled in internal_initialize); deleted in jieba_cpp_free
10
16
  } jieba_cpp_data;
11
17
 
12
18
  // make compiler happy
@@ -26,6 +32,8 @@ static void jieba_cpp_free(void* _this)
26
32
  jieba_cpp_data* data = static_cast<jieba_cpp_data*>(_this);
27
33
  delete data->jieba;
28
34
  data->jieba = nullptr;
35
+ delete data->stop_words;
36
+ data->stop_words = nullptr;
29
37
  }
30
38
 
31
39
  static size_t jieba_cpp_memsize(const void* _)
@@ -61,6 +69,13 @@ VALUE internal_initialize(VALUE self,
61
69
  StringValueCStr(user_dict_path),
62
70
  StringValueCStr(idf_path),
63
71
  StringValueCStr(stop_word_path));
72
+ data->stop_words = new std::unordered_set<std::string>();
73
+ std::ifstream ifs(StringValueCStr(stop_word_path));
74
+ std::string line;
75
+ while (getline(ifs, line)) {
76
+ data->stop_words->insert(line);
77
+ }
78
+ assert(data->stop_words->size());
64
79
  }
65
80
 
66
81
  VALUE internal_extract_keyword(VALUE self, VALUE text_rbs, VALUE topN)
@@ -127,6 +142,16 @@ static VALUE internal_segment_tag(VALUE self, VALUE text_rbs)
127
142
  return result;
128
143
  }
129
144
 
145
+ // Ruby-visible `stop_word?`: reports whether `word` is in the stop-word set.
146
+ // Returns Qtrue on a hit and Qfalse otherwise.
147
+ static VALUE internal_stop_word(VALUE self, VALUE word)
148
+ {
149
+ const std::string candidate(StringValueCStr(word));
150
+ GET_CPPJIEBA(data);
151
+ // count() is 0 or 1 for a set, so it doubles as a membership test.
152
+ const bool listed = data->stop_words->count(candidate) != 0;
153
+ return listed ? Qtrue : Qfalse;
154
+ }
130
155
 
131
156
  void Init_internal()
132
157
  {
@@ -143,6 +168,7 @@ void Init_internal()
143
168
  rb_define_method(rb_cCppjiebaRb_Internal, "extract_keyword", (ruby_method*) &internal_extract_keyword, 2);
144
169
  rb_define_method(rb_cCppjiebaRb_Internal, "segment", (ruby_method*) &internal_segment, 4);
145
170
  rb_define_method(rb_cCppjiebaRb_Internal, "segment_tag", (ruby_method*) &internal_segment_tag, 1);
171
+ rb_define_method(rb_cCppjiebaRb_Internal, "stop_word?", (ruby_method*) &internal_stop_word, 1);
146
172
  }
147
173
 
148
174
  }
data/lib/cppjieba_rb.rb CHANGED
@@ -22,6 +22,10 @@ module CppjiebaRb
22
22
  internal.segment_tag(str)
23
23
  end
24
24
 
25
+ def self.filter_stop_word(arr)
26
+ arr.reject { |w| internal.stop_word?(w) }
27
+ end
28
+
25
29
  class << self
26
30
  def internal
27
31
  @backend ||= CppjiebaRb::Internal.new(DICT_PATH,
@@ -1,3 +1,3 @@
1
1
  module CppjiebaRb
2
- VERSION = '0.2.3'
2
+ VERSION = '0.3.0'
3
3
  end
@@ -0,0 +1,10 @@
1
+ # coding: utf-8
2
+ require 'minitest/autorun'
3
+ require 'cppjieba_rb'
4
+
5
+ class JiebaTest < Minitest::Test
6
+ def test_filter
7
+ words = CppjiebaRb.filter_stop_word %w(令狐冲 是 云计算 行业 的 专家)
8
+ assert_equal %w(令狐冲 云计算 行业 专家), words
9
+ end
10
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cppjieba_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erick Guan
@@ -213,6 +213,7 @@ files:
213
213
  - lib/cppjieba_rb/version.rb
214
214
  - test/test_keyword.rb
215
215
  - test/test_segment.rb
216
+ - test/test_stop_word_filter.rb
216
217
  - test/test_tagging.rb
217
218
  homepage: https://github.com/fantasticfears/cppjieba_rb
218
219
  licenses:
@@ -241,4 +242,5 @@ summary: cppjieba binding for ruby
241
242
  test_files:
242
243
  - test/test_keyword.rb
243
244
  - test/test_segment.rb
245
+ - test/test_stop_word_filter.rb
244
246
  - test/test_tagging.rb