jieba_rb 0.0.2 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cf9ad1c59145d326f4edd1c639066e107b52c13a
4
- data.tar.gz: 4732514648268711284a8198a4a324889636112d
3
+ metadata.gz: 2aab0ba015d462b0ab6fc1ee344961660e20adeb
4
+ data.tar.gz: d15db1389073584381a83aad79967152949cda7c
5
5
  SHA512:
6
- metadata.gz: 8fdc938f12a9506eb46baef952fd6bd5a261803934673273ba33f8e57cccd614ea31725dc58262b1e6c24828db9f8787027f8e95f4e614504eb943cf75dadf04
7
- data.tar.gz: cd5d7df0f5f46cf4a7f8553d9f09cf05d0aeceb2f7b211a9238b9596aa43c3bd395425d2d278fc971bc3f224ac764b55cdfe88506c1b77a0ab8bcb927b206b48
6
+ metadata.gz: b63793a07432aaf9811cb0dc16c4a3e61b3c5930019cd417a0f8e86b310909a48974199bc8c731c0656c75c744bb37ebe734a1515967f6fb29d299a9fa977bdd
7
+ data.tar.gz: 1ec9fde21352676ba4be0e6a2cf99310dfec07c3933e23554e7b473d527dcea19ccc8d435d22328bcf1ced19860b1d9d141671221f1bbb9391961065f74d5761
@@ -2,10 +2,15 @@ language: ruby
2
2
  rvm:
3
3
  - 1.9.3
4
4
  - 2.0.0
5
- - 2.1.5
5
+ - 2.1.10
6
+ - 2.2.5
7
+ - 2.3.1
6
8
  - ruby-head
7
9
  - rbx-2
8
10
 
11
+ before_install:
12
+ - gem install bundler
13
+
9
14
  before_script: rake compile
10
15
 
11
16
  matrix:
data/README.md CHANGED
@@ -41,6 +41,22 @@ HMM or Max probability (mp) Segment mode:
41
41
  seg = JiebaRb::Segment.new mode: :hmm # or mode: :mp
42
42
  words = seg.cut "令狐冲是云计算行业的专家"
43
43
 
44
+ ## Word tagging Usage
45
+
46
+ Default tagging:
47
+
48
+ require 'jieba_rb'
49
+ tagging = JiebaRb::Tagging.new
50
+ pairs = tagging.tag "我是蓝翔技工拖拉机学院手扶拖拉机专业的。"
51
+ # [{"我"=>"r"}, {"是"=>"v"}, {"蓝翔"=>"x"}, {"技工"=>"n"}, {"拖拉机"=>"n"}, {"学院"=>"n"}, {"手扶拖拉机"=>"n"}, {"专业"=>"n"}, {"的"=>"uj"}, {"。"=>"x"}]
52
+
53
+ Tagging with user-defined dictionary:
54
+
55
+ require 'jieba_rb'
56
+ tagging = JiebaRb::Tagging.new user_dict: :default
57
+ pairs = tagging.tag "我是蓝翔技工拖拉机学院手扶拖拉机专业的。"
58
+ # [{"我"=>"r"}, {"是"=>"v"}, {"蓝翔"=>"nz"}, {"技工"=>"n"}, {"拖拉机"=>"n"}, {"学院"=>"n"}, {"手扶拖拉机"=>"n"}, {"专业"=>"n"}, {"的"=>"uj"}, {"。"=>"x"}]
59
+
44
60
  ## Keyword Extractor Usage
45
61
 
46
62
  * only support TF-IDF currently
@@ -7,4 +7,5 @@ void Init_jieba()
7
7
 
8
8
  Init_segment();
9
9
  Init_keyword();
10
+ Init_tagging();
10
11
  }
@@ -4,6 +4,7 @@
4
4
  #include <ruby.h>
5
5
  #include <segment.h>
6
6
  #include <keyword.h>
7
+ #include <tagging.h>
7
8
 
8
9
  extern VALUE mJieba;
9
10
 
@@ -0,0 +1,75 @@
1
+ #include "tagging.h"
2
+ #include <ruby/encoding.h>
3
+ #include <PosTagger.hpp>
4
+
5
+ static rb_encoding* u8_enc;
6
+
7
+ struct Tagging{
8
+ CppJieba::PosTagger * p;
9
+ };
10
+
11
+ static void tagger_free(void *p){
12
+ delete ((Tagging*) p) -> p;
13
+ delete (Tagging*)p;
14
+ }
15
+
16
+ static VALUE alloc(VALUE klass)
17
+ {
18
+ Tagging * tagging = new Tagging();
19
+ return Data_Wrap_Struct(klass, NULL, tagger_free, tagging);
20
+ }
21
+
22
+ static void init(VALUE self,
23
+ VALUE jieba_dict_rbs,
24
+ VALUE hmm_dict_rbs,
25
+ VALUE user_dict_rbs)
26
+ {
27
+ Tagging *tagging;
28
+ Data_Get_Struct(self, Tagging, tagging);
29
+
30
+ Check_Type(jieba_dict_rbs, T_STRING);
31
+ Check_Type(hmm_dict_rbs, T_STRING);
32
+ Check_Type(user_dict_rbs, T_STRING);
33
+
34
+ std::string jieba_dict = StringValueCStr(jieba_dict_rbs);
35
+ std::string hmm_dict = StringValueCStr(hmm_dict_rbs);
36
+ std::string user_dict = StringValueCStr(user_dict_rbs);
37
+
38
+ tagging->p = new CppJieba::PosTagger(jieba_dict, hmm_dict, user_dict);
39
+ }
40
+
41
+ static VALUE tag(VALUE self, VALUE text_rbs)
42
+ {
43
+ Check_Type(text_rbs, T_STRING);
44
+ std::string text = StringValueCStr(text_rbs);
45
+
46
+ Tagging *tagging;
47
+ Data_Get_Struct(self, Tagging, tagging);
48
+
49
+ std::vector<std::pair<std::string, std::string>> pairs;
50
+ tagging->p->tag(text, pairs);
51
+
52
+ volatile VALUE arr = rb_ary_new();
53
+ for (std::vector<std::pair<std::string, std::string>>::const_iterator j = pairs.begin(); j != pairs.end(); j++)
54
+ {
55
+ VALUE pair = rb_hash_new();
56
+ rb_hash_aset(pair, rb_enc_str_new(std::get<0>(*j).c_str(), std::get<0>(*j).length(), u8_enc), rb_enc_str_new(std::get<1>(*j).c_str(), std::get<1>(*j).length(), u8_enc));
57
+ rb_ary_push(arr, pair);
58
+
59
+ }
60
+ return arr;
61
+ }
62
+
63
+ #define DEF(k,n,f,c) rb_define_method(k,n,RUBY_METHOD_FUNC(f),c)
64
+
65
+ extern "C" {
66
+ void Init_tagging()
67
+ {
68
+ VALUE cTagging = rb_define_class_under(mJieba, "Tagging", rb_cObject);
69
+ u8_enc = rb_utf8_encoding();
70
+ rb_define_alloc_func(cTagging, alloc);
71
+ DEF(cTagging, "_init",init,3);
72
+ DEF(cTagging, "tag",tag,1);
73
+ }
74
+
75
+ }
@@ -0,0 +1,17 @@
1
+ #ifndef RUBY_JIEBA_TAGGING
2
+ #define RUBY_JIEBA_TAGGING
3
+
4
+ #include <jieba.h>
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ void Init_tagging();
11
+
12
+ #ifdef __cplusplus
13
+ }
14
+ #endif
15
+
16
+
17
+ #endif
@@ -46,10 +46,21 @@ module JiebaRb
46
46
  stop_words_path = opts[:stop_words] || DEFAULT_STOP_WORDS
47
47
 
48
48
  user_dict = opts[:user_dict] || ""
49
- user_dict = USER_DICT_FILE if user_dict == :default
49
+ user_dict = DEFAULT_USER_DICT if user_dict == :default
50
50
 
51
51
  _init mode, jieba_dict, hmm_dict, idf_path, stop_words_path, user_dict
52
52
  end
53
53
  end
54
54
 
55
+ class Tagging
56
+ private :_init
57
+ def initialize opts = {}
58
+ jieba_dict = opts[:jieba_dict] || DEFAULT_JIEBA_DICT
59
+ hmm_dict = opts[:hmm_dict] || DEFAULT_HMM_DICT
60
+ user_dict = opts[:user_dict] || ""
61
+ user_dict = DEFAULT_USER_DICT if user_dict == :default
62
+
63
+ _init jieba_dict, hmm_dict, user_dict
64
+ end
65
+ end
55
66
  end
@@ -1,3 +1,3 @@
1
1
  module JiebaRb
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -0,0 +1,17 @@
1
+ # coding: utf-8
2
+ require 'minitest/autorun'
3
+ require 'jieba_rb'
4
+ class JiebaTest < Minitest::Test
5
+ def test_tagging
6
+ tagging = JiebaRb::Tagging.new
7
+ pairs = tagging.tag "我是蓝翔技工拖拉机学院手扶拖拉机专业的。"
8
+ assert_equal [{"我"=>"r"}, {"是"=>"v"}, {"蓝翔"=>"x"}, {"技工"=>"n"}, {"拖拉机"=>"n"}, {"学院"=>"n"}, {"手扶拖拉机"=>"n"}, {"专业"=>"n"}, {"的"=>"uj"}, {"。"=>"x"}], pairs
9
+ end
10
+
11
+ def test_tagging_with_user_dict
12
+ tagging = JiebaRb::Tagging.new user_dict: :default
13
+ pairs = tagging.tag "我是蓝翔技工拖拉机学院手扶拖拉机专业的。"
14
+ assert_equal [{"我"=>"r"}, {"是"=>"v"}, {"蓝翔"=>"nz"}, {"技工"=>"n"}, {"拖拉机"=>"n"}, {"学院"=>"n"}, {"手扶拖拉机"=>"n"}, {"专业"=>"n"}, {"的"=>"uj"}, {"。"=>"x"}], pairs
15
+ end
16
+
17
+ end
metadata CHANGED
@@ -1,69 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jieba_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Li
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-24 00:00:00.000000000 Z
11
+ date: 2016-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.5'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.5'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rake-compiler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: minitest
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  description: cppjieba binding for ruby
@@ -74,9 +74,9 @@ extensions:
74
74
  - ext/jieba/extconf.rb
75
75
  extra_rdoc_files: []
76
76
  files:
77
- - .gitignore
78
- - .gitmodules
79
- - .travis.yml
77
+ - ".gitignore"
78
+ - ".gitmodules"
79
+ - ".travis.yml"
80
80
  - Gemfile
81
81
  - LICENSE.txt
82
82
  - README.md
@@ -215,11 +215,14 @@ files:
215
215
  - ext/jieba/keyword.h
216
216
  - ext/jieba/segment.cc
217
217
  - ext/jieba/segment.h
218
+ - ext/jieba/tagging.cc
219
+ - ext/jieba/tagging.h
218
220
  - jieba_rb.gemspec
219
221
  - lib/jieba_rb.rb
220
222
  - lib/jieba_rb/version.rb
221
223
  - test/test_keyword.rb
222
224
  - test/test_segment.rb
225
+ - test/test_tagging.rb
223
226
  homepage: https://github.com/altkatz/jieba_rb
224
227
  licenses:
225
228
  - MIT
@@ -230,21 +233,21 @@ require_paths:
230
233
  - lib
231
234
  required_ruby_version: !ruby/object:Gem::Requirement
232
235
  requirements:
233
- - - '>='
236
+ - - ">="
234
237
  - !ruby/object:Gem::Version
235
238
  version: 1.9.2
236
239
  required_rubygems_version: !ruby/object:Gem::Requirement
237
240
  requirements:
238
- - - '>='
241
+ - - ">="
239
242
  - !ruby/object:Gem::Version
240
243
  version: '0'
241
244
  requirements: []
242
245
  rubyforge_project:
243
- rubygems_version: 2.4.5
246
+ rubygems_version: 2.5.1
244
247
  signing_key:
245
248
  specification_version: 4
246
249
  summary: cppjieba binding for ruby
247
250
  test_files:
248
251
  - test/test_keyword.rb
249
252
  - test/test_segment.rb
250
- has_rdoc:
253
+ - test/test_tagging.rb