nameko 0.1.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ea033cbecf52f5f7f98326e16ea8877b3ed5d71ba5910ac28bfc4fd74bc5a77
4
- data.tar.gz: c824bc650de4469337ea3c8c12facb7c38240bbb718022a9d8444fe9ca0a8e2f
3
+ metadata.gz: aa8bfa40183c62b39a9b92361fe6b39997d2ee619d1b7af219775874836fb8d1
4
+ data.tar.gz: cde44325f5429beba2e90aa9bb31ccbe8685dfd2524e988c70dd5f9129b48751
5
5
  SHA512:
6
- metadata.gz: 9048e0b1021b73e58da3a9931ab28c0853c45982d37433b1039b0d15c59743be04b4de9619291b77800d66bedd8c4246a490d088005293424d2d71f600912b47
7
- data.tar.gz: 71b54f6dfa658f2e759a94401e6f97624a44619f1748e0cbf42712ed66574bb6c0be9ae829bdf52326e0d55e70119f9f8cfb53683efd9ba2a7182e7dfe8fa90d
6
+ metadata.gz: a946d2295725ee8bc0ffd488254ad8edfac1435dcab7a419c1e7869d5c82cedb7b4b7bc7f919163ff595593ce71fa3e0fd81478772462136f6de6474afe9ba53
7
+ data.tar.gz: c05da08603a4157dbbfd91daddd3075453c554a082a6e82024f4eb11530127cbf5cb0fccb7c6c88ff9bb4e0be6a47dc5a28ac078f91a36c2f10bc9d8631a367d
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- nameko (0.1.0)
4
+ nameko (0.5.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -26,22 +26,30 @@ Or install it yourself as:
26
26
  require 'nameko'
27
27
 
28
28
  mecab = Nameko::Mecab.new
29
- mecab.parse("私以外私じゃないの。")
29
+ mecab.parse("私以外私じゃないの")
30
30
  # =>
31
- [
32
- {:surface=>'私', :pos=>'名詞', :pos1=>'代名詞', :pos2=>'一般', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'私', :yomi=>'ワタシ', :pronunciation=>'ワタシ'},
33
- {:surface=>'以外', :pos=>'名詞', :pos1=>'非自立', :pos2=>'副詞可能', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'以外', :yomi=>'イガイ', :pronunciation=>'イガイ'},
34
- {:surface=>'私', :pos=>'名詞', :pos1=>'代名詞', :pos2=>'一般', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'私', :yomi=>'ワタシ', :pronunciation=>'ワタシ'},
35
- {:surface=>'じゃ', :pos=>'助詞', :pos1=>'副助詞', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'じゃ', :yomi=>'ジャ', :pronunciation=>'ジャ'},
36
- {:surface=>'ない', :pos=>'助動詞', :pos1=>'', :pos2=>'', :pos3=>'', :conjugation_form=>'特殊・ナイ', :conjugation=>'基本形', :base=>'ない', :yomi=>'ナイ', :pronunciation=>'ナイ'},
37
- {:surface=>'の', :pos=>'助詞', :pos1=>'終助詞', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'の', :yomi=>'ノ', :pronunciation=>'ノ'},
38
- {:surface=>'。',:pos=>'記号', :pos1=>'句点', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'。', :yomi=>'。', :pronunciation=>'。'},
39
- ]
31
+ # [
32
+ # #<MecabNode:0x00007f8f51117348>,
33
+ # #<MecabNode:0x00007f8f51116d30>,
34
+ # #<MecabNode:0x00007f8f51115610>,
35
+ # #<MecabNode:0x00007f8f51115138>,
36
+ # #<MecabNode:0x00007f8f51123fa8>,
37
+ # #<MecabNode:0x00007f8f51123be8>
38
+ # ]
39
+
40
+ node = mecab.parse("私以外私じゃないの")[0]
41
+ node.surface # => "私"
42
+ node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
43
+ node.posid #=> 59
44
+ node.id #=> 1
40
45
  ```
41
46
 
42
- Nameko::Mecab#parse returns a array of hash.
47
+ Nameko::Mecab#parse returns an array of `MecabNode` objects.
48
+
49
+ `MecabNode` has a `feature` method.
50
+ It returns a hash.
43
51
  The meaning of each hash key is as follows (every key is a symbol):
44
- + `surface`: 表層系(Surface)
52
+
45
53
  + `pos`: 品詞(Part of speech)
46
54
  + `pos1`: 品詞細分類1(Part of speech subcategory1)
47
55
  + `pos2`: 品詞細分類2(Part of speech subcategory2)
@@ -63,13 +71,13 @@ mecab = Nameko::Mecab.new("-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
63
71
  # mecab = Nameko::Mecab.new(["-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
64
72
  # mecab = Nameko::Mecab.new(["-d", "/usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
65
73
 
66
- mecab.parse("アラレちゃん")
67
- # => [{:surface=>"アラレちゃん", :pos=>"名詞", :pos1=>"固有名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"アラレちゃん", :yomi=>"アラレチャン", :pronunciation=>"アラレチャン"}]
74
+ mecab.parse("アラレちゃん").map(&:feature)
75
+ # => [{:pos=>"名詞", :pos1=>"固有名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"アラレちゃん", :yomi=>"アラレチャン", :pronunciation=>"アラレチャン"}]
68
76
  ```
69
77
 
70
78
  ## Nameko VS. Natto
71
79
 
72
- The key difference between Natto and Nameko is the return value of parse method.
80
+ The key difference between Natto and Nameko is the return value of the `feature` method.
73
81
 
74
82
  ```ruby:Natto
75
83
  require 'natto'
@@ -90,12 +98,12 @@ require 'nameko'
90
98
 
91
99
  mecab = Nameko::Mecab.new
92
100
 
93
- mecab.parse("私とあなた")
101
+ mecab.parse("私とあなた").map(&:feature)
94
102
  # =>
95
103
  [
96
- {:surface=>"私", :pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"},
97
- {:surface=>"と", :pos=>"助詞", :pos1=>"格助詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"と", :yomi=>"ト", :pronunciation=>"ト"},
98
- {:surface=>"あなた", :pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"あなた", :yomi=>"アナタ", :pronunciation=>"アナタ"}
104
+ {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"},
105
+ {:pos=>"助詞", :pos1=>"格助詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"と", :yomi=>"ト", :pronunciation=>"ト"},
106
+ {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"あなた", :yomi=>"アナタ", :pronunciation=>"アナタ"}
99
107
  ]
100
108
  ```
101
109
 
@@ -1,20 +1,20 @@
1
- require "ffi"
1
+ require 'ffi'
2
+ require 'nameko/node.rb'
2
3
 
3
4
  module Nameko
4
5
  # This class provides a parse method.
5
- # require 'nameko'
6
- #
7
- # mecab = Nameko::Mecab.new
8
- # mecab.parse("私以外私じゃないの。")
6
+ # require 'nameko'
9
7
  #
8
+ # mecab = Nameko::Mecab.new
9
+ # mecab.parse("私以外私じゃないの")
10
+ # # =>
10
11
  # [
11
- # {:surface=>'私', :pos=>'名詞', :pos1=>'代名詞', :pos2=>'一般', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'私', :yomi=>'ワタシ', :pronunciation=>'ワタシ'},
12
- # {:surface=>'以外', :pos=>'名詞', :pos1=>'非自立', :pos2=>'副詞可能', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'以外', :yomi=>'イガイ', :pronunciation=>'イガイ'},
13
- # {:surface=>'私', :pos=>'名詞', :pos1=>'代名詞', :pos2=>'一般', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'私', :yomi=>'ワタシ', :pronunciation=>'ワタシ'},
14
- # {:surface=>'じゃ', :pos=>'助詞', :pos1=>'副助詞', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'じゃ', :yomi=>'ジャ', :pronunciation=>'ジャ'},
15
- # {:surface=>'ない', :pos=>'助動詞', :pos1=>'', :pos2=>'', :pos3=>'', :conjugation_form=>'特殊・ナイ', :conjugation=>'基本形', :base=>'ない', :yomi=>'ナイ', :pronunciation=>'ナイ'},
16
- # {:surface=>'の', :pos=>'助詞', :pos1=>'終助詞', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'の', :yomi=>'ノ', :pronunciation=>'ノ'},
17
- # {:surface=>'。',:pos=>'記号', :pos1=>'句点', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'。', :yomi=>'。', :pronunciation=>'。'},
12
+ # #<MecabNode:0x00007f8f51117348>,
13
+ # #<MecabNode:0x00007f8f51116d30>,
14
+ # #<MecabNode:0x00007f8f51115610>,
15
+ # #<MecabNode:0x00007f8f51115138>,
16
+ # #<MecabNode:0x00007f8f51123fa8>,
17
+ # #<MecabNode:0x00007f8f51123be8>
18
18
  # ]
19
19
  #
20
20
 
@@ -24,7 +24,7 @@ module Nameko
24
24
 
25
25
  attach_function :mecab_new2, [:string], :pointer
26
26
  attach_function :mecab_destroy, [:pointer], :void
27
- attach_function :mecab_sparse_tostr, [:pointer, :string], :string
27
+ attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
28
28
 
29
29
  def self.destroy(mecab)
30
30
  proc {
@@ -51,26 +51,31 @@ module Nameko
51
51
 
52
52
  # Parse the given string by MeCab.
53
53
  # @param [String] str Parsed text
54
- # @return [Array<Hash>] Result of Mecab parsing
54
+ # @return [Array<MecabNode>] Result of Mecab parsing
55
+ #
56
+ # @example
57
+ # node = mecab.parse("私以外私じゃないの")[0]
55
58
  #
56
- # The return value is array of hash.
59
+ # node.surface # => "私"
60
+ # node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
61
+ # node.posid #=> 59
62
+ # node.id #=> 1
57
63
  #
58
- # The hash keys meaning is as follows(The key is symbol):
59
- # surface: 表層系(Surface)
60
- # pos: 品詞(Part of speech)
61
- # pos1: 品詞細分類1(Part of speech subcategory1)
62
- # pos2: 品詞細分類2(Part of speech subcategory2)
63
- # pos3: 品詞細分類3(Part of speech subcategory3)
64
- # conjugation_form: 活用形(Conjugation form)
65
- # conjugation: 活用形(conjucation)
66
- # base: 基本形・原型(Lexical form)
67
- # yomi: 読み(Reading)
68
- # pronunciation: 発音(Pronunciation)
69
64
 
70
65
  def parse(str)
71
- mecab_row = mecab_sparse_tostr(@mecab, str).force_encoding(Encoding.default_external)
72
- analysis_result = analyze(mecab_row)
73
- fill_up(analysis_result)
66
+ node = MecabNode.new mecab_sparse_tonode(@mecab, str)
67
+ result = []
68
+
69
+ while !node.null? do
70
+ if node.surface.empty?
71
+ node = node.next
72
+ next
73
+ end
74
+ result << node
75
+ node = node.next
76
+ end
77
+
78
+ result
74
79
  end
75
80
 
76
81
  private
@@ -95,7 +100,7 @@ module Nameko
95
100
  )?
96
101
  )?
97
102
  /x) do |md|
98
- md.named_captures.map{|k,v| [k.to_sym, v] }.to_h
103
+ md.named_captures.map{ |k, v| [k.to_sym, v] }.to_h
99
104
  end
100
105
  end
101
106
  end
@@ -0,0 +1,81 @@
1
# FFI view of a single MeCab node.
# Nameko::Mecab#parse returns an array of these objects.
#
# The field order and types below are expected to mirror `mecab_node_t` from
# mecab.h -- NOTE(review): verify against the MeCab version in use.
#
# Struct fields without a dedicated reader (for example `id` or `posid`) can
# be read as plain methods; see #method_missing.
class MecabNode < FFI::Struct
  layout :prev,      :pointer,
         :next,      :pointer,
         :enext,     :pointer,
         :bnext,     :pointer,
         :rpath,     :pointer,
         :lpath,     :pointer,
         :surface,   :string,
         :feature,   :string,
         :id,        :uint,
         :length,    :ushort,
         :rlength,   :ushort,
         :rcAttr,    :ushort,
         :lcAttr,    :ushort,
         :posid,     :ushort,
         :char_type, :uchar,
         :stat,      :uchar,
         :isbest,    :uchar,
         :alpha,     :float,
         :beta,      :float,
         :prob,      :float,
         :wcost,     :short,
         :cost,      :long

  # Splits the comma-separated feature string into named fields.
  # A leading `*` placeholder is stripped from pos1..conjugation.
  FEATURE_PATTERN = /
    ^
    (?:
      (?<pos>[^,]+),
      \*?(?<pos1>[^,]*),
      \*?(?<pos2>[^,]*),
      \*?(?<pos3>[^,]*),
      \*?(?<conjugation_form>[^,]*),
      \*?(?<conjugation>[^,]*),
      (?<base>[^,]*)
      (?:
        ,(?<yomi>[^,]*)
        ,(?<pronunciation>[^,]*)
      )?
    )?
  /x

  # Parsed feature fields of this node.
  #
  # @return [Hash{Symbol=>String, nil}] keys are :pos, :pos1, :pos2, :pos3,
  #   :conjugation_form, :conjugation, :base, :yomi and :pronunciation.
  #   When the feature string carries no reading and the surface contains
  #   katakana, :yomi and :pronunciation are set to the surface.
  def feature
    raw = self[:feature].force_encoding(Encoding.default_external)
    # Every part of the pattern is optional, so the match itself always
    # succeeds; fields that are absent come back as nil.
    fields = FEATURE_PATTERN.match(raw).named_captures.map { |k, v| [k.to_sym, v] }.to_h
    fill_up(fields)
  end

  # @return [MecabNode] node wrapping the `next` pointer; callers test it
  #   with `null?` to detect the end of the list.
  def next
    MecabNode.new self[:next]
  end

  # Surface form of this node.
  #
  # The raw `surface` field is not cut at the node boundary, so only the
  # first `length` bytes are taken. `length` is a byte count, hence
  # `byteslice` rather than character indexing.
  #
  # @return [String] surface text tagged with Encoding.default_external
  def surface
    self[:surface].byteslice(0, self[:length]).force_encoding(Encoding.default_external)
  end

  # Only layout members are answered dynamically.
  def respond_to_missing?(name, include_private = false)
    members.include?(name) || super
  end

  private

  # Falls back to the surface as reading/pronunciation when MeCab supplied no
  # reading and the surface contains katakana.
  #
  # The feature hash has no :surface entry, so the text is taken from
  # #surface; indexing the hash with :surface would yield nil here.
  def fill_up(analysis)
    text = surface
    if !analysis[:yomi] && text.match?(/\p{katakana}+/)
      analysis[:yomi] = text
      analysis[:pronunciation] = text
    end
    analysis
  end

  # NOTE(review): returns the raw, untruncated surface field and is private
  # in the released code; visibility and value are kept as they were.
  def to_s
    self[:surface]
  end

  # NOTE(review): kept private and unchanged, see #to_s.
  def to_ary
    [self[:surface]]
  end

  # Exposes struct fields as reader methods (e.g. `node.posid`).
  # Anything that is not a layout member, or is called with arguments, goes
  # to the default handler and raises NoMethodError.
  def method_missing(key, *args)
    return self[key] if args.empty? && members.include?(key)

    super
  end
end
@@ -1,3 +1,3 @@
1
1
  module Nameko
2
- VERSION = "0.1.1"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["sa2taka@gmail.com"]
11
11
 
12
12
  spec.summary = %q{Ruby binding for Mecab.}
13
- spec.description = %q{Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer.}
13
+ spec.description = %q{Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer."}
14
14
  spec.homepage = "https://github.com/sa2taka/nameko"
15
15
  spec.license = "MIT"
16
16
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nameko
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - sa2taka
@@ -66,7 +66,7 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.9'
69
- description: Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer.
69
+ description: Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer."
70
70
  email:
71
71
  - sa2taka@gmail.com
72
72
  executables: []
@@ -77,19 +77,16 @@ files:
77
77
  - ".rspec"
78
78
  - ".travis.yml"
79
79
  - CODE_OF_CONDUCT.md
80
- - 'File:'
81
80
  - Gemfile
82
81
  - Gemfile.lock
83
82
  - LICENSE.txt
84
- - 'Name:'
85
83
  - README.md
86
84
  - Rakefile
87
- - Successfully
88
- - 'Version:'
89
85
  - bin/console
90
86
  - bin/setup
91
87
  - lib/nameko.rb
92
88
  - lib/nameko/nameko.rb
89
+ - lib/nameko/node.rb
93
90
  - lib/nameko/version.rb
94
91
  - nameko.gemspec
95
92
  homepage: https://github.com/sa2taka/nameko
data/File: DELETED
File without changes
data/Name: DELETED
File without changes
File without changes
data/Version: DELETED
File without changes