nameko 0.1.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ea033cbecf52f5f7f98326e16ea8877b3ed5d71ba5910ac28bfc4fd74bc5a77
4
- data.tar.gz: c824bc650de4469337ea3c8c12facb7c38240bbb718022a9d8444fe9ca0a8e2f
3
+ metadata.gz: aa8bfa40183c62b39a9b92361fe6b39997d2ee619d1b7af219775874836fb8d1
4
+ data.tar.gz: cde44325f5429beba2e90aa9bb31ccbe8685dfd2524e988c70dd5f9129b48751
5
5
  SHA512:
6
- metadata.gz: 9048e0b1021b73e58da3a9931ab28c0853c45982d37433b1039b0d15c59743be04b4de9619291b77800d66bedd8c4246a490d088005293424d2d71f600912b47
7
- data.tar.gz: 71b54f6dfa658f2e759a94401e6f97624a44619f1748e0cbf42712ed66574bb6c0be9ae829bdf52326e0d55e70119f9f8cfb53683efd9ba2a7182e7dfe8fa90d
6
+ metadata.gz: a946d2295725ee8bc0ffd488254ad8edfac1435dcab7a419c1e7869d5c82cedb7b4b7bc7f919163ff595593ce71fa3e0fd81478772462136f6de6474afe9ba53
7
+ data.tar.gz: c05da08603a4157dbbfd91daddd3075453c554a082a6e82024f4eb11530127cbf5cb0fccb7c6c88ff9bb4e0be6a47dc5a28ac078f91a36c2f10bc9d8631a367d
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- nameko (0.1.0)
4
+ nameko (0.5.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -26,22 +26,30 @@ Or install it yourself as:
26
26
  require 'nameko'
27
27
 
28
28
  mecab = Nameko::Mecab.new
29
- mecab.parse("私以外私じゃないの。")
29
+ mecab.parse("私以外私じゃないの")
30
30
  # =>
31
- [
32
- {:surface=>'私', :pos=>'名詞', :pos1=>'代名詞', :pos2=>'一般', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'私', :yomi=>'ワタシ', :pronunciation=>'ワタシ'},
33
- {:surface=>'以外', :pos=>'名詞', :pos1=>'非自立', :pos2=>'副詞可能', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'以外', :yomi=>'イガイ', :pronunciation=>'イガイ'},
34
- {:surface=>'私', :pos=>'名詞', :pos1=>'代名詞', :pos2=>'一般', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'私', :yomi=>'ワタシ', :pronunciation=>'ワタシ'},
35
- {:surface=>'じゃ', :pos=>'助詞', :pos1=>'副助詞', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'じゃ', :yomi=>'ジャ', :pronunciation=>'ジャ'},
36
- {:surface=>'ない', :pos=>'助動詞', :pos1=>'', :pos2=>'', :pos3=>'', :conjugation_form=>'特殊・ナイ', :conjugation=>'基本形', :base=>'ない', :yomi=>'ナイ', :pronunciation=>'ナイ'},
37
- {:surface=>'の', :pos=>'助詞', :pos1=>'終助詞', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'の', :yomi=>'ノ', :pronunciation=>'ノ'},
38
- {:surface=>'。',:pos=>'記号', :pos1=>'句点', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'。', :yomi=>'。', :pronunciation=>'。'},
39
- ]
31
+ # [
32
+ # #<MecabNode:0x00007f8f51117348>,
33
+ # #<MecabNode:0x00007f8f51116d30>,
34
+ # #<MecabNode:0x00007f8f51115610>,
35
+ # #<MecabNode:0x00007f8f51115138>,
36
+ # #<MecabNode:0x00007f8f51123fa8>,
37
+ # #<MecabNode:0x00007f8f51123be8>
38
+ # ]
39
+
40
+ node = mecab.parse("私以外私じゃないの")[0]
41
+ node.surface # => "私"
42
+ node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
43
+ node.posid #=> 59
44
+ node.id #=> 1
40
45
  ```
41
46
 
42
- Nameko::Mecab#parse returns a array of hash.
47
+ Nameko::Mecab#parse returns an array of `MecabNode` objects.
48
+
49
+ Each `MecabNode` has a `feature` method.
50
+ It returns a hash.
43
51
  The meaning of each hash key is as follows (the keys are symbols):
44
- + `surface`: 表層系(Surface)
52
+
45
53
  + `pos`: 品詞(Part of speech)
46
54
  + `pos1`: 品詞細分類1(Part of speech subcategory1)
47
55
  + `pos2`: 品詞細分類2(Part of speech subcategory2)
@@ -63,13 +71,13 @@ mecab = Nameko::Mecab.new("-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
63
71
  # mecab = Nameko::Mecab.new(["-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
64
72
  # mecab = Nameko::Mecab.new(["-d", "/usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
65
73
 
66
- mecab.parse("アラレちゃん")
67
- # => [{:surface=>"アラレちゃん", :pos=>"名詞", :pos1=>"固有名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"アラレちゃん", :yomi=>"アラレチャン", :pronunciation=>"アラレチャン"}]
74
+ mecab.parse("アラレちゃん").map(&:feature)
75
+ # => [{:pos=>"名詞", :pos1=>"固有名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"アラレちゃん", :yomi=>"アラレチャン", :pronunciation=>"アラレチャン"}]
68
76
  ```
69
77
 
70
78
  ## Nameko VS. Natto
71
79
 
72
- The key difference between Natto and Nameko is the return value of parse method.
80
+ The key difference between Natto and Nameko is the return value of the `feature` method.
73
81
 
74
82
  ```ruby:Natto
75
83
  require 'natto'
@@ -90,12 +98,12 @@ require 'nameko'
90
98
 
91
99
  mecab = Nameko::Mecab.new
92
100
 
93
- mecab.parse("私とあなた")
101
+ mecab.parse("私とあなた").map(&:feature)
94
102
  # =>
95
103
  [
96
- {:surface=>"私", :pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"},
97
- {:surface=>"と", :pos=>"助詞", :pos1=>"格助詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"と", :yomi=>"ト", :pronunciation=>"ト"},
98
- {:surface=>"あなた", :pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"あなた", :yomi=>"アナタ", :pronunciation=>"アナタ"}
104
+ {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"},
105
+ {:pos=>"助詞", :pos1=>"格助詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"と", :yomi=>"ト", :pronunciation=>"ト"},
106
+ {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"あなた", :yomi=>"アナタ", :pronunciation=>"アナタ"}
99
107
  ]
100
108
  ```
101
109
 
@@ -1,20 +1,20 @@
1
- require "ffi"
1
+ require 'ffi'
2
+ require 'nameko/node.rb'
2
3
 
3
4
  module Nameko
4
5
  # This class provides a parse method.
5
- # require 'nameko'
6
- #
7
- # mecab = Nameko::Mecab.new
8
- # mecab.parse("私以外私じゃないの。")
6
+ # require 'nameko'
9
7
  #
8
+ # mecab = Nameko::Mecab.new
9
+ # mecab.parse("私以外私じゃないの")
10
+ # # =>
10
11
  # [
11
- # {:surface=>'私', :pos=>'名詞', :pos1=>'代名詞', :pos2=>'一般', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'私', :yomi=>'ワタシ', :pronunciation=>'ワタシ'},
12
- # {:surface=>'以外', :pos=>'名詞', :pos1=>'非自立', :pos2=>'副詞可能', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'以外', :yomi=>'イガイ', :pronunciation=>'イガイ'},
13
- # {:surface=>'私', :pos=>'名詞', :pos1=>'代名詞', :pos2=>'一般', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'私', :yomi=>'ワタシ', :pronunciation=>'ワタシ'},
14
- # {:surface=>'じゃ', :pos=>'助詞', :pos1=>'副助詞', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'じゃ', :yomi=>'ジャ', :pronunciation=>'ジャ'},
15
- # {:surface=>'ない', :pos=>'助動詞', :pos1=>'', :pos2=>'', :pos3=>'', :conjugation_form=>'特殊・ナイ', :conjugation=>'基本形', :base=>'ない', :yomi=>'ナイ', :pronunciation=>'ナイ'},
16
- # {:surface=>'の', :pos=>'助詞', :pos1=>'終助詞', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'の', :yomi=>'ノ', :pronunciation=>'ノ'},
17
- # {:surface=>'。',:pos=>'記号', :pos1=>'句点', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'。', :yomi=>'。', :pronunciation=>'。'},
12
+ # #<MecabNode:0x00007f8f51117348>,
13
+ # #<MecabNode:0x00007f8f51116d30>,
14
+ # #<MecabNode:0x00007f8f51115610>,
15
+ # #<MecabNode:0x00007f8f51115138>,
16
+ # #<MecabNode:0x00007f8f51123fa8>,
17
+ # #<MecabNode:0x00007f8f51123be8>
18
18
  # ]
19
19
  #
20
20
 
@@ -24,7 +24,7 @@ module Nameko
24
24
 
25
25
  attach_function :mecab_new2, [:string], :pointer
26
26
  attach_function :mecab_destroy, [:pointer], :void
27
- attach_function :mecab_sparse_tostr, [:pointer, :string], :string
27
+ attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
28
28
 
29
29
  def self.destroy(mecab)
30
30
  proc {
@@ -51,26 +51,31 @@ module Nameko
51
51
 
52
52
  # Parse the given string by MeCab.
53
53
  # @param [String] str Parsed text
54
- # @return [Array<Hash>] Result of Mecab parsing
54
+ # @return [Array<MecabNode>] Result of Mecab parsing
55
+ #
56
+ # @example
57
+ # node = mecab.parse("私以外私じゃないの")[0]
55
58
  #
56
- # The return value is array of hash.
59
+ # node.surface # => "私"
60
+ # node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
61
+ # node.posid #=> 59
62
+ # node.id #=> 1
57
63
  #
58
- # The hash keys meaning is as follows(The key is symbol):
59
- # surface: 表層系(Surface)
60
- # pos: 品詞(Part of speech)
61
- # pos1: 品詞細分類1(Part of speech subcategory1)
62
- # pos2: 品詞細分類2(Part of speech subcategory2)
63
- # pos3: 品詞細分類3(Part of speech subcategory3)
64
- # conjugation_form: 活用形(Conjugation form)
65
- # conjugation: 活用形(conjucation)
66
- # base: 基本形・原型(Lexical form)
67
- # yomi: 読み(Reading)
68
- # pronunciation: 発音(Pronunciation)
69
64
 
70
65
  def parse(str)
71
- mecab_row = mecab_sparse_tostr(@mecab, str).force_encoding(Encoding.default_external)
72
- analysis_result = analyze(mecab_row)
73
- fill_up(analysis_result)
66
+ node = MecabNode.new mecab_sparse_tonode(@mecab, str)
67
+ result = []
68
+
69
+ while !node.null? do
70
+ if node.surface.empty?
71
+ node = node.next
72
+ next
73
+ end
74
+ result << node
75
+ node = node.next
76
+ end
77
+
78
+ result
74
79
  end
75
80
 
76
81
  private
@@ -95,7 +100,7 @@ module Nameko
95
100
  )?
96
101
  )?
97
102
  /x) do |md|
98
- md.named_captures.map{|k,v| [k.to_sym, v] }.to_h
103
+ md.named_captures.map{ |k, v| [k.to_sym, v] }.to_h
99
104
  end
100
105
  end
101
106
  end
@@ -0,0 +1,81 @@
1
+ # This class defines the Node struct.
2
+ # The Nameko::Mecab#parse method returns it.
3
+ class MecabNode < FFI::Struct
4
+ layout :prev, :pointer,
5
+ :next, :pointer,
6
+ :enext, :pointer,
7
+ :bnext, :pointer,
8
+ :rpath, :pointer,
9
+ :lpath, :pointer,
10
+ :surface, :string,
11
+ :feature, :string,
12
+ :id, :uint,
13
+ :length, :ushort,
14
+ :rlength, :ushort,
15
+ :rcAttr, :ushort,
16
+ :lcAttr, :ushort,
17
+ :posid, :ushort,
18
+ :char_type, :uchar,
19
+ :stat, :uchar,
20
+ :isbest, :uchar,
21
+ :alpha, :float,
22
+ :beta, :float,
23
+ :prob, :float,
24
+ :wcost, :short,
25
+ :cost, :long
26
+
27
+ def feature
28
+ feature = self[:feature].force_encoding(Encoding.default_external).match(/
29
+ ^
30
+ (?:
31
+ (?<pos>[^,]+),
32
+ \*?(?<pos1>[^,]*),
33
+ \*?(?<pos2>[^,]*),
34
+ \*?(?<pos3>[^,]*),
35
+ \*?(?<conjugation_form>[^,]*),
36
+ \*?(?<conjugation>[^,]*),
37
+ (?<base>[^,]*)
38
+ (?:
39
+ ,(?<yomi>[^,]*)
40
+ ,(?<pronunciation>[^,]*)
41
+ )?
42
+ )?
43
+ /x) do |md|
44
+ md.named_captures.map{ |k, v| [k.to_sym, v] }.to_h
45
+ end
46
+
47
+ fill_up(feature)
48
+ end
49
+
50
+ def next
51
+ MecabNode.new self[:next]
52
+ end
53
+
54
+ def surface
55
+ self[:surface][0...self[:length]].force_encoding(Encoding.default_external)
56
+ end
57
+
58
+ private
59
+
60
+ def fill_up(analysis)
61
+ if !analysis[:yomi] && analysis[:surface].match(/\p{katakana}+/)
62
+ analysis[:yomi] = analysis[:surface]
63
+ analysis[:pronunciation] = analysis[:surface]
64
+ end
65
+ analysis
66
+ end
67
+
68
+ def to_s
69
+ self[:surface]
70
+ end
71
+
72
+ def to_ary
73
+ [self[:surface]]
74
+ end
75
+
76
+ private
77
+
78
+ def method_missing(key)
79
+ self[key]
80
+ end
81
+ end
@@ -1,3 +1,3 @@
1
1
  module Nameko
2
- VERSION = "0.1.1"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["sa2taka@gmail.com"]
11
11
 
12
12
  spec.summary = %q{Ruby binding for Mecab.}
13
- spec.description = %q{Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer.}
13
+ spec.description = %q{Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer."}
14
14
  spec.homepage = "https://github.com/sa2taka/nameko"
15
15
  spec.license = "MIT"
16
16
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nameko
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - sa2taka
@@ -66,7 +66,7 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.9'
69
- description: Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer.
69
+ description: Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer."
70
70
  email:
71
71
  - sa2taka@gmail.com
72
72
  executables: []
@@ -77,19 +77,16 @@ files:
77
77
  - ".rspec"
78
78
  - ".travis.yml"
79
79
  - CODE_OF_CONDUCT.md
80
- - 'File:'
81
80
  - Gemfile
82
81
  - Gemfile.lock
83
82
  - LICENSE.txt
84
- - 'Name:'
85
83
  - README.md
86
84
  - Rakefile
87
- - Successfully
88
- - 'Version:'
89
85
  - bin/console
90
86
  - bin/setup
91
87
  - lib/nameko.rb
92
88
  - lib/nameko/nameko.rb
89
+ - lib/nameko/node.rb
93
90
  - lib/nameko/version.rb
94
91
  - nameko.gemspec
95
92
  homepage: https://github.com/sa2taka/nameko
data/File: DELETED
File without changes
data/Name: DELETED
File without changes
File without changes
data/Version: DELETED
File without changes