nameko 0.1.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +27 -19
- data/lib/nameko/nameko.rb +35 -30
- data/lib/nameko/node.rb +81 -0
- data/lib/nameko/version.rb +1 -1
- data/nameko.gemspec +1 -1
- metadata +3 -6
- data/File: +0 -0
- data/Name: +0 -0
- data/Successfully +0 -0
- data/Version: +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa8bfa40183c62b39a9b92361fe6b39997d2ee619d1b7af219775874836fb8d1
|
4
|
+
data.tar.gz: cde44325f5429beba2e90aa9bb31ccbe8685dfd2524e988c70dd5f9129b48751
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a946d2295725ee8bc0ffd488254ad8edfac1435dcab7a419c1e7869d5c82cedb7b4b7bc7f919163ff595593ce71fa3e0fd81478772462136f6de6474afe9ba53
|
7
|
+
data.tar.gz: c05da08603a4157dbbfd91daddd3075453c554a082a6e82024f4eb11530127cbf5cb0fccb7c6c88ff9bb4e0be6a47dc5a28ac078f91a36c2f10bc9d8631a367d
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -26,22 +26,30 @@ Or install it yourself as:
|
|
26
26
|
require 'nameko'
|
27
27
|
|
28
28
|
mecab = Nameko::Mecab.new
|
29
|
-
mecab.parse("
|
29
|
+
mecab.parse("私以外私じゃないの")
|
30
30
|
# =>
|
31
|
-
[
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
31
|
+
# [
|
32
|
+
# #<MecabNode:0x00007f8f51117348>,
|
33
|
+
# #<MecabNode:0x00007f8f51116d30>,
|
34
|
+
# #<MecabNode:0x00007f8f51115610>,
|
35
|
+
# #<MecabNode:0x00007f8f51115138>,
|
36
|
+
# #<MecabNode:0x00007f8f51123fa8>,
|
37
|
+
# #<MecabNode:0x00007f8f51123be8>
|
38
|
+
# ]
|
39
|
+
|
40
|
+
node = mecab.parse("私以外私じゃないの")[0]
|
41
|
+
node.surface # => "私"
|
42
|
+
node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
|
43
|
+
node.posid #=> 59
|
44
|
+
node.id #=> 1
|
40
45
|
```
|
41
46
|
|
42
|
-
Nameko::Mecab#parse returns a array of
|
47
|
+
Nameko::Mecab#parse returns an array of `MecabNode` objects.
|
48
|
+
|
49
|
+
`MecabNode` has a `feature` method.
|
50
|
+
It returns a hash.
|
43
51
|
The meaning of each hash key is as follows (the keys are symbols):
|
44
|
-
|
52
|
+
|
45
53
|
+ `pos`: 品詞(Part of speech)
|
46
54
|
+ `pos1`: 品詞細分類1(Part of speech subcategory1)
|
47
55
|
+ `pos2`: 品詞細分類2(Part of speech subcategory2)
|
@@ -63,13 +71,13 @@ mecab = Nameko::Mecab.new("-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
|
|
63
71
|
# mecab = Nameko::Mecab.new(["-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
|
64
72
|
# mecab = Nameko::Mecab.new(["-d", "/usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
|
65
73
|
|
66
|
-
mecab.parse("アラレちゃん")
|
67
|
-
# => [{:
|
74
|
+
mecab.parse("アラレちゃん").map(&:feature)
|
75
|
+
# => [{:pos=>"名詞", :pos1=>"固有名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"アラレちゃん", :yomi=>"アラレチャン", :pronunciation=>"アラレチャン"}]
|
68
76
|
```
|
69
77
|
|
70
78
|
## Nameko VS. Natto
|
71
79
|
|
72
|
-
The key difference between Natto and Nameko is the return value of
|
80
|
+
The key difference between Natto and Nameko is the return value of the `feature` method.
|
73
81
|
|
74
82
|
```ruby:Natto
|
75
83
|
require 'natto'
|
@@ -90,12 +98,12 @@ require 'nameko'
|
|
90
98
|
|
91
99
|
mecab = Nameko::Mecab.new
|
92
100
|
|
93
|
-
mecab.parse("私とあなた")
|
101
|
+
mecab.parse("私とあなた").map(&:feature)
|
94
102
|
# =>
|
95
103
|
[
|
96
|
-
{:
|
97
|
-
{:
|
98
|
-
{:
|
104
|
+
{:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"},
|
105
|
+
{:pos=>"助詞", :pos1=>"格助詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"と", :yomi=>"ト", :pronunciation=>"ト"},
|
106
|
+
{:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"あなた", :yomi=>"アナタ", :pronunciation=>"アナタ"}
|
99
107
|
]
|
100
108
|
```
|
101
109
|
|
data/lib/nameko/nameko.rb
CHANGED
@@ -1,20 +1,20 @@
|
|
1
|
-
require
|
1
|
+
require 'ffi'
|
2
|
+
require 'nameko/node.rb'
|
2
3
|
|
3
4
|
module Nameko
|
4
5
|
# This class provides a parse method.
|
5
|
-
#
|
6
|
-
#
|
7
|
-
# mecab = Nameko::Mecab.new
|
8
|
-
# mecab.parse("私以外私じゃないの。")
|
6
|
+
# require 'nameko'
|
9
7
|
#
|
8
|
+
# mecab = Nameko::Mecab.new
|
9
|
+
# mecab.parse("私以外私じゃないの")
|
10
|
+
# # =>
|
10
11
|
# [
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
# {:surface=>'。',:pos=>'記号', :pos1=>'句点', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'。', :yomi=>'。', :pronunciation=>'。'},
|
12
|
+
# #<MecabNode:0x00007f8f51117348>,
|
13
|
+
# #<MecabNode:0x00007f8f51116d30>,
|
14
|
+
# #<MecabNode:0x00007f8f51115610>,
|
15
|
+
# #<MecabNode:0x00007f8f51115138>,
|
16
|
+
# #<MecabNode:0x00007f8f51123fa8>,
|
17
|
+
# #<MecabNode:0x00007f8f51123be8>
|
18
18
|
# ]
|
19
19
|
#
|
20
20
|
|
@@ -24,7 +24,7 @@ module Nameko
|
|
24
24
|
|
25
25
|
attach_function :mecab_new2, [:string], :pointer
|
26
26
|
attach_function :mecab_destroy, [:pointer], :void
|
27
|
-
attach_function :
|
27
|
+
attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
|
28
28
|
|
29
29
|
def self.destroy(mecab)
|
30
30
|
proc {
|
@@ -51,26 +51,31 @@ module Nameko
|
|
51
51
|
|
52
52
|
# Parse the given string by MeCab.
|
53
53
|
# @param [String] str Parsed text
|
54
|
-
# @return [Array<
|
54
|
+
# @return [Array<MecabNode>] Result of Mecab parsing
|
55
|
+
#
|
56
|
+
# @example
|
57
|
+
# node = mecab.parse("私以外私じゃないの")[0]
|
55
58
|
#
|
56
|
-
#
|
59
|
+
# node.surface # => "私"
|
60
|
+
# node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
|
61
|
+
# node.posid #=> 59
|
62
|
+
# node.id #=> 1
|
57
63
|
#
|
58
|
-
# The hash keys meaning is as follows(The key is symbol):
|
59
|
-
# surface: 表層系(Surface)
|
60
|
-
# pos: 品詞(Part of speech)
|
61
|
-
# pos1: 品詞細分類1(Part of speech subcategory1)
|
62
|
-
# pos2: 品詞細分類2(Part of speech subcategory2)
|
63
|
-
# pos3: 品詞細分類3(Part of speech subcategory3)
|
64
|
-
# conjugation_form: 活用形(Conjugation form)
|
65
|
-
# conjugation: 活用形(conjucation)
|
66
|
-
# base: 基本形・原型(Lexical form)
|
67
|
-
# yomi: 読み(Reading)
|
68
|
-
# pronunciation: 発音(Pronunciation)
|
69
64
|
|
70
65
|
def parse(str)
  # Wrap the head pointer returned by mecab_sparse_tonode in a MecabNode.
  current = MecabNode.new mecab_sparse_tonode(@mecab, str)
  nodes = []

  # Follow the `next` links until a null node is reached, keeping only
  # nodes with a non-empty surface (the empty ones are presumably MeCab's
  # BOS/EOS sentinels -- TODO confirm).
  until current.null?
    nodes << current unless current.surface.empty?
    current = current.next
  end

  nodes
end
|
75
80
|
|
76
81
|
private
|
@@ -95,7 +100,7 @@ module Nameko
|
|
95
100
|
)?
|
96
101
|
)?
|
97
102
|
/x) do |md|
|
98
|
-
|
103
|
+
md.named_captures.map{ |k, v| [k.to_sym, v] }.to_h
|
99
104
|
end
|
100
105
|
end
|
101
106
|
end
|
data/lib/nameko/node.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# FFI struct describing one node (morpheme) of a MeCab analysis.
# Nameko::Mecab#parse wraps the pointer returned by mecab_sparse_tonode in
# this class and returns an array of such nodes.
#
# The field list is presumably a mirror of `mecab_node_t` from mecab.h;
# verify against the installed MeCab headers before changing the layout.
class MecabNode < FFI::Struct
  layout :prev,      :pointer,
         :next,      :pointer,
         :enext,     :pointer,
         :bnext,     :pointer,
         :rpath,     :pointer,
         :lpath,     :pointer,
         :surface,   :string,
         :feature,   :string,
         :id,        :uint,
         :length,    :ushort,
         :rlength,   :ushort,
         :rcAttr,    :ushort,
         :lcAttr,    :ushort,
         :posid,     :ushort,
         :char_type, :uchar,
         :stat,      :uchar,
         :isbest,    :uchar,
         :alpha,     :float,
         :beta,      :float,
         :prob,      :float,
         :wcost,     :short,
         :cost,      :long

  # Splits the comma-separated feature string into named fields.
  # A leading "*" (MeCab's placeholder for "no value") is dropped from the
  # pos1..conjugation fields, so those come back as "" instead of "*".
  # Every part after the anchor is optional, so the pattern always matches.
  FEATURE_PATTERN = /
    \A
    (?:
      (?<pos>[^,]+),
      \*?(?<pos1>[^,]*),
      \*?(?<pos2>[^,]*),
      \*?(?<pos3>[^,]*),
      \*?(?<conjugation_form>[^,]*),
      \*?(?<conjugation>[^,]*),
      (?<base>[^,]*)
      (?:
        ,(?<yomi>[^,]*)
        ,(?<pronunciation>[^,]*)
      )?
    )?
  /x.freeze

  # Parses the node's feature string.
  #
  # @return [Hash{Symbol=>String, nil}] keys are :pos, :pos1, :pos2, :pos3,
  #   :conjugation_form, :conjugation, :base, :yomi and :pronunciation.
  #   A field that is absent from the feature string is nil, except that
  #   :yomi / :pronunciation may be filled from the surface (see #fill_up).
  def feature
    raw = self[:feature].force_encoding(Encoding.default_external)
    parsed = raw.match(FEATURE_PATTERN) do |md|
      md.named_captures.map { |key, value| [key.to_sym, value] }.to_h
    end

    fill_up(parsed)
  end

  # @return [MecabNode] the node wrapped around this node's `next` pointer;
  #   callers detect the end of the list with `null?` on the result.
  def next
    MecabNode.new self[:next]
  end

  # The token text of this node.
  #
  # The raw :surface string is cut to the first `length` bytes because the
  # C string is not terminated at the token boundary.
  #
  # @return [String] surface form tagged with Encoding.default_external
  def surface
    self[:surface].byteslice(0, self[:length]).force_encoding(Encoding.default_external)
  end

  # @return [String] the token text (same as #surface)
  def to_s
    surface
  end

  private

  # Falls back to the surface for the reading when the feature string did
  # not provide one and the surface contains katakana.
  #
  # The original looked up analysis[:surface], a key the feature hash never
  # contains, so this raised NoMethodError for every node without a reading.
  def fill_up(analysis)
    token = surface
    if !analysis[:yomi] && token.match?(/\p{katakana}+/)
      analysis[:yomi] = token
      analysis[:pronunciation] = token
    end
    analysis
  end

  # Implicit array conversion; kept private as in the original.
  def to_ary
    [surface]
  end

  # Exposes struct fields as reader methods (node.posid, node.id, ...).
  # Anything that is not a layout field, or that is called with arguments,
  # goes to super so callers get a regular NoMethodError.
  def method_missing(name, *args, &block)
    return self[name] if args.empty? && block.nil? && members.include?(name)

    super
  end

  def respond_to_missing?(name, include_private = false)
    members.include?(name) || super
  end
end
|
data/lib/nameko/version.rb
CHANGED
data/nameko.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["sa2taka@gmail.com"]
|
11
11
|
|
12
12
|
spec.summary = %q{Ruby binding for Mecab.}
|
13
|
-
spec.description = %q{Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer.}
|
13
|
+
spec.description = %q{Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer."}
|
14
14
|
spec.homepage = "https://github.com/sa2taka/nameko"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nameko
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- sa2taka
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '1.9'
|
69
|
-
description: Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer.
|
69
|
+
description: Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer."
|
70
70
|
email:
|
71
71
|
- sa2taka@gmail.com
|
72
72
|
executables: []
|
@@ -77,19 +77,16 @@ files:
|
|
77
77
|
- ".rspec"
|
78
78
|
- ".travis.yml"
|
79
79
|
- CODE_OF_CONDUCT.md
|
80
|
-
- 'File:'
|
81
80
|
- Gemfile
|
82
81
|
- Gemfile.lock
|
83
82
|
- LICENSE.txt
|
84
|
-
- 'Name:'
|
85
83
|
- README.md
|
86
84
|
- Rakefile
|
87
|
-
- Successfully
|
88
|
-
- 'Version:'
|
89
85
|
- bin/console
|
90
86
|
- bin/setup
|
91
87
|
- lib/nameko.rb
|
92
88
|
- lib/nameko/nameko.rb
|
89
|
+
- lib/nameko/node.rb
|
93
90
|
- lib/nameko/version.rb
|
94
91
|
- nameko.gemspec
|
95
92
|
homepage: https://github.com/sa2taka/nameko
|
data/File:
DELETED
File without changes
|
data/Name:
DELETED
File without changes
|
data/Successfully
DELETED
File without changes
|
data/Version:
DELETED
File without changes
|