nameko 0.1.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +27 -19
- data/lib/nameko/nameko.rb +35 -30
- data/lib/nameko/node.rb +81 -0
- data/lib/nameko/version.rb +1 -1
- data/nameko.gemspec +1 -1
- metadata +3 -6
- data/File: +0 -0
- data/Name: +0 -0
- data/Successfully +0 -0
- data/Version: +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa8bfa40183c62b39a9b92361fe6b39997d2ee619d1b7af219775874836fb8d1
|
4
|
+
data.tar.gz: cde44325f5429beba2e90aa9bb31ccbe8685dfd2524e988c70dd5f9129b48751
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a946d2295725ee8bc0ffd488254ad8edfac1435dcab7a419c1e7869d5c82cedb7b4b7bc7f919163ff595593ce71fa3e0fd81478772462136f6de6474afe9ba53
|
7
|
+
data.tar.gz: c05da08603a4157dbbfd91daddd3075453c554a082a6e82024f4eb11530127cbf5cb0fccb7c6c88ff9bb4e0be6a47dc5a28ac078f91a36c2f10bc9d8631a367d
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -26,22 +26,30 @@ Or install it yourself as:
|
|
26
26
|
require 'nameko'
|
27
27
|
|
28
28
|
mecab = Nameko::Mecab.new
|
29
|
-
mecab.parse("
|
29
|
+
mecab.parse("私以外私じゃないの")
|
30
30
|
# =>
|
31
|
-
[
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
31
|
+
# [
|
32
|
+
# #<MecabNode:0x00007f8f51117348>,
|
33
|
+
# #<MecabNode:0x00007f8f51116d30>,
|
34
|
+
# #<MecabNode:0x00007f8f51115610>,
|
35
|
+
# #<MecabNode:0x00007f8f51115138>,
|
36
|
+
# #<MecabNode:0x00007f8f51123fa8>,
|
37
|
+
# #<MecabNode:0x00007f8f51123be8>
|
38
|
+
# ]
|
39
|
+
|
40
|
+
node = mecab.parse("私以外私じゃないの")[0]
|
41
|
+
node.surface # => "私"
|
42
|
+
node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
|
43
|
+
node.posid #=> 59
|
44
|
+
node.id #=> 1
|
40
45
|
```
|
41
46
|
|
42
|
-
Nameko::Mecab#parse returns a array of
|
47
|
+
Nameko::Mecab#parse returns a array of `MecabNode`.
|
48
|
+
|
49
|
+
The MecabNode has `feature` method.
|
50
|
+
It return hash.
|
43
51
|
The hash keys meaning is as follows(The key is symbol):
|
44
|
-
|
52
|
+
|
45
53
|
+ `pos`: 品詞(Part of speech)
|
46
54
|
+ `pos1`: 品詞細分類1(Part of speech subcategory1)
|
47
55
|
+ `pos2`: 品詞細分類2(Part of speech subcategory2)
|
@@ -63,13 +71,13 @@ mecab = Nameko::Mecab.new("-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
|
|
63
71
|
# mecab = Nameko::Mecab.new(["-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
|
64
72
|
# mecab = Nameko::Mecab.new(["-d", "/usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
|
65
73
|
|
66
|
-
mecab.parse("アラレちゃん")
|
67
|
-
# => [{:
|
74
|
+
mecab.parse("アラレちゃん").map(&:feature)
|
75
|
+
# => [{:pos=>"名詞", :pos1=>"固有名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"アラレちゃん", :yomi=>"アラレチャン", :pronunciation=>"アラレチャン"}]
|
68
76
|
```
|
69
77
|
|
70
78
|
## Nameko VS. Natto
|
71
79
|
|
72
|
-
The key difference between Natto and Nameko is the return value of
|
80
|
+
The key difference between Natto and Nameko is the return value of feature method.
|
73
81
|
|
74
82
|
```ruby:Natto
|
75
83
|
require 'natto'
|
@@ -90,12 +98,12 @@ require 'nameko'
|
|
90
98
|
|
91
99
|
mecab = Nameko::Mecab.new
|
92
100
|
|
93
|
-
mecab.parse("私とあなた")
|
101
|
+
mecab.parse("私とあなた").map(&:feature)
|
94
102
|
# =>
|
95
103
|
[
|
96
|
-
{:
|
97
|
-
{:
|
98
|
-
{:
|
104
|
+
{:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"},
|
105
|
+
{:pos=>"助詞", :pos1=>"格助詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"と", :yomi=>"ト", :pronunciation=>"ト"},
|
106
|
+
{:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"あなた", :yomi=>"アナタ", :pronunciation=>"アナタ"}
|
99
107
|
]
|
100
108
|
```
|
101
109
|
|
data/lib/nameko/nameko.rb
CHANGED
@@ -1,20 +1,20 @@
|
|
1
|
-
require
|
1
|
+
require 'ffi'
|
2
|
+
require 'nameko/node.rb'
|
2
3
|
|
3
4
|
module Nameko
|
4
5
|
# This class is providing a parse method.
|
5
|
-
#
|
6
|
-
#
|
7
|
-
# mecab = Nameko::Mecab.new
|
8
|
-
# mecab.parse("私以外私じゃないの。")
|
6
|
+
# require 'nameko'
|
9
7
|
#
|
8
|
+
# mecab = Nameko::Mecab.new
|
9
|
+
# mecab.parse("私以外私じゃないの")
|
10
|
+
# # =>
|
10
11
|
# [
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
# {:surface=>'。',:pos=>'記号', :pos1=>'句点', :pos2=>'', :pos3=>'', :conjugation_form=>'', :conjugation=>'', :base=>'。', :yomi=>'。', :pronunciation=>'。'},
|
12
|
+
# #<MecabNode:0x00007f8f51117348>,
|
13
|
+
# #<MecabNode:0x00007f8f51116d30>,
|
14
|
+
# #<MecabNode:0x00007f8f51115610>,
|
15
|
+
# #<MecabNode:0x00007f8f51115138>,
|
16
|
+
# #<MecabNode:0x00007f8f51123fa8>,
|
17
|
+
# #<MecabNode:0x00007f8f51123be8>
|
18
18
|
# ]
|
19
19
|
#
|
20
20
|
|
@@ -24,7 +24,7 @@ module Nameko
|
|
24
24
|
|
25
25
|
attach_function :mecab_new2, [:string], :pointer
|
26
26
|
attach_function :mecab_destroy, [:pointer], :void
|
27
|
-
attach_function :
|
27
|
+
attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
|
28
28
|
|
29
29
|
def self.destroy(mecab)
|
30
30
|
proc {
|
@@ -51,26 +51,31 @@ module Nameko
|
|
51
51
|
|
52
52
|
# Parse the given string by MeCab.
|
53
53
|
# @param [String] str Parsed text
|
54
|
-
# @return [Array<
|
54
|
+
# @return [Array<MecabNode>] Result of Mecab parsing
|
55
|
+
#
|
56
|
+
# @example
|
57
|
+
# node = mecab.parse("私以外私じゃないの")[0]
|
55
58
|
#
|
56
|
-
#
|
59
|
+
# node.surface # => "私"
|
60
|
+
# node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
|
61
|
+
# node.posid #=> 59
|
62
|
+
# node.id #=> 1
|
57
63
|
#
|
58
|
-
# The hash keys meaning is as follows(The key is symbol):
|
59
|
-
# surface: 表層系(Surface)
|
60
|
-
# pos: 品詞(Part of speech)
|
61
|
-
# pos1: 品詞細分類1(Part of speech subcategory1)
|
62
|
-
# pos2: 品詞細分類2(Part of speech subcategory2)
|
63
|
-
# pos3: 品詞細分類3(Part of speech subcategory3)
|
64
|
-
# conjugation_form: 活用形(Conjugation form)
|
65
|
-
# conjugation: 活用形(conjucation)
|
66
|
-
# base: 基本形・原型(Lexical form)
|
67
|
-
# yomi: 読み(Reading)
|
68
|
-
# pronunciation: 発音(Pronunciation)
|
69
64
|
|
70
65
|
def parse(str)
|
71
|
-
|
72
|
-
|
73
|
-
|
66
|
+
node = MecabNode.new mecab_sparse_tonode(@mecab, str)
|
67
|
+
result = []
|
68
|
+
|
69
|
+
while !node.null? do
|
70
|
+
if node.surface.empty?
|
71
|
+
node = node.next
|
72
|
+
next
|
73
|
+
end
|
74
|
+
result << node
|
75
|
+
node = node.next
|
76
|
+
end
|
77
|
+
|
78
|
+
result
|
74
79
|
end
|
75
80
|
|
76
81
|
private
|
@@ -95,7 +100,7 @@ module Nameko
|
|
95
100
|
)?
|
96
101
|
)?
|
97
102
|
/x) do |md|
|
98
|
-
|
103
|
+
md.named_captures.map{ |k, v| [k.to_sym, v] }.to_h
|
99
104
|
end
|
100
105
|
end
|
101
106
|
end
|
data/lib/nameko/node.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# This class define Node struct
|
2
|
+
# Nameko::Mecab#parse method return it.
|
3
|
+
class MecabNode < FFI::Struct
|
4
|
+
layout :prev, :pointer,
|
5
|
+
:next, :pointer,
|
6
|
+
:enext, :pointer,
|
7
|
+
:bnext, :pointer,
|
8
|
+
:rpath, :pointer,
|
9
|
+
:lpath, :pointer,
|
10
|
+
:surface, :string,
|
11
|
+
:feature, :string,
|
12
|
+
:id, :uint,
|
13
|
+
:length, :ushort,
|
14
|
+
:rlength, :ushort,
|
15
|
+
:rcAttr, :ushort,
|
16
|
+
:lcAttr, :ushort,
|
17
|
+
:posid, :ushort,
|
18
|
+
:char_type, :uchar,
|
19
|
+
:stat, :uchar,
|
20
|
+
:isbest, :uchar,
|
21
|
+
:alpha, :float,
|
22
|
+
:beta, :float,
|
23
|
+
:prob, :float,
|
24
|
+
:wcost, :short,
|
25
|
+
:cost, :long
|
26
|
+
|
27
|
+
def feature
|
28
|
+
feature = self[:feature].force_encoding(Encoding.default_external).match(/
|
29
|
+
^
|
30
|
+
(?:
|
31
|
+
(?<pos>[^,]+),
|
32
|
+
\*?(?<pos1>[^,]*),
|
33
|
+
\*?(?<pos2>[^,]*),
|
34
|
+
\*?(?<pos3>[^,]*),
|
35
|
+
\*?(?<conjugation_form>[^,]*),
|
36
|
+
\*?(?<conjugation>[^,]*),
|
37
|
+
(?<base>[^,]*)
|
38
|
+
(?:
|
39
|
+
,(?<yomi>[^,]*)
|
40
|
+
,(?<pronunciation>[^,]*)
|
41
|
+
)?
|
42
|
+
)?
|
43
|
+
/x) do |md|
|
44
|
+
md.named_captures.map{ |k, v| [k.to_sym, v] }.to_h
|
45
|
+
end
|
46
|
+
|
47
|
+
fill_up(feature)
|
48
|
+
end
|
49
|
+
|
50
|
+
def next
|
51
|
+
MecabNode.new self[:next]
|
52
|
+
end
|
53
|
+
|
54
|
+
def surface
|
55
|
+
self[:surface][0...self[:length]].force_encoding(Encoding.default_external)
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
def fill_up(analysis)
|
61
|
+
if !analysis[:yomi] && analysis[:surface].match(/\p{katakana}+/)
|
62
|
+
analysis[:yomi] = analysis[:surface]
|
63
|
+
analysis[:pronunciation] = analysis[:surface]
|
64
|
+
end
|
65
|
+
analysis
|
66
|
+
end
|
67
|
+
|
68
|
+
def to_s
|
69
|
+
self[:surface]
|
70
|
+
end
|
71
|
+
|
72
|
+
def to_ary
|
73
|
+
[self[:surface]]
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
def method_missing(key)
|
79
|
+
self[key]
|
80
|
+
end
|
81
|
+
end
|
data/lib/nameko/version.rb
CHANGED
data/nameko.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["sa2taka@gmail.com"]
|
11
11
|
|
12
12
|
spec.summary = %q{Ruby binding for Mecab.}
|
13
|
-
spec.description = %q{Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer.}
|
13
|
+
spec.description = %q{Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer."}
|
14
14
|
spec.homepage = "https://github.com/sa2taka/nameko"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nameko
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- sa2taka
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '1.9'
|
69
|
-
description: Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer.
|
69
|
+
description: Ruby binding for Mecab, Part-of-Speech and Morphological Analyzer."
|
70
70
|
email:
|
71
71
|
- sa2taka@gmail.com
|
72
72
|
executables: []
|
@@ -77,19 +77,16 @@ files:
|
|
77
77
|
- ".rspec"
|
78
78
|
- ".travis.yml"
|
79
79
|
- CODE_OF_CONDUCT.md
|
80
|
-
- 'File:'
|
81
80
|
- Gemfile
|
82
81
|
- Gemfile.lock
|
83
82
|
- LICENSE.txt
|
84
|
-
- 'Name:'
|
85
83
|
- README.md
|
86
84
|
- Rakefile
|
87
|
-
- Successfully
|
88
|
-
- 'Version:'
|
89
85
|
- bin/console
|
90
86
|
- bin/setup
|
91
87
|
- lib/nameko.rb
|
92
88
|
- lib/nameko/nameko.rb
|
89
|
+
- lib/nameko/node.rb
|
93
90
|
- lib/nameko/version.rb
|
94
91
|
- nameko.gemspec
|
95
92
|
homepage: https://github.com/sa2taka/nameko
|
data/File:
DELETED
File without changes
|
data/Name:
DELETED
File without changes
|
data/Successfully
DELETED
File without changes
|
data/Version:
DELETED
File without changes
|