light-mecab 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +22 -8
- data/lib/light-mecab.rb +2 -61
- data/lib/light-mecab/morpheme.rb +69 -0
- data/lib/light-mecab/version.rb +1 -1
- data/locale/morpheme.yml +7 -3
- data/test/test_morpheme.rb +54 -13
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06ae6b62245e3fb9df0bdd05bf95f41b0ef489e6
|
4
|
+
data.tar.gz: 86ed420fb61f9774d903101c7c6e8f90c90550a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f07bb8aa5563ec6bc1a0463efc5cd15fb6ef11a6cad34b7285ab77caf26a430d9a179c120fa5d0f6f6e3bced49856525627d239cfb0cfb2d6620da1e02735643
|
7
|
+
data.tar.gz: 09cdefcf4faa2c8836b39f5b97eb7cdb8e4e885b4115e310b160dcbed271f036d3e15c8baf6ec9970238013a8fce7188a0950f1ef3e2768c926e60efc9b445c4
|
data/README.md
CHANGED
@@ -4,7 +4,8 @@ LightMecabは,mecab-rubyをラッピングすることで,より簡単に形
|
|
4
4
|
|
5
5
|
## Environment
|
6
6
|
|
7
|
-
MeCabおよびmecab-rubyが動作する環境が必要.
|
7
|
+
MeCabおよびmecab-rubyが動作する環境が必要.
|
8
|
+
MeCabの辞書にはIPA辞書を用いる.
|
8
9
|
|
9
10
|
## Installation
|
10
11
|
|
@@ -23,20 +24,33 @@ Gemfileに以下を記述する.
|
|
23
24
|
$ gem install light-mecab
|
24
25
|
|
25
26
|
## Usage
|
26
|
-
例えば「太郎はこの本を二郎を見た女性に渡した。」という文に含まれる名詞は,
|
27
|
-
|
28
27
|
```ruby
|
29
28
|
require 'light-mecab'
|
29
|
+
|
30
30
|
sentence = '太郎はこの本を二郎を見た女性に渡した。'
|
31
|
-
LightMecab::Morpheme.new(sentence)
|
32
|
-
```
|
33
|
-
によって取得できる(返り値はString型の配列).
|
31
|
+
m = LightMecab::Morpheme.new(sentence)
|
34
32
|
|
35
|
-
|
33
|
+
# 品詞数の取得
|
34
|
+
m.num
|
35
|
+
# => 15
|
36
36
|
|
37
|
-
|
37
|
+
# 品詞分解
|
38
|
+
m.parse
|
39
|
+
# => [{"太郎"=>"名詞"}, {"は"=>"助詞"}, …, {"。"=>"記号"}]
|
38
40
|
|
41
|
+
# わかち書き
|
42
|
+
m.parse.map{|p| p.keys}.flatten.join(' ')
|
43
|
+
# => "太郎 は この 本 を 二 郎 を 見 た 女性 に 渡し た 。"
|
39
44
|
|
45
|
+
# 名詞一覧
|
46
|
+
m.noun
|
47
|
+
# => ["太郎", "本", "二", "郎", "女性"]
|
48
|
+
# その他の品詞を取得するメソッド名は locale/morpheme.yml を参照
|
49
|
+
|
50
|
+
# MeCab::Nodeオブジェクトを取得したい場合
|
51
|
+
LightMecab::Morpheme.analyze(sentence)
|
52
|
+
# => MeCab::Nodeの配列が返ってきます
|
53
|
+
```
|
40
54
|
|
41
55
|
## Contributing
|
42
56
|
|
data/lib/light-mecab.rb
CHANGED
@@ -1,63 +1,4 @@
|
|
1
|
-
require 'light-mecab/version'
|
2
1
|
require 'MeCab'
|
3
2
|
require 'yaml'
|
4
|
-
|
5
|
-
|
6
|
-
class Morpheme
|
7
|
-
@@tagger = ::MeCab::Tagger.new
|
8
|
-
@@i18n = ::YAML.load_file(File.expand_path(File.join(__FILE__, '..', '..', 'locale', 'morpheme.yml')))
|
9
|
-
|
10
|
-
class << self
|
11
|
-
# @param text [String]
|
12
|
-
# @return [Array <MeCab::Node>]
|
13
|
-
def analyze(text)
|
14
|
-
nodes = Array.new
|
15
|
-
node = @@tagger.parseToNode(text)
|
16
|
-
while node
|
17
|
-
nodes << node
|
18
|
-
node = node.next
|
19
|
-
end
|
20
|
-
nodes.shift
|
21
|
-
nodes.pop
|
22
|
-
nodes
|
23
|
-
end
|
24
|
-
|
25
|
-
# @return [Hash]
|
26
|
-
def i18n
|
27
|
-
@@i18n
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
# @param text [String]
|
32
|
-
def initialize(text)
|
33
|
-
@nodes = self.class.analyze(text)
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [Integer]
|
37
|
-
def count
|
38
|
-
@nodes.size
|
39
|
-
end
|
40
|
-
|
41
|
-
# @param method_name [Symbol]
|
42
|
-
def method_missing(method_name)
|
43
|
-
if !self.class.i18n[method_name.to_s]
|
44
|
-
raise NoMethodError
|
45
|
-
end
|
46
|
-
extract(self.class.i18n[method_name.to_s])
|
47
|
-
end
|
48
|
-
|
49
|
-
private
|
50
|
-
|
51
|
-
# @param name [String]
|
52
|
-
# @return [Array <String>]
|
53
|
-
def extract(name)
|
54
|
-
morpheme = Array.new
|
55
|
-
@nodes.each do |node|
|
56
|
-
if name == node.feature.force_encoding('UTF-8').split(',').first
|
57
|
-
morpheme << node.surface.force_encoding('UTF-8')
|
58
|
-
end
|
59
|
-
end
|
60
|
-
morpheme
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
3
|
+
require 'light-mecab/morpheme'
|
4
|
+
require 'light-mecab/version'
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module LightMecab
|
2
|
+
class Morpheme
|
3
|
+
@@tagger = ::MeCab::Tagger.new
|
4
|
+
@@i18n = ::YAML.load_file(File.expand_path(File.join(__FILE__, '..', '..', '..', 'locale', 'morpheme.yml')))
|
5
|
+
|
6
|
+
class << self
|
7
|
+
# @param text [String]
|
8
|
+
# @return [Array <MeCab::Node>]
|
9
|
+
def analyze(text)
|
10
|
+
nodes = Array.new
|
11
|
+
node = @@tagger.parseToNode(text)
|
12
|
+
while node
|
13
|
+
nodes << node
|
14
|
+
node = node.next
|
15
|
+
end
|
16
|
+
nodes.shift
|
17
|
+
nodes.pop
|
18
|
+
nodes
|
19
|
+
end
|
20
|
+
|
21
|
+
# @param node [MeCab::Node]
|
22
|
+
# @return [String]
|
23
|
+
def pos(node)
|
24
|
+
node.surface.force_encoding('UTF-8')
|
25
|
+
end
|
26
|
+
|
27
|
+
# @param node [MeCab::Node]
|
28
|
+
# @return [String]
|
29
|
+
def pos_name(node)
|
30
|
+
node.feature.force_encoding('UTF-8').split(',').first
|
31
|
+
end
|
32
|
+
|
33
|
+
# @return [Hash]
|
34
|
+
def i18n
|
35
|
+
@@i18n
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# @param text [String]
|
40
|
+
def initialize(text)
|
41
|
+
@nodes = self.class.analyze(text)
|
42
|
+
end
|
43
|
+
|
44
|
+
# @return [Integer]
|
45
|
+
def num
|
46
|
+
@nodes.size
|
47
|
+
end
|
48
|
+
|
49
|
+
# @return [Array <Hash>]
|
50
|
+
def parse
|
51
|
+
@nodes.map {|node| {self.class.pos(node) => self.class.pos_name(node)}}
|
52
|
+
end
|
53
|
+
|
54
|
+
# @param method_name [Symbol]
|
55
|
+
# @raise [NoMethodError]
|
56
|
+
def method_missing(method_name)
|
57
|
+
raise NoMethodError unless self.class.i18n[method_name.to_s]
|
58
|
+
extract(self.class.i18n[method_name.to_s])
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# @param name [String]
|
64
|
+
# @return [Array <String>]
|
65
|
+
def extract(name)
|
66
|
+
@nodes.map {|node| self.class.pos(node) if name == self.class.pos_name(node)}.compact
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/lib/light-mecab/version.rb
CHANGED
data/locale/morpheme.yml
CHANGED
@@ -1,10 +1,14 @@
|
|
1
|
+
ad_adjective: '連体詞'
|
2
|
+
prefix: '接頭詞'
|
1
3
|
noun: '名詞'
|
2
4
|
verb: '動詞'
|
3
|
-
aux_verb: '助動詞'
|
4
5
|
adjective: '形容詞'
|
5
6
|
adverb: '副詞'
|
6
7
|
conjunction: '接続詞'
|
7
|
-
interjection: '感動詞'
|
8
8
|
particle: '助詞'
|
9
|
-
|
9
|
+
aux_verb: '助動詞'
|
10
|
+
interjection: '感動詞'
|
10
11
|
symbol: '記号'
|
12
|
+
filler: 'フィラー'
|
13
|
+
other: 'その他'
|
14
|
+
unknown: '未知語'
|
data/test/test_morpheme.rb
CHANGED
@@ -10,8 +10,39 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
|
|
10
10
|
@morpheme = LightMecab::Morpheme.new(@sentence)
|
11
11
|
end
|
12
12
|
|
13
|
-
def test_count
|
14
|
-
assert_equal(17, @morpheme.count)
|
13
|
+
def test_num
|
14
|
+
assert_equal(17, @morpheme.num)
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_parse()
|
18
|
+
parse_result = [
|
19
|
+
{"そして"=>"接続詞"},
|
20
|
+
{"太郎"=>"名詞"},
|
21
|
+
{"は"=>"助詞"},
|
22
|
+
{"この"=>"連体詞"},
|
23
|
+
{"本"=>"名詞"},
|
24
|
+
{"を"=>"助詞"},
|
25
|
+
{"二"=>"名詞"},
|
26
|
+
{"郎"=>"名詞"},
|
27
|
+
{"を"=>"助詞"},
|
28
|
+
{"見"=>"動詞"},
|
29
|
+
{"た"=>"助動詞"},
|
30
|
+
{"美しい"=>"形容詞"},
|
31
|
+
{"女性"=>"名詞"},
|
32
|
+
{"に"=>"助詞"},
|
33
|
+
{"渡し"=>"動詞"},
|
34
|
+
{"た"=>"助動詞"},
|
35
|
+
{"。"=>"記号"}
|
36
|
+
]
|
37
|
+
assert_equal(parse_result, @morpheme.parse)
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_method_missing_ad_adjective
|
41
|
+
assert_equal(['この'], @morpheme.ad_adjective)
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_method_missing_prefix
|
45
|
+
assert_equal([], @morpheme.prefix)
|
15
46
|
end
|
16
47
|
|
17
48
|
def test_method_missing_noun
|
@@ -22,10 +53,6 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
|
|
22
53
|
assert_equal(['見', '渡し'], @morpheme.verb)
|
23
54
|
end
|
24
55
|
|
25
|
-
def test_method_missing_aux_verb
|
26
|
-
assert_equal(['た', 'た'], @morpheme.aux_verb)
|
27
|
-
end
|
28
|
-
|
29
56
|
def test_method_missing_adjective
|
30
57
|
assert_equal(['美しい'], @morpheme.adjective)
|
31
58
|
end
|
@@ -38,23 +65,37 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
|
|
38
65
|
assert_equal(['そして'], @morpheme.conjunction)
|
39
66
|
end
|
40
67
|
|
41
|
-
def test_method_missing_interjection
|
42
|
-
assert_equal([], @morpheme.interjection)
|
43
|
-
end
|
44
|
-
|
45
68
|
def test_method_missing_particle
|
46
69
|
assert_equal(['は', 'を', 'を', 'に'], @morpheme.particle)
|
47
70
|
end
|
48
71
|
|
49
|
-
def
|
50
|
-
assert_equal(['
|
72
|
+
def test_method_missing_aux_verb
|
73
|
+
assert_equal(['た', 'た'], @morpheme.aux_verb)
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_method_missing_interjection
|
77
|
+
assert_equal([], @morpheme.interjection)
|
51
78
|
end
|
52
79
|
|
53
80
|
def test_method_missing_symbol
|
54
81
|
assert_equal(['。'], @morpheme.symbol)
|
55
82
|
end
|
56
83
|
|
84
|
+
def test_method_missing_filler
|
85
|
+
assert_equal([], @morpheme.filler)
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_method_missing_other
|
89
|
+
assert_equal([], @morpheme.other)
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_method_missing_unknown
|
93
|
+
assert_equal([], @morpheme.unknown)
|
94
|
+
end
|
95
|
+
|
57
96
|
def test_method_missing_hoge
|
58
|
-
assert_raise
|
97
|
+
assert_raise(NoMethodError) do
|
98
|
+
@morpheme.hogei
|
99
|
+
end
|
59
100
|
end
|
60
101
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: light-mecab
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kei Tsuchiya
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Execute morphological analysis by MeCab
|
14
14
|
email:
|
@@ -23,6 +23,7 @@ files:
|
|
23
23
|
- README.md
|
24
24
|
- Rakefile
|
25
25
|
- lib/light-mecab.rb
|
26
|
+
- lib/light-mecab/morpheme.rb
|
26
27
|
- lib/light-mecab/version.rb
|
27
28
|
- light-mecab.gemspec
|
28
29
|
- locale/morpheme.yml
|