light-mecab 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +22 -8
- data/lib/light-mecab.rb +2 -61
- data/lib/light-mecab/morpheme.rb +69 -0
- data/lib/light-mecab/version.rb +1 -1
- data/locale/morpheme.yml +7 -3
- data/test/test_morpheme.rb +54 -13
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06ae6b62245e3fb9df0bdd05bf95f41b0ef489e6
|
4
|
+
data.tar.gz: 86ed420fb61f9774d903101c7c6e8f90c90550a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f07bb8aa5563ec6bc1a0463efc5cd15fb6ef11a6cad34b7285ab77caf26a430d9a179c120fa5d0f6f6e3bced49856525627d239cfb0cfb2d6620da1e02735643
|
7
|
+
data.tar.gz: 09cdefcf4faa2c8836b39f5b97eb7cdb8e4e885b4115e310b160dcbed271f036d3e15c8baf6ec9970238013a8fce7188a0950f1ef3e2768c926e60efc9b445c4
|
data/README.md
CHANGED
@@ -4,7 +4,8 @@ LightMecabは,mecab-rubyをラッピングすることで,より簡単に形
|
|
4
4
|
|
5
5
|
## Environment
|
6
6
|
|
7
|
-
MeCabおよびmecab-rubyが動作する環境が必要.
|
7
|
+
MeCabおよびmecab-rubyが動作する環境が必要.
|
8
|
+
MeCabの辞書にはIPA辞書を用いる.
|
8
9
|
|
9
10
|
## Installation
|
10
11
|
|
@@ -23,20 +24,33 @@ Gemfileに以下を記述する.
|
|
23
24
|
$ gem install light-mecab
|
24
25
|
|
25
26
|
## Usage
|
26
|
-
例えば「太郎はこの本を二郎を見た女性に渡した。」という文に含まれる名詞は,
|
27
|
-
|
28
27
|
```ruby
|
29
28
|
require 'light-mecab'
|
29
|
+
|
30
30
|
sentence = '太郎はこの本を二郎を見た女性に渡した。'
|
31
|
-
LightMecab::Morpheme.new(sentence)
|
32
|
-
```
|
33
|
-
によって取得できる(返り値はString型の配列).
|
31
|
+
m = LightMecab::Morpheme.new(sentence)
|
34
32
|
|
35
|
-
|
33
|
+
# 品詞数の取得
|
34
|
+
m.num
|
35
|
+
# => 15
|
36
36
|
|
37
|
-
|
37
|
+
# 品詞分解
|
38
|
+
m.parse
|
39
|
+
# => [{"太郎"=>"名詞"}, {"は"=>"助詞"}, …, {"。"=>"記号"}]
|
38
40
|
|
41
|
+
# わかち書き
|
42
|
+
m.parse.map{|p| p.keys}.flatten.join(' ')
|
43
|
+
# => "太郎 は この 本 を 二 郎 を 見 た 女性 に 渡し た 。"
|
39
44
|
|
45
|
+
# 名詞一覧
|
46
|
+
m.noun
|
47
|
+
# => ["太郎", "本", "二", "郎", "女性"]
|
48
|
+
# その他の品詞を取得するメソッド名は locale/morpheme.yml を参照
|
49
|
+
|
50
|
+
# MeCab::Nodeオブジェクトを取得したい場合
|
51
|
+
LightMecab::Morpheme.analyze(sentence)
|
52
|
+
# => MeCab::Nodeの配列が返ってきます
|
53
|
+
```
|
40
54
|
|
41
55
|
## Contributing
|
42
56
|
|
data/lib/light-mecab.rb
CHANGED
@@ -1,63 +1,4 @@
|
|
1
|
-
require 'light-mecab/version'
|
2
1
|
require 'MeCab'
|
3
2
|
require 'yaml'
|
4
|
-
|
5
|
-
|
6
|
-
class Morpheme
|
7
|
-
@@tagger = ::MeCab::Tagger.new
|
8
|
-
@@i18n = ::YAML.load_file(File.expand_path(File.join(__FILE__, '..', '..', 'locale', 'morpheme.yml')))
|
9
|
-
|
10
|
-
class << self
|
11
|
-
# @param text [String]
|
12
|
-
# @return [Array <MeCab::Node>]
|
13
|
-
def analyze(text)
|
14
|
-
nodes = Array.new
|
15
|
-
node = @@tagger.parseToNode(text)
|
16
|
-
while node
|
17
|
-
nodes << node
|
18
|
-
node = node.next
|
19
|
-
end
|
20
|
-
nodes.shift
|
21
|
-
nodes.pop
|
22
|
-
nodes
|
23
|
-
end
|
24
|
-
|
25
|
-
# @return [Hash]
|
26
|
-
def i18n
|
27
|
-
@@i18n
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
# @param text [String]
|
32
|
-
def initialize(text)
|
33
|
-
@nodes = self.class.analyze(text)
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [Integer]
|
37
|
-
def count
|
38
|
-
@nodes.size
|
39
|
-
end
|
40
|
-
|
41
|
-
# @param method_name [Symbol]
|
42
|
-
def method_missing(method_name)
|
43
|
-
if !self.class.i18n[method_name.to_s]
|
44
|
-
raise NoMethodError
|
45
|
-
end
|
46
|
-
extract(self.class.i18n[method_name.to_s])
|
47
|
-
end
|
48
|
-
|
49
|
-
private
|
50
|
-
|
51
|
-
# @param name [String]
|
52
|
-
# @return [Array <String>]
|
53
|
-
def extract(name)
|
54
|
-
morpheme = Array.new
|
55
|
-
@nodes.each do |node|
|
56
|
-
if name == node.feature.force_encoding('UTF-8').split(',').first
|
57
|
-
morpheme << node.surface.force_encoding('UTF-8')
|
58
|
-
end
|
59
|
-
end
|
60
|
-
morpheme
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
3
|
+
require 'light-mecab/morpheme'
|
4
|
+
require 'light-mecab/version'
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module LightMecab
|
2
|
+
class Morpheme
|
3
|
+
@@tagger = ::MeCab::Tagger.new
|
4
|
+
@@i18n = ::YAML.load_file(File.expand_path(File.join(__FILE__, '..', '..', '..', 'locale', 'morpheme.yml')))
|
5
|
+
|
6
|
+
class << self
|
7
|
+
# @param text [String]
|
8
|
+
# @return [Array <MeCab::Node>]
|
9
|
+
def analyze(text)
|
10
|
+
nodes = Array.new
|
11
|
+
node = @@tagger.parseToNode(text)
|
12
|
+
while node
|
13
|
+
nodes << node
|
14
|
+
node = node.next
|
15
|
+
end
|
16
|
+
nodes.shift
|
17
|
+
nodes.pop
|
18
|
+
nodes
|
19
|
+
end
|
20
|
+
|
21
|
+
# @param node [MeCab::Node]
|
22
|
+
# @return [String]
|
23
|
+
def pos(node)
|
24
|
+
node.surface.force_encoding('UTF-8')
|
25
|
+
end
|
26
|
+
|
27
|
+
# @param node [MeCab::Node]
|
28
|
+
# @return [String]
|
29
|
+
def pos_name(node)
|
30
|
+
node.feature.force_encoding('UTF-8').split(',').first
|
31
|
+
end
|
32
|
+
|
33
|
+
# @return [Hash]
|
34
|
+
def i18n
|
35
|
+
@@i18n
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# @param text [String]
|
40
|
+
def initialize(text)
|
41
|
+
@nodes = self.class.analyze(text)
|
42
|
+
end
|
43
|
+
|
44
|
+
# @return [Integer]
|
45
|
+
def num
|
46
|
+
@nodes.size
|
47
|
+
end
|
48
|
+
|
49
|
+
# @return [Array <Hash>]
|
50
|
+
def parse
|
51
|
+
@nodes.map {|node| {self.class.pos(node) => self.class.pos_name(node)}}
|
52
|
+
end
|
53
|
+
|
54
|
+
# @param method_name [Symbol]
|
55
|
+
# @raise [NoMethodError]
|
56
|
+
def method_missing(method_name)
|
57
|
+
raise NoMethodError unless self.class.i18n[method_name.to_s]
|
58
|
+
extract(self.class.i18n[method_name.to_s])
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# @param name [String]
|
64
|
+
# @return [Array <String>]
|
65
|
+
def extract(name)
|
66
|
+
@nodes.map {|node| self.class.pos(node) if name == self.class.pos_name(node)}.compact
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/lib/light-mecab/version.rb
CHANGED
data/locale/morpheme.yml
CHANGED
@@ -1,10 +1,14 @@
|
|
1
|
+
ad_adjective: '連体詞'
|
2
|
+
prefix: '接頭詞'
|
1
3
|
noun: '名詞'
|
2
4
|
verb: '動詞'
|
3
|
-
aux_verb: '助動詞'
|
4
5
|
adjective: '形容詞'
|
5
6
|
adverb: '副詞'
|
6
7
|
conjunction: '接続詞'
|
7
|
-
interjection: '感動詞'
|
8
8
|
particle: '助詞'
|
9
|
-
|
9
|
+
aux_verb: '助動詞'
|
10
|
+
interjection: '感動詞'
|
10
11
|
symbol: '記号'
|
12
|
+
filler: 'フィラー'
|
13
|
+
other: 'その他'
|
14
|
+
unknown: '未知語'
|
data/test/test_morpheme.rb
CHANGED
@@ -10,8 +10,39 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
|
|
10
10
|
@morpheme = LightMecab::Morpheme.new(@sentence)
|
11
11
|
end
|
12
12
|
|
13
|
-
def
|
14
|
-
assert_equal(17, @morpheme.count)
|
13
|
+
def test_num
|
14
|
+
assert_equal(17, @morpheme.num)
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_parse()
|
18
|
+
parse_result = [
|
19
|
+
{"そして"=>"接続詞"},
|
20
|
+
{"太郎"=>"名詞"},
|
21
|
+
{"は"=>"助詞"},
|
22
|
+
{"この"=>"連体詞"},
|
23
|
+
{"本"=>"名詞"},
|
24
|
+
{"を"=>"助詞"},
|
25
|
+
{"二"=>"名詞"},
|
26
|
+
{"郎"=>"名詞"},
|
27
|
+
{"を"=>"助詞"},
|
28
|
+
{"見"=>"動詞"},
|
29
|
+
{"た"=>"助動詞"},
|
30
|
+
{"美しい"=>"形容詞"},
|
31
|
+
{"女性"=>"名詞"},
|
32
|
+
{"に"=>"助詞"},
|
33
|
+
{"渡し"=>"動詞"},
|
34
|
+
{"た"=>"助動詞"},
|
35
|
+
{"。"=>"記号"}
|
36
|
+
]
|
37
|
+
assert_equal(parse_result, @morpheme.parse)
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_method_missing_ad_adjective
|
41
|
+
assert_equal(['この'], @morpheme.ad_adjective)
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_method_missing_prefix
|
45
|
+
assert_equal([], @morpheme.prefix)
|
15
46
|
end
|
16
47
|
|
17
48
|
def test_method_missing_noun
|
@@ -22,10 +53,6 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
|
|
22
53
|
assert_equal(['見', '渡し'], @morpheme.verb)
|
23
54
|
end
|
24
55
|
|
25
|
-
def test_method_missing_aux_verb
|
26
|
-
assert_equal(['た', 'た'], @morpheme.aux_verb)
|
27
|
-
end
|
28
|
-
|
29
56
|
def test_method_missing_adjective
|
30
57
|
assert_equal(['美しい'], @morpheme.adjective)
|
31
58
|
end
|
@@ -38,23 +65,37 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
|
|
38
65
|
assert_equal(['そして'], @morpheme.conjunction)
|
39
66
|
end
|
40
67
|
|
41
|
-
def test_method_missing_interjection
|
42
|
-
assert_equal([], @morpheme.interjection)
|
43
|
-
end
|
44
|
-
|
45
68
|
def test_method_missing_particle
|
46
69
|
assert_equal(['は', 'を', 'を', 'に'], @morpheme.particle)
|
47
70
|
end
|
48
71
|
|
49
|
-
def
|
50
|
-
assert_equal(['
|
72
|
+
def test_method_missing_aux_verb
|
73
|
+
assert_equal(['た', 'た'], @morpheme.aux_verb)
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_method_missing_interjection
|
77
|
+
assert_equal([], @morpheme.interjection)
|
51
78
|
end
|
52
79
|
|
53
80
|
def test_method_missing_symbol
|
54
81
|
assert_equal(['。'], @morpheme.symbol)
|
55
82
|
end
|
56
83
|
|
84
|
+
def test_method_missing_filler
|
85
|
+
assert_equal([], @morpheme.filler)
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_method_missing_other
|
89
|
+
assert_equal([], @morpheme.other)
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_method_missing_unknown
|
93
|
+
assert_equal([], @morpheme.unknown)
|
94
|
+
end
|
95
|
+
|
57
96
|
def test_method_missing_hoge
|
58
|
-
assert_raise
|
97
|
+
assert_raise(NoMethodError) do
|
98
|
+
@morpheme.hogei
|
99
|
+
end
|
59
100
|
end
|
60
101
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: light-mecab
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kei Tsuchiya
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Execute morphological analysis by MeCab
|
14
14
|
email:
|
@@ -23,6 +23,7 @@ files:
|
|
23
23
|
- README.md
|
24
24
|
- Rakefile
|
25
25
|
- lib/light-mecab.rb
|
26
|
+
- lib/light-mecab/morpheme.rb
|
26
27
|
- lib/light-mecab/version.rb
|
27
28
|
- light-mecab.gemspec
|
28
29
|
- locale/morpheme.yml
|