light-mecab 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 68e3e72d4f025c81ade1963d6d0d556ceb91e2ad
4
- data.tar.gz: f2d313ced0c67a085dcc1a4a33c0b500b4225f23
3
+ metadata.gz: 06ae6b62245e3fb9df0bdd05bf95f41b0ef489e6
4
+ data.tar.gz: 86ed420fb61f9774d903101c7c6e8f90c90550a0
5
5
  SHA512:
6
- metadata.gz: a1abeafdabb72ba2056735ff8d7473802c04ce8be5b81b2a9efebb54605d4b5d47e59fb61e326819ee0802a692d547f12916832816b51bcd1f44aab83332a173
7
- data.tar.gz: eaba3a9e21dc751204e59e3bdf16243ae333e50a94354bf3a1c04d2f165ae04c957ae1ffcaa8e84a1d1fb0503be66513d4d4f2a16ca6378c1d1bfe2119e26745
6
+ metadata.gz: f07bb8aa5563ec6bc1a0463efc5cd15fb6ef11a6cad34b7285ab77caf26a430d9a179c120fa5d0f6f6e3bced49856525627d239cfb0cfb2d6620da1e02735643
7
+ data.tar.gz: 09cdefcf4faa2c8836b39f5b97eb7cdb8e4e885b4115e310b160dcbed271f036d3e15c8baf6ec9970238013a8fce7188a0950f1ef3e2768c926e60efc9b445c4
data/README.md CHANGED
@@ -4,7 +4,8 @@ LightMecabは,mecab-rubyをラッピングすることで,より簡単に形
4
4
 
5
5
  ## Environment
6
6
 
7
- MeCabおよびmecab-rubyが動作する環境が必要.
7
+ MeCabおよびmecab-rubyが動作する環境が必要.
8
+ MeCabの辞書にはIPA辞書を用いる.
8
9
 
9
10
  ## Installation
10
11
 
@@ -23,20 +24,33 @@ Gemfileに以下を記述する.
23
24
  $ gem install light-mecab
24
25
 
25
26
  ## Usage
26
- 例えば「太郎はこの本を二郎を見た女性に渡した。」という文に含まれる名詞は,
27
-
28
27
  ```ruby
29
28
  require 'light-mecab'
29
+
30
30
  sentence = '太郎はこの本を二郎を見た女性に渡した。'
31
- LightMecab::Morpheme.new(sentence).noun
32
- ```
33
- によって取得できる(返り値はString型の配列).
31
+ m = LightMecab::Morpheme.new(sentence)
34
32
 
35
- その他の品詞(形態素)を取得するメソッド名は,以下を参照.
33
+ # 品詞数の取得
34
+ m.num
35
+ # => 15
36
36
 
37
- lib/locale/morpheme.yml
37
+ # 品詞分解
38
+ m.parse
39
+ # => [{"太郎"=>"名詞"}, {"は"=>"助詞"}, …, {"。"=>"記号"}]
38
40
 
41
+ # わかち書き
42
+ m.parse.map{|p| p.keys}.flatten.join(' ')
43
+ # => "太郎 は この 本 を 二 郎 を 見 た 女性 に 渡し た 。"
39
44
 
45
+ # 名詞一覧
46
+ m.noun
47
+ # => ["太郎", "本", "二", "郎", "女性"]
48
+ # その他の品詞を取得するメソッド名は locale/morpheme.yml を参照
49
+
50
+ # MeCab::Nodeオブジェクトを取得したい場合
51
+ LightMecab::Morpheme.analyze(sentence)
52
+ # => MeCab::Nodeの配列が返ってきます
53
+ ```
40
54
 
41
55
  ## Contributing
42
56
 
@@ -1,63 +1,4 @@
1
- require 'light-mecab/version'
2
1
  require 'MeCab'
3
2
  require 'yaml'
4
-
5
- module LightMecab
6
- class Morpheme
7
- @@tagger = ::MeCab::Tagger.new
8
- @@i18n = ::YAML.load_file(File.expand_path(File.join(__FILE__, '..', '..', 'locale', 'morpheme.yml')))
9
-
10
- class << self
11
- # @param text [String]
12
- # @return [Array <MeCab::Node>]
13
- def analyze(text)
14
- nodes = Array.new
15
- node = @@tagger.parseToNode(text)
16
- while node
17
- nodes << node
18
- node = node.next
19
- end
20
- nodes.shift
21
- nodes.pop
22
- nodes
23
- end
24
-
25
- # @return [Hash]
26
- def i18n
27
- @@i18n
28
- end
29
- end
30
-
31
- # @param text [String]
32
- def initialize(text)
33
- @nodes = self.class.analyze(text)
34
- end
35
-
36
- # @return [Integer]
37
- def count
38
- @nodes.size
39
- end
40
-
41
- # @param method_name [Symbol]
42
- def method_missing(method_name)
43
- if !self.class.i18n[method_name.to_s]
44
- raise NoMethodError
45
- end
46
- extract(self.class.i18n[method_name.to_s])
47
- end
48
-
49
- private
50
-
51
- # @param name [String]
52
- # @return [Array <String>]
53
- def extract(name)
54
- morpheme = Array.new
55
- @nodes.each do |node|
56
- if name == node.feature.force_encoding('UTF-8').split(',').first
57
- morpheme << node.surface.force_encoding('UTF-8')
58
- end
59
- end
60
- morpheme
61
- end
62
- end
63
- end
3
+ require 'light-mecab/morpheme'
4
+ require 'light-mecab/version'
@@ -0,0 +1,69 @@
1
+ module LightMecab
2
+ class Morpheme
3
+ @@tagger = ::MeCab::Tagger.new
4
+ @@i18n = ::YAML.load_file(File.expand_path(File.join(__FILE__, '..', '..', '..', 'locale', 'morpheme.yml')))
5
+
6
+ class << self
7
+ # @param text [String]
8
+ # @return [Array <MeCab::Node>]
9
+ def analyze(text)
10
+ nodes = Array.new
11
+ node = @@tagger.parseToNode(text)
12
+ while node
13
+ nodes << node
14
+ node = node.next
15
+ end
16
+ nodes.shift
17
+ nodes.pop
18
+ nodes
19
+ end
20
+
21
+ # @param node [MeCab::Node]
22
+ # @return [String]
23
+ def pos(node)
24
+ node.surface.force_encoding('UTF-8')
25
+ end
26
+
27
+ # @param node [MeCab::Node]
28
+ # @return [String]
29
+ def pos_name(node)
30
+ node.feature.force_encoding('UTF-8').split(',').first
31
+ end
32
+
33
+ # @return [Hash]
34
+ def i18n
35
+ @@i18n
36
+ end
37
+ end
38
+
39
+ # @param text [String]
40
+ def initialize(text)
41
+ @nodes = self.class.analyze(text)
42
+ end
43
+
44
+ # @return [Integer]
45
+ def num
46
+ @nodes.size
47
+ end
48
+
49
+ # @return [Array <Hash>]
50
+ def parse
51
+ @nodes.map {|node| {self.class.pos(node) => self.class.pos_name(node)}}
52
+ end
53
+
54
+ # @param method_name [Symbol]
55
+ # @raise [NoMethodError]
56
+ def method_missing(method_name)
57
+ raise NoMethodError unless self.class.i18n[method_name.to_s]
58
+ extract(self.class.i18n[method_name.to_s])
59
+ end
60
+
61
+ private
62
+
63
+ # @param name [String]
64
+ # @return [Array <String>]
65
+ def extract(name)
66
+ @nodes.map {|node| self.class.pos(node) if name == self.class.pos_name(node)}.compact
67
+ end
68
+ end
69
+ end
@@ -1,3 +1,3 @@
1
1
  module LightMecab
2
- VERSION = '0.0.4'
2
+ VERSION = '0.0.5'
3
3
  end
@@ -1,10 +1,14 @@
1
+ ad_adjective: '連体詞'
2
+ prefix: '接頭詞'
1
3
  noun: '名詞'
2
4
  verb: '動詞'
3
- aux_verb: '助動詞'
4
5
  adjective: '形容詞'
5
6
  adverb: '副詞'
6
7
  conjunction: '接続詞'
7
- interjection: '感動詞'
8
8
  particle: '助詞'
9
- ad_adjective: '連体詞'
9
+ aux_verb: '助動詞'
10
+ interjection: '感動詞'
10
11
  symbol: '記号'
12
+ filler: 'フィラー'
13
+ other: 'その他'
14
+ unknown: '未知語'
@@ -10,8 +10,39 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
10
10
  @morpheme = LightMecab::Morpheme.new(@sentence)
11
11
  end
12
12
 
13
- def test_count
14
- assert_equal(17, @morpheme.count)
13
+ def test_num
14
+ assert_equal(17, @morpheme.num)
15
+ end
16
+
17
+ def test_parse()
18
+ parse_result = [
19
+ {"そして"=>"接続詞"},
20
+ {"太郎"=>"名詞"},
21
+ {"は"=>"助詞"},
22
+ {"この"=>"連体詞"},
23
+ {"本"=>"名詞"},
24
+ {"を"=>"助詞"},
25
+ {"二"=>"名詞"},
26
+ {"郎"=>"名詞"},
27
+ {"を"=>"助詞"},
28
+ {"見"=>"動詞"},
29
+ {"た"=>"助動詞"},
30
+ {"美しい"=>"形容詞"},
31
+ {"女性"=>"名詞"},
32
+ {"に"=>"助詞"},
33
+ {"渡し"=>"動詞"},
34
+ {"た"=>"助動詞"},
35
+ {"。"=>"記号"}
36
+ ]
37
+ assert_equal(parse_result, @morpheme.parse)
38
+ end
39
+
40
+ def test_method_missing_ad_adjective
41
+ assert_equal(['この'], @morpheme.ad_adjective)
42
+ end
43
+
44
+ def test_method_missing_prefix
45
+ assert_equal([], @morpheme.prefix)
15
46
  end
16
47
 
17
48
  def test_method_missing_noun
@@ -22,10 +53,6 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
22
53
  assert_equal(['見', '渡し'], @morpheme.verb)
23
54
  end
24
55
 
25
- def test_method_missing_aux_verb
26
- assert_equal(['た', 'た'], @morpheme.aux_verb)
27
- end
28
-
29
56
  def test_method_missing_adjective
30
57
  assert_equal(['美しい'], @morpheme.adjective)
31
58
  end
@@ -38,23 +65,37 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
38
65
  assert_equal(['そして'], @morpheme.conjunction)
39
66
  end
40
67
 
41
- def test_method_missing_interjection
42
- assert_equal([], @morpheme.interjection)
43
- end
44
-
45
68
  def test_method_missing_particle
46
69
  assert_equal(['は', 'を', 'を', 'に'], @morpheme.particle)
47
70
  end
48
71
 
49
- def test_method_missing_ad_adjective
50
- assert_equal(['この'], @morpheme.ad_adjective)
72
+ def test_method_missing_aux_verb
73
+ assert_equal(['', 'た'], @morpheme.aux_verb)
74
+ end
75
+
76
+ def test_method_missing_interjection
77
+ assert_equal([], @morpheme.interjection)
51
78
  end
52
79
 
53
80
  def test_method_missing_symbol
54
81
  assert_equal(['。'], @morpheme.symbol)
55
82
  end
56
83
 
84
+ def test_method_missing_filler
85
+ assert_equal([], @morpheme.filler)
86
+ end
87
+
88
+ def test_method_missing_other
89
+ assert_equal([], @morpheme.other)
90
+ end
91
+
92
+ def test_method_missing_unknown
93
+ assert_equal([], @morpheme.unknown)
94
+ end
95
+
57
96
  def test_method_missing_hoge
58
- assert_raise{@morpheme.hoge}
97
+ assert_raise(NoMethodError) do
98
+ @morpheme.hogei
99
+ end
59
100
  end
60
101
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: light-mecab
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kei Tsuchiya
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-10-24 00:00:00.000000000 Z
11
+ date: 2013-11-21 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Execute morphological analysis by MeCab
14
14
  email:
@@ -23,6 +23,7 @@ files:
23
23
  - README.md
24
24
  - Rakefile
25
25
  - lib/light-mecab.rb
26
+ - lib/light-mecab/morpheme.rb
26
27
  - lib/light-mecab/version.rb
27
28
  - light-mecab.gemspec
28
29
  - locale/morpheme.yml