light-mecab 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 68e3e72d4f025c81ade1963d6d0d556ceb91e2ad
4
- data.tar.gz: f2d313ced0c67a085dcc1a4a33c0b500b4225f23
3
+ metadata.gz: 06ae6b62245e3fb9df0bdd05bf95f41b0ef489e6
4
+ data.tar.gz: 86ed420fb61f9774d903101c7c6e8f90c90550a0
5
5
  SHA512:
6
- metadata.gz: a1abeafdabb72ba2056735ff8d7473802c04ce8be5b81b2a9efebb54605d4b5d47e59fb61e326819ee0802a692d547f12916832816b51bcd1f44aab83332a173
7
- data.tar.gz: eaba3a9e21dc751204e59e3bdf16243ae333e50a94354bf3a1c04d2f165ae04c957ae1ffcaa8e84a1d1fb0503be66513d4d4f2a16ca6378c1d1bfe2119e26745
6
+ metadata.gz: f07bb8aa5563ec6bc1a0463efc5cd15fb6ef11a6cad34b7285ab77caf26a430d9a179c120fa5d0f6f6e3bced49856525627d239cfb0cfb2d6620da1e02735643
7
+ data.tar.gz: 09cdefcf4faa2c8836b39f5b97eb7cdb8e4e885b4115e310b160dcbed271f036d3e15c8baf6ec9970238013a8fce7188a0950f1ef3e2768c926e60efc9b445c4
data/README.md CHANGED
@@ -4,7 +4,8 @@ LightMecabは,mecab-rubyをラッピングすることで,より簡単に形
4
4
 
5
5
  ## Environment
6
6
 
7
- MeCabおよびmecab-rubyが動作する環境が必要.
7
+ MeCabおよびmecab-rubyが動作する環境が必要.
8
+ MeCabの辞書にはIPA辞書を用いる.
8
9
 
9
10
  ## Installation
10
11
 
@@ -23,20 +24,33 @@ Gemfileに以下を記述する.
23
24
  $ gem install light-mecab
24
25
 
25
26
  ## Usage
26
- 例えば「太郎はこの本を二郎を見た女性に渡した。」という文に含まれる名詞は,
27
-
28
27
  ```ruby
29
28
  require 'light-mecab'
29
+
30
30
  sentence = '太郎はこの本を二郎を見た女性に渡した。'
31
- LightMecab::Morpheme.new(sentence).noun
32
- ```
33
- によって取得できる(返り値はString型の配列).
31
+ m = LightMecab::Morpheme.new(sentence)
34
32
 
35
- その他の品詞(形態素)を取得するメソッド名は,以下を参照.
33
+ # 品詞数の取得
34
+ m.num
35
+ # => 15
36
36
 
37
- lib/locale/morpheme.yml
37
+ # 品詞分解
38
+ m.parse
39
+ # => [{"太郎"=>"名詞"}, {"は"=>"助詞"}, …, {"。"=>"記号"}]
38
40
 
41
+ # わかち書き
42
+ m.parse.map{|p| p.keys}.flatten.join(' ')
43
+ # => "太郎 は この 本 を 二 郎 を 見 た 女性 に 渡し た 。"
39
44
 
45
+ # 名詞一覧
46
+ m.noun
47
+ # => ["太郎", "本", "二", "郎", "女性"]
48
+ # その他の品詞を取得するメソッド名は locale/morpheme.yml を参照
49
+
50
+ # MeCab::Nodeオブジェクトを取得したい場合
51
+ LightMecab::Morpheme.analyze(sentence)
52
+ # => MeCab::Nodeの配列が返ってきます
53
+ ```
40
54
 
41
55
  ## Contributing
42
56
 
@@ -1,63 +1,4 @@
1
- require 'light-mecab/version'
2
1
  require 'MeCab'
3
2
  require 'yaml'
4
-
5
- module LightMecab
6
- class Morpheme
7
- @@tagger = ::MeCab::Tagger.new
8
- @@i18n = ::YAML.load_file(File.expand_path(File.join(__FILE__, '..', '..', 'locale', 'morpheme.yml')))
9
-
10
- class << self
11
- # @param text [String]
12
- # @return [Array <MeCab::Node>]
13
- def analyze(text)
14
- nodes = Array.new
15
- node = @@tagger.parseToNode(text)
16
- while node
17
- nodes << node
18
- node = node.next
19
- end
20
- nodes.shift
21
- nodes.pop
22
- nodes
23
- end
24
-
25
- # @return [Hash]
26
- def i18n
27
- @@i18n
28
- end
29
- end
30
-
31
- # @param text [String]
32
- def initialize(text)
33
- @nodes = self.class.analyze(text)
34
- end
35
-
36
- # @return [Integer]
37
- def count
38
- @nodes.size
39
- end
40
-
41
- # @param method_name [Symbol]
42
- def method_missing(method_name)
43
- if !self.class.i18n[method_name.to_s]
44
- raise NoMethodError
45
- end
46
- extract(self.class.i18n[method_name.to_s])
47
- end
48
-
49
- private
50
-
51
- # @param name [String]
52
- # @return [Array <String>]
53
- def extract(name)
54
- morpheme = Array.new
55
- @nodes.each do |node|
56
- if name == node.feature.force_encoding('UTF-8').split(',').first
57
- morpheme << node.surface.force_encoding('UTF-8')
58
- end
59
- end
60
- morpheme
61
- end
62
- end
63
- end
3
+ require 'light-mecab/morpheme'
4
+ require 'light-mecab/version'
@@ -0,0 +1,69 @@
1
module LightMecab
  # Thin wrapper around MeCab that splits a text into morphemes and exposes
  # them by part of speech.
  #
  # Part-of-speech accessors (+noun+, +verb+, ...) are not defined one by one.
  # They are resolved in +method_missing+ from the keys of
  # locale/morpheme.yml; the matching value is the label compared against the
  # first comma-separated field of each node's feature string.
  class Morpheme
    # One tagger shared by every instance, created when the class is loaded.
    @@tagger = ::MeCab::Tagger.new
    # Accessor name => part-of-speech label, read from locale/morpheme.yml
    # (resolved three directories above this file's path).
    @@i18n = ::YAML.load_file(
      File.expand_path(File.join(__FILE__, '..', '..', '..', 'locale', 'morpheme.yml'))
    )

    class << self
      # Runs the tagger over +text+ and collects the resulting node chain.
      #
      # The first and last nodes of the chain are dropped (presumably MeCab's
      # begin/end-of-sentence sentinels -- confirm against the MeCab binding).
      #
      # @param text [String]
      # @return [Array <MeCab::Node>]
      def analyze(text)
        nodes = []
        node = @@tagger.parseToNode(text)
        while node
          nodes << node
          node = node.next
        end
        # shift/pop (rather than a range slice) keep an empty chain as [].
        nodes.shift
        nodes.pop
        nodes
      end

      # Surface form of a node, i.e. the text of the morpheme itself.
      # Despite the method name, this is not the part of speech; see
      # {pos_name} for that.
      #
      # @param node [MeCab::Node]
      # @return [String] the surface string tagged as UTF-8
      def pos(node)
        node.surface.force_encoding('UTF-8')
      end

      # Part-of-speech label of a node: the first comma-separated field of
      # its feature string.
      #
      # @param node [MeCab::Node]
      # @return [String]
      def pos_name(node)
        node.feature.force_encoding('UTF-8').split(',').first
      end

      # Mapping from accessor name to part-of-speech label.
      #
      # @return [Hash]
      def i18n
        @@i18n
      end
    end

    # @param text [String] text to analyze
    def initialize(text)
      @nodes = self.class.analyze(text)
    end

    # Number of morphemes found in the text.
    #
    # @return [Integer]
    def num
      @nodes.size
    end

    # Every morpheme in order, each as a one-entry hash of
    # surface form => part-of-speech label.
    #
    # @return [Array <Hash>]
    def parse
      @nodes.map { |node| { self.class.pos(node) => self.class.pos_name(node) } }
    end

    # Resolves part-of-speech accessors such as +noun+ or +verb+.
    #
    # Only a call without arguments whose name is a key of the locale mapping
    # is handled here. Everything else is passed to +super+, so the caller
    # gets the standard NoMethodError naming the method and the receiver.
    #
    # @param method_name [Symbol]
    # @return [Array <String>] surface forms with the requested part of speech
    # @raise [NoMethodError] if +method_name+ is not a known accessor
    def method_missing(method_name, *args, &block)
      label = self.class.i18n[method_name.to_s]
      return super unless label && args.empty?
      extract(label)
    end

    # Keeps +respond_to?+ consistent with the accessors served by
    # +method_missing+.
    #
    # @param method_name [Symbol]
    # @param include_private [Boolean]
    # @return [Boolean]
    def respond_to_missing?(method_name, include_private = false)
      self.class.i18n[method_name.to_s] ? true : super
    end

    private

    # Surface forms of all morphemes whose part-of-speech label equals +name+.
    #
    # @param name [String] part-of-speech label to match
    # @return [Array <String>]
    def extract(name)
      matching = @nodes.select { |node| name == self.class.pos_name(node) }
      matching.map { |node| self.class.pos(node) }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module LightMecab
2
- VERSION = '0.0.4'
2
+ VERSION = '0.0.5'
3
3
  end
@@ -1,10 +1,14 @@
1
+ ad_adjective: '連体詞'
2
+ prefix: '接頭詞'
1
3
  noun: '名詞'
2
4
  verb: '動詞'
3
- aux_verb: '助動詞'
4
5
  adjective: '形容詞'
5
6
  adverb: '副詞'
6
7
  conjunction: '接続詞'
7
- interjection: '感動詞'
8
8
  particle: '助詞'
9
- ad_adjective: '連体詞'
9
+ aux_verb: '助動詞'
10
+ interjection: '感動詞'
10
11
  symbol: '記号'
12
+ filler: 'フィラー'
13
+ other: 'その他'
14
+ unknown: '未知語'
@@ -10,8 +10,39 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
10
10
  @morpheme = LightMecab::Morpheme.new(@sentence)
11
11
  end
12
12
 
13
- def test_count
14
- assert_equal(17, @morpheme.count)
13
+ def test_num
14
+ assert_equal(17, @morpheme.num)
15
+ end
16
+
17
+ def test_parse()
18
+ parse_result = [
19
+ {"そして"=>"接続詞"},
20
+ {"太郎"=>"名詞"},
21
+ {"は"=>"助詞"},
22
+ {"この"=>"連体詞"},
23
+ {"本"=>"名詞"},
24
+ {"を"=>"助詞"},
25
+ {"二"=>"名詞"},
26
+ {"郎"=>"名詞"},
27
+ {"を"=>"助詞"},
28
+ {"見"=>"動詞"},
29
+ {"た"=>"助動詞"},
30
+ {"美しい"=>"形容詞"},
31
+ {"女性"=>"名詞"},
32
+ {"に"=>"助詞"},
33
+ {"渡し"=>"動詞"},
34
+ {"た"=>"助動詞"},
35
+ {"。"=>"記号"}
36
+ ]
37
+ assert_equal(parse_result, @morpheme.parse)
38
+ end
39
+
40
+ def test_method_missing_ad_adjective
41
+ assert_equal(['この'], @morpheme.ad_adjective)
42
+ end
43
+
44
+ def test_method_missing_prefix
45
+ assert_equal([], @morpheme.prefix)
15
46
  end
16
47
 
17
48
  def test_method_missing_noun
@@ -22,10 +53,6 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
22
53
  assert_equal(['見', '渡し'], @morpheme.verb)
23
54
  end
24
55
 
25
- def test_method_missing_aux_verb
26
- assert_equal(['た', 'た'], @morpheme.aux_verb)
27
- end
28
-
29
56
  def test_method_missing_adjective
30
57
  assert_equal(['美しい'], @morpheme.adjective)
31
58
  end
@@ -38,23 +65,37 @@ class TC_LightMecab_Morpheme < Test::Unit::TestCase
38
65
  assert_equal(['そして'], @morpheme.conjunction)
39
66
  end
40
67
 
41
- def test_method_missing_interjection
42
- assert_equal([], @morpheme.interjection)
43
- end
44
-
45
68
  def test_method_missing_particle
46
69
  assert_equal(['は', 'を', 'を', 'に'], @morpheme.particle)
47
70
  end
48
71
 
49
- def test_method_missing_ad_adjective
50
- assert_equal(['この'], @morpheme.ad_adjective)
72
# The auxiliary verb "た" occurs twice in the fixture (test_parse expects two
# {"た"=>"助動詞"} entries), so both occurrences must be returned, in order.
def test_method_missing_aux_verb
  assert_equal(['た', 'た'], @morpheme.aux_verb)
end
75
+
76
+ def test_method_missing_interjection
77
+ assert_equal([], @morpheme.interjection)
51
78
  end
52
79
 
53
80
  def test_method_missing_symbol
54
81
  assert_equal(['。'], @morpheme.symbol)
55
82
  end
56
83
 
84
+ def test_method_missing_filler
85
+ assert_equal([], @morpheme.filler)
86
+ end
87
+
88
+ def test_method_missing_other
89
+ assert_equal([], @morpheme.other)
90
+ end
91
+
92
+ def test_method_missing_unknown
93
+ assert_equal([], @morpheme.unknown)
94
+ end
95
+
57
96
  def test_method_missing_hoge
58
- assert_raise{@morpheme.hoge}
97
+ assert_raise(NoMethodError) do
98
+ @morpheme.hogei
99
+ end
59
100
  end
60
101
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: light-mecab
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kei Tsuchiya
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-10-24 00:00:00.000000000 Z
11
+ date: 2013-11-21 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Execute morphological analysis by MeCab
14
14
  email:
@@ -23,6 +23,7 @@ files:
23
23
  - README.md
24
24
  - Rakefile
25
25
  - lib/light-mecab.rb
26
+ - lib/light-mecab/morpheme.rb
26
27
  - lib/light-mecab/version.rb
27
28
  - light-mecab.gemspec
28
29
  - locale/morpheme.yml