langue-japanese 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/Gemfile +1 -0
  2. data/lib/langue/japanese/inflector.rb +44 -0
  3. data/lib/langue/japanese/inflector/default.rb +454 -0
  4. data/lib/langue/japanese/inflector/inflection.rb +59 -0
  5. data/lib/langue/japanese/inflector/inflections.rb +56 -0
  6. data/lib/langue/japanese/language.rb +9 -0
  7. data/lib/langue/japanese/parser.rb +24 -7
  8. data/lib/langue/japanese/shaper.rb +2 -2
  9. data/lib/langue/japanese/structurer.rb +6 -1
  10. data/lib/langue/japanese/version.rb +1 -1
  11. data/lib/langue/japanese/words/adjectival_noun.rb +67 -0
  12. data/lib/langue/japanese/words/adjective.rb +37 -33
  13. data/lib/langue/japanese/words/adverb.rb +24 -0
  14. data/lib/langue/japanese/words/attribute.rb +52 -28
  15. data/lib/langue/japanese/words/classifier.rb +37 -5
  16. data/lib/langue/japanese/words/conjunction.rb +18 -0
  17. data/lib/langue/japanese/words/determiner.rb +24 -0
  18. data/lib/langue/japanese/words/interjection.rb +18 -0
  19. data/lib/langue/japanese/words/morpheme_filter.rb +40 -17
  20. data/lib/langue/japanese/words/noun.rb +50 -43
  21. data/lib/langue/japanese/words/particle.rb +24 -0
  22. data/lib/langue/japanese/words/period.rb +26 -26
  23. data/lib/langue/japanese/words/prefix.rb +13 -5
  24. data/lib/langue/japanese/words/pronoun.rb +5 -7
  25. data/lib/langue/japanese/words/verb.rb +59 -64
  26. data/spec/langue/japanese/data.yaml +53 -5
  27. data/spec/langue/japanese/inflector/inflection_spec.rb +80 -0
  28. data/spec/langue/japanese/inflector/inflections_spec.rb +83 -0
  29. data/spec/langue/japanese/inflector_spec.rb +1551 -0
  30. data/spec/langue/japanese/language_spec.rb +36 -0
  31. data/spec/langue/japanese/parser_spec.rb +100 -28
  32. data/spec/langue/japanese/structurer_spec.rb +8 -2
  33. data/spec/langue/japanese/words/{adjective_noun_spec.rb → adjectival_noun_spec.rb} +18 -12
  34. data/spec/langue/japanese/words/adjective_spec.rb +15 -0
  35. data/spec/langue/japanese/words/adverb_spec.rb +25 -0
  36. data/spec/langue/japanese/words/conjunction_spec.rb +25 -0
  37. data/spec/langue/japanese/words/determiner_spec.rb +25 -0
  38. data/spec/langue/japanese/words/interjection_spec.rb +25 -0
  39. data/spec/langue/japanese/words/noun_spec.rb +19 -0
  40. data/spec/langue/japanese/words/particle_spec.rb +26 -0
  41. data/spec/langue/japanese/words/period_spec.rb +6 -0
  42. data/spec/langue/japanese/words/pronoun_spec.rb +6 -0
  43. data/spec/langue/japanese/words/verb_spec.rb +79 -48
  44. data/spec/spec_helper.rb +23 -3
  45. metadata +22 -5
  46. data/lib/langue/japanese/words/adjective_noun.rb +0 -76
@@ -1,15 +1,13 @@
1
- require 'langue/japanese/words/noun'
1
+ require 'langue/word'
2
2
  require 'langue/japanese/words/classifier'
3
3
 
4
4
  module Langue
5
5
  module Japanese
6
- class Pronoun < Noun
7
- class << self
8
- include Classifier
6
+ class Pronoun < ::Langue::Pronoun
7
+ extend Classifier
9
8
 
10
- def take(morphemes, index)
11
- pronoun?(morphemes, index) ? 1 : 0
12
- end
9
+ def self.take(morphemes, index)
10
+ pronoun?(morphemes, index) ? 1 : 0
13
11
  end
14
12
  end
15
13
  end
@@ -5,67 +5,76 @@ require 'langue/japanese/words/classifier'
5
5
 
6
6
  module Langue
7
7
  module Japanese
8
- class Verb < Word
8
+ class Verb < ::Langue::Verb
9
9
  include Prefix
10
- include Attribute
11
-
12
- has :progressive, :passive, :aggressive, :negative, :perfective, :imperative
13
10
 
14
- class << self
15
- include Classifier
11
+ def extract_prefix_morphemes
12
+ size = 0
16
13
 
17
- def take(morphemes, index)
18
- if first_verb?(morphemes, index)
19
- take_verb(morphemes, index)
20
- elsif verb_prefix?(morphemes, index)
21
- take_verb_with_prefix(morphemes, index)
22
- elsif first_noun?(morphemes, index)
23
- take_noun_conjunct_to_suru(morphemes, index)
24
- elsif noun_prefix?(morphemes, index)
25
- take_noun_with_prefix_conjunct_to_suru(morphemes, index)
26
- else
27
- 0
28
- end
14
+ if self.class.verb_prefix?(morphemes, size)
15
+ size += 1 while self.class.verb_prefix?(morphemes, size)
16
+ elsif self.class.noun_prefix?(morphemes, size)
17
+ size += 1 while self.class.noun_prefix?(morphemes, size)
29
18
  end
30
19
 
31
- def take_verb(morphemes, index)
32
- return 0 unless first_verb?(morphemes, index)
33
- take_following_verb(morphemes, index)
34
- end
20
+ morphemes[0, size]
21
+ end
35
22
 
36
- def take_verb_with_prefix(morphemes, index)
37
- size = 0
38
- size += 1 while verb_prefix?(morphemes, index + size)
39
- return 0 unless size > 0
40
- next_size = take_verb(morphemes, index + size)
41
- next_size > 0 ? size + next_size : 0
42
- end
23
+ include Attribute
43
24
 
44
- def take_noun_conjunct_to_suru(morphemes, index)
45
- size = 0
46
- size += 1 while following_noun?(morphemes, index + size)
47
- return 0 unless size > 0
48
- return 0 unless noun_conjunct_to_suru?(morphemes, index + size - 1)
49
- return 0 unless suru_verb?(morphemes, index + size)
50
- size + take_following_verb(morphemes, index + size)
51
- end
25
+ has :progressive, :passive, :causative, :aggressive, :negative, :perfective, :imperative
26
+
27
+ extend Classifier
52
28
 
53
- def take_noun_with_prefix_conjunct_to_suru(morphemes, index)
54
- size = 0
55
- size += 1 while noun_prefix?(morphemes, index + size)
56
- return 0 unless size > 0
57
- next_size = take_noun_conjunct_to_suru(morphemes, index + size)
58
- next_size > 0 ? size + next_size : 0
29
+ def self.take(morphemes, index)
30
+ if first_verb?(morphemes, index)
31
+ take_verb(morphemes, index)
32
+ elsif verb_prefix?(morphemes, index)
33
+ take_verb_with_prefix(morphemes, index)
34
+ elsif first_noun?(morphemes, index)
35
+ take_noun_conjunct_to_suru(morphemes, index)
36
+ elsif noun_prefix?(morphemes, index)
37
+ take_noun_with_prefix_conjunct_to_suru(morphemes, index)
38
+ else
39
+ 0
59
40
  end
41
+ end
60
42
 
61
- private
43
+ def self.take_verb(morphemes, index)
44
+ return 0 unless first_verb?(morphemes, index)
45
+ take_following_verb(morphemes, index)
46
+ end
62
47
 
63
- def take_following_verb(morphemes, index)
64
- size = 1
65
- size += 1 while following_verb?(morphemes, index + size) || conjunctive_particle?(morphemes, index + size) && following_verb?(morphemes, index + size + 1)
66
- size += 1 while auxiliary_verb?(morphemes, index + size)
67
- size
68
- end
48
+ def self.take_verb_with_prefix(morphemes, index)
49
+ size = 0
50
+ size += 1 while verb_prefix?(morphemes, index + size)
51
+ return 0 unless size > 0
52
+ next_size = take_verb(morphemes, index + size)
53
+ next_size > 0 ? size + next_size : 0
54
+ end
55
+
56
+ def self.take_noun_conjunct_to_suru(morphemes, index)
57
+ size = 0
58
+ size += 1 while following_noun?(morphemes, index + size)
59
+ return 0 unless size > 0
60
+ return 0 unless noun_conjunct_to_suru?(morphemes, index + size - 1)
61
+ return 0 unless suru_verb?(morphemes, index + size)
62
+ size + take_following_verb(morphemes, index + size)
63
+ end
64
+
65
+ def self.take_noun_with_prefix_conjunct_to_suru(morphemes, index)
66
+ size = 0
67
+ size += 1 while noun_prefix?(morphemes, index + size)
68
+ return 0 unless size > 0
69
+ next_size = take_noun_conjunct_to_suru(morphemes, index + size)
70
+ next_size > 0 ? size + next_size : 0
71
+ end
72
+
73
+ def self.take_following_verb(morphemes, index)
74
+ size = 1
75
+ size += 1 while following_verb?(morphemes, index + size)
76
+ size += 1 while final_particle?(morphemes, index + size)
77
+ size
69
78
  end
70
79
 
71
80
  def key_morpheme
@@ -81,20 +90,6 @@ module Langue
81
90
 
82
91
  @key_morpheme
83
92
  end
84
-
85
- def prefix_morphemes
86
- @prefix_morphemes ||= begin
87
- size = 0
88
-
89
- if self.class.verb_prefix?(morphemes, size)
90
- size += 1 while self.class.verb_prefix?(morphemes, size)
91
- elsif self.class.noun_prefix?(morphemes, size)
92
- size += 1 while self.class.noun_prefix?(morphemes, size)
93
- end
94
-
95
- morphemes[0, size]
96
- end
97
- end
98
93
  end
99
94
  end
100
95
  end
@@ -3,9 +3,12 @@
3
3
  sentences:
4
4
  -
5
5
  - [今日, Noun]
6
+ - [は, Particle]
6
7
  - [妹, Noun]
8
+ - [と, Particle]
7
9
  - [一緒, Noun]
8
- - - お買い物してきた
10
+ - [に, Particle]
11
+ - - お買い物してきたよ
9
12
  - Verb
10
13
  - body: 買い物してくる
11
14
  prefix: お
@@ -22,24 +25,39 @@
22
25
  sentences:
23
26
  -
24
27
  - [例, Noun]
28
+ - [の, Particle]
25
29
  - [女子会, Noun]
30
+ - [が, Particle]
26
31
  - [延期, Noun]
32
+ - [に, Particle]
33
+ - [なって, Verb]
34
+ - [ちょっとホッ, Adverb]
35
+ - [と, Particle]
36
+ - [している, Verb]
27
37
 
28
38
  -
29
39
  text: こんなに部屋が寒くてはインターネットもままならぬ
30
40
  sentences:
31
41
  -
42
+ - [こんなに, Adverb]
32
43
  - [部屋, Noun]
44
+ - [が, Particle]
33
45
  - - 寒く
34
46
  - Adjective
35
47
  - body: 寒い
48
+ - [は, Particle]
36
49
  - [インターネット, Noun]
50
+ - [も, Particle]
51
+ - [まま, Adverb]
52
+ - [ならぬ, Verb]
37
53
 
38
54
  -
39
55
  text: 牛乳って意外と甘い?
40
56
  sentences:
41
57
  -
42
58
  - [牛乳, Noun]
59
+ - [って, Particle]
60
+ - [意外と, Adverb]
43
61
  - - 甘い
44
62
  - Adjective
45
63
  - body: 甘い
@@ -49,7 +67,9 @@
49
67
  text: むしろ精神的疾患とは……
50
68
  sentences:
51
69
  -
70
+ - [むしろ, Adverb]
52
71
  - [精神的疾患, Noun]
72
+ - [とは, Particle]
53
73
  - [……, Period]
54
74
 
55
75
  -
@@ -58,12 +78,16 @@
58
78
  -
59
79
  - [情報系大学生, Noun]
60
80
  - [卒業, Noun]
81
+ - [までに, Particle]
61
82
  - [応用情報レベル, Noun]
83
+ - [の, Particle]
62
84
  - [知識, Noun]
85
+ - [を, Particle]
63
86
  - - 習っている
64
87
  - Verb
65
88
  - body: 習う
66
89
  attributes: [progressive]
90
+ - [と, Particle]
67
91
  - [., Period]
68
92
 
69
93
  -
@@ -71,12 +95,16 @@
71
95
  sentences:
72
96
  -
73
97
  - [プログラマー, Noun]
98
+ - [の, Particle]
74
99
  - [資格, Noun]
100
+ - [を, Particle]
75
101
  - - 持つ
76
102
  - Verb
77
103
  - body: 持つ
78
104
  - [息子, Noun]
105
+ - [が, Particle]
79
106
  - [面接, Noun]
107
+ - [に, Particle]
80
108
  - [全て, Noun]
81
109
  - - 落ちました
82
110
  - Verb
@@ -85,40 +113,52 @@
85
113
  - [。, Period]
86
114
  -
87
115
  - [理系, Noun]
116
+ - [でも, Particle]
88
117
  - [旧帝, Noun]
89
- - [ダメ, AdjectiveNoun]
118
+ - [じゃ, Particle]
119
+ - [ダメ, AdjectivalNoun]
120
+ - [か, Particle]
90
121
  - [?, Period]
91
122
 
92
123
  -
93
124
  text: ちゃんと病院いこうね…。はい…。
94
125
  sentences:
95
126
  -
127
+ - [ちゃんと, Adverb]
96
128
  - [病院, Noun]
97
- - - いこう
129
+ - - いこうね
98
130
  - Verb
99
131
  - body: いく
100
132
  - […。, Period]
101
133
  -
134
+ - [はい, Interjection]
102
135
  - […。, Period]
103
136
 
104
137
  -
105
138
  text: んー、お昼がスープパスタだけとか。。。あふんあふん。。。後で絶対お腹すくなー。でも今あんま食べる気しにゃい。。。
106
139
  sentences:
107
140
  -
141
+ - [ん, Particle]
108
142
  - [お昼, Noun]
143
+ - [が, Particle]
109
144
  - [スープパスタ, Noun]
145
+ - [だけとか, Particle]
110
146
  - [。。。, Period]
111
147
  -
112
148
  - [あふん, Verb]
149
+ - [ふん, Interjection]
113
150
  - [。。。, Period]
114
151
  -
152
+ - [後で, Adverb]
115
153
  - [絶対, Noun]
116
154
  - [お腹, Noun]
117
155
  - - すく
118
156
  - Adjective
119
157
  - body: すい
158
+ - [なー, Particle]
120
159
  - [。, Period]
121
160
  -
161
+ - [でも, Conjunction]
122
162
  - [今あんま, Noun]
123
163
  - - 食べる
124
164
  - Verb
@@ -130,19 +170,24 @@
130
170
  text: iPhone 4Sにただで替えられるって聞いたけど、本当に使うかは疑問なので……
131
171
  sentences:
132
172
  -
133
- - [iphone4s, Noun]
173
+ - [iPhone4S, Noun]
174
+ - [に, Particle]
134
175
  - [ただ, Noun]
176
+ - [で, Particle]
135
177
  - - 替えられる
136
178
  - Verb
137
179
  - body: 替える
138
180
  attributes: [passive]
181
+ - [って, Particle]
139
182
  - - 聞いた
140
183
  - Verb
141
184
  - body: 聞く
142
185
  attributes: [perfective]
143
- - - 使う
186
+ - [本当に, Adverb]
187
+ - - 使うか
144
188
  - Verb
145
189
  - body: 使う
190
+ - [は, Particle]
146
191
  - [疑問, Noun]
147
192
  - [……, Period]
148
193
 
@@ -151,6 +196,8 @@
151
196
  sentences:
152
197
  -
153
198
  - [俺, Pronoun]
199
+ - [の, Particle]
200
+ - [に, Particle]
154
201
  - - ご奉仕しろ
155
202
  - Verb
156
203
  - body: 奉仕する
@@ -163,6 +210,7 @@
163
210
  sentences:
164
211
  -
165
212
  - [風, Noun]
213
+ - [と共に, Particle]
166
214
  - - 去らぬ
167
215
  - Verb
168
216
  - body: 去る
@@ -0,0 +1,80 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'langue/japanese/inflector/inflection'
3
+
4
+ describe Langue::Japanese::Inflector::Inflection, '#inflect' do
5
+ before do
6
+ suffixes = {
7
+ 'form' => 'form_suffix',
8
+ 'proc' => lambda { |options| options[:suffix] },
9
+ '連用タ接続' => 'ta_suffix',
10
+ '連用テ接続' => 'te_suffix'
11
+ }
12
+
13
+ @inflection = described_class.new 'suffix', suffixes
14
+ end
15
+
16
+ it 'inflects the word to the inflectional form' do
17
+ word = @inflection.inflect('body-suffix', 'form')
18
+ word.should == 'body-form_suffix'
19
+ end
20
+
21
+ it 'adds value returning from calling of its Proc as suffix to the word if the inflectional form is an instance of Proc' do
22
+ word = @inflection.inflect('body-suffix', 'proc', :suffix => 'proc_suffix')
23
+ word.should == 'body-proc_suffix'
24
+ end
25
+
26
+ it 'raises ArgumentError if the word does not end with the base suffix' do
27
+ lambda { @inflection.inflect('body-suffi', 'form') }.should raise_error(ArgumentError, 'the word does not end with "suffix"')
28
+ end
29
+
30
+ it 'raises ArgumentError if the inflectional form does not defined' do
31
+ lambda { @inflection.inflect('body-suffix', 'form1') }.should raise_error(ArgumentError, '"form1" inflectional form does not defined in the inflection')
32
+ end
33
+
34
+ context 'with :following option' do
35
+ it 'adds value of :following option to the word' do
36
+ word = @inflection.inflect('body-suffix', 'form', :following => '-following')
37
+ word.should == 'body-form_suffix-following'
38
+ end
39
+
40
+ it 'converts to "タ行" from "ダ行" in first character of the following word if the inflectional form is "連用タ接続" or "連用テ接続"' do
41
+ {
42
+ '連用タ接続' => 'ta_suffix',
43
+ '連用テ接続' => 'te_suffix'
44
+ }.each do |form, suffix|
45
+ {
46
+ 'だ' => 'た',
47
+ 'じ' => 'ち',
48
+ 'ぢ' => 'ち',
49
+ 'で' => 'て',
50
+ 'ど' => 'と'
51
+ }.each do |following, converted_following|
52
+ word = @inflection.inflect('body-suffix', form, :following => following)
53
+ word.should == "body-#{suffix}#{converted_following}"
54
+ end
55
+ end
56
+ end
57
+
58
+ it 'converts to "ダ行" from "タ行" in first character of the following word if the inflectional form is "連用タ接続" or "連用テ接続"' do
59
+ inflection = described_class.new('suffix', {
60
+ '連用タ接続' => ['ta_suffix', true],
61
+ '連用テ接続' => ['te_suffix', true]
62
+ })
63
+
64
+ {
65
+ '連用タ接続' => 'ta_suffix',
66
+ '連用テ接続' => 'te_suffix'
67
+ }.each do |form, suffix|
68
+ {
69
+ 'た' => 'だ',
70
+ 'ち' => 'じ',
71
+ 'て' => 'で',
72
+ 'と' => 'ど'
73
+ }.each do |following, converted_following|
74
+ word = inflection.inflect('body-suffix', form, :following => following)
75
+ word.should == "body-#{suffix}#{converted_following}"
76
+ end
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,83 @@
1
+ require 'langue/japanese/inflector/inflections'
2
+
3
+ describe Langue::Japanese::Inflector::Inflections, '#inflection' do
4
+ before do
5
+ @inflections = described_class.new
6
+ end
7
+
8
+ it 'calls Langue::Japanese::Inflector::Inflection.new with the base suffix and the suffixes' do
9
+ Langue::Japanese::Inflector::Inflection.should_receive(:new).with('suffix', 'form' => 'suffix')
10
+ @inflections.inflection('classification', 'suffix', 'form' => 'suffix')
11
+ end
12
+
13
+ it 'defines the inflection' do
14
+ @inflections.inflection('classification', 'suffix', 'form' => 'suffix')
15
+ @inflections['classification'].should be_a(Langue::Japanese::Inflector::Inflection)
16
+ end
17
+
18
+ context 'in call #category' do
19
+ it 'does not raise ArgumentError if defined the inflectional forms in just proportion' do
20
+ lambda {
21
+ @inflections.category 'form' do
22
+ inflection 'classification', 'suffix', 'form' => 'suffix'
23
+ end
24
+ }.should_not raise_error(ArgumentError)
25
+ end
26
+
27
+ it 'raises ArgumentError if the inflectional forms is excess' do
28
+ lambda {
29
+ @inflections.category *%w(form1 form2 form3) do
30
+ inflection 'classification', 'suffix'
31
+ end
32
+ }.should raise_error(ArgumentError, 'form1, form2 and form3 has not been defined')
33
+ end
34
+
35
+ it 'raises ArgumentError if the inflectional forms is inadequate' do
36
+ lambda {
37
+ @inflections.category 'form' do
38
+ inflection 'classification', 'suffix', {
39
+ 'form' => 'suffix',
40
+ 'form1' => 'suffix1'
41
+ }
42
+ end
43
+ }.should raise_error(ArgumentError, 'form1 should not be defined')
44
+ end
45
+
46
+ it 'raises ArgumentError if the inflectional forms is excess and inadequate' do
47
+ lambda {
48
+ @inflections.category *%w(form1 form2) do
49
+ inflection 'classification', 'suffix', {
50
+ 'form3' => 'suffix3',
51
+ 'form4' => 'suffix4'
52
+ }
53
+ end
54
+ }.should raise_error(ArgumentError, 'form1 and form2 has not been defined, and form3 and form4 should not be defined')
55
+ end
56
+ end
57
+ end
58
+
59
+ describe Langue::Japanese::Inflector::Inflections, '#category' do
60
+ before do
61
+ @inflections = described_class.new
62
+ end
63
+
64
+ it 'calls the block in scope of the instance' do
65
+ matcher = equal(@inflections)
66
+ @inflections.category { should matcher }
67
+ end
68
+ end
69
+
70
+ describe Langue::Japanese::Inflector::Inflections, '#categorizing?' do
71
+ before do
72
+ @inflections = described_class.new
73
+ end
74
+
75
+ it 'returns false after initializing' do
76
+ @inflections.should_not be_categorizing
77
+ end
78
+
79
+ it 'returns true in call #category' do
80
+ matcher = be_categorizing
81
+ @inflections.category { should matcher }
82
+ end
83
+ end