japanese_deinflector 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,20 +13,21 @@ def parse(fpath)
13
13
  # Rules are tab-separated in the following format:
14
14
  # <from>\t<to>\t<type>\t<reason_index>
15
15
  else
16
- from = parts.first
17
- rules_hash[from.size] ||= []
18
- rules_hash[from.size] << {
19
- :from => from,
20
- :to => parts[1],
21
- :reason_id => parts[3].to_i
16
+ from_suffix = parts.first
17
+ reason_id = parts[3].to_i
18
+ rules_hash[from_suffix.size] ||= []
19
+ rules_hash[from_suffix.size] << {
20
+ :from_suffix => from_suffix,
21
+ :to_suffix => parts[1],
22
+ :reason => reasons[reason_id],
22
23
  }
23
24
  end
24
25
  end
25
-
26
- {:reasons => reasons, :rules => rules_hash}
26
+ rules_hash
27
27
  end
28
28
 
29
29
  root = File.expand_path(File.dirname(__FILE__))
30
30
  File.open(File.join(root, 'data/deinflect.json'), 'w') do |f|
31
- f.write(parse(File.join(root, 'data/deinflect.dat')).to_json)
31
+ parsed_dat = parse(File.join(root, 'data/deinflect.dat'))
32
+ f.write(JSON.pretty_generate(parsed_dat))
32
33
  end
@@ -1,41 +1,65 @@
1
1
  #encoding: utf-8
2
+ require 'rubygems'
2
3
  require 'json'
3
4
  require "japanese_deinflector/version"
4
5
 
5
6
  class JapaneseDeinflector
6
7
  def initialize
7
8
  File.open(File.join(File.expand_path(File.dirname(__FILE__)), 'data/deinflect.json')) do |f|
8
- rules_and_reasons = JSON.parse(f.read, :symbolize_names => true)
9
- @reasons = rules_and_reasons[:reasons]
10
- # Convert hash keys to integers
11
- @rules = {}
12
- # Convert hash keys from something like :"9" -> 9
13
- rules_and_reasons[:rules].each do |size, rules|
14
- @rules[size.to_s.to_i] = rules
9
+ rule_groups = JSON.parse(f.read, :symbolize_names => true)
10
+ # Convert hash keys to integers (from something like :"9" -> 9)
11
+ @rule_groups = {}
12
+ rule_groups.each do |suffix_size, rules|
13
+ @rule_groups[suffix_size.to_s.to_i] = rules
15
14
  end
16
15
  end
17
16
  end
18
17
 
19
- def deinflect(word)
20
- possibilities = []
21
- rules_less_than_size(word.size).each do |size, rules|
22
- ending = word[-size..-1]
23
- rules.each do |rule|
24
- next unless ending == rule[:from]
25
- deinflected_word = "#{word[0..-size-1]}#{rule[:to]}"
26
- next if possibilities.include?(deinflected_word)
27
- # Weight is between 0 and 1, 1 being a higher chance of actual deinflection
28
- weight = (Float(size) / word.size).round(3)
29
- reason = @reasons[rule[:reason_id]]
30
- possibilities << {:weight => weight, :word => deinflected_word, :reason => reason}
18
+ def deinflect(word, iteration = 0)
19
+ results = Set.new
20
+
21
+ filter_rule_groups(@rule_groups, :max_suffix_size => word.size).each do |suffix_size, rules|
22
+ from_suffix = word[-suffix_size..-1]
23
+ filter_rules(rules, :from_suffix => from_suffix).each do |rule|
24
+ results << result_hash(word, from_suffix, rule[:to_suffix], rule[:reason])
25
+ end
26
+ end
27
+
28
+ if iteration < 2
29
+ additional_results = Set.new
30
+ results.each do |result|
31
+ additional_results.merge(deinflect(result[:word], iteration + 1))
31
32
  end
33
+ results.merge(additional_results)
32
34
  end
33
- possibilities
35
+
36
+ # Sort results in descending order by weight
37
+ results.to_a.sort{|x, y| y[:weight] <=> x[:weight]}
34
38
  end
35
39
 
36
40
  private
37
41
 
38
- def rules_less_than_size(max_size)
39
- @rules.clone.keep_if{|size, rules| size < max_size}
42
+ def result_hash(word, from_suffix, to_suffix, reason = "")
43
+ # Append new suffix to the original word to get the deinflection
44
+ deinflected_word = "#{word[0..-from_suffix.size-1]}#{to_suffix}"
45
+ # Weight is between 0 and 1, 1 being a higher chance of correct deinflection
46
+ weight = (Float(from_suffix.size) / word.size).round(3)
47
+ {
48
+ :word => deinflected_word,
49
+ :weight => weight,
50
+ :reason => reason,
51
+ }
52
+ end
53
+
54
+ def filter_rule_groups(groups, filters = {})
55
+ groups.clone.delete_if do |suffix_size, rules|
56
+ suffix_size >= filters[:max_suffix_size] if filters[:max_suffix_size]
57
+ end
58
+ end
59
+
60
+ def filter_rules(rules, filters = {})
61
+ rules.clone.delete_if.each do |rule|
62
+ rule[:from_suffix] != filters[:from_suffix] if filters[:from_suffix]
63
+ end
40
64
  end
41
65
  end
@@ -1,3 +1,3 @@
1
1
  class JapaneseDeinflector
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -4,67 +4,162 @@ require 'spec_helper'
4
4
  describe JapaneseDeinflector do
5
5
  subject{ JapaneseDeinflector.new }
6
6
 
7
- it "deinflects plain positive verbs" do
8
- # progressive tense
9
- #subject.deinflect("見ている").first[:word].should == "見る"
10
- #subject.deinflect("歌っている").first[:word].should == "歌う"
11
-
12
- # past tense
13
- subject.deinflect("見た").first[:word].should == "見る"
14
- subject.deinflect("歌った").first[:word].should == "歌う"
15
- end
7
+ describe "verbs" do
8
+ context "in present / attibutive tense" do
16
9
 
17
- it "deinflects polite positive verbs" do
18
- # present tense
19
- subject.deinflect("見ます").first[:word].should == "見る"
20
- subject.deinflect("歌います").first[:word].should == "歌う"
10
+ it "does not deinflect plain positive (non-inflected) verbs" do
11
+ subject.deinflect("見る").should be_empty
12
+ subject.deinflect("歌う").should be_empty
13
+ end
21
14
 
22
- # progressive tense
23
- #subject.deinflect("見ています").first[:word].should == "見る"
24
- #subject.deinflect("歌っています").first[:word].should == "歌う"
15
+ it "deinflects polite positive verbs" do
16
+ subject.deinflect("見ます").first[:word].should == "見る"
17
+ subject.deinflect("歌います").first[:word].should == "歌う"
18
+ end
25
19
 
26
- # past tense
27
- subject.deinflect("見ました").first[:word].should == "見る"
28
- subject.deinflect("歌いました").first[:word].should == "歌う"
29
- end
20
+ it "deinflects plain negative verbs" do
21
+ subject.deinflect("見ない").first[:word].should == "見る"
22
+ subject.deinflect("歌わない").first[:word].should == "歌う"
23
+ end
30
24
 
31
- it "deinflects plain negative verbs" do
32
- # present tense
33
- subject.deinflect("見ない").first[:word].should == "見る"
34
- subject.deinflect("歌わない").first[:word].should == "歌う"
25
+ it "deinflects polite negative formal verbs" do
26
+ subject.deinflect("見ません").first[:word].should == "見る"
27
+ subject.deinflect("歌いません").first[:word].should == "歌う"
28
+ end
35
29
 
36
- # progressive tense
37
- #subject.deinflect("見ていない").first[:word].should == "見る"
38
- #subject.deinflect("歌っていない").first[:word].should == "歌う"
30
+ end
39
31
 
40
- # past tense
41
- #subject.deinflect("見なかった").first[:word].should == "見る"
42
- #subject.deinflect("歌わなかった").first[:word].should == "歌う"
43
- end
32
+ context "in progressive (ongoing) tense" do
44
33
 
45
- it "deinflects polite negative formal verbs" do
46
- # present tense
47
- subject.deinflect("見ません").first[:word].should == "見る"
48
- subject.deinflect("歌いません").first[:word].should == "歌う"
34
+ it "deinflects plain positive verbs" do
35
+ pending "unimplemented"
49
36
 
50
- # progressive tense
51
- #subject.deinflect("見ていません").first[:word].should == "見る"
52
- #subject.deinflect("歌っていません").first[:word].should == "歌う"
37
+ subject.deinflect("見ている").first[:word].should == "見る"
38
+ subject.deinflect("歌っている").first[:word].should == "歌う"
39
+ end
53
40
 
54
- # past tense
55
- subject.deinflect("見ませんでした").first[:word].should == "見る"
56
- subject.deinflect("歌いませんでした").first[:word].should == "歌う"
57
- end
41
+ it "deinflects polite positive verbs" do
42
+ pending "unimplemented"
43
+
44
+ subject.deinflect("見ています").first[:word].should == "見る"
45
+ subject.deinflect("歌っています").first[:word].should == "歌う"
46
+ end
47
+
48
+ it "deinflects plain negative verbs" do
49
+ pending "unimplemented"
50
+
51
+ subject.deinflect("見ていない").first[:word].should == "見る"
52
+ subject.deinflect("歌っていない").first[:word].should == "歌う"
53
+ end
54
+
55
+ it "deinflects polite negative formal verbs" do
56
+ pending "unimplemented"
57
+
58
+ subject.deinflect("見ていません").first[:word].should == "見る"
59
+ subject.deinflect("歌っていません").first[:word].should == "歌う"
60
+ end
61
+
62
+ end
63
+
64
+ context "in past / present-perfect tense" do
65
+
66
+ it "deinflects plain positive verbs" do
67
+ subject.deinflect("見た").first[:word].should == "見る"
68
+ subject.deinflect("歌った").first[:word].should == "歌う"
69
+ end
70
+
71
+ it "deinflects polite positive verbs" do
72
+ subject.deinflect("見ました").first[:word].should == "見る"
73
+ subject.deinflect("歌いました").first[:word].should == "歌う"
74
+ end
75
+
76
+ it "deinflects plain negative verbs" do
77
+ subject.deinflect("見なかった").first[:word].should == "見る"
78
+ subject.deinflect("歌わなかった").first[:word].should == "歌う"
79
+ end
80
+
81
+ it "deinflects polite negative formal verbs" do
82
+ subject.deinflect("見ませんでした").first[:word].should == "見る"
83
+ subject.deinflect("歌いませんでした").first[:word].should == "歌う"
84
+ end
85
+
86
+ end
87
+
88
+ context "presumptive verbs" do
58
89
 
59
- it "deinflects polite negative formal adjectives" do
60
- # present tense
61
- subject.deinflect("嬉しくありません").first[:word].should == "嬉しい"
90
+ it "deinflects plain verbs" do
91
+ subject.deinflect("見よう").first[:word].should == "見る"
92
+ subject.deinflect("歌おう").first[:word].should == "歌う"
93
+ subject.deinflect("走ろう").first[:word].should == "走る"
94
+ end
62
95
 
63
- # past tense
64
- subject.deinflect("嬉しくありませんでした").first[:word].should == "嬉しい"
96
+ it "deinflects polite verbs" do
97
+ subject.deinflect("見ましょう").first[:word].should == "見る"
98
+ subject.deinflect("歌いましょう").first[:word].should == "歌う"
99
+ subject.deinflect("走りましょう").first[:word].should == "走る"
100
+ end
101
+
102
+ end
103
+
104
+ context "imperative verbs" do
105
+
106
+ it "deinflects plain positive verbs" do
107
+ subject.deinflect("見ろ").first[:word].should == "見る"
108
+ subject.deinflect("歌え").first[:word].should == "歌う"
109
+ subject.deinflect("走れ").first[:word].should == "走る"
110
+ end
111
+
112
+ it "deinflects plain negative verbs" do
113
+ subject.deinflect("見るな").first[:word].should == "見る"
114
+ subject.deinflect("歌うな").first[:word].should == "歌う"
115
+ subject.deinflect("走るな").first[:word].should == "走る"
116
+ end
117
+
118
+ end
119
+
120
+ context "provisional verbs" do
121
+
122
+ it "deinflects positive verbs" do
123
+ subject.deinflect("見れば").first[:word].should == "見る"
124
+ subject.deinflect("歌えば").first[:word].should == "歌う"
125
+ end
126
+
127
+ it "deinflects negative verbs" do
128
+ subject.deinflect("見なければ").first[:word].should == "見る"
129
+ subject.deinflect("歌えなければ").first[:word].should == "歌う"
130
+ end
131
+
132
+ end
133
+
134
+ context "in conditional form" do
135
+
136
+ it "deinflects plain positive verbs" do
137
+ subject.deinflect("見たら").first[:word].should == "見る"
138
+ subject.deinflect("歌ったら").first[:word].should == "歌う"
139
+ end
140
+
141
+ it "deinflects plain negative verbs" do
142
+ pending "unimplemented"
143
+
144
+ subject.deinflect("見なかったら").first[:word].should == "見る"
145
+ subject.deinflect("歌わなかったら").first[:word].should == "歌う"
146
+ end
147
+
148
+ it "deinflects polite positive verbs" do
149
+ subject.deinflect("見ましたら").first[:word].should == "見る"
150
+ subject.deinflect("歌いましたら").first[:word].should == "歌う"
151
+ end
152
+
153
+ end
65
154
  end
66
155
 
67
- it "deinflects imperative verbs" do
68
- subject.deinflect("歌って").first[:word].should == "歌う"
156
+ describe "adjectives" do
157
+ it "deinflects adjectives in present tense" do
158
+ subject.deinflect("嬉しくありません").first[:word].should == "嬉しい"
159
+ end
160
+
161
+ it "deinflects adjectives in past tense" do
162
+ subject.deinflect("嬉しくありませんでした").first[:word].should == "嬉しい"
163
+ end
69
164
  end
70
165
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: japanese_deinflector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-28 00:00:00.000000000 Z
12
+ date: 2012-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -70,6 +70,7 @@ files:
70
70
  - .rspec
71
71
  - .travis.yml
72
72
  - Gemfile
73
+ - README.md
73
74
  - Rakefile
74
75
  - japanese_deinflector.gemspec
75
76
  - lib/data/deinflect.dat
@@ -99,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
100
  version: '0'
100
101
  requirements: []
101
102
  rubyforge_project:
102
- rubygems_version: 1.8.21
103
+ rubygems_version: 1.8.24
103
104
  signing_key:
104
105
  specification_version: 3
105
106
  summary: Deinflect (unconjugate/undecline) Japanese words.