llt-morphologizer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,214 @@
1
+ module LLT::Morphologizer::StemLookupStatementBuilder::Declinable
2
+
3
# Suffix components of declinable forms.
# Original note: nd, nt, s, connecting vowels.
DECL_COMPONENTS = %w[issim errim illim ior]

# Every matcher below is a pair: an attribute name and the list of patterns
# that are tried against the end of a form. They are handed to `has`, which is
# defined outside this view (presumably it stores the matched part under the
# attribute name - TODO confirm).

# No a, um anymore. (?<!i) was dropped because of comparison forms.
NOMINATIVE_ENDING = [
  :ending,
  [/(?<=us|er|es|u|e|al|ar|is|or|os|o|(?<=[^aeio])s|x|as|ur|men)$/]
]

# ubus is not covered.
OTHER_CASE_ENDING = [
  :ending,
  [/(?<=ior|ius|nter|iter)$|ae$|am$|arum$|as$|is$|(?<!aeo)i$|o$|orum$|os$|(?<!aeiou)e$|ei$|erum$|ebus$|es$|em$|(?<!i)us$|u$|uum$|ua$|ibus$|im$|ia$|ium$|(?<=n)s$|(?<=nt)er$|iter$/]
]

# i erased - filium, u erased - suum
UM_ENDING = [:ending, [/um$/, /ui$/]]

# 2013-10-08 solely for filius, Gaius...
IUS_ENDING = [:ending, [/(?<=i)us$/]]

# removed u => sua
A_ENDING = [:ending, [/(?<=[^ao])a$/]]

# alius aliud
PRONOMINAL_ENDING = [:ending, [/(?<=ali)u[sd]$/, /ius$/]]

# ior, ius... no ending at all...
COMPARISON = [:comparison_sign, [/ior$|ius$|issim$|lim$|rim$/]]

PPA_OR_GERUND     = [:extension, [/n$|nt$|nd$/]]
THEMATIC_VOWEL    = [:thematic, [/[ue]$/]]
THEMATIC_I_OF_M   = [:thematic, [/i$/]]
FUTURE_PARTICIPLE = [:extension, [/ur$/]]
15
+
16
+
17
# Entry point for declinable forms: calls setup(:declinable), then runs every
# ending strategy in a fixed order. Returns whatever the last strategy
# (contracted_vocative) returns.
def create_declinables
  setup(:declinable)

  strategies = %i[
    nominative
    other_case
    um_ending
    ius_ending
    a_ending
    pronominal
    contracted_vocative
  ]
  # inject keeps only the result of the most recent step, so the value of the
  # final step becomes the method's return value.
  strategies.inject(nil) { |_previous, strategy| send(strategy) }
end
28
+
29
+ private
30
+
31
# When the form matches NOMINATIVE_ENDING, looks for a noun and an adjective
# in the :nom column, then resets only the ending.
def nominative
  return unless has(NOMINATIVE_ENDING)

  %i[noun adjective].each { |kind| look_for(kind, :nom) }
  # ending would be overwritten by prepend otherwise!
  reset(:ending)
end
38
+
39
# When the form matches OTHER_CASE_ENDING: looks for noun and adjective stems
# and a verb ppp stem, tries comparison/verbal extensions, then resets all.
def other_case
  return unless has(OTHER_CASE_ENDING)

  [%i[noun stem], %i[adjective stem], %i[verb ppp]].each do |kind, stem_column|
    look_for(kind, stem_column)
  end
  comparison_or_verbal_extension
  reset(all)
end
48
+
49
# When the form matches UM_ENDING: looks for noun and adjective stems and a
# verb ppp stem, tries comparison/verbal extensions, then resets all.
def um_ending
  return unless has(UM_ENDING)

  [%i[noun stem], %i[adjective stem], %i[verb ppp]].each do |kind, stem_column|
    look_for(kind, stem_column)
  end
  comparison_or_verbal_extension
  reset(all)
end
58
+
59
# When the form matches IUS_ENDING, looks for a noun stem and resets all.
# Only filius is looked up here.
def ius_ending
  return unless has(IUS_ENDING)

  look_for(:noun, :stem)
  reset(all)
end
66
+
67
# When the form matches A_ENDING: looks for noun and adjective stems and a
# verb ppp stem, tries comparison/verbal extensions, then resets all.
def a_ending
  return unless has(A_ENDING)

  [%i[noun stem], %i[adjective stem], %i[verb ppp]].each do |kind, stem_column|
    look_for(kind, stem_column)
  end
  comparison_or_verbal_extension
  reset(all)
end
76
+
77
# When the form matches PRONOMINAL_ENDING (alius, aliud, -ius), looks for an
# adjective stem and resets all.
def pronominal
  return unless has(PRONOMINAL_ENDING)

  look_for(:adjective, :stem)
  reset(all)
end
84
+
85
# Looks for a noun stem when the current stem ends in "i"
# (the contracted vocative, e.g. fili).
def contracted_vocative
  return unless stem =~ /i$/

  look_for(:noun, :stem)
end
90
+
91
# Probes what remains of the form for a comparison sign and for verbal
# extensions, issuing a lookup after each successful probe. The probes run in
# a fixed order; the thematic probes only run after a participle/gerund
# extension was found, and the thematic "i" probe only after a thematic vowel.
def comparison_or_verbal_extension
  look_for(:adjective, :stem) if has(COMPARISON)

  if has(PPA_OR_GERUND)
    look_for(:verb, :pr)
    if has(THEMATIC_VOWEL)
      look_for(:same)
      look_for(:same) if has(THEMATIC_I_OF_M)
    end
  end

  look_for(:verb, :ppp) if has(FUTURE_PARTICIPLE)
end
100
+
101
# Delegates to the inflection-class collector that belongs to the current
# table (:noun, :adjective or :verb). Returns nil for any other table.
def valid_itypes_for_declinable
  collector = {
    noun: :valid_noun_classes,
    adjective: :valid_adjective_classes,
    verb: :valid_verb_classes
  }[table]

  send(collector) if collector
end
108
+
109
# Appends to `itype` the noun inflection classes that are compatible with the
# current lookup, based on `column`, `stem` and `ending`.
#
# Class codes: 3 is consonantic stem, 31 vocalic stem - group 1 and so forth.
#
# Three situations are handled:
# * column :nom                     - classes are derived from the stem's tail
# * column :stem with empty ending  - a stem ending in "i" (fili vocative)
# * column :stem with an ending     - classes are derived from stem + ending
#
# Fix: the last nominative pattern used to read `o||as`. The empty alternative
# inside the look-behind matched every stem, which turned the branch into a
# catch-all and defeated the `[^i]or` exclusion that keeps comparison forms
# (-ior) out of the noun lookup.
def valid_noun_classes
  if column == :nom
    case stem
    # /(?<=a)$/ (class 1) and /(?<=um)$/ (class 2) are disabled in the new morphologizer
    when /(?<=us)$/ then itype << 3 << 4 # 2 disabled in new morphologizer; [^i] restriction erased (filius, gaius)
    when /(?<=er)$/ then itype << 2 << 3
    when /(?<=es)$/ then itype << 3 # 5 disabled in new morphologizer
    when /(?<=u)$/ then itype << 4
    when /(?<=ar)$/ then itype << 3 << 31 # added for Caesar, who is 3. could be done better, but performance won't count here.
    when /(?<=e|al|ar)$/ then itype << 31 # "ar" is already taken by the branch above
    when /(?<=is)$/ then itype << 3 << 32 << 33
    when /(?<=[^aeiou]s)$/ then itype << 3 << 33 # ns was excluded before. we don't know why.
    when /(?<=x)$/ then itype << 3 << 33 # nox! 2013-10-07 20:51
    when /(?<=[^i]or|os|o|as|ur|men)$/ then itype << 3
    end
  end

  # fili vocative
  itype << 2 if column == :stem && ending.empty? && stem =~ /i$/

  if column == :stem && !ending.empty? # nouns that end like a comparison?!
    # watch out: regexps must be redefined... stem+ending doesn't work. check corporum.
    case stem + ending
    when /[^aeou]a$/ then itype << 1 << 2 << 3 << 31 # a decl word whose stem ends with a vowel?
    when /ae$|am$|arum$|as$/ then itype << 1
    when /is$/ then itype << 1 << 2 << 3 << 31 << 32 << 33
    when /ui$/ then itype << 2 << 4
    when /[^aeou]i$/ then itype << 2 << 3 << 31 << 32 << 33
    when /um$/ then itype << 2 << 3 << 4 << 31 << 32 << 33 # [^i] erased. filius
    when /o$|orum$|os$/ then itype << 2
    when /ei$|erum$|ebus$/ then itype << 5
    when /[^aeou]e$/ then itype << 2 << 3 << 33 << 5 # i allowed, acie
    when /es$/ then itype << 3 << 32 << 33 << 5
    when /em$/ then itype << 3 << 33 << 5
    when /ibus$/ then itype << 3 << 31 << 32 << 33 << 4
    when /us$|u$|ua$/ then itype << 2 << 4 # adds 2 in new morphologizer - evaluated through stem now
    when /im$/ then itype << 32
    when /ia$/ then itype << 31 # ineffective here, searched together with a now
    # /ium$/ (31, 32, 33) is disabled
    end

    # "erum" is swallowed by the /um$/ branch above, so class 5 is added
    # explicitly (rerum would be missed otherwise).
    itype << 5 if ending == "erum"
  end
end
152
+
153
# Appends to `itype` the adjective inflection classes that are compatible
# with the current lookup, based on `column`, `stem`, `ending` and
# `comparison_sign`.
def valid_adjective_classes
  if column == :nom
    nominative = stem
    if nominative == "maior"
      itype << 3
    elsif nominative =~ /(?<=us|er|is|[^i]or)$/
      itype << 1 << 3 << 5
    elsif nominative =~ /(?<=ar|s|x)$/
      itype << 3
    end
  end

  if column == :stem && !ending.empty?
    # vacui - 2013-10-07 23:42 - well this is weird.
    # Might account for vacui - but certainly not for exercitui,
    # which will arrive here, even if it's not needed in any event.
    # So do it only for vacu - and god knows what else...
    # Moves the "u" from the ending onto the stem; both strings are mutated.
    stem << ending.slice!("u") if stem == "vacu" && ending == "ui"

    form = stem + ending
    if form =~ /ius$/
      itype << 5
    elsif form =~ /ter$/
      itype << 3 << 5 # 5? not sure. 2013-10-07 20:35
    elsif form =~ /[a-z]$/
      itype << 1 << 3 << 5
    end
  end

  # A comparison sign without an ending: /$/ matches any string stem.
  if column == :stem && !comparison_sign.empty? && ending.empty?
    itype << 1 << 3 if stem =~ /$/
  end
end
182
+
183
# Appends to `itype` the verb conjugation classes that are compatible with
# the current lookup, based on `column`, `stem`, `ending`, `extension` and
# `thematic`.
def valid_verb_classes
  # ppp stems with an ending and either no extension or the "ur" extension
  if column == :ppp && !ending.empty? && (extension.empty? || extension == "ur")
    itype << 1 << 2 << 3 << 4 << 5 if stem =~ /(?<=t|s|x)$/
  end

  # present stems, or ppp stems that carry an extension
  if column == :pr || (column == :ppp && !extension.empty?)
    # a bare "n" extension is only accepted together with the ending "s"
    if extension != "n" || ending == "s"
      base = stem
      if base =~ /a$/
        itype << 1
      elsif base =~ /i$/
        itype << 4
      elsif base =~ /e$/
        itype << 2
      elsif base =~ /[^aie]$/
        itype << 3 if thematic == "e"
        itype << 5 if thematic == "ie" || ending == "re"
      end
    end
  end

  if column == :pr && ending == "i" && extension.empty?
    itype << 3 << 5 if stem =~ /[^aie]$/
  end

  # perfect composition
  itype << 1 << 2 << 3 << 4 << 5 if column == :pf
end
214
+ end
@@ -0,0 +1,5 @@
1
module LLT
  class Morphologizer
    # Release number of the gem; the gemspec assigns it to spec.version.
    VERSION = '0.0.1'
  end
end
@@ -0,0 +1,34 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'llt/morphologizer/version'
5
+
6
# Gem specification for llt-morphologizer.
Gem::Specification.new do |gem|
  gem.name        = "llt-morphologizer"
  gem.version     = LLT::Morphologizer::VERSION
  gem.authors     = ["LFDM"]
  gem.email       = ["1986gh@gmail.com"]
  gem.summary     = "Morphological parsing of Latin forms"
  gem.description = gem.summary
  gem.homepage    = "http://www.latin-language-toolkit.net"
  gem.license     = "MIT"

  # Everything tracked by git is packaged.
  tracked_files     = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies: name followed by optional constraints.
  [
    ["bundler", "~> 1.5"],
    ["rake"],
    ["rspec"],
    ["simplecov", "~> 0.7"],
    ["llt-db_handler-stub"]
  ].each { |requirement| gem.add_development_dependency(*requirement) }

  # Runtime dependencies.
  %w[
    llt-constants
    llt-core
    llt-core_extensions
    llt-db_handler
    llt-form_builder
    llt-helpers
    llt-logger
  ].each { |name| gem.add_dependency(name) }
end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
describe LLT::Morphologizer::LookupStatement do
  let(:ls) { LLT::Morphologizer::LookupStatement }

  # Statement for the noun stem "ros" with ending "am", restricted to class 1.
  let(:rosam) { ls.new("ros", :noun, :stem, [1], { ending: "am" }) }

  describe "#stem_type" do
    it "returns the stem type" do
      rosam.stem_type.should == :stem
    end
  end

  describe "#type" do
    it "returns the type" do
      rosam.type.should == :noun
    end
  end

  describe "#to_query" do
    it "builds a query in a hash format, that corresponds with the db handler interface" do
      expected = {
        type: :noun,
        stem_type: :stem,
        stem: "ros",
        restrictions: { type: :inflection_class, values: [1] }
      }
      rosam.to_query.should == expected
    end

    it "build a query for laudavit" do
      laudavit = ls.new("laudav", :verb, :pf, ["v"], { ending: "it" })
      expected = {
        type: :verb,
        stem_type: :pf,
        stem: "laudav",
        restrictions: { type: :pf_composition, values: ["v"] }
      }
      laudavit.to_query.should == expected
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+
3
describe LLT::Morphologizer::StemLookupStatementBuilder do

  # Builds a statement builder for +word+ with a fresh logger.
  def slsb(word)
    LLT::Morphologizer::StemLookupStatementBuilder.new(word, LLT::Logger.new)
  end

  # All statements created for +word+, converted to their query hashes.
  def queries_for(word)
    slsb(word).statements.map(&:to_query)
  end

  describe "#statements" do
    it "creates no separate nominative lookup request for a, um, es and us endings - different from old implementation" do
      queries_for("rosa").count { |h| h[:stem_type] == :nom }.should == 0
      queries_for("templum").count { |h| h[:stem_type] == :nom }.should == 0
      queries_for("res").count { |h| h[:stem_type] == :nom && h[:restrictions][:values].include?(5) }.should == 0
      queries_for("hortus").count { |h| h[:stem_type] == :nom && h[:restrictions][:values].include?(2) }.should == 0
    end

    it "searches in persona, place and ethnic table when a word is capitalized" do
      plato_queries = queries_for("Plato")
      plato_queries.select { |h| h[:type] == :persona }.should_not be_empty
      plato_queries.select { |h| h[:type] == :place }.should_not be_empty
      plato_queries.select { |h| h[:type] == :ethnic }.should_not be_empty
    end

    it "only stems are searched in the ethnic table" do
      queries = queries_for("Haeduus")
      queries.none? { |h| h[:type] == :ethnic && h[:stem_type] == :nom }.should be_true
      queries.any? { |h| h[:type] == :ethnic && h[:stem_type] == :stem }.should be_true
    end

    # Description fixed: "expect" was a typo for "except".
    it "searches for capitalized words in downcase, except for names, places and ethnics" do
      plato_queries = queries_for("Plato")
      plato_queries.any? { |h| h[:type] == :noun && h[:stem] =~ /^[a-z].*/ }.should be_true
      plato_queries.none? { |h| h[:type] == :noun && h[:stem] =~ /^[A-Z].*/ }.should be_true

      plato_queries.any? { |h| h[:type] == :persona && h[:stem] =~ /^[A-Z].*/ }.should be_true
      plato_queries.none? { |h| h[:type] == :persona && h[:stem] =~ /^[a-z].*/ }.should be_true
    end
  end
end
@@ -0,0 +1,524 @@
1
+ require 'spec_helper'
2
+
3
+ describe LLT::Morphologizer do
4
it 'should have a version number' do
  LLT::Morphologizer::VERSION.should_not be_nil
end

let(:stub_db) { LLT::DbHandler::Stub.new }
let(:morphologizer) { LLT::Morphologizer.new(db: stub_db) }

# Returns a morphologizer backed by a fresh stub db on which the private
# #setup has already been called with +word+.
def morph_stub(word)
  LLT::Morphologizer.new(db: LLT::DbHandler::Stub.new).tap do |prepared|
    prepared.send(:setup, word)
  end
end

before(:all) { LLT::DbHandler::Stub.setup }
18
+
19
# Group name fixed: it used to read "#personal_pronons".
describe "#personal_pronouns" do
  # this tests some private methods just to be safe
  context "morphologizes pronouns" do
    it "with se" do
      se = morph_stub("se")
      se.send(:clook_up, :personal_pronouns).should have(4).items
      se.send(:unique_pers_pron?).should be_true
    end

    it "with Se" do
      se = morph_stub("Se")
      se.send(:clook_up, :personal_pronouns).should have(4).items
      se.send(:unique_pers_pron?).should be_true
    end

    it "with secum" do
      secum = morph_stub("secum")
      forms = secum.send(:clook_up, :personal_pronouns)
      secum.send(:unique_pers_pron?).should be_true
      forms.should have(2).items
      forms.first.to_s(:segmentized).should == "se-cum"
    end

    it "with nosmet" do
      nosmet = morph_stub("nosmet")
      forms = nosmet.send(:clook_up, :personal_pronouns)
      nosmet.send(:unique_pers_pron?).should be_true
      forms.should have(2).items
      forms.first.to_s(:segmentized).should == "nos-met"
    end
  end
end
51
+
52
describe "#other_pronouns" do
  # Asserts how many forms the private #other_pronouns returns for +form+.
  def expect_other_pronouns(form, count)
    morph_stub(form).send(:other_pronouns).should have(count).items
  end

  context "morphologizes pronouns" do
    it "with hic" do
      expect_other_pronouns("hic", 1)
      expect_other_pronouns("hunc", 1)
      expect_other_pronouns("huic", 3)
    end

    it "with aliqui" do
      expect_other_pronouns("alicuius", 3)
    end

    it "with quicumque" do
      expect_other_pronouns("quibuscumque", 6)
    end

    it "with quilibet" do
      expect_other_pronouns("quaelibet", 4)
    end

    it "with quivis" do
      expect_other_pronouns("quodvis", 2)
    end

    it "with quidam" do
      expect_other_pronouns("quibusdam", 6)
    end

    it "with is" do
      expect_other_pronouns("eas", 1)
      expect_other_pronouns("is", 7) # sadly - eis...
      expect_other_pronouns("ii", 1)
    end

    it "with idem" do
      expect_other_pronouns("eorundem", 2)
      expect_other_pronouns("eisdem", 6)
      expect_other_pronouns("iisdem", 6)
    end

    it "with uter" do
      expect_other_pronouns("utrum", 3)
    end

    it "with uterque" do
      expect_other_pronouns("utrumque", 3)
      expect_other_pronouns("utriusque", 3)
    end

    it "with quisque" do
      expect_other_pronouns("cuiusque", 3)
    end

    # The two quisquam and the two quispiam examples used to share one
    # description each; the tested form now tells them apart.
    it "with quisquam" do
      expect_other_pronouns("quisquam", 2)
    end

    it "with quisquam (quemquam)" do
      expect_other_pronouns("quemquam", 2)
    end

    it "with quispiam (quempiam)" do
      expect_other_pronouns("quempiam", 2)
    end

    it "with quispiam" do
      expect_other_pronouns("quispiam", 2)
    end

    it "with quibuscum" do
      expect_other_pronouns("quibuscum", 3)
    end

    it "with quonam" do
      expect_other_pronouns("quonam", 2)
    end

    # Might be solved through an exceptional form
    #m = morph("i")
    #m.pronouns.should have(1).item
  end

  it "returns when a unique pronoun like huius is found" do
    morphologizer.should_not receive(:direct_lookup)
    morphologizer.morphologize("huius")
  end

  # Description fixed: it said "his" while the example morphologizes "hic".
  it "continues when a homographic pronoun like hic is found" do
    morphologizer.should receive(:direct_lookup)
    morphologizer.morphologize("hic")
  end
end
144
+
145
# Morphologizes +word+ while expecting that #direct_lookup is never called.
def morphologize_without_direct_lookup(word)
  morphologizer.should_not receive(:direct_lookup)
  morphologizer.morphologize(word)
end

describe "#prepositions" do
  it "returns when a unique preposition like in is found" do
    morphologize_without_direct_lookup("in")
  end

  it "goes on when a not uniq prep like cum is found - another entry should be present and then returned" do
    morphologize_without_direct_lookup("cum").should have(2).items
  end
end

describe "#numerals" do
  it "returns when a roman numeral is found" do
    morphologize_without_direct_lookup("MD").should have(1).item
  end
end

describe "#look_up" do
  context "with conjunctions" do
    it "returns when a unique conjunction like et is found" do
      morphologize_without_direct_lookup("et")
    end
  end

  #context "with subjunctions" do
  #  it "returns when a unique conjunction like et is found" do
  #  end
  #end
end
177
+
178
+ describe "#morphologize" do
179
+ LLT::DbHandler::Stub.setup
180
+
181
describe "returns morphologized forms" do
  # Defines one example per entry. The example is named after the key, its
  # first word is morphologized and must yield the given number of forms.
  def self.it_yields(expected_counts)
    expected_counts.each do |description, count|
      it description do
        morphologizer.morphologize(description[/\A\w+/]).should have(count).items
      end
    end
  end

  context "with nouns" do
    it "ratio" do
      forms = morphologizer.morphologize("ratio")
      forms.should have(2).items
      first_form, second_form = forms
      first_form.casus.should == 1
      second_form.casus.should == 5
    end

    it "homine" do
      forms = morphologizer.morphologize("homine")
      forms.should have(1).item
      forms.first.casus.should == 6
      forms.first.to_s(:segmentized).should == "homin-e"
    end

    it_yields(
      "nox" => 2,
      "serve" => 1,
      "fili - contracted vocative" => 1,
      "filius - ius o declension" => 1
    )
  end

  context "with verbs" do
    it_yields(
      "miserunt" => 1,
      "hortant" => 0, # no active forms
      "hortatur" => 1
    )

    context "and infinitives" do
      # the active one all bring the stupid pass inf...
      it_yields(
        "audire" => 2,
        "audiri" => 1,
        "canare" => 2,
        "canari" => 1,
        "monere" => 2,
        "hortari" => 1
      )
    end
  end

  context "with plain adverbs" do
    it_yields("iam" => 1)
  end

  context "with adverbs from adjectives" do
    it_yields("diligenter" => 1)

    it "laete" do
      # the real world has a noun as well, will never be
      # in the stub db I guess.
      forms = morphologizer.morphologize("laete")
      forms.should have(2).items # there's actually a vocative as well...
      forms.first.casus.should == 5
      forms.map(&:to_s).should == %w{ laete laete }
    end
  end

  context "with adjectives" do
    it_yields("feri" => 4) # all from ferus3
  end

  context "with cardinals" do
    it_yields("duo" => 4, "sex" => 1)
  end

  context "with ethnics" do
    it "Haeduorum" do
      forms = morphologizer.morphologize("Haeduorum")
      forms.should have(2).items
      forms.first.to_s.should == "Haeduorum"
    end
  end

  context "with pronouns" do
    it_yields(
      "quis" => 2, # m && f?
      "quid" => 2, # nom and acc
      "aliquis" => 2,
      "quidque" => 2,
      "quodque" => 2,
      "quisque" => 3,
      "quicquam" => 2,
      "quisquis" => 2,
      "quidquid" => 2,
      "quoquo" => 3, # m f n, it's substantivic!
      "quicquid" => 2
    )

    %w{ unusquisque uniuscuiusque }.each do |word|
      it word do
        morphologizer.morphologize(word).map(&:to_s).should == [word] * 3
      end
    end
  end

  context "with mixed forms" do
    it_yields(
      "ita - adverb and ppp of ire" => 2,
      "fero - ferre and ferus3" => 5, # 1 from ferre, 4 from ferus3
      "subito - adverb and ppp of ire" => 5 # 1 adv, 4 ppp
    )
  end
end
404
+
405
describe "handles irregular verbs" do
  # Each of these forms must morphologize to exactly one result.
  %w{ fiebat fio posse ferri }.each do |word|
    it word do
      morphologizer.morphologize(word).should have(1).item
    end
  end
end
426
+
427
describe "handles prefixed irregular verbs" do
  # word, expected segmentation, and (optionally) the expected tempus.
  # Each word must morphologize to exactly one form.
  [
    ["desum", "de-s-u-m"],
    ["maluit", "malu-it", :pf],
    ["mavult", "mavul-t"],
    ["it", "i-t"],
    ["vult", "vul-t"],
    ["nolumus", "nol-u-mus"],
    ["contulissent", "con-tul-isse-nt"],
    ["intulisset", "in-tul-isse-t"]
  ].each do |word, segmentized, tempus|
    it word do
      forms = morphologizer.morphologize(word)
      forms.should have(1).item
      forms.first.to_s(:segmentized).should == segmentized
      forms.first.tempus.should == tempus if tempus
    end
  end

  it "inito" do
    morphologizer.morphologize("inito").should have(4).items
  end
end
482
+
483
describe "takes an optional keyword argument add_to" do
  # Minimal token stand-in: collects everything that is pushed with #<<.
  let(:token_dummy) do
    collector = Class.new do
      attr_reader :forms

      def initialize
        @forms = []
      end

      def <<(forms)
        @forms += forms
      end
    end
    collector.new
  end

  it "adds the result to the given object if is #<< implemented" do
    returned = morphologizer.morphologize("est", add_to: token_dummy)
    token_dummy.forms.should == returned
  end

  it "does nothing to the given object when #<< it does not respond to" do
    unresponsive = double(respond_to?: false)
    unresponsive.should_not receive(:<<)
    morphologizer.morphologize("est", add_to: unresponsive)
  end
end
503
+
504
it "writes stem pack objects to morphologized forms" do
  # checked in this order: homo first, then est
  %w{ homo est }.each do |word|
    morphologizer.morphologize(word).first.stems.should_not be_nil
  end
end

it "one instance handles multiple requests" do
  results = %w{ homo ratio }.map { |token| morphologizer.morphologize(token) }
  results.should have(2).items

  homo_forms, ratio_forms = results
  (homo_forms.any? && homo_forms.all? { |form| form.to_s == "homo" }).should be_true
  (ratio_forms.any? && ratio_forms.all? { |form| form.to_s == "ratio" }).should be_true
end
523
+ end
524
+ end