llt-morphologizer 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/.travis.yml +8 -0
- data/Gemfile +27 -0
- data/LICENSE.txt +22 -0
- data/README.md +35 -0
- data/Rakefile +6 -0
- data/lib/llt/morphologizer.rb +378 -0
- data/lib/llt/morphologizer/lookup_statement.rb +66 -0
- data/lib/llt/morphologizer/stem_lookup_statement_builder.rb +130 -0
- data/lib/llt/morphologizer/stem_lookup_statement_builder/conjugable.rb +221 -0
- data/lib/llt/morphologizer/stem_lookup_statement_builder/contracted_forms.rb +38 -0
- data/lib/llt/morphologizer/stem_lookup_statement_builder/declinable.rb +214 -0
- data/lib/llt/morphologizer/version.rb +5 -0
- data/llt-morphologizer.gemspec +34 -0
- data/spec/lib/llt/morphologizer/lookup_statement_spec.rb +29 -0
- data/spec/lib/llt/morphologizer/stem_lookup_statement_builder_spec.rb +39 -0
- data/spec/lib/llt/morphologizer_spec.rb +524 -0
- data/spec/spec_helper.rb +27 -0
- metadata +235 -0
@@ -0,0 +1,214 @@
|
|
1
|
+
module LLT::Morphologizer::StemLookupStatementBuilder::Declinable
|
2
|
+
|
3
|
+
# Word-internal suffix fragments (the same -issim-/-ior- pieces that COMPARISON matches).
# NOTE(review): not referenced anywhere in the visible part of this module - confirm it is still used.
DECL_COMPONENTS = %w{ issim errim illim ior} # nd, nt, s, connecting vowels
|
4
|
+
# [slot name, patterns] pair handed to `has` by #nominative.
# The single pattern is zero-width: it only succeeds at the end of a word that
# already ends in one of the listed terminations and consumes nothing itself.
NOMINATIVE_ENDING = [:ending, [ /(?<=us|er|es|u|e|al|ar|is|or|os|o|(?<=[^aeio])s|x|as|ur|men)$/]] # no a, um anymore # because of comparison (?<!i)
|
5
|
+
# [slot name, patterns] pair handed to `has` by #other_case: oblique case endings,
# plus a zero-width match after ior/ius/nter/iter.
# NOTE(review): `(?<!aeo)` and `(?<!aeiou)` reject only the literal sequences "aeo" / "aeiou"
# in front of the ending - confirm whether the character classes `(?<![aeo])` / `(?<![aeiou])` were intended.
OTHER_CASE_ENDING = [:ending, [ /(?<=ior|ius|nter|iter)$|ae$|am$|arum$|as$|is$|(?<!aeo)i$|o$|orum$|os$|(?<!aeiou)e$|ei$|erum$|ebus$|es$|em$|(?<!i)us$|u$|uum$|ua$|ibus$|im$|ia$|ium$|(?<=n)s$|(?<=nt)er$|iter$/]] # ubus
|
6
|
+
# [slot name, patterns] pair handed to `has` by #um_ending; two alternative patterns, "um" and "ui" at the end of the word.
UM_ENDING = [:ending, [ /um$/, /ui$/ ]] # i erased - filium, u erased - suum
|
7
|
+
# [slot name, patterns] pair handed to `has` by #ius_ending: a final "us" that is directly preceded by "i".
IUS_ENDING = [:ending, [ /(?<=i)us$/ ]] # 2013-10-08 solely for filius, Gaius...
|
8
|
+
# [slot name, patterns] pair handed to `has` by #a_ending: a final "a" that is not preceded by "a" or "o".
A_ENDING = [:ending, [ /(?<=[^ao])a$/ ]] # removed u => sua
|
9
|
+
# [slot name, patterns] pair handed to `has` by #pronominal: "us"/"ud" after "ali", or a final "ius".
PRONOMINAL_ENDING = [:ending, [ /(?<=ali)u[sd]$/, /ius$/ ]] # alius aliud
|
10
|
+
# [slot name, patterns] pair handed to `has` by #comparison_or_verbal_extension; fills the :comparison_sign slot.
# NOTE(review): `lim$` / `rim$` match any stem ending in "lim" / "rim" - presumably the tails of -illim- / -errim-; confirm.
COMPARISON = [:comparison_sign, [/ior$|ius$|issim$|lim$|rim$/]] # ior, ius... no ending at all...
|
11
|
+
# [slot name, patterns] pair handed to `has` by #comparison_or_verbal_extension: a final n, nt or nd, stored in the :extension slot.
PPA_OR_GERUND = [:extension, [/n$|nt$|nd$/]]
|
12
|
+
# [slot name, patterns] pair: a final "u" or "e"; only consulted after PPA_OR_GERUND matched.
THEMATIC_VOWEL = [:thematic, [/[ue]$/]]
|
13
|
+
# [slot name, patterns] pair: a final "i"; only consulted after THEMATIC_VOWEL matched.
THEMATIC_I_OF_M = [:thematic, [/i$/]]
|
14
|
+
# [slot name, patterns] pair handed to `has` by #comparison_or_verbal_extension: a final "ur", stored in the :extension slot.
FUTURE_PARTICIPLE = [:extension, [/ur$/]]
|
15
|
+
|
16
|
+
|
17
|
+
# Entry point for the declinable pass: prepares the builder with
# setup(:declinable) and then runs every ending family in a fixed order.
# Evaluates to whatever the final step (contracted_vocative) returns.
def create_declinables
  setup(:declinable)

  steps = [
    :nominative, :other_case, :um_ending, :ius_ending,
    :a_ending, :pronominal, :contracted_vocative
  ]
  steps.map { |step| send(step) }.last
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
# When the word carries a NOMINATIVE_ENDING, requests noun and adjective
# lookups in the :nom column and then clears the :ending slot again.
# Returns nil when the ending does not apply.
def nominative
  return unless has(NOMINATIVE_ENDING)

  [:noun, :adjective].each { |table| look_for(table, :nom) }
  reset(:ending) # ending would be overwritten by prepend otherwise!
end
|
38
|
+
|
39
|
+
# When the word carries an OTHER_CASE_ENDING, requests stem lookups for
# nouns and adjectives and a :ppp lookup for verbs, checks for comparison or
# verbal extensions, and finally resets everything via reset(all).
# Returns nil when the ending does not apply.
def other_case
  return unless has(OTHER_CASE_ENDING)

  [[:noun, :stem], [:adjective, :stem], [:verb, :ppp]].each do |table, col|
    look_for(table, col)
  end
  comparison_or_verbal_extension
  reset(all)
end
|
48
|
+
|
49
|
+
# Same lookup sequence as the other case endings, but triggered by
# UM_ENDING: noun/adjective stems, verb :ppp, extensions, then a full reset.
# Returns nil when the ending does not apply.
def um_ending
  matched = has(UM_ENDING)
  return unless matched

  look_for(:noun, :stem)
  look_for(:adjective, :stem)
  look_for(:verb, :ppp)
  comparison_or_verbal_extension
  reset(all)
end
|
58
|
+
|
59
|
+
# Handles words matching IUS_ENDING with a single noun stem lookup followed
# by a full reset (only filius is looked up here).
# Returns nil when the ending does not apply.
def ius_ending
  return unless has(IUS_ENDING)

  look_for(:noun, :stem)
  reset(all)
end
|
66
|
+
|
67
|
+
# Lookup sequence for words matching A_ENDING: noun and adjective stems,
# verb :ppp, comparison/verbal extensions, then a full reset.
# Returns nil when the ending does not apply.
def a_ending
  if has(A_ENDING)
    { noun: :stem, adjective: :stem, verb: :ppp }.each_pair do |table, col|
      look_for(table, col)
    end
    comparison_or_verbal_extension
    reset(all)
  end
end
|
76
|
+
|
77
|
+
# Handles words matching PRONOMINAL_ENDING with a single adjective stem
# lookup followed by a full reset.
# Returns nil when the ending does not apply.
def pronominal
  return unless has(PRONOMINAL_ENDING)

  look_for(:adjective, :stem)
  reset(all)
end
|
84
|
+
|
85
|
+
# Requests a noun stem lookup when the current stem ends in "i".
# Returns nil otherwise.
def contracted_vocative
  look_for(:noun, :stem) if stem =~ /i$/
end
|
90
|
+
|
91
|
+
# Checks what sits between stem and ending and requests the matching lookups:
#   * a comparison sign      -> adjective stem
#   * n / nt / nd            -> verb :pr; a thematic vowel in front of it
#                               triggers look_for(:same), and a thematic i in
#                               front of that triggers it once more
#   * ur                     -> verb :ppp
# The thematic checks are nested on purpose: each one is only attempted when
# the previous one matched.
def comparison_or_verbal_extension
  look_for(:adjective, :stem) if has(COMPARISON)

  if has(PPA_OR_GERUND)
    look_for(:verb, :pr)
    if has(THEMATIC_VOWEL)
      look_for(:same)
      look_for(:same) if has(THEMATIC_I_OF_M)
    end
  end

  look_for(:verb, :ppp) if has(FUTURE_PARTICIPLE)
end
|
100
|
+
|
101
|
+
# Dispatches on the current +table+ to the method that collects the valid
# inflection classes for it. Returns nil for any other table.
def valid_itypes_for_declinable
  handler = {
    noun:      :valid_noun_classes,
    adjective: :valid_adjective_classes,
    verb:      :valid_verb_classes
  }[table]

  send(handler) if handler
end
|
108
|
+
|
109
|
+
# Appends to +itype+ the noun inflection classes that are plausible for the
# current +column+, +stem+ and +ending+.
#
# Three independent situations are handled:
#   1. column == :nom              - classes are derived from how the stem ends
#   2. column == :stem, no ending  - only the contracted vocative (fili)
#   3. column == :stem with ending - classes are derived from stem + ending
#
# Class codes as used by the inline comments: 3 is the consonantal stem,
# 31/32/33 the vocalic stem groups.
# NOTE(review): 1, 2, 4 and 5 presumably denote the a-, o-, u- and e-declension - confirm.
def valid_noun_classes
  if column == :nom
    case stem # 3 is consonantic stem, 31 vocalic stem - group 1 and so forth
    #when /(?<=a)$/ then itype << 1 # disabled in new morphologizer
    #when /(?<=um)$/ then itype << 2 # disabled in new morphologizer
    when /(?<=us)$/ then itype << 3 << 4 # 2 disabled in new morphologizer # [^i] for comparison. cf ior here and both in Adjective nom ### erased. filius. gaius
    when /(?<=er)$/ then itype << 2 << 3
    when /(?<=es)$/ then itype << 3 # 5 disabled in new morphologizer
    when /(?<=u)$/ then itype << 4
    when /(?<=ar)$/ then itype << 3 << 31 # added for Caesar, who is 3. could be done better, but performance won't count here.
    when /(?<=e|al|ar)$/ then itype << 31
    when /(?<=is)$/ then itype << 3 << 32 << 33
    when /(?<=[^aeiou]s)$/ then itype << 3 << 33 # ns was excluded before. we don't know why.
    when /(?<=x)$/ then itype << 3 << 33 # nox! 2013-10-07 20:51
    # No empty alternative in this look-behind: one would make it succeed for
    # every stem and defeat the [^i] guard that keeps comparatives in -ior out.
    when /(?<=[^i]or|os|o|as|ur|men)$/ then itype << 3
    end
  end

  if column == :stem && ending.empty? && stem =~ /i$/ then itype << 2; end # fili vocative

  if column == :stem && !ending.empty? # nouns that end like a comparison?!
    case stem + ending # watch out: regexps must be redefined... stem+ending doesn't work. check corporum.
    when /[^aeou]a$/ then itype << 1 << 2 << 3 << 31 # a decl word whose stem ends with a vowel?
    when /ae$|am$|arum$|as$/ then itype << 1
    when /is$/ then itype << 1 << 2 << 3 << 31 << 32 << 33
    when /ui$/ then itype << 2 << 4
    when /[^aeou]i$/ then itype << 2 << 3 << 31 << 32 << 33
    when /um$/ then itype << 2 << 3 << 4 << 31 << 32 << 33 # [^i] erased. filius
    when /o$|orum$|os$/ then itype << 2
    when /ei$|erum$|ebus$/ then itype << 5
    when /[^aeou]e$/ then itype << 2 << 3 << 33 << 5 # i allowed, acie
    when /es$/ then itype << 3 << 32 << 33 << 5
    when /em$/ then itype << 3 << 33 << 5
    when /ibus$/ then itype << 3 << 31 << 32 << 33 << 4
    when /us$|u$|ua$/ then itype << 2 << 4 # adds 2 in new morphologizer - evaluated through stem now
    when /im$/ then itype << 32
    when /ia$/ then itype << 31 # ineffective here, searched together with a now
    #when /ium$/ then itype << 31 << 32 << 33
    end

    # /um$/ above wins over /erum$/, so class 5 has to be added explicitly.
    itype << 5 if ending == "erum" # rerum is missed
  end
end
|
152
|
+
|
153
|
+
# Appends to +itype+ the adjective inflection classes that are plausible for
# the current +column+, +stem+, +ending+ and +comparison_sign+.
#
# The three checks run one after the other and are independent of each other;
# the method evaluates to the outcome of the last one.
def valid_adjective_classes
  # 1) uninflected nominative form
  if column == :nom
    case stem
    when "maior"
      itype << 3
    when /(?<=us|er|is|[^i]or)$/
      itype << 1 << 3 << 5
    when /(?<=ar|s|x)$/
      itype << 3
    end
  end

  # 2) stem followed by an ending
  if column == :stem && !ending.empty?
    # vacui - 2013-10-07 23:42 - well this is weird.
    # Might account for vacui - but certainly not for exercitui,
    # which will arrive here, even if it's not needed in any event.
    # So do it only for vacu - and god knows what else...
    # Moves the leading "u" of the ending over to the stem, in place.
    if stem == "vacu" && ending == "ui"
      stem << ending.slice!("u")
    end

    full_form = stem + ending
    case full_form
    when /ius$/
      itype << 5
    when /ter$/
      itype << 3 << 5 # 5? not sure. 2013-10-07 20:35
    when /[a-z]$/
      itype << 1 << 3 << 5
    end
  end

  # 3) bare stem that carries a comparison sign but no ending
  if column == :stem && !comparison_sign.empty? && ending.empty?
    case stem
    when /$/
      itype << 1 << 3
    end
  end
end
|
182
|
+
|
183
|
+
# Appends to +itype+ the verb inflection classes that are plausible for the
# current +column+, +stem+, +ending+, +extension+ and +thematic+.
#
# Four independent checks run in sequence; the method evaluates to the
# outcome of the last one (the :pf check).
def valid_verb_classes
  # Participle stems: an ending is present and the extension is empty or "ur".
  if column == :ppp && !ending.empty?
    if extension.empty? || extension == "ur"
      case stem
      when /(?<=t|s|x)$/
        itype << 1 << 2 << 3 << 4 << 5
      end
    end
  end

  # Present stems, and participle stems that carry an extension.
  if column == :pr || (column == :ppp && !extension.empty?)
    # A bare "n" extension is only accepted together with the ending "s".
    if extension != "n" || ending == "s"
      case stem
      when /a$/
        itype << 1
      when /i$/
        itype << 4
      when /e$/
        itype << 2
      when /[^aie]$/
        itype << 3 if thematic == "e"
        itype << 5 if thematic == "ie" || ending == "re"
      end
    end
  end

  # Present stem with ending "i" and no extension.
  if column == :pr && ending == "i" && extension.empty?
    case stem
    when /[^aie]$/
      itype << 3 << 5
    end
  end

  # perfect composition
  itype << 1 << 2 << 3 << 4 << 5 if column == :pf
end
|
214
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for llt-morphologizer.
# Puts ./lib on the load path first so the version constant can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'llt/morphologizer/version'

Gem::Specification.new do |gem|
  gem.name        = 'llt-morphologizer'
  gem.version     = LLT::Morphologizer::VERSION
  gem.authors     = ['LFDM']
  gem.email       = ['1986gh@gmail.com']
  gem.summary     = 'Morphological parsing of Latin forms'
  gem.description = gem.summary
  gem.homepage    = 'http://www.latin-language-toolkit.net'
  gem.license     = 'MIT'

  # Everything tracked by git is shipped with the gem.
  tracked_files     = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only dependencies: name plus optional version requirement.
  [
    ['bundler', '~> 1.5'],
    ['rake'],
    ['rspec'],
    ['simplecov', '~> 0.7'],
    ['llt-db_handler-stub']
  ].each { |dependency| gem.add_development_dependency(*dependency) }

  # Runtime dependencies, all without a version requirement.
  %w[
    llt-constants
    llt-core
    llt-core_extensions
    llt-db_handler
    llt-form_builder
    llt-helpers
    llt-logger
  ].each { |name| gem.add_dependency(name) }
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Examples for LookupStatement, built around one fixture: the stem "ros"
# looked up as a noun stem of inflection class 1 with the ending "am".
describe LLT::Morphologizer::LookupStatement do
  let(:ls)    { LLT::Morphologizer::LookupStatement }
  let(:rosam) { ls.new("ros", :noun, :stem, [1], { ending: "am" }) }

  describe "#stem_type" do
    it("returns the stem type") { rosam.stem_type.should == :stem }
  end

  describe "#type" do
    it("returns the type") { rosam.type.should == :noun }
  end

  describe "#to_query" do
    it "builds a query in a hash format, that corresponds with the db handler interface" do
      expected = {
        type: :noun,
        stem_type: :stem,
        stem: "ros",
        restrictions: { type: :inflection_class, values: [1] }
      }
      rosam.to_query.should == expected
    end

    it "build a query for laudavit" do
      laudavit = ls.new("laudav", :verb, :pf, ["v"], { ending: "it" })
      expected = {
        type: :verb,
        stem_type: :pf,
        stem: "laudav",
        restrictions: { type: :pf_composition, values: ["v"] }
      }
      laudavit.to_query.should == expected
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Examples for StemLookupStatementBuilder#statements. Every example inspects
# the query hashes produced for a single word.
describe LLT::Morphologizer::StemLookupStatementBuilder do

  # Builds a statement builder for +word+ with a fresh logger.
  def slsb(word)
    LLT::Morphologizer::StemLookupStatementBuilder.new(word, LLT::Logger.new)
  end

  # All lookup statements for +word+, converted to their query hashes.
  def queries_for(word)
    slsb(word).statements.map(&:to_query)
  end

  describe "#statements" do
    it "creates no separate nominative lookup request for a, um, es and us endings - different from old implementation" do
      queries_for("rosa").count { |h| h[:stem_type] == :nom }.should == 0
      queries_for("templum").count { |h| h[:stem_type] == :nom }.should == 0
      queries_for("res").count { |h| h[:stem_type] == :nom && h[:restrictions][:values].include?(5) }.should == 0
      queries_for("hortus").count { |h| h[:stem_type] == :nom && h[:restrictions][:values].include?(2) }.should == 0
    end

    it "searches in persona, place and ethnic table when a word is capitalized" do
      plato_queries = queries_for("Plato")
      plato_queries.select { |h| h[:type] == :persona }.should_not be_empty
      plato_queries.select { |h| h[:type] == :place }.should_not be_empty
      plato_queries.select { |h| h[:type] == :ethnic }.should_not be_empty
    end

    it "only stems are searched in the ethnic table" do
      queries = queries_for("Haeduus")
      queries.none? { |h| h[:type] == :ethnic && h[:stem_type] == :nom }.should be_true
      queries.any? { |h| h[:type] == :ethnic && h[:stem_type] == :stem }.should be_true
    end

    it "searches for capitalized words in downcase, expect for names, places and ethnics" do
      plato_queries = queries_for("Plato")

      # common nouns are looked up in lower case only
      plato_queries.any? { |h| h[:type] == :noun && h[:stem] =~ /^[a-z].*/ }.should be_true
      plato_queries.none? { |h| h[:type] == :noun && h[:stem] =~ /^[A-Z].*/ }.should be_true

      # names keep their capital letter
      plato_queries.any? { |h| h[:type] == :persona && h[:stem] =~ /^[A-Z].*/ }.should be_true
      plato_queries.none? { |h| h[:type] == :persona && h[:stem] =~ /^[a-z].*/ }.should be_true
    end
  end
end
|
@@ -0,0 +1,524 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Morphologizer do
|
4
|
+
it 'should have a version number' do
|
5
|
+
LLT::Morphologizer::VERSION.should_not be_nil
|
6
|
+
end
|
7
|
+
|
8
|
+
let(:stub_db) { LLT::DbHandler::Stub.new }
|
9
|
+
let(:morphologizer) { LLT::Morphologizer.new(db: stub_db) }
|
10
|
+
|
11
|
+
# Builds a Morphologizer backed by a fresh stub database and runs its
# (non-public, hence `send`) setup step for +word+, so that individual lookup
# steps can be exercised in isolation. Returns the prepared morphologizer.
def morph_stub(word)
  LLT::Morphologizer.new(db: LLT::DbHandler::Stub.new).tap do |morphologizer|
    morphologizer.send(:setup, word)
  end
end
|
16
|
+
|
17
|
+
before(:all) { LLT::DbHandler::Stub.setup }
|
18
|
+
|
19
|
+
describe "#personal_pronons" do
|
20
|
+
# this tests some private methods just to be safe
|
21
|
+
context "morphologizes pronouns" do
|
22
|
+
it "with se" do
|
23
|
+
se = morph_stub("se")
|
24
|
+
se.send(:clook_up, :personal_pronouns).should have(4).items
|
25
|
+
se.send(:unique_pers_pron?).should be_true
|
26
|
+
end
|
27
|
+
|
28
|
+
it "with Se" do
|
29
|
+
se = morph_stub("Se")
|
30
|
+
se.send(:clook_up, :personal_pronouns).should have(4).items
|
31
|
+
se.send(:unique_pers_pron?).should be_true
|
32
|
+
end
|
33
|
+
|
34
|
+
it "with secum" do
|
35
|
+
secum = morph_stub("secum")
|
36
|
+
forms = secum.send(:clook_up, :personal_pronouns)
|
37
|
+
secum.send(:unique_pers_pron?).should be_true
|
38
|
+
forms.should have(2).items
|
39
|
+
forms.first.to_s(:segmentized).should == "se-cum"
|
40
|
+
end
|
41
|
+
|
42
|
+
it "with nosmet" do
|
43
|
+
nosmet = morph_stub("nosmet")
|
44
|
+
forms = nosmet.send(:clook_up, :personal_pronouns)
|
45
|
+
nosmet.send(:unique_pers_pron?).should be_true
|
46
|
+
forms.should have(2).items
|
47
|
+
forms.first.to_s(:segmentized).should == "nos-met"
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "#other_pronouns" do
|
53
|
+
context "morphologizes pronouns" do
|
54
|
+
it "with hic" do
|
55
|
+
morph_stub("hic").send(:other_pronouns).should have(1).item
|
56
|
+
morph_stub("hunc").send(:other_pronouns).should have(1).item
|
57
|
+
morph_stub("huic").send(:other_pronouns).should have(3).item
|
58
|
+
end
|
59
|
+
|
60
|
+
it "with aliqui" do
|
61
|
+
morph_stub("alicuius").send(:other_pronouns).should have(3).items
|
62
|
+
end
|
63
|
+
|
64
|
+
it "with quicumque" do
|
65
|
+
morph_stub("quibuscumque").send(:other_pronouns).should have(6).items
|
66
|
+
end
|
67
|
+
|
68
|
+
it "with quilibet" do
|
69
|
+
morph_stub("quaelibet").send(:other_pronouns).should have(4).items
|
70
|
+
end
|
71
|
+
|
72
|
+
it "with quivis" do
|
73
|
+
morph_stub("quodvis").send(:other_pronouns).should have(2).items
|
74
|
+
end
|
75
|
+
|
76
|
+
it "with quidam" do
|
77
|
+
morph_stub("quibusdam").send(:other_pronouns).should have(6).items
|
78
|
+
end
|
79
|
+
|
80
|
+
it "with is" do
|
81
|
+
morph_stub("eas").send(:other_pronouns).should have(1).item
|
82
|
+
morph_stub("is").send(:other_pronouns).should have(7).item # sadly - eis...
|
83
|
+
morph_stub("ii").send(:other_pronouns).should have(1).item
|
84
|
+
end
|
85
|
+
|
86
|
+
it "with idem" do
|
87
|
+
morph_stub("eorundem").send(:other_pronouns).should have(2).items
|
88
|
+
morph_stub("eisdem").send(:other_pronouns).should have(6).items
|
89
|
+
morph_stub("iisdem").send(:other_pronouns).should have(6).items
|
90
|
+
end
|
91
|
+
|
92
|
+
it "with uter" do
|
93
|
+
morph_stub("utrum").send(:other_pronouns).should have(3).items
|
94
|
+
end
|
95
|
+
|
96
|
+
it "with uterque" do
|
97
|
+
morph_stub("utrumque").send(:other_pronouns).should have(3).items
|
98
|
+
morph_stub("utriusque").send(:other_pronouns).should have(3).items
|
99
|
+
end
|
100
|
+
|
101
|
+
it "with quisque" do
|
102
|
+
morph_stub("cuiusque").send(:other_pronouns).should have(3).items
|
103
|
+
end
|
104
|
+
|
105
|
+
it "with quisquam"do
|
106
|
+
morph_stub("quisquam").send(:other_pronouns).should have(2).items
|
107
|
+
end
|
108
|
+
|
109
|
+
it "with quisquam"do
|
110
|
+
morph_stub("quemquam").send(:other_pronouns).should have(2).items
|
111
|
+
end
|
112
|
+
|
113
|
+
it "with quispiam" do
|
114
|
+
morph_stub("quempiam").send(:other_pronouns).should have(2).items
|
115
|
+
end
|
116
|
+
|
117
|
+
it "with quispiam" do
|
118
|
+
morph_stub("quispiam").send(:other_pronouns).should have(2).items
|
119
|
+
end
|
120
|
+
|
121
|
+
it "with quibuscum" do
|
122
|
+
morph_stub("quibuscum").send(:other_pronouns).should have(3).items
|
123
|
+
end
|
124
|
+
|
125
|
+
it "with quonam" do
|
126
|
+
morph_stub("quonam").send(:other_pronouns).should have(2).items
|
127
|
+
end
|
128
|
+
|
129
|
+
# Might be solved through an exceptional form
|
130
|
+
#m = morph("i")
|
131
|
+
#m.pronouns.should have(1).item
|
132
|
+
end
|
133
|
+
|
134
|
+
it "returns when a unique pronoun like huius is found" do
|
135
|
+
morphologizer.should_not receive(:direct_lookup)
|
136
|
+
morphologizer.morphologize("huius")
|
137
|
+
end
|
138
|
+
|
139
|
+
it "continues when a homographic pronoun like his is found" do
|
140
|
+
morphologizer.should receive(:direct_lookup)
|
141
|
+
morphologizer.morphologize("hic")
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
describe "#prepositions" do
|
146
|
+
it "returns when a unique preposition like in is found" do
|
147
|
+
morphologizer.should_not receive(:direct_lookup)
|
148
|
+
morphologizer.morphologize("in")
|
149
|
+
end
|
150
|
+
|
151
|
+
it "goes on when a not uniq prep like cum is found - another entry should be present and then returned" do
|
152
|
+
morphologizer.should_not receive(:direct_lookup)
|
153
|
+
morphologizer.morphologize("cum").should have(2).items
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
describe "#numerals" do
|
158
|
+
it "returns when a roman numeral is found" do
|
159
|
+
morphologizer.should_not receive(:direct_lookup)
|
160
|
+
morphologizer.morphologize("MD").should have(1).item
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
describe "#look_up" do
|
165
|
+
context "with conjunctions" do
|
166
|
+
it "returns when a unique conjunction like et is found" do
|
167
|
+
morphologizer.should_not receive(:direct_lookup)
|
168
|
+
morphologizer.morphologize("et")
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
#context "with subjunctions" do
|
173
|
+
# it "returns when a unique conjunction like et is found" do
|
174
|
+
# end
|
175
|
+
#end
|
176
|
+
end
|
177
|
+
|
178
|
+
describe "#morphologize" do
|
179
|
+
LLT::DbHandler::Stub.setup
|
180
|
+
|
181
|
+
describe "returns morphologized forms" do
|
182
|
+
context "with nouns" do
|
183
|
+
it "ratio" do
|
184
|
+
f = morphologizer.morphologize("ratio")
|
185
|
+
f.should have(2).item
|
186
|
+
f1, f2 = f
|
187
|
+
f1.casus.should == 1
|
188
|
+
f2.casus.should == 5
|
189
|
+
end
|
190
|
+
|
191
|
+
it "homine" do
|
192
|
+
f = morphologizer.morphologize("homine")
|
193
|
+
f.should have(1).item
|
194
|
+
f.first.casus.should == 6
|
195
|
+
f.first.to_s(:segmentized).should == "homin-e"
|
196
|
+
end
|
197
|
+
|
198
|
+
it "nox" do
|
199
|
+
f = morphologizer.morphologize("nox")
|
200
|
+
f.should have(2).items
|
201
|
+
end
|
202
|
+
|
203
|
+
it "serve" do
|
204
|
+
f = morphologizer.morphologize("serve")
|
205
|
+
f.should have(1).item
|
206
|
+
end
|
207
|
+
|
208
|
+
it "fili - contracted vocative" do
|
209
|
+
f = morphologizer.morphologize("fili")
|
210
|
+
f.should have(1).item
|
211
|
+
end
|
212
|
+
|
213
|
+
it "filius - ius o declension" do
|
214
|
+
f = morphologizer.morphologize("filius")
|
215
|
+
f.should have(1).item
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
context "with verbs" do
|
220
|
+
it "miserunt" do
|
221
|
+
f = morphologizer.morphologize("miserunt")
|
222
|
+
f.should have(1).item
|
223
|
+
end
|
224
|
+
|
225
|
+
it "hortant" do
|
226
|
+
f = morphologizer.morphologize("hortant")
|
227
|
+
f.should have(0).items # no active forms
|
228
|
+
end
|
229
|
+
|
230
|
+
it "hortatur" do
|
231
|
+
f = morphologizer.morphologize("hortatur")
|
232
|
+
f.should have(1).item
|
233
|
+
end
|
234
|
+
|
235
|
+
context "and infinitives" do
|
236
|
+
# the active one all bring the stupid pass inf...
|
237
|
+
it "audire" do
|
238
|
+
f = morphologizer.morphologize("audire")
|
239
|
+
f.should have(2).items
|
240
|
+
end
|
241
|
+
|
242
|
+
it "audiri" do
|
243
|
+
f = morphologizer.morphologize("audiri")
|
244
|
+
f.should have(1).item
|
245
|
+
end
|
246
|
+
|
247
|
+
it "canare" do
|
248
|
+
f = morphologizer.morphologize("canare")
|
249
|
+
f.should have(2).items
|
250
|
+
end
|
251
|
+
|
252
|
+
it "canari" do
|
253
|
+
f = morphologizer.morphologize("canari")
|
254
|
+
f.should have(1).items
|
255
|
+
end
|
256
|
+
|
257
|
+
it "monere" do
|
258
|
+
f = morphologizer.morphologize("monere")
|
259
|
+
f.should have(2).items
|
260
|
+
end
|
261
|
+
|
262
|
+
it "hortari" do
|
263
|
+
f = morphologizer.morphologize("hortari")
|
264
|
+
f.should have(1).items
|
265
|
+
end
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
context "with plain adverbs" do
|
270
|
+
it "iam" do
|
271
|
+
f = morphologizer.morphologize("iam")
|
272
|
+
f.should have(1).item
|
273
|
+
end
|
274
|
+
end
|
275
|
+
|
276
|
+
context "with adverbs from adjectives" do
|
277
|
+
it "diligenter" do
|
278
|
+
f = morphologizer.morphologize("diligenter")
|
279
|
+
f.should have(1).item
|
280
|
+
end
|
281
|
+
|
282
|
+
it "laete" do
|
283
|
+
# the real world has a noun as well, will never be
|
284
|
+
# in the stub db I guess.
|
285
|
+
f = morphologizer.morphologize("laete")
|
286
|
+
f.should have(2).item # there's actually a vocative as well...
|
287
|
+
f.first.casus.should == 5
|
288
|
+
f.map(&:to_s).should == %w{ laete laete }
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
context "with adjectives" do
|
293
|
+
it "feri" do
|
294
|
+
f = morphologizer.morphologize("feri")
|
295
|
+
f.should have(4).items # all from ferus3
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
context "with cardinals" do
|
300
|
+
it "duo" do
|
301
|
+
f = morphologizer.morphologize("duo")
|
302
|
+
f.should have(4).items
|
303
|
+
end
|
304
|
+
|
305
|
+
it "sex" do
|
306
|
+
f = morphologizer.morphologize("sex")
|
307
|
+
f.should have(1).item
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
context "with ethnics" do
|
312
|
+
it "Haeduorum" do
|
313
|
+
f = morphologizer.morphologize("Haeduorum")
|
314
|
+
f.should have(2).items
|
315
|
+
f.first.to_s.should == "Haeduorum"
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
context "with pronouns" do
|
320
|
+
it "quis" do
|
321
|
+
f = morphologizer.morphologize("quis")
|
322
|
+
f.should have(2).items # m && f?
|
323
|
+
end
|
324
|
+
|
325
|
+
it "quid" do
|
326
|
+
f = morphologizer.morphologize("quid")
|
327
|
+
f.should have(2).items # nom and acc
|
328
|
+
end
|
329
|
+
|
330
|
+
it "aliquis" do
|
331
|
+
f = morphologizer.morphologize("aliquis")
|
332
|
+
f.should have(2).items
|
333
|
+
end
|
334
|
+
|
335
|
+
it "quidque" do
|
336
|
+
f = morphologizer.morphologize("quidque")
|
337
|
+
f.should have(2).items
|
338
|
+
end
|
339
|
+
|
340
|
+
it "quodque" do
|
341
|
+
f = morphologizer.morphologize("quodque")
|
342
|
+
f.should have(2).items
|
343
|
+
end
|
344
|
+
|
345
|
+
it "quisque" do
|
346
|
+
f = morphologizer.morphologize("quisque")
|
347
|
+
f.should have(3).items
|
348
|
+
end
|
349
|
+
|
350
|
+
it "quicquam" do
|
351
|
+
f = morphologizer.morphologize("quicquam")
|
352
|
+
f.should have(2).items
|
353
|
+
end
|
354
|
+
|
355
|
+
it "quisquis" do
|
356
|
+
f = morphologizer.morphologize("quisquis")
|
357
|
+
f.should have(2).items
|
358
|
+
end
|
359
|
+
|
360
|
+
it "quidquid" do
|
361
|
+
f = morphologizer.morphologize("quidquid")
|
362
|
+
f.should have(2).items
|
363
|
+
end
|
364
|
+
|
365
|
+
it "quoquo" do
|
366
|
+
f = morphologizer.morphologize("quoquo")
|
367
|
+
f.should have(3).item # m f n, it's substantivic!
|
368
|
+
end
|
369
|
+
|
370
|
+
it "quicquid" do
|
371
|
+
f = morphologizer.morphologize("quicquid")
|
372
|
+
f.should have(2).items
|
373
|
+
end
|
374
|
+
|
375
|
+
it "unusquisque" do
|
376
|
+
f = morphologizer.morphologize("unusquisque")
|
377
|
+
f.map(&:to_s).should == %w{ unusquisque } * 3
|
378
|
+
end
|
379
|
+
|
380
|
+
it "uniuscuiusque" do
|
381
|
+
f = morphologizer.morphologize("uniuscuiusque")
|
382
|
+
f.map(&:to_s).should == %w{ uniuscuiusque } * 3
|
383
|
+
end
|
384
|
+
|
385
|
+
end
|
386
|
+
|
387
|
+
context "with mixed forms" do
|
388
|
+
it "ita - adverb and ppp of ire" do
|
389
|
+
f = morphologizer.morphologize("ita")
|
390
|
+
f.should have(2).item
|
391
|
+
end
|
392
|
+
|
393
|
+
it "fero - ferre and ferus3" do
|
394
|
+
f = morphologizer.morphologize("fero")
|
395
|
+
f.should have(5).items # 1 from ferre, 4 from ferus3
|
396
|
+
end
|
397
|
+
|
398
|
+
it "subito - adverb and ppp of ire" do
|
399
|
+
f = morphologizer.morphologize("subito")
|
400
|
+
f.should have(5).items # 1 adv, 4 ppp
|
401
|
+
end
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
describe "handles irregular verbs" do
|
406
|
+
it "fiebat" do
|
407
|
+
f = morphologizer.morphologize("fiebat")
|
408
|
+
f.should have(1).item
|
409
|
+
end
|
410
|
+
|
411
|
+
it "fio" do
|
412
|
+
f = morphologizer.morphologize("fio")
|
413
|
+
f.should have(1).item
|
414
|
+
end
|
415
|
+
|
416
|
+
it "posse" do
|
417
|
+
f = morphologizer.morphologize("posse")
|
418
|
+
f.should have(1).item
|
419
|
+
end
|
420
|
+
|
421
|
+
it "ferri" do
|
422
|
+
f = morphologizer.morphologize("ferri")
|
423
|
+
f.should have(1).item
|
424
|
+
end
|
425
|
+
end
|
426
|
+
|
427
|
+
describe "handles prefixed irregular verbs" do
|
428
|
+
it "desum" do
|
429
|
+
f = morphologizer.morphologize("desum")
|
430
|
+
f.should have(1).item
|
431
|
+
f.first.to_s(:segmentized).should == "de-s-u-m"
|
432
|
+
end
|
433
|
+
|
434
|
+
it "maluit" do
|
435
|
+
f = morphologizer.morphologize("maluit")
|
436
|
+
f.should have(1).item
|
437
|
+
f.first.to_s(:segmentized).should == "malu-it"
|
438
|
+
f.first.tempus.should == :pf
|
439
|
+
end
|
440
|
+
|
441
|
+
it "mavult" do
|
442
|
+
f = morphologizer.morphologize("mavult")
|
443
|
+
f.should have(1).item
|
444
|
+
f.first.to_s(:segmentized).should == "mavul-t"
|
445
|
+
end
|
446
|
+
|
447
|
+
it "it" do
|
448
|
+
f = morphologizer.morphologize("it")
|
449
|
+
f.should have(1).item
|
450
|
+
f.first.to_s(:segmentized).should == "i-t"
|
451
|
+
end
|
452
|
+
|
453
|
+
it "vult" do
|
454
|
+
f = morphologizer.morphologize("vult")
|
455
|
+
f.should have(1).item
|
456
|
+
f.first.to_s(:segmentized).should == "vul-t"
|
457
|
+
end
|
458
|
+
|
459
|
+
it "nolumus" do
|
460
|
+
f = morphologizer.morphologize("nolumus")
|
461
|
+
f.should have(1).item
|
462
|
+
f.first.to_s(:segmentized).should == "nol-u-mus"
|
463
|
+
end
|
464
|
+
|
465
|
+
it "contulissent" do
|
466
|
+
f = morphologizer.morphologize("contulissent")
|
467
|
+
f.should have(1).item
|
468
|
+
f.first.to_s(:segmentized).should == "con-tul-isse-nt"
|
469
|
+
end
|
470
|
+
|
471
|
+
it "intulisset" do
|
472
|
+
f = morphologizer.morphologize("intulisset")
|
473
|
+
f.should have(1).item
|
474
|
+
f.first.to_s(:segmentized).should == "in-tul-isse-t"
|
475
|
+
end
|
476
|
+
|
477
|
+
it "inito" do
|
478
|
+
f = morphologizer.morphologize("inito")
|
479
|
+
f.should have(4).item
|
480
|
+
end
|
481
|
+
end
|
482
|
+
|
483
|
+
describe "takes an optional keyword argument add_to" do
|
484
|
+
let(:token_dummy) do
|
485
|
+
Class.new do
|
486
|
+
attr_reader :forms
|
487
|
+
def initialize; @forms = []; end
|
488
|
+
def <<(forms); @forms += forms; end
|
489
|
+
end.new
|
490
|
+
end
|
491
|
+
|
492
|
+
it "adds the result to the given object if is #<< implemented" do
|
493
|
+
forms = morphologizer.morphologize("est", add_to: token_dummy)
|
494
|
+
token_dummy.forms.should == forms
|
495
|
+
end
|
496
|
+
|
497
|
+
it "does nothing to the given object when #<< it does not respond to" do
|
498
|
+
token = double(respond_to?: false)
|
499
|
+
token.should_not receive(:<<)
|
500
|
+
morphologizer.morphologize("est", add_to: token)
|
501
|
+
end
|
502
|
+
end
|
503
|
+
|
504
|
+
it "writes stem pack objects to morphologized forms" do
|
505
|
+
forms = morphologizer.morphologize('homo')
|
506
|
+
homo = forms.first
|
507
|
+
homo.stems.should_not be_nil
|
508
|
+
|
509
|
+
forms = morphologizer.morphologize('est')
|
510
|
+
est = forms.first
|
511
|
+
est.stems.should_not be_nil
|
512
|
+
end
|
513
|
+
|
514
|
+
it "one instance handles multiple requests" do
|
515
|
+
tokens = %w{ homo ratio }
|
516
|
+
forms = tokens.map { |t| morphologizer.morphologize(t) }
|
517
|
+
forms.should have(2).items
|
518
|
+
h = forms[0]
|
519
|
+
r = forms[1]
|
520
|
+
(h.any? && h.all? { |f| f.to_s == "homo"}) .should be_true
|
521
|
+
(r.any? && r.all? { |f| f.to_s == "ratio"}).should be_true
|
522
|
+
end
|
523
|
+
end
|
524
|
+
end
|