llt-morphologizer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/.travis.yml +8 -0
- data/Gemfile +27 -0
- data/LICENSE.txt +22 -0
- data/README.md +35 -0
- data/Rakefile +6 -0
- data/lib/llt/morphologizer.rb +378 -0
- data/lib/llt/morphologizer/lookup_statement.rb +66 -0
- data/lib/llt/morphologizer/stem_lookup_statement_builder.rb +130 -0
- data/lib/llt/morphologizer/stem_lookup_statement_builder/conjugable.rb +221 -0
- data/lib/llt/morphologizer/stem_lookup_statement_builder/contracted_forms.rb +38 -0
- data/lib/llt/morphologizer/stem_lookup_statement_builder/declinable.rb +214 -0
- data/lib/llt/morphologizer/version.rb +5 -0
- data/llt-morphologizer.gemspec +34 -0
- data/spec/lib/llt/morphologizer/lookup_statement_spec.rb +29 -0
- data/spec/lib/llt/morphologizer/stem_lookup_statement_builder_spec.rb +39 -0
- data/spec/lib/llt/morphologizer_spec.rb +524 -0
- data/spec/spec_helper.rb +27 -0
- metadata +235 -0
@@ -0,0 +1,214 @@
|
|
1
|
+
module LLT::Morphologizer::StemLookupStatementBuilder::Declinable
|
2
|
+
|
3
|
+
# Suffix fragments of compared adjective forms.
# Original note: nd, nt, s, bindevokale (connecting vowels).
# NOTE(review): not referenced by any method visible in this module - confirm it is still needed.
DECL_COMPONENTS = %w[issim errim illim ior]

# Every constant below pairs the name of a word component with the patterns
# that recognise it, and is passed to `has` by the methods of this module.
# NOTE(review): `has` is defined elsewhere; presumably it cuts the matched part
# off the stem and stores it under the component name - confirm.

# Nominative endings. Original note: no a, um anymore; (?<!i) because of comparison.
NOMINATIVE_ENDING = [
  :ending,
  [/(?<=us|er|es|u|e|al|ar|is|or|os|o|(?<=[^aeio])s|x|as|ur|men)$/]
]

# Endings of all other cases. Original note: ubus.
# NOTE(review): `(?<!aeo)` and `(?<!aeiou)` are lookbehinds for the literal
# sequences "aeo" / "aeiou", not character classes - confirm this is intended
# (a character class would, for instance, reject the -e of acie).
OTHER_CASE_ENDING = [
  :ending,
  [/(?<=ior|ius|nter|iter)$|ae$|am$|arum$|as$|is$|(?<!aeo)i$|o$|orum$|os$|(?<!aeiou)e$|ei$|erum$|ebus$|es$|em$|(?<!i)us$|u$|uum$|ua$|ibus$|im$|ia$|ium$|(?<=n)s$|(?<=nt)er$|iter$/]
]

# i erased - filium, u erased - suum
UM_ENDING = [:ending, [/um$/, /ui$/]]

# 2013-10-08 solely for filius, Gaius...
IUS_ENDING = [:ending, [/(?<=i)us$/]]

# removed u => sua
A_ENDING = [:ending, [/(?<=[^ao])a$/]]

# alius aliud
PRONOMINAL_ENDING = [:ending, [/(?<=ali)u[sd]$/, /ius$/]]

# ior, ius... no ending at all...
COMPARISON = [:comparison_sign, [/ior$|ius$|issim$|lim$|rim$/]]

# Extensions n / nt / nd (judging by the name: present participle or gerund).
PPA_OR_GERUND = [:extension, [/n$|nt$|nd$/]]

# Thematic vowels u / e, and the thematic i checked after them.
THEMATIC_VOWEL  = [:thematic, [/[ue]$/]]
THEMATIC_I_OF_M = [:thematic, [/i$/]]

# Extension ur (judging by the name: future participle).
FUTURE_PARTICIPLE = [:extension, [/ur$/]]
|
15
|
+
|
16
|
+
|
17
|
+
# Entry point for declinable words: calls setup(:declinable) and then runs
# every ending check of this module in a fixed order.
#
# Returns whatever the last step (contracted_vocative) returns.
def create_declinables
  setup(:declinable)

  # The order is significant: it is the order in which the checks run.
  %i[
    nominative
    other_case
    um_ending
    ius_ending
    a_ending
    pronominal
    contracted_vocative
  ].map { |step| send(step) }.last
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
# Nominative check: when `has NOMINATIVE_ENDING` succeeds, a noun and an
# adjective lookup are issued against the :nom column and :ending is reset.
#
# Returns nil when the check fails, otherwise the result of reset.
def nominative
  return unless has(NOMINATIVE_ENDING)

  look_for(:noun, :nom)
  look_for(:adjective, :nom)
  reset(:ending) # ending would be overwritten by prepend otherwise!
end
|
38
|
+
|
39
|
+
# Oblique-case check: when `has OTHER_CASE_ENDING` succeeds, looks for noun and
# adjective stems and for verb :ppp stems, tries comparison / verbal
# extensions, and finally resets everything (`reset all`).
#
# Returns nil when the check fails, otherwise the result of reset.
def other_case
  return unless has(OTHER_CASE_ENDING)

  look_for(:noun, :stem)
  look_for(:adjective, :stem)
  look_for(:verb, :ppp)
  comparison_or_verbal_extension
  reset(all)
end
|
48
|
+
|
49
|
+
# Check for the patterns in UM_ENDING (um / ui): runs the same lookups as the
# other oblique-case checks - noun and adjective stems, verb :ppp stems,
# comparison / verbal extensions - followed by `reset all`.
#
# Returns nil when the check fails, otherwise the result of reset.
def um_ending
  return unless has(UM_ENDING)

  [%i[noun stem], %i[adjective stem], %i[verb ppp]].each do |kind, slot|
    look_for(kind, slot)
  end
  comparison_or_verbal_extension
  reset(all)
end
|
58
|
+
|
59
|
+
# Check for -us after i (IUS_ENDING); per the original note only filius is
# looked up here. Issues a single noun stem lookup, then `reset all`.
#
# Returns nil when the check fails, otherwise the result of reset.
def ius_ending
  return unless has(IUS_ENDING)

  look_for(:noun, :stem)
  reset(all)
end
|
66
|
+
|
67
|
+
# Check for a final -a (A_ENDING): looks for noun and adjective stems and for
# verb :ppp stems, tries comparison / verbal extensions, then `reset all`.
#
# Returns nil when the check fails, otherwise the result of reset.
def a_ending
  matched = has(A_ENDING)
  return unless matched

  look_for(:noun, :stem)
  look_for(:adjective, :stem)
  look_for(:verb, :ppp)
  comparison_or_verbal_extension
  reset(all)
end
|
76
|
+
|
77
|
+
# Check for pronominal endings (PRONOMINAL_ENDING, e.g. alius / aliud):
# issues a single adjective stem lookup, then `reset all`.
#
# Returns nil when the check fails, otherwise the result of reset.
def pronominal
  return unless has(PRONOMINAL_ENDING)

  look_for(:adjective, :stem)
  reset(all)
end
|
84
|
+
|
85
|
+
# Looks for a noun stem when the current stem ends in -i. No ending is split
# off and nothing is reset here.
#
# Returns the result of look_for, or nil when the stem does not end in -i.
def contracted_vocative
  look_for(:noun, :stem) if stem =~ /i$/
end
|
90
|
+
|
91
|
+
# Follow-up lookups tried after an ending check succeeded. Three independent
# steps run in order:
#
# 1. COMPARISON        -> adjective stem lookup
# 2. PPA_OR_GERUND     -> verb :pr lookup; only then THEMATIC_VOWEL is tried
#                         (look_for :same), and only after that THEMATIC_I_OF_M
#                         (another look_for :same)
# 3. FUTURE_PARTICIPLE -> verb :ppp lookup
#
# Returns the result of the last look_for of step 3, or nil.
def comparison_or_verbal_extension
  look_for(:adjective, :stem) if has(COMPARISON)

  if has(PPA_OR_GERUND)
    look_for(:verb, :pr)

    if has(THEMATIC_VOWEL)
      look_for(:same)
      look_for(:same) if has(THEMATIC_I_OF_M)
    end
  end

  look_for(:verb, :ppp) if has(FUTURE_PARTICIPLE)
end
|
100
|
+
|
101
|
+
# Dispatches on `table` to the matching valid_*_classes method
# (:noun, :adjective or :verb). Any other table value yields nil and
# calls nothing.
def valid_itypes_for_declinable
  resolver = {
    noun:      :valid_noun_classes,
    adjective: :valid_adjective_classes,
    verb:      :valid_verb_classes
  }[table]

  send(resolver) if resolver
end
|
108
|
+
|
109
|
+
# Appends to `itype` the inflection classes a noun lookup may be restricted
# to, judging by `column`, `stem` and `ending`:
#
# * column :nom                 - classes follow from how the stem ends
# * column :stem, empty ending  - a stem in -i gets class 2 (fili vocative)
# * column :stem, with ending   - classes follow from how stem + ending ends
#
# Class numbers per the original note: 3 is the consonantic stem, 31 the
# vocalic stem - group 1, and so forth.
#
# The `when` branches are order-sensitive: the first matching pattern wins,
# so e.g. /um$/ shadows /orum$/ and /erum$/ (hence the explicit "erum" patch
# at the end).
#
# The return value is incidental (nil or `itype`); the result is what has
# been pushed onto `itype`.
def valid_noun_classes
  if column == :nom
    case stem # 3 is consonantic stem, 31 vocalic stem - group 1 and so forth
    #when /(?<=a)$/  then itype << 1 # disabled in new morphologizer
    #when /(?<=um)$/ then itype << 2 # disabled in new morphologizer
    when /(?<=us)$/ then itype << 3 << 4 # 2 disabled in new morphologizer # [^i] for comparison. cf ior here and both in Adjective nom ### erased. filius. gaius
    when /(?<=er)$/ then itype << 2 << 3
    when /(?<=es)$/ then itype << 3 # 5 disabled in new morphologizer
    when /(?<=u)$/  then itype << 4
    when /(?<=ar)$/ then itype << 3 << 31 # added for Caesar, who is 3. could be done better, but performance won't count here.
    when /(?<=e|al|ar)$/ then itype << 31
    when /(?<=is)$/ then itype << 3 << 32 << 33
    when /(?<=[^aeiou]s)$/ then itype << 3 << 33 # ns was excluded before. we don't know why.
    when /(?<=x)$/ then itype << 3 << 33 # nox! 2013-10-07 20:51
    # The pattern used to read `o||as`: the empty alternative made the
    # lookbehind succeed for every stem, which silently disabled the [^i]or
    # exclusion of comparative-looking forms (-ior).
    when /(?<=[^i]or|os|o|as|ur|men)$/ then itype << 3
    end
  end

  if column == :stem && ending.empty? && stem =~ /i$/ then itype << 2; end # fili vocative

  if column == :stem && !ending.empty? # nouns that end like a comparison?!
    case stem + ending # watch out: regexps must be redefined... stem+ending doesn't work. check corporum.
    when /[^aeou]a$/ then itype << 1 << 2 << 3 << 31 # a decl word whose stem ends with a vowel?
    when /ae$|am$|arum$|as$/ then itype << 1
    when /is$/ then itype << 1 << 2 << 3 << 31 << 32 << 33
    when /ui$/ then itype << 2 << 4
    when /[^aeou]i$/ then itype << 2 << 3 << 31 << 32 << 33
    when /um$/ then itype << 2 << 3 << 4 << 31 << 32 << 33 # [^i] erased. filius
    when /o$|orum$|os$/ then itype << 2
    when /ei$|erum$|ebus$/ then itype << 5
    when /[^aeou]e$/ then itype << 2 << 3 << 33 << 5 # i allowed, acie
    when /es$/ then itype << 3 << 32 << 33 << 5
    when /em$/ then itype << 3 << 33 << 5
    when /ibus$/ then itype << 3 << 31 << 32 << 33 << 4
    when /us$|u$|ua$/ then itype << 2 << 4 # adds 2 in new morphologizer - evaluated through stem now
    when /im$/ then itype << 32
    when /ia$/ then itype << 31 # ineffective here, searched together with a now
    #when /ium$/ then itype << 31 << 32 << 33
    end

    itype << 5 if ending == "erum" # rerum is missed (shadowed by /um$/ above)
  end
end
|
152
|
+
|
153
|
+
# Appends to `itype` the inflection classes an adjective lookup may be
# restricted to. Three independent situations are handled in sequence:
#
# * column :nom                                     - decided by how the stem ends
# * column :stem with an ending                     - decided by how stem + ending ends
# * column :stem, comparison sign present, no ending - always classes 1 and 3
#
# Side effect: for stem "vacu" with ending "ui" the "u" is moved from the
# ending onto the stem - both strings are modified in place.
#
# The return value is incidental (nil or `itype`).
def valid_adjective_classes
  if column == :nom
    if stem == "maior"
      itype << 3
    elsif stem =~ /(?<=us|er|is|[^i]or)$/
      itype << 1 << 3 << 5
    elsif stem =~ /(?<=ar|s|x)$/
      itype << 3
    end
  end

  if column == :stem && !ending.empty?
    # vacui - 2013-10-07 23:42 - well this is weird.
    # Might account for vacui - but certainly not for exercitui,
    # which will arrive here, even if it's not needed in any event.
    # So do it only for vacu - and god knows what else...
    if stem == "vacu" && ending == "ui"
      stem << ending.slice!("u")
    end

    full_form = stem + ending
    if full_form =~ /ius$/
      itype << 5
    elsif full_form =~ /ter$/
      itype << 3 << 5 # 5? not sure. 2013-10-07 20:35
    elsif full_form =~ /[a-z]$/
      itype << 1 << 3 << 5
    end
  end

  if column == :stem && !comparison_sign.empty? && ending.empty?
    # /$/ matches any string stem, so this applies unconditionally.
    itype << 1 << 3 if stem =~ /$/
  end
end
|
182
|
+
|
183
|
+
# Appends to `itype` the conjugation classes a verb lookup may be restricted
# to. Four independent checks run in sequence, driven by `column`, `stem`,
# `ending`, `extension` and `thematic`:
#
# 1. :ppp column with an ending and either no extension or "ur": a stem in
#    t / s / x allows all five classes.
# 2. :pr column, or :ppp column with an extension - unless the extension is
#    "n" and the ending is not "s": the class follows from the final stem
#    vowel (a -> 1, i -> 4, e -> 2); other stems depend on the thematic vowel
#    and on an ending "re".
# 3. :pr column with ending "i" and no extension: stems not ending in a / i / e
#    get classes 3 and 5.
# 4. :pf column (perfect composition): all five classes.
#
# The return value is incidental (nil or `itype`).
def valid_verb_classes
  participle_like = column == :ppp && !ending.empty? && (extension.empty? || extension == "ur")
  if participle_like && stem =~ /(?<=t|s|x)$/
    itype << 1 << 2 << 3 << 4 << 5
  end

  extended = column == :pr || (column == :ppp && !extension.empty?)
  if extended && (extension != "n" || ending == "s")
    if stem =~ /a$/
      itype << 1
    elsif stem =~ /i$/
      itype << 4
    elsif stem =~ /e$/
      itype << 2
    elsif stem =~ /[^aie]$/
      itype << 3 if thematic == "e"
      itype << 5 if thematic == "ie" || ending == "re"
    end
  end

  if column == :pr && ending == "i" && extension.empty? && stem =~ /[^aie]$/
    itype << 3 << 5
  end

  # perfect composition
  itype << 1 << 2 << 3 << 4 << 5 if column == :pf
end
|
214
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'llt/morphologizer/version'
|
5
|
+
|
6
|
+
# Gem specification for llt-morphologizer.
Gem::Specification.new do |spec|
  spec.name          = "llt-morphologizer"
  spec.version       = LLT::Morphologizer::VERSION
  spec.authors       = ["LFDM"]
  spec.email         = ["1986gh@gmail.com"]
  spec.summary       = %q{Morphological parsing of Latin forms}
  spec.description   = spec.summary
  spec.homepage      = "http://www.latin-language-toolkit.net"
  spec.license       = "MIT"

  # The packaged file list is whatever git tracks; bin/ entries become
  # executables, test/spec/features entries become test files.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  # The spec suite is written against the RSpec 2 API (`should`, `be_true`,
  # `have(n).items`); an unconstrained requirement would resolve to RSpec 3+,
  # where `be_true` and the `have` matcher are no longer part of rspec itself.
  spec.add_development_dependency "rspec", "~> 2.14"
  spec.add_development_dependency "simplecov", "~> 0.7"
  spec.add_development_dependency "llt-db_handler-stub"

  spec.add_dependency "llt-constants"
  spec.add_dependency "llt-core"
  spec.add_dependency "llt-core_extensions"
  spec.add_dependency "llt-db_handler"
  spec.add_dependency "llt-form_builder"
  spec.add_dependency "llt-helpers"
  spec.add_dependency "llt-logger"
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Morphologizer::LookupStatement do
  # Builds a lookup statement from stem, table type, stem type, restriction
  # values and the ending that was split off.
  def statement(stem, type, stem_type, values, ending)
    LLT::Morphologizer::LookupStatement.new(stem, type, stem_type, values, { ending: ending })
  end

  let(:rosam) { statement("ros", :noun, :stem, [1], "am") }

  describe "#stem_type" do
    it "returns the stem type" do
      rosam.stem_type.should == :stem
    end
  end

  describe "#type" do
    it "returns the type" do
      rosam.type.should == :noun
    end
  end

  describe "#to_query" do
    it "builds a query in a hash format, that corresponds with the db handler interface" do
      expected = {
        type: :noun,
        stem_type: :stem,
        stem: "ros",
        restrictions: { type: :inflection_class, values: [1] }
      }
      rosam.to_query.should == expected
    end

    it "build a query for laudavit" do
      laudavit = statement("laudav", :verb, :pf, ["v"], "it")
      expected = {
        type: :verb,
        stem_type: :pf,
        stem: "laudav",
        restrictions: { type: :pf_composition, values: ["v"] }
      }
      laudavit.to_query.should == expected
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Morphologizer::StemLookupStatementBuilder do
  # Builds the lookup statements for +word+ and returns them as query hashes.
  def queries_for(word)
    builder = LLT::Morphologizer::StemLookupStatementBuilder.new(word, LLT::Logger.new)
    builder.statements.map(&:to_query)
  end

  # Counts the :nom queries built for +word+; when +klass+ is given, only those
  # whose restriction values include that inflection class.
  def nominative_queries(word, klass = nil)
    queries_for(word).count do |h|
      h[:stem_type] == :nom && (klass.nil? || h[:restrictions][:values].include?(klass))
    end
  end

  describe "#statements" do
    it "creates no separate nominative lookup request for a, um, es and us endings - different from old implementation" do
      nominative_queries("rosa").should == 0
      nominative_queries("templum").should == 0
      nominative_queries("res", 5).should == 0
      nominative_queries("hortus", 2).should == 0
    end

    it "searches in persona, place and ethnic table when a word is capitalized" do
      plato_queries = queries_for("Plato")
      %i[persona place ethnic].each do |table|
        plato_queries.select { |h| h[:type] == table }.should_not be_empty
      end
    end

    it "only stems are searched in the ethnic table" do
      queries = queries_for("Haeduus")
      queries.none? { |h| h[:type] == :ethnic && h[:stem_type] == :nom }.should be_true
      queries.any?  { |h| h[:type] == :ethnic && h[:stem_type] == :stem }.should be_true
    end

    # Description used to read "expect for names"; "except" is what is meant.
    it "searches for capitalized words in downcase, except for names, places and ethnics" do
      plato_queries = queries_for("Plato")
      plato_queries.any?  { |h| h[:type] == :noun && h[:stem] =~ /^[a-z].*/ }.should be_true
      plato_queries.none? { |h| h[:type] == :noun && h[:stem] =~ /^[A-Z].*/ }.should be_true

      plato_queries.any?  { |h| h[:type] == :persona && h[:stem] =~ /^[A-Z].*/ }.should be_true
      plato_queries.none? { |h| h[:type] == :persona && h[:stem] =~ /^[a-z].*/ }.should be_true
    end
  end
end
|
@@ -0,0 +1,524 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Morphologizer do
  # Declares an example named +description+ (the word itself by default) which
  # expects that morphologizing +word+ returns exactly +count+ forms.
  def self.it_finds(count, word, description = word)
    it description do
      morphologizer.morphologize(word).should have(count).items
    end
  end

  it 'should have a version number' do
    LLT::Morphologizer::VERSION.should_not be_nil
  end

  let(:stub_db) { LLT::DbHandler::Stub.new }
  let(:morphologizer) { LLT::Morphologizer.new(db: stub_db) }

  # Returns a morphologizer on a fresh stub db on which the private #setup has
  # been called with +word+, so that private lookup steps can be probed.
  def morph_stub(word)
    m = LLT::Morphologizer.new(db: LLT::DbHandler::Stub.new)
    m.send(:setup, word)
    m
  end

  before(:all) { LLT::DbHandler::Stub.setup }

  # Group name used to be misspelled "#personal_pronons".
  describe "#personal_pronouns" do
    # this tests some private methods just to be safe
    context "morphologizes pronouns" do
      # word => [expected number of forms, segmentized first form (nil: not checked)]
      {
        "se"     => [4, nil],
        "Se"     => [4, nil],
        "secum"  => [2, "se-cum"],
        "nosmet" => [2, "nos-met"]
      }.each do |word, (count, segments)|
        it "with #{word}" do
          pronoun = morph_stub(word)
          forms = pronoun.send(:clook_up, :personal_pronouns)
          pronoun.send(:unique_pers_pron?).should be_true
          forms.should have(count).items
          forms.first.to_s(:segmentized).should == segments if segments
        end
      end
    end
  end

  describe "#other_pronouns" do
    context "morphologizes pronouns" do
      # example description => { word => expected number of forms }
      # The two former "with quisquam" and the two former "with quispiam"
      # examples shared a description each; they are merged here.
      {
        "with hic"       => { "hic" => 1, "hunc" => 1, "huic" => 3 },
        "with aliqui"    => { "alicuius" => 3 },
        "with quicumque" => { "quibuscumque" => 6 },
        "with quilibet"  => { "quaelibet" => 4 },
        "with quivis"    => { "quodvis" => 2 },
        "with quidam"    => { "quibusdam" => 6 },
        "with is"        => { "eas" => 1, "is" => 7, "ii" => 1 }, # is: 7, sadly - eis...
        "with idem"      => { "eorundem" => 2, "eisdem" => 6, "iisdem" => 6 },
        "with uter"      => { "utrum" => 3 },
        "with uterque"   => { "utrumque" => 3, "utriusque" => 3 },
        "with quisque"   => { "cuiusque" => 3 },
        "with quisquam"  => { "quisquam" => 2, "quemquam" => 2 },
        "with quispiam"  => { "quempiam" => 2, "quispiam" => 2 },
        "with quibuscum" => { "quibuscum" => 3 },
        "with quonam"    => { "quonam" => 2 }
      }.each do |description, expectations|
        it description do
          expectations.each do |word, count|
            morph_stub(word).send(:other_pronouns).should have(count).items
          end
        end
      end

      # Might be solved through an exceptional form
      #m = morph("i")
      #m.pronouns.should have(1).item
    end

    it "returns when a unique pronoun like huius is found" do
      morphologizer.should_not receive(:direct_lookup)
      morphologizer.morphologize("huius")
    end

    # Description used to say "his" although the example morphologizes "hic".
    it "continues when a homographic pronoun like hic is found" do
      morphologizer.should receive(:direct_lookup)
      morphologizer.morphologize("hic")
    end
  end

  describe "#prepositions" do
    it "returns when a unique preposition like in is found" do
      morphologizer.should_not receive(:direct_lookup)
      morphologizer.morphologize("in")
    end

    it "goes on when a not uniq prep like cum is found - another entry should be present and then returned" do
      morphologizer.should_not receive(:direct_lookup)
      morphologizer.morphologize("cum").should have(2).items
    end
  end

  describe "#numerals" do
    it "returns when a roman numeral is found" do
      morphologizer.should_not receive(:direct_lookup)
      morphologizer.morphologize("MD").should have(1).item
    end
  end

  describe "#look_up" do
    context "with conjunctions" do
      it "returns when a unique conjunction like et is found" do
        morphologizer.should_not receive(:direct_lookup)
        morphologizer.morphologize("et")
      end
    end

    #context "with subjunctions" do
    #  it "returns when a unique conjunction like et is found" do
    #  end
    #end
  end

  describe "#morphologize" do
    # NOTE(review): this runs while the file is loaded, in addition to the
    # before(:all) hook above - it looks redundant; confirm before removing.
    LLT::DbHandler::Stub.setup

    describe "returns morphologized forms" do
      context "with nouns" do
        it "ratio" do
          f = morphologizer.morphologize("ratio")
          f.should have(2).item
          f1, f2 = f
          f1.casus.should == 1
          f2.casus.should == 5
        end

        it "homine" do
          f = morphologizer.morphologize("homine")
          f.should have(1).item
          f.first.casus.should == 6
          f.first.to_s(:segmentized).should == "homin-e"
        end

        it_finds 2, "nox"
        it_finds 1, "serve"
        it_finds 1, "fili", "fili - contracted vocative"
        it_finds 1, "filius", "filius - ius o declension"
      end

      context "with verbs" do
        it_finds 1, "miserunt"
        it_finds 0, "hortant" # no active forms
        it_finds 1, "hortatur"

        context "and infinitives" do
          # the active one all bring the stupid pass inf...
          it_finds 2, "audire"
          it_finds 1, "audiri"
          it_finds 2, "canare"
          it_finds 1, "canari"
          it_finds 2, "monere"
          it_finds 1, "hortari"
        end
      end

      context "with plain adverbs" do
        it_finds 1, "iam"
      end

      context "with adverbs from adjectives" do
        it_finds 1, "diligenter"

        it "laete" do
          # the real world has a noun as well, will never be
          # in the stub db I guess.
          f = morphologizer.morphologize("laete")
          f.should have(2).item # there's actually a vocative as well...
          f.first.casus.should == 5
          f.map(&:to_s).should == %w{ laete laete }
        end
      end

      context "with adjectives" do
        it_finds 4, "feri" # all from ferus3
      end

      context "with cardinals" do
        it_finds 4, "duo"
        it_finds 1, "sex"
      end

      context "with ethnics" do
        it "Haeduorum" do
          f = morphologizer.morphologize("Haeduorum")
          f.should have(2).items
          f.first.to_s.should == "Haeduorum"
        end
      end

      context "with pronouns" do
        it_finds 2, "quis" # m && f?
        it_finds 2, "quid" # nom and acc
        it_finds 2, "aliquis"
        it_finds 2, "quidque"
        it_finds 2, "quodque"
        it_finds 3, "quisque"
        it_finds 2, "quicquam"
        it_finds 2, "quisquis"
        it_finds 2, "quidquid"
        it_finds 3, "quoquo" # m f n, it's substantivic!
        it_finds 2, "quicquid"

        %w{ unusquisque uniuscuiusque }.each do |word|
          it word do
            f = morphologizer.morphologize(word)
            f.map(&:to_s).should == [word] * 3
          end
        end
      end

      context "with mixed forms" do
        it_finds 2, "ita", "ita - adverb and ppp of ire"
        it_finds 5, "fero", "fero - ferre and ferus3" # 1 from ferre, 4 from ferus3
        it_finds 5, "subito", "subito - adverb and ppp of ire" # 1 adv, 4 ppp
      end
    end

    describe "handles irregular verbs" do
      it_finds 1, "fiebat"
      it_finds 1, "fio"
      it_finds 1, "posse"
      it_finds 1, "ferri"
    end

    describe "handles prefixed irregular verbs" do
      # [word, segmentized first form, tempus of the first form (nil: not checked)]
      [
        ["desum",        "de-s-u-m",        nil],
        ["maluit",       "malu-it",         :pf],
        ["mavult",       "mavul-t",         nil],
        ["it",           "i-t",             nil],
        ["vult",         "vul-t",           nil],
        ["nolumus",      "nol-u-mus",       nil],
        ["contulissent", "con-tul-isse-nt", nil],
        ["intulisset",   "in-tul-isse-t",   nil]
      ].each do |word, segments, tempus|
        it word do
          f = morphologizer.morphologize(word)
          f.should have(1).item
          f.first.to_s(:segmentized).should == segments
          f.first.tempus.should == tempus if tempus
        end
      end

      it_finds 4, "inito"
    end

    describe "takes an optional keyword argument add_to" do
      # Minimal token stand-in: collects whatever is appended through #<<.
      let(:token_dummy) do
        Class.new do
          attr_reader :forms

          def initialize
            @forms = []
          end

          def <<(forms)
            @forms += forms
          end
        end.new
      end

      it "adds the result to the given object if is #<< implemented" do
        forms = morphologizer.morphologize("est", add_to: token_dummy)
        token_dummy.forms.should == forms
      end

      it "does nothing to the given object when #<< it does not respond to" do
        token = double(respond_to?: false)
        token.should_not receive(:<<)
        morphologizer.morphologize("est", add_to: token)
      end
    end

    it "writes stem pack objects to morphologized forms" do
      %w{ homo est }.each do |word|
        forms = morphologizer.morphologize(word)
        forms.first.stems.should_not be_nil
      end
    end

    it "one instance handles multiple requests" do
      tokens = %w{ homo ratio }
      forms = tokens.map { |t| morphologizer.morphologize(t) }
      forms.should have(2).items
      h = forms[0]
      r = forms[1]
      (h.any? && h.all? { |f| f.to_s == "homo" }).should be_true
      (r.any? && r.all? { |f| f.to_s == "ratio" }).should be_true
    end
  end
end
|