llt-morphologizer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,66 @@
1
module LLT
  class Morphologizer
    # Value object describing one stem lookup: the stem to search for, the
    # table and column to search in, the inflection classes the search is
    # restricted to, and the word components that were split off the stem.
    class LookupStatement
      attr_reader :components
      alias :options :components

      # @param args [Array] positional values, in this order:
      #   stem, table, column, itypes, components.
      #   `components` must respond to #[] / #[]= with symbol keys and to
      #   #any? and #map (the visible caller passes a Hash).
      def initialize(*args)
        @stem, @table, @column, @itypes, @components = args
        safety_clones
      end

      # @return the column the stem is looked up in
      def stem_type
        @column
      end

      # @return the table the stem is looked up in
      def type
        @table
      end

      # @return [Hash] the lookup expressed as a query hash with the keys
      #   :type, :stem, :stem_type and :restrictions
      def to_query
        {
          type:         @table,
          stem:         @stem,
          stem_type:    @column,
          restrictions: build_restrictions
        }
      end

      # Human readable description of the lookup.
      # Relies on String#light_green and String#cyan, which are not defined in
      # this file and have to be provided elsewhere.
      def to_s
        "Looking up #{@stem.light_green} as #{@table}, #{@column} #{"with #{components_to_s}" if @components.any? } (classes: #{@itypes * ", "})"
      end

      private

      # The methods that help in the creation of such instances are
      # prepending and appending strings - especially the thematic.
      # Just to be safe, the thematic value is replaced by a clone of itself.
      # Note that the replacement happens inside the components object that
      # was handed to the constructor.
      def safety_clones
        thematic = @components[:thematic]
        @components[:thematic] = thematic.clone if thematic
      end

      # Renders every component as "<key> <value>"; empty values are shown
      # as a literal pair of double quotes.
      def components_to_s
        rendered = @components.map do |key, value|
          shown = value.empty? ? '""' : value
          "#{key} #{shown.to_s.cyan}"
        end

        rendered.join(", ")
      end

      # Chooses the restriction keyword from the kind of values in @itypes:
      # all integers mean :inflection_class, anything else :pf_composition.
      #
      # Integer is used instead of Fixnum: Fixnum was deprecated in Ruby 2.4
      # and removed in Ruby 3.2, where referencing it raises a NameError.
      def build_restrictions
        keyword = if @itypes.all? { |itype| itype.kind_of?(Integer) }
                    :inflection_class
                  else
                    :pf_composition
                  end

        { type: keyword, values: @itypes }
      end
    end
  end
end
@@ -0,0 +1,130 @@
1
module LLT
  class Morphologizer
    # Builds LookupStatement objects for one word. Components are sliced off
    # the end of the word step by step (see #has); after each step #look_for
    # records a lookup for whatever is left of the word (the "stem").
    #
    # The actual slicing strategies are expected to come from the included
    # Declinable and Conjugable modules.
    class StemLookupStatementBuilder

      # These files are required from inside the class body, so the class
      # constant already exists when they are loaded.
      require 'llt/morphologizer/stem_lookup_statement_builder/contracted_forms'
      require 'llt/morphologizer/stem_lookup_statement_builder/declinable'
      require 'llt/morphologizer/stem_lookup_statement_builder/conjugable'
      require 'llt/morphologizer/lookup_statement'

      include Declinable
      include Conjugable

      # @param word [String] the word to analyze
      # @param log an object responding to #warning
      def initialize(word, log)
        @word = word.clone # clone! because this will get sliced and reset continuously in this class
        @log  = log

        # Missing components default to a fresh empty string per key.
        @components = Hash.new { |h, k| h[k] = "" }
        @lookup     = {}
      end

      # A semantic help: what is left of the word at any point is the stem.
      def stem
        @word
      end

      GETTER_METHODS = { components: %w{ thematic extension comparison_sign ending contraction },
                         lookup:     %w{ table column itype } }.freeze

      # Defines one reader per entry, e.g. #thematic => @components[:thematic]
      # and #table => @lookup[:table].
      GETTER_METHODS.each do |inst_var, methods|
        ivar = "@#{inst_var}"
        methods.each do |method|
          key = method.to_sym
          define_method(method) { instance_variable_get(ivar)[key] }
        end
      end

      # Runs the declinable and the conjugable search.
      #
      # @return [Array<LookupStatement>] all statements collected on the way
      def statements
        @statements = []
        create_declinables
        create_conjugables

        @statements
      end

      # Prepares a fresh search pass.
      #
      # @param operator [Symbol] selects the "valid_itypes_for_<operator>"
      #   method that #look_for dispatches to
      def setup(operator)
        @components.clear
        @lookup   = { table: "", column: "", itype: [] }
        @operator = operator
      end

      # Puts the given components back at the end of the word, in the order
      # given, and forgets them. Components that are not present add nothing.
      def reset(*args)
        args.flatten.each do |comp|
          @word << @components.delete(comp).to_s
        end
      end

      # @return [Array<Symbol>] the component keys handled by `reset all`
      def all
        @all_memo ||= %i{ thematic extension comparison_sign ending }
      end

      # Tries to slice one of the given patterns off the word.
      #
      # @param arr [Array] a pair of [component type, array of patterns]
      # @return [String, nil] the updated component string when a pattern
      #   matched, otherwise nil
      def has(arr)
        type, components = arr
        if result = scan(components, type)
          slice_and_stash(type, result)
        end
      end

      # @return the first match any of the patterns produces on the word,
      #   or nil when none matches. `type` is accepted but not used.
      def scan(components, type)
        components.flat_map { |pattern| @word.scan(pattern) }.first
      end

      # Cuts `result` off the end of the word and prepends it to the
      # component stored under `type`.
      def slice_and_stash(type, result)
        @components[type].prepend(@word.slice!(/#{result}$/))
      end

      # Records lookup statements for the current stem.
      #
      # @param the_table [Symbol] table to search; :same keeps the table and
      #   column of the previous call
      # @param the_column [Symbol] column to search; only applied when a
      #   table is given
      def look_for(the_table = :same, the_column = :same)
        unless the_table == :same
          @lookup[:table]  = the_table
          @lookup[:column] = the_column
        end

        send("valid_itypes_for_#{@operator}")
        add_statement!
        add_additional_persona_place_or_ethnic_statement!
        itype.clear
      end

      # Adds a statement for the current stem, table, column and itypes, or
      # logs a warning when no inflection classes were collected.
      def add_statement!
        if itype.empty?
          @log.warning("#{stem} with #{@components[:ending]} has no searchable infl classes.")
        else
          # 2013-09-27 19:23 @components.clone substituted with rejection of empty strings - observe if this leads to trouble.
          st = LookupStatement.new(cloned_stem, table, column, itype.clone, unemptied_components)
          @statements << st
        end
      end

      # @return [Hash] the components without empty values
      def unemptied_components
        # leave ending always in - otherwise some words trigger build all forms (cf ita)
        @components.reject { |k, v| v.empty? unless k == :ending }
      end

      # @return [String] a copy of the stem, downcased unless the current
      #   table is :persona, :place or :ethnic
      def cloned_stem
        s = stem.clone
        s.downcase! unless persona_place_or_ethnic?
        s
      end

      def persona_place_or_ethnic?
        table == :persona || table == :place || table == :ethnic
      end

      # For stems starting with a capital letter, repeats the current lookup
      # against the :persona and :place tables (when looking for a :noun) or
      # against the :ethnic table (when looking for an :adjective in the
      # :stem column).
      #
      # The table is switched only temporarily and restored afterwards.
      # Without the restore a following `look_for :same` would silently
      # search :place / :ethnic instead of the table that was asked for.
      def add_additional_persona_place_or_ethnic_statement!
        if stem =~ /^[A-Z].*/
          original_table = table

          extra_tables = case original_table
                         when :noun      then [:persona, :place]
                         when :adjective then column == :stem ? [:ethnic] : []
                         else []
                         end

          extra_tables.each do |extra_table|
            @lookup[:table] = extra_table
            add_statement!
          end

          @lookup[:table] = original_table
        end
      end
    end
  end
end
@@ -0,0 +1,221 @@
1
module LLT::Morphologizer::StemLookupStatementBuilder::Conjugable
  include LLT::Morphologizer::StemLookupStatementBuilder::ContractedForms

  # Pattern factories, only used to assemble the constants below.
  # Each returns a Regexp or an Array of Regexps anchored at the word end.

  def self.sg_1_active
    [/(?<!tud|[^nu][st]i|ment|[bc]ul|[ao]ri|\Apr)o$/, /(?<![^s]u)m$/]
  end

  def self.sg_2_active
    /(?<=[aer]|[^tr]i|[^aeirsl]ti|[^ai]ri|[^t][sft]eri|quiri|quaeri|\A[a-z]peri|[^a-z]geri|[^a-z]pari|[^a-z]meti)s$/
  end

  def self.sg_3_active
    /(?<=\S[ae]|[is])t$/
  end

  def self.pl_1_active
    /(?<!illi|erri|ssi|[^aeiu])mus$/
  end

  def self.pl_2_active
    /(?<=[aei])tis$/
  end

  def self.pl_3_active
    /(?<=[aeiu])nt$/
  end

  def self.sg_1_passive
    [/(?<!u)or$/, /(?<!u)r$/]
  end

  # NOTE(review): `[^(qu)]` is a character class - it excludes the single
  # characters "(", "q", "u" and ")", not the sequence "qu". Confirm that
  # this is what is wanted before touching it.
  def self.sg_2_passive
    /(?<=[^p]a|[^afgtpsx]e|[^(qu)]i|[cr][uia]pe|[a-z][tg]e)ris$/
  end

  def self.sg_3_passive
    /(?<=[aei])tur$/
  end

  def self.pl_1_passive
    /mur$/
  end

  def self.pl_2_passive
    /(?<=[aei])mini$/
  end

  def self.pl_3_passive
    /(?<=[aeiu])ntur$/
  end

  # Every constant is a pair of [component type, patterns], the shape the
  # `has` calls below are given.

  # sg_2_passive is kept out of PRIMARY_ENDING and searched in its own pass.
  PRIMARY_ENDING = [:ending, [ *sg_1_active, sg_2_active, sg_3_active, pl_1_active, pl_2_active, pl_3_active,
                               *sg_1_passive, sg_3_passive, pl_1_passive, pl_2_passive, pl_3_passive]]
  PRIMARY_ENDING_SG_2_PASSIVE = [:ending, [sg_2_passive]]

  SECONDARY_ENDING  = [:ending, [ /isti$/, /(?<=[^rnt])i$/, /it$/, /imus$/, /istis$/, /erunt$/, /ere$/ ]]
  IMPERATIVE_ENDING = [:ending, [ /(?<=[aei])te$/, /tote$/, /(?<=[^ieu]a$|e$|[^min][^uv]i$)/, /(?<=[^n])to$/, /nto$/,
                                  /(?<=[^n])tor$/, /ntor$/]]
  DEP_IMP_ENDING    = [:ending, [ /(?<=[aei])re$/ ]]

  PERFECT_EXTENSIONS           = [:extension, [/er$|er[ai]$|isse$/]]
  IMPERFECT_BA                 = [:extension, [/ba$/]]
  FUTURE_B                     = [:extension, [/[b]$/]]
  FUTURE_OR_SUBJUNCTIVE_A_OR_E = [:extension, [/[ae]$/]]
  SUBJUNCTIVE_IMPERFECT        = [:extension, [/re$/]]

  THEMATIC_VOWEL                      = [:thematic, [/[eiu]$/]]
  THEMATIC_I_OF_M                     = [:thematic, [/[i]$/]]
  THEMATIC_E_OF_SUBJUNCTIVE_IMPERFECT = [:thematic, [/e$/]]

  # (?<=[aei])re is not needed here for the present infinitive - the
  # DEP_IMP_ENDING finds it anyway, the FormBuilder cares for the rest.
  INFINITIVE_PR = [:ending, [/(?<=[aei])ri$|(?<=[^aeior])i$|r?ier$/]]
  INFINITIVE_PF = [:ending, [/isse$/]]

  # Entry point of the conjugable search: one pass with contracted forms
  # expanded, then one pass over the word as it is. Each pass starts from a
  # fresh setup.
  def create_conjugables
    setup(:conjugable)
    search_for_contracted_form(:conjugable_search)

    setup(:conjugable)
    conjugable_search
  end

  # Runs every individual search in a fixed order.
  def conjugable_search
    secondary_ending
    primary_ending(PRIMARY_ENDING)
    primary_ending(PRIMARY_ENDING_SG_2_PASSIVE)
    imperative
    infinitive
  end

  private

  # Slices a secondary ending and looks the rest up in the :pf column.
  def secondary_ending
    return unless has(SECONDARY_ENDING)

    look_for(:verb, :pf)
    reset(all)
  end

  # Slices a primary ending and then tries the extension / thematic
  # combinations one after the other. The order of the steps matters: each
  # step works on what the previous one left of the word, and the resets put
  # the pieces back in the order they were taken off.
  #
  # @param const [Array] one of the PRIMARY_ENDING* constants
  def primary_ending(const)
    return unless has(const)

    look_for(:verb, :pr)

    if has(IMPERFECT_BA)
      look_for(:verb, :pr)
      look_for(:same) if has(THEMATIC_VOWEL)
      look_for(:same) if has(THEMATIC_I_OF_M)
      reset(:thematic, :extension)
    end

    if has(THEMATIC_VOWEL)
      look_for(:same)
      look_for(:same) if has(THEMATIC_I_OF_M)

      if has(FUTURE_B)
        look_for(:same)
        # here the extension was sliced after the thematic, so it goes back first
        reset(:extension, :thematic)
      else
        reset(:thematic, :extension)
      end
    end

    if has(FUTURE_B)
      look_for(:same)
      reset(:thematic, :extension)
    end

    if has(FUTURE_OR_SUBJUNCTIVE_A_OR_E)
      look_for(:same)
      subjunctive_present_of_A_conjugation
      look_for(:same) if has(THEMATIC_I_OF_M)
      reset(:thematic, :extension)
    end

    if has(SUBJUNCTIVE_IMPERFECT)
      look_for(:same)
      look_for(:same) if has(THEMATIC_E_OF_SUBJUNCTIVE_IMPERFECT)
    end

    first_person_present_of_A_conjugation

    reset(:thematic, :extension)

    look_for(:verb, :pf) if has(PERFECT_EXTENSIONS)

    reset(all)
  end

  # Skipped entirely when the stem already is a short imperative.
  def imperative
    return if short_imperative

    if has(IMPERATIVE_ENDING)
      look_for(:verb, :pr)
      if has(THEMATIC_VOWEL)
        look_for(:same)
        look_for(:same) if has(THEMATIC_I_OF_M)
      end
      reset(all)
    end

    if has(DEP_IMP_ENDING)
      look_for(:verb, :pr)
      if has(THEMATIC_VOWEL)
        look_for(:same)
        look_for(:same) if has(THEMATIC_I_OF_M)
      end
      reset(all)
    end
  end

  def infinitive
    if has(INFINITIVE_PR)
      look_for(:verb, :pr)
      look_for(:same) if has(THEMATIC_VOWEL)
      reset(all)
    end

    if has(INFINITIVE_PF)
      look_for(:verb, :pf)
      reset(all)
    end
  end

  # Extra lookup with an "a" appended when the extension is "e" and the
  # stem does not end in "i".
  def subjunctive_present_of_A_conjugation
    append_a_search_and_chop_it if extension == "e" && stem !~ /i$/
  end

  # Extra lookup with an "a" appended when the ending ends in "o" / "or"
  # and no extension was found (cf. laudavero).
  def first_person_present_of_A_conjugation
    append_a_search_and_chop_it if ending =~ /(o|or)$/ && extension.empty?
  end

  # Temporarily appends "a" to the stem for a single lookup.
  def append_a_search_and_chop_it
    stem << "a"
    look_for(:same)
    stem.chop!
  end

  # Looks the stem up as it is when it ends in dic, duc, fac or fer.
  # The return value of look_for doubles as the "was a short imperative"
  # flag - it is truthy in that case, nil otherwise.
  def short_imperative
    look_for(:verb, :pr) if stem =~ /dic$|duc$|fac$|fer$/
  end

  # Fills `itype` with the classes that can be searched for the current
  # stem: integers for the :pr column, strings for the :pf column.
  def valid_itypes_for_conjugable
    if column == :pr
      itype << 1 if stem =~ /a$/
      itype << 2 if stem =~ /e$/
      itype << 3 if stem =~ /[^aeio]$/ && thematic != "iu"
      itype << 4 if stem =~ /i$/
      itype << 5 if stem =~ /[^aeio]$/ && thematic != "u"
    end

    if column == :pf
      itype << "v"      if stem =~ /v$/
      itype << "u"      if stem =~ /u$/
      itype << "s"      if stem =~ /s$|x$/
      itype << "else"   if stem !~ /v$|u$|s$|x$/
      itype << "ablaut" if stem !~ /v$|u$|s$|x$/
      # regexps needed - until then reduplication is added for every stem
      itype << "reduplication" if stem
    end
  end
end
221
+
@@ -0,0 +1,38 @@
1
module LLT
  class Morphologizer
    class StemLookupStatementBuilder
      # Expands contracted forms: when the word matches one of the patterns
      # below, the piece that was contracted away is put back in and the
      # search is run on the expanded word.
      #
      # Expects the including object to provide @word (a mutable String) and
      # @components (responding to #[]=).
      module ContractedForms

        # Maps the missing piece to the pattern that detects its absence.
        # The match position of each pattern is where the piece is inserted.
        CONTRACTED_FORMS = {
          "v"  => /(?<=[^v]i)(er[aiu]nt|er[ia][mst]|er[ia]mus|er[ia]tis|ero)$/,
          "vi" => /(?<=[^v][aeio])(stis?|sse[mst]|ssemus|ssetis|ssent|sse)$/,
          "ve" => /(?<=[^v][aeo])(r[aiu]nt|r[ia][mst]|r[ia]mus|r[ia]tis|ro)$/
        }.freeze

        # For every pattern that matches the word: inserts the missing piece,
        # records the contraction in @components[:contraction], calls
        # `method` and finally takes the piece out again.
        #
        # `method` is expected to leave @word the way it found it, so that
        # the inserted piece is still at the same position afterwards.
        #
        # @param method [Symbol] name of the search method to run on the
        #   expanded word
        def search_for_contracted_form(method)
          CONTRACTED_FORMS.each do |missing_piece, regexp|
            index = @word =~ regexp
            next if index.nil?

            @word.insert(index, missing_piece)
            @components[:contraction] = Contraction.new(index, missing_piece)

            send(method)

            # Remove exactly what was inserted. A fixed length of 2 would
            # also swallow the character following a one-letter piece ("v").
            @word.slice!(index, missing_piece.length)
          end
        end

        # Records which piece was re-inserted at which position.
        class Contraction
          def initialize(position, contraction)
            @position    = position
            @contraction = contraction
          end

          # duck type, fulfilling the contract of the other component strings
          def empty?
            false
          end

          def to_s
            "#{@contraction} contracted at #{@position}"
          end
        end
      end
    end
  end
end