odin 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/.rvmrc +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +26 -0
- data/HISTORY.md +102 -0
- data/LICENSE.md +10 -0
- data/README.md +46 -0
- data/Rakefile +69 -0
- data/app/controllers/grammar_checker.rb +51 -0
- data/check_grammar.rb +24 -0
- data/configure +9 -0
- data/images/atn_diagram.graffle +0 -0
- data/images/atn_diagram.pdf +0 -0
- data/images/odin-ff6.gif +0 -0
- data/lang/en/adjectives.rb +388 -0
- data/lang/en/atn.rb +102 -0
- data/lang/en/closed_class_words.rb +206 -0
- data/lang/en/data.rb +1086 -0
- data/lang/en/noun_inflections.rb +76 -0
- data/lang/en/noun_inflector_test_cases.rb +235 -0
- data/lang/en/pronoun_inflector_test_cases.rb +14 -0
- data/lang/en/verbs.rb +648 -0
- data/lang/iso639.rb +405 -0
- data/lib/array.rb +15 -0
- data/lib/atn.rb +82 -0
- data/lib/augmented_transition_network.rb +146 -0
- data/lib/dumper.rb +44 -0
- data/lib/noun_inflector.rb +283 -0
- data/lib/odin.rb +3 -0
- data/lib/odin/version.rb +3 -0
- data/lib/parts_of_speech.rb +402 -0
- data/lib/star.rb +23 -0
- data/lib/string.rb +99 -0
- data/lib/string_bracketing.rb +100 -0
- data/lib/word.rb +69 -0
- data/lib/word_net.rb +265 -0
- data/odin.gemspec +27 -0
- data/simple_atn/README.md +45 -0
- data/simple_atn/Rakefile +9 -0
- data/simple_atn/array.rb +15 -0
- data/simple_atn/augmented_transition_network.rb +146 -0
- data/simple_atn/augmented_transition_network_test.rb +113 -0
- data/simple_atn/english.rb +161 -0
- data/simple_atn/string.rb +63 -0
- data/test/fixtures/alice.txt +3594 -0
- data/test/fixtures/art.txt +7 -0
- data/test/fixtures/both.txt +1 -0
- data/test/fixtures/existing.txt +0 -0
- data/test/fixtures/existing.txt.checked.html +0 -0
- data/test/fixtures/grammar_checker.css +4 -0
- data/test/fixtures/grammatical.txt +1 -0
- data/test/fixtures/ungrammatical.txt +1 -0
- data/test/functional/grammar_checker_test.rb +64 -0
- data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
- data/test/test_helper.rb +82 -0
- data/test/unit/atn_test.rb +240 -0
- data/test/unit/noun_inflector_test.rb +249 -0
- data/test/unit/pronoun_inflector_test.rb +17 -0
- data/test/unit/star_test.rb +24 -0
- data/test/unit/string_bracketing_test_module.rb +70 -0
- data/test/unit/string_test.rb +92 -0
- data/test/unit/word_test.rb +15 -0
- metadata +223 -0
data/lang/en/atn.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
# Grammar rules for a small English sentence parser.
#
# Every rule consumes words through helpers that are NOT defined in this
# module (next_word, required, optional, required_phrase, optional_phrase,
# tag, tag_phrase) and reads parser state from @star and @words.
# NOTE(review): presumably @star is the word currently being examined and
# @words the remaining input -- confirm against the including parser class.
module English
  # TODO before_function :next_word

  # S -> NP VP
  # Both phrases are requested through required_phrase; their results are
  # concatenated (noun phrase first) and wrapped in an "S" tag.
  def sentence
    subject = required_phrase { noun_phrase }
    verb = required_phrase { verb_phrase }

    tag_phrase("S", subject + verb)
  end

  # The sentence rule doubles as the grammar's root rule.
  alias root sentence

  # NP -> Pronoun | [Determiner] [Adjective...] Noun [PP]
  # Advances to the next word first, then either tags a lone pronoun as the
  # noun or collects the optional/required parts in order. The pieces are
  # joined and wrapped in an "NP" tag.
  def noun_phrase
    next_word

    # TODO remove these
    # Defaults so the final concatenation works on the pronoun path, where
    # none of the optional parts are parsed.
    determiner, adjective, preposition = "", "", ""

    if Pronoun == @star.part_of_speech
      noun = tag("N", @star)
    else
      determiner = optional(Determiner) do |word|
        next_word
        tag("D", word)
      end

      # TODO convert to optional_phrase
      adjective = optional(Adjective) { adjective_phrase }

      noun = required(Noun) { |word| tag("N", word) }

      preposition = optional_phrase { prepositional_phrase }
    end

    tag_phrase("NP", determiner + adjective + noun + preposition)
  end

  # Collects a run of consecutive adjectives: while the current word's part
  # of speech matches Adjective, append its "A" tag and advance.
  # Returns the concatenated tags ("" when the current word is no adjective).
  def adjective_phrase
    tagged = ""
    while Adjective == @star.part_of_speech
      tagged += tag("A", @star)
      next_word
    end
    # tag_phrase ?
    tagged
  end

  # PP -> Preposition NP
  # Builds a Word from @words[0] and checks its part of speech; when it is
  # not a preposition nothing is consumed and "" is returned. Otherwise the
  # preposition is tagged "P" and followed by a noun phrase, all wrapped in
  # a "PP" tag.
  def prepositional_phrase
    # Earlier draft, left disabled by the original author:
    #
    # next_word
    #
    # preposition = ""
    # object = ""
    #
    # required Preposition do |word|
    #   preposition = tag("P", word)
    # end
    #
    # required_phrase do
    #   object = noun_phrase
    # end
    #
    # return tag_phrase("PP", preposition + object)
    #

    return "" unless Preposition == Word.new(@words[0]).part_of_speech

    next_word
    preposition = tag("P", @star)
    tag_phrase("PP", preposition + noun_phrase)
  end

  # VP -> Verb [NP]
  # Advances to the next word, requires a verb (tagged "V"), then requests
  # an object noun phrase through optional_phrase. The result is wrapped in
  # a "VP" tag.
  def verb_phrase
    next_word

    verb = required(Verb) { |word| tag("V", word) }
    object = optional_phrase { noun_phrase }

    tag_phrase("VP", verb + object)
  end
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
# Word lists for the closed (function-word) classes of English, plus a flat
# list of irregular verb forms. The lists are stored in module-level class
# variables; the names are kept exactly as they are (including the spelling
# of @@PossesiveAdjectives) because other files refer to them.
module ClosedClassWords
  # Some words from the Ruby Linguistics Project, release 1.0.5
  @@Determiners = ['a', 'an', 'the']
  @@PossesiveAdjectives = %w{my our your his her its their}

  @@SingularPronouns = ['i', 'you', 'he', 'she', 'it']
  @@PluralPronouns = ['we', 'you', 'they']
  # "you" appears in both lists above, hence the uniq.
  @@Pronouns = (@@SingularPronouns + @@PluralPronouns).uniq

  #['i', 'we', 'you', 'y\'all', 'he', 'she', 'it', 'they'] # TODO objective forms

  # Maps each singular subject pronoun to its plural counterpart.
  @@SingularToPluralPronouns = {
    "i" => "we",
    "you" => "you",
    "he" => "they",
    "she" => "they",
    "it" => "they"
  }

  @@Conjunctions = ['and', 'but', 'or', 'as']

  # From http://www2.gsu.edu/~wwwesl/egw/verbs.htm
  #
  # Flat list of forms: each source line holds the base form followed by its
  # past / past-participle variants. Lines differ in length, so the array
  # carries no positional structure. Every entry must be a bare word:
  # %w{} splits on whitespace only, so punctuation would become part of the
  # token.
  @@IrregularVerbs = %w{
    awake awoke awoken
    be was were been
    bear bore born
    beat beat beat
    become became become
    begin began begun
    bend bent bent
    beset beset beset
    bet bet bet
    bid bid bade bid bidden
    bind bound bound
    bite bit bitten
    bleed bled bled
    blow blew blown
    break broke broken
    breed bred bred
    bring brought brought
    broadcast broadcast broadcast
    build built built
    burn burned burnt burned burnt
    burst burst burst
    buy bought bought
    cast cast cast
    catch caught caught
    choose chose chosen
    cling clung clung
    come came come
    cost cost cost
    creep crept crept
    cut cut cut
    deal dealt dealt
    dig dug dug
    dive dived dove dived
    do did done
    draw drew drawn
    dream dreamed dreamt dreamed dreamt
    drive drove driven
    drink drank drunk
    eat ate eaten
    fall fell fallen
    feed fed fed
    feel felt felt
    fight fought fought
    find found found
    fit fit fit
    flee fled fled
    fling flung flung
    fly flew flown
    forbid forbade forbidden
    forget forgot forgotten
    forego forgo forewent foregone
    forgive forgave forgiven
    forsake forsook forsaken
    freeze froze frozen
    get got gotten
    give gave given
    go went gone
    grind ground ground
    grow grew grown
    have had had
    hang hung hung
    hear heard heard
    hide hid hidden
    hit hit hit
    hold held held
    hurt hurt hurt
    keep kept kept
    kneel knelt knelt
    knit knit knit
    know knew known
    lay laid laid
    lead led led
    leap leaped leapt leaped leapt
    learn learned learnt learned learnt
    leave left left
    lend lent lent
    let let let
    lie lay lain
    light lighted lit lighted
    lose lost lost
    make made made
    mean meant meant
    meet met met
    misspell misspelled misspelt misspelled misspelt
    mistake mistook mistaken
    mow mowed mowed mown
    overcome overcame overcome
    overdo overdid overdone
    overtake overtook overtaken
    overthrow overthrew overthrown
    pay paid paid
    plead pled pled
    prove proved proved proven
    put put put
    quit quit quit
    read read read
    rid rid rid
    ride rode ridden
    ring rang rung
    rise rose risen
    run ran run
    saw sawed sawed sawn
    say said said
    see saw seen
    seek sought sought
    sell sold sold
    send sent sent
    set set set
    sew sewed sewed sewn
    shake shook shaken
    shave shaved shaved shaven
    shear shore shorn
    shed shed shed
    shine shone shone
    shoot shot shot
    show showed showed shown
    shrink shrank shrunk
    shut shut shut
    sing sang sung
    sink sank sunk
    sit sat sat
    sleep slept slept
    slay slew slain
    slide slid slid
    sling slung slung
    slit slit slit
    smite smote smitten
    sow sowed sowed sown
    speak spoke spoken
    speed sped sped
    spend spent spent
    spill spilled spilt spilled spilt
    spin spun spun
    spit spit spat spit
    split split split
    spread spread spread
    spring sprang sprung sprung
    stand stood stood
    steal stole stolen
    stick stuck stuck
    sting stung stung
    stink stank stunk
    stride strode stridden
    strike struck struck
    string strung strung
    strive strove striven
    swear swore sworn
    sweep swept swept
    swell swelled swelled swollen
    swim swam swum
    swing swung swung
    take took taken
    teach taught taught
    tear tore torn
    tell told told
    think thought thought
    thrive thrived throve thrived
    throw threw thrown
    thrust thrust thrust
    tread trod trodden
    understand understood understood
    uphold upheld upheld
    upset upset upset
    wake woke woken
    wear wore worn
    weave weaved wove weaved woven
    wed wed wed
    weep wept wept
    wind wound wound
    win won won
    withhold withheld withheld
    withstand withstood withstood
    wring wrung wrung
    write wrote written
  }

  # Temporarily removed:
  # shoe shoed shoed shod

  # Each preposition is listed once.
  @@Prepositions = ["amid", "at", "but", "by", "down", "from", "in", "into", "like",
                    "near", "of", "off", "on", "onto", "out", "over", "past", "with",
                    "to", "until", "unto", "up", "upon"]
end
|
data/lang/en/data.rb
ADDED
@@ -0,0 +1,1086 @@
|
|
1
|
+
# # From the Ruby Linguistics Project, release 1.0.5
|
2
|
+
# # http://www.deveiate.org/projects/Linguistics/browser/tags/RELEASE_1_0_5/lib/linguistics/en.rb
|
3
|
+
#
|
4
|
+
# # Irregular words => infinitive forms
|
5
|
+
# IrregularInfinitives = {
|
6
|
+
# 'abided' => 'abide',
|
7
|
+
# 'abode' => 'abide',
|
8
|
+
# 'am' => 'be',
|
9
|
+
# 'are' => 'be',
|
10
|
+
# 'arisen' => 'arise',
|
11
|
+
# 'arose' => 'arise',
|
12
|
+
# 'ate' => 'eat',
|
13
|
+
# 'awaked' => 'awake',
|
14
|
+
# 'awoke' => 'awake',
|
15
|
+
# 'bade' => 'bid',
|
16
|
+
# 'beaten' => 'beat',
|
17
|
+
# 'became' => 'become',
|
18
|
+
# 'been' => 'be',
|
19
|
+
# 'befallen' => 'befall',
|
20
|
+
# 'befell' => 'befall',
|
21
|
+
# 'began' => 'begin',
|
22
|
+
# 'begat' => 'beget',
|
23
|
+
# 'begot' => 'beget',
|
24
|
+
# 'begotten' => 'beget',
|
25
|
+
# 'begun' => 'begin',
|
26
|
+
# 'beheld' => 'behold',
|
27
|
+
# 'bent' => 'bend',
|
28
|
+
# 'bereaved' => 'bereave',
|
29
|
+
# 'bereft' => 'bereave',
|
30
|
+
# 'beseeched' => 'beseech',
|
31
|
+
# 'besought' => 'beseech',
|
32
|
+
# 'bespoke' => 'bespeak',
|
33
|
+
# 'bespoken' => 'bespeak',
|
34
|
+
# 'bestrewed' => 'bestrew',
|
35
|
+
# 'bestrewn' => 'bestrew',
|
36
|
+
# 'bestrid' => 'bestride',
|
37
|
+
# 'bestridden' => 'bestride',
|
38
|
+
# 'bestrode' => 'bestride',
|
39
|
+
# 'betaken' => 'betake',
|
40
|
+
# 'bethought' => 'bethink',
|
41
|
+
# 'betook' => 'betake',
|
42
|
+
# 'betted' => 'bet',
|
43
|
+
# 'bidden' => 'bid',
|
44
|
+
# 'bided' => 'bide',
|
45
|
+
# 'bit' => 'bite',
|
46
|
+
# 'bitten' => 'bite',
|
47
|
+
# 'bled' => 'bleed',
|
48
|
+
# 'blended' => 'blend',
|
49
|
+
# 'blent' => 'blend',
|
50
|
+
# 'blessed' => 'bless',
|
51
|
+
# 'blest' => 'bless',
|
52
|
+
# 'blew' => 'blow',
|
53
|
+
# 'blown' => 'blow',
|
54
|
+
# 'bode' => 'bide',
|
55
|
+
# 'bore' => 'bear',
|
56
|
+
# 'born' => 'bear',
|
57
|
+
# 'borne' => 'bear',
|
58
|
+
# 'bought' => 'buy',
|
59
|
+
# 'bound' => 'bind',
|
60
|
+
# 'bred' => 'breed',
|
61
|
+
# 'broadcasted' => 'broadcast',
|
62
|
+
# 'broke' => 'break',
|
63
|
+
# 'broken' => 'break',
|
64
|
+
# 'brought' => 'bring',
|
65
|
+
# 'browbeaten' => 'browbeat',
|
66
|
+
# 'built' => 'build',
|
67
|
+
# 'burned' => 'burn',
|
68
|
+
# 'burnt' => 'burn',
|
69
|
+
# 'came' => 'come',
|
70
|
+
# 'caught' => 'catch',
|
71
|
+
# 'chid' => 'chide',
|
72
|
+
# 'chidden' => 'chide',
|
73
|
+
# 'chided' => 'chide',
|
74
|
+
# 'chose' => 'choose',
|
75
|
+
# 'chosen' => 'choose',
|
76
|
+
# 'clad' => 'clothe',
|
77
|
+
# 'clave' => 'cleave',
|
78
|
+
# 'cleaved' => 'cleave',
|
79
|
+
# 'cleft' => 'cleave',
|
80
|
+
# 'clothed' => 'clothe',
|
81
|
+
# 'clove' => 'cleave',
|
82
|
+
# 'cloven' => 'cleave',
|
83
|
+
# 'clung' => 'cling',
|
84
|
+
# 'costed' => 'cost',
|
85
|
+
# 'could' => 'can',
|
86
|
+
# 'crept' => 'creep',
|
87
|
+
# 'crew' => 'crow',
|
88
|
+
# 'crowed' => 'crow',
|
89
|
+
# 'dealt' => 'deal',
|
90
|
+
# 'did' => 'do',
|
91
|
+
# 'done' => 'do',
|
92
|
+
# 'dove' => 'dive',
|
93
|
+
# 'drank' => 'drink',
|
94
|
+
# 'drawn' => 'draw',
|
95
|
+
# 'dreamed' => 'dream',
|
96
|
+
# 'dreamt' => 'dream',
|
97
|
+
# 'drew' => 'draw',
|
98
|
+
# 'driven' => 'drive',
|
99
|
+
# 'drove' => 'drive',
|
100
|
+
# 'drunk' => 'drink',
|
101
|
+
# 'dug' => 'dig',
|
102
|
+
# 'dwelled' => 'dwell',
|
103
|
+
# 'dwelt' => 'dwell',
|
104
|
+
# 'eaten' => 'eat',
|
105
|
+
# 'fallen' => 'fall',
|
106
|
+
# 'fed' => 'feed',
|
107
|
+
# 'fell' => 'fall',
|
108
|
+
# 'felt' => 'feel',
|
109
|
+
# 'fled' => 'flee',
|
110
|
+
# 'flew' => 'fly',
|
111
|
+
# 'flown' => 'fly',
|
112
|
+
# 'flung' => 'fling',
|
113
|
+
# 'forbad' => 'forbid',
|
114
|
+
# 'forbade' => 'forbid',
|
115
|
+
# 'forbidden' => 'forbid',
|
116
|
+
# 'forbore' => 'forbear',
|
117
|
+
# 'forborne' => 'forbear',
|
118
|
+
# 'fordid' => 'fordo',
|
119
|
+
# 'fordone' => 'fordo',
|
120
|
+
# 'forecasted' => 'forecast',
|
121
|
+
# 'foregone' => 'forego',
|
122
|
+
# 'foreknew' => 'foreknow',
|
123
|
+
# 'foreknown' => 'foreknow',
|
124
|
+
# 'foreran' => 'forerun',
|
125
|
+
# 'foresaw' => 'foresee',
|
126
|
+
# 'foreshowed' => 'foreshow',
|
127
|
+
# 'foreshown' => 'foreshow',
|
128
|
+
# 'foretold' => 'foretell',
|
129
|
+
# 'forewent' => 'forego',
|
130
|
+
# 'forgave' => 'forgive',
|
131
|
+
# 'forgiven' => 'forgive',
|
132
|
+
# 'forgot' => 'forget',
|
133
|
+
# 'forgotten' => 'forget',
|
134
|
+
# 'forsaken' => 'forsake',
|
135
|
+
# 'forseen' => 'foresee',
|
136
|
+
# 'forsook' => 'forsake',
|
137
|
+
# 'forswore' => 'forswear',
|
138
|
+
# 'forsworn' => 'forswear',
|
139
|
+
# 'fought' => 'fight',
|
140
|
+
# 'found' => 'find',
|
141
|
+
# 'froze' => 'freeze',
|
142
|
+
# 'frozen' => 'freeze',
|
143
|
+
# 'gainsaid' => 'gainsay',
|
144
|
+
# 'gave' => 'give',
|
145
|
+
# 'gilded' => 'gild',
|
146
|
+
# 'gilt' => 'gild',
|
147
|
+
# 'girded' => 'gird',
|
148
|
+
# 'girt' => 'gird',
|
149
|
+
# 'given' => 'give',
|
150
|
+
# 'gone' => 'go',
|
151
|
+
# 'got' => 'get',
|
152
|
+
# 'gotten' => 'get',
|
153
|
+
# 'graved' => 'grave',
|
154
|
+
# 'graven' => 'grave',
|
155
|
+
# 'grew' => 'grow',
|
156
|
+
# 'ground' => 'grind',
|
157
|
+
# 'grown' => 'grow',
|
158
|
+
# 'had' => 'have',
|
159
|
+
# 'hamstringed' => 'hamstring',
|
160
|
+
# 'hamstrung' => 'hamstring',
|
161
|
+
# 'hanged' => 'hang',
|
162
|
+
# 'heard' => 'hear',
|
163
|
+
# 'heaved' => 'heave',
|
164
|
+
# 'held' => 'hold',
|
165
|
+
# 'hewed' => 'hew',
|
166
|
+
# 'hewn' => 'hew',
|
167
|
+
# 'hid' => 'hide',
|
168
|
+
# 'hidden' => 'hide',
|
169
|
+
# 'hove' => 'heave',
|
170
|
+
# 'hung' => 'hang',
|
171
|
+
# 'inlaid' => 'inlay',
|
172
|
+
# 'is' => 'be',
|
173
|
+
# 'kept' => 'keep',
|
174
|
+
# 'kneeled' => 'kneel',
|
175
|
+
# 'knelt' => 'kneel',
|
176
|
+
# 'knew' => 'know',
|
177
|
+
# 'knitted' => 'knit',
|
178
|
+
# 'known' => 'know',
|
179
|
+
# 'laded' => 'lade',
|
180
|
+
# 'laden' => 'lade',
|
181
|
+
# 'laid' => 'lay',
|
182
|
+
# 'lain' => 'lie',
|
183
|
+
# 'lay' => 'lie',
|
184
|
+
# 'leaned' => 'lean',
|
185
|
+
# 'leant' => 'lean',
|
186
|
+
# 'leaped' => 'leap',
|
187
|
+
# 'leapt' => 'leap',
|
188
|
+
# 'learned' => 'learn',
|
189
|
+
# 'learnt' => 'learn',
|
190
|
+
# 'led' => 'lead',
|
191
|
+
# 'left' => 'leave',
|
192
|
+
# 'lent' => 'lend',
|
193
|
+
# 'lighted' => 'light',
|
194
|
+
# 'lit' => 'light',
|
195
|
+
# 'lost' => 'lose',
|
196
|
+
# 'made' => 'make',
|
197
|
+
# 'meant' => 'mean',
|
198
|
+
# 'melted' => 'melt',
|
199
|
+
# 'met' => 'meet',
|
200
|
+
# 'might' => 'may',
|
201
|
+
# 'misdealt' => 'misdeal',
|
202
|
+
# 'misgave' => 'misgive',
|
203
|
+
# 'misgiven' => 'misgive',
|
204
|
+
# 'mislaid' => 'mislay',
|
205
|
+
# 'misled' => 'mislead',
|
206
|
+
# 'mistaken' => 'mistake',
|
207
|
+
# 'mistook' => 'mistake',
|
208
|
+
# 'misunderstood' => 'misunderstand',
|
209
|
+
# 'molten' => 'melt',
|
210
|
+
# 'mowed' => 'mow',
|
211
|
+
# 'mown' => 'mow',
|
212
|
+
# 'outate' => 'outeat',
|
213
|
+
# 'outbade' => 'outbid',
|
214
|
+
# 'outbidden' => 'outbid',
|
215
|
+
# 'outbred' => 'outbreed',
|
216
|
+
# 'outdid' => 'outdo',
|
217
|
+
# 'outdone' => 'outdo',
|
218
|
+
# 'outeaten' => 'outeat',
|
219
|
+
# 'outfought' => 'outfight',
|
220
|
+
# 'outgone' => 'outgo',
|
221
|
+
# 'outgrew' => 'outgrow',
|
222
|
+
# 'outgrown' => 'outgrow',
|
223
|
+
# 'outlaid' => 'outlay',
|
224
|
+
# 'outran' => 'outrun',
|
225
|
+
# 'outridden' => 'outride',
|
226
|
+
# 'outrode' => 'outride',
|
227
|
+
# 'outsat' => 'outsit',
|
228
|
+
# 'outshone' => 'outshine',
|
229
|
+
# 'outshot' => 'outshoot',
|
230
|
+
# 'outsold' => 'outsell',
|
231
|
+
# 'outspent' => 'outspend',
|
232
|
+
# 'outthrew' => 'outthrow',
|
233
|
+
# 'outthrown' => 'outthrow',
|
234
|
+
# 'outwent' => 'outgo',
|
235
|
+
# 'outwore' => 'outwear',
|
236
|
+
# 'outworn' => 'outwear',
|
237
|
+
# 'overate' => 'overeat',
|
238
|
+
# 'overbade' => 'overbid',
|
239
|
+
# 'overbidden' => 'overbid',
|
240
|
+
# 'overblew' => 'overblow',
|
241
|
+
# 'overblown' => 'overblow',
|
242
|
+
# 'overbore' => 'overbear',
|
243
|
+
# 'overborn' => 'overbear',
|
244
|
+
# 'overborne' => 'overbear',
|
245
|
+
# 'overbought' => 'overbuy',
|
246
|
+
# 'overbuilt' => 'overbuild',
|
247
|
+
# 'overcame' => 'overcome',
|
248
|
+
# 'overdid' => 'overdo',
|
249
|
+
# 'overdone' => 'overdo',
|
250
|
+
# 'overdrawn' => 'overdraw',
|
251
|
+
# 'overdrew' => 'overdraw',
|
252
|
+
# 'overdriven' => 'overdrive',
|
253
|
+
# 'overdrove' => 'overdrive',
|
254
|
+
# 'overeaten' => 'overeat',
|
255
|
+
# 'overfed' => 'overfeed',
|
256
|
+
# 'overflew' => 'overfly',
|
257
|
+
# 'overflown' => 'overfly',
|
258
|
+
# 'overgrew' => 'overgrow',
|
259
|
+
# 'overgrown' => 'overgrow',
|
260
|
+
# 'overhanged' => 'overhang',
|
261
|
+
# 'overheard' => 'overhear',
|
262
|
+
# 'overhung' => 'overhang',
|
263
|
+
# 'overladed' => 'overlade',
|
264
|
+
# 'overladen' => 'overlade',
|
265
|
+
# 'overlaid' => 'overlay',
|
266
|
+
# 'overlain' => 'overlie',
|
267
|
+
# 'overlay' => 'overlie',
|
268
|
+
# 'overleaped' => 'overleap',
|
269
|
+
# 'overleapt' => 'overleap',
|
270
|
+
# 'overpaid' => 'overpay',
|
271
|
+
# 'overran' => 'overrun',
|
272
|
+
# 'overridden' => 'override',
|
273
|
+
# 'overrode' => 'override',
|
274
|
+
# 'oversaw' => 'oversee',
|
275
|
+
# 'overseen' => 'oversee',
|
276
|
+
# 'oversewed' => 'oversew',
|
277
|
+
# 'oversewn' => 'oversew',
|
278
|
+
# 'overshot' => 'overshoot',
|
279
|
+
# 'overslept' => 'oversleep',
|
280
|
+
# 'overspent' => 'overspend',
|
281
|
+
# 'overtaken' => 'overtake',
|
282
|
+
# 'overthrew' => 'overthrow',
|
283
|
+
# 'overthrown' => 'overthrow',
|
284
|
+
# 'overtook' => 'overtake',
|
285
|
+
# 'overwinded' => 'overwind',
|
286
|
+
# 'overwound' => 'overwind',
|
287
|
+
# 'overwritten' => 'overwrite',
|
288
|
+
# 'overwrote' => 'overwrite',
|
289
|
+
# 'paid' => 'pay',
|
290
|
+
# 'partaken' => 'partake',
|
291
|
+
# 'partook' => 'partake',
|
292
|
+
# 'prechose' => 'prechoose',
|
293
|
+
# 'prechosen' => 'prechoose',
|
294
|
+
# 'proved' => 'prove',
|
295
|
+
# 'proven' => 'prove',
|
296
|
+
# 'quitted' => 'quit',
|
297
|
+
# 'ran' => 'run',
|
298
|
+
# 'rang' => 'ring',
|
299
|
+
# 'reaved' => 'reave',
|
300
|
+
# 'rebuilt' => 'rebuild',
|
301
|
+
# 'reeved' => 'reeve',
|
302
|
+
# 'reft' => 'reave',
|
303
|
+
# 'relaid' => 'relay',
|
304
|
+
# 'rent' => 'rend',
|
305
|
+
# 'repaid' => 'repay',
|
306
|
+
# 'retold' => 'retell',
|
307
|
+
# 'ridded' => 'rid',
|
308
|
+
# 'ridden' => 'ride',
|
309
|
+
# 'risen' => 'rise',
|
310
|
+
# 'rived' => 'rive',
|
311
|
+
# 'riven' => 'rive',
|
312
|
+
# 'rode' => 'ride',
|
313
|
+
# 'rose' => 'rise',
|
314
|
+
# 'rove' => 'reeve',
|
315
|
+
# 'rung' => 'ring',
|
316
|
+
# 'said' => 'say',
|
317
|
+
# 'sang' => 'sing',
|
318
|
+
# 'sank' => 'sink',
|
319
|
+
# 'sat' => 'sit',
|
320
|
+
# 'saw' => 'see',
|
321
|
+
# 'sawed' => 'saw',
|
322
|
+
# 'sawn' => 'saw',
|
323
|
+
# 'seen' => 'see',
|
324
|
+
# 'sent' => 'send',
|
325
|
+
# 'sewed' => 'sew',
|
326
|
+
# 'sewn' => 'sew',
|
327
|
+
# 'shaken' => 'shake',
|
328
|
+
# 'shaved' => 'shave',
|
329
|
+
# 'shaven' => 'shave',
|
330
|
+
# 'sheared' => 'shear',
|
331
|
+
# 'shined' => 'shine',
|
332
|
+
# 'shod' => 'shoe',
|
333
|
+
# 'shoed' => 'shoe',
|
334
|
+
# 'shone' => 'shine',
|
335
|
+
# 'shook' => 'shake',
|
336
|
+
# 'shorn' => 'shear',
|
337
|
+
# 'shot' => 'shoot',
|
338
|
+
# 'showed' => 'show',
|
339
|
+
# 'shown' => 'show',
|
340
|
+
# 'shrank' => 'shrink',
|
341
|
+
# 'shredded' => 'shred',
|
342
|
+
# 'shrived' => 'shrive',
|
343
|
+
# 'shriven' => 'shrive',
|
344
|
+
# 'shrove' => 'shrive',
|
345
|
+
# 'shrunk' => 'shrink',
|
346
|
+
# 'shrunken' => 'shrink',
|
347
|
+
# 'slain' => 'slay',
|
348
|
+
# 'slept' => 'sleep',
|
349
|
+
# 'slew' => 'slay',
|
350
|
+
# 'slid' => 'slide',
|
351
|
+
# 'slidden' => 'slide',
|
352
|
+
# 'slung' => 'sling',
|
353
|
+
# 'slunk' => 'slink',
|
354
|
+
# 'smelled' => 'smell',
|
355
|
+
# 'smelt' => 'smell',
|
356
|
+
# 'smitten' => 'smite',
|
357
|
+
# 'smote' => 'smite',
|
358
|
+
# 'snuck' => 'sneak',
|
359
|
+
# 'sold' => 'sell',
|
360
|
+
# 'sought' => 'seek',
|
361
|
+
# 'sowed' => 'sow',
|
362
|
+
# 'sown' => 'sow',
|
363
|
+
# 'span' => 'spin',
|
364
|
+
# 'spat' => 'spit',
|
365
|
+
# 'sped' => 'speed',
|
366
|
+
# 'speeded' => 'speed',
|
367
|
+
# 'spelled' => 'spell',
|
368
|
+
# 'spelt' => 'spell',
|
369
|
+
# 'spent' => 'spend',
|
370
|
+
# 'spilled' => 'spill',
|
371
|
+
# 'spilt' => 'spill',
|
372
|
+
# 'spoiled' => 'spoil',
|
373
|
+
# 'spoilt' => 'spoil',
|
374
|
+
# 'spoke' => 'speak',
|
375
|
+
# 'spoken' => 'speak',
|
376
|
+
# 'sprang' => 'spring',
|
377
|
+
# 'sprung' => 'spring',
|
378
|
+
# 'spun' => 'spin',
|
379
|
+
# 'stank' => 'stink',
|
380
|
+
# 'staved' => 'stave',
|
381
|
+
# 'stole' => 'steal',
|
382
|
+
# 'stolen' => 'steal',
|
383
|
+
# 'stood' => 'stand',
|
384
|
+
# 'stove' => 'stave',
|
385
|
+
# 'strewed' => 'strew',
|
386
|
+
# 'strewn' => 'strew',
|
387
|
+
# 'stricken' => 'strike',
|
388
|
+
# 'strid' => 'stride',
|
389
|
+
# 'stridden' => 'stride',
|
390
|
+
# 'strived' => 'strive',
|
391
|
+
# 'striven' => 'strive',
|
392
|
+
# 'strode' => 'stride',
|
393
|
+
# 'strove' => 'strive',
|
394
|
+
# 'struck' => 'strike',
|
395
|
+
# 'strung' => 'string',
|
396
|
+
# 'stuck' => 'stick',
|
397
|
+
# 'stung' => 'sting',
|
398
|
+
# 'stunk' => 'stink',
|
399
|
+
# 'sung' => 'sing',
|
400
|
+
# 'sunk' => 'sink',
|
401
|
+
# 'sunken' => 'sink',
|
402
|
+
# 'swam' => 'swim',
|
403
|
+
# 'sweated' => 'sweat',
|
404
|
+
# 'swelled' => 'swell',
|
405
|
+
# 'swept' => 'sweep',
|
406
|
+
# 'swollen' => 'swell',
|
407
|
+
# 'swore' => 'swear',
|
408
|
+
# 'sworn' => 'swear',
|
409
|
+
# 'swum' => 'swim',
|
410
|
+
# 'swung' => 'swing',
|
411
|
+
# 'taken' => 'take',
|
412
|
+
# 'taught' => 'teach',
|
413
|
+
# 'thought' => 'think',
|
414
|
+
# 'threw' => 'throw',
|
415
|
+
# 'thrived' => 'thrive',
|
416
|
+
# 'thriven' => 'thrive',
|
417
|
+
# 'throve' => 'thrive',
|
418
|
+
# 'thrown' => 'throw',
|
419
|
+
# 'told' => 'tell',
|
420
|
+
# 'took' => 'take',
|
421
|
+
# 'tore' => 'tear',
|
422
|
+
# 'torn' => 'tear',
|
423
|
+
# 'trod' => 'tread',
|
424
|
+
# 'trodden' => 'tread',
|
425
|
+
# 'unbent' => 'unbend',
|
426
|
+
# 'unbound' => 'unbind',
|
427
|
+
# 'unbuilt' => 'unbuild',
|
428
|
+
# 'underbought' => 'underbuy',
|
429
|
+
# 'underfed' => 'underfeed',
|
430
|
+
# 'undergone' => 'undergo',
|
431
|
+
# 'underlaid' => 'underlay',
|
432
|
+
# 'underlain' => 'underlie',
|
433
|
+
# 'underlay' => 'underlie',
|
434
|
+
# 'underpaid' => 'underpay',
|
435
|
+
# 'underran' => 'underrun',
|
436
|
+
# 'undershot' => 'undershoot',
|
437
|
+
# 'undersold' => 'undersell',
|
438
|
+
# 'understood' => 'understand',
|
439
|
+
# 'undertaken' => 'undertake',
|
440
|
+
# 'undertook' => 'undertake',
|
441
|
+
# 'underwent' => 'undergo',
|
442
|
+
# 'underwritten' => 'underwrite',
|
443
|
+
# 'underwrote' => 'underwrite',
|
444
|
+
# 'undid' => 'undo',
|
445
|
+
# 'undone' => 'undo',
|
446
|
+
# 'undrawn' => 'undraw',
|
447
|
+
# 'undrew' => 'undraw',
|
448
|
+
# 'unfroze' => 'unfreeze',
|
449
|
+
# 'unfrozen' => 'unfreeze',
|
450
|
+
# 'ungirded' => 'ungird',
|
451
|
+
# 'ungirt' => 'ungird',
|
452
|
+
# 'unhanged' => 'unhang',
|
453
|
+
# 'unhung' => 'unhang',
|
454
|
+
# 'unknitted' => 'unknit',
|
455
|
+
# 'unladed' => 'unlade',
|
456
|
+
# 'unladen' => 'unlade',
|
457
|
+
# 'unlaid' => 'unlay',
|
458
|
+
# 'unlearned' => 'unlearn',
|
459
|
+
# 'unlearnt' => 'unlearn',
|
460
|
+
# 'unmade' => 'unmake',
|
461
|
+
# 'unreeved' => 'unreeve',
|
462
|
+
# 'unrove' => 'unreeve',
|
463
|
+
# 'unsaid' => 'unsay',
|
464
|
+
# 'unslung' => 'unsling',
|
465
|
+
# 'unspoke' => 'unspeak',
|
466
|
+
# 'unspoken' => 'unspeak',
|
467
|
+
# 'unstrung' => 'unstring',
|
468
|
+
# 'unstuck' => 'unstick',
|
469
|
+
# 'unswore' => 'unswear',
|
470
|
+
# 'unsworn' => 'unswear',
|
471
|
+
# 'untaught' => 'unteach',
|
472
|
+
# 'unthought' => 'unthink',
|
473
|
+
# 'untrod' => 'untread',
|
474
|
+
# 'untrodden' => 'untread',
|
475
|
+
# 'unwinded' => 'unwind',
|
476
|
+
# 'unwound' => 'unwind',
|
477
|
+
# 'unwove' => 'unweave',
|
478
|
+
# 'unwoven' => 'unweave',
|
479
|
+
# 'upbuilt' => 'upbuild',
|
480
|
+
# 'upheld' => 'uphold',
|
481
|
+
# 'uprisen' => 'uprise',
|
482
|
+
# 'uprose' => 'uprise',
|
483
|
+
# 'upswept' => 'upsweep',
|
484
|
+
# 'upswung' => 'upswing',
|
485
|
+
# 'waked' => 'wake',
|
486
|
+
# 'was' => 'be',
|
487
|
+
# 'waylaid' => 'waylay',
|
488
|
+
# 'wedded' => 'wed',
|
489
|
+
# 'went' => 'go',
|
490
|
+
# 'wept' => 'weep',
|
491
|
+
# 'were' => 'be',
|
492
|
+
# 'wetted' => 'wet',
|
493
|
+
# 'winded' => 'wind',
|
494
|
+
# 'wist' => 'wit',
|
495
|
+
# 'wot' => 'wit',
|
496
|
+
# 'withdrawn' => 'withdraw',
|
497
|
+
# 'withdrew' => 'withdraw',
|
498
|
+
# 'withheld' => 'withhold',
|
499
|
+
# 'withstood' => 'withstand',
|
500
|
+
# 'woke' => 'wake',
|
501
|
+
# 'woken' => 'wake',
|
502
|
+
# 'won' => 'win',
|
503
|
+
# 'wore' => 'wear',
|
504
|
+
# 'worked' => 'work',
|
505
|
+
# 'worn' => 'wear',
|
506
|
+
# 'wound' => 'wind',
|
507
|
+
# 'wove' => 'weave',
|
508
|
+
# 'woven' => 'weave',
|
509
|
+
# 'written' => 'write',
|
510
|
+
# 'wrote' => 'write',
|
511
|
+
# 'wrought' => 'work',
|
512
|
+
# 'wrung' => 'wring'
|
513
|
+
# }
|
514
|
+
#
|
515
|
+
# # Mapping of word suffixes to infinitive rules.
|
516
|
+
# InfSuffixRules = {
|
517
|
+
# # '<suffix>' => {
|
518
|
+
# # :order => <sort order>,
|
519
|
+
# # :rule => <rule number>,
|
520
|
+
#
|
521
|
+
# # :word1 == 0 => Use 0, the index of the longest prefix
|
522
|
+
# # within @{$prefix{$self->{'suffix'} } }, below.
|
523
|
+
#
|
524
|
+
# # :word1 == 1 => Use 1, the index of the 2nd longest prefix
|
525
|
+
# # within @{$prefix{$self->{'suffix'} } }, below.
|
526
|
+
#
|
527
|
+
# # :word1 == -1 => Use the index of the shortest prefix
|
528
|
+
# # within @{$prefix{$self->{'suffix'} } }, below + a letter.
|
529
|
+
#
|
530
|
+
# # :word1 == -2 => Use the index of the shortest prefix
|
531
|
+
# # within @{$prefix{$self->{'suffix'} } }, below + a letter,
|
532
|
+
# # and use the shortest prefix as well.
|
533
|
+
#
|
534
|
+
# # :word1 == -3 => Use the index of the shortest prefix
|
535
|
+
# # within @{$prefix{$self->{'suffix'} } }, below + meter,
|
536
|
+
# # and use the shortest prefix + metre as well.
|
537
|
+
#
|
538
|
+
# # :word1 == -4 => Use the original string.
|
539
|
+
# 'hes' => {
|
540
|
+
# :order => 1011,
|
541
|
+
# :rule => '1',
|
542
|
+
# :word1 => 0, # Longest prefix.
|
543
|
+
# :suffix1 => '',
|
544
|
+
# :suffix2 => '',
|
545
|
+
# },
|
546
|
+
# 'ses' => {
|
547
|
+
# :order => 1021,
|
548
|
+
# :rule => '2',
|
549
|
+
# :word1 => 0, # Longest prefix.
|
550
|
+
# :suffix1 => '',
|
551
|
+
# :suffix2 => '',
|
552
|
+
# },
|
553
|
+
# 'xes' => {
|
554
|
+
# :order => 1031,
|
555
|
+
# :rule => '3',
|
556
|
+
# :word1 => 0, # Longest prefix.
|
557
|
+
# :suffix1 => '',
|
558
|
+
# :suffix2 => '',
|
559
|
+
# },
|
560
|
+
# 'zes' => {
|
561
|
+
# :order => 1041,
|
562
|
+
# :rule => '4',
|
563
|
+
# :word1 => 0, # Longest prefix.
|
564
|
+
# :suffix1 => '',
|
565
|
+
# :suffix2 => '',
|
566
|
+
# },
|
567
|
+
# 'iless' => {
|
568
|
+
# :order => 1051,
|
569
|
+
# :rule => '43a',
|
570
|
+
# :word1 => -1, # Shortest prefix.
|
571
|
+
# :suffix1 => 'y',
|
572
|
+
# :suffix2 => '',
|
573
|
+
# },
|
574
|
+
# 'less' => {
|
575
|
+
# :order => 1052,
|
576
|
+
# :rule => '43b',
|
577
|
+
# :word1 => -1, # Shortest prefix.
|
578
|
+
# :suffix1 => '',
|
579
|
+
# :suffix2 => '',
|
580
|
+
# },
|
581
|
+
# 'iness' => {
|
582
|
+
# :order => 1053,
|
583
|
+
# :rule => '44a',
|
584
|
+
# :word1 => -1, # Shortest prefix.
|
585
|
+
# :suffix1 => 'y',
|
586
|
+
# :suffix2 => '',
|
587
|
+
# },
|
588
|
+
# 'ness' => {
|
589
|
+
# :order => 1054,
|
590
|
+
# :rule => '44b',
|
591
|
+
# :word1 => -1, # Shortest prefix.
|
592
|
+
# :suffix1 => '',
|
593
|
+
# :suffix2 => '',
|
594
|
+
# },
|
595
|
+
# "'s" => {
|
596
|
+
# :order => 1055,
|
597
|
+
# :rule => '7',
|
598
|
+
# :word1 => -1, # Shortest prefix.
|
599
|
+
# :suffix1 => '',
|
600
|
+
# :suffix2 => '',
|
601
|
+
# },
|
602
|
+
# 'ies' => {
|
603
|
+
# :order => 1056,
|
604
|
+
# :rule => '13a',
|
605
|
+
# :word1 => -1, # Shortest prefix.
|
606
|
+
# :suffix1 => 'y',
|
607
|
+
# :suffix2 => '',
|
608
|
+
# },
|
609
|
+
# 'es' => {
|
610
|
+
# :order => 1057,
|
611
|
+
# :rule => '13b',
|
612
|
+
# :word1 => 0, # Longest prefix.
|
613
|
+
# :suffix1 => '',
|
614
|
+
# :suffix2 => '',
|
615
|
+
# },
|
616
|
+
# 'ss' => {
|
617
|
+
# :order => 1061,
|
618
|
+
# :rule => '6a',
|
619
|
+
# :word1 => -4, # Original string.
|
620
|
+
# :suffix1 => '',
|
621
|
+
# :suffix2 => '',
|
622
|
+
# },
|
623
|
+
# 's' => {
|
624
|
+
# :order => 1062,
|
625
|
+
# :rule => '6b',
|
626
|
+
# :word1 => -1, # Shortest prefix.
|
627
|
+
# :suffix1 => '',
|
628
|
+
# :suffix2 => '',
|
629
|
+
# },
|
630
|
+
# 'ater' => {
|
631
|
+
# :order => 1081,
|
632
|
+
# :rule => '8',
|
633
|
+
# :word1 => -4, # Original string.
|
634
|
+
# :suffix1 => '',
|
635
|
+
# :suffix2 => '',
|
636
|
+
# },
|
637
|
+
# 'cter' => {
|
638
|
+
# :order => 1091,
|
639
|
+
# :rule => '9',
|
640
|
+
# :word1 => -4, # Original string.
|
641
|
+
# :suffix1 => '',
|
642
|
+
# :suffix2 => '',
|
643
|
+
# },
|
644
|
+
# 'ier' => {
|
645
|
+
# :order => 1101,
|
646
|
+
# :rule => '10',
|
647
|
+
# :word1 => -1, # Shortest prefix.
|
648
|
+
# :suffix1 => 'y',
|
649
|
+
# :suffix2 => '',
|
650
|
+
# },
|
651
|
+
# 'er' => {
|
652
|
+
# :order => 1111,
|
653
|
+
# :rule => '11',
|
654
|
+
# :word1 => 0, # Longest prefix.
|
655
|
+
# :suffix1 => '',
|
656
|
+
# :suffix2 => '',
|
657
|
+
# },
|
658
|
+
# 'ied' => {
|
659
|
+
# :order => 1121,
|
660
|
+
# :rule => '12a',
|
661
|
+
# :word1 => -1, # Shortest prefix.
|
662
|
+
# :suffix1 => 'y',
|
663
|
+
# :suffix2 => '',
|
664
|
+
# },
|
665
|
+
# 'ed' => {
|
666
|
+
# :order => 1122,
|
667
|
+
# :rule => '12b', # There is extra code for 12b below.
|
668
|
+
# :word1 => 0, # Longest prefix.
|
669
|
+
# :suffix1 => '',
|
670
|
+
# :suffix2 => '',
|
671
|
+
# },
|
672
|
+
# 'iest' => {
|
673
|
+
# :order => 1141,
|
674
|
+
# :rule => '14a',
|
675
|
+
# :word1 => -1, # Shortest prefix.
|
676
|
+
# :suffix1 => 'y',
|
677
|
+
# :suffix2 => '',
|
678
|
+
# },
|
679
|
+
# 'est' => {
|
680
|
+
# :order => 1142,
|
681
|
+
# :rule => '14b',
|
682
|
+
# :word1 => -2, # Shortest prefix + a letter, and shortest prefix.
|
683
|
+
# :suffix1 => 'e',
|
684
|
+
# :suffix2 => '',
|
685
|
+
# },
|
686
|
+
# 'blity' => {
|
687
|
+
# :order => 1143,
|
688
|
+
# :rule => '21',
|
689
|
+
# :word1 => -4, # Original string.
|
690
|
+
# :suffix1 => '',
|
691
|
+
# :suffix2 => '',
|
692
|
+
# },
|
693
|
+
# 'bility' => {
|
694
|
+
# :order => 1144,
|
695
|
+
# :rule => '22',
|
696
|
+
# :word1 => -1, # Shortest prefix.
|
697
|
+
# :suffix1 => 'ble',
|
698
|
+
# :suffix2 => '',
|
699
|
+
# },
|
700
|
+
# 'fiable' => {
|
701
|
+
# :order => 1145,
|
702
|
+
# :rule => '23',
|
703
|
+
# :word1 => -1, # Shortest prefix.
|
704
|
+
# :suffix1 => 'fy',
|
705
|
+
# :suffix2 => '',
|
706
|
+
# },
|
707
|
+
# 'logist' => {
|
708
|
+
# :order => 1146,
|
709
|
+
# :rule => '24',
|
710
|
+
# :word1 => -1, # Shortest prefix.
|
711
|
+
# :suffix1 => 'logy',
|
712
|
+
# :suffix2 => '',
|
713
|
+
# },
|
714
|
+
# 'ing' => {
|
715
|
+
# :order => 1151,
|
716
|
+
# :rule => '15', # There is extra code for 15 below.
|
717
|
+
# :word1 => -2, # Shortest prefix + a letter, and shortest prefix.
|
718
|
+
# :suffix1 => 'e',
|
719
|
+
# :suffix2 => '',
|
720
|
+
# },
|
721
|
+
# 'ist' => {
|
722
|
+
# :order => 1161,
|
723
|
+
# :rule => '16',
|
724
|
+
# :word1 => -2, # Shortest prefix + a letter, and shortest prefix.
|
725
|
+
# :suffix1 => 'e',
|
726
|
+
# :suffix2 => '',
|
727
|
+
# },
|
728
|
+
# 'ism' => {
|
729
|
+
# :order => 1171,
|
730
|
+
# :rule => '17',
|
731
|
+
# :word1 => -2, # Shortest prefix + a letter, and shortest prefix.
|
732
|
+
# :suffix1 => 'e',
|
733
|
+
# :suffix2 => '',
|
734
|
+
# },
|
735
|
+
# 'ity' => {
|
736
|
+
# :order => 1181,
|
737
|
+
# :rule => '18',
|
738
|
+
# :word1 => -2, # Shortest prefix + a letter, and shortest prefix.
|
739
|
+
# :suffix1 => 'e',
|
740
|
+
# :suffix2 => '',
|
741
|
+
# },
|
742
|
+
# 'ize' => {
|
743
|
+
# :order => 1191,
|
744
|
+
# :rule => '19',
|
745
|
+
# :word1 => -2, # Shortest prefix + a letter, and shortest prefix.
|
746
|
+
# :suffix1 => 'e',
|
747
|
+
# :suffix2 => '',
|
748
|
+
# },
|
749
|
+
# 'cable' => {
|
750
|
+
# :order => 1201,
|
751
|
+
# :rule => '20a',
|
752
|
+
# :word1 => -4, # Original string.
|
753
|
+
# :suffix1 => '',
|
754
|
+
# :suffix2 => '',
|
755
|
+
# },
|
756
|
+
# 'gable' => {
|
757
|
+
# :order => 1202,
|
758
|
+
# :rule => '20b',
|
759
|
+
# :word1 => -4, # Original string.
|
760
|
+
# :suffix1 => '',
|
761
|
+
# :suffix2 => '',
|
762
|
+
# },
|
763
|
+
# 'able' => {
|
764
|
+
# :order => 1203,
|
765
|
+
# :rule => '20c',
|
766
|
+
# :word1 => -2, # Shortest prefix + a letter, and shortest prefix.
|
767
|
+
# :suffix1 => 'e',
|
768
|
+
# :suffix2 => '',
|
769
|
+
# },
|
770
|
+
# 'graphic' => {
|
771
|
+
# :order => 1251,
|
772
|
+
# :rule => '25',
|
773
|
+
# :word1 => -1, # Shortest prefix.
|
774
|
+
# :suffix1 => 'graphy',
|
775
|
+
# :suffix2 => '',
|
776
|
+
# },
|
777
|
+
# 'istic' => {
|
778
|
+
# :order => 1261,
|
779
|
+
# :rule => '26',
|
780
|
+
# :word1 => -1, # Shortest prefix.
|
781
|
+
# :suffix1 => 'ist',
|
782
|
+
# :suffix2 => '',
|
783
|
+
# },
|
784
|
+
# 'itic' => {
|
785
|
+
# :order => 1271,
|
786
|
+
# :rule => '27',
|
787
|
+
# :word1 => -1, # Shortest prefix.
|
788
|
+
# :suffix1 => 'ite',
|
789
|
+
# :suffix2 => '',
|
790
|
+
# },
|
791
|
+
# 'like' => {
|
792
|
+
# :order => 1281,
|
793
|
+
# :rule => '28',
|
794
|
+
# :word1 => -1, # Shortest prefix.
|
795
|
+
# :suffix1 => '',
|
796
|
+
# :suffix2 => '',
|
797
|
+
# },
|
798
|
+
# 'logic' => {
|
799
|
+
# :order => 1291,
|
800
|
+
# :rule => '29',
|
801
|
+
# :word1 => -1, # Shortest prefix.
|
802
|
+
# :suffix1 => 'logy',
|
803
|
+
# :suffix2 => '',
|
804
|
+
# },
|
805
|
+
# 'ment' => {
|
806
|
+
# :order => 1301,
|
807
|
+
# :rule => '30',
|
808
|
+
# :word1 => -1, # Shortest prefix.
|
809
|
+
# :suffix1 => '',
|
810
|
+
# :suffix2 => '',
|
811
|
+
# },
|
812
|
+
# 'mental' => {
|
813
|
+
# :order => 1311,
|
814
|
+
# :rule => '31',
|
815
|
+
# :word1 => -1, # Shortest prefix.
|
816
|
+
# :suffix1 => 'ment',
|
817
|
+
# :suffix2 => '',
|
818
|
+
# },
|
819
|
+
# 'metry' => {
|
820
|
+
# :order => 1321,
|
821
|
+
# :rule => '32',
|
822
|
+
# :word1 => -3, # Shortest prefix + meter, and shortest prefix + metre.
|
823
|
+
# :suffix1 => 'meter',
|
824
|
+
# :suffix2 => 'metre',
|
825
|
+
# },
|
826
|
+
# 'nce' => {
|
827
|
+
# :order => 1331,
|
828
|
+
# :rule => '33',
|
829
|
+
# :word1 => -1, # Shortest prefix.
|
830
|
+
# :suffix1 => 'nt',
|
831
|
+
# :suffix2 => '',
|
832
|
+
# },
|
833
|
+
# 'ncy' => {
|
834
|
+
# :order => 1341,
|
835
|
+
# :rule => '34',
|
836
|
+
# :word1 => -1, # Shortest prefix.
|
837
|
+
# :suffix1 => 'nt',
|
838
|
+
# :suffix2 => '',
|
839
|
+
# },
|
840
|
+
# 'ship' => {
|
841
|
+
# :order => 1351,
|
842
|
+
# :rule => '35',
|
843
|
+
# :word1 => -1, # Shortest prefix.
|
844
|
+
# :suffix1 => '',
|
845
|
+
# :suffix2 => '',
|
846
|
+
# },
|
847
|
+
# 'ical' => {
|
848
|
+
# :order => 1361,
|
849
|
+
# :rule => '36',
|
850
|
+
# :word1 => -1, # Shortest prefix.
|
851
|
+
# :suffix1 => 'ic',
|
852
|
+
# :suffix2 => '',
|
853
|
+
# },
|
854
|
+
# 'ional' => {
|
855
|
+
# :order => 1371,
|
856
|
+
# :rule => '37',
|
857
|
+
# :word1 => -1, # Shortest prefix.
|
858
|
+
# :suffix1 => 'ion',
|
859
|
+
# :suffix2 => '',
|
860
|
+
# },
|
861
|
+
# 'bly' => {
|
862
|
+
# :order => 1381,
|
863
|
+
# :rule => '38',
|
864
|
+
# :word1 => -1, # Shortest prefix.
|
865
|
+
# :suffix1 => 'ble',
|
866
|
+
# :suffix2 => '',
|
867
|
+
# },
|
868
|
+
# 'ily' => {
|
869
|
+
# :order => 1391,
|
870
|
+
# :rule => '39',
|
871
|
+
# :word1 => -1, # Shortest prefix.
|
872
|
+
# :suffix1 => 'y',
|
873
|
+
# :suffix2 => '',
|
874
|
+
# },
|
875
|
+
# 'ly' => {
|
876
|
+
# :order => 1401,
|
877
|
+
# :rule => '40',
|
878
|
+
# :word1 => -1, # Shortest prefix.
|
879
|
+
# :suffix1 => '',
|
880
|
+
# :suffix2 => '',
|
881
|
+
# },
|
882
|
+
# 'iful' => {
|
883
|
+
# :order => 1411,
|
884
|
+
# :rule => '41a',
|
885
|
+
# :word1 => -1, # Shortest prefix.
|
886
|
+
# :suffix1 => 'y',
|
887
|
+
# :suffix2 => '',
|
888
|
+
# },
|
889
|
+
# 'ful' => {
|
890
|
+
# :order => 1412,
|
891
|
+
# :rule => '41b',
|
892
|
+
# :word1 => -1, # Shortest prefix.
|
893
|
+
# :suffix1 => '',
|
894
|
+
# :suffix2 => '',
|
895
|
+
# },
|
896
|
+
# 'ihood' => {
|
897
|
+
# :order => 1421,
|
898
|
+
# :rule => '42a',
|
899
|
+
# :word1 => -1, # Shortest prefix.
|
900
|
+
# :suffix1 => 'y',
|
901
|
+
# :suffix2 => '',
|
902
|
+
# },
|
903
|
+
# 'hood' => {
|
904
|
+
# :order => 1422,
|
905
|
+
# :rule => '42b',
|
906
|
+
# :word1 => -1, # Shortest prefix.
|
907
|
+
# :suffix1 => '',
|
908
|
+
# :suffix2 => '',
|
909
|
+
# },
|
910
|
+
# 'ification' => {
|
911
|
+
# :order => 1451,
|
912
|
+
# :rule => '45',
|
913
|
+
# :word1 => -1, # Shortest prefix.
|
914
|
+
# :suffix1 => 'ify',
|
915
|
+
# :suffix2 => '',
|
916
|
+
# },
|
917
|
+
# 'ization' => {
|
918
|
+
# :order => 1461,
|
919
|
+
# :rule => '46',
|
920
|
+
# :word1 => -1, # Shortest prefix.
|
921
|
+
# :suffix1 => 'ize',
|
922
|
+
# :suffix2 => '',
|
923
|
+
# },
|
924
|
+
# 'ction' => {
|
925
|
+
# :order => 1471,
|
926
|
+
# :rule => '47',
|
927
|
+
# :word1 => -1, # Shortest prefix.
|
928
|
+
# :suffix1 => 'ct',
|
929
|
+
# :suffix2 => '',
|
930
|
+
# },
|
931
|
+
# 'rtion' => {
|
932
|
+
# :order => 1481,
|
933
|
+
# :rule => '48',
|
934
|
+
# :word1 => -1, # Shortest prefix.
|
935
|
+
# :suffix1 => 'rt',
|
936
|
+
# :suffix2 => '',
|
937
|
+
# },
|
938
|
+
# 'ation' => {
|
939
|
+
# :order => 1491,
|
940
|
+
# :rule => '49',
|
941
|
+
# :word1 => -1, # Shortest prefix.
|
942
|
+
# :suffix1 => 'ate',
|
943
|
+
# :suffix2 => '',
|
944
|
+
# },
|
945
|
+
# 'ator' => {
|
946
|
+
# :order => 1501,
|
947
|
+
# :rule => '50',
|
948
|
+
# :word1 => -1, # Shortest prefix.
|
949
|
+
# :suffix1 => 'ate',
|
950
|
+
# :suffix2 => '',
|
951
|
+
# },
|
952
|
+
# 'ctor' => {
|
953
|
+
# :order => 1511,
|
954
|
+
# :rule => '51',
|
955
|
+
# :word1 => -1, # Shortest prefix.
|
956
|
+
# :suffix1 => 'ct',
|
957
|
+
# :suffix2 => '',
|
958
|
+
# },
|
959
|
+
# 'ive' => {
|
960
|
+
# :order => 1521,
|
961
|
+
# :rule => '52',
|
962
|
+
# :word1 => -1, # Shortest prefix.
|
963
|
+
# :suffix1 => 'ion',
|
964
|
+
# :suffix2 => '',
|
965
|
+
# },
|
966
|
+
# 'onian' => {
|
967
|
+
# :order => 1530,
|
968
|
+
# :rule => '54',
|
969
|
+
# :word1 => -1, # Shortest prefix.
|
970
|
+
# :suffix1 => 'on',
|
971
|
+
# :suffix2 => '',
|
972
|
+
# },
|
973
|
+
# 'an' => {
|
974
|
+
# :order => 1531,
|
975
|
+
# :rule => '53',
|
976
|
+
# :word1 => -1, # Shortest prefix.
|
977
|
+
# :suffix1 => 'a',
|
978
|
+
# :suffix2 => '',
|
979
|
+
# },
|
980
|
+
# }
|
981
|
+
#
|
982
|
+
# PL_prep = matchgroup %w[
|
983
|
+
# about above across after among around at athwart before behind
|
984
|
+
# below beneath beside besides between betwixt beyond but by
|
985
|
+
# during except for from in into near of off on onto out over
|
986
|
+
# since till to under until unto upon with
|
987
|
+
# ]
|
988
|
+
#
|
989
|
+
# PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
|
990
|
+
# PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
|
991
|
+
#
|
992
|
+
#
|
993
|
+
# PL_pron_nom_h = {
|
994
|
+
# # Nominative Reflexive
|
995
|
+
# "i" => "we", "myself" => "ourselves",
|
996
|
+
# "you" => "you", "yourself" => "yourselves",
|
997
|
+
# "she" => "they", "herself" => "themselves",
|
998
|
+
# "he" => "they", "himself" => "themselves",
|
999
|
+
# "it" => "they", "itself" => "themselves",
|
1000
|
+
# "they" => "they", "themself" => "themselves",
|
1001
|
+
#
|
1002
|
+
# # Possessive
|
1003
|
+
# "mine" => "ours",
|
1004
|
+
# "yours" => "yours",
|
1005
|
+
# "hers" => "theirs",
|
1006
|
+
# "his" => "theirs",
|
1007
|
+
# "its" => "theirs",
|
1008
|
+
# "theirs" => "theirs",
|
1009
|
+
# }
|
1010
|
+
# PL_pron_nom = matchgroup PL_pron_nom_h.keys
|
1011
|
+
#
|
1012
|
+
# PL_pron_acc_h = {
|
1013
|
+
# # Accusative Reflexive
|
1014
|
+
# "me" => "us", "myself" => "ourselves",
|
1015
|
+
# "you" => "you", "yourself" => "yourselves",
|
1016
|
+
# "her" => "them", "herself" => "themselves",
|
1017
|
+
# "him" => "them", "himself" => "themselves",
|
1018
|
+
# "it" => "them", "itself" => "themselves",
|
1019
|
+
# "them" => "them", "themself" => "themselves",
|
1020
|
+
# }
|
1021
|
+
# PL_pron_acc = matchgroup PL_pron_acc_h.keys
|
1022
|
+
#
|
1023
|
+
# PL_v_irregular_pres_h = {
|
1024
|
+
# # 1st pers. sing. 2nd pers. sing. 3rd pers. singular
|
1025
|
+
# # 3rd pers. (indet.)
|
1026
|
+
# "am" => "are", "are" => "are", "is" => "are",
|
1027
|
+
# "was" => "were", "were" => "were", "was" => "were",
|
1028
|
+
# "have" => "have", "have" => "have", "has" => "have",
|
1029
|
+
# }
|
1030
|
+
# PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys
|
1031
|
+
#
|
1032
|
+
# PL_v_ambiguous_pres_h = {
|
1033
|
+
# # 1st pers. sing. 2nd pers. sing. 3rd pers. singular
|
1034
|
+
# # 3rd pers. (indet.)
|
1035
|
+
# "act" => "act", "act" => "act", "acts" => "act",
|
1036
|
+
# "blame" => "blame", "blame" => "blame", "blames" => "blame",
|
1037
|
+
# "can" => "can", "can" => "can", "can" => "can",
|
1038
|
+
# "must" => "must", "must" => "must", "must" => "must",
|
1039
|
+
# "fly" => "fly", "fly" => "fly", "flies" => "fly",
|
1040
|
+
# "copy" => "copy", "copy" => "copy", "copies" => "copy",
|
1041
|
+
# "drink" => "drink", "drink" => "drink", "drinks" => "drink",
|
1042
|
+
# "fight" => "fight", "fight" => "fight", "fights" => "fight",
|
1043
|
+
# "fire" => "fire", "fire" => "fire", "fires" => "fire",
|
1044
|
+
# "like" => "like", "like" => "like", "likes" => "like",
|
1045
|
+
# "look" => "look", "look" => "look", "looks" => "look",
|
1046
|
+
# "make" => "make", "make" => "make", "makes" => "make",
|
1047
|
+
# "reach" => "reach", "reach" => "reach", "reaches" => "reach",
|
1048
|
+
# "run" => "run", "run" => "run", "runs" => "run",
|
1049
|
+
# "sink" => "sink", "sink" => "sink", "sinks" => "sink",
|
1050
|
+
# "sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
|
1051
|
+
# "view" => "view", "view" => "view", "views" => "view",
|
1052
|
+
# }
|
1053
|
+
# PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
|
1054
|
+
#
|
1055
|
+
# PL_v_irregular_non_pres = matchgroup %w[
|
1056
|
+
# did had ate made put
|
1057
|
+
# spent fought sank gave sought
|
1058
|
+
# shall could ought should
|
1059
|
+
# ]
|
1060
|
+
#
|
1061
|
+
# PL_v_ambiguous_non_pres = matchgroup %w[
|
1062
|
+
# thought saw bent will might cut
|
1063
|
+
# ]
|
1064
|
+
#
|
1065
|
+
# PL_count_zero = matchgroup %w[
|
1066
|
+
# 0 no zero nil
|
1067
|
+
# ]
|
1068
|
+
#
|
1069
|
+
# PL_count_one = matchgroup %w[
|
1070
|
+
# 1 a an one each every this that
|
1071
|
+
# ]
|
1072
|
+
#
|
1073
|
+
# PL_adj_special_h = {
|
1074
|
+
# "a" => "some", "an" => "some",
|
1075
|
+
# "this" => "these", "that" => "those",
|
1076
|
+
# }
|
1077
|
+
# PL_adj_special = matchgroup PL_adj_special_h.keys
|
1078
|
+
#
|
1079
|
+
# PL_adj_poss_h = {
|
1080
|
+
# "my" => "our",
|
1081
|
+
# "your" => "your",
|
1082
|
+
# "its" => "their",
|
1083
|
+
# "her" => "their",
|
1084
|
+
# "his" => "their",
|
1085
|
+
# "their" => "their",
|
1086
|
+
# }
|