mittens 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
routines (
|
|
2
|
+
postlude mark_regions
|
|
3
|
+
RV R1 R2
|
|
4
|
+
attached_pronoun
|
|
5
|
+
standard_suffix
|
|
6
|
+
y_verb_suffix
|
|
7
|
+
verb_suffix
|
|
8
|
+
residual_suffix
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
externals ( stem )
|
|
12
|
+
|
|
13
|
+
integers ( pV p1 p2 )
|
|
14
|
+
|
|
15
|
+
groupings ( v )
|
|
16
|
+
|
|
17
|
+
stringescapes {}
|
|
18
|
+
|
|
19
|
+
/* special characters */
|
|
20
|
+
|
|
21
|
+
stringdef a' '{U+00E1}' // a-acute
|
|
22
|
+
stringdef e' '{U+00E9}' // e-acute
|
|
23
|
+
stringdef i' '{U+00ED}' // i-acute
|
|
24
|
+
stringdef o' '{U+00F3}' // o-acute
|
|
25
|
+
stringdef u' '{U+00FA}' // u-acute
|
|
26
|
+
stringdef u" '{U+00FC}' // u-diaeresis
|
|
27
|
+
stringdef n~ '{U+00F1}' // n-tilde
|
|
28
|
+
|
|
29
|
+
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'
|
|
30
|
+
|
|
31
|
+
define mark_regions as (
|
|
32
|
+
|
|
33
|
+
$pV = limit
|
|
34
|
+
$p1 = limit
|
|
35
|
+
$p2 = limit // defaults
|
|
36
|
+
|
|
37
|
+
do (
|
|
38
|
+
( v (non-v gopast v) or (v gopast non-v) )
|
|
39
|
+
or
|
|
40
|
+
( non-v (non-v gopast v) or (v next) )
|
|
41
|
+
setmark pV
|
|
42
|
+
)
|
|
43
|
+
do (
|
|
44
|
+
gopast v gopast non-v setmark p1
|
|
45
|
+
gopast v gopast non-v setmark p2
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
define postlude as repeat (
|
|
50
|
+
[substring] among(
|
|
51
|
+
'{a'}' (<- 'a')
|
|
52
|
+
'{e'}' (<- 'e')
|
|
53
|
+
'{i'}' (<- 'i')
|
|
54
|
+
'{o'}' (<- 'o')
|
|
55
|
+
'{u'}' (<- 'u')
|
|
56
|
+
// and possibly {u"}->u here, or in prelude
|
|
57
|
+
'' (next)
|
|
58
|
+
) //or next
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
backwardmode (
|
|
62
|
+
|
|
63
|
+
define RV as $pV <= cursor // succeeds only while the cursor is at or after pV (marked in mark_regions)
|
|
64
|
+
define R1 as $p1 <= cursor // succeeds only while the cursor is at or after p1 (marked in mark_regions)
|
|
65
|
+
define R2 as $p2 <= cursor // succeeds only while the cursor is at or after p2 (marked in mark_regions)
|
|
66
|
+
|
|
67
|
+
define attached_pronoun as (
|
|
68
|
+
[substring] among(
|
|
69
|
+
'me' 'se' 'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
|
|
70
|
+
'las' 'les' 'los' 'nos'
|
|
71
|
+
)
|
|
72
|
+
substring RV among(
|
|
73
|
+
'i{e'}ndo' (] <- 'iendo')
|
|
74
|
+
'{a'}ndo' (] <- 'ando')
|
|
75
|
+
'{a'}r' (] <- 'ar')
|
|
76
|
+
'{e'}r' (] <- 'er')
|
|
77
|
+
'{i'}r' (] <- 'ir')
|
|
78
|
+
'ando'
|
|
79
|
+
'iendo'
|
|
80
|
+
'ar' 'er' 'ir'
|
|
81
|
+
(delete)
|
|
82
|
+
'yendo' ('u' delete)
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
define standard_suffix as (
|
|
87
|
+
[substring] among(
|
|
88
|
+
|
|
89
|
+
'anza' 'anzas'
|
|
90
|
+
'ico' 'ica' 'icos' 'icas'
|
|
91
|
+
'ismo' 'ismos'
|
|
92
|
+
'able' 'ables'
|
|
93
|
+
'ible' 'ibles'
|
|
94
|
+
'ista' 'istas'
|
|
95
|
+
'oso' 'osa' 'osos' 'osas'
|
|
96
|
+
'amiento' 'amientos'
|
|
97
|
+
'imiento' 'imientos'
|
|
98
|
+
(
|
|
99
|
+
R2 delete
|
|
100
|
+
)
|
|
101
|
+
'adora' 'ador' 'aci{o'}n'
|
|
102
|
+
'adoras' 'adores' 'aciones'
|
|
103
|
+
'ante' 'antes' 'ancia' 'ancias'// Note 1
|
|
104
|
+
(
|
|
105
|
+
R2 delete
|
|
106
|
+
try ( ['ic'] R2 delete )
|
|
107
|
+
)
|
|
108
|
+
'log{i'}a'
|
|
109
|
+
'log{i'}as'
|
|
110
|
+
(
|
|
111
|
+
R2 <- 'log'
|
|
112
|
+
)
|
|
113
|
+
'uci{o'}n' 'uciones'
|
|
114
|
+
(
|
|
115
|
+
R2 <- 'u'
|
|
116
|
+
)
|
|
117
|
+
'encia' 'encias'
|
|
118
|
+
(
|
|
119
|
+
R2 <- 'ente'
|
|
120
|
+
)
|
|
121
|
+
'amente'
|
|
122
|
+
(
|
|
123
|
+
R1 delete
|
|
124
|
+
try (
|
|
125
|
+
[substring] R2 delete among(
|
|
126
|
+
'iv' (['at'] R2 delete)
|
|
127
|
+
'os'
|
|
128
|
+
'ic'
|
|
129
|
+
'ad'
|
|
130
|
+
)
|
|
131
|
+
)
|
|
132
|
+
)
|
|
133
|
+
'mente'
|
|
134
|
+
(
|
|
135
|
+
R2 delete
|
|
136
|
+
try (
|
|
137
|
+
[substring] among(
|
|
138
|
+
'ante' // Note 1
|
|
139
|
+
'able'
|
|
140
|
+
'ible' (R2 delete)
|
|
141
|
+
)
|
|
142
|
+
)
|
|
143
|
+
)
|
|
144
|
+
'idad'
|
|
145
|
+
'idades'
|
|
146
|
+
(
|
|
147
|
+
R2 delete
|
|
148
|
+
try (
|
|
149
|
+
[substring] among(
|
|
150
|
+
'abil'
|
|
151
|
+
'ic'
|
|
152
|
+
'iv' (R2 delete)
|
|
153
|
+
)
|
|
154
|
+
)
|
|
155
|
+
)
|
|
156
|
+
'iva' 'ivo'
|
|
157
|
+
'ivas' 'ivos'
|
|
158
|
+
(
|
|
159
|
+
R2 delete
|
|
160
|
+
try (
|
|
161
|
+
['at'] R2 delete // but not a further ['ic'] R2 delete
|
|
162
|
+
)
|
|
163
|
+
)
|
|
164
|
+
)
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
define y_verb_suffix as (
|
|
168
|
+
setlimit tomark pV for ([substring]) among(
|
|
169
|
+
'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
|
|
170
|
+
'yas' 'yes' 'yais' 'yamos'
|
|
171
|
+
('u' delete)
|
|
172
|
+
)
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
define verb_suffix as (
|
|
176
|
+
setlimit tomark pV for ([substring]) among(
|
|
177
|
+
|
|
178
|
+
'en' 'es' '{e'}is' 'emos'
|
|
179
|
+
(try ('u' test 'g') ] delete)
|
|
180
|
+
|
|
181
|
+
'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
|
|
182
|
+
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
|
|
183
|
+
'ar{e'}'
|
|
184
|
+
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
|
|
185
|
+
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
|
|
186
|
+
'er{e'}'
|
|
187
|
+
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
|
|
188
|
+
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
|
|
189
|
+
'ir{e'}'
|
|
190
|
+
|
|
191
|
+
'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
|
|
192
|
+
'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
|
|
193
|
+
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
|
|
194
|
+
'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
|
|
195
|
+
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
|
|
196
|
+
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
|
|
197
|
+
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
|
|
198
|
+
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
|
|
199
|
+
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
|
|
200
|
+
(delete)
|
|
201
|
+
)
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
define residual_suffix as (
|
|
205
|
+
[substring] among(
|
|
206
|
+
'os'
|
|
207
|
+
'a' 'o' '{a'}' '{i'}' '{o'}'
|
|
208
|
+
( RV delete )
|
|
209
|
+
'e' '{e'}'
|
|
210
|
+
( RV delete try( ['u'] test 'g' RV delete ) )
|
|
211
|
+
)
|
|
212
|
+
)
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
define stem as (
|
|
216
|
+
do mark_regions
|
|
217
|
+
backwards (
|
|
218
|
+
do attached_pronoun
|
|
219
|
+
do ( standard_suffix or
|
|
220
|
+
y_verb_suffix or
|
|
221
|
+
verb_suffix
|
|
222
|
+
)
|
|
223
|
+
do residual_suffix
|
|
224
|
+
)
|
|
225
|
+
do postlude
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
/*
|
|
229
|
+
Note 1: additions of 15 Jun 2005
|
|
230
|
+
*/
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
routines (
|
|
2
|
+
mark_regions
|
|
3
|
+
main_suffix
|
|
4
|
+
consonant_pair
|
|
5
|
+
other_suffix
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
externals ( stem )
|
|
9
|
+
|
|
10
|
+
integers ( p1 x )
|
|
11
|
+
|
|
12
|
+
groupings ( v s_ending )
|
|
13
|
+
|
|
14
|
+
stringescapes {}
|
|
15
|
+
|
|
16
|
+
/* special characters */
|
|
17
|
+
|
|
18
|
+
stringdef a" '{U+00E4}'
|
|
19
|
+
stringdef ao '{U+00E5}'
|
|
20
|
+
stringdef o" '{U+00F6}'
|
|
21
|
+
|
|
22
|
+
define v 'aeiouy{a"}{ao}{o"}'
|
|
23
|
+
|
|
24
|
+
define s_ending 'bcdfghjklmnoprtvy'
|
|
25
|
+
|
|
26
|
+
define mark_regions as (
|
|
27
|
+
|
|
28
|
+
$p1 = limit
|
|
29
|
+
test ( hop 3 setmark x )
|
|
30
|
+
goto v gopast non-v setmark p1
|
|
31
|
+
try ( $p1 < x $p1 = x )
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
backwardmode (
|
|
35
|
+
|
|
36
|
+
define main_suffix as (
|
|
37
|
+
setlimit tomark p1 for ([substring])
|
|
38
|
+
among(
|
|
39
|
+
|
|
40
|
+
'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
|
|
41
|
+
'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
|
|
42
|
+
'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
|
|
43
|
+
'hetens' 'erns' 'at' 'andet' 'het' 'ast'
|
|
44
|
+
(delete)
|
|
45
|
+
's'
|
|
46
|
+
(s_ending delete)
|
|
47
|
+
)
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
define consonant_pair as setlimit tomark p1 for (
|
|
51
|
+
among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
|
|
52
|
+
and ([next] delete)
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
define other_suffix as setlimit tomark p1 for (
|
|
56
|
+
[substring] among(
|
|
57
|
+
'lig' 'ig' 'els' (delete)
|
|
58
|
+
'l{o"}st' (<-'l{o"}s')
|
|
59
|
+
'fullt' (<-'full')
|
|
60
|
+
)
|
|
61
|
+
)
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
define stem as (
|
|
65
|
+
|
|
66
|
+
do mark_regions
|
|
67
|
+
backwards (
|
|
68
|
+
do main_suffix
|
|
69
|
+
do consonant_pair
|
|
70
|
+
do other_suffix
|
|
71
|
+
)
|
|
72
|
+
)
|
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Affix stripping stemming algorithm for Tamil
|
|
3
|
+
* By Damodharan Rajalingam
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
stringescapes {}
|
|
7
|
+
|
|
8
|
+
/* Aytham */
|
|
9
|
+
stringdef aytham '{U+0B83}'
|
|
10
|
+
|
|
11
|
+
/* Uyir - independent vowels */
|
|
12
|
+
stringdef a '{U+0B85}'
|
|
13
|
+
stringdef aa '{U+0B86}'
|
|
14
|
+
stringdef i '{U+0B87}'
|
|
15
|
+
stringdef ii '{U+0B88}'
|
|
16
|
+
stringdef u '{U+0B89}'
|
|
17
|
+
stringdef uu '{U+0B8A}'
|
|
18
|
+
stringdef e '{U+0B8E}'
|
|
19
|
+
stringdef ee '{U+0B8F}'
|
|
20
|
+
stringdef ai '{U+0B90}'
|
|
21
|
+
stringdef o '{U+0B92}'
|
|
22
|
+
stringdef oo '{U+0B93}'
|
|
23
|
+
stringdef au '{U+0B94}'
|
|
24
|
+
|
|
25
|
+
/* Consonants */
|
|
26
|
+
stringdef ka '{U+0B95}'
|
|
27
|
+
stringdef nga '{U+0B99}'
|
|
28
|
+
stringdef ca '{U+0B9A}'
|
|
29
|
+
stringdef ja '{U+0B9C}'
|
|
30
|
+
stringdef nya '{U+0B9E}'
|
|
31
|
+
stringdef tta '{U+0B9F}'
|
|
32
|
+
stringdef nna '{U+0BA3}'
|
|
33
|
+
stringdef ta '{U+0BA4}'
|
|
34
|
+
stringdef tha '{U+0BA4}'
|
|
35
|
+
stringdef na '{U+0BA8}'
|
|
36
|
+
stringdef nnna '{U+0BA9}'
|
|
37
|
+
stringdef pa '{U+0BAA}'
|
|
38
|
+
stringdef ma '{U+0BAE}'
|
|
39
|
+
stringdef ya '{U+0BAF}'
|
|
40
|
+
stringdef ra '{U+0BB0}'
|
|
41
|
+
stringdef rra '{U+0BB1}'
|
|
42
|
+
stringdef la '{U+0BB2}'
|
|
43
|
+
stringdef lla '{U+0BB3}'
|
|
44
|
+
stringdef llla '{U+0BB4}'
|
|
45
|
+
stringdef zha '{U+0BB4}'
|
|
46
|
+
stringdef va '{U+0BB5}'
|
|
47
|
+
|
|
48
|
+
/* Vatamozi - borrowed */
|
|
49
|
+
stringdef sha '{U+0BB6}'
|
|
50
|
+
stringdef ssa '{U+0BB7}'
|
|
51
|
+
stringdef sa '{U+0BB8}'
|
|
52
|
+
stringdef ha '{U+0BB9}'
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
/* Dependent vowel signs (kombu etc.) */
|
|
56
|
+
stringdef vs_aa '{U+0BBE}'
|
|
57
|
+
stringdef vs_i '{U+0BBF}'
|
|
58
|
+
stringdef vs_ii '{U+0BC0}'
|
|
59
|
+
stringdef vs_u '{U+0BC1}'
|
|
60
|
+
stringdef vs_uu '{U+0BC2}'
|
|
61
|
+
stringdef vs_e '{U+0BC6}'
|
|
62
|
+
stringdef vs_ee '{U+0BC7}'
|
|
63
|
+
stringdef vs_ai '{U+0BC8}'
|
|
64
|
+
stringdef vs_o '{U+0BCA}'
|
|
65
|
+
stringdef vs_oo '{U+0BCB}'
|
|
66
|
+
stringdef vs_au '{U+0BCC}'
|
|
67
|
+
|
|
68
|
+
/* Pulli */
|
|
69
|
+
stringdef pulli '{U+0BCD}'
|
|
70
|
+
|
|
71
|
+
/* AU length mark */
|
|
72
|
+
stringdef au_lmark '{U+0BD7}'
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
routines (
|
|
76
|
+
remove_plural_suffix
|
|
77
|
+
remove_question_suffixes
|
|
78
|
+
remove_question_prefixes
|
|
79
|
+
remove_pronoun_prefixes
|
|
80
|
+
remove_command_suffixes
|
|
81
|
+
remove_um
|
|
82
|
+
remove_vetrumai_urupukal
|
|
83
|
+
fix_va_start
|
|
84
|
+
fix_ending
|
|
85
|
+
fix_endings
|
|
86
|
+
remove_tense_suffix
|
|
87
|
+
remove_tense_suffixes
|
|
88
|
+
remove_common_word_endings
|
|
89
|
+
has_min_length
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
externals ( stem )
|
|
93
|
+
|
|
94
|
+
booleans (
|
|
95
|
+
found_a_match
|
|
96
|
+
found_vetrumai_urupu
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
define has_min_length as (
|
|
100
|
+
$(len > 4)
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
define fix_va_start as (
|
|
104
|
+
(try '{va}{vs_oo}' and [ '{va}{vs_oo}' ] <- '{oo}' ) or
|
|
105
|
+
(try '{va}{vs_o}' and [ '{va}{vs_o}' ] <- '{o}' ) or
|
|
106
|
+
(try '{va}{vs_u}' and [ '{va}{vs_u}' ] <- '{u}' ) or
|
|
107
|
+
(try '{va}{vs_uu}' and [ '{va}{vs_uu}' ] <- '{uu}' )
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
define fix_endings as (
|
|
111
|
+
do repeat fix_ending
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
define remove_question_prefixes as (
|
|
115
|
+
[ ('{e}' ) among('{ka}' '{ca}' '{tha}' '{va}' '{na}' '{pa}' '{ma}' '{ya}' '{nga}' '{nya}') '{pulli}' ] delete
|
|
116
|
+
do fix_va_start
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
// Gives signal t if an ending was fixed, signal f otherwise.
|
|
120
|
+
define fix_ending as (
|
|
121
|
+
$(len > 3)
|
|
122
|
+
backwards (
|
|
123
|
+
( [among('{na}{pulli}' '{na}{pulli}{ta}' '{na}{pulli}{ta}{pulli}') ] delete )
|
|
124
|
+
or
|
|
125
|
+
( ['{ya}{pulli}' test among('{vs_ai}' '{vs_i}' '{vs_ii}') ] delete )
|
|
126
|
+
or
|
|
127
|
+
( [ '{tta}{pulli}{pa}{pulli}' or '{tta}{pulli}{ka}{pulli}' ] <- '{lla}{pulli}' )
|
|
128
|
+
or
|
|
129
|
+
( [ '{nnna}{pulli}{rra}{pulli}' ] <- '{la}{pulli}' )
|
|
130
|
+
or
|
|
131
|
+
// ( [ '{rra}{pulli}{ka}{pulli}' or '{nnna}{pulli}{nnna}{pulli}' ] <- '{la}{pulli}' )
|
|
132
|
+
( [ '{rra}{pulli}{ka}{pulli}' ] <- '{la}{pulli}' )
|
|
133
|
+
or
|
|
134
|
+
( [ '{tta}{pulli}{tta}{pulli}' ] <- '{tta}{vs_u}' )
|
|
135
|
+
or
|
|
136
|
+
( found_vetrumai_urupu [ '{ta}{pulli}{ta}{pulli}' (test not '{vs_ai}') ] <- '{ma}{pulli}' ] )
|
|
137
|
+
or
|
|
138
|
+
( [ '{vs_u}{ka}{pulli}' or '{vs_u}{ka}{pulli}{ka}{pulli}' ] <- '{pulli}' )
|
|
139
|
+
or
|
|
140
|
+
( [ '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') ] delete )
|
|
141
|
+
or
|
|
142
|
+
( [ '{vs_u}{ka}{pulli}' ] <- '{pulli}' )
|
|
143
|
+
or
|
|
144
|
+
( [ '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') ] delete )
|
|
145
|
+
or
|
|
146
|
+
( [ '{pulli}' (among('{ya}' '{ra}' '{la}' '{va}' '{zha}' '{lla}') or among('{nga}' '{nya}' '{nna}' '{na}' '{ma}' '{nnna}')) '{pulli}' ] <- '{pulli}' )
|
|
147
|
+
or
|
|
148
|
+
( [ among('{va}' '{ya}' '{va}{pulli}') ] delete )
|
|
149
|
+
or
|
|
150
|
+
( [ '{nnna}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')) ] delete )
|
|
151
|
+
or
|
|
152
|
+
( [ '{nga}{pulli}' (test not '{vs_ai}')] <- '{ma}{pulli}' )
|
|
153
|
+
or
|
|
154
|
+
( [ '{nga}{pulli}' ] delete )
|
|
155
|
+
or
|
|
156
|
+
( [ '{pulli}' (test (among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}') or '{pulli}')) ] delete )
|
|
157
|
+
)
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
define remove_pronoun_prefixes as (
|
|
161
|
+
unset found_a_match
|
|
162
|
+
[ among('{a}' '{i}' '{u}') among('{ka}' '{ca}' '{tha}' '{va}' '{na}' '{pa}' '{ma}' '{ya}' '{nga}' '{nya}') '{pulli}' ] delete
|
|
163
|
+
(set found_a_match)
|
|
164
|
+
do fix_va_start
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
define remove_plural_suffix as (
|
|
168
|
+
unset found_a_match
|
|
169
|
+
backwards (
|
|
170
|
+
( [ '{vs_u}{nga}{pulli}{ka}{lla}{pulli}' (test not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}')) ] <- '{pulli}' ) or
|
|
171
|
+
( [ '{rra}{pulli}{ka}{lla}{pulli}' ] <- '{la}{pulli}' ) or
|
|
172
|
+
( [ '{tta}{pulli}{ka}{lla}{pulli}' ] <- '{lla}{pulli}' ) or
|
|
173
|
+
( [ '{ka}{lla}{pulli}' ] delete )
|
|
174
|
+
(set found_a_match)
|
|
175
|
+
)
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
define remove_question_suffixes as (
|
|
179
|
+
has_min_length
|
|
180
|
+
unset found_a_match
|
|
181
|
+
backwards (
|
|
182
|
+
do (
|
|
183
|
+
[ among('{vs_oo}' '{vs_ee}' '{vs_aa}') ] <- '{pulli}'
|
|
184
|
+
(set found_a_match)
|
|
185
|
+
)
|
|
186
|
+
)
|
|
187
|
+
do fix_endings
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
define remove_command_suffixes as (
|
|
191
|
+
has_min_length
|
|
192
|
+
unset found_a_match
|
|
193
|
+
backwards (
|
|
194
|
+
[ among('{pa}{vs_i}' '{va}{vs_i}') ] delete
|
|
195
|
+
(set found_a_match)
|
|
196
|
+
)
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
define remove_um as (
|
|
200
|
+
unset found_a_match
|
|
201
|
+
has_min_length
|
|
202
|
+
backwards ( [ '{vs_u}{ma}{pulli}' ] <- '{pulli}'
|
|
203
|
+
(set found_a_match)
|
|
204
|
+
)
|
|
205
|
+
do fix_ending
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
define remove_common_word_endings as (
|
|
209
|
+
// These are not actually suffixes but are
|
|
210
|
+
// some words that are attached to other words
|
|
211
|
+
// but can be removed for stemming
|
|
212
|
+
unset found_a_match
|
|
213
|
+
has_min_length
|
|
214
|
+
backwards (
|
|
215
|
+
test ( [ '{vs_u}{tta}{nnna}{pulli}' or
|
|
216
|
+
'{vs_i}{la}{pulli}{la}{vs_ai}' or
|
|
217
|
+
'{vs_i}{tta}{ma}{pulli}' or
|
|
218
|
+
'{vs_i}{nnna}{pulli}{rra}{vs_i}' or
|
|
219
|
+
'{vs_aa}{ka}{vs_i}' or
|
|
220
|
+
'{vs_aa}{ka}{vs_i}{ya}' or
|
|
221
|
+
'{vs_e}{nnna}{pulli}{rra}{vs_u}' or
|
|
222
|
+
'{vs_u}{lla}{pulli}{lla}' or
|
|
223
|
+
'{vs_u}{tta}{vs_ai}{ya}' or
|
|
224
|
+
'{vs_u}{tta}{vs_ai}' or
|
|
225
|
+
'{vs_e}{nnna}{vs_u}{ma}{pulli}' or
|
|
226
|
+
('{la}{pulli}{la}' test (not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
|
|
227
|
+
'{vs_e}{nnna}' or
|
|
228
|
+
'{vs_aa}{ka}{vs_i}' ] <- '{pulli}'
|
|
229
|
+
(set found_a_match)
|
|
230
|
+
)
|
|
231
|
+
or
|
|
232
|
+
test ( [ among('{pa}{tta}{vs_u}'
|
|
233
|
+
'{pa}{tta}{pulli}{tta}'
|
|
234
|
+
'{pa}{tta}{pulli}{tta}{vs_u}'
|
|
235
|
+
'{pa}{tta}{pulli}{tta}{ta}{vs_u}'
|
|
236
|
+
'{pa}{tta}{pulli}{tta}{nna}'
|
|
237
|
+
'{ka}{vs_u}{ra}{vs_i}{ya}'
|
|
238
|
+
'{pa}{rra}{pulli}{rra}{vs_i}'
|
|
239
|
+
'{va}{vs_i}{tta}{vs_u}'
|
|
240
|
+
'{va}{vs_i}{tta}{pulli}{tta}{vs_u}'
|
|
241
|
+
'{pa}{tta}{vs_i}{ta}{vs_aa}{nnna}'
|
|
242
|
+
'{pa}{tta}{vs_i}'
|
|
243
|
+
'{ta}{vs_aa}{nnna}'
|
|
244
|
+
'{vs_e}{la}{pulli}{la}{vs_aa}{ma}{pulli}')
|
|
245
|
+
] delete
|
|
246
|
+
(set found_a_match)
|
|
247
|
+
)
|
|
248
|
+
)
|
|
249
|
+
do fix_endings
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
define remove_vetrumai_urupukal as (
|
|
253
|
+
unset found_a_match
|
|
254
|
+
unset found_vetrumai_urupu
|
|
255
|
+
has_min_length
|
|
256
|
+
backwards (
|
|
257
|
+
(
|
|
258
|
+
test ( ['{nnna}{vs_ai}'] delete )
|
|
259
|
+
or
|
|
260
|
+
test ([ ( '{vs_i}{nnna}{vs_ai}' or
|
|
261
|
+
'{vs_ai}' (test not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}'))) or
|
|
262
|
+
( '{vs_ai}' (test (among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}')))
|
|
263
|
+
] <- '{pulli}'
|
|
264
|
+
)
|
|
265
|
+
or
|
|
266
|
+
test ( [
|
|
267
|
+
'{vs_o}{tta}{vs_u}' or
|
|
268
|
+
'{vs_oo}{tta}{vs_u}' or
|
|
269
|
+
'{vs_i}{la}{pulli}' or
|
|
270
|
+
'{vs_i}{rra}{pulli}' or
|
|
271
|
+
('{vs_i}{nnna}{pulli}' (test not '{ma}')) or
|
|
272
|
+
'{vs_i}{nnna}{pulli}{rra}{vs_u}' or
|
|
273
|
+
'{vs_i}{ra}{vs_u}{na}{pulli}{ta}{vs_u}' or
|
|
274
|
+
'{va}{vs_i}{tta}' or
|
|
275
|
+
($(len >= 7) '{vs_i}{tta}{ma}{pulli}') or
|
|
276
|
+
'{vs_aa}{la}{pulli}' or
|
|
277
|
+
'{vs_u}{tta}{vs_ai}' or
|
|
278
|
+
'{vs_aa}{ma}{la}{pulli}' or
|
|
279
|
+
('{la}{pulli}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
|
|
280
|
+
'{vs_u}{lla}{pulli}'
|
|
281
|
+
] <- '{pulli}'
|
|
282
|
+
)
|
|
283
|
+
or
|
|
284
|
+
test ( [
|
|
285
|
+
'{ka}{nna}{pulli}' or
|
|
286
|
+
'{ma}{vs_u}{nnna}{pulli}' or
|
|
287
|
+
'{ma}{vs_ee}{la}{pulli}' or
|
|
288
|
+
'{ma}{vs_ee}{rra}{pulli}' or
|
|
289
|
+
'{ka}{vs_ii}{llla}{pulli}' or
|
|
290
|
+
'{pa}{vs_i}{nnna}{pulli}' or
|
|
291
|
+
('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')))
|
|
292
|
+
] delete
|
|
293
|
+
)
|
|
294
|
+
or
|
|
295
|
+
test ([ '{vs_ii}' ] <- '{vs_i}')
|
|
296
|
+
)
|
|
297
|
+
(set found_a_match)
|
|
298
|
+
(set found_vetrumai_urupu)
|
|
299
|
+
do ( [ '{vs_i}{nnna}{pulli}' ] <- '{pulli}' )
|
|
300
|
+
)
|
|
301
|
+
do fix_endings
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
define remove_tense_suffixes as (
|
|
305
|
+
set found_a_match
|
|
306
|
+
repeat ( found_a_match (do remove_tense_suffix) )
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
define remove_tense_suffix as (
|
|
310
|
+
unset found_a_match
|
|
311
|
+
has_min_length
|
|
312
|
+
backwards (
|
|
313
|
+
do (
|
|
314
|
+
test ( [among(
|
|
315
|
+
'{ka}{vs_o}{nna}{pulli}{tta}{vs_i}{ra}{pulli}'
|
|
316
|
+
'{pa}{tta}{vs_u}'
|
|
317
|
+
)] delete
|
|
318
|
+
(set found_a_match)
|
|
319
|
+
)
|
|
320
|
+
or
|
|
321
|
+
test ( [
|
|
322
|
+
'{ma}{vs_aa}{ra}{pulli}' or
|
|
323
|
+
'{ma}{vs_i}{nnna}{pulli}' or
|
|
324
|
+
'{nnna}{nnna}{pulli}' or
|
|
325
|
+
'{nnna}{vs_aa}{nnna}{pulli}' or
|
|
326
|
+
'{nnna}{vs_aa}{lla}{pulli}' or
|
|
327
|
+
'{nnna}{vs_aa}{ra}{pulli}' or
|
|
328
|
+
('{va}{nnna}{pulli}' test (not among('{a}' '{aa}' '{i}' '{ii}' '{u}' '{uu}' '{e}' '{ee}' '{ai}' '{o}' '{oo}' '{au}')) ) or
|
|
329
|
+
'{nnna}{lla}{pulli}' or
|
|
330
|
+
'{va}{lla}{pulli}' or
|
|
331
|
+
'{nnna}{ra}{pulli}' or
|
|
332
|
+
'{va}{ra}{pulli}' or
|
|
333
|
+
'{nnna}' or '{pa}' or '{ka}' or '{ta}' or '{ya}' or
|
|
334
|
+
'{pa}{nnna}{pulli}' or
|
|
335
|
+
'{pa}{lla}{pulli}' or
|
|
336
|
+
'{pa}{ra}{pulli}' or
|
|
337
|
+
('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
|
|
338
|
+
'{vs_i}{rra}{pulli}{rra}{vs_u}' or
|
|
339
|
+
'{pa}{ma}{pulli}' or
|
|
340
|
+
'{nnna}{ma}{pulli}' or
|
|
341
|
+
'{ta}{vs_u}{ma}{pulli}' or
|
|
342
|
+
'{rra}{vs_u}{ma}{pulli}' or
|
|
343
|
+
'{ka}{vs_u}{ma}{pulli}' or
|
|
344
|
+
'{nnna}{vs_e}{nnna}{pulli}' or
|
|
345
|
+
'{nnna}{vs_ai}' or
|
|
346
|
+
'{va}{vs_ai}'
|
|
347
|
+
] delete
|
|
348
|
+
(set found_a_match)
|
|
349
|
+
)
|
|
350
|
+
or
|
|
351
|
+
test ( [
|
|
352
|
+
('{vs_aa}{nnna}{pulli}' test (not '{ca}')) or
|
|
353
|
+
'{vs_aa}{lla}{pulli}' or
|
|
354
|
+
'{vs_aa}{ra}{pulli}' or
|
|
355
|
+
'{vs_ee}{nnna}{pulli}' or
|
|
356
|
+
'{vs_aa}' or
|
|
357
|
+
'{vs_aa}{ma}{pulli}' or
|
|
358
|
+
'{vs_e}{ma}{pulli}' or
|
|
359
|
+
'{vs_ee}{ma}{pulli}' or
|
|
360
|
+
'{vs_oo}{ma}{pulli}' or
|
|
361
|
+
'{ka}{vs_u}{ma}{pulli}' or
|
|
362
|
+
'{ta}{vs_u}{ma}{pulli}' or
|
|
363
|
+
'{tta}{vs_u}{ma}{pulli}' or
|
|
364
|
+
'{rra}{vs_u}{ma}{pulli}' or
|
|
365
|
+
'{vs_aa}{ya}{pulli}' or
|
|
366
|
+
'{nnna}{vs_e}{nnna}{pulli}' or
|
|
367
|
+
'{nnna}{vs_i}{ra}{pulli}' or
|
|
368
|
+
'{vs_ii}{ra}{pulli}' or
|
|
369
|
+
'{vs_ii}{ya}{ra}{pulli}'
|
|
370
|
+
] <- '{pulli}'
|
|
371
|
+
(set found_a_match)
|
|
372
|
+
)
|
|
373
|
+
or
|
|
374
|
+
test ( ([ '{ka}{vs_u}' or '{ta}{vs_u}' ) (test '{pulli}') ] delete
|
|
375
|
+
(set found_a_match)
|
|
376
|
+
)
|
|
377
|
+
)
|
|
378
|
+
do ([among(
|
|
379
|
+
'{vs_aa}{na}{vs_i}{nnna}{pulli}{rra}'
|
|
380
|
+
'{vs_aa}{na}{vs_i}{nnna}{pulli}{rra}{pulli}'
|
|
381
|
+
'{ka}{vs_i}{nnna}{pulli}{rra}'
|
|
382
|
+
'{ka}{vs_i}{nnna}{pulli}{rra}{pulli}'
|
|
383
|
+
'{ka}{vs_i}{rra}'
|
|
384
|
+
'{ka}{vs_i}{rra}{pulli}'
|
|
385
|
+
)] delete
|
|
386
|
+
(set found_a_match)
|
|
387
|
+
)
|
|
388
|
+
)
|
|
389
|
+
do fix_endings
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
define stem as (
|
|
393
|
+
unset found_vetrumai_urupu
|
|
394
|
+
do fix_ending
|
|
395
|
+
has_min_length
|
|
396
|
+
do remove_question_prefixes
|
|
397
|
+
do remove_pronoun_prefixes
|
|
398
|
+
do remove_question_suffixes
|
|
399
|
+
do remove_um
|
|
400
|
+
do remove_common_word_endings
|
|
401
|
+
do remove_vetrumai_urupukal
|
|
402
|
+
do remove_plural_suffix
|
|
403
|
+
do remove_command_suffixes
|
|
404
|
+
do remove_tense_suffixes
|
|
405
|
+
)
|