mittens 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -3
- data/lib/mittens/version.rb +1 -1
- data/vendor/snowball/.github/workflows/ci.yml +216 -0
- data/vendor/snowball/CONTRIBUTING.rst +111 -62
- data/vendor/snowball/GNUmakefile +194 -136
- data/vendor/snowball/NEWS +798 -3
- data/vendor/snowball/README.rst +50 -1
- data/vendor/snowball/ada/src/stemmer.adb +25 -13
- data/vendor/snowball/ada/src/stemmer.ads +9 -9
- data/vendor/snowball/ada/stemmer_config.gpr +7 -7
- data/vendor/snowball/algorithms/basque.sbl +4 -19
- data/vendor/snowball/algorithms/catalan.sbl +2 -9
- data/vendor/snowball/algorithms/danish.sbl +1 -1
- data/vendor/snowball/algorithms/dutch.sbl +284 -122
- data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
- data/vendor/snowball/algorithms/english.sbl +52 -37
- data/vendor/snowball/algorithms/esperanto.sbl +157 -0
- data/vendor/snowball/algorithms/estonian.sbl +269 -0
- data/vendor/snowball/algorithms/finnish.sbl +2 -3
- data/vendor/snowball/algorithms/french.sbl +42 -16
- data/vendor/snowball/algorithms/german.sbl +35 -14
- data/vendor/snowball/algorithms/greek.sbl +76 -76
- data/vendor/snowball/algorithms/hungarian.sbl +8 -6
- data/vendor/snowball/algorithms/indonesian.sbl +14 -8
- data/vendor/snowball/algorithms/italian.sbl +11 -21
- data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
- data/vendor/snowball/algorithms/lovins.sbl +0 -1
- data/vendor/snowball/algorithms/nepali.sbl +138 -37
- data/vendor/snowball/algorithms/norwegian.sbl +19 -5
- data/vendor/snowball/algorithms/porter.sbl +2 -2
- data/vendor/snowball/algorithms/portuguese.sbl +9 -13
- data/vendor/snowball/algorithms/romanian.sbl +17 -4
- data/vendor/snowball/algorithms/serbian.sbl +467 -468
- data/vendor/snowball/algorithms/spanish.sbl +5 -7
- data/vendor/snowball/algorithms/swedish.sbl +60 -6
- data/vendor/snowball/algorithms/tamil.sbl +207 -176
- data/vendor/snowball/algorithms/turkish.sbl +461 -445
- data/vendor/snowball/algorithms/yiddish.sbl +36 -38
- data/vendor/snowball/compiler/analyser.c +445 -192
- data/vendor/snowball/compiler/driver.c +109 -101
- data/vendor/snowball/compiler/generator.c +853 -464
- data/vendor/snowball/compiler/generator_ada.c +404 -366
- data/vendor/snowball/compiler/generator_csharp.c +297 -260
- data/vendor/snowball/compiler/generator_go.c +323 -254
- data/vendor/snowball/compiler/generator_java.c +326 -252
- data/vendor/snowball/compiler/generator_js.c +362 -252
- data/vendor/snowball/compiler/generator_pascal.c +349 -197
- data/vendor/snowball/compiler/generator_python.c +257 -240
- data/vendor/snowball/compiler/generator_rust.c +423 -251
- data/vendor/snowball/compiler/header.h +117 -71
- data/vendor/snowball/compiler/space.c +137 -68
- data/vendor/snowball/compiler/syswords.h +2 -2
- data/vendor/snowball/compiler/tokeniser.c +125 -107
- data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
- data/vendor/snowball/csharp/Stemwords/App.config +2 -2
- data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
- data/vendor/snowball/doc/libstemmer_c_README +7 -4
- data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
- data/vendor/snowball/doc/libstemmer_java_README +12 -1
- data/vendor/snowball/doc/libstemmer_js_README +6 -4
- data/vendor/snowball/doc/libstemmer_python_README +9 -4
- data/vendor/snowball/examples/stemwords.c +12 -12
- data/vendor/snowball/go/env.go +107 -31
- data/vendor/snowball/go/util.go +0 -4
- data/vendor/snowball/include/libstemmer.h +4 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
- data/vendor/snowball/javascript/base-stemmer.js +186 -2
- data/vendor/snowball/javascript/stemwords.js +3 -6
- data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
- data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
- data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
- data/vendor/snowball/libstemmer/modules.txt +13 -10
- data/vendor/snowball/libstemmer/test.c +1 -1
- data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
- data/vendor/snowball/pascal/generate.pl +13 -13
- data/vendor/snowball/python/create_init.py +4 -1
- data/vendor/snowball/python/setup.cfg +0 -3
- data/vendor/snowball/python/setup.py +8 -3
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
- data/vendor/snowball/python/stemwords.py +8 -12
- data/vendor/snowball/runtime/api.c +10 -5
- data/vendor/snowball/runtime/header.h +10 -9
- data/vendor/snowball/runtime/utilities.c +9 -9
- data/vendor/snowball/rust/build.rs +1 -1
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
- data/vendor/snowball/tests/stemtest.c +7 -4
- metadata +7 -7
- data/vendor/snowball/.travis.yml +0 -112
- data/vendor/snowball/algorithms/german2.sbl +0 -145
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
- data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -55,7 +55,7 @@ define postlude as repeat (
|
|
55
55
|
'{u'}' (<- 'u')
|
56
56
|
// and possibly {u"}->u here, or in prelude
|
57
57
|
'' (next)
|
58
|
-
)
|
58
|
+
)
|
59
59
|
)
|
60
60
|
|
61
61
|
backwardmode (
|
@@ -100,7 +100,8 @@ backwardmode (
|
|
100
100
|
)
|
101
101
|
'adora' 'ador' 'aci{o'}n'
|
102
102
|
'adoras' 'adores' 'aciones'
|
103
|
-
'ante' 'antes' 'ancia' 'ancias'
|
103
|
+
'ante' 'antes' 'ancia' 'ancias'
|
104
|
+
'acion' // Misspelling of '-ación'.
|
104
105
|
(
|
105
106
|
R2 delete
|
106
107
|
try ( ['ic'] R2 delete )
|
@@ -111,6 +112,7 @@ backwardmode (
|
|
111
112
|
R2 <- 'log'
|
112
113
|
)
|
113
114
|
'uci{o'}n' 'uciones'
|
115
|
+
'ucion' // Misspelling of '-ución'.
|
114
116
|
(
|
115
117
|
R2 <- 'u'
|
116
118
|
)
|
@@ -135,7 +137,7 @@ backwardmode (
|
|
135
137
|
R2 delete
|
136
138
|
try (
|
137
139
|
[substring] among(
|
138
|
-
'ante'
|
140
|
+
'ante'
|
139
141
|
'able'
|
140
142
|
'ible' (R2 delete)
|
141
143
|
)
|
@@ -224,7 +226,3 @@ define stem as (
|
|
224
226
|
)
|
225
227
|
do postlude
|
226
228
|
)
|
227
|
-
|
228
|
-
/*
|
229
|
-
Note 1: additions of 15 Jun 2005
|
230
|
-
*/
|
@@ -1,4 +1,5 @@
|
|
1
1
|
routines (
|
2
|
+
et_condition
|
2
3
|
mark_regions
|
3
4
|
main_suffix
|
4
5
|
consonant_pair
|
@@ -9,7 +10,7 @@ externals ( stem )
|
|
9
10
|
|
10
11
|
integers ( p1 x )
|
11
12
|
|
12
|
-
groupings ( v s_ending )
|
13
|
+
groupings ( v s_ending ost_ending )
|
13
14
|
|
14
15
|
stringescapes {}
|
15
16
|
|
@@ -23,16 +24,66 @@ define v 'aeiouy{a"}{ao}{o"}'
|
|
23
24
|
|
24
25
|
define s_ending 'bcdfghjklmnoprtvy'
|
25
26
|
|
27
|
+
define ost_ending 'iklnprtuv'
|
28
|
+
|
26
29
|
define mark_regions as (
|
27
30
|
|
28
31
|
$p1 = limit
|
29
32
|
test ( hop 3 setmark x )
|
30
|
-
|
33
|
+
gopast v gopast non-v setmark p1
|
31
34
|
try ( $p1 < x $p1 = x )
|
32
35
|
)
|
33
36
|
|
34
37
|
backwardmode (
|
35
38
|
|
39
|
+
define et_condition as (
|
40
|
+
(non-v v not atlimit)
|
41
|
+
and not among (
|
42
|
+
// frihet, nyhet, råhet, trohet
|
43
|
+
'h'
|
44
|
+
// societet
|
45
|
+
'iet'
|
46
|
+
// annuitet, kontinuitet
|
47
|
+
'uit'
|
48
|
+
// alfabet
|
49
|
+
'fab'
|
50
|
+
// autenticitet, elektricitet, kapacitet, metallicitet, publicitet
|
51
|
+
'cit'
|
52
|
+
// graviditet, likviditet, rigiditet
|
53
|
+
'dit'
|
54
|
+
// neutralitet, rivalitet, sexualitet
|
55
|
+
'alit'
|
56
|
+
// flexibilitet, instabilitet, kompatibilitet, mobilitet, variabilitet
|
57
|
+
'ilit'
|
58
|
+
// anonymitet, intimitet, legitimitet
|
59
|
+
'mit'
|
60
|
+
// kommunitet, maskulinitet, modernitet, spontanitet, suveränitet
|
61
|
+
'nit'
|
62
|
+
// epitet, serendipitet
|
63
|
+
'pit'
|
64
|
+
// auktoritet, integritet, majoritet, popularitet, prioritet
|
65
|
+
'rit'
|
66
|
+
// densitet, generositet, intensitet, luminositet, viskositet
|
67
|
+
'sit'
|
68
|
+
// identitet, kvantitet
|
69
|
+
'tit'
|
70
|
+
// aggressivitet, positivitet
|
71
|
+
'ivit'
|
72
|
+
// antikvitet, oblikvitet
|
73
|
+
'kvit'
|
74
|
+
// komplexitet
|
75
|
+
'xit'
|
76
|
+
// komet
|
77
|
+
'kom'
|
78
|
+
// raket
|
79
|
+
'rak'
|
80
|
+
// paket
|
81
|
+
'pak'
|
82
|
+
// staket
|
83
|
+
'stak'
|
84
|
+
)
|
85
|
+
)
|
86
|
+
|
36
87
|
define main_suffix as (
|
37
88
|
setlimit tomark p1 for ([substring])
|
38
89
|
among(
|
@@ -43,7 +94,9 @@ backwardmode (
|
|
43
94
|
'hetens' 'erns' 'at' 'andet' 'het' 'ast'
|
44
95
|
(delete)
|
45
96
|
's'
|
46
|
-
(s_ending
|
97
|
+
( ('et' et_condition ]) or s_ending delete )
|
98
|
+
'et'
|
99
|
+
( et_condition delete )
|
47
100
|
)
|
48
101
|
)
|
49
102
|
|
@@ -52,10 +105,11 @@ backwardmode (
|
|
52
105
|
and ([next] delete)
|
53
106
|
)
|
54
107
|
|
55
|
-
define other_suffix as
|
56
|
-
[substring]
|
108
|
+
define other_suffix as (
|
109
|
+
setlimit tomark p1 for ([substring])
|
110
|
+
among(
|
57
111
|
'lig' 'ig' 'els' (delete)
|
58
|
-
'
|
112
|
+
'{o"}st' (ost_ending <-'{o"}s')
|
59
113
|
'fullt' (<-'full')
|
60
114
|
)
|
61
115
|
)
|
@@ -68,7 +68,7 @@ stringdef vs_au '{U+0BCC}'
|
|
68
68
|
/* Pulli */
|
69
69
|
stringdef pulli '{U+0BCD}'
|
70
70
|
|
71
|
-
/* AU length
|
71
|
+
/* AU length mark */
|
72
72
|
stringdef au_lmark '{U+0BD7}'
|
73
73
|
|
74
74
|
|
@@ -101,10 +101,12 @@ define has_min_length as (
|
|
101
101
|
)
|
102
102
|
|
103
103
|
define fix_va_start as (
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
104
|
+
[substring] among (
|
105
|
+
'{va}{vs_oo}' ( <- '{oo}' )
|
106
|
+
'{va}{vs_o}' ( <- '{o}' )
|
107
|
+
'{va}{vs_u}' ( <- '{u}' )
|
108
|
+
'{va}{vs_uu}' ( <- '{uu}' )
|
109
|
+
)
|
108
110
|
)
|
109
111
|
|
110
112
|
define fix_endings as (
|
@@ -120,68 +122,90 @@ define remove_question_prefixes as (
|
|
120
122
|
define fix_ending as (
|
121
123
|
$(len > 3)
|
122
124
|
backwards (
|
123
|
-
(
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
125
|
+
(
|
126
|
+
[substring] among (
|
127
|
+
'{na}{pulli}'
|
128
|
+
'{na}{pulli}{ta}'
|
129
|
+
'{na}{pulli}{ta}{pulli}'
|
130
|
+
( delete )
|
131
|
+
'{ya}{pulli}'
|
132
|
+
( test among('{vs_ai}' '{vs_i}' '{vs_ii}') delete )
|
133
|
+
'{tta}{pulli}{pa}{pulli}'
|
134
|
+
'{tta}{pulli}{ka}{pulli}'
|
135
|
+
( <- '{lla}{pulli}' )
|
136
|
+
'{nnna}{pulli}{rra}{pulli}'
|
137
|
+
( <- '{la}{pulli}' )
|
138
|
+
'{rra}{pulli}{ka}{pulli}'
|
139
|
+
// '{nnna}{pulli}{nnna}{pulli}'
|
140
|
+
( <- '{la}{pulli}' )
|
141
|
+
'{tta}{pulli}{tta}{pulli}'
|
142
|
+
( <- '{tta}{vs_u}' )
|
143
|
+
'{ta}{pulli}{ta}{pulli}'
|
144
|
+
( found_vetrumai_urupu not '{vs_ai}' <- '{ma}{pulli}' )
|
145
|
+
'{vs_u}{ka}{pulli}'
|
146
|
+
'{vs_u}{ka}{pulli}{ka}{pulli}'
|
147
|
+
( <- '{pulli}' )
|
148
|
+
'{va}'
|
149
|
+
'{ya}'
|
150
|
+
'{va}{pulli}'
|
151
|
+
( delete )
|
152
|
+
'{nnna}{vs_u}'
|
153
|
+
(
|
154
|
+
not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
|
155
|
+
delete
|
156
|
+
)
|
157
|
+
'{nga}{pulli}'
|
158
|
+
(
|
159
|
+
among (
|
160
|
+
'{vs_ai}' ( delete )
|
161
|
+
'{pulli}' ( delete )
|
162
|
+
'' ( <- '{ma}{pulli}' )
|
163
|
+
)
|
164
|
+
)
|
157
165
|
)
|
166
|
+
) or
|
167
|
+
( [ '{pulli}'
|
168
|
+
(
|
169
|
+
( among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}')
|
170
|
+
try ( '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') )
|
171
|
+
] delete )
|
172
|
+
or
|
173
|
+
( among(
|
174
|
+
'{ya}' '{ra}' '{la}' '{va}' '{zha}' '{lla}'
|
175
|
+
'{nya}' '{nna}' '{na}' '{ma}' '{nnna}') ] '{pulli}' delete )
|
176
|
+
or
|
177
|
+
( test among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}' '{pulli}') ] delete )
|
178
|
+
)
|
179
|
+
)
|
180
|
+
)
|
158
181
|
)
|
159
182
|
|
160
183
|
define remove_pronoun_prefixes as (
|
161
|
-
unset found_a_match
|
162
184
|
[ among('{a}' '{i}' '{u}') among('{ka}' '{ca}' '{tha}' '{va}' '{na}' '{pa}' '{ma}' '{ya}' '{nga}' '{nya}') '{pulli}' ] delete
|
163
|
-
(set found_a_match)
|
164
185
|
do fix_va_start
|
165
186
|
)
|
166
187
|
|
167
188
|
define remove_plural_suffix as (
|
168
|
-
unset found_a_match
|
169
189
|
backwards (
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
190
|
+
[substring] among (
|
191
|
+
'{vs_u}{nga}{pulli}{ka}{lla}{pulli}'
|
192
|
+
( ( among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') <- '{vs_u}{nga}{pulli}' )
|
193
|
+
or <- '{pulli}' )
|
194
|
+
'{rra}{pulli}{ka}{lla}{pulli}'
|
195
|
+
( <- '{la}{pulli}' )
|
196
|
+
'{tta}{pulli}{ka}{lla}{pulli}'
|
197
|
+
( <- '{lla}{pulli}' )
|
198
|
+
'{ka}{lla}{pulli}'
|
199
|
+
( delete )
|
200
|
+
)
|
175
201
|
)
|
176
202
|
)
|
177
203
|
|
178
204
|
define remove_question_suffixes as (
|
179
205
|
has_min_length
|
180
|
-
unset found_a_match
|
181
206
|
backwards (
|
182
207
|
do (
|
183
208
|
[ among('{vs_oo}' '{vs_ee}' '{vs_aa}') ] <- '{pulli}'
|
184
|
-
(set found_a_match)
|
185
209
|
)
|
186
210
|
)
|
187
211
|
do fix_endings
|
@@ -189,18 +213,14 @@ define remove_question_suffixes as (
|
|
189
213
|
|
190
214
|
define remove_command_suffixes as (
|
191
215
|
has_min_length
|
192
|
-
unset found_a_match
|
193
216
|
backwards (
|
194
217
|
[ among('{pa}{vs_i}' '{va}{vs_i}') ] delete
|
195
|
-
(set found_a_match)
|
196
218
|
)
|
197
219
|
)
|
198
220
|
|
199
221
|
define remove_um as (
|
200
|
-
unset found_a_match
|
201
222
|
has_min_length
|
202
223
|
backwards ( [ '{vs_u}{ma}{pulli}' ] <- '{pulli}'
|
203
|
-
(set found_a_match)
|
204
224
|
)
|
205
225
|
do fix_ending
|
206
226
|
)
|
@@ -209,27 +229,28 @@ define remove_common_word_endings as (
|
|
209
229
|
// These are not suffixes actually but are
|
210
230
|
// some words that are attached to other words
|
211
231
|
// but can be removed for stemming
|
212
|
-
unset found_a_match
|
213
232
|
has_min_length
|
214
233
|
backwards (
|
215
|
-
|
216
|
-
'{
|
217
|
-
'{vs_i}{
|
218
|
-
'{vs_i}{
|
219
|
-
'{
|
220
|
-
'{vs_aa}{ka}{vs_i}
|
221
|
-
'{
|
222
|
-
'{
|
223
|
-
'{vs_u}{
|
224
|
-
'{vs_u}{tta}{vs_ai}'
|
225
|
-
'{
|
226
|
-
|
227
|
-
'{vs_e}{nnna}'
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
234
|
+
[substring] among (
|
235
|
+
'{vs_u}{tta}{nnna}{pulli}'
|
236
|
+
'{vs_i}{la}{pulli}{la}{vs_ai}'
|
237
|
+
'{vs_i}{tta}{ma}{pulli}'
|
238
|
+
'{vs_i}{nnna}{pulli}{rra}{vs_i}'
|
239
|
+
'{vs_aa}{ka}{vs_i}'
|
240
|
+
'{vs_aa}{ka}{vs_i}{ya}'
|
241
|
+
'{vs_e}{nnna}{pulli}{rra}{vs_u}'
|
242
|
+
'{vs_u}{lla}{pulli}{lla}'
|
243
|
+
'{vs_u}{tta}{vs_ai}{ya}'
|
244
|
+
'{vs_u}{tta}{vs_ai}'
|
245
|
+
'{vs_e}{nnna}{vs_u}{ma}{pulli}'
|
246
|
+
'{vs_e}{nnna}'
|
247
|
+
( <- '{pulli}' )
|
248
|
+
'{la}{pulli}{la}'
|
249
|
+
(
|
250
|
+
not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
|
251
|
+
<- '{pulli}'
|
252
|
+
)
|
253
|
+
'{pa}{tta}{vs_u}'
|
233
254
|
'{pa}{tta}{pulli}{tta}'
|
234
255
|
'{pa}{tta}{pulli}{tta}{vs_u}'
|
235
256
|
'{pa}{tta}{pulli}{tta}{ta}{vs_u}'
|
@@ -241,60 +262,69 @@ define remove_common_word_endings as (
|
|
241
262
|
'{pa}{tta}{vs_i}{ta}{vs_aa}{nnna}'
|
242
263
|
'{pa}{tta}{vs_i}'
|
243
264
|
'{ta}{vs_aa}{nnna}'
|
244
|
-
'{vs_e}{la}{pulli}{la}{vs_aa}{ma}{pulli}'
|
245
|
-
|
246
|
-
|
247
|
-
)
|
265
|
+
'{vs_e}{la}{pulli}{la}{vs_aa}{ma}{pulli}'
|
266
|
+
( delete )
|
267
|
+
)
|
248
268
|
)
|
249
269
|
do fix_endings
|
250
270
|
)
|
251
271
|
|
252
272
|
define remove_vetrumai_urupukal as (
|
253
|
-
unset found_a_match
|
254
273
|
unset found_vetrumai_urupu
|
255
274
|
has_min_length
|
256
275
|
backwards (
|
257
276
|
(
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
277
|
+
test (
|
278
|
+
[substring] among (
|
279
|
+
'{nnna}{vs_ai}'
|
280
|
+
( delete )
|
281
|
+
'{vs_o}{tta}{vs_u}'
|
282
|
+
'{vs_oo}{tta}{vs_u}'
|
283
|
+
'{vs_i}{la}{pulli}'
|
284
|
+
'{vs_i}{rra}{pulli}'
|
285
|
+
'{vs_i}{nnna}{pulli}{rra}{vs_u}'
|
286
|
+
'{vs_i}{ra}{vs_u}{na}{pulli}{ta}{vs_u}'
|
287
|
+
'{va}{vs_i}{tta}'
|
288
|
+
'{vs_aa}{la}{pulli}'
|
289
|
+
'{vs_u}{tta}{vs_ai}'
|
290
|
+
'{vs_aa}{ma}{la}{pulli}'
|
291
|
+
'{vs_u}{lla}{pulli}'
|
292
|
+
( <- '{pulli}' )
|
293
|
+
'{vs_i}{nnna}{pulli}'
|
294
|
+
( not '{ma}' <- '{pulli}' )
|
295
|
+
'{vs_i}{tta}{ma}{pulli}'
|
296
|
+
( $(len >= 7) <- '{pulli}' )
|
297
|
+
'{la}{pulli}'
|
298
|
+
(
|
299
|
+
not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
|
300
|
+
<- '{pulli}'
|
301
|
+
)
|
302
|
+
'{ka}{nna}{pulli}'
|
303
|
+
'{ma}{vs_u}{nnna}{pulli}'
|
304
|
+
'{ma}{vs_ee}{la}{pulli}'
|
305
|
+
'{ma}{vs_ee}{rra}{pulli}'
|
306
|
+
'{ka}{vs_ii}{llla}{pulli}'
|
307
|
+
(delete)
|
308
|
+
'{ta}{vs_u}'
|
309
|
+
(
|
310
|
+
not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
|
311
|
+
delete
|
312
|
+
)
|
313
|
+
'{vs_ii}'
|
314
|
+
( <- '{vs_i}' )
|
293
315
|
)
|
316
|
+
)
|
294
317
|
or
|
295
|
-
test (
|
318
|
+
test (
|
319
|
+
[ '{vs_ai}'
|
320
|
+
(
|
321
|
+
(not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}'))
|
322
|
+
or
|
323
|
+
(test (among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}'))
|
324
|
+
)
|
325
|
+
] <- '{pulli}'
|
326
|
+
)
|
296
327
|
)
|
297
|
-
(set found_a_match)
|
298
328
|
(set found_vetrumai_urupu)
|
299
329
|
do ( [ '{vs_i}{nnna}{pulli}' ] <- '{pulli}' )
|
300
330
|
)
|
@@ -302,76 +332,76 @@ define remove_vetrumai_urupukal as (
|
|
302
332
|
)
|
303
333
|
|
304
334
|
define remove_tense_suffixes as (
|
305
|
-
|
306
|
-
repeat ( found_a_match (do remove_tense_suffix) )
|
335
|
+
repeat remove_tense_suffix
|
307
336
|
)
|
308
337
|
|
338
|
+
// Gives signal t if a tense suffix was removed, signal f otherwise.
|
309
339
|
define remove_tense_suffix as (
|
310
340
|
unset found_a_match
|
311
341
|
has_min_length
|
312
342
|
backwards (
|
313
343
|
do (
|
314
|
-
test (
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
344
|
+
test (
|
345
|
+
[substring] among (
|
346
|
+
'{ka}{vs_o}{nna}{pulli}{tta}{vs_i}{ra}{pulli}'
|
347
|
+
'{pa}{tta}{vs_u}'
|
348
|
+
'{ma}{vs_aa}{ra}{pulli}'
|
349
|
+
'{ma}{vs_i}{nnna}{pulli}'
|
350
|
+
'{nnna}{nnna}{pulli}'
|
351
|
+
'{nnna}{vs_aa}{nnna}{pulli}'
|
352
|
+
'{nnna}{vs_aa}{lla}{pulli}'
|
353
|
+
'{nnna}{vs_aa}{ra}{pulli}'
|
354
|
+
'{nnna}{lla}{pulli}'
|
355
|
+
'{va}{lla}{pulli}'
|
356
|
+
'{nnna}{ra}{pulli}'
|
357
|
+
'{va}{ra}{pulli}'
|
358
|
+
'{nnna}'
|
359
|
+
'{pa}'
|
360
|
+
'{ka}'
|
361
|
+
'{ta}'
|
362
|
+
'{ya}'
|
363
|
+
'{pa}{nnna}{pulli}'
|
364
|
+
'{pa}{lla}{pulli}'
|
365
|
+
'{pa}{ra}{pulli}'
|
366
|
+
'{vs_i}{rra}{pulli}{rra}{vs_u}'
|
367
|
+
'{pa}{ma}{pulli}'
|
368
|
+
'{nnna}{ma}{pulli}'
|
369
|
+
'{ta}{vs_u}{ma}{pulli}'
|
370
|
+
'{rra}{vs_u}{ma}{pulli}'
|
371
|
+
'{ka}{vs_u}{ma}{pulli}'
|
372
|
+
'{nnna}{vs_e}{nnna}{pulli}'
|
373
|
+
'{nnna}{vs_ai}'
|
374
|
+
'{va}{vs_ai}'
|
375
|
+
( delete )
|
376
|
+
'{va}{nnna}{pulli}'
|
377
|
+
(
|
378
|
+
not among('{a}' '{aa}' '{i}' '{ii}' '{u}' '{uu}' '{e}' '{ee}' '{ai}' '{o}' '{oo}' '{au}')
|
379
|
+
delete
|
380
|
+
)
|
381
|
+
'{ta}{vs_u}'
|
382
|
+
(
|
383
|
+
not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
|
384
|
+
delete
|
385
|
+
)
|
386
|
+
'{vs_aa}{nnna}{pulli}'
|
387
|
+
( not '{ca}' <- '{pulli}' )
|
388
|
+
'{vs_aa}{lla}{pulli}'
|
389
|
+
'{vs_aa}{ra}{pulli}'
|
390
|
+
'{vs_ee}{nnna}{pulli}'
|
391
|
+
'{vs_aa}'
|
392
|
+
'{vs_aa}{ma}{pulli}'
|
393
|
+
'{vs_e}{ma}{pulli}'
|
394
|
+
'{vs_ee}{ma}{pulli}'
|
395
|
+
'{vs_oo}{ma}{pulli}'
|
396
|
+
'{tta}{vs_u}{ma}{pulli}'
|
397
|
+
'{vs_aa}{ya}{pulli}'
|
398
|
+
'{nnna}{vs_i}{ra}{pulli}'
|
399
|
+
'{vs_ii}{ra}{pulli}'
|
400
|
+
'{vs_ii}{ya}{ra}{pulli}'
|
401
|
+
( <- '{pulli}' )
|
402
|
+
'{ka}{vs_u}'
|
403
|
+
( test '{pulli}' delete )
|
372
404
|
)
|
373
|
-
or
|
374
|
-
test ( ([ '{ka}{vs_u}' or '{ta}{vs_u}' ) (test '{pulli}') ] delete
|
375
405
|
(set found_a_match)
|
376
406
|
)
|
377
407
|
)
|
@@ -387,6 +417,7 @@ define remove_tense_suffix as (
|
|
387
417
|
)
|
388
418
|
)
|
389
419
|
do fix_endings
|
420
|
+
found_a_match
|
390
421
|
)
|
391
422
|
|
392
423
|
define stem as (
|