mittens 0.2.0 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +3 -3
  4. data/lib/mittens/version.rb +1 -1
  5. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  6. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  7. data/vendor/snowball/GNUmakefile +194 -136
  8. data/vendor/snowball/NEWS +798 -3
  9. data/vendor/snowball/README.rst +50 -1
  10. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  11. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  12. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  13. data/vendor/snowball/algorithms/basque.sbl +4 -19
  14. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  15. data/vendor/snowball/algorithms/danish.sbl +1 -1
  16. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  17. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  18. data/vendor/snowball/algorithms/english.sbl +52 -37
  19. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  20. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  21. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  22. data/vendor/snowball/algorithms/french.sbl +42 -16
  23. data/vendor/snowball/algorithms/german.sbl +35 -14
  24. data/vendor/snowball/algorithms/greek.sbl +76 -76
  25. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  26. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  27. data/vendor/snowball/algorithms/italian.sbl +11 -21
  28. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  29. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  30. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  31. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  32. data/vendor/snowball/algorithms/porter.sbl +2 -2
  33. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  34. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  35. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  36. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  37. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  38. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  39. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  40. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  41. data/vendor/snowball/compiler/analyser.c +445 -192
  42. data/vendor/snowball/compiler/driver.c +109 -101
  43. data/vendor/snowball/compiler/generator.c +853 -464
  44. data/vendor/snowball/compiler/generator_ada.c +404 -366
  45. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  46. data/vendor/snowball/compiler/generator_go.c +323 -254
  47. data/vendor/snowball/compiler/generator_java.c +326 -252
  48. data/vendor/snowball/compiler/generator_js.c +362 -252
  49. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  50. data/vendor/snowball/compiler/generator_python.c +257 -240
  51. data/vendor/snowball/compiler/generator_rust.c +423 -251
  52. data/vendor/snowball/compiler/header.h +117 -71
  53. data/vendor/snowball/compiler/space.c +137 -68
  54. data/vendor/snowball/compiler/syswords.h +2 -2
  55. data/vendor/snowball/compiler/tokeniser.c +125 -107
  56. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  57. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  58. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  59. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  60. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  61. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  62. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  63. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  64. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  65. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  66. data/vendor/snowball/examples/stemwords.c +12 -12
  67. data/vendor/snowball/go/env.go +107 -31
  68. data/vendor/snowball/go/util.go +0 -4
  69. data/vendor/snowball/include/libstemmer.h +4 -0
  70. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  71. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  72. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  74. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  75. data/vendor/snowball/javascript/stemwords.js +3 -6
  76. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  77. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  78. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  79. data/vendor/snowball/libstemmer/modules.txt +13 -10
  80. data/vendor/snowball/libstemmer/test.c +1 -1
  81. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  82. data/vendor/snowball/pascal/generate.pl +13 -13
  83. data/vendor/snowball/python/create_init.py +4 -1
  84. data/vendor/snowball/python/setup.cfg +0 -3
  85. data/vendor/snowball/python/setup.py +8 -3
  86. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  87. data/vendor/snowball/python/stemwords.py +8 -12
  88. data/vendor/snowball/runtime/api.c +10 -5
  89. data/vendor/snowball/runtime/header.h +10 -9
  90. data/vendor/snowball/runtime/utilities.c +9 -9
  91. data/vendor/snowball/rust/build.rs +1 -1
  92. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  93. data/vendor/snowball/tests/stemtest.c +7 -4
  94. metadata +7 -7
  95. data/vendor/snowball/.travis.yml +0 -112
  96. data/vendor/snowball/algorithms/german2.sbl +0 -145
  97. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  98. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -55,7 +55,7 @@ define postlude as repeat (
55
55
  '{u'}' (<- 'u')
56
56
  // and possibly {u"}->u here, or in prelude
57
57
  '' (next)
58
- ) //or next
58
+ )
59
59
  )
60
60
 
61
61
  backwardmode (
@@ -100,7 +100,8 @@ backwardmode (
100
100
  )
101
101
  'adora' 'ador' 'aci{o'}n'
102
102
  'adoras' 'adores' 'aciones'
103
- 'ante' 'antes' 'ancia' 'ancias'// Note 1
103
+ 'ante' 'antes' 'ancia' 'ancias'
104
+ 'acion' // Misspelling of '-ación'.
104
105
  (
105
106
  R2 delete
106
107
  try ( ['ic'] R2 delete )
@@ -111,6 +112,7 @@ backwardmode (
111
112
  R2 <- 'log'
112
113
  )
113
114
  'uci{o'}n' 'uciones'
115
+ 'ucion' // Misspelling of '-ución'.
114
116
  (
115
117
  R2 <- 'u'
116
118
  )
@@ -135,7 +137,7 @@ backwardmode (
135
137
  R2 delete
136
138
  try (
137
139
  [substring] among(
138
- 'ante' // Note 1
140
+ 'ante'
139
141
  'able'
140
142
  'ible' (R2 delete)
141
143
  )
@@ -224,7 +226,3 @@ define stem as (
224
226
  )
225
227
  do postlude
226
228
  )
227
-
228
- /*
229
- Note 1: additions of 15 Jun 2005
230
- */
@@ -1,4 +1,5 @@
1
1
  routines (
2
+ et_condition
2
3
  mark_regions
3
4
  main_suffix
4
5
  consonant_pair
@@ -9,7 +10,7 @@ externals ( stem )
9
10
 
10
11
  integers ( p1 x )
11
12
 
12
- groupings ( v s_ending )
13
+ groupings ( v s_ending ost_ending )
13
14
 
14
15
  stringescapes {}
15
16
 
@@ -23,16 +24,66 @@ define v 'aeiouy{a"}{ao}{o"}'
23
24
 
24
25
  define s_ending 'bcdfghjklmnoprtvy'
25
26
 
27
+ define ost_ending 'iklnprtuv'
28
+
26
29
  define mark_regions as (
27
30
 
28
31
  $p1 = limit
29
32
  test ( hop 3 setmark x )
30
- goto v gopast non-v setmark p1
33
+ gopast v gopast non-v setmark p1
31
34
  try ( $p1 < x $p1 = x )
32
35
  )
33
36
 
34
37
  backwardmode (
35
38
 
39
+ define et_condition as (
40
+ (non-v v not atlimit)
41
+ and not among (
42
+ // frihet, nyhet, råhet, trohet
43
+ 'h'
44
+ // societet
45
+ 'iet'
46
+ // annuitet, kontinuitet
47
+ 'uit'
48
+ // alfabet
49
+ 'fab'
50
+ // autenticitet, elektricitet, kapacitet, metallicitet, publicitet
51
+ 'cit'
52
+ // graviditet, likviditet, rigiditet
53
+ 'dit'
54
+ // neutralitet, rivalitet, sexualitet
55
+ 'alit'
56
+ // flexibilitet, instabilitet, kompatibilitet, mobilitet, variabilitet
57
+ 'ilit'
58
+ // anonymitet, intimitet, legitimitet
59
+ 'mit'
60
+ // kommunitet, maskulinitet, modernitet, spontanitet, suveränitet
61
+ 'nit'
62
+ // epitet, serendipitet
63
+ 'pit'
64
+ // auktoritet, integritet, majoritet, popularitet, prioritet
65
+ 'rit'
66
+ // densitet, generositet, intensitet, luminositet, viskositet
67
+ 'sit'
68
+ // identitet, kvantitet
69
+ 'tit'
70
+ // aggressivitet, positivitet
71
+ 'ivit'
72
+ // antikvitet, oblikvitet
73
+ 'kvit'
74
+ // komplexitet
75
+ 'xit'
76
+ // komet
77
+ 'kom'
78
+ // raket
79
+ 'rak'
80
+ // paket
81
+ 'pak'
82
+ // staket
83
+ 'stak'
84
+ )
85
+ )
86
+
36
87
  define main_suffix as (
37
88
  setlimit tomark p1 for ([substring])
38
89
  among(
@@ -43,7 +94,9 @@ backwardmode (
43
94
  'hetens' 'erns' 'at' 'andet' 'het' 'ast'
44
95
  (delete)
45
96
  's'
46
- (s_ending delete)
97
+ ( ('et' et_condition ]) or s_ending delete )
98
+ 'et'
99
+ ( et_condition delete )
47
100
  )
48
101
  )
49
102
 
@@ -52,10 +105,11 @@ backwardmode (
52
105
  and ([next] delete)
53
106
  )
54
107
 
55
- define other_suffix as setlimit tomark p1 for (
56
- [substring] among(
108
+ define other_suffix as (
109
+ setlimit tomark p1 for ([substring])
110
+ among(
57
111
  'lig' 'ig' 'els' (delete)
58
- 'l{o"}st' (<-'l{o"}s')
112
+ '{o"}st' (ost_ending <-'{o"}s')
59
113
  'fullt' (<-'full')
60
114
  )
61
115
  )
@@ -68,7 +68,7 @@ stringdef vs_au '{U+0BCC}'
68
68
  /* Pulli */
69
69
  stringdef pulli '{U+0BCD}'
70
70
 
71
- /* AU length markk */
71
+ /* AU length mark */
72
72
  stringdef au_lmark '{U+0BD7}'
73
73
 
74
74
 
@@ -101,10 +101,12 @@ define has_min_length as (
101
101
  )
102
102
 
103
103
  define fix_va_start as (
104
- (try '{va}{vs_oo}' and [ '{va}{vs_oo}' ] <- '{oo}' ) or
105
- (try '{va}{vs_o}' and [ '{va}{vs_o}' ] <- '{o}' ) or
106
- (try '{va}{vs_u}' and [ '{va}{vs_u}' ] <- '{u}' ) or
107
- (try '{va}{vs_uu}' and [ '{va}{vs_uu}' ] <- '{uu}' )
104
+ [substring] among (
105
+ '{va}{vs_oo}' ( <- '{oo}' )
106
+ '{va}{vs_o}' ( <- '{o}' )
107
+ '{va}{vs_u}' ( <- '{u}' )
108
+ '{va}{vs_uu}' ( <- '{uu}' )
109
+ )
108
110
  )
109
111
 
110
112
  define fix_endings as (
@@ -120,68 +122,90 @@ define remove_question_prefixes as (
120
122
  define fix_ending as (
121
123
  $(len > 3)
122
124
  backwards (
123
- ( [among('{na}{pulli}' '{na}{pulli}{ta}' '{na}{pulli}{ta}{pulli}') ] delete )
124
- or
125
- ( ['{ya}{pulli}' test among('{vs_ai}' '{vs_i}' '{vs_ii}') ] delete )
126
- or
127
- ( [ '{tta}{pulli}{pa}{pulli}' or '{tta}{pulli}{ka}{pulli}' ] <- '{lla}{pulli}' )
128
- or
129
- ( [ '{nnna}{pulli}{rra}{pulli}' ] <- '{la}{pulli}' )
130
- or
131
- // ( [ '{rra}{pulli}{ka}{pulli}' or '{nnna}{pulli}{nnna}{pulli}' ] <- '{la}{pulli}' )
132
- ( [ '{rra}{pulli}{ka}{pulli}' ] <- '{la}{pulli}' )
133
- or
134
- ( [ '{tta}{pulli}{tta}{pulli}' ] <- '{tta}{vs_u}' )
135
- or
136
- ( found_vetrumai_urupu [ '{ta}{pulli}{ta}{pulli}' (test not '{vs_ai}') ] <- '{ma}{pulli}' ] )
137
- or
138
- ( [ '{vs_u}{ka}{pulli}' or '{vs_u}{ka}{pulli}{ka}{pulli}' ] <- '{pulli}' )
139
- or
140
- ( [ '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') ] delete )
141
- or
142
- ( [ '{vs_u}{ka}{pulli}' ] <- '{pulli}' )
143
- or
144
- ( [ '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') ] delete )
145
- or
146
- ( [ '{pulli}' (among('{ya}' '{ra}' '{la}' '{va}' '{zha}' '{lla}') or among('{nga}' '{nya}' '{nna}' '{na}' '{ma}' '{nnna}')) '{pulli}' ] <- '{pulli}' )
147
- or
148
- ( [ among('{va}' '{ya}' '{va}{pulli}') ] delete )
149
- or
150
- ( [ '{nnna}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')) ] delete )
151
- or
152
- ( [ '{nga}{pulli}' (test not '{vs_ai}')] <- '{ma}{pulli}' )
153
- or
154
- ( [ '{nga}{pulli}' ] delete )
155
- or
156
- ( [ '{pulli}' (test (among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}') or '{pulli}')) ] delete )
125
+ (
126
+ [substring] among (
127
+ '{na}{pulli}'
128
+ '{na}{pulli}{ta}'
129
+ '{na}{pulli}{ta}{pulli}'
130
+ ( delete )
131
+ '{ya}{pulli}'
132
+ ( test among('{vs_ai}' '{vs_i}' '{vs_ii}') delete )
133
+ '{tta}{pulli}{pa}{pulli}'
134
+ '{tta}{pulli}{ka}{pulli}'
135
+ ( <- '{lla}{pulli}' )
136
+ '{nnna}{pulli}{rra}{pulli}'
137
+ ( <- '{la}{pulli}' )
138
+ '{rra}{pulli}{ka}{pulli}'
139
+ // '{nnna}{pulli}{nnna}{pulli}'
140
+ ( <- '{la}{pulli}' )
141
+ '{tta}{pulli}{tta}{pulli}'
142
+ ( <- '{tta}{vs_u}' )
143
+ '{ta}{pulli}{ta}{pulli}'
144
+ ( found_vetrumai_urupu not '{vs_ai}' <- '{ma}{pulli}' )
145
+ '{vs_u}{ka}{pulli}'
146
+ '{vs_u}{ka}{pulli}{ka}{pulli}'
147
+ ( <- '{pulli}' )
148
+ '{va}'
149
+ '{ya}'
150
+ '{va}{pulli}'
151
+ ( delete )
152
+ '{nnna}{vs_u}'
153
+ (
154
+ not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
155
+ delete
156
+ )
157
+ '{nga}{pulli}'
158
+ (
159
+ among (
160
+ '{vs_ai}' ( delete )
161
+ '{pulli}' ( delete )
162
+ '' ( <- '{ma}{pulli}' )
163
+ )
164
+ )
157
165
  )
166
+ ) or
167
+ ( [ '{pulli}'
168
+ (
169
+ ( among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}')
170
+ try ( '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') )
171
+ ] delete )
172
+ or
173
+ ( among(
174
+ '{ya}' '{ra}' '{la}' '{va}' '{zha}' '{lla}'
175
+ '{nya}' '{nna}' '{na}' '{ma}' '{nnna}') ] '{pulli}' delete )
176
+ or
177
+ ( test among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}' '{pulli}') ] delete )
178
+ )
179
+ )
180
+ )
158
181
  )
159
182
 
160
183
  define remove_pronoun_prefixes as (
161
- unset found_a_match
162
184
  [ among('{a}' '{i}' '{u}') among('{ka}' '{ca}' '{tha}' '{va}' '{na}' '{pa}' '{ma}' '{ya}' '{nga}' '{nya}') '{pulli}' ] delete
163
- (set found_a_match)
164
185
  do fix_va_start
165
186
  )
166
187
 
167
188
  define remove_plural_suffix as (
168
- unset found_a_match
169
189
  backwards (
170
- ( [ '{vs_u}{nga}{pulli}{ka}{lla}{pulli}' (test not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}')) ] <- '{pulli}' ) or
171
- ( [ '{rra}{pulli}{ka}{lla}{pulli}' ] <- '{la}{pulli}' ) or
172
- ( [ '{tta}{pulli}{ka}{lla}{pulli}' ] <- '{lla}{pulli}' ) or
173
- ( [ '{ka}{lla}{pulli}' ] delete )
174
- (set found_a_match)
190
+ [substring] among (
191
+ '{vs_u}{nga}{pulli}{ka}{lla}{pulli}'
192
+ ( ( among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') <- '{vs_u}{nga}{pulli}' )
193
+ or <- '{pulli}' )
194
+ '{rra}{pulli}{ka}{lla}{pulli}'
195
+ ( <- '{la}{pulli}' )
196
+ '{tta}{pulli}{ka}{lla}{pulli}'
197
+ ( <- '{lla}{pulli}' )
198
+ '{ka}{lla}{pulli}'
199
+ ( delete )
200
+ )
175
201
  )
176
202
  )
177
203
 
178
204
  define remove_question_suffixes as (
179
205
  has_min_length
180
- unset found_a_match
181
206
  backwards (
182
207
  do (
183
208
  [ among('{vs_oo}' '{vs_ee}' '{vs_aa}') ] <- '{pulli}'
184
- (set found_a_match)
185
209
  )
186
210
  )
187
211
  do fix_endings
@@ -189,18 +213,14 @@ define remove_question_suffixes as (
189
213
 
190
214
  define remove_command_suffixes as (
191
215
  has_min_length
192
- unset found_a_match
193
216
  backwards (
194
217
  [ among('{pa}{vs_i}' '{va}{vs_i}') ] delete
195
- (set found_a_match)
196
218
  )
197
219
  )
198
220
 
199
221
  define remove_um as (
200
- unset found_a_match
201
222
  has_min_length
202
223
  backwards ( [ '{vs_u}{ma}{pulli}' ] <- '{pulli}'
203
- (set found_a_match)
204
224
  )
205
225
  do fix_ending
206
226
  )
@@ -209,27 +229,28 @@ define remove_common_word_endings as (
209
229
  // These are not suffixes actually but are
210
230
  // some words that are attached to other words
211
231
  // but can be removed for stemming
212
- unset found_a_match
213
232
  has_min_length
214
233
  backwards (
215
- test ( [ '{vs_u}{tta}{nnna}{pulli}' or
216
- '{vs_i}{la}{pulli}{la}{vs_ai}' or
217
- '{vs_i}{tta}{ma}{pulli}' or
218
- '{vs_i}{nnna}{pulli}{rra}{vs_i}' or
219
- '{vs_aa}{ka}{vs_i}' or
220
- '{vs_aa}{ka}{vs_i}{ya}' or
221
- '{vs_e}{nnna}{pulli}{rra}{vs_u}' or
222
- '{vs_u}{lla}{pulli}{lla}' or
223
- '{vs_u}{tta}{vs_ai}{ya}' or
224
- '{vs_u}{tta}{vs_ai}' or
225
- '{vs_e}{nnna}{vs_u}{ma}{pulli}' or
226
- ('{la}{pulli}{la}' test (not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
227
- '{vs_e}{nnna}' or
228
- '{vs_aa}{ka}{vs_i}' ] <- '{pulli}'
229
- (set found_a_match)
230
- )
231
- or
232
- test ( [ among('{pa}{tta}{vs_u}'
234
+ [substring] among (
235
+ '{vs_u}{tta}{nnna}{pulli}'
236
+ '{vs_i}{la}{pulli}{la}{vs_ai}'
237
+ '{vs_i}{tta}{ma}{pulli}'
238
+ '{vs_i}{nnna}{pulli}{rra}{vs_i}'
239
+ '{vs_aa}{ka}{vs_i}'
240
+ '{vs_aa}{ka}{vs_i}{ya}'
241
+ '{vs_e}{nnna}{pulli}{rra}{vs_u}'
242
+ '{vs_u}{lla}{pulli}{lla}'
243
+ '{vs_u}{tta}{vs_ai}{ya}'
244
+ '{vs_u}{tta}{vs_ai}'
245
+ '{vs_e}{nnna}{vs_u}{ma}{pulli}'
246
+ '{vs_e}{nnna}'
247
+ ( <- '{pulli}' )
248
+ '{la}{pulli}{la}'
249
+ (
250
+ not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
251
+ <- '{pulli}'
252
+ )
253
+ '{pa}{tta}{vs_u}'
233
254
  '{pa}{tta}{pulli}{tta}'
234
255
  '{pa}{tta}{pulli}{tta}{vs_u}'
235
256
  '{pa}{tta}{pulli}{tta}{ta}{vs_u}'
@@ -241,60 +262,69 @@ define remove_common_word_endings as (
241
262
  '{pa}{tta}{vs_i}{ta}{vs_aa}{nnna}'
242
263
  '{pa}{tta}{vs_i}'
243
264
  '{ta}{vs_aa}{nnna}'
244
- '{vs_e}{la}{pulli}{la}{vs_aa}{ma}{pulli}')
245
- ] delete
246
- (set found_a_match)
247
- )
265
+ '{vs_e}{la}{pulli}{la}{vs_aa}{ma}{pulli}'
266
+ ( delete )
267
+ )
248
268
  )
249
269
  do fix_endings
250
270
  )
251
271
 
252
272
  define remove_vetrumai_urupukal as (
253
- unset found_a_match
254
273
  unset found_vetrumai_urupu
255
274
  has_min_length
256
275
  backwards (
257
276
  (
258
- test ( ['{nnna}{vs_ai}'] delete )
259
- or
260
- test ([ ( '{vs_i}{nnna}{vs_ai}' or
261
- '{vs_ai}' (test not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}'))) or
262
- ( '{vs_ai}' (test (among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}')))
263
- ] <- '{pulli}'
264
- )
265
- or
266
- test ( [
267
- '{vs_o}{tta}{vs_u}' or
268
- '{vs_oo}{tta}{vs_u}' or
269
- '{vs_i}{la}{pulli}' or
270
- '{vs_i}{rra}{pulli}' or
271
- ('{vs_i}{nnna}{pulli}' (test not '{ma}')) or
272
- '{vs_i}{nnna}{pulli}{rra}{vs_u}' or
273
- '{vs_i}{ra}{vs_u}{na}{pulli}{ta}{vs_u}' or
274
- '{va}{vs_i}{tta}' or
275
- ($(len >= 7) '{vs_i}{tta}{ma}{pulli}') or
276
- '{vs_aa}{la}{pulli}' or
277
- '{vs_u}{tta}{vs_ai}' or
278
- '{vs_aa}{ma}{la}{pulli}' or
279
- ('{la}{pulli}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
280
- '{vs_u}{lla}{pulli}'
281
- ] <- '{pulli}'
282
- )
283
- or
284
- test ( [
285
- '{ka}{nna}{pulli}' or
286
- '{ma}{vs_u}{nnna}{pulli}' or
287
- '{ma}{vs_ee}{la}{pulli}' or
288
- '{ma}{vs_ee}{rra}{pulli}' or
289
- '{ka}{vs_ii}{llla}{pulli}' or
290
- '{pa}{vs_i}{nnna}{pulli}' or
291
- ('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')))
292
- ] delete
277
+ test (
278
+ [substring] among (
279
+ '{nnna}{vs_ai}'
280
+ ( delete )
281
+ '{vs_o}{tta}{vs_u}'
282
+ '{vs_oo}{tta}{vs_u}'
283
+ '{vs_i}{la}{pulli}'
284
+ '{vs_i}{rra}{pulli}'
285
+ '{vs_i}{nnna}{pulli}{rra}{vs_u}'
286
+ '{vs_i}{ra}{vs_u}{na}{pulli}{ta}{vs_u}'
287
+ '{va}{vs_i}{tta}'
288
+ '{vs_aa}{la}{pulli}'
289
+ '{vs_u}{tta}{vs_ai}'
290
+ '{vs_aa}{ma}{la}{pulli}'
291
+ '{vs_u}{lla}{pulli}'
292
+ ( <- '{pulli}' )
293
+ '{vs_i}{nnna}{pulli}'
294
+ ( not '{ma}' <- '{pulli}' )
295
+ '{vs_i}{tta}{ma}{pulli}'
296
+ ( $(len >= 7) <- '{pulli}' )
297
+ '{la}{pulli}'
298
+ (
299
+ not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
300
+ <- '{pulli}'
301
+ )
302
+ '{ka}{nna}{pulli}'
303
+ '{ma}{vs_u}{nnna}{pulli}'
304
+ '{ma}{vs_ee}{la}{pulli}'
305
+ '{ma}{vs_ee}{rra}{pulli}'
306
+ '{ka}{vs_ii}{llla}{pulli}'
307
+ (delete)
308
+ '{ta}{vs_u}'
309
+ (
310
+ not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
311
+ delete
312
+ )
313
+ '{vs_ii}'
314
+ ( <- '{vs_i}' )
293
315
  )
316
+ )
294
317
  or
295
- test ([ '{vs_ii}' ] <- '{vs_i}')
318
+ test (
319
+ [ '{vs_ai}'
320
+ (
321
+ (not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}'))
322
+ or
323
+ (test (among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}'))
324
+ )
325
+ ] <- '{pulli}'
326
+ )
296
327
  )
297
- (set found_a_match)
298
328
  (set found_vetrumai_urupu)
299
329
  do ( [ '{vs_i}{nnna}{pulli}' ] <- '{pulli}' )
300
330
  )
@@ -302,76 +332,76 @@ define remove_vetrumai_urupukal as (
302
332
  )
303
333
 
304
334
  define remove_tense_suffixes as (
305
- set found_a_match
306
- repeat ( found_a_match (do remove_tense_suffix) )
335
+ repeat remove_tense_suffix
307
336
  )
308
337
 
338
+ // Gives signal t if a tense suffix was removed, signal f otherwise.
309
339
  define remove_tense_suffix as (
310
340
  unset found_a_match
311
341
  has_min_length
312
342
  backwards (
313
343
  do (
314
- test ( [among(
315
- '{ka}{vs_o}{nna}{pulli}{tta}{vs_i}{ra}{pulli}'
316
- '{pa}{tta}{vs_u}'
317
- )] delete
318
- (set found_a_match)
319
- )
320
- or
321
- test ( [
322
- '{ma}{vs_aa}{ra}{pulli}' or
323
- '{ma}{vs_i}{nnna}{pulli}' or
324
- '{nnna}{nnna}{pulli}' or
325
- '{nnna}{vs_aa}{nnna}{pulli}' or
326
- '{nnna}{vs_aa}{lla}{pulli}' or
327
- '{nnna}{vs_aa}{ra}{pulli}' or
328
- ('{va}{nnna}{pulli}' test (not among('{a}' '{aa}' '{i}' '{ii}' '{u}' '{uu}' '{e}' '{ee}' '{ai}' '{o}' '{oo}' '{au}')) ) or
329
- '{nnna}{lla}{pulli}' or
330
- '{va}{lla}{pulli}' or
331
- '{nnna}{ra}{pulli}' or
332
- '{va}{ra}{pulli}' or
333
- '{nnna}' or '{pa}' or '{ka}' or '{ta}' or '{ya}' or
334
- '{pa}{nnna}{pulli}' or
335
- '{pa}{lla}{pulli}' or
336
- '{pa}{ra}{pulli}' or
337
- ('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
338
- '{vs_i}{rra}{pulli}{rra}{vs_u}' or
339
- '{pa}{ma}{pulli}' or
340
- '{nnna}{ma}{pulli}' or
341
- '{ta}{vs_u}{ma}{pulli}' or
342
- '{rra}{vs_u}{ma}{pulli}' or
343
- '{ka}{vs_u}{ma}{pulli}' or
344
- '{nnna}{vs_e}{nnna}{pulli}' or
345
- '{nnna}{vs_ai}' or
346
- '{va}{vs_ai}'
347
- ] delete
348
- (set found_a_match)
349
- )
350
- or
351
- test ( [
352
- ('{vs_aa}{nnna}{pulli}' test (not '{ca}')) or
353
- '{vs_aa}{lla}{pulli}' or
354
- '{vs_aa}{ra}{pulli}' or
355
- '{vs_ee}{nnna}{pulli}' or
356
- '{vs_aa}' or
357
- '{vs_aa}{ma}{pulli}' or
358
- '{vs_e}{ma}{pulli}' or
359
- '{vs_ee}{ma}{pulli}' or
360
- '{vs_oo}{ma}{pulli}' or
361
- '{ka}{vs_u}{ma}{pulli}' or
362
- '{ta}{vs_u}{ma}{pulli}' or
363
- '{tta}{vs_u}{ma}{pulli}' or
364
- '{rra}{vs_u}{ma}{pulli}' or
365
- '{vs_aa}{ya}{pulli}' or
366
- '{nnna}{vs_e}{nnna}{pulli}' or
367
- '{nnna}{vs_i}{ra}{pulli}' or
368
- '{vs_ii}{ra}{pulli}' or
369
- '{vs_ii}{ya}{ra}{pulli}'
370
- ] <- '{pulli}'
371
- (set found_a_match)
344
+ test (
345
+ [substring] among (
346
+ '{ka}{vs_o}{nna}{pulli}{tta}{vs_i}{ra}{pulli}'
347
+ '{pa}{tta}{vs_u}'
348
+ '{ma}{vs_aa}{ra}{pulli}'
349
+ '{ma}{vs_i}{nnna}{pulli}'
350
+ '{nnna}{nnna}{pulli}'
351
+ '{nnna}{vs_aa}{nnna}{pulli}'
352
+ '{nnna}{vs_aa}{lla}{pulli}'
353
+ '{nnna}{vs_aa}{ra}{pulli}'
354
+ '{nnna}{lla}{pulli}'
355
+ '{va}{lla}{pulli}'
356
+ '{nnna}{ra}{pulli}'
357
+ '{va}{ra}{pulli}'
358
+ '{nnna}'
359
+ '{pa}'
360
+ '{ka}'
361
+ '{ta}'
362
+ '{ya}'
363
+ '{pa}{nnna}{pulli}'
364
+ '{pa}{lla}{pulli}'
365
+ '{pa}{ra}{pulli}'
366
+ '{vs_i}{rra}{pulli}{rra}{vs_u}'
367
+ '{pa}{ma}{pulli}'
368
+ '{nnna}{ma}{pulli}'
369
+ '{ta}{vs_u}{ma}{pulli}'
370
+ '{rra}{vs_u}{ma}{pulli}'
371
+ '{ka}{vs_u}{ma}{pulli}'
372
+ '{nnna}{vs_e}{nnna}{pulli}'
373
+ '{nnna}{vs_ai}'
374
+ '{va}{vs_ai}'
375
+ ( delete )
376
+ '{va}{nnna}{pulli}'
377
+ (
378
+ not among('{a}' '{aa}' '{i}' '{ii}' '{u}' '{uu}' '{e}' '{ee}' '{ai}' '{o}' '{oo}' '{au}')
379
+ delete
380
+ )
381
+ '{ta}{vs_u}'
382
+ (
383
+ not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')
384
+ delete
385
+ )
386
+ '{vs_aa}{nnna}{pulli}'
387
+ ( not '{ca}' <- '{pulli}' )
388
+ '{vs_aa}{lla}{pulli}'
389
+ '{vs_aa}{ra}{pulli}'
390
+ '{vs_ee}{nnna}{pulli}'
391
+ '{vs_aa}'
392
+ '{vs_aa}{ma}{pulli}'
393
+ '{vs_e}{ma}{pulli}'
394
+ '{vs_ee}{ma}{pulli}'
395
+ '{vs_oo}{ma}{pulli}'
396
+ '{tta}{vs_u}{ma}{pulli}'
397
+ '{vs_aa}{ya}{pulli}'
398
+ '{nnna}{vs_i}{ra}{pulli}'
399
+ '{vs_ii}{ra}{pulli}'
400
+ '{vs_ii}{ya}{ra}{pulli}'
401
+ ( <- '{pulli}' )
402
+ '{ka}{vs_u}'
403
+ ( test '{pulli}' delete )
372
404
  )
373
- or
374
- test ( ([ '{ka}{vs_u}' or '{ta}{vs_u}' ) (test '{pulli}') ] delete
375
405
  (set found_a_match)
376
406
  )
377
407
  )
@@ -387,6 +417,7 @@ define remove_tense_suffix as (
387
417
  )
388
418
  )
389
419
  do fix_endings
420
+ found_a_match
390
421
  )
391
422
 
392
423
  define stem as (