mittens 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +3 -3
  4. data/lib/mittens/version.rb +1 -1
  5. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  6. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  7. data/vendor/snowball/GNUmakefile +194 -136
  8. data/vendor/snowball/NEWS +798 -3
  9. data/vendor/snowball/README.rst +50 -1
  10. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  11. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  12. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  13. data/vendor/snowball/algorithms/basque.sbl +4 -19
  14. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  15. data/vendor/snowball/algorithms/danish.sbl +1 -1
  16. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  17. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  18. data/vendor/snowball/algorithms/english.sbl +52 -37
  19. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  20. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  21. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  22. data/vendor/snowball/algorithms/french.sbl +42 -16
  23. data/vendor/snowball/algorithms/german.sbl +35 -14
  24. data/vendor/snowball/algorithms/greek.sbl +76 -76
  25. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  26. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  27. data/vendor/snowball/algorithms/italian.sbl +11 -21
  28. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  29. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  30. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  31. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  32. data/vendor/snowball/algorithms/porter.sbl +2 -2
  33. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  34. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  35. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  36. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  37. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  38. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  39. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  40. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  41. data/vendor/snowball/compiler/analyser.c +445 -192
  42. data/vendor/snowball/compiler/driver.c +109 -101
  43. data/vendor/snowball/compiler/generator.c +853 -464
  44. data/vendor/snowball/compiler/generator_ada.c +404 -366
  45. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  46. data/vendor/snowball/compiler/generator_go.c +323 -254
  47. data/vendor/snowball/compiler/generator_java.c +326 -252
  48. data/vendor/snowball/compiler/generator_js.c +362 -252
  49. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  50. data/vendor/snowball/compiler/generator_python.c +257 -240
  51. data/vendor/snowball/compiler/generator_rust.c +423 -251
  52. data/vendor/snowball/compiler/header.h +117 -71
  53. data/vendor/snowball/compiler/space.c +137 -68
  54. data/vendor/snowball/compiler/syswords.h +2 -2
  55. data/vendor/snowball/compiler/tokeniser.c +125 -107
  56. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  57. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  58. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  59. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  60. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  61. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  62. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  63. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  64. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  65. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  66. data/vendor/snowball/examples/stemwords.c +12 -12
  67. data/vendor/snowball/go/env.go +107 -31
  68. data/vendor/snowball/go/util.go +0 -4
  69. data/vendor/snowball/include/libstemmer.h +4 -0
  70. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  71. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  72. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  74. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  75. data/vendor/snowball/javascript/stemwords.js +3 -6
  76. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  77. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  78. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  79. data/vendor/snowball/libstemmer/modules.txt +13 -10
  80. data/vendor/snowball/libstemmer/test.c +1 -1
  81. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  82. data/vendor/snowball/pascal/generate.pl +13 -13
  83. data/vendor/snowball/python/create_init.py +4 -1
  84. data/vendor/snowball/python/setup.cfg +0 -3
  85. data/vendor/snowball/python/setup.py +8 -3
  86. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  87. data/vendor/snowball/python/stemwords.py +8 -12
  88. data/vendor/snowball/runtime/api.c +10 -5
  89. data/vendor/snowball/runtime/header.h +10 -9
  90. data/vendor/snowball/runtime/utilities.c +9 -9
  91. data/vendor/snowball/rust/build.rs +1 -1
  92. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  93. data/vendor/snowball/tests/stemtest.c +7 -4
  94. metadata +7 -7
  95. data/vendor/snowball/.travis.yml +0 -112
  96. data/vendor/snowball/algorithms/german2.sbl +0 -145
  97. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  98. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -8,15 +8,15 @@ routines (
8
8
  R1 R2
9
9
  Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5
10
10
  exception1
11
- exception2
12
11
  )
13
12
 
14
13
  externals ( stem )
15
14
 
16
- groupings ( v v_WXY valid_LI )
15
+ groupings ( aeo v v_WXY valid_LI )
17
16
 
18
17
  stringescapes {}
19
18
 
19
+ define aeo 'aeo'
20
20
  define v 'aeiouy'
21
21
  define v_WXY v + 'wxY'
22
22
 
@@ -34,9 +34,14 @@ define mark_regions as (
34
34
  $p2 = limit
35
35
  do(
36
36
  among (
37
- 'gener'
38
- 'commun' // added May 2005
39
- 'arsen' // added Nov 2006 (arsenic/arsenal)
37
+ 'gener' // generate/general/generic/generous
38
+ 'commun' // communication/communism/community
39
+ 'arsen' // arsenic/arsenal
40
+ 'past' // past/paste
41
+ 'univers' // universe/universal/university
42
+ 'later' // lateral/later
43
+ 'emerg' // emerge/emergency
44
+ 'organ' // organ/organic/organize
40
45
  // ... extensions possible here ...
41
46
  ) or (gopast v gopast non-v)
42
47
  setmark p1
@@ -50,6 +55,8 @@ backwardmode (
50
55
  ( non-v_WXY v non-v )
51
56
  or
52
57
  ( non-v v atlimit )
58
+ or
59
+ ( 'past' ) // pasted/pasting
53
60
  )
54
61
 
55
62
  define R1 as $p1 <= cursor
@@ -74,19 +81,44 @@ backwardmode (
74
81
  define Step_1b as (
75
82
  [substring] among (
76
83
  'eed' 'eedly'
77
- (R1 <-'ee')
78
- 'ed' 'edly' 'ing' 'ingly'
79
84
  (
80
- test gopast v delete
81
- test substring among(
85
+ do (
86
+ among (
87
+ 'proc' 'exc' 'succ'
88
+ (atlimit)
89
+ ) or (
90
+ R1 <-'ee'
91
+ )
92
+ )
93
+ )
94
+ 'ed' 'edly' 'ingly'
95
+ (false) // Handled below.
96
+ 'ing'
97
+ ( // Handle exceptional cases here, rest handled below.
98
+ among (
99
+ // dying->die, lying->lie, tying->tie, vying->vie
100
+ 'y'
101
+ (test(non-v atlimit) ] <-'ie')
102
+ // Leave inning, outing, etc. alone.
103
+ 'inn' 'out' 'cann' 'herr' 'earr' 'even'
104
+ (atlimit)
105
+ )
106
+ )
107
+ '' ()
108
+ ) or (
109
+ // Handle 'ed' 'edly' 'ing' 'ingly'
110
+ test gopast v delete
111
+ [] test (
112
+ substring among(
82
113
  'at' 'bl' 'iz'
83
- (<+ 'e')
114
+ (fail(<- 'e'))
84
115
  'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
85
116
  // ignoring double c, h, j, k, q, v, w, and x
86
- ([next] delete)
87
- '' (atmark p1 test shortv <+ 'e')
117
+ (not (aeo atlimit))
118
+ '' (fail(atmark p1 test shortv <- 'e'))
88
119
  )
89
120
  )
121
+ [next] delete
90
122
  )
91
123
  )
92
124
 
@@ -116,6 +148,7 @@ backwardmode (
116
148
  (<-'ive')
117
149
  'biliti' 'bli'
118
150
  (<-'ble')
151
+ 'ogist' (<-'og')
119
152
  'ogi' ('l' <-'og')
120
153
  'fulli' (<-'ful')
121
154
  'lessli' (<-'less')
@@ -133,7 +166,7 @@ backwardmode (
133
166
  'ful' 'ness'
134
167
  (delete)
135
168
  'ative'
136
- (R2 delete) // 'R2' added Dec 2001
169
+ (R2 delete)
137
170
  )
138
171
  )
139
172
 
@@ -152,17 +185,6 @@ backwardmode (
152
185
  'l' (R2 'l' delete)
153
186
  )
154
187
  )
155
-
156
- define exception2 as (
157
-
158
- [substring] atlimit among(
159
- 'inning' 'outing' 'canning' 'herring' 'earring'
160
- 'proceed' 'exceed' 'succeed'
161
-
162
- // ... extensions possible here ...
163
-
164
- )
165
- )
166
188
  )
167
189
 
168
190
  define exception1 as (
@@ -171,11 +193,7 @@ define exception1 as (
171
193
 
172
194
  /* special changes: */
173
195
 
174
- 'skis' (<-'ski')
175
196
  'skies' (<-'sky')
176
- 'dying' (<-'die')
177
- 'lying' (<-'lie')
178
- 'tying' (<-'tie')
179
197
 
180
198
  /* special -LY cases */
181
199
 
@@ -212,17 +230,14 @@ define stem as (
212
230
 
213
231
  do Step_1a
214
232
 
215
- exception2 or (
216
-
217
- do Step_1b
218
- do Step_1c
233
+ do Step_1b
234
+ do Step_1c
219
235
 
220
- do Step_2
221
- do Step_3
222
- do Step_4
236
+ do Step_2
237
+ do Step_3
238
+ do Step_4
223
239
 
224
- do Step_5
225
- )
240
+ do Step_5
226
241
  )
227
242
  do postlude
228
243
  )
@@ -0,0 +1,157 @@
1
+ booleans ( foreign )
2
+
3
+ routines (
4
+ canonical_form
5
+ correlative
6
+ final_apostrophe
7
+ initial_apostrophe
8
+ long_word
9
+ merged_numeral
10
+ not_after_letter
11
+ pronoun
12
+ standard_suffix
13
+ ujn_suffix
14
+ uninflected
15
+ )
16
+
17
+ externals ( stem )
18
+
19
+ groupings ( vowel aou digit )
20
+
21
+ define vowel 'aeiou'
22
+ define aou 'aou'
23
+ define digit '0123456789'
24
+
25
+ stringescapes {}
26
+
27
+ stringdef c^ '{U+0109}'
28
+ stringdef g^ '{U+011D}'
29
+ stringdef h^ '{U+0125}'
30
+ stringdef j^ '{U+0135}'
31
+ stringdef s^ '{U+015D}'
32
+ stringdef u+ '{U+016D}'
33
+
34
+ stringdef a' '{U+00E1}'
35
+ stringdef e' '{U+00E9}'
36
+ stringdef i' '{U+00ED}'
37
+ stringdef o' '{U+00F3}'
38
+ stringdef u' '{U+00FA}'
39
+
40
+ define canonical_form as (
41
+ unset foreign
42
+ repeat (
43
+ [substring]
44
+ among(
45
+ 'cx' (<- '{c^}')
46
+ 'gx' (<- '{g^}')
47
+ 'hx' (<- '{h^}')
48
+ 'jx' (<- '{j^}')
49
+ 'sx' (<- '{s^}')
50
+ 'ux' (<- '{u+}')
51
+ '{a'}' (<- 'a' set foreign)
52
+ '{e'}' (<- 'e' set foreign)
53
+ '{i'}' (<- 'i' set foreign)
54
+ '{o'}' (<- 'o' set foreign)
55
+ '{u'}' (<- 'u' set foreign)
56
+ 'q' 'w' 'x' 'y' (set foreign)
57
+ '-' (unset foreign)
58
+ '' (next)
59
+ )
60
+ )
61
+ not foreign
62
+ )
63
+
64
+ define initial_apostrophe as (
65
+ ['{'}'] 'st' among('as' 'i' 'is' 'os' 'u' 'us') atlimit <- 'e'
66
+ )
67
+
68
+ backwardmode (
69
+ define pronoun as (
70
+ [try 'n']
71
+ among(
72
+ 'ci' 'gi' '{g^}i' 'hi' 'ili' 'i{s^}i' 'ivi' 'li' 'mal{s^}i' 'mi' 'ni'
73
+ 'oni' 'ri' 'si' '{s^}i' '{s^}li' 'vi'
74
+ )
75
+ (atlimit or '-') delete
76
+ )
77
+
78
+ define final_apostrophe as (
79
+ ['{'}']
80
+ ('l' atlimit <- 'a') or
81
+ ('un' atlimit <- 'u') or
82
+ (
83
+ among(
84
+ 'adi' 'almen' 'amb' 'ank' 'ankor' 'anstat' 'anta{u+}hier' 'apen'
85
+ 'bald' '{c^}irk' 'hier' 'hodi' 'kontr' 'kvaz' 'malbald' 'malgr'
86
+ 'morg' 'postmorg' 'presk' 'tut{c^}irk'
87
+ ) (atlimit or '-') <- 'a{u+}'
88
+ ) or
89
+ (<- 'o')
90
+ )
91
+
92
+ define ujn_suffix as (
93
+ [try 'n' try 'j'] among('aliu' 'unu') (atlimit or '-') delete
94
+ )
95
+
96
+ define uninflected as (
97
+ among(
98
+ 'aha' 'amen' 'dirlididi' 'disde' 'ehe' 'ekde' 'elde' 'haha'
99
+ 'haleluja' 'hola' 'hosana' 'hura' '{h^}a{h^}a' 'mal{c^}i' 'malkaj'
100
+ 'malpli' 'maltra' 'maltre' 'maltro' 'minus' 'muu' 'oho' 'tamen'
101
+ 'uhu'
102
+ )
103
+ (atlimit or '-')
104
+ )
105
+
106
+ define merged_numeral as (
107
+ among('du' 'tri' 'unu')
108
+ among('cent' 'dek')
109
+ )
110
+
111
+ define correlative as (
112
+ []
113
+ // Ignore -al, -am, etc. since they can't be confused with suffixes.
114
+ test (
115
+ ((try 'n'] 'e') or (try 'n' try 'j'] aou))
116
+ 'i'
117
+ try among('{c^}' 'k' 'kelk' 'mult' 'nen' 'samt' 't')
118
+ (atlimit or '-')
119
+ )
120
+ delete
121
+ )
122
+
123
+ define long_word as (
124
+ loop 2 gopast vowel or (gopast '-' next) or gopast digit
125
+ )
126
+
127
+ define not_after_letter as ('-' or digit)
128
+
129
+ define standard_suffix as (
130
+ [substring try '-']
131
+ among(
132
+ 'a' 'aj' 'ajn' 'an'
133
+ 'e' 'en'
134
+ 'i' 'as' 'is' 'os' 'u' 'us'
135
+ 'o' 'oj' 'ojn' 'on'
136
+ 'j' not_after_letter
137
+ 'jn' not_after_letter
138
+ 'n' not_after_letter
139
+ )
140
+ delete
141
+ )
142
+ )
143
+
144
+ define stem as (
145
+ test canonical_form
146
+ do initial_apostrophe
147
+ backwards (
148
+ not pronoun
149
+ do final_apostrophe
150
+ not correlative
151
+ not uninflected
152
+ not merged_numeral
153
+ not ujn_suffix
154
+ test long_word
155
+ standard_suffix
156
+ )
157
+ )
@@ -0,0 +1,269 @@
1
+ /* Estonian stemmer
2
+
3
+ Made by Linda Freienthal in January 2019.
4
+
5
+ */
6
+
7
+ routines (
8
+ mark_regions
9
+ LONGV
10
+ special_noun_endings
11
+ case_ending
12
+ emphasis
13
+ plural_three_first_cases
14
+ undouble_kpt
15
+ i_plural
16
+ degrees
17
+ substantive
18
+ verb_exceptions
19
+ verb
20
+ nu
21
+ )
22
+
23
+ stringescapes {}
24
+
25
+ stringdef a" '{U+00E4}' //a-umlaut ä
26
+ stringdef o" '{U+00F6}' //o-umlaut ö
27
+ stringdef o~ '{U+00F5}' //o with tilde õ
28
+ stringdef u" '{U+00FC}' //u-umlaut ü
29
+ stringdef sv '{U+0161}' //s-caron š
30
+ stringdef zv '{U+017E}' //z-caron ž
31
+
32
+ externals ( stem )
33
+ integers ( p1 )
34
+ groupings ( V1 RV KI GI)
35
+
36
+ define V1 'aeiou{o~}{a"}{o"}{u"}'
37
+ define RV 'aeiuo'
38
+ define KI 'kptgbdshf{sv}z{zv}'
39
+ define GI 'cjlmnqrvwxaeiou{o~}{a"}{o"}{u"}'
40
+
41
+ define mark_regions as (
42
+
43
+ $p1 = limit
44
+
45
+ gopast V1 gopast non-V1 setmark p1
46
+ )
47
+
48
+
49
+ backwardmode (
50
+
51
+ define emphasis as (
52
+ setlimit tomark p1 for ([substring])
53
+ test hop 4 //kingi -> kingi
54
+ among(
55
+ 'gi' ((GI and not LONGV) delete) //jookse-me-gi, bioloogi -> bioloogi
56
+ 'ki' (KI delete) //kookki -> kook
57
+ )
58
+
59
+ )
60
+
61
+ // Signals t if a replacement was made; f otherwise.
62
+ define verb as (
63
+ setlimit tomark p1 for ([substring])
64
+ among(
65
+ 'nuksin' 'nuksime' 'nuksid' 'nuksite' (delete) //seleta-nuksite
66
+ 'ksin' 'ksid' 'ksime' 'ksite' (delete) //personal conditional: rõõmusta-ksin
67
+ 'mata' (delete)
68
+ 'takse' 'dakse' (delete) //impersonal: laul-dakse, luba-takse
69
+ 'taks' 'daks' (delete) //impersonal conditional: laul-daks, saade-taks
70
+ 'akse' (<-'a') //impersonal: tulla-kse, süüa-kse (-> söö), teha-kse (-> tegi), püüta-kse, leita-kse
71
+ 'sime' (delete) //pl1pst: saat-sime
72
+ 'site' (delete) //pl2pst: saat-site
73
+ 'sin' (delete) //sg1pst: laul-sin, saat-sin
74
+ 'me' (V1 delete) //pl1prs: laula-me, tule-me
75
+ 'da' (V1 delete) //da-infinitive: luba-da
76
+ 'n' (V1 delete) //sg1prs: kirjuta-n
77
+ 'b' (V1 delete) //sg3prs: laula-b
78
+ )
79
+ )
80
+
81
+ define LONGV as
82
+ among('aa' 'ee' 'ii' 'oo' 'uu' '{a"}{a"}' '{o"}{o"}' '{u"}{u"}' '{o~}{o~}')
83
+
84
+ define i_plural as (
85
+ setlimit tomark p1 for ([substring])
86
+ among(
87
+ 'i' (RV) //raama-tu-i, lapsiku-i
88
+ )
89
+ delete
90
+ )
91
+
92
+ define special_noun_endings as (
93
+ setlimit tomark p1 for ([substring])
94
+ among(
95
+ 'lasse' (<- 'lase') //teadlasse -> teadlase
96
+ 'last' (<- 'lase') //teadlast -> teadlase
97
+ 'lane' (<- 'lase') //teadlane -> teadlase
98
+ 'lasi'(<- 'lase') //teadlasi -> teadlase
99
+ 'misse' (<- 'mise') //tegemisse -> tegemise
100
+ 'mist' (<- 'mise') //kasutamist -> kasutamise
101
+ 'mine' (<- 'mise') //tegemine -> tegemise
102
+ 'misi' (<- 'mise') //kasutamisi -> kasutamise
103
+ 'lisse' (<- 'lise') //rohelisse -> rohelise
104
+ 'list' (<- 'lise') //tavalist -> tavalise
105
+ 'line' (<- 'lise') //roheline -> rohelise
106
+ 'lisi' (<- 'lise') //tavalisi -> tavalise
107
+ )
108
+ )
109
+
110
+ define case_ending as (
111
+ setlimit tomark p1 for ([substring])
112
+ among(
113
+ 'sse' (RV or LONGV) //illative: saapa-sse
114
+ 'st' (RV or LONGV) //elative: saapa-st and kapsas-t
115
+ 'le' (RV or LONGV) //allative: raama-tu-le
116
+ 'lt' (RV or LONGV) //ablative: raama-tu-lt
117
+ 'ga' (RV or LONGV) //comitative: õpetaja-ga
118
+ 'ks' (RV or LONGV) //translative: õpetaja-ks
119
+ 'ta' (RV or LONGV) //abessive and da-infinitive: õpetaja-ta and hüpa-ta
120
+ 't' (test hop 4) //partitive: raamatu-t
121
+ 's' (RV or LONGV) //inessive and sg3pst: raama-tu-s and sõiti-s
122
+ 'l' (RV or LONGV) //adessive: raama-tu-l and kapsa-l.
123
+ )
124
+ delete
125
+ )
126
+
127
+
128
+ define plural_three_first_cases as (
129
+ setlimit tomark p1 for ([substring])
130
+ among(
131
+ 'ikkude' (<-'iku') //plural genitive: õnnelikkude -> õnneliku
132
+ 'ikke' (<-'iku') //plural partitive: rahulikke -> rahuliku
133
+ 'ike' (<-'iku') //plural genitive: ohtlike -> ohtliku
134
+ 'sid' (not LONGV delete) //plural partitive and sg2pst and pl3pst: auto-sid and laul-sid (excludes plural nominative with words like gaasid, roosid)
135
+ // plural genitive and pl2: ministri-te, oluliste -> olulise and saada-te, laula-te;
136
+ // also torte -> tort (if not in compound word) and kokkuvõtte -> kokkuvõte and roheliste -> rohelise, tegemiste -> tegemise, teadlaste -> teadlase
137
+ 'te' (
138
+ (test hop 4
139
+ among (
140
+ 'mis' 'las' 'lis' (<- 'e')
141
+ 't' ()
142
+ '' (delete)
143
+ )
144
+ ) or <- 't'
145
+ )
146
+ 'de' ((RV or LONGV) delete) //plural genitive: lauda-de
147
+ 'd' ((RV or LONGV) delete) //plural nominative: voodi-d, rattai-d (rata), lapsiku-i-d
148
+ )
149
+ )
150
+
151
+ define nu as (
152
+ setlimit tomark p1 for ([substring])
153
+ among(
154
+ 'nu' //haka-nu(-te-ga)
155
+ 'tu' //luba-tu(-d)
156
+ 'du' //laul-du(-te-st)
157
+ 'va' //laul-va(-te-le)
158
+ )
159
+ delete
160
+ )
161
+
162
+ define undouble_kpt as (
163
+ // undouble '-C1C1V' where C1 is k, p or t:
164
+ // mõtte(-le) -> mõte, hakka(-n) -> haka
165
+ //
166
+ // We only undouble if the vowel is in R1 to avoid modifying short
167
+ // non-words (mostly to avoid modifying acronyms/initialisms such
168
+ // as "PPE").
169
+ V1 $(p1 <= cursor)
170
+ [substring] among(
171
+ 'kk' (<- 'k')
172
+ 'pp' (<- 'p')
173
+ 'tt' (<- 't')
174
+ )
175
+ )
176
+
177
+ define degrees as (
178
+ setlimit tomark p1 for ([substring])
179
+ among(
180
+ 'mai' (RV delete) //heleda-mai(-le)
181
+ 'ma' (delete) //tuge-va-ma(-le) and ma-infinitive: sõit-ma
182
+ 'm' (RV delete) //kauge-i-m, rõõmsa-m
183
+ )
184
+ )
185
+
186
+ define substantive as (
187
+ do special_noun_endings
188
+ do case_ending
189
+ do plural_three_first_cases
190
+ do degrees
191
+ do i_plural
192
+ do nu
193
+ )
194
+ )
195
+
196
+
197
+ define verb_exceptions as (
198
+ [substring] atlimit
199
+ among(
200
+ 'joon' 'jood' 'joob' 'joote' 'joome' 'joovad' (<-'joo')
201
+ 'j{o~}in' 'j{o~}id' 'j{o~}i' 'j{o~}ime' 'j{o~}ite' (<-'joo')
202
+ 'joomata' 'juuakse' 'joodakse' 'juua' 'jooma' (<- 'joo')
203
+ 'saan' 'saad' 'saab' 'saate' 'saame' 'saavad' (<-'saa')
204
+ 'saaksin' 'saaksid' 'saaks' 'saaksite' 'saaksime' (<-'saa')
205
+ 'sain' 'said' 'sai' 'saite' 'saime' (<-'saa')
206
+ 'saamata' 'saadakse' 'saadi' 'saama' 'saada' (<-'saa')
207
+ 'viin' 'viid' 'viib' 'viite' 'viime' 'viivad' (<-'viima')
208
+ 'viiksin' 'viiksid' 'viiks' 'viiksite' 'viiksime' (<-'viima')
209
+ 'viisin' 'viisite' 'viisime' (<-'viima')
210
+ 'viimata' 'viiakse' 'viidi' 'viima' 'viia' (<-'viima')
211
+ 'keen' 'keeb' 'keed' 'kees' 'keeme' 'keete' 'keevad' (<-'keesi')
212
+ 'keeksin' 'keeks' 'keeksid' 'keeksime' 'keeksite' (<-'keesi')
213
+ 'keemata' 'keema' 'keeta' 'keedakse' (<-'keesi')
214
+ 'l{o"}{o"}n' 'l{o"}{o"}d' 'l{o"}{o"}b' 'l{o"}{o"}me' 'l{o"}{o"}te' 'l{o"}{o"}vad' (<-'l{o"}{o"}')
215
+ 'l{o"}{o"}ksin' 'l{o"}{o"}ksid' 'l{o"}{o"}ks' 'l{o"}{o"}ksime' 'l{o"}{o"}ksite' (<-'l{o"}{o"}')
216
+ 'l{o"}{o"}mata' 'l{u"}{u"}akse' 'l{o"}{o"}dakse' 'l{o"}{o"}di' 'l{o"}{o"}ma' 'l{u"}{u"}a' (<-'l{o"}{o"}')
217
+ // Both looma and lööma have these same past tense forms
218
+ 'l{o~}in' 'l{o~}id' 'l{o~}i' 'l{o~}ime' 'l{o~}ite' (<-'l{o~}i')
219
+ 'loon' 'lood' 'loob' 'loome' 'loote' 'loovad' (<-'loo')
220
+ 'looksin' 'looksid' 'looks' 'looksime' 'looksite' (<-'loo')
221
+ 'loomata' 'luuakse' 'loodi' 'luua' 'looma' (<-'loo')
222
+ 'k{a"}in' 'k{a"}ib' 'k{a"}id' 'k{a"}is' 'k{a"}ime' 'k{a"}ite' 'k{a"}ivad' (<-'k{a"}isi')
223
+ 'k{a"}iksin' 'k{a"}iks' 'k{a"}iksid' 'k{a"}iksime' 'k{a"}iksite' (<-'k{a"}isi')
224
+ 'k{a"}imata' 'k{a"}iakse' 'k{a"}idi' 'k{a"}ia' 'k{a"}ima' (<-'k{a"}isi')
225
+ 's{o"}{o"}n' 's{o"}{o"}b' 's{o"}{o"}d' 's{o"}{o"}me' 's{o"}{o"}te' 's{o"}{o"}vad' (<-'s{o"}{o"}')
226
+ 's{o"}{o"}ksin' 's{o"}{o"}ks' 's{o"}{o"}ksid' 's{o"}{o"}ksime' 's{o"}{o"}ksite' (<-'s{o"}{o"}')
227
+ 's{o~}in' 's{o~}i' 's{o~}id' 's{o~}ime' 's{o~}ite' (<-'s{o"}{o"}')
228
+ 's{o"}{o"}mata' 's{u"}{u"}akse' 's{o"}{o"}dakse' 's{o"}{o"}di' 's{o"}{o"}ma' 's{u"}{u"}a' (<-'s{o"}{o"}')
229
+ 'toon' 'tood' 'toob' 'toote' 'toome' 'toovad' (<-'too')
230
+ 'tooksin' 'tooksid' 'tooks' 'tooksite' 'tooksime' (<-'too')
231
+ 't{o~}in' 't{o~}id' 't{o~}i' 't{o~}ime' 't{o~}ite' (<-'too')
232
+ 'toomata' 'tuuakse' 'toodi' 'tooma' 'tuua' (<-'too')
233
+ 'v{o~}in' 'v{o~}id' 'v{o~}ib' 'v{o~}ime' 'v{o~}is' 'v{o~}ite' 'v{o~}ivad' (<-'v{o~}isi')
234
+ 'v{o~}iksin' 'v{o~}iksid' 'v{o~}iks' 'v{o~}iksime' 'v{o~}iksite' (<-'v{o~}isi')
235
+ 'v{o~}imata' 'v{o~}idakse' 'v{o~}idi' 'v{o~}ida' 'v{o~}ima' (<-'v{o~}isi')
236
+ 'j{a"}{a"}n' 'j{a"}{a"}d' 'j{a"}{a"}b' 'j{a"}{a"}me' 'j{a"}{a"}te' 'j{a"}{a"}vad' (<-'j{a"}{a"}ma')
237
+ 'j{a"}{a"}ksin' 'j{a"}{a"}ksid' 'j{a"}{a"}ks' 'j{a"}{a"}ksime' 'j{a"}{a"}ksite' (<-'j{a"}{a"}ma')
238
+ 'j{a"}ime' 'j{a"}ite' 'j{a"}in' 'j{a"}id' 'j{a"}i' (<-'j{a"}{a"}ma')
239
+ 'j{a"}{a"}mata' 'j{a"}{a"}dakse' 'j{a"}{a"}da' 'j{a"}{a"}ma' 'j{a"}{a"}di' (<-'j{a"}{a"}ma')
240
+ 'm{u"}{u"}n' 'm{u"}{u"}d' 'm{u"}{u"}b' 'm{u"}{u"}s' 'm{u"}{u"}me' 'm{u"}{u"}te' 'm{u"}{u"}vad' (<-'m{u"}{u"}si')
241
+ 'm{u"}{u"}ksin' 'm{u"}{u"}ksid' 'm{u"}{u"}ks' 'm{u"}{u"}ksime' 'm{u"}{u"}ksite' (<-'m{u"}{u"}si')
242
+ 'm{u"}{u"}mata' 'm{u"}{u"}akse' 'm{u"}{u"}di' 'm{u"}{u"}a' 'm{u"}{u"}ma' (<-'m{u"}{u"}si')
243
+ 'loeb' 'loen' 'loed' 'loeme' 'loete' 'loevad' (<- 'luge')
244
+ 'loeks' 'loeksin' 'loeksid' 'loeksime' 'loeksite' (<- 'luge')
245
+ 'p{o~}en' 'p{o~}eb' 'p{o~}ed' 'p{o~}eme' 'p{o~}ete' 'p{o~}evad' (<- 'p{o~}de')
246
+ 'p{o~}eksin' 'p{o~}eks' 'p{o~}eksid' 'p{o~}eksime' 'p{o~}eksite' (<- 'p{o~}de')
247
+ 'laon' 'laob' 'laod' 'laome' 'laote' 'laovad' (<- 'ladu')
248
+ 'laoksin' 'laoks' 'laoksid' 'laoksime' 'laoksite' (<- 'ladu')
249
+ 'teeksin' 'teeks' 'teeksid' 'teeksime' 'teeksite' (<- 'tegi')
250
+ 'teen' 'teeb' 'teed' 'teeme' 'teete' 'teevad' (<- 'tegi')
251
+ 'tegemata' 'tehakse' 'tehti' 'tegema' 'teha' (<-'tegi')
252
+ 'n{a"}en' 'n{a"}eb' 'n{a"}ed' 'n{a"}eme' 'n{a"}ete' 'n{a"}evad' (<-'n{a"}gi')
253
+ 'n{a"}eksin' 'n{a"}eks' 'n{a"}eksid' 'n{a"}eksime' 'n{a"}eksite' (<-'n{a"}gi')
254
+ 'n{a"}gemata' 'n{a"}hakse' 'n{a"}hti' 'n{a"}ha' 'n{a"}gema' (<-'n{a"}gi')
255
+ )
256
+ )
257
+
258
+
259
+ define stem as (
260
+ not verb_exceptions
261
+ // p1 isn't used by verb_exceptions
262
+ do mark_regions
263
+ backwards (
264
+ do emphasis
265
+ do ( verb or substantive )
266
+ do undouble_kpt
267
+
268
+ )
269
+ )
@@ -44,8 +44,8 @@ define mark_regions as (
44
44
  $p1 = limit
45
45
  $p2 = limit
46
46
 
47
- goto V1 gopast non-V1 setmark p1
48
- goto V1 gopast non-V1 setmark p2
47
+ gopast V1 gopast non-V1 setmark p1
48
+ gopast V1 gopast non-V1 setmark p2
49
49
  )
50
50
 
51
51
  backwardmode (
@@ -194,4 +194,3 @@ define stem as (
194
194
  do tidy
195
195
  )
196
196
  )
197
-