mittens 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,323 @@
1
+ // An implementation of "A Lightweight Stemmer for Hindi":
2
+ // http://www.kbcs.in/downloads/papers/StmmerHindi.pdf
3
+
4
+ externals ( stem )
5
+
6
+ stringescapes {}
7
+
8
+ // The transliteration scheme used for our stringdefs matches that used in the
9
+ // paper, as documented in the appendix. It appears to match the WX notation
10
+ // (https://en.wikipedia.org/wiki/WX_notation) except that WX apparently
11
+ // uses 'z' for Anunasika whereas the paper uses Mh.
12
+ //
13
+ // We discriminate dependent vowels by adding a leading "_" to their stringdef
14
+ // names (mnemonic: the _ signifies removing the implicit a from the preceding
15
+ // character).
16
+
17
+ // Vowels and sonorants:
18
+ stringdef a '{U+0905}'
19
+ stringdef A '{U+0906}'
20
+ stringdef i '{U+0907}'
21
+ stringdef I '{U+0908}'
22
+ stringdef u '{U+0909}'
23
+ stringdef U '{U+090A}'
24
+ stringdef q '{U+090B}'
25
+ stringdef e '{U+090F}'
26
+ stringdef E '{U+0910}'
27
+ stringdef o '{U+0913}'
28
+ stringdef O '{U+0914}'
29
+
30
+ // Vowel signs:
31
+ stringdef _A '{U+093E}'
32
+ stringdef _i '{U+093F}'
33
+ stringdef _I '{U+0940}'
34
+ stringdef _u '{U+0941}'
35
+ stringdef _U '{U+0942}'
36
+ stringdef _q '{U+0943}'
37
+ stringdef _e '{U+0947}'
38
+ stringdef _E '{U+0948}'
39
+ stringdef _o '{U+094B}'
40
+ stringdef _O '{U+094C}'
41
+
42
+ // Diacritics:
43
+ stringdef M '{U+0902}'
44
+ stringdef H '{U+0903}'
45
+ stringdef Mh '{U+0901}'
46
+ stringdef Z '{U+093C}' // Nukta
47
+ stringdef virama '{U+094D}'
48
+
49
+ // Velar consonants:
50
+ stringdef k '{U+0915}'
51
+ stringdef K '{U+0916}'
52
+ stringdef g '{U+0917}'
53
+ stringdef G '{U+0918}'
54
+ stringdef f '{U+0919}'
55
+
56
+ // Palatal consonants:
57
+ stringdef c '{U+091A}'
58
+ stringdef C '{U+091B}'
59
+ stringdef j '{U+091C}'
60
+ stringdef J '{U+091D}'
61
+ stringdef F '{U+091E}'
62
+
63
+ // Retroflex consonants:
64
+ stringdef t '{U+091F}'
65
+ stringdef T '{U+0920}'
66
+ stringdef d '{U+0921}'
67
+ stringdef D '{U+0922}'
68
+ stringdef N '{U+0923}'
69
+
70
+ // Dental consonants:
71
+ stringdef w '{U+0924}'
72
+ stringdef W '{U+0925}'
73
+ stringdef x '{U+0926}'
74
+ stringdef X '{U+0927}'
75
+ stringdef n '{U+0928}'
76
+
77
+ // Labial consonants:
78
+ stringdef p '{U+092A}'
79
+ stringdef P '{U+092B}'
80
+ stringdef b '{U+092C}'
81
+ stringdef B '{U+092D}'
82
+ stringdef m '{U+092E}'
83
+
84
+ // Semi-vowels:
85
+ stringdef y '{U+092F}'
86
+ stringdef r '{U+0930}'
87
+ stringdef l '{U+0932}'
88
+ stringdef v '{U+0935}'
89
+
90
+ // Fricatives:
91
+ stringdef S '{U+0936}'
92
+ stringdef R '{U+0937}'
93
+ stringdef s '{U+0938}'
94
+ stringdef h '{U+0939}'
95
+
96
+ stringdef lY '{U+0933}'
97
+
98
+ // Precomposed characters - letters + nukta:
99
+ stringdef nZ '{U+0929}' // ≡ {n}{Z}
100
+ stringdef rZ '{U+0931}' // ≡ {r}{Z}
101
+ stringdef lYZ '{U+0934}' // ≡ {lY}{Z}
102
+ stringdef kZ '{U+0958}' // ≡ {k}{Z}
103
+ stringdef KZ '{U+0959}' // ≡ {K}{Z}
104
+ stringdef gZ '{U+095A}' // ≡ {g}{Z}
105
+ stringdef jZ '{U+095B}' // ≡ {j}{Z}
106
+ stringdef dZ '{U+095C}' // ≡ {d}{Z}
107
+ stringdef DZ '{U+095D}' // ≡ {D}{Z}
108
+ stringdef PZ '{U+095E}' // ≡ {P}{Z}
109
+ stringdef yZ '{U+095F}' // ≡ {y}{Z}
110
+
111
+ groupings ( consonant )
112
+
113
+ routines ( CONSONANT )
114
+
115
+ define consonant '{k}{K}{g}{G}{f}' + // grouping of consonant characters; this row: velar
116
+ '{c}{C}{j}{J}{F}' + // palatal
117
+ '{t}{T}{d}{D}{N}' + // retroflex
118
+ '{w}{W}{x}{X}{n}' + // dental
119
+ '{p}{P}{b}{B}{m}' + // labial
120
+ '{y}{r}{l}{v}' + // semi-vowels
121
+ '{S}{R}{s}{h}' + // fricatives
122
+ '{lY}' +
123
+ '{Z}' + // Nukta
124
+ // Precomposed characters - letter and nukta:
125
+ '{nZ}{rZ}{lYZ}{kZ}{KZ}{gZ}{jZ}{dZ}{DZ}{PZ}{yZ}'
126
+
127
+ backwardmode ( define CONSONANT as ( consonant ) ) // backward-mode routine wrapping the consonant grouping, so among entries in stem can name it as a match condition
128
+
129
+ define stem as (
130
+ // We assume in this implementation that the whole word doesn't count
131
+ // as a valid suffix to remove, so we remove the longest suffix from
132
+ // the list which leaves at least one character. This change affects
133
+ // 47 words out of the 65,140 in the sample vocabulary from Hindi
134
+ // wikipedia.
135
+ //
136
+ // The trick here is we use `next` in forward mode to advance the cursor
137
+ // to the second character, then `backwards` swaps the cursor and limit.
138
+ next
139
+ backwards (
140
+ [substring] among (
141
+ // The list below is derived from figure 3 in the paper.
142
+ //
143
+ // We perform the stemming on the Devanagari characters rather than
144
+ // transliterating to Latin, so we have adapted the list below to
145
+ // reflect this by converting suffixes back to Devanagari as
146
+ // follows:
147
+ //
148
+ // * within the suffixes, "a" after a consonant is dropped since
149
+ // consonants have an implicit "a".
150
+ //
151
+ // * within the suffixes, a vowel other than "a" after a consonant
152
+ // is a dependent vowel (vowel sign); a vowel (including "a")
153
+ // after a non-consonant is an independent vowel.
154
+ //
155
+ // * to allow the vowel at the start of each suffix being dependent
156
+ // or independent, we include each suffix twice. For the
157
+ // dependent version, a leading "a" is dropped and we check that
158
+ // the suffix is preceded by a consonant (which will have an
159
+ // implicit "a").
160
+ //
161
+ // * we add '{a}', which is needed for the example given right at
162
+ // the end of section 5 to work (conflating BarawIya and
163
+ // BarawIyawA), and which 3.1 a.v strongly suggests should be in
164
+ // the list:
165
+ //
166
+ // Thus, the following suffix deletions (longest possible
167
+ // match) are required to reduce inflected forms of masculine
168
+ // nouns to a common stem:
169
+ // a A i [...]
170
+ //
171
+ // Adding '{a}' only affects 2 words out of the 65,140 in the
172
+ // sample vocabulary.
173
+ //
174
+ // * The transliterations of our stems would end with "a" when our
175
+ // stems end in a consonant, so we also include {virama} in the
176
+ // list of suffixes to remove (this affects 222 words from the
177
+ // sample vocabulary).
178
+ //
179
+ // We've also assumed that Mh in the suffix list always means {Mh}
180
+ // and never {M}{h}{virama}. Only one of the 65,140 words in the
181
+ // sample vocabulary stems differently due to this (and that word
182
+ // seems to be a typo).
183
+
184
+ '{virama}'
185
+
186
+ '{a}'
187
+ '{A}'
188
+ '{i}'
189
+ '{I}'
190
+ '{u}'
191
+ '{U}'
192
+ '{e}'
193
+ '{o}'
194
+ '{e}{M}'
195
+ '{o}{M}'
196
+ '{A}{M}'
197
+ '{u}{A}{M}'
198
+ '{u}{e}{M}'
199
+ '{u}{o}{M}'
200
+ '{A}{e}{M}'
201
+ '{A}{o}{M}'
202
+ '{i}{y}{_A}{M}'
203
+ '{i}{y}{_o}{M}'
204
+ '{A}{i}{y}{_A}{M}'
205
+ '{A}{i}{y}{_o}{M}'
206
+ '{A}{Mh}'
207
+ '{i}{y}{_A}{Mh}'
208
+ '{A}{i}{y}{_A}{Mh}'
209
+ '{a}{w}{_A}{e}{M}'
210
+ '{a}{w}{_A}{o}{M}'
211
+ '{a}{n}{_A}{e}{M}'
212
+ '{a}{n}{_A}{o}{M}'
213
+ '{a}{w}{_A}'
214
+ '{a}{w}{_I}'
215
+ '{I}{M}'
216
+ '{a}{w}{_I}{M}'
217
+ '{a}{w}{_e}'
218
+ '{A}{w}{_A}'
219
+ '{A}{w}{_I}'
220
+ '{A}{w}{_I}{M}'
221
+ '{A}{w}{_e}'
222
+ '{a}{n}{_A}'
223
+ '{a}{n}{_I}'
224
+ '{a}{n}{_e}'
225
+ '{A}{n}{_A}'
226
+ '{A}{n}{_e}'
227
+ '{U}{M}{g}{_A}'
228
+ '{U}{M}{g}{_I}'
229
+ '{A}{U}{M}{g}{_A}'
230
+ '{A}{U}{M}{g}{_I}'
231
+ '{e}{M}{g}{_e}'
232
+ '{e}{M}{g}{_I}'
233
+ '{A}{e}{M}{g}{_e}'
234
+ '{A}{e}{M}{g}{_I}'
235
+ '{o}{g}{_e}'
236
+ '{o}{g}{_I}'
237
+ '{A}{o}{g}{_e}'
238
+ '{A}{o}{g}{_I}'
239
+ '{e}{g}{_A}'
240
+ '{e}{g}{_I}'
241
+ '{A}{e}{g}{_A}'
242
+ '{A}{e}{g}{_I}'
243
+ '{A}{y}{_A}'
244
+ '{A}{e}'
245
+ '{A}{I}'
246
+ '{A}{I}{M}'
247
+ '{i}{e}'
248
+ '{A}{o}'
249
+ '{A}{i}{e}'
250
+ '{a}{k}{r}'
251
+ '{A}{k}{r}'
252
+
253
+ '{_A}'
254
+ '{_i}'
255
+ '{_I}'
256
+ '{_u}'
257
+ '{_U}'
258
+ '{_e}'
259
+ '{_o}'
260
+ '{_e}{M}'
261
+ '{_o}{M}'
262
+ '{_A}{M}'
263
+ '{_u}{A}{M}'
264
+ '{_u}{e}{M}'
265
+ '{_u}{o}{M}'
266
+ '{_A}{e}{M}'
267
+ '{_A}{o}{M}'
268
+ '{_i}{y}{_A}{M}'
269
+ '{_i}{y}{_o}{M}'
270
+ '{_A}{i}{y}{_A}{M}'
271
+ '{_A}{i}{y}{_o}{M}'
272
+ '{_A}{Mh}'
273
+ '{_i}{y}{_A}{Mh}'
274
+ '{_A}{i}{y}{_A}{Mh}'
275
+ '{_I}{M}'
276
+ '{_A}{w}{_A}'
277
+ '{_A}{w}{_I}'
278
+ '{_A}{w}{_I}{M}'
279
+ '{_A}{w}{_e}'
280
+ '{_A}{n}{_A}'
281
+ '{_A}{n}{_e}'
282
+ '{_U}{M}{g}{_A}'
283
+ '{_U}{M}{g}{_I}'
284
+ '{_A}{U}{M}{g}{_A}'
285
+ '{_A}{U}{M}{g}{_I}'
286
+ '{_e}{M}{g}{_e}'
287
+ '{_e}{M}{g}{_I}'
288
+ '{_A}{e}{M}{g}{_e}'
289
+ '{_A}{e}{M}{g}{_I}'
290
+ '{_o}{g}{_e}'
291
+ '{_o}{g}{_I}'
292
+ '{_A}{o}{g}{_e}'
293
+ '{_A}{o}{g}{_I}'
294
+ '{_e}{g}{_A}'
295
+ '{_e}{g}{_I}'
296
+ '{_A}{e}{g}{_A}'
297
+ '{_A}{e}{g}{_I}'
298
+ '{_A}{y}{_A}'
299
+ '{_A}{e}'
300
+ '{_A}{I}'
301
+ '{_A}{I}{M}'
302
+ '{_i}{e}'
303
+ '{_A}{o}'
304
+ '{_A}{i}{e}'
305
+ '{_A}{k}{r}'
306
+
307
+ /* Suffixes with a leading implicit a: */
308
+ '{w}{_A}{e}{M}' CONSONANT
309
+ '{w}{_A}{o}{M}' CONSONANT
310
+ '{n}{_A}{e}{M}' CONSONANT
311
+ '{n}{_A}{o}{M}' CONSONANT
312
+ '{w}{_A}' CONSONANT
313
+ '{w}{_I}' CONSONANT
314
+ '{w}{_I}{M}' CONSONANT
315
+ '{w}{_e}' CONSONANT
316
+ '{n}{_A}' CONSONANT
317
+ '{n}{_I}' CONSONANT
318
+ '{n}{_e}' CONSONANT
319
+ '{k}{r}' CONSONANT
320
+ )
321
+ delete
322
+ )
323
+ )
@@ -0,0 +1,241 @@
1
+ /*
2
+ Hungarian Stemmer
3
+ Removes noun inflections
4
+ */
5
+
6
+ routines (
7
+ mark_regions
8
+ R1
9
+ v_ending
10
+ case
11
+ case_special
12
+ case_other
13
+ plural
14
+ owned
15
+ sing_owner
16
+ plur_owner
17
+ instrum
18
+ factive
19
+ undouble
20
+ double
21
+ )
22
+
23
+ externals ( stem )
24
+
25
+ integers ( p1 )
26
+ groupings ( v )
27
+
28
+ stringescapes {}
29
+
30
+ /* special characters */
31
+
32
+ stringdef a' '{U+00E1}' //a-acute
33
+ stringdef e' '{U+00E9}' //e-acute
34
+ stringdef i' '{U+00ED}' //i-acute
35
+ stringdef o' '{U+00F3}' //o-acute
36
+ stringdef o" '{U+00F6}' //o-umlaut
37
+ stringdef oq '{U+0151}' //o-double acute
38
+ stringdef u' '{U+00FA}' //u-acute
39
+ stringdef u" '{U+00FC}' //u-umlaut
40
+ stringdef uq '{U+0171}' //u-double acute
41
+
42
+ define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'
43
+
44
+ define mark_regions as ( // sets p1, the left boundary of region R1
45
+
46
+ $p1 = limit // default: p1 at the end of the word, so no suffix lies in R1 unless a mark is set below
47
+
48
+ (v goto non-v // word begins with a vowel: advance to the first non-vowel
49
+ among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next // step over it; a listed digraph/trigraph is consumed as one unit
50
+ setmark p1)
51
+ or
52
+
53
+ (non-v gopast v setmark p1) // word begins with a non-vowel: p1 goes just after the first vowel
54
+ )
55
+
56
+ backwardmode (
57
+
58
+ define R1 as $p1 <= cursor // true when the cursor is at or after p1, i.e. the matched suffix lies inside R1
59
+
60
+ define v_ending as ( // if the word ends in a-acute or e-acute within R1, replace it with plain a or e
61
+ [substring] R1 among(
62
+ '{a'}' (<- 'a')
63
+ '{e'}' (<- 'e')
64
+ )
65
+ )
66
+
67
+ define double as ( // succeeds if the text before the cursor ends in a listed doubled consonant; `test` leaves the cursor unmoved
68
+ test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
69
+ 'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
70
+ )
71
+
72
+ define undouble as ( // backward mode: step over the last character, then delete the single character before it
73
+ next [hop 1] delete
74
+ )
75
+
76
+ define instrum as( // remove 'al'/'el' in R1 when a doubled consonant precedes it, then reduce that doubled consonant
77
+ [substring] R1 among(
78
+ 'al' (double) // the match only proceeds if double succeeds on what is left of the suffix
79
+ 'el' (double)
80
+ )
81
+ delete
82
+ undouble
83
+ )
84
+
85
+
86
+ define case as ( // delete the longest listed ending found in R1, then apply v_ending to the shortened word
87
+ [substring] R1 among( // no per-string actions: every match continues to the delete below
88
+ 'ban' 'ben'
89
+ 'ba' 'be'
90
+ 'ra' 're'
91
+ 'nak' 'nek'
92
+ 'val' 'vel'
93
+ 't{o'}l' 't{oq}l'
94
+ 'r{o'}l' 'r{oq}l'
95
+ 'b{o'}l' 'b{oq}l'
96
+ 'hoz' 'hez' 'h{o"}z'
97
+ 'n{a'}l' 'n{e'}l'
98
+ 'ig'
99
+ 'at' 'et' 'ot' '{o"}t'
100
+ '{e'}rt'
101
+ 'k{e'}pp' 'k{e'}ppen'
102
+ 'kor'
103
+ 'ul' '{u"}l'
104
+ 'v{a'}' 'v{e'}'
105
+ 'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
106
+ 'k{e'}nt'
107
+ 'en' 'on' 'an' '{o"}n'
108
+ 'n'
109
+ 't'
110
+ )
111
+ delete
112
+ v_ending // if v_ending finds no accented final vowel it fails, and so does this routine (the deletion above is still applied)
113
+ )
114
+
115
+ define case_special as( // endings in R1 that begin with an accented vowel: replace the whole ending with the plain vowel
116
+ [substring] R1 among(
117
+ '{e'}n' (<- 'e')
118
+ '{a'}n' (<- 'a')
119
+ '{a'}nk{e'}nt' (<- 'a')
120
+ )
121
+ )
122
+
123
+ define case_other as( // handle the -stul / -st(u-umlaut)l endings in R1
124
+ [substring] R1 among(
125
+ 'astul' 'est{u"}l' (delete)
126
+ 'stul' 'st{u"}l' (delete)
127
+ '{a'}stul' (<- 'a') // accented linking vowel: keep it, unaccented
128
+ '{e'}st{u"}l' (<- 'e')
129
+ )
130
+ )
131
+
132
+ define factive as( // remove a final a-acute/e-acute in R1 when a doubled consonant precedes it, then reduce that doubled consonant
133
+ [substring] R1 among(
134
+ '{a'}' (double)
135
+ '{e'}' (double)
136
+ )
137
+ delete
138
+ undouble
139
+ )
140
+
141
+ define plural as ( // strip a -k ending in R1; an accented linking vowel is kept in unaccented form
142
+ [substring] R1 among(
143
+ '{a'}k' (<- 'a')
144
+ '{e'}k' (<- 'e')
145
+ '{o"}k' (delete)
146
+ 'ak' (delete)
147
+ 'ok' (delete)
148
+ 'ek' (delete)
149
+ 'k' (delete)
150
+ )
151
+ )
152
+
153
+ define owned as ( // strip endings in R1 built on e-acute ('-k(e-acute)', '-(e-acute)i', '-(e-acute)'); a preceding accented vowel is kept unaccented
154
+ [substring] R1 among (
155
+ 'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete) // all four strings on this line share the one action
156
+ '{e'}k{e'}' (<- 'e')
157
+ '{a'}k{e'}' (<- 'a')
158
+ 'k{e'}' (delete)
159
+ '{e'}{e'}i' (<- 'e')
160
+ '{a'}{e'}i' (<- 'a')
161
+ '{e'}i' (delete)
162
+ '{e'}{e'}' (<- 'e')
163
+ '{e'}' (delete)
164
+ )
165
+ )
166
+
167
+ define sing_owner as ( // strip the longest listed ending found in R1; entries starting with an accented vowel keep that vowel unaccented
168
+ [substring] R1 among(
169
+ '{u"}nk' 'unk' (delete)
170
+ '{a'}nk' (<- 'a')
171
+ '{e'}nk' (<- 'e')
172
+ 'nk' (delete)
173
+ '{a'}juk' (<- 'a')
174
+ '{e'}j{u"}k' (<- 'e')
175
+ 'juk' 'j{u"}k' (delete)
176
+ 'uk' '{u"}k' (delete)
177
+ 'em' 'om' 'am' (delete)
178
+ '{a'}m' (<- 'a')
179
+ '{e'}m' (<- 'e')
180
+ 'm' (delete)
181
+ 'od' 'ed' 'ad' '{o"}d' (delete)
182
+ '{a'}d' (<- 'a')
183
+ '{e'}d' (<- 'e')
184
+ 'd' (delete)
185
+ 'ja' 'je' (delete)
186
+ 'a' 'e' 'o' (delete)
187
+ '{a'}' (<- 'a')
188
+ '{e'}' (<- 'e')
189
+ )
190
+ )
191
+
192
+ define plur_owner as ( // strip the longest listed ending found in R1; entries starting with an accented vowel keep that vowel unaccented
193
+ [substring] R1 among(
194
+ 'jaim' 'jeim' (delete)
195
+ '{a'}im' (<- 'a')
196
+ '{e'}im' (<- 'e')
197
+ 'aim' 'eim' (delete)
198
+ 'im' (delete)
199
+ 'jaid' 'jeid' (delete)
200
+ '{a'}id' (<- 'a')
201
+ '{e'}id' (<- 'e')
202
+ 'aid' 'eid' (delete)
203
+ 'id' (delete)
204
+ 'jai' 'jei' (delete)
205
+ '{a'}i' (<- 'a')
206
+ '{e'}i' (<- 'e')
207
+ 'ai' 'ei' (delete)
208
+ 'i' (delete)
209
+ 'jaink' 'jeink' (delete)
210
+ 'eink' 'aink' (delete)
211
+ '{a'}ink' (<- 'a')
212
+ '{e'}ink' (<- 'e')
213
+ 'ink' // no action of its own: it shares the (delete) that follows 'jaitok' 'jeitek' on the next line
214
+ 'jaitok' 'jeitek' (delete)
215
+ 'aitok' 'eitek' (delete)
216
+ '{a'}itok' (<- 'a')
217
+ '{e'}itek' (<- 'e')
218
+ 'itek' (delete)
219
+ 'jeik' 'jaik' (delete)
220
+ 'aik' 'eik' (delete)
221
+ '{a'}ik' (<- 'a')
222
+ '{e'}ik' (<- 'e')
223
+ 'ik' (delete)
224
+ )
225
+ )
226
+ )
227
+
228
+ define stem as ( // entry point: compute p1, then try each suffix routine once, working from the end of the word
229
+ do mark_regions
230
+ backwards (
231
+ do instrum // every step is wrapped in `do`, so a routine that fails does not stop the ones after it
232
+ do case
233
+ do case_special
234
+ do case_other
235
+ do factive
236
+ do owned
237
+ do sing_owner
238
+ do plur_owner
239
+ do plural
240
+ )
241
+ )