mittens 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,93 @@
1
+ routines ( // internal routines; each is defined further down
2
+ mark_regions
3
+ main_suffix
4
+ consonant_pair
5
+ other_suffix
6
+ undouble
7
+ )
8
+
9
+ externals ( stem ) // stem is the routine exported to callers
10
+
11
+ strings ( ch ) // ch receives the final consonant copied by undouble
12
+
13
+ integers ( p1 x ) // p1: start of the suffix region; x: mark set 3 characters in by mark_regions
14
+
15
+ groupings ( c v s_ending )
16
+
17
+ stringescapes {} // stringdef names are written between braces inside strings
18
+
19
+ /* special characters */
20
+
21
+ stringdef ae '{U+00E6}' // a-e ligature
22
+ stringdef ao '{U+00E5}' // a with ring above
23
+ stringdef o/ '{U+00F8}' // o with stroke
24
+
25
+ define c 'bcdfghjklmnpqrstvwxz' // consonant letters
26
+
27
+ define v 'aeiouy{ae}{ao}{o/}' // vowels, including the three special characters
28
+
29
+ define s_ending 'abcdfghjklmnoprtvyz{ao}' // letters that must precede a final 's' for main_suffix to delete it
30
+
31
+ define mark_regions as ( // sets p1, the left edge of the region the suffix routines search
32
+
33
+ $p1 = limit // default when the steps below fail: region is empty
34
+
35
+ test ( hop 3 setmark x ) // x = position 3 characters in; test puts the cursor back; fails if hop 3 cannot move
36
+ goto v gopast non-v setmark p1 // p1 = just past the first non-vowel that follows a vowel
37
+ try ( $p1 < x $p1 = x ) // never let p1 fall before x
38
+ )
39
+
40
+ backwardmode (
41
+
42
+ define main_suffix as ( // removes the longest listed ending found at or after p1
43
+ setlimit tomark p1 for ([substring]) // search is limited to the region starting at p1
44
+ among(
45
+
46
+ 'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
47
+ 'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
48
+ 'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
49
+ 'erets' 'et' 'eret'
50
+ (delete) // any ending listed above is simply removed
51
+ 's'
52
+ (s_ending delete) // 's' is removed only when the letter before it is in s_ending
53
+ )
54
+ )
55
+
56
+ define consonant_pair as ( // if the word ends in one of the listed pairs, drops its last letter
56
+ test ( // look for the pair, then put the cursor back at the end
57
+ setlimit tomark p1 for ([substring]) // the pair must lie at or after p1
58
+ among(
59
+ 'gd' // significant in the call from other_suffix
60
+ 'dt' 'gt' 'kt'
61
+ )
62
+ )
63
+ next] delete // step back one character, close the slice there, and delete that single character
64
+ )
66
+
67
+ define other_suffix as ( // handles 'st' after 'ig', then a second group of endings
67
+ do ( ['st'] 'ig' delete ) // final 'st' is deleted when preceded by 'ig'; do keeps going if this fails
68
+ setlimit tomark p1 for ([substring]) // search is limited to the region starting at p1
69
+ among(
70
+ 'ig' 'lig' 'elig' 'els'
71
+ (delete do consonant_pair) // remove the ending, then try consonant_pair on what remains
72
+ 'l{o/}st'
73
+ (<-'l{o/}s') // replace the ending, i.e. drop only the final 't'
74
+ )
75
+ )
77
+ define undouble as ( // removes the last letter when the word ends in a doubled consonant
77
+ setlimit tomark p1 for ([c] ->ch) // final letter must be a consonant at or after p1; copy it into ch
78
+ ch // the text before that letter must end with the same string
79
+ delete // delete the slice, i.e. the final consonant
80
+ )
82
+ )
83
+
84
+ define stem as ( // exported entry point
84
+
85
+ do mark_regions // compute p1; do ignores failure
86
+ backwards ( // the suffix routines run from the end of the word
87
+ do main_suffix // each do restores the cursor and ignores failure
88
+ do consonant_pair
89
+ do other_suffix
90
+ do undouble
91
+ )
92
+ )
@@ -0,0 +1,164 @@
1
+ routines ( // internal routines; each is defined further down
2
+ prelude postlude
3
+ e_ending
4
+ en_ending
5
+ mark_regions
6
+ R1 R2
7
+ undouble
8
+ standard_suffix
9
+ )
10
+
11
+ externals ( stem ) // stem is the routine exported to callers
12
+
13
+ booleans ( e_found ) // set by e_ending, read by the 'bar' rule in standard_suffix
14
+
15
+ integers ( p1 p2 ) // marks set by mark_regions and tested by R1 and R2
16
+
17
+ groupings ( v v_I v_j )
18
+
19
+ stringescapes {} // stringdef names are written between braces inside strings
20
+
21
+ /* special characters */
22
+
23
+ stringdef a" '{U+00E4}' // a with diaeresis
24
+ stringdef e" '{U+00EB}' // e with diaeresis
25
+ stringdef i" '{U+00EF}' // i with diaeresis
26
+ stringdef o" '{U+00F6}' // o with diaeresis
27
+ stringdef u" '{U+00FC}' // u with diaeresis
28
+
29
+ stringdef a' '{U+00E1}' // a with acute
30
+ stringdef e' '{U+00E9}' // e with acute
31
+ stringdef i' '{U+00ED}' // i with acute
32
+ stringdef o' '{U+00F3}' // o with acute
33
+ stringdef u' '{U+00FA}' // u with acute
34
+
35
+ stringdef e` '{U+00E8}' // e with grave
36
+
37
+ define v 'aeiouy{e`}' // vowels, including e with grave
38
+ define v_I v + 'I' // v plus the upper-case marker 'I' written by prelude
39
+ define v_j v + 'j' // v plus 'j'
40
+
41
+ define prelude as ( // strips diaeresis/acute accents and marks consonant-like i and y in upper case
42
+ test repeat ( // walk the whole word; test returns the cursor to the start afterwards
43
+ [substring] among(
44
+ '{a"}' '{a'}'
45
+ (<- 'a')
46
+ '{e"}' '{e'}'
47
+ (<- 'e')
48
+ '{i"}' '{i'}'
49
+ (<- 'i')
50
+ '{o"}' '{o'}'
51
+ (<- 'o')
52
+ '{u"}' '{u'}'
53
+ (<- 'u')
54
+ '' (next) // nothing to replace here: advance one character
55
+ ) //or next
56
+ )
57
+ try(['y'] <- 'Y') // a word-initial 'y' becomes 'Y'
58
+ repeat goto ( // repeatedly find the next place where one of the patterns below matches
59
+ v [('i'] v <- 'I') or // 'i' between two vowels becomes 'I'
60
+ ('y'] <- 'Y') // 'y' after a vowel becomes 'Y'
61
+ )
62
+ )
63
+
64
+ define mark_regions as ( // sets the marks p1 and p2 tested by R1 and R2
65
+
66
+ $p1 = limit // defaults kept if a gopast below fails
67
+ $p2 = limit
68
+
69
+ gopast v gopast non-v setmark p1 // p1 = just past the first non-vowel that follows a vowel
70
+ try($p1 < 3 $p1 = 3) // at least 3
71
+ gopast v gopast non-v setmark p2 // p2 = the same pattern applied again from the cursor position
72
+
73
+ )
74
+
75
+ define postlude as repeat ( // turns the markers written by prelude back into lower case
76
+
77
+ [substring] among(
78
+ 'Y' (<- 'y')
79
+ 'I' (<- 'i')
80
+ '' (next) // any other character: advance one position
81
+ ) //or next
82
+
83
+ )
84
+
85
+ backwardmode (
86
+
87
+ define R1 as $p1 <= cursor // succeeds when the cursor is at or after p1
88
+ define R2 as $p2 <= cursor // succeeds when the cursor is at or after p2
89
+
90
+ define undouble as ( // kk, dd, tt at the cursor lose their last letter
91
+ test among('kk' 'dd' 'tt') [next] delete // test restores the cursor; [next] then slices one character, which is deleted
92
+ )
93
+
94
+ define e_ending as ( // removes a final 'e' and records that it did so
95
+ unset e_found // cleared first, so it stays false if the next line fails
96
+ ['e'] R1 test non-v delete // 'e' must be in R1 and preceded by a non-vowel
97
+ set e_found
98
+ undouble // then undouble kk/dd/tt left at the end
99
+ )
100
+
101
+ define en_ending as ( // deletes the current slice (set by the caller) under the conditions below
102
+ R1 non-v and not 'gem' delete // needs R1, a preceding non-vowel, and the text before the slice must not end in 'gem'
103
+ undouble // then undouble kk/dd/tt left at the end
104
+ )
105
+
106
+ define standard_suffix as ( // runs the suffix-removal steps in order; each do ignores failure
107
+ do ( // step 1: longest of the endings below
108
+ [substring] among(
109
+ 'heden'
110
+ ( R1 <- 'heid' // in R1: replace with 'heid'
111
+ )
112
+ 'en' 'ene'
113
+ ( en_ending
114
+ )
115
+ 's' 'se'
116
+ ( R1 non-v_j delete // in R1 and preceded by a character outside v_j
117
+ )
118
+ )
119
+ )
120
+ do e_ending // step 2: final 'e'
121
+
122
+ do ( ['heid'] R2 not 'c' delete // step 3a: 'heid' in R2, not preceded by 'c'
123
+ ['en'] en_ending // and, once 'heid' is gone, a preceding 'en'
124
+ )
125
+
126
+ do ( // step 3b: derivational endings
127
+ [substring] among(
128
+ 'end' 'ing'
129
+ ( R2 delete
130
+ (['ig'] R2 not 'e' delete) or undouble // then remove 'ig' (in R2, not after 'e'), otherwise undouble
131
+ )
132
+ 'ig'
133
+ ( R2 not 'e' delete // in R2 and not preceded by 'e'
134
+ )
135
+ 'lijk'
136
+ ( R2 delete e_ending // remove, then apply the final-'e' rule
137
+ )
138
+ 'baar'
139
+ ( R2 delete
140
+ )
141
+ 'bar'
142
+ ( R2 e_found delete // only when e_found is currently set
143
+ )
144
+ )
145
+ )
146
+ do ( // step 4: shorten a double vowel
147
+ non-v_I // last character is outside v_I
148
+ test ( // before it: one of the double vowels, itself preceded by a non-vowel
149
+ among ('aa' 'ee' 'oo' 'uu')
150
+ non-v
151
+ )
152
+ [next] delete // delete the character just before that last character
153
+ )
154
+ )
155
+ )
156
+
157
+ define stem as ( // exported entry point
158
+
159
+ do prelude // each do ignores failure of its routine
160
+ do mark_regions
161
+ backwards // applies to the next command only
162
+ do standard_suffix
163
+ do postlude // back in forward mode
164
+ )
@@ -0,0 +1,229 @@
1
+ integers ( p1 p2 ) // marks set by mark_regions and tested by R1 and R2
2
+ booleans ( Y_found ) // set by prelude when it writes a 'Y'; read by postlude
3
+
4
+ routines ( // internal routines; each is defined further down
5
+ prelude postlude
6
+ mark_regions
7
+ shortv
8
+ R1 R2
9
+ Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5
10
+ exception1
11
+ exception2
12
+ )
13
+
14
+ externals ( stem ) // stem is the routine exported to callers
15
+
16
+ groupings ( v v_WXY valid_LI )
17
+
18
+ stringescapes {} // escapes inside strings are written between braces
19
+
20
+ define v 'aeiouy' // vowels
21
+ define v_WXY v + 'wxY' // vowels plus w, x and the marker 'Y'
22
+
23
+ define valid_LI 'cdeghkmnrt' // letters that must precede 'li' for Step_2 to delete it
24
+
25
+ define prelude as ( // removes a leading apostrophe and marks consonant-like y as 'Y'
26
+ unset Y_found
27
+ do ( ['{'}'] delete) // delete an apostrophe at the start of the word
28
+ do ( ['y'] <-'Y' set Y_found) // 'y' at the start of the word becomes 'Y'
29
+ do repeat(goto (v ['y']) <-'Y' set Y_found) // every 'y' directly after a vowel becomes 'Y'
30
+ )
31
+
32
+ define mark_regions as ( // sets the marks p1 and p2 tested by R1 and R2
33
+ $p1 = limit // defaults kept if the steps below fail
34
+ $p2 = limit
35
+ do(
36
+ among ( // words starting with one of these get p1 right after that prefix
37
+ 'gener'
38
+ 'commun' // added May 2005
39
+ 'arsen' // added Nov 2006 (arsenic/arsenal)
40
+ // ... extensions possible here ...
41
+ ) or (gopast v gopast non-v) // otherwise: just past the first non-vowel that follows a vowel
42
+ setmark p1
43
+ gopast v gopast non-v setmark p2 // p2 = the same vowel/non-vowel pattern applied again after p1
44
+ )
45
+ )
46
+
47
+ backwardmode (
48
+
49
+ define shortv as ( // backward-mode test on the characters before the cursor
50
+ ( non-v_WXY v non-v ) // reading leftwards: a character outside v_WXY, then a vowel, then a non-vowel
51
+ or
52
+ ( non-v v atlimit ) // or: a non-vowel, then a vowel that is the first character of the word
53
+ )
54
+
55
+ define R1 as $p1 <= cursor // succeeds when the cursor is at or after p1
56
+ define R2 as $p2 <= cursor // succeeds when the cursor is at or after p2
57
+
58
+ define Step_1a as ( // apostrophe endings, then plural-like 's' endings
59
+ try ( // optional: failure here does not stop the routine
60
+ [substring] among (
61
+ '{'}' '{'}s' '{'}s{'}'
62
+ (delete) // drop a trailing apostrophe, apostrophe-s, or apostrophe-s-apostrophe
63
+ )
64
+ )
65
+ [substring] among (
66
+ 'sses' (<-'ss')
67
+ 'ied' 'ies'
68
+ ((hop 2 <-'i') or <-'ie') // 'i' when at least two characters precede the ending, else 'ie'
69
+ 's' (next gopast v delete) // delete 's' only if a vowel occurs somewhere before the letter preceding it
70
+ 'us' 'ss' // matched with no action, so the 's' rule above does not fire
71
+ )
72
+ )
73
+
74
+ define Step_1b as ( // handles eed/eedly and ed/edly/ing/ingly
75
+ [substring] among (
76
+ 'eed' 'eedly'
77
+ (R1 <-'ee') // only when the ending lies in R1
78
+ 'ed' 'edly' 'ing' 'ingly'
79
+ (
80
+ test gopast v delete // the part before the ending must contain a vowel
81
+ test substring among( // look at the new ending; test restores the cursor before an action runs
82
+ 'at' 'bl' 'iz'
83
+ (<+ 'e') // insert 'e' at the cursor
84
+ 'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
85
+ // ignoring double c, h, j, k, q, v, w, and x
86
+ ([next] delete) // drop the last letter of the double
87
+ '' (atmark p1 test shortv <+ 'e') // otherwise: cursor at p1 and shortv holds -> insert 'e'
88
+ )
89
+ )
90
+ )
91
+ )
92
+
93
+ define Step_1c as ( // final y/Y becomes i after a consonant
93
+ ['y' or 'Y']
94
+ non-v not atlimit // preceded by a non-vowel that is not the first letter of the word
95
+ <-'i'
96
+ )
98
+
99
+ define Step_2 as ( // rewrites the longest matching ending, provided it lies in R1
99
+ [substring] R1 among ( // R1 is tested with the cursor at the start of the matched ending
100
+ 'tional' (<-'tion')
101
+ 'enci' (<-'ence')
102
+ 'anci' (<-'ance')
103
+ 'abli' (<-'able')
104
+ 'entli' (<-'ent')
105
+ 'izer' 'ization'
106
+ (<-'ize')
107
+ 'ational' 'ation' 'ator'
108
+ (<-'ate')
109
+ 'alism' 'aliti' 'alli'
110
+ (<-'al')
111
+ 'fulness' (<-'ful')
112
+ 'ousli' 'ousness'
113
+ (<-'ous')
114
+ 'iveness' 'iviti'
115
+ (<-'ive')
116
+ 'biliti' 'bli'
117
+ (<-'ble')
118
+ 'ogi' ('l' <-'og') // only when preceded by 'l'
119
+ 'fulli' (<-'ful')
120
+ 'lessli' (<-'less')
121
+ 'li' (valid_LI delete) // only when preceded by a letter in valid_LI
122
+ )
123
+ )
125
+
126
+ define Step_3 as ( // rewrites the longest matching ending, provided it lies in R1
126
+ [substring] R1 among ( // R1 is tested with the cursor at the start of the matched ending
127
+ 'tional' (<- 'tion')
128
+ 'ational' (<- 'ate')
129
+ 'alize' (<-'al')
130
+ 'icate' 'iciti' 'ical'
131
+ (<-'ic')
132
+ 'ful' 'ness'
133
+ (delete)
134
+ 'ative'
135
+ (R2 delete) // 'R2' added Dec 2001
136
+ )
137
+ )
139
+
140
+ define Step_4 as ( // deletes the longest matching ending, provided it lies in R2
140
+ [substring] R2 among ( // R2 is tested with the cursor at the start of the matched ending
141
+ 'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
142
+ 'ment' 'ent' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
143
+ (delete)
144
+ 'ion' ('s' or 't' delete) // only when preceded by 's' or 't'
145
+ )
146
+ )
148
+
149
+ define Step_5 as ( // final 'e' and final 'l'
149
+ [substring] among (
150
+ 'e' (R2 or (R1 not shortv) delete) // delete in R2, or in R1 when shortv does not hold before the 'e'
151
+ 'l' (R2 'l' delete) // delete in R2 when preceded by another 'l'
152
+ )
153
+ )
155
+
156
+ define exception2 as ( // succeeds, changing nothing, when the whole word is one of the listed forms
156
+
157
+ [substring] atlimit among( // atlimit: the backward match must reach the start of the word
158
+ 'inning' 'outing' 'canning' 'herring' 'earring'
159
+ 'proceed' 'exceed' 'succeed'
160
+
161
+ // ... extensions possible here ...
162
+
163
+ )
164
+ )
166
+ )
167
+
168
+ define exception1 as ( // whole-word exceptions, tried by stem before any other step
168
+
169
+ [substring] atlimit among( // atlimit: the forward match must reach the end of the word
170
+
171
+ /* special changes: */
172
+
173
+ 'skis' (<-'ski')
174
+ 'skies' (<-'sky')
175
+ 'dying' (<-'die')
176
+ 'lying' (<-'lie')
177
+ 'tying' (<-'tie')
178
+
179
+ /* special -LY cases */
180
+
181
+ 'idly' (<-'idl')
182
+ 'gently' (<-'gentl')
183
+ 'ugly' (<-'ugli')
184
+ 'early' (<-'earli')
185
+ 'only' (<-'onli')
186
+ 'singly' (<-'singl')
187
+
188
+ // ... extensions possible here ...
189
+
190
+ /* invariant forms: */
191
+
192
+ 'sky' // no action attached: the word is left as it is
193
+ 'news'
194
+ 'howe'
195
+
196
+ 'atlas' 'cosmos' 'bias' 'andes' // not plural forms
197
+
198
+ // ... extensions possible here ...
199
+ )
200
+ )
202
+
203
+ define postlude as (Y_found repeat(goto (['Y']) <-'y')) // only if Y_found is set: turn every 'Y' back into 'y'
204
+
205
+ define stem as ( // exported entry point
206
+
207
+ exception1 or // a listed whole-word exception ends the work here
208
+ not hop 3 or ( // so does a word too short for hop 3 to succeed
209
+ do prelude
210
+ do mark_regions
211
+ backwards ( // the steps below run from the end of the word
212
+
213
+ do Step_1a
214
+
215
+ exception2 or ( // after Step_1a, a listed whole word skips the remaining steps
216
+
217
+ do Step_1b
218
+ do Step_1c
219
+
220
+ do Step_2
221
+ do Step_3
222
+ do Step_4
223
+
224
+ do Step_5
225
+ )
226
+ )
227
+ do postlude // back in forward mode
228
+ )
229
+ )
@@ -0,0 +1,197 @@
1
+
2
+ /* Finnish stemmer.
3
+
4
+ Numbers in square brackets refer to the sections in
5
+ Fred Karlsson, Finnish: An Essential Grammar. Routledge, 1999
6
+ ISBN 0-415-20705-3
7
+
8
+ */
9
+
10
+ routines ( // internal routines; each is defined further down
11
+ mark_regions
12
+ R2
13
+ particle_etc possessive
14
+ LONG VI
15
+ case_ending
16
+ i_plural
17
+ t_plural
18
+ other_endings
19
+ tidy
20
+ )
21
+
22
+ externals ( stem ) // stem is the routine exported to callers
23
+
24
+ integers ( p1 p2 ) // marks set by mark_regions
25
+ strings ( x ) // receives the consonant copied by tidy
26
+ booleans ( ending_removed ) // set by case_ending, read by stem
27
+ groupings ( AEI C V1 V2 particle_end )
28
+
29
+ stringescapes {} // stringdef names are written between braces inside strings
30
+
31
+ /* special characters */
32
+
33
+ stringdef a" '{U+00E4}' // a with diaeresis
34
+ stringdef o" '{U+00F6}' // o with diaeresis
35
+
36
+ define AEI 'a{a"}ei'
37
+ define C 'bcdfghjklmnpqrstvwxz' // consonant letters
38
+ define V1 'aeiouy{a"}{o"}' // vowels
39
+ define V2 'aeiou{a"}{o"}' // V1 without 'y'
40
+ define particle_end V1 + 'nt' // V1 plus 'n' and 't'
41
+
42
+ define mark_regions as ( // sets the marks p1 and p2 that bound the suffix searches
43
+
44
+ $p1 = limit // defaults kept if a step below fails
45
+ $p2 = limit
46
+
47
+ goto V1 gopast non-V1 setmark p1 // p1 = just past the first non-vowel at or after the first vowel
48
+ goto V1 gopast non-V1 setmark p2 // p2 = the same pattern applied again from p1
49
+ )
50
+
51
+ backwardmode (
52
+
53
+ define R2 as $p2 <= cursor // succeeds when the cursor is at or after p2
54
+
55
+ define particle_etc as ( // removes a final particle or the adverb ending 'sti'
56
+ setlimit tomark p1 for ([substring]) // search is limited to the region starting at p1
57
+ among(
58
+ 'kin'
59
+ 'kaan' 'k{a"}{a"}n'
60
+ 'ko' 'k{o"}'
61
+ 'han' 'h{a"}n'
62
+ 'pa' 'p{a"}' // Particles [91]
63
+ (particle_end) // must be preceded by a character in particle_end
64
+ 'sti' // Adverb [87]
65
+ (R2) // must lie in R2
66
+ )
67
+ delete // reached only when the condition for the matched ending held
68
+ )
69
+ define possessive as ( // [36]
70
+ setlimit tomark p1 for ([substring]) // search is limited to the region starting at p1
71
+ among(
72
+ 'si'
73
+ (not 'k' delete) // take 'ksi' as the Translative case
74
+ 'ni'
75
+ (delete ['kse'] <- 'ksi') // kseni = ksi + ni
76
+ 'nsa' 'ns{a"}'
77
+ 'mme'
78
+ 'nne'
79
+ (delete)
80
+ /* Now for Vn possessives after case endings: [36] */
81
+ 'an'
82
+ (among('ta' 'ssa' 'sta' 'lla' 'lta' 'na') delete) // only after one of these endings
83
+ '{a"}n'
84
+ (among('t{a"}' 'ss{a"}' 'st{a"}'
85
+ 'll{a"}' 'lt{a"}' 'n{a"}') delete)
86
+ 'en'
87
+ (among('lle' 'ine') delete)
88
+ )
89
+ )
90
+
91
+ define LONG as // succeeds when one of the doubled vowels below matches at the cursor
92
+ among('aa' 'ee' 'ii' 'oo' 'uu' '{a"}{a"}' '{o"}{o"}')
93
+
94
+ define VI as ('i' V2) // backward mode: 'i' before the cursor, itself preceded by a V2 vowel
95
+
96
+ define case_ending as ( // removes a case ending and sets ending_removed
97
+ setlimit tomark p1 for ([substring]) // search is limited to the region starting at p1
98
+ among(
99
+ 'han' ('a') //-.
100
+ 'hen' ('e') // |
101
+ 'hin' ('i') // |
102
+ 'hon' ('o') // |
103
+ 'h{a"}n' ('{a"}') // Illative [43]
104
+ 'h{o"}n' ('{o"}') // |
105
+ 'siin' VI // |
106
+ 'seen' LONG //-'
107
+
108
+ 'den' VI
109
+ 'tten' VI // Genitive plurals [34]
110
+ () // empty action: nothing further is required for the endings above
111
+ 'n' // Genitive or Illative
112
+ ( try ( LONG // Illative
113
+ or 'ie' // Genitive
114
+ and next ] // then widen the slice by one character so it is deleted too
115
+ )
116
+ /* otherwise Genitive */
117
+ )
118
+
119
+ 'a' '{a"}' //-.
120
+ (V1 C) // |
121
+ 'tta' 'tt{a"}' // Partitive [32]
122
+ ('e') // |
123
+ 'ta' 't{a"}' //-'
124
+
125
+ 'ssa' 'ss{a"}' // Inessive [41]
126
+ 'sta' 'st{a"}' // Elative [42]
127
+
128
+ 'lla' 'll{a"}' // Adessive [44]
129
+ 'lta' 'lt{a"}' // Ablative [51] -- NOTE(review): same section number as Comitative below; confirm against the grammar
130
+ 'lle' // Allative [46]
131
+ 'na' 'n{a"}' // Essive [49]
132
+ 'ksi' // Translative [50]
133
+ 'ine' // Comitative [51]
134
+
135
+ /* Abessive and Instructive are too rare for
136
+ inclusion [51] */
137
+
138
+ )
139
+ delete // remove the slice
140
+ set ending_removed // read by stem to choose between i_plural and t_plural
141
+ )
142
+ define other_endings as ( // removes comparative, superlative and agent endings
143
+ setlimit tomark p2 for ([substring]) // search is limited to the region starting at p2
144
+ among(
145
+ 'mpi' 'mpa' 'mp{a"}'
146
+ 'mmi' 'mma' 'mm{a"}' // Comparative forms [85]
147
+ (not 'po') //-improves things
148
+ 'impi' 'impa' 'imp{a"}'
149
+ 'immi' 'imma' 'imm{a"}' // Superlative forms [86]
150
+ 'eja' 'ej{a"}' // indicates agent [93.1B]
151
+ )
152
+ delete
153
+ )
154
+ define i_plural as ( // [26]
155
+ setlimit tomark p1 for ([substring]) // search is limited to the region starting at p1
156
+ among(
157
+ 'i' 'j'
158
+ )
159
+ delete // remove the final 'i' or 'j'
160
+ )
161
+ define t_plural as ( // [26]
162
+ setlimit tomark p1 for ( // search is limited to the region starting at p1
163
+ ['t'] test V1 // final 't' preceded by a vowel
164
+ delete
165
+ )
166
+ setlimit tomark p2 for ([substring]) // then look in the region starting at p2 for the endings below
167
+ among(
168
+ 'mma' (not 'po') //-mmat endings
169
+ 'imma' //-immat endings
170
+ )
171
+ delete
172
+ )
173
+ define tidy as ( // final clean-up of the stem
174
+ setlimit tomark p1 for ( // the four rules below are limited to the region starting at p1
175
+ do ( LONG and ([next] delete ) ) // undouble vowel
176
+ do ( [AEI] C delete ) // remove trailing a, a", e, i when preceded by a consonant
177
+ do ( ['j'] 'o' or 'u' delete ) // remove trailing 'j' when preceded by 'o' or 'u'
178
+ do ( ['o'] 'j' delete ) // remove trailing 'o' when preceded by 'j'
179
+ )
180
+ goto non-V1 [C] -> x x delete // undouble consonant
181
+ )
182
+ )
183
+
184
+ define stem as ( // exported entry point
185
+
186
+ do mark_regions // compute p1 and p2; do ignores failure
187
+ unset ending_removed
188
+ backwards ( // the routines below run from the end of the word
189
+ do particle_etc
190
+ do possessive
191
+ do case_ending
192
+ do other_endings
193
+ (ending_removed do i_plural) or do t_plural // i_plural if case_ending removed something, else t_plural
194
+ do tidy
195
+ )
196
+ )
197
+