mittens 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
@@ -0,0 +1,254 @@
|
|
1
|
+
// Declarations for the French stemmer. `routines` lists every routine
// defined further down; `externals` names the one routine callers invoke.
routines (
|
2
|
+
prelude postlude mark_regions
|
3
|
+
RV R1 R2
|
4
|
+
standard_suffix
|
5
|
+
i_verb_suffix
|
6
|
+
verb_suffix
|
7
|
+
residual_suffix
|
8
|
+
un_double
|
9
|
+
un_accent
|
10
|
+
)
|
11
|
+
|
12
|
+
externals ( stem )
|
13
|
+
|
14
|
+
// Integer marks: assigned in mark_regions, compared with the cursor in RV, R1 and R2.
integers ( pV p1 p2 )
|
15
|
+
|
16
|
+
// Character sets: v is the vowel set; keep_with_s is consulted by residual_suffix.
groupings ( v keep_with_s )
|
17
|
+
|
18
|
+
// Selects { and } as the escape delimiters inside string literals, which is
// what lets the {name} references to stringdefs work.
stringescapes {}
|
19
|
+
|
20
|
+
/* special characters */
|
21
|
+
|
22
|
+
// Accented letters used by the algorithm. Each stringdef binds a short name
// to one code point so it can be written as {name} inside string literals.
stringdef a^ '{U+00E2}' // a-circumflex
|
23
|
+
stringdef a` '{U+00E0}' // a-grave
|
24
|
+
stringdef c, '{U+00E7}' // c-cedilla
|
25
|
+
|
26
|
+
stringdef e" '{U+00EB}' // e-diaeresis (rare)
|
27
|
+
stringdef e' '{U+00E9}' // e-acute
|
28
|
+
stringdef e^ '{U+00EA}' // e-circumflex
|
29
|
+
stringdef e` '{U+00E8}' // e-grave
|
30
|
+
stringdef i" '{U+00EF}' // i-diaeresis
|
31
|
+
stringdef i^ '{U+00EE}' // i-circumflex
|
32
|
+
stringdef o^ '{U+00F4}' // o-circumflex
|
33
|
+
stringdef u^ '{U+00FB}' // u-circumflex
|
34
|
+
stringdef u` '{U+00F9}' // u-grave
|
35
|
+
|
36
|
+
// v: the vowel set -- a e i o u y plus the accented vowels. Only lower-case
// letters are listed, so the upper-case markers I, U, Y written by prelude
// do not match v.
define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
|
37
|
+
|
38
|
+
// prelude: repeatedly searches forward for the next position where one of
// the alternatives below applies and rewrites the bracketed letter:
//   - after a vowel: 'u' or 'i' that is followed by a vowel -> 'U' / 'I',
//     and 'y' -> 'Y'
//   - e-diaeresis -> 'He', i-diaeresis -> 'Hi'
//   - 'y' followed by a vowel -> 'Y'
//   - 'u' after 'q' -> 'U'
// postlude maps these markers back.
define prelude as repeat goto (
|
39
|
+
|
40
|
+
( v [ ('u' ] v <- 'U') or
|
41
|
+
('i' ] v <- 'I') or
|
42
|
+
('y' ] <- 'Y')
|
43
|
+
)
|
44
|
+
or
|
45
|
+
( [ '{e"}' ] <- 'He' )
|
46
|
+
or
|
47
|
+
( [ '{i"}' ] <- 'Hi' )
|
48
|
+
or
|
49
|
+
( ['y'] v <- 'Y' )
|
50
|
+
or
|
51
|
+
( 'q' ['u'] <- 'U' )
|
52
|
+
)
|
53
|
+
|
54
|
+
// mark_regions: sets the marks pV, p1 and p2 that RV, R1 and R2 test.
// All three start at `limit`; each `do` block below may move them, and a
// failure inside a `do` simply leaves the values set so far.
define mark_regions as (
|
55
|
+
|
56
|
+
$pV = limit
|
57
|
+
$p1 = limit
|
58
|
+
$p2 = limit // defaults
|
59
|
+
|
60
|
+
// pV: after the third letter if the word begins with two vowels; after the
// prefix if it begins with one of the listed exceptions; otherwise after
// the first vowel found once the first letter has been skipped.
do (
|
61
|
+
( v v next )
|
62
|
+
or
|
63
|
+
among ( // this exception list begun Nov 2006
|
64
|
+
'par' // paris, parie, pari
|
65
|
+
'col' // colis
|
66
|
+
'tap' // tapis
|
67
|
+
// extensions possible here
|
68
|
+
)
|
69
|
+
or
|
70
|
+
( next gopast v )
|
71
|
+
setmark pV
|
72
|
+
)
|
73
|
+
// p1: just past the first non-vowel that follows a vowel.
// p2: the same search repeated starting from p1.
do (
|
74
|
+
gopast v gopast non-v setmark p1
|
75
|
+
gopast v gopast non-v setmark p2
|
76
|
+
)
|
77
|
+
)
|
78
|
+
|
79
|
+
// postlude: walks the word and undoes the markers written by prelude:
// I/U/Y back to lower case, 'He'/'Hi' back to the diaeresis letters, and a
// remaining 'H' is removed. The empty string entry matches everywhere and
// just steps one character forward.
define postlude as repeat (
|
80
|
+
|
81
|
+
[substring] among(
|
82
|
+
'I' (<- 'i')
|
83
|
+
'U' (<- 'u')
|
84
|
+
'Y' (<- 'y')
|
85
|
+
'He' (<- '{e"}')
|
86
|
+
'Hi' (<- '{i"}')
|
87
|
+
'H' (delete)
|
88
|
+
'' (next)
|
89
|
+
)
|
90
|
+
)
|
91
|
+
|
92
|
+
backwardmode (
|
93
|
+
|
94
|
+
// Region tests: each succeeds when its mark (set in mark_regions) is at or
// before the current cursor position.
define RV as $pV <= cursor
|
95
|
+
define R1 as $p1 <= cursor
|
96
|
+
define R2 as $p2 <= cursor
|
97
|
+
|
98
|
+
// standard_suffix: matches the longest listed suffix at the end of the word
// and runs the action attached to its group. Most actions first require the
// suffix to lie in a region (R2, R1 or RV) and then delete or replace it;
// several follow up by trimming a second, inner suffix inside `try`.
// The -amment / -emment / -ment(s) entries wrap their edit in fail(...), so
// the routine reports failure even though the word was changed.
define standard_suffix as (
|
99
|
+
[substring] among(
|
100
|
+
|
101
|
+
'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
|
102
|
+
'ances' 'iqUes' 'ismes' 'ables' 'istes'
|
103
|
+
( R2 delete )
|
104
|
+
'atrice' 'ateur' 'ation'
|
105
|
+
'atrices' 'ateurs' 'ations'
|
106
|
+
( R2 delete
|
107
|
+
// then a preceding 'ic' is deleted if in R2, otherwise rewritten to 'iqU'
try ( ['ic'] (R2 delete) or <-'iqU' )
|
108
|
+
)
|
109
|
+
'logie'
|
110
|
+
'logies'
|
111
|
+
( R2 <- 'log' )
|
112
|
+
'usion' 'ution'
|
113
|
+
'usions' 'utions'
|
114
|
+
( R2 <- 'u' )
|
115
|
+
'ence'
|
116
|
+
'ences'
|
117
|
+
( R2 <- 'ent' )
|
118
|
+
'ement'
|
119
|
+
'ements'
|
120
|
+
(
|
121
|
+
RV delete
|
122
|
+
try (
|
123
|
+
[substring] among(
|
124
|
+
'iv' (R2 delete ['at'] R2 delete)
|
125
|
+
'eus' ((R2 delete) or (R1<-'eux'))
|
126
|
+
'abl' 'iqU'
|
127
|
+
(R2 delete)
|
128
|
+
'i{e`}r' 'I{e`}r' //)
|
129
|
+
(RV <-'i') //)--new 2 Sept 02
|
130
|
+
)
|
131
|
+
)
|
132
|
+
)
|
133
|
+
'it{e'}'
|
134
|
+
'it{e'}s'
|
135
|
+
(
|
136
|
+
R2 delete
|
137
|
+
try (
|
138
|
+
[substring] among(
|
139
|
+
'abil' ((R2 delete) or <-'abl')
|
140
|
+
'ic' ((R2 delete) or <-'iqU')
|
141
|
+
'iv' (R2 delete)
|
142
|
+
)
|
143
|
+
)
|
144
|
+
)
|
145
|
+
'if' 'ive'
|
146
|
+
'ifs' 'ives'
|
147
|
+
(
|
148
|
+
R2 delete
|
149
|
+
try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
|
150
|
+
)
|
151
|
+
'eaux' (<- 'eau')
|
152
|
+
'aux' (R1 <- 'al')
|
153
|
+
'euse'
|
154
|
+
'euses'((R2 delete) or (R1<-'eux'))
|
155
|
+
|
156
|
+
'issement'
|
157
|
+
'issements'(R1 non-v delete) // verbal
|
158
|
+
|
159
|
+
// fail(...) below forces entry to verb_suffix. -ment typically
|
160
|
+
// follows the p.p., e.g 'confus{e'}ment'.
|
161
|
+
|
162
|
+
'amment' (RV fail(<- 'ant'))
|
163
|
+
'emment' (RV fail(<- 'ent'))
|
164
|
+
'ment'
|
165
|
+
'ments' (test(v RV) fail(delete))
|
166
|
+
// v is e,i,u,{e'},I or U
|
167
|
+
)
|
168
|
+
)
|
169
|
+
|
170
|
+
// i_verb_suffix: with matching limited by the pV mark, finds the longest of
// the listed endings beginning with i / i-circumflex and deletes it, but
// only when the character before it is a non-vowel other than 'H'.
define i_verb_suffix as setlimit tomark pV for (
|
171
|
+
[substring] among (
|
172
|
+
'{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
|
173
|
+
'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
|
174
|
+
'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
|
175
|
+
'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
|
176
|
+
'issez' 'issiez' 'issions' 'issons' 'it'
|
177
|
+
(not 'H' non-v delete)
|
178
|
+
)
|
179
|
+
)
|
180
|
+
|
181
|
+
// verb_suffix: with matching limited by the pV mark, removes the longest
// matching verb ending. Three groups:
//   - 'ions' is deleted only if it lies in R2
//   - the e / e-acute group is deleted unconditionally
//   - the a / a-circumflex group is deleted, and an 'e' left directly before
//     it is deleted as well
define verb_suffix as setlimit tomark pV for (
|
182
|
+
[substring] among (
|
183
|
+
'ions'
|
184
|
+
(R2 delete)
|
185
|
+
|
186
|
+
'{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
|
187
|
+
'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
|
188
|
+
'erons' 'eront' 'ez' 'iez'
|
189
|
+
|
190
|
+
// 'ons' //-best omitted
|
191
|
+
|
192
|
+
(delete)
|
193
|
+
|
194
|
+
'{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
|
195
|
+
'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
|
196
|
+
'assions'
|
197
|
+
(delete
|
198
|
+
try(['e'] delete)
|
199
|
+
)
|
200
|
+
)
|
201
|
+
)
|
202
|
+
|
203
|
+
// keep_with_s: the letters a i o u e-grave s. residual_suffix leaves a final
// 's' in place when the letter before it belongs to this set.
define keep_with_s 'aiou{e`}s'
|
204
|
+
|
205
|
+
// residual_suffix: the fallback step of `stem`.
//   1. A final 's' is deleted when it is preceded by 'Hi' or by a character
//      outside keep_with_s.
//   2. With matching limited by the pV mark: 'ion' is deleted if it lies in
//      R2 and follows 's' or 't'; the -ier / -iere forms are replaced by 'i';
//      a final 'e' is deleted.
define residual_suffix as (
|
206
|
+
try(['s'] test ('Hi' or non-keep_with_s) delete)
|
207
|
+
setlimit tomark pV for (
|
208
|
+
[substring] among(
|
209
|
+
'ion' (R2 's' or 't' delete)
|
210
|
+
'ier' 'i{e`}re'
|
211
|
+
'Ier' 'I{e`}re' (<-'i')
|
212
|
+
'e' (delete)
|
213
|
+
)
|
214
|
+
)
|
215
|
+
)
|
216
|
+
|
217
|
+
// un_double: if the word ends in one of the listed strings, deletes its last
// character. `test` checks the ending without moving the cursor, so [next]
// brackets only the final letter.
define un_double as (
|
218
|
+
test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
|
219
|
+
)
|
220
|
+
|
221
|
+
// un_accent: steps back over one or more non-vowels at the end of the word;
// if the letter reached is e-acute or e-grave it is replaced by plain 'e'.
define un_accent as (
|
222
|
+
atleast 1 non-v
|
223
|
+
[ '{e'}' or '{e`}' ] <-'e'
|
224
|
+
)
|
225
|
+
)
|
226
|
+
|
227
|
+
// stem: the exported entry point. Runs prelude and mark_regions going
// forward, then the suffix-removal steps inside `backwards`, and finally
// postlude. Every step is wrapped in `do`, so a failing step does not abort
// the rest.
define stem as (
|
228
|
+
|
229
|
+
do prelude
|
230
|
+
do mark_regions
|
231
|
+
backwards (
|
232
|
+
|
233
|
+
// Try the three suffix routines in order. If one succeeds, a final 'Y' is
// turned into 'i' or a final c-cedilla into 'c'. If none succeeds,
// residual_suffix runs instead.
do (
|
234
|
+
(
|
235
|
+
( standard_suffix or
|
236
|
+
i_verb_suffix or
|
237
|
+
verb_suffix
|
238
|
+
)
|
239
|
+
and
|
240
|
+
try( [ ('Y' ] <- 'i' ) or
|
241
|
+
('{c,}'] <- 'c' )
|
242
|
+
)
|
243
|
+
) or
|
244
|
+
residual_suffix
|
245
|
+
)
|
246
|
+
|
247
|
+
// try(['ent'] RV delete) // is best omitted
|
248
|
+
|
249
|
+
do un_double
|
250
|
+
do un_accent
|
251
|
+
)
|
252
|
+
do postlude
|
253
|
+
)
|
254
|
+
|
@@ -0,0 +1,139 @@
|
|
1
|
+
|
2
|
+
/*
|
3
|
+
Extra rule for -nisse ending added 11 Dec 2009
|
4
|
+
*/
|
5
|
+
|
6
|
+
// Declarations for the German stemmer. `routines` lists every routine
// defined further down; `externals` names the one routine callers invoke.
routines (
|
7
|
+
prelude postlude
|
8
|
+
mark_regions
|
9
|
+
R1 R2
|
10
|
+
standard_suffix
|
11
|
+
)
|
12
|
+
|
13
|
+
externals ( stem )
|
14
|
+
|
15
|
+
// Integer marks: p1 and p2 are tested by R1 and R2; x is a helper used in
// mark_regions to hold the position three characters into the word.
integers ( p1 p2 x )
|
16
|
+
|
17
|
+
// Character sets: the vowels, and the letters allowed before a removable
// 's' / 'st' ending in standard_suffix.
groupings ( v s_ending st_ending )
|
18
|
+
|
19
|
+
// Selects { and } as the escape delimiters inside string literals.
stringescapes {}
|
20
|
+
|
21
|
+
/* special characters */
|
22
|
+
|
23
|
+
// Names for the three umlaut vowels (U+00E4, U+00F6, U+00FC) and for
// U+00DF (sharp s), so they can be written as {name} in string literals.
stringdef a" '{U+00E4}'
|
24
|
+
stringdef o" '{U+00F6}'
|
25
|
+
stringdef u" '{U+00FC}'
|
26
|
+
stringdef ss '{U+00DF}'
|
27
|
+
|
28
|
+
// v: the vowel set, including the umlaut vowels. Upper-case U and Y written
// by prelude are not members.
define v 'aeiouy{a"}{o"}{u"}'
|
29
|
+
|
30
|
+
// s_ending: letters after which a final 's' may be removed (step 1 of
// standard_suffix).
define s_ending 'bdfghklmnrt'
|
31
|
+
// st_ending: the same set without 'r', used for a final 'st' (step 2).
define st_ending s_ending - 'r'
|
32
|
+
|
33
|
+
// prelude: two forward passes over the word.
//   1. Every sharp s is replaced by 'ss'. The pass runs under `test`, so the
//      cursor is back at its starting point afterwards.
//   2. A 'u' or 'y' that stands between two vowels is replaced by 'U' / 'Y'.
define prelude as (
|
34
|
+
|
35
|
+
test repeat (
|
36
|
+
(
|
37
|
+
['{ss}'] <- 'ss'
|
38
|
+
) or next
|
39
|
+
)
|
40
|
+
|
41
|
+
repeat goto (
|
42
|
+
v [('u'] v <- 'U') or
|
43
|
+
('y'] v <- 'Y')
|
44
|
+
)
|
45
|
+
)
|
46
|
+
|
47
|
+
// mark_regions: sets p1 and p2, the marks tested by R1 and R2. Both start at
// `limit`. Any failing command below fails the whole routine and leaves the
// marks as set up to that point.
define mark_regions as (
|
48
|
+
|
49
|
+
$p1 = limit
|
50
|
+
$p2 = limit
|
51
|
+
|
52
|
+
// x := position three characters into the word; `test` restores the cursor.
test(hop 3 setmark x)
|
53
|
+
|
54
|
+
// p1: just past the first non-vowel that follows a vowel ...
gopast v gopast non-v setmark p1
|
55
|
+
// ... but never earlier than x.
try($p1 < x $p1 = x) // at least 3
|
56
|
+
// p2: the same search repeated from where the p1 search stopped.
gopast v gopast non-v setmark p2
|
57
|
+
|
58
|
+
)
|
59
|
+
|
60
|
+
// postlude: walks the word, lower-casing the 'Y' / 'U' markers written by
// prelude and replacing each umlaut vowel by its plain vowel. The empty
// string entry matches everywhere and just steps one character forward.
define postlude as repeat (
|
61
|
+
|
62
|
+
[substring] among(
|
63
|
+
'Y' (<- 'y')
|
64
|
+
'U' (<- 'u')
|
65
|
+
'{a"}' (<- 'a')
|
66
|
+
'{o"}' (<- 'o')
|
67
|
+
'{u"}' (<- 'u')
|
68
|
+
'' (next)
|
69
|
+
)
|
70
|
+
|
71
|
+
)
|
72
|
+
|
73
|
+
backwardmode (
|
74
|
+
|
75
|
+
// Region tests: each succeeds when its mark (set in mark_regions) is at or
// before the current cursor position.
define R1 as $p1 <= cursor
|
76
|
+
define R2 as $p2 <= cursor
|
77
|
+
|
78
|
+
// standard_suffix: three independent suffix-removal steps, each wrapped in
// `do` so a step that fails does not stop the following ones. In every
// step the longest listed suffix is matched first and must then lie in the
// stated region (R1 or R2) before its action runs.
define standard_suffix as (
|
79
|
+
// Step 1 (R1): em/ern/er are deleted; e/en/es are deleted and, if 'niss'
// is then left at the end, its final 's' is deleted too; 's' is deleted
// only after a letter in s_ending.
do (
|
80
|
+
[substring] R1 among(
|
81
|
+
'em' 'ern' 'er'
|
82
|
+
( delete
|
83
|
+
)
|
84
|
+
'e' 'en' 'es'
|
85
|
+
( delete
|
86
|
+
try (['s'] 'nis' delete)
|
87
|
+
)
|
88
|
+
's'
|
89
|
+
( s_ending delete
|
90
|
+
)
|
91
|
+
)
|
92
|
+
)
|
93
|
+
// Step 2 (R1): en/er/est are deleted; 'st' is deleted only after a letter
// in st_ending that itself has at least three characters before it.
do (
|
94
|
+
[substring] R1 among(
|
95
|
+
'en' 'er' 'est'
|
96
|
+
( delete
|
97
|
+
)
|
98
|
+
'st'
|
99
|
+
( st_ending hop 3 delete
|
100
|
+
)
|
101
|
+
)
|
102
|
+
)
|
103
|
+
// Step 3 (R2): derivational suffixes, some followed by removal of a second
// suffix that is exposed once the first is gone.
do (
|
104
|
+
[substring] R2 among(
|
105
|
+
'end' 'ung'
|
106
|
+
( delete
|
107
|
+
try (['ig'] not 'e' R2 delete)
|
108
|
+
)
|
109
|
+
'ig' 'ik' 'isch'
|
110
|
+
( not 'e' delete
|
111
|
+
)
|
112
|
+
'lich' 'heit'
|
113
|
+
( delete
|
114
|
+
try (
|
115
|
+
['er' or 'en'] R1 delete
|
116
|
+
)
|
117
|
+
)
|
118
|
+
'keit'
|
119
|
+
( delete
|
120
|
+
try (
|
121
|
+
[substring] R2 among(
|
122
|
+
'lich' 'ig'
|
123
|
+
( delete
|
124
|
+
)
|
125
|
+
)
|
126
|
+
)
|
127
|
+
)
|
128
|
+
)
|
129
|
+
)
|
130
|
+
)
|
131
|
+
)
|
132
|
+
|
133
|
+
// stem: the exported entry point. Runs prelude and mark_regions going
// forward, standard_suffix in backward mode, then postlude. Every step is
// wrapped in `do`, so a failing step does not abort the rest.
define stem as (
|
134
|
+
do prelude
|
135
|
+
do mark_regions
|
136
|
+
backwards
|
137
|
+
do standard_suffix
|
138
|
+
do postlude
|
139
|
+
)
|
@@ -0,0 +1,145 @@
|
|
1
|
+
|
2
|
+
/*
|
3
|
+
Extra rule for -nisse ending added 11 Dec 2009
|
4
|
+
*/
|
5
|
+
|
6
|
+
// Declarations for this German stemmer variant. `routines` lists every
// routine defined further down; `externals` names the one routine callers
// invoke.
routines (
|
7
|
+
prelude postlude
|
8
|
+
mark_regions
|
9
|
+
R1 R2
|
10
|
+
standard_suffix
|
11
|
+
)
|
12
|
+
|
13
|
+
externals ( stem )
|
14
|
+
|
15
|
+
// Integer marks: p1 and p2 are tested by R1 and R2; x is a helper used in
// mark_regions to hold the position three characters into the word.
integers ( p1 p2 x )
|
16
|
+
|
17
|
+
// Character sets: the vowels, and the letters allowed before a removable
// 's' / 'st' ending in standard_suffix.
groupings ( v s_ending st_ending )
|
18
|
+
|
19
|
+
// Selects { and } as the escape delimiters inside string literals.
stringescapes {}
|
20
|
+
|
21
|
+
/* special characters */
|
22
|
+
|
23
|
+
// Names for the three umlaut vowels (U+00E4, U+00F6, U+00FC) and for
// U+00DF (sharp s), so they can be written as {name} in string literals.
stringdef a" '{U+00E4}'
|
24
|
+
stringdef o" '{U+00F6}'
|
25
|
+
stringdef u" '{U+00FC}'
|
26
|
+
stringdef ss '{U+00DF}'
|
27
|
+
|
28
|
+
// v: the vowel set, including the umlaut vowels. Upper-case U and Y written
// by prelude are not members.
define v 'aeiouy{a"}{o"}{u"}'
|
29
|
+
|
30
|
+
// s_ending: letters after which a final 's' may be removed (step 1 of
// standard_suffix).
define s_ending 'bdfghklmnrt'
|
31
|
+
// st_ending: the same set without 'r', used for a final 'st' (step 2).
define st_ending s_ending - 'r'
|
32
|
+
|
33
|
+
// prelude: two forward passes over the word.
//   1. A 'u' or 'y' that stands between two vowels is replaced by 'U' / 'Y'.
//      The pass runs under `test`, so the cursor is back at its starting
//      point afterwards.
//   2. Sharp s becomes 'ss', and the digraphs ae / oe / ue become the
//      corresponding umlaut vowels.
define prelude as (
|
34
|
+
|
35
|
+
test repeat goto (
|
36
|
+
v [('u'] v <- 'U') or
|
37
|
+
('y'] v <- 'Y')
|
38
|
+
)
|
39
|
+
|
40
|
+
repeat (
|
41
|
+
[substring] among(
|
42
|
+
'{ss}' (<- 'ss')
|
43
|
+
'ae' (<- '{a"}')
|
44
|
+
'oe' (<- '{o"}')
|
45
|
+
'ue' (<- '{u"}')
|
46
|
+
// 'qu' is matched with an empty action: the text is left unchanged and the
// cursor moves past both letters, so this 'u' cannot start a 'ue' match.
'qu' ()
|
47
|
+
'' (next)
|
48
|
+
)
|
49
|
+
)
|
50
|
+
|
51
|
+
)
|
52
|
+
|
53
|
+
// mark_regions: sets p1 and p2, the marks tested by R1 and R2. Both start at
// `limit`. Any failing command below fails the whole routine and leaves the
// marks as set up to that point.
define mark_regions as (
|
54
|
+
|
55
|
+
$p1 = limit
|
56
|
+
$p2 = limit
|
57
|
+
|
58
|
+
// x := position three characters into the word; `test` restores the cursor.
test(hop 3 setmark x)
|
59
|
+
|
60
|
+
// p1: just past the first non-vowel that follows a vowel ...
gopast v gopast non-v setmark p1
|
61
|
+
// ... but never earlier than x.
try($p1 < x $p1 = x) // at least 3
|
62
|
+
// p2: the same search repeated from where the p1 search stopped.
gopast v gopast non-v setmark p2
|
63
|
+
|
64
|
+
)
|
65
|
+
|
66
|
+
// postlude: walks the word, lower-casing the 'Y' / 'U' markers written by
// prelude and replacing each umlaut vowel by its plain vowel. The empty
// string entry matches everywhere and just steps one character forward.
define postlude as repeat (
|
67
|
+
|
68
|
+
[substring] among(
|
69
|
+
'Y' (<- 'y')
|
70
|
+
'U' (<- 'u')
|
71
|
+
'{a"}' (<- 'a')
|
72
|
+
'{o"}' (<- 'o')
|
73
|
+
'{u"}' (<- 'u')
|
74
|
+
'' (next)
|
75
|
+
)
|
76
|
+
|
77
|
+
)
|
78
|
+
|
79
|
+
backwardmode (
|
80
|
+
|
81
|
+
// Region tests: each succeeds when its mark (set in mark_regions) is at or
// before the current cursor position.
define R1 as $p1 <= cursor
|
82
|
+
define R2 as $p2 <= cursor
|
83
|
+
|
84
|
+
// standard_suffix: three independent suffix-removal steps, each wrapped in
// `do` so a step that fails does not stop the following ones. In every
// step the longest listed suffix is matched first and must then lie in the
// stated region (R1 or R2) before its action runs.
define standard_suffix as (
|
85
|
+
// Step 1 (R1): em/ern/er are deleted; e/en/es are deleted and, if 'niss'
// is then left at the end, its final 's' is deleted too; 's' is deleted
// only after a letter in s_ending.
do (
|
86
|
+
[substring] R1 among(
|
87
|
+
'em' 'ern' 'er'
|
88
|
+
( delete
|
89
|
+
)
|
90
|
+
'e' 'en' 'es'
|
91
|
+
( delete
|
92
|
+
try (['s'] 'nis' delete)
|
93
|
+
)
|
94
|
+
's'
|
95
|
+
( s_ending delete
|
96
|
+
)
|
97
|
+
)
|
98
|
+
)
|
99
|
+
// Step 2 (R1): en/er/est are deleted; 'st' is deleted only after a letter
// in st_ending that itself has at least three characters before it.
do (
|
100
|
+
[substring] R1 among(
|
101
|
+
'en' 'er' 'est'
|
102
|
+
( delete
|
103
|
+
)
|
104
|
+
'st'
|
105
|
+
( st_ending hop 3 delete
|
106
|
+
)
|
107
|
+
)
|
108
|
+
)
|
109
|
+
// Step 3 (R2): derivational suffixes, some followed by removal of a second
// suffix that is exposed once the first is gone.
do (
|
110
|
+
[substring] R2 among(
|
111
|
+
'end' 'ung'
|
112
|
+
( delete
|
113
|
+
try (['ig'] not 'e' R2 delete)
|
114
|
+
)
|
115
|
+
'ig' 'ik' 'isch'
|
116
|
+
( not 'e' delete
|
117
|
+
)
|
118
|
+
'lich' 'heit'
|
119
|
+
( delete
|
120
|
+
try (
|
121
|
+
['er' or 'en'] R1 delete
|
122
|
+
)
|
123
|
+
)
|
124
|
+
'keit'
|
125
|
+
( delete
|
126
|
+
try (
|
127
|
+
[substring] R2 among(
|
128
|
+
'lich' 'ig'
|
129
|
+
( delete
|
130
|
+
)
|
131
|
+
)
|
132
|
+
)
|
133
|
+
)
|
134
|
+
)
|
135
|
+
)
|
136
|
+
)
|
137
|
+
)
|
138
|
+
|
139
|
+
// stem: the exported entry point. Runs prelude and mark_regions going
// forward, standard_suffix in backward mode, then postlude. Every step is
// wrapped in `do`, so a failing step does not abort the rest.
define stem as (
|
140
|
+
do prelude
|
141
|
+
do mark_regions
|
142
|
+
backwards
|
143
|
+
do standard_suffix
|
144
|
+
do postlude
|
145
|
+
)
|