mittens 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
@@ -0,0 +1,301 @@
|
|
1
|
+
stringescapes {}
|
2
|
+
|
3
|
+
stringdef a '{U+0561}' // 531
|
4
|
+
stringdef b '{U+0562}' // 532
|
5
|
+
stringdef g '{U+0563}' // 533
|
6
|
+
stringdef d '{U+0564}' // 534
|
7
|
+
stringdef ye '{U+0565}' // 535
|
8
|
+
stringdef z '{U+0566}' // 536
|
9
|
+
stringdef e '{U+0567}' // 537
|
10
|
+
stringdef y '{U+0568}' // 538
|
11
|
+
stringdef dt '{U+0569}' // 539
|
12
|
+
stringdef zh '{U+056A}' // 53A
|
13
|
+
stringdef i '{U+056B}' // 53B
|
14
|
+
stringdef l '{U+056C}' // 53C
|
15
|
+
stringdef kh '{U+056D}' // 53D
|
16
|
+
stringdef ts '{U+056E}' // 53E
|
17
|
+
stringdef k '{U+056F}' // 53F
|
18
|
+
stringdef h '{U+0570}' // 540
|
19
|
+
stringdef dz '{U+0571}' // 541
|
20
|
+
stringdef gh '{U+0572}' // 542
|
21
|
+
stringdef djch '{U+0573}' // 543
|
22
|
+
stringdef m '{U+0574}' // 544
|
23
|
+
stringdef j '{U+0575}' // 545
|
24
|
+
stringdef n '{U+0576}' // 546
|
25
|
+
stringdef sh '{U+0577}' // 547
|
26
|
+
stringdef vo '{U+0578}' // 548
|
27
|
+
stringdef ch '{U+0579}' // 549
|
28
|
+
stringdef p '{U+057A}' // 54A
|
29
|
+
stringdef dj '{U+057B}' // 54B
|
30
|
+
stringdef r '{U+057C}' // 54C
|
31
|
+
stringdef s '{U+057D}' // 54D
|
32
|
+
stringdef v '{U+057E}' // 54E
|
33
|
+
stringdef t '{U+057F}' // 54F
|
34
|
+
stringdef r' '{U+0580}' // 550
|
35
|
+
stringdef c '{U+0581}' // 551
|
36
|
+
stringdef u '{U+0582}' // 552 //vjun
|
37
|
+
stringdef bp '{U+0583}' // 553
|
38
|
+
stringdef q '{U+0584}' // 554
|
39
|
+
stringdef ev '{U+0587}' // ech-yiwn ligature; has no single uppercase code point (the other "// 5xx" notes appear to list those)
|
40
|
+
stringdef o '{U+0585}' // 555
|
41
|
+
stringdef f '{U+0586}' // 556
|
42
|
+
|
43
|
+
routines ( mark_regions R2
|
44
|
+
adjective
|
45
|
+
verb
|
46
|
+
noun
|
47
|
+
ending
|
48
|
+
)
|
49
|
+
|
50
|
+
externals ( stem )
|
51
|
+
|
52
|
+
integers ( pV p2 )
|
53
|
+
|
54
|
+
groupings ( v )
|
55
|
+
|
56
|
+
define v '{a}{e}{i}{o}{u}{ye}{vo}{y}' // grouping v: the letters mark_regions scans for via "gopast v" / "gopast non-v" (presumably the vowels)
|
57
|
+
|
58
|
+
// mark_regions: sets the integer marks pV and p2 for the current word.
// Both default to `limit`; each is overwritten only if the scan below gets
// that far. pV is later used by stem as the backward limit, p2 by the R2 test.
define mark_regions as (
|
59
|
+
|
60
|
+
$pV = limit
|
61
|
+
$p2 = limit
|
62
|
+
do (
|
63
|
+
gopast v setmark pV gopast non-v // pV = just past the first character in v, then skip past the next non-v
|
64
|
+
gopast v gopast non-v setmark p2 // p2 = just past the following "v ... non-v" run
|
65
|
+
)
|
66
|
+
)
|
67
|
+
|
68
|
+
backwardmode (
|
69
|
+
|
70
|
+
define R2 as $p2 <= cursor // R2 test: succeeds when the cursor is at or beyond mark p2
|
71
|
+
|
72
|
+
define adjective as (
|
73
|
+
[substring] among (
|
74
|
+
'{b}{a}{r'}'
|
75
|
+
'{p}{ye}{s}'
|
76
|
+
'{vo}{r'}{e}{n}'
|
77
|
+
'{vo}{v}{i}{n}'
|
78
|
+
'{a}{k}{i}'
|
79
|
+
'{l}{a}{j}{n}'
|
80
|
+
'{r'}{vo}{r'}{d}'
|
81
|
+
'{ye}{r'}{vo}{r'}{d}'
|
82
|
+
'{a}{k}{a}{n}'
|
83
|
+
'{a}{l}{i}'
|
84
|
+
'{k}{vo}{t}'
|
85
|
+
'{ye}{k}{ye}{n}'
|
86
|
+
'{vo}{r'}{a}{k}'
|
87
|
+
'{ye}{gh}'
|
88
|
+
'{v}{vo}{u}{n}'
|
89
|
+
'{ye}{r'}{ye}{n}'
|
90
|
+
'{a}{r'}{a}{n}'
|
91
|
+
'{ye}{n}'
|
92
|
+
'{a}{v}{ye}{t}'
|
93
|
+
'{g}{i}{n}'
|
94
|
+
'{i}{v}'
|
95
|
+
'{a}{t}'
|
96
|
+
'{i}{n}'
|
97
|
+
|
98
|
+
(delete)
|
99
|
+
)
|
100
|
+
)
|
101
|
+
|
102
|
+
define verb as (
|
103
|
+
[substring] among (
|
104
|
+
'{vo}{u}{m}'
|
105
|
+
'{v}{vo}{u}{m}'
|
106
|
+
'{a}{l}{vo}{u}'
|
107
|
+
'{ye}{l}{vo}{u}'
|
108
|
+
'{v}{ye}{l}'
|
109
|
+
'{a}{n}{a}{l}'
|
110
|
+
'{ye}{l}{vo}{u}{c}'
|
111
|
+
'{a}{l}{vo}{u}{c}'
|
112
|
+
'{y}{a}{l}'
|
113
|
+
'{y}{ye}{l}'
|
114
|
+
'{a}{l}{vo}{v}'
|
115
|
+
'{ye}{l}{vo}{v}'
|
116
|
+
'{a}{l}{i}{s}'
|
117
|
+
'{ye}{l}{i}{s}'
|
118
|
+
'{ye}{n}{a}{l}'
|
119
|
+
'{a}{c}{n}{a}{l}'
|
120
|
+
'{ye}{c}{n}{ye}{l}'
|
121
|
+
'{c}{n}{ye}{l}'
|
122
|
+
'{n}{ye}{l}'
|
123
|
+
'{a}{t}{ye}{l}'
|
124
|
+
'{vo}{t}{ye}{l}'
|
125
|
+
'{k}{vo}{t}{ye}{l}'
|
126
|
+
'{t}{ye}{l}'
|
127
|
+
'{v}{a}{ts}'
|
128
|
+
'{ye}{c}{v}{ye}{l}'
|
129
|
+
'{a}{c}{v}{ye}{l}'
|
130
|
+
'{ye}{c}{i}{r'}'
|
131
|
+
'{a}{c}{i}{r'}'
|
132
|
+
'{ye}{c}{i}{n}{q}'
|
133
|
+
'{a}{c}{i}{n}{q}'
|
134
|
+
'{v}{ye}{c}{i}{r'}'
|
135
|
+
'{v}{ye}{c}{i}{n}{q}'
|
136
|
+
'{v}{ye}{c}{i}{q}'
|
137
|
+
'{v}{ye}{c}{i}{n}'
|
138
|
+
'{a}{c}{r'}{i}{r'}'
|
139
|
+
'{a}{c}{r'}{ye}{c}'
|
140
|
+
'{a}{c}{r'}{i}{n}{q}'
|
141
|
+
'{a}{c}{r'}{i}{q}'
|
142
|
+
'{a}{c}{r'}{i}{n}'
|
143
|
+
'{ye}{c}{i}{q}'
|
144
|
+
'{a}{c}{i}{q}'
|
145
|
+
'{ye}{c}{i}{n}'
|
146
|
+
'{a}{c}{i}{n}'
|
147
|
+
'{a}{c}{a}{r'}'
|
148
|
+
'{a}{c}{a}{v}'
|
149
|
+
'{a}{c}{a}{n}{q}'
|
150
|
+
'{a}{c}{a}{q}'
|
151
|
+
'{a}{c}{a}{n}'
|
152
|
+
'{v}{ye}{c}{i}'
|
153
|
+
'{a}{c}{r'}{i}'
|
154
|
+
'{ye}{c}{a}{r'}'
|
155
|
+
'{ye}{c}{a}{v}'
|
156
|
+
'{c}{a}{n}{q}'
|
157
|
+
'{c}{a}{q}'
|
158
|
+
'{c}{a}{n}'
|
159
|
+
'{a}{c}{a}'
|
160
|
+
'{a}{c}{i}'
|
161
|
+
'{ye}{c}{a}'
|
162
|
+
'{ch}{ye}{l}'
|
163
|
+
'{ye}{c}{i}'
|
164
|
+
'{a}{r'}'
|
165
|
+
'{a}{v}'
|
166
|
+
'{a}{n}{q}'
|
167
|
+
'{a}{q}'
|
168
|
+
'{a}{n}'
|
169
|
+
'{a}{l}'
|
170
|
+
'{ye}{l}'
|
171
|
+
'{ye}{c}'
|
172
|
+
'{a}{c}'
|
173
|
+
'{v}{ye}'
|
174
|
+
'{a}'
|
175
|
+
|
176
|
+
(delete)
|
177
|
+
)
|
178
|
+
)
|
179
|
+
|
180
|
+
define noun as (
|
181
|
+
[substring] among (
|
182
|
+
'{a}{ts}{vo}'
|
183
|
+
'{a}{n}{a}{k}'
|
184
|
+
'{a}{n}{o}{c}'
|
185
|
+
'{a}{r'}{a}{n}'
|
186
|
+
'{a}{r'}{q}'
|
187
|
+
'{p}{a}{n}'
|
188
|
+
'{s}{t}{a}{n}'
|
189
|
+
'{ye}{gh}{e}{n}'
|
190
|
+
'{ye}{n}{q}'
|
191
|
+
'{i}{k}'
|
192
|
+
'{i}{ch}'
|
193
|
+
'{i}{q}'
|
194
|
+
'{m}{vo}{u}{n}{q}'
|
195
|
+
'{j}{a}{k}'
|
196
|
+
'{j}{vo}{u}{n}'
|
197
|
+
'{vo}{n}{q}'
|
198
|
+
'{vo}{r'}{d}'
|
199
|
+
'{vo}{c}'
|
200
|
+
'{ch}{ye}{q}'
|
201
|
+
'{v}{a}{ts}{q}'
|
202
|
+
'{v}{vo}{r'}'
|
203
|
+
'{a}{v}{vo}{r'}'
|
204
|
+
'{vo}{u}{dt}{j}{vo}{u}{n}'
|
205
|
+
'{vo}{u}{k}'
|
206
|
+
'{vo}{u}{h}{i}'
|
207
|
+
'{vo}{u}{j}{dt}'
|
208
|
+
'{vo}{u}{j}{q}'
|
209
|
+
'{vo}{u}{s}{t}'
|
210
|
+
'{vo}{u}{s}'
|
211
|
+
'{c}{i}'
|
212
|
+
'{a}{l}{i}{q}'
|
213
|
+
'{a}{n}{i}{q}'
|
214
|
+
'{i}{l}'
|
215
|
+
'{i}{ch}{q}'
|
216
|
+
'{vo}{u}{n}{q}'
|
217
|
+
'{g}{a}{r'}'
|
218
|
+
'{vo}{u}'
|
219
|
+
'{a}{k}'
|
220
|
+
'{a}{n}'
|
221
|
+
'{q}'
|
222
|
+
|
223
|
+
(delete)
|
224
|
+
)
|
225
|
+
)
|
226
|
+
|
227
|
+
define ending as (
|
228
|
+
[substring] R2 among (
|
229
|
+
'{n}{ye}{r'}{y}'
|
230
|
+
'{n}{ye}{r'}{n}'
|
231
|
+
'{n}{ye}{r'}{i}'
|
232
|
+
'{n}{ye}{r'}{d}'
|
233
|
+
'{ye}{r'}{i}{c}'
|
234
|
+
'{n}{ye}{r'}{i}{c}'
|
235
|
+
'{ye}{r'}{i}'
|
236
|
+
'{ye}{r'}{d}'
|
237
|
+
'{ye}{r'}{n}'
|
238
|
+
'{ye}{r'}{y}'
|
239
|
+
'{n}{ye}{r'}{i}{n}'
|
240
|
+
'{vo}{u}{dt}{j}{a}{n}{n}'
|
241
|
+
'{vo}{u}{dt}{j}{a}{n}{y}'
|
242
|
+
'{vo}{u}{dt}{j}{a}{n}{s}'
|
243
|
+
'{vo}{u}{dt}{j}{a}{n}{d}'
|
244
|
+
'{vo}{u}{dt}{j}{a}{n}'
|
245
|
+
'{ye}{r'}{i}{n}'
|
246
|
+
'{i}{n}'
|
247
|
+
'{s}{a}'
|
248
|
+
'{vo}{dj}'
|
249
|
+
'{i}{c}'
|
250
|
+
'{ye}{r'}{vo}{v}'
|
251
|
+
'{n}{ye}{r'}{vo}{v}'
|
252
|
+
'{ye}{r'}{vo}{u}{m}'
|
253
|
+
'{n}{ye}{r'}{vo}{u}{m}'
|
254
|
+
'{vo}{u}{n}'
|
255
|
+
'{vo}{u}{d}'
|
256
|
+
'{v}{a}{n}{s}'
|
257
|
+
'{v}{a}{n}{y}'
|
258
|
+
'{v}{a}{n}{d}'
|
259
|
+
'{a}{n}{y}'
|
260
|
+
'{a}{n}{d}'
|
261
|
+
'{v}{a}{n}'
|
262
|
+
'{vo}{dj}{y}'
|
263
|
+
'{vo}{dj}{s}'
|
264
|
+
'{vo}{dj}{d}'
|
265
|
+
'{vo}{c}'
|
266
|
+
'{vo}{u}{c}'
|
267
|
+
'{vo}{dj}{i}{c}'
|
268
|
+
'{c}{i}{c}'
|
269
|
+
'{v}{i}{c}'
|
270
|
+
'{v}{i}'
|
271
|
+
'{v}{vo}{v}'
|
272
|
+
'{vo}{v}'
|
273
|
+
'{a}{n}{vo}{v}'
|
274
|
+
'{a}{n}{vo}{u}{m}'
|
275
|
+
'{v}{a}{n}{i}{c}'
|
276
|
+
'{a}{m}{b}'
|
277
|
+
'{a}{n}'
|
278
|
+
'{n}{ye}{r'}'
|
279
|
+
'{ye}{r'}'
|
280
|
+
'{v}{a}'
|
281
|
+
'{y}'
|
282
|
+
'{n}'
|
283
|
+
'{d}'
|
284
|
+
'{c}'
|
285
|
+
'{i}'
|
286
|
+
|
287
|
+
(delete)
|
288
|
+
)
|
289
|
+
)
|
290
|
+
)
|
291
|
+
|
292
|
+
// stem: the external entry point of the Armenian stemmer.
// Computes the region marks, then works backwards from the end of the word
// with the backward limit set at pV, so no suffix match can reach before pV.
// The four suffix routines are tried in the fixed order below; each is wrapped
// in `do`, so one failing does not stop the others from being tried.
define stem as (
|
293
|
+
|
294
|
+
do mark_regions
|
295
|
+
backwards setlimit tomark pV for (
|
296
|
+
do ending
|
297
|
+
do verb
|
298
|
+
do adjective
|
299
|
+
do noun
|
300
|
+
)
|
301
|
+
)
|
@@ -0,0 +1,149 @@
|
|
1
|
+
routines (
|
2
|
+
aditzak
|
3
|
+
izenak
|
4
|
+
adjetiboak
|
5
|
+
mark_regions
|
6
|
+
RV R2 R1
|
7
|
+
)
|
8
|
+
|
9
|
+
externals ( stem )
|
10
|
+
|
11
|
+
integers ( pV p1 p2 )
|
12
|
+
|
13
|
+
groupings ( v )
|
14
|
+
|
15
|
+
stringescapes {}
|
16
|
+
|
17
|
+
/* special characters */
|
18
|
+
|
19
|
+
stringdef n~ '{U+00F1}'
|
20
|
+
|
21
|
+
define v 'aeiou'
|
22
|
+
|
23
|
+
// mark_regions: sets the integer marks pV, p1 and p2 for the current word.
// All three default to `limit` and are overwritten only when the matching
// scan below succeeds. They back the RV, R1 and R2 tests respectively.
define mark_regions as (
|
24
|
+
|
25
|
+
$pV = limit
|
26
|
+
$p1 = limit
|
27
|
+
$p2 = limit // defaults
|
28
|
+
|
29
|
+
do (
|
30
|
+
( v (non-v gopast v) or (v gopast non-v) ) // starts with v: 2nd char non-v -> go past the next v; 2nd char v -> go past the next non-v
|
31
|
+
or
|
32
|
+
( non-v (non-v gopast v) or (v next) ) // starts with non-v: 2nd char non-v -> go past the next v; 2nd char v -> step over one more character
|
33
|
+
setmark pV // pV = wherever the matching alternative above stopped
|
34
|
+
)
|
35
|
+
do (
|
36
|
+
gopast v gopast non-v setmark p1 // p1 = just past the first non-v that follows a v
|
37
|
+
gopast v gopast non-v setmark p2 // p2 = the same scan repeated, starting from p1
|
38
|
+
)
|
39
|
+
)
|
40
|
+
|
41
|
+
backwardmode (
|
42
|
+
|
43
|
+
define RV as $pV <= cursor // RV test: succeeds when the cursor is at or beyond mark pV
|
44
|
+
define R2 as $p2 <= cursor // R2 test: succeeds when the cursor is at or beyond mark p2
|
45
|
+
define R1 as $p1 <= cursor // R1 test: succeeds when the cursor is at or beyond mark p1
|
46
|
+
|
47
|
+
// aditzak: finds the longest ending from the list below at the cursor
// (backward mode) and applies the action attached to its group:
//   - first group: delete the ending if it lies in RV
//   - 'garri' 'garria' 'tza': delete the ending if it lies in R2
//   - 'atseden', 'arabera', 'baditu': replaced by themselves (see note below)
// The routine name looks like Basque for "verbs" - confirm with the author.
define aditzak as (
|
48
|
+
[substring] among(
|
49
|
+
'le' 'la' 'tzaile' 'aldatu' 'atu' 'tzailea' 'taile' 'tailea' 'pera' 'gale' 'galea'
|
50
|
+
'gura' 'kura' 'kor' 'korra' 'or' 'orra' 'tun' 'tuna' 'gaitz' 'gaitza'
|
51
|
+
'kaitz' 'kaitza' 'ezin' 'ezina' 'tezin' 'tezina' 'errez' 'erreza'
|
52
|
+
'karri' 'karria' 'tzaga' 'tzaka' 'tzake' 'tzeke' 'ez' 'eza' 'tzez'
|
53
|
+
'keta' 'eta' 'etan' 'pen' 'pena' 'tze' 'atze' 'kuntza' 'kunde' 'kundea'
|
54
|
+
'kune' 'kunea' 'kuna' 'kera' 'era' 'kizun' 'kizuna' 'dura' 'tura' 'men' 'mena'
|
55
|
+
'go' 'ago' 'tio' 'taldi' 'taldia' 'aldi' 'aldia' 'gune' 'gunea' 'bide' 'bidea'
|
56
|
+
'pide' 'pidea' 'gai' 'gaia' 'ki' 'kin' 'rekin' 'kina' 'kari' 'karia' 'ari' 'tari' 'etari'
|
57
|
+
'gailu' 'gailua' 'kide' 'kidea' 'ide' 'idea' 'du' 'ka' 'kan' 'an' 'ean' 'tu' 'lari' 'tatu'
|
58
|
+
'rean' 'tarazi' 'arazi' 'tzat' 'bera' 'dako'
|
59
|
+
( RV delete )
|
60
|
+
'garri' 'garria' 'tza'
|
61
|
+
(R2 delete)
|
62
|
+
'atseden'
|
63
|
+
(<- 'atseden') // replaces the match with identical text, i.e. leaves it unchanged
|
64
|
+
'arabera'
|
65
|
+
(<- 'arabera') // unchanged; presumably listed so the longest-match rule keeps 'bera' / 'era' from being stripped - confirm
|
66
|
+
'baditu'
|
67
|
+
(<- 'baditu') // unchanged; presumably listed so the longest-match rule keeps 'tu' from being stripped - confirm
|
68
|
+
|
69
|
+
)
|
70
|
+
)
|
71
|
+
|
72
|
+
define izenak as (
|
73
|
+
[substring] among(
|
74
|
+
'ari' 'aria' 'bizia' 'kari' 'karia' 'lari' 'laria' 'tari' 'taria' 'zain' 'zaina'
|
75
|
+
'tzain' 'tzaina' 'zale' 'zalea' 'tzale' 'tzalea' 'aizun' 'orde' 'ordea'
|
76
|
+
'burua' 'ohi' 'ohia' 'kintza' 'gintzo' 'gintzu' 'tzu' 'tzua'
|
77
|
+
'tzo' 'tzoa' 'kuntza' 'talde' 'taldea' 'eria' 'keria' 'teria' 'di'
|
78
|
+
'za' 'ada' 'tara' 'etara' 'tra' 'ta' 'tegi' 'tegia' 'keta' 'z' 'zko' 'zkoa'
|
79
|
+
'ti' 'tia' 'tsu' 'tsua' 'zu' 'zua' 'bera' 'pera' 'zto' 'ztoa' 'asi' 'asia'
|
80
|
+
'gile' 'gilea' 'estu' 'estua' 'larri' 'larria' 'nahi' 'nahia'
|
81
|
+
'koi' 'koia' 'oi' 'oia' 'goi' 'min' 'mina' 'dun' 'duna' 'duru' 'durua'
|
82
|
+
'duri' 'duria' 'os' 'osa' 'oso' 'osoa' 'ar' 'ara' 'tar' 'dar' 'dara'
|
83
|
+
'tiar' 'tiara' 'liar' 'liara' 'gabe' 'gabea' 'kabe' 'kabea' 'ga' 'ge'
|
84
|
+
'kada' 'tasun' 'tasuna' 'asun' 'asuna' 'go' 'mendu' 'mendua' 'mentu' 'mentua'
|
85
|
+
'mendi' 'mendia' 'zio' 'zioa' 'zino' 'zinoa' 'zione' 'zionea' 'ezia'
|
86
|
+
'degi' 'degia' 'egi' 'egia' 'toki' 'tokia' 'leku' 'lekua' 'gintza' 'alde'
|
87
|
+
'aldea' 'kalde' 'kaldea' 'gune' 'gunea' 'une' 'unea' 'una' 'pe' 'pea'
|
88
|
+
'gibel' 'gibela' 'ondo' 'ondoa' 'arte' 'artea' 'aurre' 'aurrea'
|
89
|
+
'etxe' 'etxea' 'ola' 'ontzi' 'ontzia' 'gela' 'denda' 'taldi' 'taldia'
|
90
|
+
'aldi' 'aldia' 'te' 'tea' 'zaro' 'zaroa' 'taro' 'taroa' 'oro' 'oroa'
|
91
|
+
'aro' 'aroa' 'ero' 'eroa' 'eroz' 'eroza' 'ka' 'kan' 'kana' 'tako' 'etako' 'takoa'
|
92
|
+
'kote' 'kotea' 'tzar' 'tzarra' 'handi' 'handia' 'kondo' 'kondoa' 'skila'
|
93
|
+
'no' 'noa' '{n~}o' '{n~}oa' 'ska' 'xka' 'zka' 'tila' 'to' 'toa' 'tto' 'ttoa'
|
94
|
+
'txo' 'txoa' 'txu' 'txua' 'anda' 'anga' 'urren' 'urrena' 'gai' 'gaia'
|
95
|
+
'gei' 'geia' 'eme' 'emea' 'kume' 'kumea' 'sa' 'ko' 'eko' 'koa' 'ena'
|
96
|
+
'enea' 'ne' 'nea' 'kor' 'korra' 'ez' 'eza' 'eta' 'etan'
|
97
|
+
'ki' 'kia' 'kin' 'kina' 'tu' 'tua' 'du' 'dua' 'ek'
|
98
|
+
'tarik' 'tariko' 'tan' 'ordu' 'ordua' 'oste' 'ostea' 'tzara'
|
99
|
+
'ra' 'antza' 'behar' 'ro' 'giro' 'ak' 'zp' 'ket'
|
100
|
+
'kail' 'kaila' 'ail' 'kirri' 'kirria' 'ngo' 'ngoa' '{n~}i' 'sko'
|
101
|
+
'sta' 'koitz' 'koitza' 'na' 'garren' 'garrena' 'kera'
|
102
|
+
'gerren' 'gerrena' 'garna' 'kide' 'tz' 'tuko'
|
103
|
+
( RV delete )
|
104
|
+
'ora' 'garri' 'garria' 'or' 'buru' 'ren' 'tza'
|
105
|
+
( R2 delete )
|
106
|
+
'joka'
|
107
|
+
(<- 'jok')
|
108
|
+
'tzen' 'ten' 'en' 'tatu'
|
109
|
+
(R1 delete)
|
110
|
+
'trako'
|
111
|
+
(<- 'tra')
|
112
|
+
'minutuko'
|
113
|
+
(<- 'minutu')
|
114
|
+
'zehar'
|
115
|
+
(<- 'zehar')
|
116
|
+
'geldi'
|
117
|
+
(<- 'geldi')
|
118
|
+
'igaro'
|
119
|
+
(<- 'igaro')
|
120
|
+
'aurka'
|
121
|
+
(<- 'aurka')
|
122
|
+
)
|
123
|
+
)
|
124
|
+
|
125
|
+
// adjetiboak: finds the longest ending from the list below at the cursor
// (backward mode). Endings in the first group are deleted when they lie in RV;
// the ending 'zlea' is replaced by 'z' with no region test.
// The routine name looks like Basque for "adjectives" - confirm with the author.
define adjetiboak as (
|
126
|
+
[substring] among(
|
127
|
+
'era' 'ero' 'go' 'tate' 'tade' 'date' 'dade' 'keria'
|
128
|
+
'ki' 'to' 'ro' 'la' 'gi' 'larik' 'lanik' 'ik' 'ztik' 'rik'
|
129
|
+
( RV delete )
|
130
|
+
'zlea'
|
131
|
+
(<- 'z')
|
132
|
+
)
|
133
|
+
)
|
134
|
+
|
135
|
+
)
|
136
|
+
|
137
|
+
// stem: the external entry point of the Basque stemmer.
// Computes the region marks, then works backwards from the end of the word:
// aditzak and izenak are each applied repeatedly until they fail, and
// adjetiboak is then tried once (its failure is ignored because of `do`).
define stem as (
|
138
|
+
do mark_regions
|
139
|
+
backwards (
|
140
|
+
repeat aditzak
|
141
|
+
repeat izenak
|
142
|
+
do adjetiboak
|
143
|
+
)
|
144
|
+
|
145
|
+
)
|
146
|
+
|
147
|
+
/*
|
148
|
+
Note 1: additions of 21 Jul 2010
|
149
|
+
*/
|
@@ -0,0 +1,202 @@
|
|
1
|
+
routines (
|
2
|
+
cleaning mark_regions
|
3
|
+
R1 R2
|
4
|
+
attached_pronoun
|
5
|
+
standard_suffix
|
6
|
+
verb_suffix
|
7
|
+
residual_suffix
|
8
|
+
)
|
9
|
+
|
10
|
+
externals ( stem )
|
11
|
+
|
12
|
+
integers ( p1 p2 )
|
13
|
+
|
14
|
+
groupings ( v )
|
15
|
+
|
16
|
+
stringescapes {}
|
17
|
+
|
18
|
+
/* special characters */
|
19
|
+
|
20
|
+
stringdef a' '{U+00E1}' // a-acute
|
21
|
+
stringdef a` '{U+00E0}' // a-grave
|
22
|
+
stringdef c, '{U+00E7}' // c-cedilla
|
23
|
+
stringdef e' '{U+00E9}' // e-acute
|
24
|
+
stringdef e` '{U+00E8}' // e-grave
|
25
|
+
stringdef i' '{U+00ED}' // i-acute
|
26
|
+
stringdef i` '{U+00EC}' // i-grave
|
27
|
+
stringdef i" '{U+00EF}' // i-diaeresis
|
28
|
+
stringdef o' '{U+00F3}' // o-acute
|
29
|
+
stringdef o` '{U+00F2}' // o-grave
|
30
|
+
stringdef u' '{U+00FA}' // u-acute
|
31
|
+
stringdef u" '{U+00FC}' // u-diaeresis
|
32
|
+
stringdef . '{U+00B7}' // middle dot, used for the geminated l ("l" + middle dot + "l")
|
33
|
+
|
34
|
+
define v 'aeiou{a'}{a`}{e'}{e`}{i'}{i"}{o'}{o`}{u'}{u"}'
|
35
|
+
|
36
|
+
// mark_regions: sets the integer marks p1 and p2 for the current word.
// Both default to `limit` and are overwritten only if the scan below gets
// that far. They back the R1 and R2 tests respectively.
define mark_regions as (
|
37
|
+
|
38
|
+
$p1 = limit
|
39
|
+
$p2 = limit // defaults
|
40
|
+
|
41
|
+
do (
|
42
|
+
gopast v gopast non-v setmark p1 // p1 = just past the first non-v that follows a v
|
43
|
+
gopast v gopast non-v setmark p2 // p2 = the same scan repeated, starting from p1
|
44
|
+
)
|
45
|
+
)
|
46
|
+
|
47
|
+
// cleaning: walks the word left to right (it is defined outside the
// backwardmode section) and normalises it one character at a time:
// accented vowels are replaced by their plain letter, the middle dot by '.',
// and any other character is skipped. `repeat` stops when `next` fails at the
// end of the word.
define cleaning as repeat (
|
48
|
+
[substring] among(
|
49
|
+
'{a'}' (<- 'a')
|
50
|
+
'{a`}' (<- 'a')
|
51
|
+
'{e'}' (<- 'e')
|
52
|
+
'{e`}' (<- 'e')
|
53
|
+
'{i'}' (<- 'i')
|
54
|
+
'{i`}' (<- 'i')
|
55
|
+
'{o'}' (<- 'o')
|
56
|
+
'{o`}' (<- 'o')
|
57
|
+
'{u'}' (<- 'u')
|
58
|
+
'{u"}' (<- 'u')
|
59
|
+
'{i"}' (<- 'i')
|
60
|
+
'{.}' (<- '.') // middle dot (U+00B7) becomes an ASCII full stop
|
61
|
+
'' (next) // empty string always matches: nothing to replace, move on one character
|
62
|
+
)
|
63
|
+
)
|
64
|
+
|
65
|
+
backwardmode (
|
66
|
+
|
67
|
+
define R1 as $p1 <= cursor // R1 test: succeeds when the cursor is at or beyond mark p1
|
68
|
+
define R2 as $p2 <= cursor // R2 test: succeeds when the cursor is at or beyond mark p2
|
69
|
+
|
70
|
+
// attached_pronoun: finds the longest ending from the list below at the
// cursor (backward mode) and deletes it when it lies in R1.
// '{'}' is the escaped apostrophe, so '{'}s' matches apostrophe + "s";
// entries starting with '-' include the hyphen in the match.
define attached_pronoun as (
|
71
|
+
[substring] among (
|
72
|
+
'{'}s' '{'}hi' '{'}ho' '{'}l' '{'}ls'
|
73
|
+
'-ls' '-la' '-les' '-li'
|
74
|
+
'vos' 'se' 'nos' '-nos' '-us' 'us'
|
75
|
+
'{'}n' '{'}ns' '-n' '-ns'
|
76
|
+
'{'}m' '-me' '-m'
|
77
|
+
'-te' '{'}t'
|
78
|
+
'li' 'lo' 'los'
|
79
|
+
'me' 'sela' 'selo' 'selas' 'selos' 'le'
|
80
|
+
'la' 'las' 'les' 'ens' 'ho' 'hi'
|
81
|
+
(R1 delete)
|
82
|
+
)
|
83
|
+
)
|
84
|
+
|
85
|
+
// standard_suffix: finds the longest ending from the list below at the cursor
// (backward mode) and applies the action attached to its group:
//   - first (large) group: delete if the ending lies in R1
//   - 'acions' 'ada' 'ades': delete if the ending lies in R2
//   - the 'log...' forms: replace by 'log' if in R2
//   - 'ic' 'ica' 'ics' 'iques': replace by 'ic' if in R2
//   - the 'qu{i'}ssim...' forms: replace by 'c' if in R1
// The routine fails when nothing matches or the region test fails; stem
// relies on that to fall through to verb_suffix.
define standard_suffix as (
|
86
|
+
[substring] among(
|
87
|
+
'ar' 'atge' 'formes' 'icte' 'ictes'
|
88
|
+
'ell' 'ells' 'ella' '{e'}s' '{e`}s' 'esc' 'essa' 'et' 'ets' 'eta'
|
89
|
+
'eres' 'eries' 'ers' 'ina' 'ines' 'able' 'ls'
|
90
|
+
'i{o'}' 'itat' 'itats' 'itzar' 'iva' 'ives' 'ivisme' 'ius'
|
91
|
+
'fer' 'ment' 'amen' 'ament' 'aments' 'ments' 'ot' 'sfera' 'al' 'als' 'era' 'ana' 'iste'
|
92
|
+
'aire' 'eria' 'esa' 'eses' 'esos' 'or' '{i'}cia' '{i'}cies' 'icis' 'ici' '{i'}ci' '{i'}cis'
|
93
|
+
'{a`}ria' '{a`}ries' 'alla' 'ci{o'}' 'cions' 'n{c,}a' 'nces' '{o'}' 'dor' 'all'
|
94
|
+
'il' '{i'}stic' 'enc' 'enca' '{i'}s' 'issa' 'issos' '{i'}ssem' '{i'}ssiu' 'issem' 'isseu' '{i'}sseu'
|
95
|
+
'{o'}s' 'osa' 'dora' 'dores' 'dors' 'adura' 'ble' 'bles' '{i'}vol' '{i'}vola' 'd{i'}s' 'egar' 'ejar' 'ificar'
|
96
|
+
'itar' 'ables' 'adors' 'idores' 'idors'
|
97
|
+
'adora' 'aci{o'}' 'doras' 'dur' 'dures' 'alleng{u"}es'
|
98
|
+
'ant' 'ants' 'ancia' 'ancies' 'at{o`}ria' 'at{o`}ries' 'tori' 'toris'
|
99
|
+
'ats' 'ions' 'ota' 'isam' 'ors' 'ora' 'ores' 'isament'
|
100
|
+
'bilitat' 'bilitats' 'ivitat' 'ivitats' 'ari' 'aris' 'ionisme' 'ionista' 'ionistes'
|
101
|
+
'ialista' 'ialistes' 'ialisme' 'ialismes' 'ud' 'uts' 'uds' 'encia' 'encies' '{e`}ncia' '{e`}ncies'
|
102
|
+
'{i"}tat' '{i"}tats' 'atiu' 'atius' 'atives' 'ativa' 'ativitat' 'ativitats' 'ible' 'ibles'
|
103
|
+
'assa' 'asses' 'assos'
|
104
|
+
'ent' 'ents'
|
105
|
+
'{i'}ssim' '{i'}ssima' '{i'}ssims' '{i'}ssimes' '{i`}ssem' '{i`}sseu' '{i`}ssin'
|
106
|
+
'ims' 'ima' 'imes'
|
107
|
+
'isme' 'ista' 'ismes' 'istes'
|
108
|
+
'inia' 'inies' '{i'}inia' '{i'}nies' 'ita' 'ites' 'triu' 'trius' // NOTE(review): '{i'}inia' looks like a typo for '{i'}nia' (compare '{i'}nies'); left unchanged because fixing it alters stemmer output - confirm against upstream
|
109
|
+
'oses' 'osos' 'ient' 'otes' 'ots'
|
110
|
+
(R1 delete)
|
111
|
+
'acions' 'ada' 'ades'
|
112
|
+
(R2 delete)
|
113
|
+
'log{i'}a' 'log{i'}es''logia' 'logies' 'logi' 'logis' 'l{o'}gica' 'l{o'}gics' 'l{o'}giques' // NOTE(review): no space between 'log{i'}es' and 'logia'; they read as two separate literals, but a separating space would be clearer
|
114
|
+
(R2 <- 'log')
|
115
|
+
'ic' 'ica' 'ics' 'iques'
|
116
|
+
(R2 <- 'ic')
|
117
|
+
'qu{i'}ssim' 'qu{i'}ssims' 'qu{i'}ssimes' 'qu{i'}ssima'
|
118
|
+
(R1 <- 'c')
|
119
|
+
)
|
120
|
+
)
|
121
|
+
|
122
|
+
define verb_suffix as (
|
123
|
+
[substring] among(
|
124
|
+
'ador' 'adora' 'adors' 'adores' 're' 'ie'
|
125
|
+
'ent' 'ents' 'udes' 'ar{a`}' 'eren'
|
126
|
+
'ar{a'}' 'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
|
127
|
+
'aria' 'arian' 'arien' 'aries' 'ar{a`}s'
|
128
|
+
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ara'
|
129
|
+
'ar{e'}' 'ar{e'}s'
|
130
|
+
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
|
131
|
+
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
|
132
|
+
'er{e'}' 'er' 'erau' 'erass'
|
133
|
+
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
|
134
|
+
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
|
135
|
+
'ir{e'}' '{i'}rem' '{i'}reu' '{i'}eu'
|
136
|
+
'ia' 'ies' '{i'}em' '{i`}eu' 'ien'
|
137
|
+
'at' 'ut' 'uda' 'ava' 'aves' 'avem' '{a'}vem' '{a`}vem' '{a`}veu' '{a'}veu' 'aven' 'au' 'ats'
|
138
|
+
'asseu' 'esseu' 'eresseu' '{a`}sseu' '{a`}ssem' '{a`}ssim' '{a`}ssiu'
|
139
|
+
'essen' 'esses' 'assen' 'asses' 'assim' 'assiu'
|
140
|
+
'{e'}ssen' '{e'}sseu' '{e'}ssim' '{e'}ssiu' '{e'}ssem'
|
141
|
+
'{i'}' 'ares' '{a`}rem' '{a`}reu' '{a`}ren'
|
142
|
+
'ar{i'}em' 'ar{i'}eu'
|
143
|
+
'areu' 'aren' 'ant' '{i"}m' '{i"}u'
|
144
|
+
'{e'}s' '{i"}en' 'en' 'es' 'em' 'am' 'ams' '{i"}a' '{i"}es'
|
145
|
+
'dre' 'eix' 'eixer' 'tzar' 'eixes' 'ides' '{i"}des' 'it' '{i"}t' '{i"}da'
|
146
|
+
'aba' 'ada' 'ades' 'ida' '{i'}a' 'iera' 'ad' 'ed' 'its'
|
147
|
+
'id' 'ids' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
|
148
|
+
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
|
149
|
+
'ido' 'iendo' 'i{o'}' 'ar' 'ir' 'as'
|
150
|
+
'ieu' 'ii' 'io' 'i{a`}'
|
151
|
+
'ess' 'essin' 'essis' 'ass' 'assin' 'assis' 'essim' '{e`}ssim' '{e`}ssiu'
|
152
|
+
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
|
153
|
+
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
|
154
|
+
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
|
155
|
+
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos' 'ques'
|
156
|
+
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
|
157
|
+
'ira' 'iran' 'irem' 'iren' 'ires' 'ireu' 'iria' 'irien'
|
158
|
+
'iries' 'ir{a`}' 'ir{a`}s' 'ir{e`}' 'ir{i`}em' 'ir{i`}eu'
|
159
|
+
'isquen' 'iguem' 'igueu' 'esqui' 'esquin' 'esquis' 'eixi' 'eixin' 'eixis'
|
160
|
+
'eixen' 'eixo' 'isin' 'isis' 'esques' 'sis' 'sin'
|
161
|
+
'int' 'ir{i'}em' 'ir{i'}eu' 'isc' 'atges' 'esca' 'esquen'
|
162
|
+
'issen' 'isses' 'issin' 'issis' 'isca' 'issiu' 'issim'
|
163
|
+
'{i"}sc' '{i"}sca' '{i"}ssin' '{i'}ssiu' '{i'}ssim' '{i"}ssis' '{i"}guem' '{i"}gueu'
|
164
|
+
'{i"}ra' '{i"}ren' '{i"}res'
|
165
|
+
'{i"}squen' '{i"}sques' '{i"}ssen' '{i"}sses' '{i"}xo' '{i"}xen' '{i"}xes' '{i"}x'
|
166
|
+
'ixo' 'ixen' 'ixes' 'ix' 'ixa' 'inin' 'inis' 'ini' 'ineu' 'itza' 'itzi' 'itzeu' 'itzis'
|
167
|
+
'itzo' 'itz' 'itz{a`}' 'arem' 'in' '{a`}s' 'i{i"}' 'i{i"}n' 'i{i"}s'
|
168
|
+
(R1 delete)
|
169
|
+
'ando'
|
170
|
+
(R2 delete)
|
171
|
+
)
|
172
|
+
)
|
173
|
+
|
174
|
+
// residual_suffix: finds the longest ending from the list below at the cursor
// (backward mode). Endings in the first group are deleted when they lie in R1;
// the ending 'iqu' is replaced by 'ic' when it lies in R1.
define residual_suffix as (
|
175
|
+
[substring] among(
|
176
|
+
'os' 'a' 'o' '{a'}' '{a`}' '{i'}' '{o'}' 'e' '{e'}' 'eu' 'iu'
|
177
|
+
'is' 'i' 'ir' 's' '{i`}' 'itz' '{i"}' '{i"}n' '{i"}s' 'it'
|
178
|
+
(R1 delete)
|
179
|
+
'iqu'
|
180
|
+
(R1 <- 'ic')
|
181
|
+
)
|
182
|
+
)
|
183
|
+
)
|
184
|
+
|
185
|
+
// stem: the external entry point of the Catalan stemmer.
// Computes the region marks, then works backwards from the end of the word:
// an attached pronoun is tried first, then standard_suffix with verb_suffix
// as the fallback when standard_suffix fails, then residual_suffix.
// Each step is wrapped in `do`, so a failing step does not abort the rest.
// Finally cleaning runs forwards over the result to strip the accents.
define stem as (
|
186
|
+
do mark_regions
|
187
|
+
backwards (
|
188
|
+
do attached_pronoun
|
189
|
+
do ( standard_suffix or
|
190
|
+
verb_suffix
|
191
|
+
)
|
192
|
+
do residual_suffix
|
193
|
+
)
|
194
|
+
do cleaning
|
195
|
+
)
|
196
|
+
|
197
|
+
/*
|
198
|
+
First works 2010/07/19
|
199
|
+
First Grammatical Reviews: https://ca.wikipedia.org/wiki/Gram%C3%A0tica_del_catal%C3%A0
|
200
|
+
Suffix list: https://ca.wikipedia.org/wiki/Llista_de_sufixos
|
201
|
+
Irregular Verbs: https://ca.wikipedia.org/wiki/Flexi%C3%B3_verbal_del_catal%C3%A0
|
202
|
+
*/
|