mittens 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,301 @@
1
+ stringescapes {} // braces delimit escapes inside the string literals that follow
2
+ // One stringdef per letter. Each trailing hex note is 0x30 below the code point being defined (presumably the matching capital letter -- confirm).
3
+ stringdef a '{U+0561}' // 531
4
+ stringdef b '{U+0562}' // 532
5
+ stringdef g '{U+0563}' // 533
6
+ stringdef d '{U+0564}' // 534
7
+ stringdef ye '{U+0565}' // 535
8
+ stringdef z '{U+0566}' // 536
9
+ stringdef e '{U+0567}' // 537
10
+ stringdef y '{U+0568}' // 538
11
+ stringdef dt '{U+0569}' // 539
12
+ stringdef zh '{U+056A}' // 53A
13
+ stringdef i '{U+056B}' // 53B
14
+ stringdef l '{U+056C}' // 53C
15
+ stringdef kh '{U+056D}' // 53D
16
+ stringdef ts '{U+056E}' // 53E
17
+ stringdef k '{U+056F}' // 53F
18
+ stringdef h '{U+0570}' // 540
19
+ stringdef dz '{U+0571}' // 541
20
+ stringdef gh '{U+0572}' // 542
21
+ stringdef djch '{U+0573}' // 543
22
+ stringdef m '{U+0574}' // 544
23
+ stringdef j '{U+0575}' // 545
24
+ stringdef n '{U+0576}' // 546
25
+ stringdef sh '{U+0577}' // 547
26
+ stringdef vo '{U+0578}' // 548
27
+ stringdef ch '{U+0579}' // 549
28
+ stringdef p '{U+057A}' // 54A
29
+ stringdef dj '{U+057B}' // 54B
30
+ stringdef r '{U+057C}' // 54C
31
+ stringdef s '{U+057D}' // 54D
32
+ stringdef v '{U+057E}' // 54E
33
+ stringdef t '{U+057F}' // 54F
34
+ stringdef r' '{U+0580}' // 550
35
+ stringdef c '{U+0581}' // 551
36
+ stringdef u '{U+0582}' // 552 //vjun
37
+ stringdef bp '{U+0583}' // 553
38
+ stringdef q '{U+0584}' // 554
39
+ stringdef ev '{U+0587}'
40
+ stringdef o '{U+0585}' // 555
41
+ stringdef f '{U+0586}' // 556
42
+ // Internal routines; each one is defined further down in this file.
43
+ routines ( mark_regions R2
44
+ adjective
45
+ verb
46
+ noun
47
+ ending
48
+ )
49
+ // stem is the only routine declared as external.
50
+ externals ( stem )
51
+ // pV and p2 are integer marks assigned by mark_regions.
52
+ integers ( pV p2 )
53
+
54
+ groupings ( v )
55
+ // Grouping v: the eight letters that mark_regions scans for with v and non-v.
56
+ define v '{a}{e}{i}{o}{u}{ye}{vo}{y}'
57
+
58
+ define mark_regions as ( // computes the marks pV and p2 for the current word
59
+ // Both marks start at limit; the scans below move them only as far as they succeed.
60
+ $pV = limit
61
+ $p2 = limit
62
+ do ( // under do, a failing scan keeps any mark already set and does not fail the routine
63
+ gopast v setmark pV gopast non-v // pV: just past the first letter of grouping v; then skip past the next non-v letter
64
+ gopast v gopast non-v setmark p2 // p2: just past a further v letter and the non-v letter after it
65
+ )
66
+ )
67
+
68
+ backwardmode (
69
+
70
+ define R2 as $p2 <= cursor // succeeds only when the cursor is at or to the right of mark p2
71
+
72
+ define adjective as ( // removes one listed suffix; signals failure when none of them ends at the cursor
73
+ [substring] among ( // longest listed string ending at the cursor becomes the slice
74
+ '{b}{a}{r'}'
75
+ '{p}{ye}{s}'
76
+ '{vo}{r'}{e}{n}'
77
+ '{vo}{v}{i}{n}'
78
+ '{a}{k}{i}'
79
+ '{l}{a}{j}{n}'
80
+ '{r'}{vo}{r'}{d}'
81
+ '{ye}{r'}{vo}{r'}{d}'
82
+ '{a}{k}{a}{n}'
83
+ '{a}{l}{i}'
84
+ '{k}{vo}{t}'
85
+ '{ye}{k}{ye}{n}'
86
+ '{vo}{r'}{a}{k}'
87
+ '{ye}{gh}'
88
+ '{v}{vo}{u}{n}'
89
+ '{ye}{r'}{ye}{n}'
90
+ '{a}{r'}{a}{n}'
91
+ '{ye}{n}'
92
+ '{a}{v}{ye}{t}'
93
+ '{g}{i}{n}'
94
+ '{i}{v}'
95
+ '{a}{t}'
96
+ '{i}{n}'
97
+ // Every entry above shares the single action below.
98
+ (delete)
99
+ )
100
+ )
101
+
102
+ define verb as ( // removes one listed suffix; signals failure when none of them ends at the cursor
103
+ [substring] among ( // longest listed string ending at the cursor becomes the slice
104
+ '{vo}{u}{m}'
105
+ '{v}{vo}{u}{m}'
106
+ '{a}{l}{vo}{u}'
107
+ '{ye}{l}{vo}{u}'
108
+ '{v}{ye}{l}'
109
+ '{a}{n}{a}{l}'
110
+ '{ye}{l}{vo}{u}{c}'
111
+ '{a}{l}{vo}{u}{c}'
112
+ '{y}{a}{l}'
113
+ '{y}{ye}{l}'
114
+ '{a}{l}{vo}{v}'
115
+ '{ye}{l}{vo}{v}'
116
+ '{a}{l}{i}{s}'
117
+ '{ye}{l}{i}{s}'
118
+ '{ye}{n}{a}{l}'
119
+ '{a}{c}{n}{a}{l}'
120
+ '{ye}{c}{n}{ye}{l}'
121
+ '{c}{n}{ye}{l}'
122
+ '{n}{ye}{l}'
123
+ '{a}{t}{ye}{l}'
124
+ '{vo}{t}{ye}{l}'
125
+ '{k}{vo}{t}{ye}{l}'
126
+ '{t}{ye}{l}'
127
+ '{v}{a}{ts}'
128
+ '{ye}{c}{v}{ye}{l}'
129
+ '{a}{c}{v}{ye}{l}'
130
+ '{ye}{c}{i}{r'}'
131
+ '{a}{c}{i}{r'}'
132
+ '{ye}{c}{i}{n}{q}'
133
+ '{a}{c}{i}{n}{q}'
134
+ '{v}{ye}{c}{i}{r'}'
135
+ '{v}{ye}{c}{i}{n}{q}'
136
+ '{v}{ye}{c}{i}{q}'
137
+ '{v}{ye}{c}{i}{n}'
138
+ '{a}{c}{r'}{i}{r'}'
139
+ '{a}{c}{r'}{ye}{c}'
140
+ '{a}{c}{r'}{i}{n}{q}'
141
+ '{a}{c}{r'}{i}{q}'
142
+ '{a}{c}{r'}{i}{n}'
143
+ '{ye}{c}{i}{q}'
144
+ '{a}{c}{i}{q}'
145
+ '{ye}{c}{i}{n}'
146
+ '{a}{c}{i}{n}'
147
+ '{a}{c}{a}{r'}'
148
+ '{a}{c}{a}{v}'
149
+ '{a}{c}{a}{n}{q}'
150
+ '{a}{c}{a}{q}'
151
+ '{a}{c}{a}{n}'
152
+ '{v}{ye}{c}{i}'
153
+ '{a}{c}{r'}{i}'
154
+ '{ye}{c}{a}{r'}'
155
+ '{ye}{c}{a}{v}'
156
+ '{c}{a}{n}{q}'
157
+ '{c}{a}{q}'
158
+ '{c}{a}{n}'
159
+ '{a}{c}{a}'
160
+ '{a}{c}{i}'
161
+ '{ye}{c}{a}'
162
+ '{ch}{ye}{l}'
163
+ '{ye}{c}{i}'
164
+ '{a}{r'}'
165
+ '{a}{v}'
166
+ '{a}{n}{q}'
167
+ '{a}{q}'
168
+ '{a}{n}'
169
+ '{a}{l}'
170
+ '{ye}{l}'
171
+ '{ye}{c}'
172
+ '{a}{c}'
173
+ '{v}{ye}'
174
+ '{a}'
175
+ // Every entry above shares the single action below.
176
+ (delete)
177
+ )
178
+ )
179
+
180
+ define noun as ( // removes one listed suffix; signals failure when none of them ends at the cursor
181
+ [substring] among ( // longest listed string ending at the cursor becomes the slice
182
+ '{a}{ts}{vo}'
183
+ '{a}{n}{a}{k}'
184
+ '{a}{n}{o}{c}'
185
+ '{a}{r'}{a}{n}'
186
+ '{a}{r'}{q}'
187
+ '{p}{a}{n}'
188
+ '{s}{t}{a}{n}'
189
+ '{ye}{gh}{e}{n}'
190
+ '{ye}{n}{q}'
191
+ '{i}{k}'
192
+ '{i}{ch}'
193
+ '{i}{q}'
194
+ '{m}{vo}{u}{n}{q}'
195
+ '{j}{a}{k}'
196
+ '{j}{vo}{u}{n}'
197
+ '{vo}{n}{q}'
198
+ '{vo}{r'}{d}'
199
+ '{vo}{c}'
200
+ '{ch}{ye}{q}'
201
+ '{v}{a}{ts}{q}'
202
+ '{v}{vo}{r'}'
203
+ '{a}{v}{vo}{r'}'
204
+ '{vo}{u}{dt}{j}{vo}{u}{n}'
205
+ '{vo}{u}{k}'
206
+ '{vo}{u}{h}{i}'
207
+ '{vo}{u}{j}{dt}'
208
+ '{vo}{u}{j}{q}'
209
+ '{vo}{u}{s}{t}'
210
+ '{vo}{u}{s}'
211
+ '{c}{i}'
212
+ '{a}{l}{i}{q}'
213
+ '{a}{n}{i}{q}'
214
+ '{i}{l}'
215
+ '{i}{ch}{q}'
216
+ '{vo}{u}{n}{q}'
217
+ '{g}{a}{r'}'
218
+ '{vo}{u}'
219
+ '{a}{k}'
220
+ '{a}{n}'
221
+ '{q}'
222
+ // Every entry above shares the single action below.
223
+ (delete)
224
+ )
225
+ )
226
+
227
+ define ending as ( // removes one listed ending, but only when the R2 test passes after the match
227
+ [substring] R2 among ( // R2 is tested between the suffix match and the action, so a match left of p2 is rejected
228
+ '{n}{ye}{r'}{y}'
229
+ '{n}{ye}{r'}{n}'
230
+ '{n}{ye}{r'}{i}'
231
+ '{n}{ye}{r'}{d}'
232
+ '{ye}{r'}{i}{c}'
233
+ '{n}{ye}{r'}{i}{c}'
234
+ '{ye}{r'}{i}'
235
+ '{ye}{r'}{d}'
236
+ '{ye}{r'}{n}'
237
+ '{ye}{r'}{y}'
238
+ '{n}{ye}{r'}{i}{n}'
239
+ '{vo}{u}{dt}{j}{a}{n}{n}'
240
+ '{vo}{u}{dt}{j}{a}{n}{y}'
241
+ '{vo}{u}{dt}{j}{a}{n}{s}'
242
+ '{vo}{u}{dt}{j}{a}{n}{d}'
243
+ '{vo}{u}{dt}{j}{a}{n}'
244
+ '{ye}{r'}{i}{n}'
245
+ '{i}{n}'
246
+ '{s}{a}'
247
+ '{vo}{dj}'
248
+ '{i}{c}'
249
+ '{ye}{r'}{vo}{v}'
250
+ '{n}{ye}{r'}{vo}{v}'
251
+ '{ye}{r'}{vo}{u}{m}'
252
+ '{n}{ye}{r'}{vo}{u}{m}'
253
+ '{vo}{u}{n}'
254
+ '{vo}{u}{d}'
255
+ '{v}{a}{n}{s}'
256
+ '{v}{a}{n}{y}'
257
+ '{v}{a}{n}{d}'
258
+ '{a}{n}{y}'
259
+ '{a}{n}{d}'
260
+ '{v}{a}{n}'
261
+ '{vo}{dj}{y}'
262
+ '{vo}{dj}{s}'
263
+ '{vo}{dj}{d}'
264
+ '{vo}{c}'
265
+ '{vo}{u}{c}'
266
+ '{vo}{dj}{i}{c}'
267
+ '{c}{i}{c}'
268
+ '{v}{i}{c}'
269
+ '{v}{i}'
270
+ '{v}{vo}{v}'
271
+ '{vo}{v}'
272
+ '{a}{n}{vo}{v}'
273
+ '{a}{n}{vo}{u}{m}'
274
+ '{v}{a}{n}{i}{c}'
275
+ '{a}{m}{b}'
276
+ '{a}{n}'
277
+ '{n}{ye}{r'}'
278
+ '{ye}{r'}'
279
+ '{v}{a}'
280
+ '{y}'
281
+ '{n}'
282
+ '{d}'
283
+ '{c}'
284
+ '{i}'
285
+ // Every entry above shares the single action below.
286
+ (delete)
287
+ )
288
+ )
290
+ )
291
+
292
+ define stem as ( // external entry point: marks regions, then strips suffixes working from the end of the word
293
+ // The suffix pass runs backwards with the limit placed at mark pV, so no match can reach left of pV.
294
+ do mark_regions
295
+ backwards setlimit tomark pV for (
296
+ do ending // each step is wrapped in do, so a step that fails does not stop the ones after it
297
+ do verb
298
+ do adjective
299
+ do noun
300
+ )
301
+ )
@@ -0,0 +1,149 @@
1
+ routines ( // internal routines; each one is defined further down in this file
2
+ aditzak
3
+ izenak
4
+ adjetiboak
5
+ mark_regions
6
+ RV R2 R1
7
+ )
8
+ // stem is the only routine declared as external.
9
+ externals ( stem )
10
+ // pV, p1 and p2 are integer marks assigned by mark_regions.
11
+ integers ( pV p1 p2 )
12
+
13
+ groupings ( v )
14
+
15
+ stringescapes {}
16
+
17
+ /* special characters */
18
+ // n~ names code point U+00F1 and is used inside a few izenak entries.
19
+ stringdef n~ '{U+00F1}'
20
+ // Grouping v: the five letters that mark_regions scans for with v and non-v.
21
+ define v 'aeiou'
22
+
23
+ define mark_regions as ( // computes the marks pV, p1 and p2 for the current word
24
+ // All three marks start at limit; each do-block below moves them only as far as its scans succeed.
25
+ $pV = limit
26
+ $p1 = limit
27
+ $p2 = limit // defaults
28
+ // First do-block: choose pV from the first two letters of the word.
29
+ do (
30
+ ( v (non-v gopast v) or (v gopast non-v) ) // starts with v: second letter non-v -> past the next v; second letter v -> past the next non-v
31
+ or
32
+ ( non-v (non-v gopast v) or (v next) ) // starts with non-v: second letter non-v -> past the next v; second letter v -> one letter further
33
+ setmark pV
34
+ )
35
+ do ( // second do-block: p1 and p2, each set after a v letter followed by a non-v letter
36
+ gopast v gopast non-v setmark p1
37
+ gopast v gopast non-v setmark p2
38
+ )
39
+ )
40
+
41
+ backwardmode (
42
+
43
+ define RV as $pV <= cursor // succeeds only when the cursor is at or to the right of mark pV
44
+ define R2 as $p2 <= cursor // same test against mark p2
45
+ define R1 as $p1 <= cursor // same test against mark p1
46
+
47
+ define aditzak as ( // matches one listed suffix at the cursor and runs the action of its group; fails when nothing matches
48
+ [substring] among(
49
+ 'le' 'la' 'tzaile' 'aldatu' 'atu' 'tzailea' 'taile' 'tailea' 'pera' 'gale' 'galea'
50
+ 'gura' 'kura' 'kor' 'korra' 'or' 'orra' 'tun' 'tuna' 'gaitz' 'gaitza'
51
+ 'kaitz' 'kaitza' 'ezin' 'ezina' 'tezin' 'tezina' 'errez' 'erreza'
52
+ 'karri' 'karria' 'tzaga' 'tzaka' 'tzake' 'tzeke' 'ez' 'eza' 'tzez'
53
+ 'keta' 'eta' 'etan' 'pen' 'pena' 'tze' 'atze' 'kuntza' 'kunde' 'kundea'
54
+ 'kune' 'kunea' 'kuna' 'kera' 'era' 'kizun' 'kizuna' 'dura' 'tura' 'men' 'mena'
55
+ 'go' 'ago' 'tio' 'taldi' 'taldia' 'aldi' 'aldia' 'gune' 'gunea' 'bide' 'bidea'
56
+ 'pide' 'pidea' 'gai' 'gaia' 'ki' 'kin' 'rekin' 'kina' 'kari' 'karia' 'ari' 'tari' 'etari'
57
+ 'gailu' 'gailua' 'kide' 'kidea' 'ide' 'idea' 'du' 'ka' 'kan' 'an' 'ean' 'tu' 'lari' 'tatu'
58
+ 'rean' 'tarazi' 'arazi' 'tzat' 'bera' 'dako'
59
+ ( RV delete ) // group 1: deleted only when the RV test passes
60
+ 'garri' 'garria' 'tza'
61
+ (R2 delete) // group 2: deleted only when the R2 test passes
62
+ 'atseden'
63
+ (<- 'atseden') // replaced by the same text, so the word itself is left unchanged by this match
64
+ 'arabera'
65
+ (<- 'arabera') // same self-replacement
66
+ 'baditu'
67
+ (<- 'baditu') // same self-replacement
68
+
69
+ )
70
+ )
71
+
72
+ define izenak as ( // matches one listed suffix at the cursor and runs the action of its group; fails when nothing matches
73
+ [substring] among(
74
+ 'ari' 'aria' 'bizia' 'kari' 'karia' 'lari' 'laria' 'tari' 'taria' 'zain' 'zaina'
75
+ 'tzain' 'tzaina' 'zale' 'zalea' 'tzale' 'tzalea' 'aizun' 'orde' 'ordea'
76
+ 'burua' 'ohi' 'ohia' 'kintza' 'gintzo' 'gintzu' 'tzu' 'tzua'
77
+ 'tzo' 'tzoa' 'kuntza' 'talde' 'taldea' 'eria' 'keria' 'teria' 'di'
78
+ 'za' 'ada' 'tara' 'etara' 'tra' 'ta' 'tegi' 'tegia' 'keta' 'z' 'zko' 'zkoa'
79
+ 'ti' 'tia' 'tsu' 'tsua' 'zu' 'zua' 'bera' 'pera' 'zto' 'ztoa' 'asi' 'asia'
80
+ 'gile' 'gilea' 'estu' 'estua' 'larri' 'larria' 'nahi' 'nahia'
81
+ 'koi' 'koia' 'oi' 'oia' 'goi' 'min' 'mina' 'dun' 'duna' 'duru' 'durua'
82
+ 'duri' 'duria' 'os' 'osa' 'oso' 'osoa' 'ar' 'ara' 'tar' 'dar' 'dara'
83
+ 'tiar' 'tiara' 'liar' 'liara' 'gabe' 'gabea' 'kabe' 'kabea' 'ga' 'ge'
84
+ 'kada' 'tasun' 'tasuna' 'asun' 'asuna' 'go' 'mendu' 'mendua' 'mentu' 'mentua'
85
+ 'mendi' 'mendia' 'zio' 'zioa' 'zino' 'zinoa' 'zione' 'zionea' 'ezia'
86
+ 'degi' 'degia' 'egi' 'egia' 'toki' 'tokia' 'leku' 'lekua' 'gintza' 'alde'
87
+ 'aldea' 'kalde' 'kaldea' 'gune' 'gunea' 'une' 'unea' 'una' 'pe' 'pea'
88
+ 'gibel' 'gibela' 'ondo' 'ondoa' 'arte' 'artea' 'aurre' 'aurrea'
89
+ 'etxe' 'etxea' 'ola' 'ontzi' 'ontzia' 'gela' 'denda' 'taldi' 'taldia'
90
+ 'aldi' 'aldia' 'te' 'tea' 'zaro' 'zaroa' 'taro' 'taroa' 'oro' 'oroa'
91
+ 'aro' 'aroa' 'ero' 'eroa' 'eroz' 'eroza' 'ka' 'kan' 'kana' 'tako' 'etako' 'takoa'
92
+ 'kote' 'kotea' 'tzar' 'tzarra' 'handi' 'handia' 'kondo' 'kondoa' 'skila'
93
+ 'no' 'noa' '{n~}o' '{n~}oa' 'ska' 'xka' 'zka' 'tila' 'to' 'toa' 'tto' 'ttoa'
94
+ 'txo' 'txoa' 'txu' 'txua' 'anda' 'anga' 'urren' 'urrena' 'gai' 'gaia'
95
+ 'gei' 'geia' 'eme' 'emea' 'kume' 'kumea' 'sa' 'ko' 'eko' 'koa' 'ena'
96
+ 'enea' 'ne' 'nea' 'kor' 'korra' 'ez' 'eza' 'eta' 'etan'
97
+ 'ki' 'kia' 'kin' 'kina' 'tu' 'tua' 'du' 'dua' 'ek'
98
+ 'tarik' 'tariko' 'tan' 'ordu' 'ordua' 'oste' 'ostea' 'tzara'
99
+ 'ra' 'antza' 'behar' 'ro' 'giro' 'ak' 'zp' 'ket'
100
+ 'kail' 'kaila' 'ail' 'kirri' 'kirria' 'ngo' 'ngoa' '{n~}i' 'sko'
101
+ 'sta' 'koitz' 'koitza' 'na' 'garren' 'garrena' 'kera'
102
+ 'gerren' 'gerrena' 'garna' 'kide' 'tz' 'tuko'
103
+ ( RV delete ) // group 1: deleted only when the RV test passes
104
+ 'ora' 'garri' 'garria' 'or' 'buru' 'ren' 'tza'
105
+ ( R2 delete ) // group 2: deleted only when the R2 test passes
106
+ 'joka'
107
+ (<- 'jok') // unconditional rewrite: drops the final a
108
+ 'tzen' 'ten' 'en' 'tatu'
109
+ (R1 delete) // group 4: deleted only when the R1 test passes
110
+ 'trako'
111
+ (<- 'tra') // unconditional rewrite: drops the final ko
112
+ 'minutuko'
113
+ (<- 'minutu') // unconditional rewrite: drops the final ko
114
+ 'zehar'
115
+ (<- 'zehar') // replaced by the same text, so the word itself is left unchanged by this match
116
+ 'geldi'
117
+ (<- 'geldi') // same self-replacement
118
+ 'igaro'
119
+ (<- 'igaro') // same self-replacement
120
+ 'aurka'
121
+ (<- 'aurka') // same self-replacement
122
+ )
123
+ )
124
+
125
+ define adjetiboak as ( // matches one listed suffix at the cursor and runs the action of its group; fails when nothing matches
126
+ [substring] among(
127
+ 'era' 'ero' 'go' 'tate' 'tade' 'date' 'dade' 'keria'
128
+ 'ki' 'to' 'ro' 'la' 'gi' 'larik' 'lanik' 'ik' 'ztik' 'rik'
129
+ ( RV delete ) // group 1: deleted only when the RV test passes
130
+ 'zlea'
131
+ (<- 'z') // unconditional rewrite: keeps only the leading z
132
+ )
133
+ )
134
+
135
+ )
136
+
137
+ define stem as ( // external entry point: marks regions, then strips suffixes working from the end of the word
138
+ do mark_regions
139
+ backwards (
140
+ repeat aditzak // applied again and again until it fails
141
+ repeat izenak // applied again and again until it fails
142
+ do adjetiboak // applied at most once; a failure here is ignored
143
+ )
144
+
145
+ )
146
+
147
+ /*
148
+ Note 1: additions of 21 Jul 2010
149
+ */
@@ -0,0 +1,202 @@
1
+ routines ( // internal routines; each one is defined further down in this file
2
+ cleaning mark_regions
3
+ R1 R2
4
+ attached_pronoun
5
+ standard_suffix
6
+ verb_suffix
7
+ residual_suffix
8
+ )
9
+ // stem is the only routine declared as external.
10
+ externals ( stem )
11
+ // p1 and p2 are integer marks assigned by mark_regions.
12
+ integers ( p1 p2 )
13
+
14
+ groupings ( v )
15
+
16
+ stringescapes {}
17
+
18
+ /* special characters */
19
+
20
+ stringdef a' '{U+00E1}' // a-acute
21
+ stringdef a` '{U+00E0}' // a-grave
22
+ stringdef c, '{U+00E7}' // c-cedilla
23
+ stringdef e' '{U+00E9}' // e-acute
24
+ stringdef e` '{U+00E8}' // e-grave
25
+ stringdef i' '{U+00ED}' // i-acute
26
+ stringdef i` '{U+00EC}' // i-grave
27
+ stringdef i" '{U+00EF}' // i-diaeresis
28
+ stringdef o' '{U+00F3}' // o-acute
29
+ stringdef o` '{U+00F2}' // o-grave
30
+ stringdef u' '{U+00FA}' // u-acute
31
+ stringdef u" '{U+00FC}' // u-diaeresis
32
+ stringdef . '{U+00B7}' // middle dot, for the geminated l
33
+ // Grouping v: plain and accented vowels. NOTE(review): i-grave is defined above but is not listed in v -- confirm this is intended.
34
+ define v 'aeiou{a'}{a`}{e'}{e`}{i'}{i"}{o'}{o`}{u'}{u"}'
35
+
36
+ define mark_regions as ( // computes the marks p1 and p2 for the current word
37
+ // Both marks start at limit; the scans below move them only as far as they succeed.
38
+ $p1 = limit
39
+ $p2 = limit // defaults
40
+
41
+ do ( // under do, a failing scan keeps any mark already set and does not fail the routine
42
+ gopast v gopast non-v setmark p1 // p1: just past the first non-v letter that follows a v letter
43
+ gopast v gopast non-v setmark p2 // p2: the same scan repeated, starting from p1
44
+ )
45
+ )
46
+
47
+ define cleaning as repeat ( // defined outside backwardmode; walks the word one match at a time until a step fails
48
+ [substring] among( // each accented vowel is rewritten to its unaccented letter
49
+ '{a'}' (<- 'a')
50
+ '{a`}' (<- 'a')
51
+ '{e'}' (<- 'e')
52
+ '{e`}' (<- 'e')
53
+ '{i'}' (<- 'i')
54
+ '{i`}' (<- 'i')
55
+ '{o'}' (<- 'o')
56
+ '{o`}' (<- 'o')
57
+ '{u'}' (<- 'u')
58
+ '{u"}' (<- 'u')
59
+ '{i"}' (<- 'i')
60
+ '{.}' (<- '.')
61
+ '' (next)
62
+ ) // the empty entry matches when nothing else does and just steps one character; c-cedilla has no entry, so it is stepped over unchanged
63
+ )
64
+
65
+ backwardmode (
66
+
67
+ define R1 as $p1 <= cursor // succeeds only when the cursor is at or to the right of mark p1
68
+ define R2 as $p2 <= cursor // same test against mark p2
69
+
70
+ define attached_pronoun as ( // removes one listed pronoun form at the cursor when the R1 test passes; fails otherwise
71
+ [substring] among ( // entries cover apostrophe-joined, hyphen-joined and directly attached forms
72
+ '{'}s' '{'}hi' '{'}ho' '{'}l' '{'}ls'
73
+ '-ls' '-la' '-les' '-li'
74
+ 'vos' 'se' 'nos' '-nos' '-us' 'us'
75
+ '{'}n' '{'}ns' '-n' '-ns'
76
+ '{'}m' '-me' '-m'
77
+ '-te' '{'}t'
78
+ 'li' 'lo' 'los'
79
+ 'me' 'sela' 'selo' 'selas' 'selos' 'le'
80
+ 'la' 'las' 'les' 'ens' 'ho' 'hi'
81
+ (R1 delete) // single action shared by every entry above
82
+ )
83
+ )
84
+
85
+ define standard_suffix as ( // matches one listed suffix at the cursor and runs the action of its group; fails when nothing matches or the region test fails
86
+ [substring] among(
87
+ 'ar' 'atge' 'formes' 'icte' 'ictes'
88
+ 'ell' 'ells' 'ella' '{e'}s' '{e`}s' 'esc' 'essa' 'et' 'ets' 'eta'
89
+ 'eres' 'eries' 'ers' 'ina' 'ines' 'able' 'ls'
90
+ 'i{o'}' 'itat' 'itats' 'itzar' 'iva' 'ives' 'ivisme' 'ius'
91
+ 'fer' 'ment' 'amen' 'ament' 'aments' 'ments' 'ot' 'sfera' 'al' 'als' 'era' 'ana' 'iste'
92
+ 'aire' 'eria' 'esa' 'eses' 'esos' 'or' '{i'}cia' '{i'}cies' 'icis' 'ici' '{i'}ci' '{i'}cis'
93
+ '{a`}ria' '{a`}ries' 'alla' 'ci{o'}' 'cions' 'n{c,}a' 'nces' '{o'}' 'dor' 'all'
94
+ 'il' '{i'}stic' 'enc' 'enca' '{i'}s' 'issa' 'issos' '{i'}ssem' '{i'}ssiu' 'issem' 'isseu' '{i'}sseu'
95
+ '{o'}s' 'osa' 'dora' 'dores' 'dors' 'adura' 'ble' 'bles' '{i'}vol' '{i'}vola' 'd{i'}s' 'egar' 'ejar' 'ificar'
96
+ 'itar' 'ables' 'adors' 'idores' 'idors'
97
+ 'adora' 'aci{o'}' 'doras' 'dur' 'dures' 'alleng{u"}es'
98
+ 'ant' 'ants' 'ancia' 'ancies' 'at{o`}ria' 'at{o`}ries' 'tori' 'toris'
99
+ 'ats' 'ions' 'ota' 'isam' 'ors' 'ora' 'ores' 'isament'
100
+ 'bilitat' 'bilitats' 'ivitat' 'ivitats' 'ari' 'aris' 'ionisme' 'ionista' 'ionistes'
101
+ 'ialista' 'ialistes' 'ialisme' 'ialismes' 'ud' 'uts' 'uds' 'encia' 'encies' '{e`}ncia' '{e`}ncies'
102
+ '{i"}tat' '{i"}tats' 'atiu' 'atius' 'atives' 'ativa' 'ativitat' 'ativitats' 'ible' 'ibles'
103
+ 'assa' 'asses' 'assos'
104
+ 'ent' 'ents'
105
+ '{i'}ssim' '{i'}ssima' '{i'}ssims' '{i'}ssimes' '{i`}ssem' '{i`}sseu' '{i`}ssin'
106
+ 'ims' 'ima' 'imes'
107
+ 'isme' 'ista' 'ismes' 'istes'
108
+ 'inia' 'inies' '{i'}inia' '{i'}nies' 'ita' 'ites' 'triu' 'trius'
109
+ 'oses' 'osos' 'ient' 'otes' 'ots'
110
+ (R1 delete) // group 1: deleted only when the R1 test passes. NOTE(review): two rows up, the third entry has i-acute followed by inia next to i-acute followed by nies; possibly a typo -- confirm against upstream
111
+ 'acions' 'ada' 'ades'
112
+ (R2 delete) // group 2: deleted only when the R2 test passes
113
+ 'log{i'}a' 'log{i'}es''logia' 'logies' 'logi' 'logis' 'l{o'}gica' 'l{o'}gics' 'l{o'}giques'
114
+ (R2 <- 'log') // group 3: rewritten to log when R2 passes. NOTE(review): in the row above, the second and third literals have no space between them; they appear to still read as two separate strings -- confirm
115
+ 'ic' 'ica' 'ics' 'iques'
116
+ (R2 <- 'ic') // group 4: rewritten to ic when R2 passes
117
+ 'qu{i'}ssim' 'qu{i'}ssims' 'qu{i'}ssimes' 'qu{i'}ssima'
118
+ (R1 <- 'c') // group 5: rewritten to c when R1 passes
119
+ )
120
+ )
121
+
122
+ define verb_suffix as ( // matches one listed suffix at the cursor and deletes it when the region test of its group passes; fails otherwise
123
+ [substring] among(
124
+ 'ador' 'adora' 'adors' 'adores' 're' 'ie'
125
+ 'ent' 'ents' 'udes' 'ar{a`}' 'eren'
126
+ 'ar{a'}' 'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
127
+ 'aria' 'arian' 'arien' 'aries' 'ar{a`}s'
128
+ 'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ara'
129
+ 'ar{e'}' 'ar{e'}s'
130
+ 'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
131
+ 'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
132
+ 'er{e'}' 'er' 'erau' 'erass'
133
+ 'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
134
+ 'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
135
+ 'ir{e'}' '{i'}rem' '{i'}reu' '{i'}eu'
136
+ 'ia' 'ies' '{i'}em' '{i`}eu' 'ien'
137
+ 'at' 'ut' 'uda' 'ava' 'aves' 'avem' '{a'}vem' '{a`}vem' '{a`}veu' '{a'}veu' 'aven' 'au' 'ats'
138
+ 'asseu' 'esseu' 'eresseu' '{a`}sseu' '{a`}ssem' '{a`}ssim' '{a`}ssiu'
139
+ 'essen' 'esses' 'assen' 'asses' 'assim' 'assiu'
140
+ '{e'}ssen' '{e'}sseu' '{e'}ssim' '{e'}ssiu' '{e'}ssem'
141
+ '{i'}' 'ares' '{a`}rem' '{a`}reu' '{a`}ren'
142
+ 'ar{i'}em' 'ar{i'}eu'
143
+ 'areu' 'aren' 'ant' '{i"}m' '{i"}u'
144
+ '{e'}s' '{i"}en' 'en' 'es' 'em' 'am' 'ams' '{i"}a' '{i"}es'
145
+ 'dre' 'eix' 'eixer' 'tzar' 'eixes' 'ides' '{i"}des' 'it' '{i"}t' '{i"}da'
146
+ 'aba' 'ada' 'ades' 'ida' '{i'}a' 'iera' 'ad' 'ed' 'its'
147
+ 'id' 'ids' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
148
+ 'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
149
+ 'ido' 'iendo' 'i{o'}' 'ar' 'ir' 'as'
150
+ 'ieu' 'ii' 'io' 'i{a`}'
151
+ 'ess' 'essin' 'essis' 'ass' 'assin' 'assis' 'essim' '{e`}ssim' '{e`}ssiu'
152
+ 'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
153
+ 'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
154
+ 'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
155
+ 'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos' 'ques'
156
+ '{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
157
+ 'ira' 'iran' 'irem' 'iren' 'ires' 'ireu' 'iria' 'irien'
158
+ 'iries' 'ir{a`}' 'ir{a`}s' 'ir{e`}' 'ir{i`}em' 'ir{i`}eu'
159
+ 'isquen' 'iguem' 'igueu' 'esqui' 'esquin' 'esquis' 'eixi' 'eixin' 'eixis'
160
+ 'eixen' 'eixo' 'isin' 'isis' 'esques' 'sis' 'sin'
161
+ 'int' 'ir{i'}em' 'ir{i'}eu' 'isc' 'atges' 'esca' 'esquen'
162
+ 'issen' 'isses' 'issin' 'issis' 'isca' 'issiu' 'issim'
163
+ '{i"}sc' '{i"}sca' '{i"}ssin' '{i'}ssiu' '{i'}ssim' '{i"}ssis' '{i"}guem' '{i"}gueu'
164
+ '{i"}ra' '{i"}ren' '{i"}res'
165
+ '{i"}squen' '{i"}sques' '{i"}ssen' '{i"}sses' '{i"}xo' '{i"}xen' '{i"}xes' '{i"}x'
166
+ 'ixo' 'ixen' 'ixes' 'ix' 'ixa' 'inin' 'inis' 'ini' 'ineu' 'itza' 'itzi' 'itzeu' 'itzis'
167
+ 'itzo' 'itz' 'itz{a`}' 'arem' 'in' '{a`}s' 'i{i"}' 'i{i"}n' 'i{i"}s'
168
+ (R1 delete) // group 1: deleted only when the R1 test passes
169
+ 'ando'
170
+ (R2 delete) // group 2: deleted only when the R2 test passes
171
+ )
172
+ )
173
+
174
+ define residual_suffix as ( // matches one short listed ending at the cursor and runs the action of its group; fails when nothing matches or R1 fails
175
+ [substring] among(
176
+ 'os' 'a' 'o' '{a'}' '{a`}' '{i'}' '{o'}' 'e' '{e'}' 'eu' 'iu'
177
+ 'is' 'i' 'ir' 's' '{i`}' 'itz' '{i"}' '{i"}n' '{i"}s' 'it'
178
+ (R1 delete) // group 1: deleted only when the R1 test passes
179
+ 'iqu'
180
+ (R1 <- 'ic') // group 2: rewritten to ic when the R1 test passes
181
+ )
182
+ )
183
+ )
184
+
185
+ define stem as ( // external entry point: marks regions, strips suffixes from the end of the word, then removes accents
186
+ do mark_regions
187
+ backwards (
188
+ do attached_pronoun
189
+ do ( standard_suffix or // verb_suffix is tried only when standard_suffix fails
190
+ verb_suffix
191
+ )
192
+ do residual_suffix
193
+ )
194
+ do cleaning // runs after the backwards block, i.e. in forward mode over the stemmed word
195
+ )
196
+
197
+ /*
198
+ First works 2010/07/19
199
+ First Grammatical Reviews: https://ca.wikipedia.org/wiki/Gram%C3%A0tica_del_catal%C3%A0
200
+ Suffix list: https://ca.wikipedia.org/wiki/Llista_de_sufixos
201
+ Irregular Verbs: https://ca.wikipedia.org/wiki/Flexi%C3%B3_verbal_del_catal%C3%A0
202
+ */