mittens 0.2.0 → 0.3.0

This diff shows the changes between publicly available package versions as they appear in the supported public registries to which they were released. It is provided for informational purposes only.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +3 -3
  4. data/lib/mittens/version.rb +1 -1
  5. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  6. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  7. data/vendor/snowball/GNUmakefile +194 -136
  8. data/vendor/snowball/NEWS +798 -3
  9. data/vendor/snowball/README.rst +50 -1
  10. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  11. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  12. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  13. data/vendor/snowball/algorithms/basque.sbl +4 -19
  14. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  15. data/vendor/snowball/algorithms/danish.sbl +1 -1
  16. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  17. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  18. data/vendor/snowball/algorithms/english.sbl +52 -37
  19. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  20. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  21. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  22. data/vendor/snowball/algorithms/french.sbl +42 -16
  23. data/vendor/snowball/algorithms/german.sbl +35 -14
  24. data/vendor/snowball/algorithms/greek.sbl +76 -76
  25. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  26. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  27. data/vendor/snowball/algorithms/italian.sbl +11 -21
  28. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  29. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  30. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  31. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  32. data/vendor/snowball/algorithms/porter.sbl +2 -2
  33. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  34. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  35. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  36. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  37. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  38. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  39. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  40. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  41. data/vendor/snowball/compiler/analyser.c +445 -192
  42. data/vendor/snowball/compiler/driver.c +109 -101
  43. data/vendor/snowball/compiler/generator.c +853 -464
  44. data/vendor/snowball/compiler/generator_ada.c +404 -366
  45. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  46. data/vendor/snowball/compiler/generator_go.c +323 -254
  47. data/vendor/snowball/compiler/generator_java.c +326 -252
  48. data/vendor/snowball/compiler/generator_js.c +362 -252
  49. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  50. data/vendor/snowball/compiler/generator_python.c +257 -240
  51. data/vendor/snowball/compiler/generator_rust.c +423 -251
  52. data/vendor/snowball/compiler/header.h +117 -71
  53. data/vendor/snowball/compiler/space.c +137 -68
  54. data/vendor/snowball/compiler/syswords.h +2 -2
  55. data/vendor/snowball/compiler/tokeniser.c +125 -107
  56. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  57. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  58. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  59. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  60. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  61. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  62. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  63. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  64. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  65. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  66. data/vendor/snowball/examples/stemwords.c +12 -12
  67. data/vendor/snowball/go/env.go +107 -31
  68. data/vendor/snowball/go/util.go +0 -4
  69. data/vendor/snowball/include/libstemmer.h +4 -0
  70. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  71. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  72. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  74. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  75. data/vendor/snowball/javascript/stemwords.js +3 -6
  76. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  77. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  78. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  79. data/vendor/snowball/libstemmer/modules.txt +13 -10
  80. data/vendor/snowball/libstemmer/test.c +1 -1
  81. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  82. data/vendor/snowball/pascal/generate.pl +13 -13
  83. data/vendor/snowball/python/create_init.py +4 -1
  84. data/vendor/snowball/python/setup.cfg +0 -3
  85. data/vendor/snowball/python/setup.py +8 -3
  86. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  87. data/vendor/snowball/python/stemwords.py +8 -12
  88. data/vendor/snowball/runtime/api.c +10 -5
  89. data/vendor/snowball/runtime/header.h +10 -9
  90. data/vendor/snowball/runtime/utilities.c +9 -9
  91. data/vendor/snowball/rust/build.rs +1 -1
  92. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  93. data/vendor/snowball/tests/stemtest.c +7 -4
  94. metadata +7 -7
  95. data/vendor/snowball/.travis.yml +0 -112
  96. data/vendor/snowball/algorithms/german2.sbl +0 -145
  97. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  98. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -3,8 +3,6 @@
3
3
  *
4
4
  * Author: Assaf Urieli
5
5
  * Emails: assaf.urieli at gmail.com
6
- * Version: 0.1 (15.05.2020)
7
- *
8
6
  ********************************************* */
9
7
 
10
8
  routines (
@@ -103,9 +101,9 @@ define mark_regions as (
103
101
  (
104
102
  try (
105
103
  // Replace past participle ge- at start of word
106
- // Unless word starts with gelt- or gebn-
104
+ // Unless word starts with gelt- or gebn- or the whole word is ge
107
105
  ['{Giml}{Ayen}']
108
- not ('{Lamed}{Tes}' or '{Beys}{Nun}') <- 'GE'
106
+ not ('{Lamed}{Tes}' or '{Beys}{Nun}' or atlimit) <- 'GE'
109
107
  )
110
108
 
111
109
  try (
@@ -149,7 +147,7 @@ define mark_regions as (
149
147
  // Either 3 consonants or the first non-vowel after a vowel
150
148
  (
151
149
  not (consonant consonant consonant setmark p1)
152
- goto vowel repeat vowel setmark p1
150
+ gopast vowel goto non-vowel setmark p1
153
151
  )
154
152
  try($p1 < x $p1 = x) // at least 3 past the prefix
155
153
  )
@@ -178,7 +176,7 @@ backwardmode (
178
176
 
179
177
  // Plural/adjective endings: -enem, -ener, -ene, -ens
180
178
  '{Ayen}{Nun}{Ayen}' '{Ayen}{Nun}{Ayen}{Mem}' '{Ayen}{Nun}{Ayen}{Reysh}' '{Ayen}{Nun}{Samekh}'
181
- (R1 delete
179
+ (R1 delete
182
180
  [substring] among (
183
181
  // -gegangen => -gey
184
182
  '{Giml}{Alef}{Nun}{Giml}' (<- '{Giml}{TsveyYudn}')
@@ -237,7 +235,7 @@ backwardmode (
237
235
 
238
236
  // Verb/past participle ending: -t
239
237
  '{Tes}'
240
- ( R1 delete )
238
+ ( R1 delete )
241
239
 
242
240
  // As well as noun/adjectives ending in -tn, -te, -ter, -ts so that the "-t" doesn't differentiate
243
241
  // Similarly for past participles: -tns, -tene, -tenem, -tener
@@ -271,95 +269,95 @@ backwardmode (
271
269
  (<- '{Shin}{Reysh}{TsveyYudn}{Beys}' )
272
270
 
273
271
  // -gemiten => -mayd
274
- 'GE{Mem}{Yud}{Tes}{Nun}'
272
+ 'GE{Mem}{Yud}{Tes}{Nun}'
275
273
  (<- '{Mem}{TsveyYudn}{Dalet}')
276
274
 
277
275
  // -gebiten => -bayt
278
- 'GE{Beys}{Yud}{Tes}{Nun}'
276
+ 'GE{Beys}{Yud}{Tes}{Nun}'
279
277
  (<- '{Beys}{TsveyYudn}{Tes}')
280
278
 
281
279
  // -gebisen => -bays
282
- 'GE{Beys}{Yud}{Samekh}{Nun}'
280
+ 'GE{Beys}{Yud}{Samekh}{Nun}'
283
281
  ( <- '{Beys}{TsveyYudn}{Samekh}')
284
282
 
285
283
  // -gevizen => -vayz
286
- '{TsveyVovn}{Yud}{Zayen}{Nun}'
284
+ '{TsveyVovn}{Yud}{Zayen}{Nun}'
287
285
  ( <- '{TsveyVovn}{TsveyYudn}{Zayen}')
288
286
 
289
287
  // -getriben => -trayb
290
- '{Tes}{Reysh}{Yud}{Beys}{Nun}'
288
+ '{Tes}{Reysh}{Yud}{Beys}{Nun}'
291
289
  ( <- '{Tes}{Reysh}{TsveyYudn}{Beys}')
292
290
 
293
291
  // -geliten => -layt
294
- 'GE{Lamed}{Yud}{Tes}{Nun}'
292
+ 'GE{Lamed}{Yud}{Tes}{Nun}'
295
293
  ( <- '{Lamed}{TsveyYudn}{Tes}')
296
294
 
297
295
  // -gekliben => -klayb
298
- '{Kuf}{Lamed}{Yud}{Beys}{Nun}'
296
+ '{Kuf}{Lamed}{Yud}{Beys}{Nun}'
299
297
  ( <- '{Kuf}{Lamed}{TsveyYudn}{Beys}')
300
298
 
301
299
  // -geriben => -rayb
302
- '{Reysh}{Yud}{Beys}{Nun}'
300
+ '{Reysh}{Yud}{Beys}{Nun}'
303
301
  ( <- '{Reysh}{TsveyYudn}{Beys}')
304
302
 
305
303
  // -gerisen => -rays
306
- 'GE{Reysh}{Yud}{Samekh}{Nun}'
304
+ 'GE{Reysh}{Yud}{Samekh}{Nun}'
307
305
  ( <- '{Reysh}{TsveyYudn}{Samekh}')
308
306
 
309
307
  // -geshvigen => -shvayg
310
- '{Shin}{TsveyVovn}{Yud}{Giml}{Nun}'
308
+ '{Shin}{TsveyVovn}{Yud}{Giml}{Nun}'
311
309
  ( <- '{Shin}{TsveyVovn}{TsveyYudn}{Giml}')
312
310
 
313
311
  // -geshmisen => -shmays
314
- '{Shin}{Mem}{Yud}{Samekh}{Nun}'
312
+ '{Shin}{Mem}{Yud}{Samekh}{Nun}'
315
313
  ( <- '{Shin}{Mem}{TsveyYudn}{Samekh}')
316
314
 
317
315
  // -geshniten => -shnayd
318
- '{Shin}{Nun}{Yud}{Tes}{Nun}'
316
+ '{Shin}{Nun}{Yud}{Tes}{Nun}'
319
317
  ( <- '{Shin}{Nun}{TsveyYudn}{Dalet}')
320
318
 
321
319
  // -gebunden => -bind
322
- '{Beys}{Vov}{Nun}{Dalet}{Nun}'
320
+ '{Beys}{Vov}{Nun}{Dalet}{Nun}'
323
321
  ( <- '{Beys}{Yud}{Nun}{Dalet}')
324
322
 
325
323
  // -gevuntshn => -vintsh
326
- '{TsveyVovn}{Vov}{Tes}{Shin}{Nun}'
324
+ '{TsveyVovn}{Vov}{Tes}{Shin}{Nun}'
327
325
  ( <- '{TsveyVovn}{Yud}{Tes}{Shin}')
328
326
 
329
327
  // -gezungen => -zing
330
- '{Zayen}{Vov}{Nun}{Giml}{Nun}'
328
+ '{Zayen}{Vov}{Nun}{Giml}{Nun}'
331
329
  ( <- '{Zayen}{Yud}{Nun}{Giml}')
332
330
 
333
331
  // -getrunken => -trink
334
- '{Tes}{Reysh}{Vov}{Nun}{Kuf}{Nun}'
332
+ '{Tes}{Reysh}{Vov}{Nun}{Kuf}{Nun}'
335
333
  ( <- '{Tes}{Reysh}{Yud}{Nun}{Kuf}')
336
334
 
337
335
  // -getsvungen => -tsving
338
- '{Tsadek}{TsveyVovn}{Vov}{Nun}{Giml}{Nun}'
336
+ '{Tsadek}{TsveyVovn}{Vov}{Nun}{Giml}{Nun}'
339
337
  ( <- '{Tsadek}{TsveyVovn}{Yud}{Nun}{Giml}')
340
338
 
341
339
  // -geshlungen => -shling
342
- '{Shin}{Lamed}{Vov}{Nun}{Giml}{Nun}'
340
+ '{Shin}{Lamed}{Vov}{Nun}{Giml}{Nun}'
343
341
  ( <- '{Shin}{Lamed}{Yud}{Nun}{Giml}')
344
342
 
345
343
  // -geboygen => -beyg
346
- '{Beys}{VovYud}{Giml}{Nun}'
344
+ '{Beys}{VovYud}{Giml}{Nun}'
347
345
  ( <- '{Beys}{TsveyYudn}{Giml}')
348
346
 
349
347
  // -gehoyben => -heyb
350
- '{Hey}{VovYud}{Beys}{Nun}'
348
+ '{Hey}{VovYud}{Beys}{Nun}'
351
349
  ( <- '{Hey}{TsveyYudn}{Beys}')
352
350
 
353
351
  // -farloyren => -farlir
354
- '{Fey}{Alef}{Reysh}{Lamed}{VovYud}{Reysh}{Nun}'
352
+ '{Fey}{Alef}{Reysh}{Lamed}{VovYud}{Reysh}{Nun}'
355
353
  ( <- '{Fey}{Alef}{Reysh}{Lamed}{Yud}{Reysh}')
356
354
 
357
355
  // -shtanen => -shtey
358
- '{Shin}{Tes}{Alef}{Nun}{Ayen}{Nun}'
356
+ '{Shin}{Tes}{Alef}{Nun}{Ayen}{Nun}'
359
357
  ( <- '{Shin}{Tes}{TsveyYudn}')
360
358
 
361
359
  // -geshvoyrn => -shver
362
- '{Shin}{TsveyVovn}{VovYud}{Reysh}{Nun}'
360
+ '{Shin}{TsveyVovn}{VovYud}{Reysh}{Nun}'
363
361
  ( <- '{Shin}{TsveyVovn}{Ayen}{Reysh}')
364
362
 
365
363
  // -(ge)brakht (shortened to -brakht after prefixes) => -breng
@@ -379,7 +377,7 @@ backwardmode (
379
377
  // Plural ending: -im
380
378
  '{Yud}{Mem}'
381
379
  ( R1 delete )
382
-
380
+
383
381
  // Plural ending: -os (Hebraic), replace with -h
384
382
  '{Vov}{Sof}'
385
383
  ( R1 <- '{Hey}' )
@@ -387,7 +385,7 @@ backwardmode (
387
385
  // Diminutive endings: -elekh, -ele, -lekh, -eles, -elen
388
386
  '{Ayen}{Lamed}{Ayen}{Khof}' '{Ayen}{Lamed}{Ayen}' '{Lamed}{Ayen}{Khof}' '{Ayen}{Lamed}{Ayen}{Samekh}' '{Ayen}{Lamed}{Ayen}{Nun}'
389
387
  ( R1 delete )
390
-
388
+
391
389
  // Noun ending: -ist
392
390
  '{Yud}{Samekh}{Tes}'
393
391
  (
@@ -400,18 +398,18 @@ backwardmode (
400
398
  // Noun ending: -istn
401
399
  '{Yud}{Samekh}{Tes}{Nun}'
402
400
  ( R1 delete )
403
-
401
+
404
402
  // Verb ending: -stu
405
403
  '{Samekh}{Tes}{Vov}'
406
404
  ( R1 delete )
407
405
 
408
- // Superlative ending: -ster, -ste, -stn
409
- '{Samekh}{Tes}{Ayen}{Reysh}' '{Samekh}{Tes}{Ayen}' '{Samekh}{Tes}{Nun}'
406
+ // Superlative ending: -ster, -ste, -stn
407
+ '{Samekh}{Tes}{Ayen}{Reysh}' '{Samekh}{Tes}{Ayen}' '{Samekh}{Tes}{Nun}'
410
408
  ( R1 delete )
411
-
409
+
412
410
  // Ambiguous verb ending: -st
413
411
  '{Samekh}{Tes}'
414
- ( R1 delete )
412
+ ( R1 delete )
415
413
  )
416
414
  )
417
415
 
@@ -436,7 +434,7 @@ backwardmode (
436
434
  // Exceptions to above: -blik, -glik
437
435
  '{Beys}{Lamed}{Yud}{Kuf}' '{Giml}{Lamed}{Yud}{Kuf}'
438
436
  ( true )
439
-
437
+
440
438
  // Present participle endings: -ndik
441
439
  '{Nun}{Dalet}{Yud}{Kuf}'
442
440
  ( R1 delete )