mittens 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +3 -3
  4. data/lib/mittens/version.rb +1 -1
  5. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  6. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  7. data/vendor/snowball/GNUmakefile +194 -136
  8. data/vendor/snowball/NEWS +798 -3
  9. data/vendor/snowball/README.rst +50 -1
  10. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  11. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  12. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  13. data/vendor/snowball/algorithms/basque.sbl +4 -19
  14. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  15. data/vendor/snowball/algorithms/danish.sbl +1 -1
  16. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  17. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  18. data/vendor/snowball/algorithms/english.sbl +52 -37
  19. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  20. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  21. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  22. data/vendor/snowball/algorithms/french.sbl +42 -16
  23. data/vendor/snowball/algorithms/german.sbl +35 -14
  24. data/vendor/snowball/algorithms/greek.sbl +76 -76
  25. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  26. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  27. data/vendor/snowball/algorithms/italian.sbl +11 -21
  28. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  29. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  30. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  31. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  32. data/vendor/snowball/algorithms/porter.sbl +2 -2
  33. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  34. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  35. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  36. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  37. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  38. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  39. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  40. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  41. data/vendor/snowball/compiler/analyser.c +445 -192
  42. data/vendor/snowball/compiler/driver.c +109 -101
  43. data/vendor/snowball/compiler/generator.c +853 -464
  44. data/vendor/snowball/compiler/generator_ada.c +404 -366
  45. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  46. data/vendor/snowball/compiler/generator_go.c +323 -254
  47. data/vendor/snowball/compiler/generator_java.c +326 -252
  48. data/vendor/snowball/compiler/generator_js.c +362 -252
  49. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  50. data/vendor/snowball/compiler/generator_python.c +257 -240
  51. data/vendor/snowball/compiler/generator_rust.c +423 -251
  52. data/vendor/snowball/compiler/header.h +117 -71
  53. data/vendor/snowball/compiler/space.c +137 -68
  54. data/vendor/snowball/compiler/syswords.h +2 -2
  55. data/vendor/snowball/compiler/tokeniser.c +125 -107
  56. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  57. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  58. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  59. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  60. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  61. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  62. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  63. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  64. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  65. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  66. data/vendor/snowball/examples/stemwords.c +12 -12
  67. data/vendor/snowball/go/env.go +107 -31
  68. data/vendor/snowball/go/util.go +0 -4
  69. data/vendor/snowball/include/libstemmer.h +4 -0
  70. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  71. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  72. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  74. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  75. data/vendor/snowball/javascript/stemwords.js +3 -6
  76. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  77. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  78. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  79. data/vendor/snowball/libstemmer/modules.txt +13 -10
  80. data/vendor/snowball/libstemmer/test.c +1 -1
  81. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  82. data/vendor/snowball/pascal/generate.pl +13 -13
  83. data/vendor/snowball/python/create_init.py +4 -1
  84. data/vendor/snowball/python/setup.cfg +0 -3
  85. data/vendor/snowball/python/setup.py +8 -3
  86. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  87. data/vendor/snowball/python/stemwords.py +8 -12
  88. data/vendor/snowball/runtime/api.c +10 -5
  89. data/vendor/snowball/runtime/header.h +10 -9
  90. data/vendor/snowball/runtime/utilities.c +9 -9
  91. data/vendor/snowball/rust/build.rs +1 -1
  92. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  93. data/vendor/snowball/tests/stemtest.c +7 -4
  94. metadata +7 -7
  95. data/vendor/snowball/.travis.yml +0 -112
  96. data/vendor/snowball/algorithms/german2.sbl +0 -145
  97. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  98. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -1,5 +1,6 @@
1
1
 
2
2
  routines (
3
+ norm
3
4
  prelude postlude mark_regions
4
5
  RV R1 R2
5
6
  step_0
@@ -23,11 +24,23 @@ stringescapes {}
23
24
  stringdef a^ '{U+00E2}' // a circumflex
24
25
  stringdef i^ '{U+00EE}' // i circumflex
25
26
  stringdef a+ '{U+0103}' // a breve
26
- stringdef s, '{U+015F}' // s cedilla
27
- stringdef t, '{U+0163}' // t cedilla
27
+ stringdef sc '{U+015F}' // s cedilla
28
+ stringdef tc '{U+0163}' // t cedilla
29
+ stringdef s, '{U+0219}' // s comma
30
+ stringdef t, '{U+021B}' // t comma
28
31
 
29
32
  define v 'aeiou{a^}{i^}{a+}'
30
33
 
34
+ // Normalize old cedilla forms to correct comma-below forms.
35
+ define norm as (
36
+ do repeat goto (
37
+ [substring] among (
38
+ '{sc}' (<- '{s,}')
39
+ '{tc}' (<- '{t,}')
40
+ )
41
+ )
42
+ )
43
+
31
44
  define prelude as (
32
45
  repeat goto (
33
46
  v [ ('u' ] v <- 'U') or
@@ -190,7 +203,7 @@ backwardmode (
190
203
  'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
191
204
  '{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
192
205
 
193
- // pluferfect:
206
+ // pluperfect:
194
207
  'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
195
208
  'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
196
209
  '{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
@@ -223,6 +236,7 @@ backwardmode (
223
236
  )
224
237
 
225
238
  define stem as (
239
+ do norm
226
240
  do prelude
227
241
  do mark_regions
228
242
  backwards (
@@ -233,4 +247,3 @@ define stem as (
233
247
  )
234
248
  do postlude
235
249
  )
236
-