mittens 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +4 -4
  5. data/lib/mittens/version.rb +1 -1
  6. data/mittens.gemspec +1 -1
  7. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  8. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  9. data/vendor/snowball/GNUmakefile +194 -136
  10. data/vendor/snowball/NEWS +798 -3
  11. data/vendor/snowball/README.rst +50 -1
  12. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  13. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  14. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  15. data/vendor/snowball/algorithms/basque.sbl +4 -19
  16. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  17. data/vendor/snowball/algorithms/danish.sbl +1 -1
  18. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  19. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  20. data/vendor/snowball/algorithms/english.sbl +52 -37
  21. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  22. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  23. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  24. data/vendor/snowball/algorithms/french.sbl +42 -16
  25. data/vendor/snowball/algorithms/german.sbl +35 -14
  26. data/vendor/snowball/algorithms/greek.sbl +76 -76
  27. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  28. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  29. data/vendor/snowball/algorithms/italian.sbl +11 -21
  30. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  31. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  32. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  33. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  34. data/vendor/snowball/algorithms/porter.sbl +2 -2
  35. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  36. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  37. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  38. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  39. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  40. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  41. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  42. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  43. data/vendor/snowball/compiler/analyser.c +445 -192
  44. data/vendor/snowball/compiler/driver.c +109 -101
  45. data/vendor/snowball/compiler/generator.c +853 -464
  46. data/vendor/snowball/compiler/generator_ada.c +404 -366
  47. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  48. data/vendor/snowball/compiler/generator_go.c +323 -254
  49. data/vendor/snowball/compiler/generator_java.c +326 -252
  50. data/vendor/snowball/compiler/generator_js.c +362 -252
  51. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  52. data/vendor/snowball/compiler/generator_python.c +257 -240
  53. data/vendor/snowball/compiler/generator_rust.c +423 -251
  54. data/vendor/snowball/compiler/header.h +117 -71
  55. data/vendor/snowball/compiler/space.c +137 -68
  56. data/vendor/snowball/compiler/syswords.h +2 -2
  57. data/vendor/snowball/compiler/tokeniser.c +125 -107
  58. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  59. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  60. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  61. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  62. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  63. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  64. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  65. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  66. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  67. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  68. data/vendor/snowball/examples/stemwords.c +12 -12
  69. data/vendor/snowball/go/env.go +107 -31
  70. data/vendor/snowball/go/util.go +0 -4
  71. data/vendor/snowball/include/libstemmer.h +4 -0
  72. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  73. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  74. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  75. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  76. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  77. data/vendor/snowball/javascript/stemwords.js +3 -6
  78. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  79. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  80. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  81. data/vendor/snowball/libstemmer/modules.txt +13 -10
  82. data/vendor/snowball/libstemmer/test.c +1 -1
  83. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  84. data/vendor/snowball/pascal/generate.pl +13 -13
  85. data/vendor/snowball/python/create_init.py +4 -1
  86. data/vendor/snowball/python/setup.cfg +0 -3
  87. data/vendor/snowball/python/setup.py +8 -3
  88. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  89. data/vendor/snowball/python/stemwords.py +8 -12
  90. data/vendor/snowball/runtime/api.c +10 -5
  91. data/vendor/snowball/runtime/header.h +10 -9
  92. data/vendor/snowball/runtime/utilities.c +9 -9
  93. data/vendor/snowball/rust/build.rs +1 -1
  94. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  95. data/vendor/snowball/tests/stemtest.c +7 -4
  96. metadata +8 -12
  97. data/vendor/snowball/.travis.yml +0 -112
  98. data/vendor/snowball/algorithms/german2.sbl +0 -145
  99. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  100. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -1,5 +1,6 @@
1
1
 
2
2
  routines (
3
+ norm
3
4
  prelude postlude mark_regions
4
5
  RV R1 R2
5
6
  step_0
@@ -23,11 +24,23 @@ stringescapes {}
23
24
  stringdef a^ '{U+00E2}' // a circumflex
24
25
  stringdef i^ '{U+00EE}' // i circumflex
25
26
  stringdef a+ '{U+0103}' // a breve
26
- stringdef s, '{U+015F}' // s cedilla
27
- stringdef t, '{U+0163}' // t cedilla
27
+ stringdef sc '{U+015F}' // s cedilla
28
+ stringdef tc '{U+0163}' // t cedilla
29
+ stringdef s, '{U+0219}' // s comma
30
+ stringdef t, '{U+021B}' // t comma
28
31
 
29
32
  define v 'aeiou{a^}{i^}{a+}'
30
33
 
34
+ // Normalize old cedilla forms to correct comma-below forms.
35
+ define norm as (
36
+ do repeat goto (
37
+ [substring] among (
38
+ '{sc}' (<- '{s,}')
39
+ '{tc}' (<- '{t,}')
40
+ )
41
+ )
42
+ )
43
+
31
44
  define prelude as (
32
45
  repeat goto (
33
46
  v [ ('u' ] v <- 'U') or
@@ -190,7 +203,7 @@ backwardmode (
190
203
  'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
191
204
  '{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
192
205
 
193
- // pluferfect:
206
+ // pluperfect:
194
207
  'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
195
208
  'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
196
209
  '{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
@@ -223,6 +236,7 @@ backwardmode (
223
236
  )
224
237
 
225
238
  define stem as (
239
+ do norm
226
240
  do prelude
227
241
  do mark_regions
228
242
  backwards (
@@ -233,4 +247,3 @@ define stem as (
233
247
  )
234
248
  do postlude
235
249
  )
236
-