mittens 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +3 -3
  4. data/lib/mittens/version.rb +1 -1
  5. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  6. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  7. data/vendor/snowball/GNUmakefile +194 -136
  8. data/vendor/snowball/NEWS +798 -3
  9. data/vendor/snowball/README.rst +50 -1
  10. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  11. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  12. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  13. data/vendor/snowball/algorithms/basque.sbl +4 -19
  14. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  15. data/vendor/snowball/algorithms/danish.sbl +1 -1
  16. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  17. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  18. data/vendor/snowball/algorithms/english.sbl +52 -37
  19. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  20. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  21. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  22. data/vendor/snowball/algorithms/french.sbl +42 -16
  23. data/vendor/snowball/algorithms/german.sbl +35 -14
  24. data/vendor/snowball/algorithms/greek.sbl +76 -76
  25. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  26. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  27. data/vendor/snowball/algorithms/italian.sbl +11 -21
  28. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  29. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  30. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  31. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  32. data/vendor/snowball/algorithms/porter.sbl +2 -2
  33. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  34. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  35. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  36. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  37. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  38. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  39. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  40. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  41. data/vendor/snowball/compiler/analyser.c +445 -192
  42. data/vendor/snowball/compiler/driver.c +109 -101
  43. data/vendor/snowball/compiler/generator.c +853 -464
  44. data/vendor/snowball/compiler/generator_ada.c +404 -366
  45. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  46. data/vendor/snowball/compiler/generator_go.c +323 -254
  47. data/vendor/snowball/compiler/generator_java.c +326 -252
  48. data/vendor/snowball/compiler/generator_js.c +362 -252
  49. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  50. data/vendor/snowball/compiler/generator_python.c +257 -240
  51. data/vendor/snowball/compiler/generator_rust.c +423 -251
  52. data/vendor/snowball/compiler/header.h +117 -71
  53. data/vendor/snowball/compiler/space.c +137 -68
  54. data/vendor/snowball/compiler/syswords.h +2 -2
  55. data/vendor/snowball/compiler/tokeniser.c +125 -107
  56. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  57. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  58. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  59. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  60. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  61. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  62. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  63. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  64. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  65. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  66. data/vendor/snowball/examples/stemwords.c +12 -12
  67. data/vendor/snowball/go/env.go +107 -31
  68. data/vendor/snowball/go/util.go +0 -4
  69. data/vendor/snowball/include/libstemmer.h +4 -0
  70. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  71. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  72. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  74. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  75. data/vendor/snowball/javascript/stemwords.js +3 -6
  76. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  77. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  78. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  79. data/vendor/snowball/libstemmer/modules.txt +13 -10
  80. data/vendor/snowball/libstemmer/test.c +1 -1
  81. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  82. data/vendor/snowball/pascal/generate.pl +13 -13
  83. data/vendor/snowball/python/create_init.py +4 -1
  84. data/vendor/snowball/python/setup.cfg +0 -3
  85. data/vendor/snowball/python/setup.py +8 -3
  86. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  87. data/vendor/snowball/python/stemwords.py +8 -12
  88. data/vendor/snowball/runtime/api.c +10 -5
  89. data/vendor/snowball/runtime/header.h +10 -9
  90. data/vendor/snowball/runtime/utilities.c +9 -9
  91. data/vendor/snowball/rust/build.rs +1 -1
  92. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  93. data/vendor/snowball/tests/stemtest.c +7 -4
  94. metadata +7 -7
  95. data/vendor/snowball/.travis.yml +0 -112
  96. data/vendor/snowball/algorithms/german2.sbl +0 -145
  97. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  98. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -17,30 +17,29 @@
17
17
 
18
18
  symbol * b = create_b(n);
19
19
  - create an empty block b with room for n symbols
20
- b = increase_capacity(b, n);
20
+ b = increase_capacity_b(b, n);
21
21
  - increase the capacity of block b by n symbols (b may change)
22
22
  b2 = copy_b(b)
23
23
  - copy block b into b2
24
24
  lose_b(b);
25
25
  - lose block b
26
- b = move_to_b(b, n, p);
27
- - set the data in b to be the n symbols at address p
28
- b = add_to_b(b, n, p);
26
+ b = add_to_b(b, p, n);
29
27
  - add the n symbols at address p to the end of the data in b
30
28
  SIZE(b)
31
29
  - is the number of symbols in b
32
- For example:
30
+
31
+ For example:
33
32
 
34
33
  symbol * b = create_b(0);
35
- { int i;
36
- char p[10];
37
- for (i = 0; i < 100; i++) {
38
- sprintf(p, " %d", i);
39
- add_s_to_b(b, p);
34
+ { symbol i;
35
+ for (i = 'A'; i <= 'Z'; i++) {
36
+ add_symbol_to_b(b, i);
40
37
  }
41
38
  }
42
39
 
43
- and b contains " 0 1 2 ... 99" spaced out as symbols.
40
+ After running the above code b contains:
41
+
42
+ { (symbol)'A', (symbol)'B', ..., (symbol)'Z' }
44
43
  */
45
44
 
46
45
  /* For a block b, SIZE(b) is the number of symbols so far written into it,
@@ -69,45 +68,48 @@ extern void report_b(FILE * out, const symbol * p) {
69
68
  }
70
69
 
71
70
  extern void output_str(FILE * outfile, struct str * str) {
72
- report_b(outfile, str_data(str));
71
+ report_s(outfile, str_data(str));
73
72
  }
74
73
 
75
74
  extern void lose_b(symbol * p) {
76
- if (p == 0) return;
75
+ if (p == NULL) return;
77
76
  FREE((char *) p - HEAD);
78
77
  }
79
78
 
80
- extern symbol * increase_capacity(symbol * p, int n) {
79
+ extern symbol * increase_capacity_b(symbol * p, int n) {
81
80
  symbol * q = create_b(CAPACITY(p) + n + EXTENDER);
82
81
  memmove(q, p, CAPACITY(p) * sizeof(symbol));
83
82
  SIZE(q) = SIZE(p);
84
83
  lose_b(p); return q;
85
84
  }
86
85
 
87
- extern symbol * move_to_b(symbol * p, int n, const symbol * q) {
88
- int x = n - CAPACITY(p);
89
- if (x > 0) p = increase_capacity(p, x);
90
- memmove(p, q, n * sizeof(symbol)); SIZE(p) = n; return p;
91
- }
92
-
93
- extern symbol * add_to_b(symbol * p, int n, const symbol * q) {
86
+ extern symbol * add_to_b(symbol * p, const symbol * q, int n) {
94
87
  int x = SIZE(p) + n - CAPACITY(p);
95
- if (x > 0) p = increase_capacity(p, x);
88
+ if (x > 0) p = increase_capacity_b(p, x);
96
89
  memmove(p + SIZE(p), q, n * sizeof(symbol)); SIZE(p) += n; return p;
97
90
  }
98
91
 
99
92
  extern symbol * copy_b(const symbol * p) {
100
93
  int n = SIZE(p);
101
94
  symbol * q = create_b(n);
102
- move_to_b(q, n, p);
95
+ add_to_b(q, p, n);
103
96
  return q;
104
97
  }
105
98
 
106
99
  int space_count = 0;
107
100
 
108
- extern void * check_malloc(int n) {
101
+ static void * xmalloc(size_t n) {
102
+ void * result = malloc(n);
103
+ if (result == NULL) {
104
+ fprintf(stderr, "Failed to allocate %lu bytes\n", (unsigned long)n);
105
+ exit(1);
106
+ }
107
+ return result;
108
+ }
109
+
110
+ extern void * check_malloc(size_t n) {
109
111
  space_count++;
110
- return malloc(n);
112
+ return xmalloc(n);
111
113
  }
112
114
 
113
115
  extern void check_free(void * p) {
@@ -117,9 +119,9 @@ extern void check_free(void * p) {
117
119
 
118
120
  /* To convert a block to a zero terminated string: */
119
121
 
120
- extern char * b_to_s(const symbol * p) {
122
+ extern char * b_to_sz(const symbol * p) {
121
123
  int n = SIZE(p);
122
- char * s = (char *)malloc(n + 1);
124
+ char * s = (char *)xmalloc(n + 1);
123
125
  {
124
126
  int i;
125
127
  for (i = 0; i < n; i++) {
@@ -134,120 +136,179 @@ extern char * b_to_s(const symbol * p) {
134
136
  return s;
135
137
  }
136
138
 
137
- /* To add a zero terminated string to a block. If p = 0 the
139
+ /* Add a single symbol to a block. If p = 0 the
138
140
  block is created. */
139
141
 
140
- extern symbol * add_s_to_b(symbol * p, const char * s) {
141
- int n = strlen(s);
142
+ extern symbol * add_symbol_to_b(symbol * p, symbol ch) {
142
143
  int k;
143
- if (p == 0) p = create_b(n);
144
+ if (p == NULL) p = create_b(1);
144
145
  k = SIZE(p);
145
146
  {
146
- int x = k + n - CAPACITY(p);
147
- if (x > 0) p = increase_capacity(p, x);
147
+ int x = k + 1 - CAPACITY(p);
148
+ if (x > 0) p = increase_capacity_b(p, x);
148
149
  }
150
+ p[k] = ch;
151
+ SIZE(p)++;
152
+ return p;
153
+ }
154
+
155
+ extern byte * create_s(int n) {
156
+ byte * p = (byte *) (HEAD + (byte *) MALLOC(HEAD + (n + 1)));
157
+ CAPACITY(p) = n;
158
+ SIZE(p) = 0;
159
+ return p;
160
+ }
161
+
162
+ extern void report_s(FILE * out, const byte * p) {
163
+ fwrite(p, 1, SIZE(p), out);
164
+ }
165
+
166
+ extern void lose_s(byte * p) {
167
+ if (p == NULL) return;
168
+ FREE((byte *) p - HEAD);
169
+ }
170
+
171
+ extern byte * increase_capacity_s(byte * p, int n) {
172
+ byte * q = create_s(CAPACITY(p) + n + EXTENDER);
173
+ memmove(q, p, CAPACITY(p));
174
+ SIZE(q) = SIZE(p);
175
+ lose_s(p);
176
+ return q;
177
+ }
178
+
179
+ extern byte * copy_s(const byte * p) {
180
+ return add_s_to_s(NULL, (const char*)p, SIZE(p));
181
+ }
182
+
183
+ /* Add a string with given length to a byte block. If p = 0 the
184
+ block is created. */
185
+
186
+ extern byte * add_s_to_s(byte * p, const char * s, int n) {
187
+ int k;
188
+ if (p == NULL) p = create_s(n);
189
+ k = SIZE(p);
149
190
  {
150
- int i;
151
- for (i = 0; i < n; i++) p[i + k] = s[i];
191
+ int x = k + n - CAPACITY(p);
192
+ if (x > 0) p = increase_capacity_s(p, x);
152
193
  }
194
+ memcpy(p + k, s, n);
153
195
  SIZE(p) += n;
154
196
  return p;
155
197
  }
156
198
 
199
+ /* Add a zero terminated string to a byte block. If p = 0 the
200
+ block is created. */
201
+
202
+ extern byte * add_sz_to_s(byte * p, const char * s) {
203
+ return add_s_to_s(p, s, strlen(s));
204
+ }
205
+
206
+ /* Add a single character to a byte block. If p = 0 the
207
+ block is created. */
208
+
209
+ extern byte * add_char_to_s(byte * p, char ch) {
210
+ int k;
211
+ if (p == NULL) p = create_s(1);
212
+ k = SIZE(p);
213
+ {
214
+ int x = k + 1 - CAPACITY(p);
215
+ if (x > 0) p = increase_capacity_s(p, x);
216
+ }
217
+ p[k] = ch;
218
+ SIZE(p)++;
219
+ return p;
220
+ }
221
+
157
222
  /* The next section defines string handling capabilities in terms
158
- of the lower level block handling capabilities of space.c */
223
+ of the lower level byte block handling capabilities of space.c */
159
224
  /* -------------------------------------------------------------*/
160
225
 
161
226
  struct str {
162
- symbol * data;
227
+ byte * data;
163
228
  };
164
229
 
165
230
  /* Create a new string. */
166
231
  extern struct str * str_new(void) {
167
-
168
- struct str * output = (struct str *) malloc(sizeof(struct str));
169
- output->data = create_b(0);
232
+ struct str * output = (struct str *) xmalloc(sizeof(struct str));
233
+ output->data = create_s(0);
170
234
  return output;
171
235
  }
172
236
 
173
237
  /* Delete a string. */
174
238
  extern void str_delete(struct str * str) {
175
-
176
- lose_b(str->data);
239
+ lose_s(str->data);
177
240
  free(str);
178
241
  }
179
242
 
180
243
  /* Append a str to this str. */
181
244
  extern void str_append(struct str * str, const struct str * add) {
182
-
183
- symbol * q = add->data;
184
- str->data = add_to_b(str->data, SIZE(q), q);
245
+ byte * q = add->data;
246
+ str->data = add_s_to_s(str->data, (char *)q, SIZE(q));
185
247
  }
186
248
 
187
249
  /* Append a character to this str. */
188
250
  extern void str_append_ch(struct str * str, char add) {
189
-
190
- symbol sym = (unsigned char)add;
191
- str->data = add_to_b(str->data, 1, &sym);
251
+ str->data = add_char_to_s(str->data, add);
192
252
  }
193
253
 
194
- /* Append a low level block to a str. */
195
- extern void str_append_b(struct str * str, const symbol * q) {
196
-
197
- str->data = add_to_b(str->data, SIZE(q), q);
198
- }
199
-
200
- /* Append the tail of a low level block to a str. */
201
- extern void str_append_b_tail(struct str * str, const symbol * q, int skip) {
202
- if (skip < 0 || skip >= SIZE(q)) return;
203
-
204
- str->data = add_to_b(str->data, SIZE(q) - skip, q + skip);
254
+ /* Append a low level byte block to a str. */
255
+ extern void str_append_s(struct str * str, const byte * q) {
256
+ str->data = add_s_to_s(str->data, (const char *)q, SIZE(q));
205
257
  }
206
258
 
207
259
  /* Append a (char *, null terminated) string to a str. */
208
260
  extern void str_append_string(struct str * str, const char * s) {
209
-
210
- str->data = add_s_to_b(str->data, s);
261
+ str->data = add_sz_to_s(str->data, s);
211
262
  }
212
263
 
213
264
  /* Append an integer to a str. */
214
265
  extern void str_append_int(struct str * str, int i) {
215
-
216
266
  char s[30];
217
267
  sprintf(s, "%d", i);
218
268
  str_append_string(str, s);
219
269
  }
220
270
 
271
+ /* Append wide character to a string as UTF-8. */
272
+ extern void str_append_wchar_as_utf8(struct str * str, symbol ch) {
273
+ if (ch < 0x80) {
274
+ str_append_ch(str, ch);
275
+ return;
276
+ }
277
+ if (ch < 0x800) {
278
+ str_append_ch(str, (ch >> 6) | 0xC0);
279
+ str_append_ch(str, (ch & 0x3F) | 0x80);
280
+ return;
281
+ }
282
+ str_append_ch(str, (ch >> 12) | 0xE0);
283
+ str_append_ch(str, ((ch >> 6) & 0x3F) | 0x80);
284
+ str_append_ch(str, (ch & 0x3F) | 0x80);
285
+ }
286
+
221
287
  /* Clear a string */
222
288
  extern void str_clear(struct str * str) {
223
-
224
289
  SIZE(str->data) = 0;
225
290
  }
226
291
 
227
292
  /* Set a string */
228
293
  extern void str_assign(struct str * str, const char * s) {
229
-
230
294
  str_clear(str);
231
295
  str_append_string(str, s);
232
296
  }
233
297
 
234
298
  /* Copy a string. */
235
299
  extern struct str * str_copy(const struct str * old) {
236
-
237
300
  struct str * newstr = str_new();
238
301
  str_append(newstr, old);
239
302
  return newstr;
240
303
  }
241
304
 
242
305
  /* Get the data stored in this str. */
243
- extern symbol * str_data(const struct str * str) {
244
-
306
+ extern byte * str_data(const struct str * str) {
245
307
  return str->data;
246
308
  }
247
309
 
248
310
  /* Get the length of the str. */
249
311
  extern int str_len(const struct str * str) {
250
-
251
312
  return SIZE(str->data);
252
313
  }
253
314
 
@@ -259,6 +320,14 @@ extern int str_back(const struct str *str) {
259
320
  return SIZE(str->data) ? str->data[SIZE(str->data) - 1] : -1;
260
321
  }
261
322
 
323
+ /* Remove the last character of the str.
324
+ *
325
+ * Or do nothing if the string is empty.
326
+ */
327
+ extern void str_pop(const struct str *str) {
328
+ if (SIZE(str->data)) --SIZE(str->data);
329
+ }
330
+
262
331
  extern int get_utf8(const symbol * p, int * slot) {
263
332
  int b0, b1;
264
333
  b0 = *p++;
@@ -8,9 +8,9 @@ static const struct system_word vocab[82+1] = {
8
8
  { 1, (const byte *)"+", c_plus },
9
9
  { 1, (const byte *)"-", c_minus },
10
10
  { 1, (const byte *)"/", c_divide },
11
- { 1, (const byte *)"<", c_ls },
11
+ { 1, (const byte *)"<", c_lt },
12
12
  { 1, (const byte *)"=", c_assign },
13
- { 1, (const byte *)">", c_gr },
13
+ { 1, (const byte *)">", c_gt },
14
14
  { 1, (const byte *)"?", c_debug },
15
15
  { 1, (const byte *)"[", c_leftslice },
16
16
  { 1, (const byte *)"]", c_rightslice },