mittens 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -3
- data/lib/mittens/version.rb +1 -1
- data/vendor/snowball/.github/workflows/ci.yml +216 -0
- data/vendor/snowball/CONTRIBUTING.rst +111 -62
- data/vendor/snowball/GNUmakefile +194 -136
- data/vendor/snowball/NEWS +798 -3
- data/vendor/snowball/README.rst +50 -1
- data/vendor/snowball/ada/src/stemmer.adb +25 -13
- data/vendor/snowball/ada/src/stemmer.ads +9 -9
- data/vendor/snowball/ada/stemmer_config.gpr +7 -7
- data/vendor/snowball/algorithms/basque.sbl +4 -19
- data/vendor/snowball/algorithms/catalan.sbl +2 -9
- data/vendor/snowball/algorithms/danish.sbl +1 -1
- data/vendor/snowball/algorithms/dutch.sbl +284 -122
- data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
- data/vendor/snowball/algorithms/english.sbl +52 -37
- data/vendor/snowball/algorithms/esperanto.sbl +157 -0
- data/vendor/snowball/algorithms/estonian.sbl +269 -0
- data/vendor/snowball/algorithms/finnish.sbl +2 -3
- data/vendor/snowball/algorithms/french.sbl +42 -16
- data/vendor/snowball/algorithms/german.sbl +35 -14
- data/vendor/snowball/algorithms/greek.sbl +76 -76
- data/vendor/snowball/algorithms/hungarian.sbl +8 -6
- data/vendor/snowball/algorithms/indonesian.sbl +14 -8
- data/vendor/snowball/algorithms/italian.sbl +11 -21
- data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
- data/vendor/snowball/algorithms/lovins.sbl +0 -1
- data/vendor/snowball/algorithms/nepali.sbl +138 -37
- data/vendor/snowball/algorithms/norwegian.sbl +19 -5
- data/vendor/snowball/algorithms/porter.sbl +2 -2
- data/vendor/snowball/algorithms/portuguese.sbl +9 -13
- data/vendor/snowball/algorithms/romanian.sbl +17 -4
- data/vendor/snowball/algorithms/serbian.sbl +467 -468
- data/vendor/snowball/algorithms/spanish.sbl +5 -7
- data/vendor/snowball/algorithms/swedish.sbl +60 -6
- data/vendor/snowball/algorithms/tamil.sbl +207 -176
- data/vendor/snowball/algorithms/turkish.sbl +461 -445
- data/vendor/snowball/algorithms/yiddish.sbl +36 -38
- data/vendor/snowball/compiler/analyser.c +445 -192
- data/vendor/snowball/compiler/driver.c +109 -101
- data/vendor/snowball/compiler/generator.c +853 -464
- data/vendor/snowball/compiler/generator_ada.c +404 -366
- data/vendor/snowball/compiler/generator_csharp.c +297 -260
- data/vendor/snowball/compiler/generator_go.c +323 -254
- data/vendor/snowball/compiler/generator_java.c +326 -252
- data/vendor/snowball/compiler/generator_js.c +362 -252
- data/vendor/snowball/compiler/generator_pascal.c +349 -197
- data/vendor/snowball/compiler/generator_python.c +257 -240
- data/vendor/snowball/compiler/generator_rust.c +423 -251
- data/vendor/snowball/compiler/header.h +117 -71
- data/vendor/snowball/compiler/space.c +137 -68
- data/vendor/snowball/compiler/syswords.h +2 -2
- data/vendor/snowball/compiler/tokeniser.c +125 -107
- data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
- data/vendor/snowball/csharp/Stemwords/App.config +2 -2
- data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
- data/vendor/snowball/doc/libstemmer_c_README +7 -4
- data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
- data/vendor/snowball/doc/libstemmer_java_README +12 -1
- data/vendor/snowball/doc/libstemmer_js_README +6 -4
- data/vendor/snowball/doc/libstemmer_python_README +9 -4
- data/vendor/snowball/examples/stemwords.c +12 -12
- data/vendor/snowball/go/env.go +107 -31
- data/vendor/snowball/go/util.go +0 -4
- data/vendor/snowball/include/libstemmer.h +4 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
- data/vendor/snowball/javascript/base-stemmer.js +186 -2
- data/vendor/snowball/javascript/stemwords.js +3 -6
- data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
- data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
- data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
- data/vendor/snowball/libstemmer/modules.txt +13 -10
- data/vendor/snowball/libstemmer/test.c +1 -1
- data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
- data/vendor/snowball/pascal/generate.pl +13 -13
- data/vendor/snowball/python/create_init.py +4 -1
- data/vendor/snowball/python/setup.cfg +0 -3
- data/vendor/snowball/python/setup.py +8 -3
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
- data/vendor/snowball/python/stemwords.py +8 -12
- data/vendor/snowball/runtime/api.c +10 -5
- data/vendor/snowball/runtime/header.h +10 -9
- data/vendor/snowball/runtime/utilities.c +9 -9
- data/vendor/snowball/rust/build.rs +1 -1
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
- data/vendor/snowball/tests/stemtest.c +7 -4
- metadata +7 -7
- data/vendor/snowball/.travis.yml +0 -112
- data/vendor/snowball/algorithms/german2.sbl +0 -145
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
- data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -17,30 +17,29 @@
|
|
17
17
|
|
18
18
|
symbol * b = create_b(n);
|
19
19
|
- create an empty block b with room for n symbols
|
20
|
-
b =
|
20
|
+
b = increase_capacity_b(b, n);
|
21
21
|
- increase the capacity of block b by n symbols (b may change)
|
22
22
|
b2 = copy_b(b)
|
23
23
|
- copy block b into b2
|
24
24
|
lose_b(b);
|
25
25
|
- lose block b
|
26
|
-
b =
|
27
|
-
- set the data in b to be the n symbols at address p
|
28
|
-
b = add_to_b(b, n, p);
|
26
|
+
b = add_to_b(b, p, n);
|
29
27
|
- add the n symbols at address p to the end of the data in b
|
30
28
|
SIZE(b)
|
31
29
|
- is the number of symbols in b
|
32
|
-
|
30
|
+
|
31
|
+
For example:
|
33
32
|
|
34
33
|
symbol * b = create_b(0);
|
35
|
-
{
|
36
|
-
|
37
|
-
|
38
|
-
sprintf(p, " %d", i);
|
39
|
-
add_s_to_b(b, p);
|
34
|
+
{ symbol i;
|
35
|
+
for (i = 'A'; i <= 'Z'; i++) {
|
36
|
+
add_symbol_to_b(b, i);
|
40
37
|
}
|
41
38
|
}
|
42
39
|
|
43
|
-
|
40
|
+
After running the above code b contains:
|
41
|
+
|
42
|
+
{ (symbol)'A', (symbol)'B', ..., (symbol)'Z' }
|
44
43
|
*/
|
45
44
|
|
46
45
|
/* For a block b, SIZE(b) is the number of symbols so far written into it,
|
@@ -69,45 +68,48 @@ extern void report_b(FILE * out, const symbol * p) {
|
|
69
68
|
}
|
70
69
|
|
71
70
|
extern void output_str(FILE * outfile, struct str * str) {
|
72
|
-
|
71
|
+
report_s(outfile, str_data(str));
|
73
72
|
}
|
74
73
|
|
75
74
|
extern void lose_b(symbol * p) {
|
76
|
-
if (p ==
|
75
|
+
if (p == NULL) return;
|
77
76
|
FREE((char *) p - HEAD);
|
78
77
|
}
|
79
78
|
|
80
|
-
extern symbol *
|
79
|
+
extern symbol * increase_capacity_b(symbol * p, int n) {
|
81
80
|
symbol * q = create_b(CAPACITY(p) + n + EXTENDER);
|
82
81
|
memmove(q, p, CAPACITY(p) * sizeof(symbol));
|
83
82
|
SIZE(q) = SIZE(p);
|
84
83
|
lose_b(p); return q;
|
85
84
|
}
|
86
85
|
|
87
|
-
extern symbol *
|
88
|
-
int x = n - CAPACITY(p);
|
89
|
-
if (x > 0) p = increase_capacity(p, x);
|
90
|
-
memmove(p, q, n * sizeof(symbol)); SIZE(p) = n; return p;
|
91
|
-
}
|
92
|
-
|
93
|
-
extern symbol * add_to_b(symbol * p, int n, const symbol * q) {
|
86
|
+
extern symbol * add_to_b(symbol * p, const symbol * q, int n) {
|
94
87
|
int x = SIZE(p) + n - CAPACITY(p);
|
95
|
-
if (x > 0) p =
|
88
|
+
if (x > 0) p = increase_capacity_b(p, x);
|
96
89
|
memmove(p + SIZE(p), q, n * sizeof(symbol)); SIZE(p) += n; return p;
|
97
90
|
}
|
98
91
|
|
99
92
|
extern symbol * copy_b(const symbol * p) {
|
100
93
|
int n = SIZE(p);
|
101
94
|
symbol * q = create_b(n);
|
102
|
-
|
95
|
+
add_to_b(q, p, n);
|
103
96
|
return q;
|
104
97
|
}
|
105
98
|
|
106
99
|
int space_count = 0;
|
107
100
|
|
108
|
-
|
101
|
+
static void * xmalloc(size_t n) {
|
102
|
+
void * result = malloc(n);
|
103
|
+
if (result == NULL) {
|
104
|
+
fprintf(stderr, "Failed to allocate %lu bytes\n", (unsigned long)n);
|
105
|
+
exit(1);
|
106
|
+
}
|
107
|
+
return result;
|
108
|
+
}
|
109
|
+
|
110
|
+
extern void * check_malloc(size_t n) {
|
109
111
|
space_count++;
|
110
|
-
return
|
112
|
+
return xmalloc(n);
|
111
113
|
}
|
112
114
|
|
113
115
|
extern void check_free(void * p) {
|
@@ -117,9 +119,9 @@ extern void check_free(void * p) {
|
|
117
119
|
|
118
120
|
/* To convert a block to a zero terminated string: */
|
119
121
|
|
120
|
-
extern char *
|
122
|
+
extern char * b_to_sz(const symbol * p) {
|
121
123
|
int n = SIZE(p);
|
122
|
-
char * s = (char *)
|
124
|
+
char * s = (char *)xmalloc(n + 1);
|
123
125
|
{
|
124
126
|
int i;
|
125
127
|
for (i = 0; i < n; i++) {
|
@@ -134,120 +136,179 @@ extern char * b_to_s(const symbol * p) {
|
|
134
136
|
return s;
|
135
137
|
}
|
136
138
|
|
137
|
-
/*
|
139
|
+
/* Add a single symbol to a block. If p = 0 the
|
138
140
|
block is created. */
|
139
141
|
|
140
|
-
extern symbol *
|
141
|
-
int n = strlen(s);
|
142
|
+
extern symbol * add_symbol_to_b(symbol * p, symbol ch) {
|
142
143
|
int k;
|
143
|
-
if (p ==
|
144
|
+
if (p == NULL) p = create_b(1);
|
144
145
|
k = SIZE(p);
|
145
146
|
{
|
146
|
-
int x = k +
|
147
|
-
if (x > 0) p =
|
147
|
+
int x = k + 1 - CAPACITY(p);
|
148
|
+
if (x > 0) p = increase_capacity_b(p, x);
|
148
149
|
}
|
150
|
+
p[k] = ch;
|
151
|
+
SIZE(p)++;
|
152
|
+
return p;
|
153
|
+
}
|
154
|
+
|
155
|
+
extern byte * create_s(int n) {
|
156
|
+
byte * p = (byte *) (HEAD + (byte *) MALLOC(HEAD + (n + 1)));
|
157
|
+
CAPACITY(p) = n;
|
158
|
+
SIZE(p) = 0;
|
159
|
+
return p;
|
160
|
+
}
|
161
|
+
|
162
|
+
extern void report_s(FILE * out, const byte * p) {
|
163
|
+
fwrite(p, 1, SIZE(p), out);
|
164
|
+
}
|
165
|
+
|
166
|
+
extern void lose_s(byte * p) {
|
167
|
+
if (p == NULL) return;
|
168
|
+
FREE((byte *) p - HEAD);
|
169
|
+
}
|
170
|
+
|
171
|
+
extern byte * increase_capacity_s(byte * p, int n) {
|
172
|
+
byte * q = create_s(CAPACITY(p) + n + EXTENDER);
|
173
|
+
memmove(q, p, CAPACITY(p));
|
174
|
+
SIZE(q) = SIZE(p);
|
175
|
+
lose_s(p);
|
176
|
+
return q;
|
177
|
+
}
|
178
|
+
|
179
|
+
extern byte * copy_s(const byte * p) {
|
180
|
+
return add_s_to_s(NULL, (const char*)p, SIZE(p));
|
181
|
+
}
|
182
|
+
|
183
|
+
/* Add a string with given length to a byte block. If p = 0 the
|
184
|
+
block is created. */
|
185
|
+
|
186
|
+
extern byte * add_s_to_s(byte * p, const char * s, int n) {
|
187
|
+
int k;
|
188
|
+
if (p == NULL) p = create_s(n);
|
189
|
+
k = SIZE(p);
|
149
190
|
{
|
150
|
-
int
|
151
|
-
|
191
|
+
int x = k + n - CAPACITY(p);
|
192
|
+
if (x > 0) p = increase_capacity_s(p, x);
|
152
193
|
}
|
194
|
+
memcpy(p + k, s, n);
|
153
195
|
SIZE(p) += n;
|
154
196
|
return p;
|
155
197
|
}
|
156
198
|
|
199
|
+
/* Add a zero terminated string to a byte block. If p = 0 the
|
200
|
+
block is created. */
|
201
|
+
|
202
|
+
extern byte * add_sz_to_s(byte * p, const char * s) {
|
203
|
+
return add_s_to_s(p, s, strlen(s));
|
204
|
+
}
|
205
|
+
|
206
|
+
/* Add a single character to a byte block. If p = 0 the
|
207
|
+
block is created. */
|
208
|
+
|
209
|
+
extern byte * add_char_to_s(byte * p, char ch) {
|
210
|
+
int k;
|
211
|
+
if (p == NULL) p = create_s(1);
|
212
|
+
k = SIZE(p);
|
213
|
+
{
|
214
|
+
int x = k + 1 - CAPACITY(p);
|
215
|
+
if (x > 0) p = increase_capacity_s(p, x);
|
216
|
+
}
|
217
|
+
p[k] = ch;
|
218
|
+
SIZE(p)++;
|
219
|
+
return p;
|
220
|
+
}
|
221
|
+
|
157
222
|
/* The next section defines string handling capabilities in terms
|
158
|
-
of the lower level block handling capabilities of space.c */
|
223
|
+
of the lower level byte block handling capabilities of space.c */
|
159
224
|
/* -------------------------------------------------------------*/
|
160
225
|
|
161
226
|
struct str {
|
162
|
-
|
227
|
+
byte * data;
|
163
228
|
};
|
164
229
|
|
165
230
|
/* Create a new string. */
|
166
231
|
extern struct str * str_new(void) {
|
167
|
-
|
168
|
-
|
169
|
-
output->data = create_b(0);
|
232
|
+
struct str * output = (struct str *) xmalloc(sizeof(struct str));
|
233
|
+
output->data = create_s(0);
|
170
234
|
return output;
|
171
235
|
}
|
172
236
|
|
173
237
|
/* Delete a string. */
|
174
238
|
extern void str_delete(struct str * str) {
|
175
|
-
|
176
|
-
lose_b(str->data);
|
239
|
+
lose_s(str->data);
|
177
240
|
free(str);
|
178
241
|
}
|
179
242
|
|
180
243
|
/* Append a str to this str. */
|
181
244
|
extern void str_append(struct str * str, const struct str * add) {
|
182
|
-
|
183
|
-
|
184
|
-
str->data = add_to_b(str->data, SIZE(q), q);
|
245
|
+
byte * q = add->data;
|
246
|
+
str->data = add_s_to_s(str->data, (char *)q, SIZE(q));
|
185
247
|
}
|
186
248
|
|
187
249
|
/* Append a character to this str. */
|
188
250
|
extern void str_append_ch(struct str * str, char add) {
|
189
|
-
|
190
|
-
symbol sym = (unsigned char)add;
|
191
|
-
str->data = add_to_b(str->data, 1, &sym);
|
251
|
+
str->data = add_char_to_s(str->data, add);
|
192
252
|
}
|
193
253
|
|
194
|
-
/* Append a low level block to a str. */
|
195
|
-
extern void
|
196
|
-
|
197
|
-
str->data = add_to_b(str->data, SIZE(q), q);
|
198
|
-
}
|
199
|
-
|
200
|
-
/* Append the tail of a low level block to a str. */
|
201
|
-
extern void str_append_b_tail(struct str * str, const symbol * q, int skip) {
|
202
|
-
if (skip < 0 || skip >= SIZE(q)) return;
|
203
|
-
|
204
|
-
str->data = add_to_b(str->data, SIZE(q) - skip, q + skip);
|
254
|
+
/* Append a low level byte block to a str. */
|
255
|
+
extern void str_append_s(struct str * str, const byte * q) {
|
256
|
+
str->data = add_s_to_s(str->data, (const char *)q, SIZE(q));
|
205
257
|
}
|
206
258
|
|
207
259
|
/* Append a (char *, null terminated) string to a str. */
|
208
260
|
extern void str_append_string(struct str * str, const char * s) {
|
209
|
-
|
210
|
-
str->data = add_s_to_b(str->data, s);
|
261
|
+
str->data = add_sz_to_s(str->data, s);
|
211
262
|
}
|
212
263
|
|
213
264
|
/* Append an integer to a str. */
|
214
265
|
extern void str_append_int(struct str * str, int i) {
|
215
|
-
|
216
266
|
char s[30];
|
217
267
|
sprintf(s, "%d", i);
|
218
268
|
str_append_string(str, s);
|
219
269
|
}
|
220
270
|
|
271
|
+
/* Append wide character to a string as UTF-8. */
|
272
|
+
extern void str_append_wchar_as_utf8(struct str * str, symbol ch) {
|
273
|
+
if (ch < 0x80) {
|
274
|
+
str_append_ch(str, ch);
|
275
|
+
return;
|
276
|
+
}
|
277
|
+
if (ch < 0x800) {
|
278
|
+
str_append_ch(str, (ch >> 6) | 0xC0);
|
279
|
+
str_append_ch(str, (ch & 0x3F) | 0x80);
|
280
|
+
return;
|
281
|
+
}
|
282
|
+
str_append_ch(str, (ch >> 12) | 0xE0);
|
283
|
+
str_append_ch(str, ((ch >> 6) & 0x3F) | 0x80);
|
284
|
+
str_append_ch(str, (ch & 0x3F) | 0x80);
|
285
|
+
}
|
286
|
+
|
221
287
|
/* Clear a string */
|
222
288
|
extern void str_clear(struct str * str) {
|
223
|
-
|
224
289
|
SIZE(str->data) = 0;
|
225
290
|
}
|
226
291
|
|
227
292
|
/* Set a string */
|
228
293
|
extern void str_assign(struct str * str, const char * s) {
|
229
|
-
|
230
294
|
str_clear(str);
|
231
295
|
str_append_string(str, s);
|
232
296
|
}
|
233
297
|
|
234
298
|
/* Copy a string. */
|
235
299
|
extern struct str * str_copy(const struct str * old) {
|
236
|
-
|
237
300
|
struct str * newstr = str_new();
|
238
301
|
str_append(newstr, old);
|
239
302
|
return newstr;
|
240
303
|
}
|
241
304
|
|
242
305
|
/* Get the data stored in this str. */
|
243
|
-
extern
|
244
|
-
|
306
|
+
extern byte * str_data(const struct str * str) {
|
245
307
|
return str->data;
|
246
308
|
}
|
247
309
|
|
248
310
|
/* Get the length of the str. */
|
249
311
|
extern int str_len(const struct str * str) {
|
250
|
-
|
251
312
|
return SIZE(str->data);
|
252
313
|
}
|
253
314
|
|
@@ -259,6 +320,14 @@ extern int str_back(const struct str *str) {
|
|
259
320
|
return SIZE(str->data) ? str->data[SIZE(str->data) - 1] : -1;
|
260
321
|
}
|
261
322
|
|
323
|
+
/* Remove the last character of the str.
|
324
|
+
*
|
325
|
+
* Or do nothing if the string is empty.
|
326
|
+
*/
|
327
|
+
extern void str_pop(const struct str *str) {
|
328
|
+
if (SIZE(str->data)) --SIZE(str->data);
|
329
|
+
}
|
330
|
+
|
262
331
|
extern int get_utf8(const symbol * p, int * slot) {
|
263
332
|
int b0, b1;
|
264
333
|
b0 = *p++;
|
@@ -8,9 +8,9 @@ static const struct system_word vocab[82+1] = {
|
|
8
8
|
{ 1, (const byte *)"+", c_plus },
|
9
9
|
{ 1, (const byte *)"-", c_minus },
|
10
10
|
{ 1, (const byte *)"/", c_divide },
|
11
|
-
{ 1, (const byte *)"<",
|
11
|
+
{ 1, (const byte *)"<", c_lt },
|
12
12
|
{ 1, (const byte *)"=", c_assign },
|
13
|
-
{ 1, (const byte *)">",
|
13
|
+
{ 1, (const byte *)">", c_gt },
|
14
14
|
{ 1, (const byte *)"?", c_debug },
|
15
15
|
{ 1, (const byte *)"[", c_leftslice },
|
16
16
|
{ 1, (const byte *)"]", c_rightslice },
|