mittens 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +4 -4
  5. data/lib/mittens/version.rb +1 -1
  6. data/mittens.gemspec +1 -1
  7. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  8. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  9. data/vendor/snowball/GNUmakefile +194 -136
  10. data/vendor/snowball/NEWS +798 -3
  11. data/vendor/snowball/README.rst +50 -1
  12. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  13. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  14. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  15. data/vendor/snowball/algorithms/basque.sbl +4 -19
  16. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  17. data/vendor/snowball/algorithms/danish.sbl +1 -1
  18. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  19. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  20. data/vendor/snowball/algorithms/english.sbl +52 -37
  21. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  22. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  23. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  24. data/vendor/snowball/algorithms/french.sbl +42 -16
  25. data/vendor/snowball/algorithms/german.sbl +35 -14
  26. data/vendor/snowball/algorithms/greek.sbl +76 -76
  27. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  28. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  29. data/vendor/snowball/algorithms/italian.sbl +11 -21
  30. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  31. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  32. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  33. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  34. data/vendor/snowball/algorithms/porter.sbl +2 -2
  35. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  36. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  37. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  38. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  39. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  40. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  41. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  42. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  43. data/vendor/snowball/compiler/analyser.c +445 -192
  44. data/vendor/snowball/compiler/driver.c +109 -101
  45. data/vendor/snowball/compiler/generator.c +853 -464
  46. data/vendor/snowball/compiler/generator_ada.c +404 -366
  47. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  48. data/vendor/snowball/compiler/generator_go.c +323 -254
  49. data/vendor/snowball/compiler/generator_java.c +326 -252
  50. data/vendor/snowball/compiler/generator_js.c +362 -252
  51. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  52. data/vendor/snowball/compiler/generator_python.c +257 -240
  53. data/vendor/snowball/compiler/generator_rust.c +423 -251
  54. data/vendor/snowball/compiler/header.h +117 -71
  55. data/vendor/snowball/compiler/space.c +137 -68
  56. data/vendor/snowball/compiler/syswords.h +2 -2
  57. data/vendor/snowball/compiler/tokeniser.c +125 -107
  58. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  59. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  60. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  61. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  62. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  63. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  64. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  65. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  66. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  67. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  68. data/vendor/snowball/examples/stemwords.c +12 -12
  69. data/vendor/snowball/go/env.go +107 -31
  70. data/vendor/snowball/go/util.go +0 -4
  71. data/vendor/snowball/include/libstemmer.h +4 -0
  72. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  73. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  74. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  75. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  76. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  77. data/vendor/snowball/javascript/stemwords.js +3 -6
  78. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  79. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  80. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  81. data/vendor/snowball/libstemmer/modules.txt +13 -10
  82. data/vendor/snowball/libstemmer/test.c +1 -1
  83. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  84. data/vendor/snowball/pascal/generate.pl +13 -13
  85. data/vendor/snowball/python/create_init.py +4 -1
  86. data/vendor/snowball/python/setup.cfg +0 -3
  87. data/vendor/snowball/python/setup.py +8 -3
  88. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  89. data/vendor/snowball/python/stemwords.py +8 -12
  90. data/vendor/snowball/runtime/api.c +10 -5
  91. data/vendor/snowball/runtime/header.h +10 -9
  92. data/vendor/snowball/runtime/utilities.c +9 -9
  93. data/vendor/snowball/rust/build.rs +1 -1
  94. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  95. data/vendor/snowball/tests/stemtest.c +7 -4
  96. metadata +8 -12
  97. data/vendor/snowball/.travis.yml +0 -112
  98. data/vendor/snowball/algorithms/german2.sbl +0 -145
  99. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  100. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -12,7 +12,7 @@ struct system_word {
12
12
  };
13
13
 
14
14
 
15
- /* ASCII collating assumed in syswords.c */
15
+ /* ASCII collating assumed in syswords.h */
16
16
 
17
17
  #include "syswords.h"
18
18
 
@@ -22,16 +22,16 @@ static int hex_to_num(int ch);
22
22
 
23
23
  static int smaller(int a, int b) { return a < b ? a : b; }
24
24
 
25
- extern symbol * get_input(const char * filename) {
25
+ extern byte * get_input(const char * filename) {
26
26
  FILE * input = fopen(filename, "r");
27
- if (input == 0) { return 0; }
27
+ if (input == NULL) { return NULL; }
28
28
  {
29
- symbol * u = create_b(INITIAL_INPUT_BUFFER_SIZE);
29
+ byte * u = create_s(INITIAL_INPUT_BUFFER_SIZE);
30
30
  int size = 0;
31
31
  while (true) {
32
32
  int ch = getc(input);
33
33
  if (ch == EOF) break;
34
- if (size >= CAPACITY(u)) u = increase_capacity(u, size);
34
+ if (size >= CAPACITY(u)) u = increase_capacity_s(u, size);
35
35
  u[size++] = ch;
36
36
  }
37
37
  fclose(input);
@@ -40,7 +40,7 @@ extern symbol * get_input(const char * filename) {
40
40
  }
41
41
  }
42
42
 
43
- static void error(struct tokeniser * t, const char * s1, int n, symbol * p, const char * s2) {
43
+ static void error(struct tokeniser * t, const char * s1, byte * p, int n, const char * s2) {
44
44
  if (t->error_count == 20) { fprintf(stderr, "... etc\n"); exit(1); }
45
45
  fprintf(stderr, "%s:%d: ", t->file, t->line_number);
46
46
  if (s1) fprintf(stderr, "%s", s1);
@@ -54,25 +54,19 @@ static void error(struct tokeniser * t, const char * s1, int n, symbol * p, cons
54
54
  }
55
55
 
56
56
  static void error1(struct tokeniser * t, const char * s) {
57
- error(t, s, 0,0, 0);
57
+ error(t, s, NULL, 0, NULL);
58
58
  }
59
59
 
60
60
  static void error2(struct tokeniser * t, const char * s) {
61
- error(t, "unexpected end of text after ", 0,0, s);
61
+ error(t, "unexpected end of text after ", NULL, 0, s);
62
62
  }
63
63
 
64
- static int compare_words(int m, symbol * p, int n, const byte * q) {
64
+ static int compare_words(int m, const byte * p, int n, const byte * q) {
65
65
  if (m != n) return m - n;
66
- {
67
- int i; for (i = 0; i < n; i++) {
68
- int diff = p[i] - q[i];
69
- if (diff) return diff;
70
- }
71
- }
72
- return 0;
66
+ return memcmp(p, q, n);
73
67
  }
74
68
 
75
- static int find_word(int n, symbol * p) {
69
+ static int find_word(int n, const byte * p) {
76
70
  int i = 0; int j = vocab->code;
77
71
  do {
78
72
  int k = i + (j - i)/2;
@@ -84,22 +78,6 @@ static int find_word(int n, symbol * p) {
84
78
  return -1;
85
79
  }
86
80
 
87
- static int get_number(int n, symbol * p) {
88
- int x = 0;
89
- int i; for (i = 0; i < n; i++) x = 10*x + p[i] - '0';
90
- return x;
91
- }
92
-
93
- static int eq_s(struct tokeniser * t, const char * s) {
94
- int l = strlen(s);
95
- if (SIZE(t->p) - t->c < l) return false;
96
- {
97
- int i;
98
- for (i = 0; i < l; i++) if (t->p[t->c + i] != s[i]) return false;
99
- }
100
- t->c += l; return true;
101
- }
102
-
103
81
  static int white_space(struct tokeniser * t, int ch) {
104
82
  switch (ch) {
105
83
  case '\n':
@@ -113,61 +91,65 @@ static int white_space(struct tokeniser * t, int ch) {
113
91
  return false;
114
92
  }
115
93
 
116
- static symbol * find_in_m(struct tokeniser * t, int n, symbol * p) {
94
+ static symbol * find_in_m(struct tokeniser * t, int n, byte * p) {
117
95
  struct m_pair * q;
118
96
  for (q = t->m_pairs; q; q = q->next) {
119
- symbol * name = q->name;
120
- if (n == SIZE(name) && memcmp(name, p, n * sizeof(symbol)) == 0) return q->value;
97
+ byte * name = q->name;
98
+ if (n == SIZE(name) && memcmp(name, p, n) == 0) return q->value;
121
99
  }
122
- return 0;
100
+ return NULL;
123
101
  }
124
102
 
125
103
  static int read_literal_string(struct tokeniser * t, int c) {
126
- symbol * p = t->p;
104
+ byte * p = t->p;
127
105
  int ch;
128
106
  SIZE(t->b) = 0;
129
107
  while (true) {
130
- if (c >= SIZE(p)) { error2(t, "'"); return c; }
108
+ if (c >= SIZE(p) || p[c] == '\n') {
109
+ error1(t, "string literal not terminated");
110
+ return c;
111
+ }
131
112
  ch = p[c];
132
- if (ch == '\n') { error1(t, "string not terminated"); return c; }
133
113
  c++;
134
114
  if (ch == t->m_start) {
135
115
  /* Inside insert characters. */
136
116
  int c0 = c;
137
117
  int newlines = false; /* no newlines as yet */
138
- int black_found = false; /* no printing chars as yet */
118
+ int all_whitespace = true; /* no printing chars as yet */
139
119
  while (true) {
140
- if (c >= SIZE(p)) { error2(t, "'"); return c; }
141
- ch = p[c]; c++;
142
- if (ch == t->m_end) break;
143
- if (!white_space(t, ch)) black_found = true;
144
- if (ch == '\n') newlines = true;
145
- if (newlines && black_found) {
146
- error1(t, "string not terminated");
120
+ if (c >= SIZE(p) || (p[c] == '\n' && !all_whitespace)) {
121
+ error1(t, "string literal not terminated");
147
122
  return c;
148
123
  }
124
+ ch = p[c];
125
+ if (ch == '\n') {
126
+ newlines = true;
127
+ }
128
+ c++;
129
+ if (ch == t->m_end) break;
130
+ if (!white_space(t, ch)) all_whitespace = false;
149
131
  }
150
132
  if (!newlines) {
151
133
  int n = c - c0 - 1; /* macro size */
152
134
  int firstch = p[c0];
153
135
  symbol * q = find_in_m(t, n, p + c0);
154
- if (q == 0) {
136
+ if (q == NULL) {
155
137
  if (n == 1 && (firstch == '\'' || firstch == t->m_start))
156
- t->b = add_to_b(t->b, 1, p + c0);
138
+ t->b = add_symbol_to_b(t->b, p[c0]);
157
139
  else if (n >= 3 && firstch == 'U' && p[c0 + 1] == '+') {
158
140
  int codepoint = 0;
159
141
  int x;
160
142
  if (t->uplusmode == UPLUS_DEFINED) {
161
143
  /* See if found with xxxx upper-cased. */
162
- symbol * uc = create_b(n);
144
+ byte * uc = create_s(n);
163
145
  int i;
164
146
  for (i = 0; i != n; ++i) {
165
147
  uc[i] = toupper(p[c0 + i]);
166
148
  }
167
149
  q = find_in_m(t, n, uc);
168
- lose_b(uc);
169
- if (q != 0) {
170
- t->b = add_to_b(t->b, SIZE(q), q);
150
+ lose_s(uc);
151
+ if (q != NULL) {
152
+ t->b = add_to_b(t->b, q, SIZE(q));
171
153
  continue;
172
154
  }
173
155
  error1(t, "Some U+xxxx stringdefs seen but not this one");
@@ -189,15 +171,14 @@ static int read_literal_string(struct tokeniser * t, int c) {
189
171
  /* Ensure there's enough space for a max length
190
172
  * UTF-8 sequence. */
191
173
  if (CAPACITY(t->b) < SIZE(t->b) + 3) {
192
- t->b = increase_capacity(t->b, 3);
174
+ t->b = increase_capacity_b(t->b, 3);
193
175
  }
194
176
  SIZE(t->b) += put_utf8(codepoint, t->b + SIZE(t->b));
195
177
  } else {
196
- symbol sym;
197
178
  if (t->encoding == ENC_SINGLEBYTE) {
198
179
  /* Only ISO-8859-1 is handled this way - for
199
180
  * other single-byte character sets you need
200
- * stringdef all the U+xxxx codes you use
181
+ * to stringdef all the U+xxxx codes you use
201
182
  * like - e.g.:
202
183
  *
203
184
  * stringdef U+0171 hex 'FB'
@@ -210,13 +191,14 @@ static int read_literal_string(struct tokeniser * t, int c) {
210
191
  error1(t, "character values exceed 64K");
211
192
  }
212
193
  }
213
- sym = codepoint;
214
- t->b = add_to_b(t->b, 1, &sym);
194
+ t->b = add_symbol_to_b(t->b, (symbol)codepoint);
215
195
  }
216
- } else
217
- error(t, "string macro '", n, p + c0, "' undeclared");
218
- } else
219
- t->b = add_to_b(t->b, SIZE(q), q);
196
+ } else {
197
+ error(t, "string macro '", p + c0, n, "' undeclared");
198
+ }
199
+ } else {
200
+ t->b = add_to_b(t->b, q, SIZE(q));
201
+ }
220
202
  }
221
203
  } else {
222
204
  if (ch == '\'') return c;
@@ -226,7 +208,7 @@ static int read_literal_string(struct tokeniser * t, int c) {
226
208
  * strings, but historically it's worked for single-byte
227
209
  * and UTF-8 if the source encoding matches what the
228
210
  * generated stemmer works in and it seems unfair to just
229
- * suddenly make this a hard error.`
211
+ * suddenly make this a hard error.
230
212
  */
231
213
  fprintf(stderr,
232
214
  "%s:%d: warning: Non-ASCII literal strings aren't "
@@ -237,13 +219,13 @@ static int read_literal_string(struct tokeniser * t, int c) {
237
219
  "portable - use stringdef instead");
238
220
  }
239
221
  }
240
- t->b = add_to_b(t->b, 1, p + c - 1);
222
+ t->b = add_symbol_to_b(t->b, p[c - 1]);
241
223
  }
242
224
  }
243
225
  }
244
226
 
245
227
  static int next_token(struct tokeniser * t) {
246
- symbol * p = t->p;
228
+ byte * p = t->p;
247
229
  int c = t->c;
248
230
  int ch;
249
231
  int code = -1;
@@ -256,21 +238,21 @@ static int next_token(struct tokeniser * t) {
256
238
  while (c < SIZE(p) && (isalnum(p[c]) || p[c] == '_')) c++;
257
239
  code = find_word(c - c0, p + c0);
258
240
  if (code < 0 || t->token_disabled[code]) {
259
- t->b = move_to_b(t->b, c - c0, p + c0);
241
+ SIZE(t->s) = 0;
242
+ t->s = add_s_to_s(t->s, (const char*)p + c0, c - c0);
260
243
  code = c_name;
261
244
  }
262
- } else
263
- if (isdigit(ch)) {
264
- int c0 = c;
265
- while (c < SIZE(p) && isdigit(p[c])) c++;
266
- t->number = get_number(c - c0, p + c0);
245
+ } else if (isdigit(ch)) {
246
+ int value = ch - '0';
247
+ while (++c < SIZE(p) && isdigit(p[c])) {
248
+ value = 10 * value + (p[c] - '0');
249
+ }
250
+ t->number = value;
267
251
  code = c_number;
268
- } else
269
- if (ch == '\'') {
252
+ } else if (ch == '\'') {
270
253
  c = read_literal_string(t, c + 1);
271
254
  code = c_literalstring;
272
- } else
273
- {
255
+ } else {
274
256
  int lim = smaller(2, SIZE(p) - c);
275
257
  int i;
276
258
  for (i = lim; i > 0; i--) {
@@ -282,7 +264,7 @@ static int next_token(struct tokeniser * t) {
282
264
  t->c = c;
283
265
  return code;
284
266
  }
285
- error(t, "'", 1, p + c, "' unknown");
267
+ error(t, "'", p + c, 1, "' unknown");
286
268
  c++;
287
269
  continue;
288
270
  }
@@ -309,7 +291,8 @@ static void read_chars(struct tokeniser * t) {
309
291
  ch = next_char(t);
310
292
  if (white_space(t, ch) || ch < 0) break;
311
293
  }
312
- t->b2 = move_to_b(t->b2, t->c - c0 - 1, t->p + c0);
294
+ SIZE(t->s) = 0;
295
+ t->s = add_s_to_s(t->s, (const char*)t->p + c0, t->c - c0 - 1);
313
296
  }
314
297
  }
315
298
 
@@ -372,28 +355,39 @@ static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
372
355
  }
373
356
 
374
357
  extern int read_token(struct tokeniser * t) {
375
- symbol * p = t->p;
358
+ byte * p = t->p;
376
359
  int held = t->token_held;
377
360
  t->token_held = false;
378
361
  if (held) return t->token;
362
+ t->token_reported_as_unexpected = false;
379
363
  while (true) {
380
364
  int code = next_token(t);
381
365
  switch (code) {
382
366
  case c_comment1: /* slash-slash comment */
383
367
  while (t->c < SIZE(p) && p[t->c] != '\n') t->c++;
384
368
  continue;
385
- case c_comment2: /* slash-star comment */
369
+ case c_comment2: { /* slash-star comment */
370
+ // Scan for a '*' stopping one before the end since we need a
371
+ // '/' to follow it to close the comment.
372
+ int size_less_one = SIZE(p) - 1;
373
+ int c = t->c;
386
374
  while (true) {
387
- if (t->c >= SIZE(p)) {
375
+ if (c >= size_less_one) {
388
376
  error1(t, "/* comment not terminated");
389
377
  t->token = -1;
390
378
  return -1;
391
379
  }
392
- if (p[t->c] == '\n') t->line_number++;
393
- if (eq_s(t, "*/")) break;
394
- t->c++;
380
+ if (p[c] == '\n') {
381
+ t->line_number++;
382
+ } else if (p[c] == '*' && p[c + 1] == '/') {
383
+ // Found '*/' to end of comment.
384
+ t->c = c + 2;
385
+ break;
386
+ }
387
+ ++c;
395
388
  }
396
389
  continue;
390
+ }
397
391
  case c_stringescapes: {
398
392
  int ch1 = next_real_char(t);
399
393
  int ch2 = next_real_char(t);
@@ -422,11 +416,11 @@ extern int read_token(struct tokeniser * t) {
422
416
  if (base > 0) convert_numeric_string(t, t->b, base);
423
417
  { NEW(m_pair, q);
424
418
  q->next = t->m_pairs;
425
- q->name = copy_b(t->b2);
419
+ q->name = copy_s(t->s);
426
420
  q->value = copy_b(t->b);
427
421
  t->m_pairs = q;
428
422
  if (t->uplusmode != UPLUS_DEFINED &&
429
- (SIZE(t->b2) >= 3 && t->b2[0] == 'U' && t->b2[1] == '+')) {
423
+ (SIZE(t->s) >= 3 && t->s[0] == 'U' && t->s[1] == '+')) {
430
424
  if (t->uplusmode == UPLUS_UNICODE) {
431
425
  error1(t, "U+xxxx already used with implicit meaning");
432
426
  } else {
@@ -448,22 +442,28 @@ extern int read_token(struct tokeniser * t) {
448
442
  }
449
443
  {
450
444
  NEW(input, q);
451
- char * file = b_to_s(t->b);
452
- symbol * u = get_input(file);
453
- if (u == 0) {
445
+ char * file = b_to_sz(t->b);
446
+ int file_owned = 1;
447
+ byte * u = get_input(file);
448
+ if (u == NULL) {
454
449
  struct include * r;
455
450
  for (r = t->includes; r; r = r->next) {
456
- symbol * b = copy_b(r->b);
457
- b = add_to_b(b, SIZE(t->b), t->b);
458
- free(file);
459
- file = b_to_s(b);
451
+ byte * s = copy_s(r->s);
452
+ s = add_sz_to_s(s, file);
453
+ s[SIZE(s)] = 0;
454
+ if (file_owned > 0) {
455
+ free(file);
456
+ } else {
457
+ lose_s((byte *)file);
458
+ }
459
+ file = (char*)s;
460
+ file_owned = -1;
460
461
  u = get_input(file);
461
- lose_b(b);
462
- if (u != 0) break;
462
+ if (u != NULL) break;
463
463
  }
464
464
  }
465
- if (u == 0) {
466
- error(t, "Can't get '", SIZE(t->b), t->b, "'");
465
+ if (u == NULL) {
466
+ error(t, "Can't get '", (byte *)file, strlen(file), "'");
467
467
  exit(1);
468
468
  }
469
469
  memmove(q, t, sizeof(struct input));
@@ -471,14 +471,14 @@ extern int read_token(struct tokeniser * t) {
471
471
  t->p = u;
472
472
  t->c = 0;
473
473
  t->file = file;
474
- t->file_needs_freeing = true;
474
+ t->file_owned = file_owned;
475
475
  t->line_number = 1;
476
476
  }
477
477
  p = t->p;
478
478
  continue;
479
479
  case -1:
480
480
  if (t->next) {
481
- lose_b(p);
481
+ lose_s(p);
482
482
  {
483
483
  struct input * q = t->next;
484
484
  memmove(t, q, sizeof(struct input)); p = t->p;
@@ -496,6 +496,12 @@ extern int read_token(struct tokeniser * t) {
496
496
  }
497
497
  }
498
498
 
499
+ extern int peek_token(struct tokeniser * t) {
500
+ int token = read_token(t);
501
+ t->token_held = true;
502
+ return token;
503
+ }
504
+
499
505
  extern const char * name_of_token(int code) {
500
506
  int i;
501
507
  for (i = 1; i < vocab->code; i++)
@@ -509,6 +515,13 @@ extern const char * name_of_token(int code) {
509
515
  case c_grouping: return "grouping";
510
516
  case c_call: return "call";
511
517
  case c_booltest: return "Boolean test";
518
+ case c_functionend: return "Function end";
519
+ case c_goto_grouping:
520
+ return "goto grouping";
521
+ case c_gopast_grouping:
522
+ return "gopast grouping";
523
+ case c_goto_non: return "goto non";
524
+ case c_gopast_non: return "gopast non";
512
525
  case -2: return "start of text";
513
526
  case -1: return "end of text";
514
527
  default: return "?";
@@ -519,21 +532,22 @@ extern void disable_token(struct tokeniser * t, int code) {
519
532
  t->token_disabled[code] = 1;
520
533
  }
521
534
 
522
- extern struct tokeniser * create_tokeniser(symbol * p, char * file) {
535
+ extern struct tokeniser * create_tokeniser(byte * p, char * file) {
523
536
  NEW(tokeniser, t);
524
- t->next = 0;
537
+ t->next = NULL;
525
538
  t->p = p;
526
539
  t->c = 0;
527
540
  t->file = file;
528
- t->file_needs_freeing = false;
541
+ t->file_owned = 0;
529
542
  t->line_number = 1;
530
543
  t->b = create_b(0);
531
- t->b2 = create_b(0);
544
+ t->s = create_s(0);
532
545
  t->m_start = -1;
533
- t->m_pairs = 0;
546
+ t->m_pairs = NULL;
534
547
  t->get_depth = 0;
535
548
  t->error_count = 0;
536
549
  t->token_held = false;
550
+ t->token_reported_as_unexpected = false;
537
551
  t->token = -2;
538
552
  t->previous_token = -2;
539
553
  t->uplusmode = UPLUS_NONE;
@@ -543,12 +557,12 @@ extern struct tokeniser * create_tokeniser(symbol * p, char * file) {
543
557
 
544
558
  extern void close_tokeniser(struct tokeniser * t) {
545
559
  lose_b(t->b);
546
- lose_b(t->b2);
560
+ lose_s(t->s);
547
561
  {
548
562
  struct m_pair * q = t->m_pairs;
549
563
  while (q) {
550
564
  struct m_pair * q_next = q->next;
551
- lose_b(q->name);
565
+ lose_s(q->name);
552
566
  lose_b(q->value);
553
567
  FREE(q);
554
568
  q = q_next;
@@ -562,6 +576,10 @@ extern void close_tokeniser(struct tokeniser * t) {
562
576
  q = q_next;
563
577
  }
564
578
  }
565
- if (t->file_needs_freeing) free(t->file);
579
+ if (t->file_owned > 0) {
580
+ free(t->file);
581
+ } else if (t->file_owned < 0) {
582
+ lose_s((byte *)t->file);
583
+ }
566
584
  FREE(t);
567
585
  }
@@ -2,10 +2,10 @@
2
2
  // Copyright (c) 2002, Richard Boulton
3
3
  // Copyright (c) 2015, Cesar Souza
4
4
  // All rights reserved.
5
- //
5
+ //
6
6
  // Redistribution and use in source and binary forms, with or without
7
7
  // modification, are permitted provided that the following conditions are met:
8
- //
8
+ //
9
9
  // * Redistributions of source code must retain the above copyright notice,
10
10
  // * this list of conditions and the following disclaimer.
11
11
  // * Redistributions in binary form must reproduce the above copyright
@@ -14,7 +14,7 @@
14
14
  // * Neither the name of the copyright holders nor the names of its contributors
15
15
  // * may be used to endorse or promote products derived from this software
16
16
  // * without specific prior written permission.
17
- //
17
+ //
18
18
  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
19
  // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
20
  // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -34,41 +34,41 @@ namespace Snowball
34
34
  /// <summary>
35
35
  /// Snowball's among construction.
36
36
  /// </summary>
37
- ///
37
+ ///
38
38
  public sealed class Among
39
39
  {
40
40
  /// <summary>
41
41
  /// Search string.
42
42
  /// </summary>
43
- ///
43
+ ///
44
44
  public string SearchString { get; private set; }
45
45
 
46
46
  /// <summary>
47
47
  /// Index to longest matching substring.
48
48
  /// </summary>
49
- ///
49
+ ///
50
50
  public int MatchIndex { get; private set; }
51
51
 
52
52
  /// <summary>
53
53
  /// Result of the lookup.
54
54
  /// </summary>
55
- ///
55
+ ///
56
56
  public int Result { get; private set; }
57
57
 
58
58
  /// <summary>
59
59
  /// Action to be invoked.
60
60
  /// </summary>
61
- ///
61
+ ///
62
62
  public Func<bool> Action { get; private set; }
63
63
 
64
64
  /// <summary>
65
65
  /// Initializes a new instance of the <see cref="Among"/> class.
66
66
  /// </summary>
67
- ///
67
+ ///
68
68
  /// <param name="str">The search string.</param>
69
69
  /// <param name="index">The index to the longest matching substring.</param>
70
70
  /// <param name="result">The result of the lookup.</param>
71
- ///
71
+ ///
72
72
  public Among(String str, int index, int result)
73
73
  : this(str, index, result, null)
74
74
  {
@@ -77,12 +77,12 @@ namespace Snowball
77
77
  /// <summary>
78
78
  /// Initializes a new instance of the <see cref="Among"/> class.
79
79
  /// </summary>
80
- ///
80
+ ///
81
81
  /// <param name="str">The search string.</param>
82
82
  /// <param name="index">The index to the longest matching substring.</param>
83
83
  /// <param name="result">The result of the lookup.</param>
84
84
  /// <param name="action">The action to be performed, if any.</param>
85
- ///
85
+ ///
86
86
  public Among(String str, int index, int result, Func<bool> action)
87
87
  {
88
88
  this.SearchString = str;
@@ -94,11 +94,11 @@ namespace Snowball
94
94
  /// <summary>
95
95
  /// Returns a <see cref="System.String" /> that represents this instance.
96
96
  /// </summary>
97
- ///
97
+ ///
98
98
  /// <returns>
99
99
  /// A <see cref="System.String" /> that represents this instance.
100
100
  /// </returns>
101
- ///
101
+ ///
102
102
  public override string ToString()
103
103
  {
104
104
  return SearchString;
@@ -2,7 +2,7 @@
2
2
  using System.Runtime.CompilerServices;
3
3
  using System.Runtime.InteropServices;
4
4
 
5
- // General Information about an assembly is controlled through the following
5
+ // General Information about an assembly is controlled through the following
6
6
  // set of attributes. Change these attribute values to modify the information
7
7
  // associated with an assembly.
8
8
  [assembly: AssemblyTitle("Snowball")]
@@ -14,8 +14,8 @@ using System.Runtime.InteropServices;
14
14
  [assembly: AssemblyTrademark("")]
15
15
  [assembly: AssemblyCulture("")]
16
16
 
17
- // Setting ComVisible to false makes the types in this assembly not visible
18
- // to COM components. If you need to access a type in this assembly from
17
+ // Setting ComVisible to false makes the types in this assembly not visible
18
+ // to COM components. If you need to access a type in this assembly from
19
19
  // COM, set the ComVisible attribute to true on that type.
20
20
  [assembly: ComVisible(false)]
21
21
 
@@ -25,12 +25,12 @@ using System.Runtime.InteropServices;
25
25
  // Version information for an assembly consists of the following four values:
26
26
  //
27
27
  // Major Version
28
- // Minor Version
28
+ // Minor Version
29
29
  // Build Number
30
30
  // Revision
31
31
  //
32
- // You can specify all the values or you can default the Build and Revision Numbers
32
+ // You can specify all the values or you can default the Build and Revision Numbers
33
33
  // by using the '*' as shown below:
34
34
  // [assembly: AssemblyVersion("1.0.*")]
35
- [assembly: AssemblyVersion(/*SNOWBALL_VERSION*/"2.2.0.0")]
36
- [assembly: AssemblyFileVersion(/*SNOWBALL_VERSION*/"2.2.0.0")]
35
+ [assembly: AssemblyVersion(/*SNOWBALL_VERSION*/"3.0.1.0")]
36
+ [assembly: AssemblyFileVersion(/*SNOWBALL_VERSION*/"3.0.1.0")]