mittens 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +3 -3
  4. data/lib/mittens/version.rb +1 -1
  5. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  6. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  7. data/vendor/snowball/GNUmakefile +194 -136
  8. data/vendor/snowball/NEWS +798 -3
  9. data/vendor/snowball/README.rst +50 -1
  10. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  11. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  12. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  13. data/vendor/snowball/algorithms/basque.sbl +4 -19
  14. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  15. data/vendor/snowball/algorithms/danish.sbl +1 -1
  16. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  17. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  18. data/vendor/snowball/algorithms/english.sbl +52 -37
  19. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  20. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  21. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  22. data/vendor/snowball/algorithms/french.sbl +42 -16
  23. data/vendor/snowball/algorithms/german.sbl +35 -14
  24. data/vendor/snowball/algorithms/greek.sbl +76 -76
  25. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  26. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  27. data/vendor/snowball/algorithms/italian.sbl +11 -21
  28. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  29. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  30. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  31. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  32. data/vendor/snowball/algorithms/porter.sbl +2 -2
  33. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  34. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  35. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  36. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  37. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  38. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  39. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  40. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  41. data/vendor/snowball/compiler/analyser.c +445 -192
  42. data/vendor/snowball/compiler/driver.c +109 -101
  43. data/vendor/snowball/compiler/generator.c +853 -464
  44. data/vendor/snowball/compiler/generator_ada.c +404 -366
  45. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  46. data/vendor/snowball/compiler/generator_go.c +323 -254
  47. data/vendor/snowball/compiler/generator_java.c +326 -252
  48. data/vendor/snowball/compiler/generator_js.c +362 -252
  49. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  50. data/vendor/snowball/compiler/generator_python.c +257 -240
  51. data/vendor/snowball/compiler/generator_rust.c +423 -251
  52. data/vendor/snowball/compiler/header.h +117 -71
  53. data/vendor/snowball/compiler/space.c +137 -68
  54. data/vendor/snowball/compiler/syswords.h +2 -2
  55. data/vendor/snowball/compiler/tokeniser.c +125 -107
  56. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  57. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  58. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  59. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  60. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  61. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  62. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  63. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  64. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  65. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  66. data/vendor/snowball/examples/stemwords.c +12 -12
  67. data/vendor/snowball/go/env.go +107 -31
  68. data/vendor/snowball/go/util.go +0 -4
  69. data/vendor/snowball/include/libstemmer.h +4 -0
  70. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  71. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  72. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  74. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  75. data/vendor/snowball/javascript/stemwords.js +3 -6
  76. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  77. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  78. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  79. data/vendor/snowball/libstemmer/modules.txt +13 -10
  80. data/vendor/snowball/libstemmer/test.c +1 -1
  81. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  82. data/vendor/snowball/pascal/generate.pl +13 -13
  83. data/vendor/snowball/python/create_init.py +4 -1
  84. data/vendor/snowball/python/setup.cfg +0 -3
  85. data/vendor/snowball/python/setup.py +8 -3
  86. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  87. data/vendor/snowball/python/stemwords.py +8 -12
  88. data/vendor/snowball/runtime/api.c +10 -5
  89. data/vendor/snowball/runtime/header.h +10 -9
  90. data/vendor/snowball/runtime/utilities.c +9 -9
  91. data/vendor/snowball/rust/build.rs +1 -1
  92. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  93. data/vendor/snowball/tests/stemtest.c +7 -4
  94. metadata +7 -7
  95. data/vendor/snowball/.travis.yml +0 -112
  96. data/vendor/snowball/algorithms/german2.sbl +0 -145
  97. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  98. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -1,4 +1,4 @@
1
-
1
+ #include <assert.h>
2
2
  #include <stdlib.h> /* for exit */
3
3
  #include <string.h> /* for strlen */
4
4
  #include <stdio.h> /* for fprintf etc */
@@ -15,7 +15,6 @@ static int new_label(struct generator * g) {
15
15
  }
16
16
 
17
17
  static struct str * vars_newname(struct generator * g) {
18
-
19
18
  struct str * output;
20
19
  g->var_number++;
21
20
  output = str_new();
@@ -24,11 +23,9 @@ static struct str * vars_newname(struct generator * g) {
24
23
  return output;
25
24
  }
26
25
 
27
-
28
26
  /* Write routines for items from the syntax tree */
29
27
 
30
28
  static void write_varname(struct generator * g, struct name * p) {
31
-
32
29
  switch (p->type) {
33
30
  case t_external:
34
31
  break;
@@ -39,7 +36,7 @@ static void write_varname(struct generator * g, struct name * p) {
39
36
  break;
40
37
  }
41
38
  }
42
- write_b(g, p->b);
39
+ write_s(g, p->s);
43
40
  }
44
41
 
45
42
  static void write_varref(struct generator * g, struct name * p) {
@@ -47,42 +44,29 @@ static void write_varref(struct generator * g, struct name * p) {
47
44
  write_varname(g, p);
48
45
  }
49
46
 
50
- static void write_hexdigit(struct generator * g, int n) {
51
-
52
- write_char(g, n < 10 ? n + '0' : n - 10 + 'A');
53
- }
54
-
55
- static void write_hex(struct generator * g, int ch) {
56
-
57
- write_string(g, "\\u{");
58
- {
59
- int i;
60
- for (i = 12; i >= 0; i -= 4) write_hexdigit(g, ch >> i & 0xf);
61
- }
62
- write_string(g, "}");
63
- }
64
-
65
47
  static void write_literal_string(struct generator * g, symbol * p) {
66
-
67
48
  int i = 0;
68
49
  write_string(g, "\"");
69
50
  while (i < SIZE(p)) {
70
51
  int ch;
71
52
  i += get_utf8(p + i, &ch);
72
- if (32 <= ch && ch < 127) {
73
- if (ch == '\"' || ch == '\\') write_string(g, "\\");
74
- write_char(g, ch);
53
+ if (32 <= ch && ch < 0x590 && ch != 127) {
54
+ if (ch == '"' || ch == '\\') write_char(g, '\\');
55
+ write_wchar_as_utf8(g, ch);
75
56
  } else {
76
- write_hex(g, ch);
57
+ // Use escapes for anything over 0x590 as a crude way to avoid
58
+ // LTR characters affecting the rendering of source character
59
+ // order in confusing ways.
60
+ write_string(g, "\\u{");
61
+ write_hex4(g, ch);
62
+ write_string(g, "}");
77
63
  }
78
64
  }
79
65
  write_string(g, "\"");
80
66
  }
81
67
 
82
68
  static void write_margin(struct generator * g) {
83
-
84
- int i;
85
- for (i = 0; i < g->margin; i++) write_string(g, " ");
69
+ for (int i = 0; i < g->margin; i++) write_string(g, " ");
86
70
  }
87
71
 
88
72
  static void write_comment(struct generator * g, struct node * p) {
@@ -94,53 +78,41 @@ static void write_comment(struct generator * g, struct node * p) {
94
78
  }
95
79
 
96
80
  static void write_block_start(struct generator * g) {
97
-
98
81
  w(g, "~+{~N");
99
82
  }
100
83
 
101
- static void write_block_end(struct generator * g) /* block end */ {
102
-
84
+ static void write_block_end(struct generator * g) {
103
85
  w(g, "~-~M}~N");
104
86
  }
105
87
 
106
88
  static void write_savecursor(struct generator * g, struct node * p,
107
89
  struct str * savevar) {
108
-
109
90
  g->B[0] = str_data(savevar);
110
91
  g->S[1] = "";
111
92
  if (p->mode != m_forward) g->S[1] = "env.limit - ";
112
93
  writef(g, "~Mlet ~B0 = ~S1env.cursor;~N", p);
113
94
  }
114
95
 
115
- static void restore_string(struct node * p, struct str * out, struct str * savevar) {
116
-
117
- str_clear(out);
96
+ static void append_restore_string(struct node * p, struct str * out, struct str * savevar) {
118
97
  str_append_string(out, "env.cursor = ");
119
98
  if (p->mode != m_forward) str_append_string(out, "env.limit - ");
120
99
  str_append(out, savevar);
121
100
  str_append_string(out, ";");
122
101
  }
123
102
 
124
- static void write_restorecursor(struct generator * g, struct node * p,
125
- struct str * savevar) {
126
-
127
- struct str * temp = str_new();
103
+ static void write_restorecursor(struct generator * g, struct node * p, struct str * savevar) {
128
104
  write_margin(g);
129
- restore_string(p, temp, savevar);
130
- write_str(g, temp);
105
+ append_restore_string(p, g->outbuf, savevar);
131
106
  write_newline(g);
132
- str_delete(temp);
133
107
  }
134
108
 
135
109
  static void write_inc_cursor(struct generator * g, struct node * p) {
136
-
137
110
  write_margin(g);
138
111
  write_string(g, p->mode == m_forward ? "env.next_char();" : "env.previous_char();");
139
112
  write_newline(g);
140
113
  }
141
114
 
142
115
  static void wsetlab_begin(struct generator * g, int n) {
143
-
144
116
  g->I[0] = n;
145
117
  w(g, "~M'lab~I0: loop {~N~+");
146
118
  }
@@ -159,7 +131,6 @@ static void wgotol(struct generator * g, int n) {
159
131
  }
160
132
 
161
133
  static void write_failure(struct generator * g) {
162
-
163
134
  if (str_len(g->failure_str) != 0) {
164
135
  write_margin(g);
165
136
  write_str(g, g->failure_str);
@@ -171,14 +142,14 @@ static void write_failure(struct generator * g) {
171
142
  g->unreachable = true;
172
143
  break;
173
144
  default:
174
- g->I[0] = g->failure_label;
175
- w(g, "~Mbreak 'lab~I0;~N");
145
+ w(g, "~Mbreak 'lab");
146
+ write_int(g, g->failure_label);
147
+ w(g, ";~N");
176
148
  g->unreachable = true;
177
149
  }
178
150
  }
179
151
 
180
- static void write_failure_if(struct generator * g, char * s, struct node * p) {
181
-
152
+ static void write_failure_if(struct generator * g, const char * s, struct node * p) {
182
153
  writef(g, "~Mif ", p);
183
154
  writef(g, s, p);
184
155
  writef(g, " ", p);
@@ -190,7 +161,6 @@ static void write_failure_if(struct generator * g, char * s, struct node * p) {
190
161
 
191
162
  /* if at limit fail */
192
163
  static void write_check_limit(struct generator * g, struct node * p) {
193
-
194
164
  if (p->mode == m_forward) {
195
165
  write_failure_if(g, "env.cursor >= env.limit", p);
196
166
  } else {
@@ -200,18 +170,18 @@ static void write_check_limit(struct generator * g, struct node * p) {
200
170
 
201
171
  /* Formatted write. */
202
172
  static void writef(struct generator * g, const char * input, struct node * p) {
173
+ (void)p;
203
174
  int i = 0;
204
- int l = strlen(input);
205
175
 
206
- while (i < l) {
176
+ while (input[i]) {
207
177
  int ch = input[i++];
208
178
  if (ch != '~') {
209
179
  write_char(g, ch);
210
180
  continue;
211
181
  }
212
- switch (input[i++]) {
213
- default: write_char(g, input[i - 1]); continue;
214
- case 'C': write_comment(g, p); continue;
182
+ ch = input[i++];
183
+ switch (ch) {
184
+ case '~': write_char(g, '~'); continue;
215
185
  case 'f': write_block_start(g);
216
186
  write_failure(g);
217
187
  g->unreachable = false;
@@ -221,21 +191,65 @@ static void writef(struct generator * g, const char * input, struct node * p) {
221
191
  case 'N': write_newline(g); continue;
222
192
  case '{': write_block_start(g); continue;
223
193
  case '}': write_block_end(g); continue;
224
- case 'S': write_string(g, g->S[input[i++] - '0']); continue;
225
- case 'B': write_b(g, g->B[input[i++] - '0']); continue;
226
- case 'I': write_int(g, g->I[input[i++] - '0']); continue;
227
- case 'V': write_varref(g, g->V[input[i++] - '0']); continue;
228
- case 'W': write_varname(g, g->V[input[i++] - '0']); continue;
229
- case 'L': write_literal_string(g, g->L[input[i++] - '0']); continue;
194
+ case 'S': {
195
+ int j = input[i++] - '0';
196
+ if (j < 0 || j > (int)(sizeof(g->S) / sizeof(g->S[0]))) {
197
+ printf("Invalid escape sequence ~%c%c in writef(g, \"%s\", p)\n",
198
+ ch, input[i - 1], input);
199
+ exit(1);
200
+ }
201
+ write_string(g, g->S[j]);
202
+ continue;
203
+ }
204
+ case 'B': {
205
+ int j = input[i++] - '0';
206
+ if (j < 0 || j > (int)(sizeof(g->B) / sizeof(g->B[0])))
207
+ goto invalid_escape2;
208
+ write_s(g, g->B[j]);
209
+ continue;
210
+ }
211
+ case 'I': {
212
+ int j = input[i++] - '0';
213
+ if (j < 0 || j > (int)(sizeof(g->I) / sizeof(g->I[0])))
214
+ goto invalid_escape2;
215
+ write_int(g, g->I[j]);
216
+ continue;
217
+ }
218
+ case 'V':
219
+ case 'W': {
220
+ int j = input[i++] - '0';
221
+ if (j < 0 || j > (int)(sizeof(g->V) / sizeof(g->V[0])))
222
+ goto invalid_escape2;
223
+ if (ch == 'V')
224
+ write_varref(g, g->V[j]);
225
+ else
226
+ write_varname(g, g->V[j]);
227
+ continue;
228
+ }
229
+ case 'L': {
230
+ int j = input[i++] - '0';
231
+ if (j < 0 || j > (int)(sizeof(g->L) / sizeof(g->L[0])))
232
+ goto invalid_escape2;
233
+ write_literal_string(g, g->L[j]);
234
+ continue;
235
+ }
230
236
  case '+': g->margin++; continue;
231
237
  case '-': g->margin--; continue;
232
238
  case 'n': write_string(g, g->options->name); continue;
239
+ default:
240
+ printf("Invalid escape sequence ~%c in writef(g, \"%s\", p)\n",
241
+ ch, input);
242
+ exit(1);
243
+ invalid_escape2:
244
+ printf("Invalid escape sequence ~%c%c in writef(g, \"%s\", p)\n",
245
+ ch, input[i - 1], input);
246
+ exit(1);
233
247
  }
234
248
  }
235
249
  }
236
250
 
237
251
  static void w(struct generator * g, const char * s) {
238
- writef(g, s, 0);
252
+ writef(g, s, NULL);
239
253
  }
240
254
 
241
255
  static void generate_AE(struct generator * g, struct node * p) {
@@ -284,7 +298,6 @@ static void generate_AE(struct generator * g, struct node * p) {
284
298
  }
285
299
 
286
300
  static void generate_bra(struct generator * g, struct node * p) {
287
-
288
301
  write_comment(g, p);
289
302
  p = p->left;
290
303
  while (p) {
@@ -294,28 +307,33 @@ static void generate_bra(struct generator * g, struct node * p) {
294
307
  }
295
308
 
296
309
  static void generate_and(struct generator * g, struct node * p) {
297
-
298
- struct str * savevar = vars_newname(g);
299
- int keep_c = K_needed(g, p->left);
310
+ struct str * savevar = NULL;
311
+ if (K_needed(g, p->left)) {
312
+ savevar = vars_newname(g);
313
+ }
300
314
 
301
315
  write_comment(g, p);
302
316
 
303
- if (keep_c) write_savecursor(g, p, savevar);
317
+ if (savevar) write_savecursor(g, p, savevar);
304
318
 
305
319
  p = p->left;
306
320
  while (p) {
307
321
  generate(g, p);
308
322
  if (g->unreachable) break;
309
- if (keep_c && p->right != 0) write_restorecursor(g, p, savevar);
323
+ if (savevar && p->right != NULL) write_restorecursor(g, p, savevar);
310
324
  p = p->right;
311
325
  }
312
- str_delete(savevar);
326
+
327
+ if (savevar) {
328
+ str_delete(savevar);
329
+ }
313
330
  }
314
331
 
315
332
  static void generate_or(struct generator * g, struct node * p) {
316
-
317
- struct str * savevar = vars_newname(g);
318
- int keep_c = K_needed(g, p->left);
333
+ struct str * savevar = NULL;
334
+ if (K_needed(g, p->left)) {
335
+ savevar = vars_newname(g);
336
+ }
319
337
 
320
338
  int a0 = g->failure_label;
321
339
  struct str * a1 = str_copy(g->failure_str);
@@ -326,18 +344,18 @@ static void generate_or(struct generator * g, struct node * p) {
326
344
  write_comment(g, p);
327
345
  wsetlab_begin(g, out_lab);
328
346
 
329
- if (keep_c) write_savecursor(g, p, savevar);
347
+ if (savevar) write_savecursor(g, p, savevar);
330
348
 
331
349
  p = p->left;
332
350
  str_clear(g->failure_str);
333
351
 
334
- if (p == 0) {
335
- /* p should never be 0 after an or: there should be at least two
352
+ if (p == NULL) {
353
+ /* p should never be NULL after an or: there should be at least two
336
354
  * sub nodes. */
337
355
  fprintf(stderr, "Error: \"or\" node without children nodes.");
338
356
  exit(1);
339
357
  }
340
- while (p->right != 0) {
358
+ while (p->right != NULL) {
341
359
  int label = new_label(g);
342
360
  g->failure_label = label;
343
361
  wsetlab_begin(g, label);
@@ -348,7 +366,7 @@ static void generate_or(struct generator * g, struct node * p) {
348
366
  }
349
367
  w(g, "~-~M}~N");
350
368
  g->unreachable = false;
351
- if (keep_c) write_restorecursor(g, p, savevar);
369
+ if (savevar) write_restorecursor(g, p, savevar);
352
370
  p = p->right;
353
371
  }
354
372
 
@@ -361,11 +379,13 @@ static void generate_or(struct generator * g, struct node * p) {
361
379
  if (!end_unreachable) {
362
380
  g->unreachable = false;
363
381
  }
364
- str_delete(savevar);
382
+
383
+ if (savevar) {
384
+ str_delete(savevar);
385
+ }
365
386
  }
366
387
 
367
388
  static void generate_backwards(struct generator * g, struct node * p) {
368
-
369
389
  write_comment(g, p);
370
390
  writef(g,"~Menv.limit_backward = env.cursor;~N"
371
391
  "~Menv.cursor = env.limit;~N", p);
@@ -375,9 +395,10 @@ static void generate_backwards(struct generator * g, struct node * p) {
375
395
 
376
396
 
377
397
  static void generate_not(struct generator * g, struct node * p) {
378
-
379
- struct str * savevar = vars_newname(g);
380
- int keep_c = K_needed(g, p->left);
398
+ struct str * savevar = NULL;
399
+ if (K_needed(g, p->left)) {
400
+ savevar = vars_newname(g);
401
+ }
381
402
 
382
403
  int a0 = g->failure_label;
383
404
  struct str * a1 = str_copy(g->failure_str);
@@ -385,7 +406,7 @@ static void generate_not(struct generator * g, struct node * p) {
385
406
  g->failure_label = label;
386
407
 
387
408
  write_comment(g, p);
388
- if (keep_c) {
409
+ if (savevar) {
389
410
  write_savecursor(g, p, savevar);
390
411
  }
391
412
 
@@ -404,48 +425,51 @@ static void generate_not(struct generator * g, struct node * p) {
404
425
 
405
426
  g->unreachable = false;
406
427
 
407
- if (keep_c) write_restorecursor(g, p, savevar);
408
- str_delete(savevar);
428
+ if (savevar) {
429
+ write_restorecursor(g, p, savevar);
430
+ str_delete(savevar);
431
+ }
409
432
  }
410
433
 
411
434
 
412
435
  static void generate_try(struct generator * g, struct node * p) {
436
+ struct str * savevar = NULL;
437
+ if (K_needed(g, p->left)) {
438
+ savevar = vars_newname(g);
439
+ }
413
440
 
414
- struct str * savevar = vars_newname(g);
415
- int keep_c = K_needed(g, p->left);
416
441
  int label = new_label(g);
417
442
  g->failure_label = label;
418
443
  str_clear(g->failure_str);
419
444
 
420
445
  write_comment(g, p);
421
- if (keep_c) {
446
+ if (savevar) {
422
447
  write_savecursor(g, p, savevar);
423
- restore_string(p, g->failure_str, savevar);
448
+ append_restore_string(p, g->failure_str, savevar);
424
449
  }
425
450
  wsetlab_begin(g, label);
426
451
  generate(g, p->left);
427
452
  wsetlab_end(g, label);
428
453
  g->unreachable = false;
429
454
 
430
- str_delete(savevar);
455
+ if (savevar) {
456
+ str_delete(savevar);
457
+ }
431
458
  }
432
459
 
433
460
  static void generate_set(struct generator * g, struct node * p) {
434
-
435
461
  write_comment(g, p);
436
462
  g->V[0] = p->name;
437
463
  writef(g, "~M~V0 = true;~N", p);
438
464
  }
439
465
 
440
466
  static void generate_unset(struct generator * g, struct node * p) {
441
-
442
467
  write_comment(g, p);
443
468
  g->V[0] = p->name;
444
469
  writef(g, "~M~V0 = false;~N", p);
445
470
  }
446
471
 
447
472
  static void generate_fail(struct generator * g, struct node * p) {
448
-
449
473
  write_comment(g, p);
450
474
  generate(g, p->left);
451
475
  if (!g->unreachable) write_failure(g);
@@ -454,32 +478,35 @@ static void generate_fail(struct generator * g, struct node * p) {
454
478
  /* generate_test() also implements 'reverse' */
455
479
 
456
480
  static void generate_test(struct generator * g, struct node * p) {
457
-
458
- struct str * savevar = vars_newname(g);
459
- int keep_c = K_needed(g, p->left);
481
+ struct str * savevar = NULL;
482
+ if (K_needed(g, p->left)) {
483
+ savevar = vars_newname(g);
484
+ }
460
485
 
461
486
  write_comment(g, p);
462
487
 
463
- if (keep_c) {
488
+ if (savevar) {
464
489
  write_savecursor(g, p, savevar);
465
490
  }
466
491
 
467
492
  generate(g, p->left);
468
493
 
469
- if (!g->unreachable) {
470
- if (keep_c) {
494
+ if (savevar) {
495
+ if (!g->unreachable) {
471
496
  write_restorecursor(g, p, savevar);
472
497
  }
498
+ str_delete(savevar);
473
499
  }
474
- str_delete(savevar);
475
500
  }
476
501
 
477
502
  static void generate_do(struct generator * g, struct node * p) {
503
+ struct str * savevar = NULL;
504
+ if (K_needed(g, p->left)) {
505
+ savevar = vars_newname(g);
506
+ }
478
507
 
479
- struct str * savevar = vars_newname(g);
480
- int keep_c = K_needed(g, p->left);
481
508
  write_comment(g, p);
482
- if (keep_c) write_savecursor(g, p, savevar);
509
+ if (savevar) write_savecursor(g, p, savevar);
483
510
 
484
511
  if (p->left->type == c_call) {
485
512
  /* Optimise do <call> */
@@ -497,24 +524,50 @@ static void generate_do(struct generator * g, struct node * p) {
497
524
  g->unreachable = false;
498
525
  }
499
526
 
500
- if (keep_c) write_restorecursor(g, p, savevar);
501
- str_delete(savevar);
527
+ if (savevar) {
528
+ write_restorecursor(g, p, savevar);
529
+ str_delete(savevar);
530
+ }
502
531
  }
503
532
 
504
- static void generate_GO(struct generator * g, struct node * p, int style) {
533
+ static void generate_next(struct generator * g, struct node * p) {
534
+ write_comment(g, p);
535
+ write_check_limit(g, p);
536
+ write_inc_cursor(g, p);
537
+ }
505
538
 
506
- int end_unreachable = false;
507
- struct str * savevar = vars_newname(g);
508
- int keep_c = style == 1 || repeat_restore(g, p->left);
539
+ static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
540
+ write_comment(g, p);
541
+
542
+ struct grouping * q = p->name->grouping;
543
+ g->S[0] = p->mode == m_forward ? "" : "_b";
544
+ g->S[1] = complement ? "in" : "out";
545
+ g->V[0] = p->name;
546
+ g->I[0] = q->smallest_ch;
547
+ g->I[1] = q->largest_ch;
548
+ write_failure_if(g, "!env.go_~S1_grouping~S0(~W0, ~I0, ~I1)", p);
549
+ if (!is_goto) {
550
+ write_string(g, p->mode == m_forward ? "env.next_char();" : "env.previous_char();");
551
+ }
552
+ }
553
+
554
+ static void generate_GO(struct generator * g, struct node * p, int style) {
555
+ write_comment(g, p);
509
556
 
510
557
  int a0 = g->failure_label;
511
558
  struct str * a1 = str_copy(g->failure_str);
512
559
 
560
+ int end_unreachable = false;
561
+
513
562
  int golab = new_label(g);
514
563
  g->I[0] = golab;
515
- write_comment(g, p);
516
564
  w(g, "~M'golab~I0: loop {~N~+");
517
- if (keep_c) write_savecursor(g, p, savevar);
565
+
566
+ struct str * savevar = NULL;
567
+ if (style == 1 || repeat_restore(g, p->left)) {
568
+ savevar = vars_newname(g);
569
+ write_savecursor(g, p, savevar);
570
+ }
518
571
 
519
572
  g->failure_label = new_label(g);
520
573
  str_clear(g->failure_str);
@@ -533,7 +586,10 @@ static void generate_GO(struct generator * g, struct node * p, int style) {
533
586
  }
534
587
  g->unreachable = false;
535
588
  w(g, "~-~M}~N");
536
- if (keep_c) write_restorecursor(g, p, savevar);
589
+ if (savevar) {
590
+ write_restorecursor(g, p, savevar);
591
+ str_delete(savevar);
592
+ }
537
593
 
538
594
  g->failure_label = a0;
539
595
  str_delete(g->failure_str);
@@ -542,13 +598,10 @@ static void generate_GO(struct generator * g, struct node * p, int style) {
542
598
  write_check_limit(g, p);
543
599
  write_inc_cursor(g, p);
544
600
  write_block_end(g);
545
-
546
- str_delete(savevar);
547
601
  g->unreachable = end_unreachable;
548
602
  }
549
603
 
550
604
  static void generate_loop(struct generator * g, struct node * p) {
551
-
552
605
  struct str * loopvar = vars_newname(g);
553
606
  write_comment(g, p);
554
607
  w(g, "~Mfor _ in 0..");
@@ -563,14 +616,15 @@ static void generate_loop(struct generator * g, struct node * p) {
563
616
  }
564
617
 
565
618
  static void generate_repeat_or_atleast(struct generator * g, struct node * p, struct str * loopvar) {
566
-
567
- struct str * savevar = vars_newname(g);
568
- int keep_c = repeat_restore(g, p->left);
569
619
  int replab = new_label(g);
570
620
  g->I[0] = replab;
571
621
  writef(g, "~M'replab~I0: loop{~N~+", p);
572
622
 
573
- if (keep_c) write_savecursor(g, p, savevar);
623
+ struct str * savevar = NULL;
624
+ if (repeat_restore(g, p->left)) {
625
+ savevar = vars_newname(g);
626
+ write_savecursor(g, p, savevar);
627
+ }
574
628
 
575
629
  g->failure_label = new_label(g);
576
630
  str_clear(g->failure_str);
@@ -579,7 +633,7 @@ static void generate_repeat_or_atleast(struct generator * g, struct node * p, st
579
633
  generate(g, p->left);
580
634
 
581
635
  if (!g->unreachable) {
582
- if (loopvar != 0) {
636
+ if (loopvar != NULL) {
583
637
  g->B[0] = str_data(loopvar);
584
638
  w(g, "~M~B0 -= 1;~N");
585
639
  }
@@ -590,11 +644,13 @@ static void generate_repeat_or_atleast(struct generator * g, struct node * p, st
590
644
  w(g, "~-~M}~N");
591
645
  g->unreachable = false;
592
646
 
593
- if (keep_c) write_restorecursor(g, p, savevar);
647
+ if (savevar) {
648
+ write_restorecursor(g, p, savevar);
649
+ str_delete(savevar);
650
+ }
594
651
 
595
652
  g->I[0] = replab;
596
653
  w(g, "~Mbreak 'replab~I0;~N~-~M}~N");
597
- str_delete(savevar);
598
654
  }
599
655
 
600
656
  static void generate_repeat(struct generator * g, struct node * p) {
@@ -603,8 +659,8 @@ static void generate_repeat(struct generator * g, struct node * p) {
603
659
  }
604
660
 
605
661
  static void generate_atleast(struct generator * g, struct node * p) {
606
-
607
662
  struct str * loopvar = vars_newname(g);
663
+
608
664
  write_comment(g, p);
609
665
  g->B[0] = str_data(loopvar);
610
666
  w(g, "~Mlet mut ~B0 = ");
@@ -626,14 +682,12 @@ static void generate_atleast(struct generator * g, struct node * p) {
626
682
  }
627
683
 
628
684
  static void generate_setmark(struct generator * g, struct node * p) {
629
-
630
685
  write_comment(g, p);
631
686
  g->V[0] = p->name;
632
687
  writef(g, "~M~V0 = env.cursor;~N", p);
633
688
  }
634
689
 
635
690
  static void generate_tomark(struct generator * g, struct node * p) {
636
-
637
691
  write_comment(g, p);
638
692
  g->S[0] = p->mode == m_forward ? ">" : "<";
639
693
 
@@ -647,10 +701,8 @@ static void generate_tomark(struct generator * g, struct node * p) {
647
701
  }
648
702
 
649
703
  static void generate_atmark(struct generator * g, struct node * p) {
650
-
651
704
  write_comment(g, p);
652
- w(g, "~Mif env.cursor != "); generate_AE(g, p->AE);
653
- writef(g, " ", p);
705
+ w(g, "~Mif env.cursor != "); generate_AE(g, p->AE); writef(g, " ", p);
654
706
  write_block_start(g);
655
707
  write_failure(g);
656
708
  write_block_end(g);
@@ -674,30 +726,19 @@ static void generate_hop(struct generator * g, struct node * p) {
674
726
  }
675
727
 
676
728
  static void generate_delete(struct generator * g, struct node * p) {
677
-
678
729
  write_comment(g, p);
679
730
  writef(g, "~Mif !env.slice_del() {~N"
680
731
  "~+~Mreturn false;~N~-"
681
732
  "~M}~N", p);
682
733
  }
683
734
 
684
-
685
- static void generate_next(struct generator * g, struct node * p) {
686
-
687
- write_comment(g, p);
688
- write_check_limit(g, p);
689
- write_inc_cursor(g, p);
690
- }
691
-
692
735
  static void generate_tolimit(struct generator * g, struct node * p) {
693
-
694
736
  write_comment(g, p);
695
737
  g->S[0] = p->mode == m_forward ? "env.limit" : "env.limit_backward";
696
738
  writef(g, "~Menv.cursor = ~S0;~N", p);
697
739
  }
698
740
 
699
741
  static void generate_atlimit(struct generator * g, struct node * p) {
700
-
701
742
  write_comment(g, p);
702
743
  g->S[0] = p->mode == m_forward ? "env.limit" : "env.limit_backward";
703
744
  g->S[1] = p->mode == m_forward ? "<" : ">";
@@ -705,28 +746,24 @@ static void generate_atlimit(struct generator * g, struct node * p) {
705
746
  }
706
747
 
707
748
  static void generate_leftslice(struct generator * g, struct node * p) {
708
-
709
749
  write_comment(g, p);
710
750
  g->S[0] = p->mode == m_forward ? "env.bra" : "env.ket";
711
751
  writef(g, "~M~S0 = env.cursor;~N", p);
712
752
  }
713
753
 
714
754
  static void generate_rightslice(struct generator * g, struct node * p) {
715
-
716
755
  write_comment(g, p);
717
756
  g->S[0] = p->mode == m_forward ? "env.ket" : "env.bra";
718
757
  writef(g, "~M~S0 = env.cursor;~N", p);
719
758
  }
720
759
 
721
760
  static void generate_assignto(struct generator * g, struct node * p) {
722
-
723
761
  write_comment(g, p);
724
762
  g->V[0] = p->name;
725
763
  writef(g, "~M~V0 = env.assign_to();~N", p);
726
764
  }
727
765
 
728
766
  static void generate_sliceto(struct generator * g, struct node * p) {
729
-
730
767
  write_comment(g, p);
731
768
  g->V[0] = p->name;
732
769
  writef(g, "~M~V0 = env.slice_to();~N"
@@ -735,13 +772,12 @@ static void generate_sliceto(struct generator * g, struct node * p) {
735
772
  }
736
773
 
737
774
  static void generate_address(struct generator * g, struct node * p) {
738
-
739
775
  /* If we deal with a string variable which is of type String we need to
740
776
  * pass it by reference not by value. Literalstrings on the other hand are
741
777
  * of type &'static str so we can pass them by value.
742
778
  */
743
779
  symbol * b = p->literalstring;
744
- if (b != 0) {
780
+ if (b != NULL) {
745
781
  write_literal_string(g, b);
746
782
  } else {
747
783
  write_char(g, '&');
@@ -750,7 +786,6 @@ static void generate_address(struct generator * g, struct node * p) {
750
786
  }
751
787
 
752
788
  static void generate_insert(struct generator * g, struct node * p, int style) {
753
-
754
789
  int keep_c = style == c_attach;
755
790
  write_comment(g, p);
756
791
  if (p->mode == m_backward) keep_c = !keep_c;
@@ -763,7 +798,6 @@ static void generate_insert(struct generator * g, struct node * p, int style) {
763
798
  }
764
799
 
765
800
  static void generate_assignfrom(struct generator * g, struct node * p) {
766
-
767
801
  int keep_c = p->mode == m_forward; /* like 'attach' */
768
802
 
769
803
  write_comment(g, p);
@@ -782,9 +816,7 @@ static void generate_assignfrom(struct generator * g, struct node * p) {
782
816
  if (keep_c) w(g, "~Menv.cursor = c;~N");
783
817
  }
784
818
 
785
-
786
819
  static void generate_slicefrom(struct generator * g, struct node * p) {
787
-
788
820
  write_comment(g, p);
789
821
  w(g, "~Mif !env.slice_from(");
790
822
  generate_address(g, p);
@@ -793,7 +825,6 @@ static void generate_slicefrom(struct generator * g, struct node * p) {
793
825
  }
794
826
 
795
827
  static void generate_setlimit(struct generator * g, struct node * p) {
796
- struct str * savevar = vars_newname(g);
797
828
  struct str * varname = vars_newname(g);
798
829
  write_comment(g, p);
799
830
  if (p->left && p->left->type == c_tomark) {
@@ -806,6 +837,7 @@ static void generate_setlimit(struct generator * g, struct node * p) {
806
837
  * restore c.
807
838
  */
808
839
  struct node * q = p->left;
840
+ write_comment(g, q);
809
841
  g->S[0] = q->mode == m_forward ? ">" : "<";
810
842
  w(g, "~Mif env.cursor ~S0 "); generate_AE(g, q->AE); w(g, " ");
811
843
  write_block_start(g);
@@ -821,7 +853,8 @@ static void generate_setlimit(struct generator * g, struct node * p) {
821
853
  w(g, "~Mlet ~B0 = env.limit_backward;~N");
822
854
  w(g, "~Menv.limit_backward = ");
823
855
  }
824
- generate_AE(g, q->AE); writef(g, ";~N", q);
856
+ generate_AE(g, q->AE);
857
+ writef(g, ";~N", q);
825
858
 
826
859
  if (p->mode == m_forward) {
827
860
  str_assign(g->failure_str, "env.limit += ");
@@ -833,7 +866,9 @@ static void generate_setlimit(struct generator * g, struct node * p) {
833
866
  str_append_string(g->failure_str, ";");
834
867
  }
835
868
  } else {
869
+ struct str * savevar = vars_newname(g);
836
870
  write_savecursor(g, p, savevar);
871
+
837
872
  generate(g, p->left);
838
873
 
839
874
  if (!g->unreachable) {
@@ -857,6 +892,7 @@ static void generate_setlimit(struct generator * g, struct node * p) {
857
892
  str_append_string(g->failure_str, ";");
858
893
  }
859
894
  }
895
+ str_delete(savevar);
860
896
  }
861
897
 
862
898
  if (!g->unreachable) {
@@ -869,17 +905,16 @@ static void generate_setlimit(struct generator * g, struct node * p) {
869
905
  }
870
906
  }
871
907
  str_delete(varname);
872
- str_delete(savevar);
873
908
  }
874
909
 
875
910
  /* dollar sets snowball up to operate on a string variable as if it were the
876
911
  * current string */
877
912
  static void generate_dollar(struct generator * g, struct node * p) {
878
-
879
- struct str * savevar_env = vars_newname(g);
880
913
  write_comment(g, p);
914
+
915
+ struct str * savevar = vars_newname(g);
881
916
  g->V[0] = p->name;
882
- g->B[0] = str_data(savevar_env);
917
+ g->B[0] = str_data(savevar);
883
918
  writef(g, "~Mlet ~B0 = env.clone();~N"
884
919
  "~Menv.set_current_s(~V0.clone());~N"
885
920
  "~Menv.cursor = 0;~N"
@@ -887,45 +922,72 @@ static void generate_dollar(struct generator * g, struct node * p) {
887
922
  generate(g, p->left);
888
923
  if (!g->unreachable) {
889
924
  g->V[0] = p->name;
890
- g->B[0] = str_data(savevar_env);
925
+ g->B[0] = str_data(savevar);
891
926
  /* Update string variable. */
892
927
  w(g, "~M~V0 = env.current.clone().into_owned();~N");
893
928
  /* Reset env */
894
929
  w(g, "~M*env = ~B0;~N");
895
930
  }
896
- str_delete(savevar_env);
931
+ str_delete(savevar);
897
932
  }
898
933
 
899
- static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
900
-
934
+ static void generate_integer_assign(struct generator * g, struct node * p, const char * s) {
935
+ write_comment(g, p);
901
936
  g->V[0] = p->name;
902
937
  g->S[0] = s;
903
938
  w(g, "~M~V0 ~S0 "); generate_AE(g, p->AE); w(g, ";~N");
904
939
  }
905
940
 
906
- static void generate_integer_test(struct generator * g, struct node * p, char * s) {
907
-
908
- w(g, "~Mif !(");
941
+ static void generate_integer_test(struct generator * g, struct node * p) {
942
+ write_comment(g, p);
943
+ int relop = p->type;
944
+ int optimise_to_return = (g->failure_label == x_return && p->right && p->right->type == c_functionend);
945
+ if (optimise_to_return) {
946
+ w(g, "~Mreturn ");
947
+ p->right = NULL;
948
+ } else {
949
+ w(g, "~Mif ");
950
+ // We want the inverse of the snowball test here.
951
+ relop ^= 1;
952
+ }
909
953
  generate_AE(g, p->left);
910
- write_char(g, ' ');
911
- write_string(g, s);
912
- write_char(g, ' ');
954
+ // Relational operators are the same as C.
955
+ write_c_relop(g, relop);
913
956
  generate_AE(g, p->AE);
914
- w(g, ")");
915
- write_block_start(g);
916
- write_failure(g);
917
- write_block_end(g);
918
- g->unreachable = false;
957
+ if (optimise_to_return) {
958
+ w(g, "~N");
959
+ } else {
960
+ write_block_start(g);
961
+ write_failure(g);
962
+ write_block_end(g);
963
+ g->unreachable = false;
964
+ }
919
965
  }
920
966
 
921
967
  static void generate_call(struct generator * g, struct node * p) {
922
-
968
+ int signals = check_possible_signals_list(g, p->name->definition, c_define, 0);
923
969
  write_comment(g, p);
924
970
  g->V[0] = p->name;
925
- write_failure_if(g, "!~W0(env, context)", p);
971
+ if (g->failure_label == x_return &&
972
+ (signals == 0 || (p->right && p->right->type == c_functionend))) {
973
+ /* Always fails or tail call. */
974
+ writef(g, "~Mreturn ~W0(env, context);~N", p);
975
+ return;
976
+ }
977
+ if (signals == 1) {
978
+ /* Always succeeds. */
979
+ writef(g, "~M~W0(env, context);~N", p);
980
+ } else if (signals == 0) {
981
+ /* Always fails. */
982
+ writef(g, "~M~W0(env, context);~N", p);
983
+ write_failure(g);
984
+ } else {
985
+ write_failure_if(g, "!~W0(env, context)", p);
986
+ }
926
987
  }
927
988
 
928
989
  static void generate_grouping(struct generator * g, struct node * p, int complement) {
990
+ write_comment(g, p);
929
991
 
930
992
  struct grouping * q = p->name->grouping;
931
993
  g->S[0] = p->mode == m_forward ? "" : "_b";
@@ -937,7 +999,6 @@ static void generate_grouping(struct generator * g, struct node * p, int complem
937
999
  }
938
1000
 
939
1001
  static void generate_namedstring(struct generator * g, struct node * p) {
940
-
941
1002
  write_comment(g, p);
942
1003
  g->S[0] = p->mode == m_forward ? "" : "_b";
943
1004
  g->V[0] = p->name;
@@ -953,10 +1014,8 @@ static void generate_literalstring(struct generator * g, struct node * p) {
953
1014
  }
954
1015
 
955
1016
  static void generate_setup_context(struct generator * g) {
956
-
957
- struct name * q;
958
1017
  w(g, "~Mlet mut context = &mut Context {~+~N");
959
- for (q = g->analyser->names; q; q = q->next) {
1018
+ for (struct name * q = g->analyser->names; q; q = q->next) {
960
1019
  g->V[0] = q;
961
1020
  switch (q->type) {
962
1021
  case t_string:
@@ -975,18 +1034,22 @@ static void generate_setup_context(struct generator * g) {
975
1034
 
976
1035
  static void generate_define(struct generator * g, struct node * p) {
977
1036
  struct name * q = p->name;
1037
+ if (q->type == t_routine && !q->used) return;
978
1038
 
979
- struct str * saved_output = g->outbuf;
1039
+ write_newline(g);
1040
+ write_comment(g, p);
980
1041
 
981
1042
  g->V[0] = q;
982
-
983
1043
  if (q->type == t_routine) {
984
- w(g, "~N~Mfn ~W0(env: &mut SnowballEnv, context: &mut Context) -> bool {~+~N");
1044
+ w(g, "~Mfn ~W0(env: &mut SnowballEnv, context: &mut Context) -> bool {~+~N");
985
1045
  } else {
986
- w(g, "~N~Mpub fn ~W0(env: &mut SnowballEnv) -> bool {~+~N");
1046
+ w(g, "~Mpub fn ~W0(env: &mut SnowballEnv) -> bool {~+~N");
987
1047
  generate_setup_context(g);
988
1048
  }
989
1049
  if (p->amongvar_needed) w(g, "~Mlet mut among_var;~N");
1050
+
1051
+ /* Save output. */
1052
+ struct str * saved_output = g->outbuf;
990
1053
  g->outbuf = str_new();
991
1054
 
992
1055
  g->next_label = 0;
@@ -995,8 +1058,16 @@ static void generate_define(struct generator * g, struct node * p) {
995
1058
  str_clear(g->failure_str);
996
1059
  g->failure_label = x_return;
997
1060
  g->unreachable = false;
1061
+ int signals = check_possible_signals_list(g, p->left, c_define, 0);
1062
+
1063
+ /* Generate function body. */
998
1064
  generate(g, p->left);
999
- if (!g->unreachable) w(g, "~Mreturn true;~N");
1065
+ if (p->left->right) {
1066
+ assert(p->left->right->type == c_functionend);
1067
+ if (signals) {
1068
+ generate(g, p->left->right);
1069
+ }
1070
+ }
1000
1071
  w(g, "~-~M}~N");
1001
1072
 
1002
1073
  str_append(saved_output, g->outbuf);
@@ -1004,64 +1075,175 @@ static void generate_define(struct generator * g, struct node * p) {
1004
1075
  g->outbuf = saved_output;
1005
1076
  }
1006
1077
 
1078
+ static void generate_functionend(struct generator * g, struct node * p) {
1079
+ (void)p;
1080
+ w(g, "~Mreturn true~N");
1081
+ }
1082
+
1007
1083
  static void generate_substring(struct generator * g, struct node * p) {
1084
+ write_comment(g, p);
1008
1085
 
1009
1086
  struct among * x = p->among;
1010
-
1011
- write_comment(g, p);
1087
+ int block = -1;
1088
+ unsigned int bitmap = 0;
1089
+ struct amongvec * among_cases = x->b;
1090
+ int empty_case = -1;
1091
+ int n_cases = 0;
1092
+ symbol cases[2];
1093
+ int shortest_size = x->shortest_size;
1094
+ int block_opened = 0;
1012
1095
 
1013
1096
  g->S[0] = p->mode == m_forward ? "" : "_b";
1014
1097
  g->I[0] = x->number;
1098
+ g->I[1] = x->literalstring_count;
1099
+
1100
+ /* In forward mode with non-ASCII UTF-8 characters, the first byte
1101
+ * of the string will often be the same, so instead look at the last
1102
+ * common byte position.
1103
+ *
1104
+ * In backward mode, we can't match if there are fewer characters before
1105
+ * the current position than the minimum length.
1106
+ */
1107
+ for (int c = 0; c < x->literalstring_count; ++c) {
1108
+ symbol ch;
1109
+ if (among_cases[c].size == 0) {
1110
+ empty_case = c;
1111
+ continue;
1112
+ }
1113
+ if (p->mode == m_forward) {
1114
+ ch = among_cases[c].b[shortest_size - 1];
1115
+ } else {
1116
+ ch = among_cases[c].b[among_cases[c].size - 1];
1117
+ }
1118
+ if (n_cases == 0) {
1119
+ block = ch >> 5;
1120
+ } else if (ch >> 5 != block) {
1121
+ block = -1;
1122
+ if (n_cases > 2) break;
1123
+ }
1124
+ if (block == -1) {
1125
+ if (n_cases > 0 && ch == cases[0]) continue;
1126
+ if (n_cases < 2) {
1127
+ cases[n_cases++] = ch;
1128
+ } else if (ch != cases[1]) {
1129
+ ++n_cases;
1130
+ break;
1131
+ }
1132
+ } else {
1133
+ if ((bitmap & (1u << (ch & 0x1f))) == 0) {
1134
+ bitmap |= 1u << (ch & 0x1f);
1135
+ if (n_cases < 2)
1136
+ cases[n_cases] = ch;
1137
+ ++n_cases;
1138
+ }
1139
+ }
1140
+ }
1141
+
1142
+ if (block != -1 || n_cases <= 2) {
1143
+ char buf[64];
1144
+ g->I[2] = block;
1145
+ g->I[3] = bitmap;
1146
+ g->I[4] = shortest_size - 1;
1147
+ if (p->mode == m_forward) {
1148
+ sprintf(buf, "env.current.as_bytes()[(env.cursor + %d) as usize]", shortest_size - 1);
1149
+ g->S[1] = buf;
1150
+ if (shortest_size == 1) {
1151
+ writef(g, "~Mif (env.cursor >= env.limit", p);
1152
+ } else {
1153
+ writef(g, "~Mif (env.cursor + ~I4 >= env.limit", p);
1154
+ }
1155
+ } else {
1156
+ g->S[1] = "env.current.as_bytes()[(env.cursor - 1) as usize]";
1157
+ if (shortest_size == 1) {
1158
+ writef(g, "~Mif (env.cursor <= env.limit_backward", p);
1159
+ } else {
1160
+ writef(g, "~Mif (env.cursor - ~I4 <= env.limit_backward", p);
1161
+ }
1162
+ }
1163
+ if (n_cases == 0) {
1164
+ /* We get this for the degenerate case: among ( '' )
1165
+ * This doesn't seem to be a useful construct, but it is
1166
+ * syntactically valid.
1167
+ */
1168
+ } else if (n_cases == 1) {
1169
+ g->I[4] = cases[0];
1170
+ writef(g, " || ~S1 as u8 != ~I4 as u8", p);
1171
+ } else if (n_cases == 2) {
1172
+ g->I[4] = cases[0];
1173
+ g->I[5] = cases[1];
1174
+ writef(g, " || (~S1 as u8 != ~I4 as u8 && ~S1 as u8 != ~I5 as u8)", p);
1175
+ } else {
1176
+ writef(g, " || ~S1 as u8 >> 5 != ~I2 as u8 || ((~I3 as i32 >> (~S1 as u8 & 0x1f)) & 1) == 0", p);
1177
+ }
1178
+ write_string(g, ") ");
1179
+ if (empty_case != -1) {
1180
+ /* If the among includes the empty string, it can never fail
1181
+ * so not matching the bitmap means we match the empty string.
1182
+ */
1183
+ g->I[4] = among_cases[empty_case].result;
1184
+ writef(g, "{among_var = ~I4;}~N~Melse ", p);
1185
+ write_block_start(g);
1186
+ block_opened = 1;
1187
+ } else {
1188
+ writef(g, "~f~N", p);
1189
+ }
1190
+ } else {
1191
+ #ifdef OPTIMISATION_WARNINGS
1192
+ printf("Couldn't shortcut among %d\n", x->number);
1193
+ #endif
1194
+ }
1015
1195
 
1016
- if (!x->amongvar_needed) {
1017
- write_failure_if(g, "env.find_among~S0(~A_~I0, context) == 0", p);
1196
+ if (x->amongvar_needed) {
1197
+ writef(g, "~Mamong_var = env.find_among~S0(A_~I0, context);~N", p);
1198
+ if (!x->always_matches) {
1199
+ write_failure_if(g, "among_var == 0", p);
1200
+ }
1201
+ } else if (x->always_matches) {
1202
+ writef(g, "~Menv.find_among~S0(A_~I0, context);~N", p);
1018
1203
  } else {
1019
- writef(g, "~Mamong_var = env.find_among~S0(~A_~I0, context);~N", p);
1020
- write_failure_if(g, "among_var == 0", p);
1204
+ write_failure_if(g, "env.find_among~S0(A_~I0, context) == 0", p);
1021
1205
  }
1206
+ if (block_opened) write_block_end(g);
1022
1207
  }
1023
1208
 
1024
1209
  static void generate_among(struct generator * g, struct node * p) {
1025
-
1026
1210
  struct among * x = p->among;
1027
1211
 
1028
- if (x->substring == 0) generate_substring(g, p);
1029
-
1030
- if (x->starter != 0) generate(g, x->starter);
1212
+ if (x->substring == NULL) {
1213
+ generate_substring(g, p);
1214
+ } else {
1215
+ write_comment(g, p);
1216
+ }
1031
1217
 
1032
1218
  if (x->command_count == 1 && x->nocommand_count == 0) {
1033
1219
  /* Only one outcome ("no match" already handled). */
1034
1220
  generate(g, x->commands[0]);
1035
1221
  } else if (x->command_count > 0) {
1036
- int i;
1037
- w(g, "~M");
1038
- for (i = 1; i <= x->command_count; i++) {
1222
+ w(g, "~Mmatch among_var {~N~+");
1223
+ for (int i = 1; i <= x->command_count; i++) {
1039
1224
  g->I[0] = i;
1040
- if (i > 1) w(g, " else ");
1041
- w(g, "if among_var == ~I0 {~N~+");
1225
+ w(g, "~M~I0 => {~N~+");
1042
1226
  generate(g, x->commands[i - 1]);
1043
- w(g, "~-~M}");
1227
+ w(g, "~-~M}~N");
1044
1228
  g->unreachable = false;
1045
1229
  }
1046
- w(g, "~N");
1230
+ w(g, "~M_ => ()~N");
1231
+ w(g, "~-~M}~N");
1047
1232
  }
1048
1233
  }
1049
1234
 
1050
1235
  static void generate_booltest(struct generator * g, struct node * p) {
1051
-
1052
1236
  write_comment(g, p);
1053
1237
  g->V[0] = p->name;
1054
1238
  write_failure_if(g, "!~V0", p);
1055
1239
  }
1056
1240
 
1057
1241
  static void generate_false(struct generator * g, struct node * p) {
1058
-
1059
1242
  write_comment(g, p);
1060
1243
  write_failure(g);
1061
1244
  }
1062
1245
 
1063
1246
  static void generate_debug(struct generator * g, struct node * p) {
1064
-
1065
1247
  write_comment(g, p);
1066
1248
  g->I[0] = g->debug_count++;
1067
1249
  g->I[1] = p->line_number;
@@ -1069,14 +1251,10 @@ static void generate_debug(struct generator * g, struct node * p) {
1069
1251
  }
1070
1252
 
1071
1253
  static void generate(struct generator * g, struct node * p) {
1072
-
1073
- int a0;
1074
- struct str * a1;
1075
-
1076
1254
  if (g->unreachable) return;
1077
1255
 
1078
- a0 = g->failure_label;
1079
- a1 = str_copy(g->failure_str);
1256
+ int a0 = g->failure_label;
1257
+ struct str * a1 = str_copy(g->failure_str);
1080
1258
 
1081
1259
  switch (p->type) {
1082
1260
  case c_define: generate_define(g, p); break;
@@ -1094,6 +1272,11 @@ static void generate(struct generator * g, struct node * p) {
1094
1272
  case c_do: generate_do(g, p); break;
1095
1273
  case c_goto: generate_GO(g, p, 1); break;
1096
1274
  case c_gopast: generate_GO(g, p, 0); break;
1275
+ case c_goto_grouping: generate_GO_grouping(g, p, 1, 0); break;
1276
+ case c_gopast_grouping:
1277
+ generate_GO_grouping(g, p, 0, 0); break;
1278
+ case c_goto_non: generate_GO_grouping(g, p, 1, 1); break;
1279
+ case c_gopast_non: generate_GO_grouping(g, p, 0, 1); break;
1097
1280
  case c_repeat: generate_repeat(g, p); break;
1098
1281
  case c_loop: generate_loop(g, p); break;
1099
1282
  case c_atleast: generate_atleast(g, p); break;
@@ -1120,12 +1303,14 @@ static void generate(struct generator * g, struct node * p) {
1120
1303
  case c_minusassign: generate_integer_assign(g, p, "-="); break;
1121
1304
  case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
1122
1305
  case c_divideassign: generate_integer_assign(g, p, "/="); break;
1123
- case c_eq: generate_integer_test(g, p, "=="); break;
1124
- case c_ne: generate_integer_test(g, p, "!="); break;
1125
- case c_gr: generate_integer_test(g, p, ">"); break;
1126
- case c_ge: generate_integer_test(g, p, ">="); break;
1127
- case c_ls: generate_integer_test(g, p, "<"); break;
1128
- case c_le: generate_integer_test(g, p, "<="); break;
1306
+ case c_eq:
1307
+ case c_ne:
1308
+ case c_gt:
1309
+ case c_ge:
1310
+ case c_lt:
1311
+ case c_le:
1312
+ generate_integer_test(g, p);
1313
+ break;
1129
1314
  case c_call: generate_call(g, p); break;
1130
1315
  case c_grouping: generate_grouping(g, p, false); break;
1131
1316
  case c_non: generate_grouping(g, p, true); break;
@@ -1137,6 +1322,7 @@ static void generate(struct generator * g, struct node * p) {
1137
1322
  case c_false: generate_false(g, p); break;
1138
1323
  case c_true: break;
1139
1324
  case c_debug: generate_debug(g, p); break;
1325
+ case c_functionend: generate_functionend(g, p); break;
1140
1326
  default: fprintf(stderr, "%d encountered\n", p->type);
1141
1327
  exit(1);
1142
1328
  }
@@ -1151,7 +1337,6 @@ static void generate(struct generator * g, struct node * p) {
1151
1337
  /* To allow warning free compilation of generated code and */
1152
1338
  /* consistency with snowball variable namings we allow some kind of warnings here */
1153
1339
  static void generate_allow_warnings(struct generator * g) {
1154
-
1155
1340
  w(g, "#![allow(non_snake_case)]~N");
1156
1341
  w(g, "#![allow(non_upper_case_globals)]~N");
1157
1342
  w(g, "#![allow(unused_mut)]~N");
@@ -1160,7 +1345,6 @@ static void generate_allow_warnings(struct generator * g) {
1160
1345
  }
1161
1346
 
1162
1347
  static void generate_class_begin(struct generator * g) {
1163
-
1164
1348
  w(g, "use snowball::SnowballEnv;~N");
1165
1349
  if (g->analyser->among_count > 0) {
1166
1350
  w(g, "use snowball::Among;~N~N");
@@ -1168,6 +1352,7 @@ static void generate_class_begin(struct generator * g) {
1168
1352
  }
1169
1353
 
1170
1354
  static void generate_among_table(struct generator * g, struct among * x) {
1355
+ write_comment(g, x->node);
1171
1356
 
1172
1357
  struct amongvec * v = x->b;
1173
1358
 
@@ -1175,32 +1360,27 @@ static void generate_among_table(struct generator * g, struct among * x) {
1175
1360
  g->I[1] = x->literalstring_count;
1176
1361
 
1177
1362
  w(g, "~Mstatic A_~I0: &'static [Among<Context>; ~I1] = &[~N~+");
1178
- {
1179
- int i;
1180
- for (i = 0; i < x->literalstring_count; i++) {
1181
- g->I[0] = v->i;
1182
- g->I[1] = v->result;
1183
- g->L[0] = v->b;
1184
- g->S[0] = ",";
1185
-
1186
- w(g, "~MAmong(~L0, ~I0, ~I1, ");
1187
- if (v->function != 0) {
1188
- w(g, "Some(&");
1189
- write_varname(g, v->function);
1190
- w(g, ")");
1191
- } else {
1192
- w(g, "None");
1193
- }
1194
- w(g, ")~S0~N");
1195
- v++;
1363
+ for (int i = 0; i < x->literalstring_count; i++) {
1364
+ g->I[0] = v[i].i;
1365
+ g->I[1] = v[i].result;
1366
+ g->L[0] = v[i].b;
1367
+ g->S[0] = ",";
1368
+
1369
+ w(g, "~MAmong(~L0, ~I0, ~I1, ");
1370
+ if (v[i].function != NULL) {
1371
+ w(g, "Some(&");
1372
+ write_varname(g, v[i].function);
1373
+ w(g, ")");
1374
+ } else {
1375
+ w(g, "None");
1196
1376
  }
1377
+ w(g, ")~S0~N");
1197
1378
  }
1198
1379
  w(g, "~-~M];~N~N");
1199
1380
  }
1200
1381
 
1201
1382
  static void generate_amongs(struct generator * g) {
1202
- struct among * x;
1203
- for (x = g->analyser->amongs; x; x = x->next) {
1383
+ for (struct among * x = g->analyser->amongs; x; x = x->next) {
1204
1384
  generate_among_table(g, x);
1205
1385
  }
1206
1386
  }
@@ -1208,42 +1388,38 @@ static void generate_amongs(struct generator * g) {
1208
1388
  static void set_bit(symbol * b, int i) { b[i/8] |= 1 << i%8; }
1209
1389
 
1210
1390
  static void generate_grouping_table(struct generator * g, struct grouping * q) {
1211
-
1212
1391
  int range = q->largest_ch - q->smallest_ch + 1;
1213
1392
  int size = (range + 7)/ 8; /* assume 8 bits per symbol */
1214
1393
  symbol * b = q->b;
1215
1394
  symbol * map = create_b(size);
1216
- int i;
1217
- for (i = 0; i < size; i++) map[i] = 0;
1218
1395
 
1219
- for (i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
1396
+ for (int i = 0; i < size; i++) map[i] = 0;
1397
+
1398
+ for (int i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
1220
1399
 
1221
1400
  g->V[0] = q->name;
1222
1401
  g->I[0] = size;
1223
1402
  w(g, "~Mstatic ~W0: &'static [u8; ~I0] = &[");
1224
- for (i = 0; i < size; i++) {
1403
+ for (int i = 0; i < size; i++) {
1225
1404
  write_int(g, map[i]);
1226
1405
  if (i < size - 1) w(g, ", ");
1227
1406
  }
1228
1407
  w(g, "];~N~N");
1408
+
1229
1409
  lose_b(map);
1230
1410
  }
1231
1411
 
1232
1412
  static void generate_groupings(struct generator * g) {
1233
- struct grouping * q;
1234
- for (q = g->analyser->groupings; q; q = q->next) {
1413
+ for (struct grouping * q = g->analyser->groupings; q; q = q->next) {
1235
1414
  if (q->name->used)
1236
1415
  generate_grouping_table(g, q);
1237
1416
  }
1238
1417
  }
1239
1418
 
1240
-
1241
1419
  static void generate_members(struct generator * g) {
1242
-
1243
- struct name * q;
1244
1420
  w(g, "#[derive(Clone)]~N");
1245
1421
  w(g, "struct Context {~+~N");
1246
- for (q = g->analyser->names; q; q = q->next) {
1422
+ for (struct name * q = g->analyser->names; q; q = q->next) {
1247
1423
  g->V[0] = q;
1248
1424
  switch (q->type) {
1249
1425
  case t_string:
@@ -1261,17 +1437,13 @@ static void generate_members(struct generator * g) {
1261
1437
  }
1262
1438
 
1263
1439
  static void generate_methods(struct generator * g) {
1264
-
1265
- struct node * p = g->analyser->program;
1266
- while (p != 0) {
1440
+ for (struct node * p = g->analyser->program; p; p = p->right) {
1267
1441
  generate(g, p);
1268
1442
  g->unreachable = false;
1269
- p = p->right;
1270
1443
  }
1271
1444
  }
1272
1445
 
1273
1446
  extern void generate_program_rust(struct generator * g) {
1274
-
1275
1447
  g->outbuf = str_new();
1276
1448
  g->failure_str = str_new();
1277
1449