mittens 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +4 -4
  5. data/lib/mittens/version.rb +1 -1
  6. data/mittens.gemspec +1 -1
  7. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  8. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  9. data/vendor/snowball/GNUmakefile +194 -136
  10. data/vendor/snowball/NEWS +798 -3
  11. data/vendor/snowball/README.rst +50 -1
  12. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  13. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  14. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  15. data/vendor/snowball/algorithms/basque.sbl +4 -19
  16. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  17. data/vendor/snowball/algorithms/danish.sbl +1 -1
  18. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  19. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  20. data/vendor/snowball/algorithms/english.sbl +52 -37
  21. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  22. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  23. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  24. data/vendor/snowball/algorithms/french.sbl +42 -16
  25. data/vendor/snowball/algorithms/german.sbl +35 -14
  26. data/vendor/snowball/algorithms/greek.sbl +76 -76
  27. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  28. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  29. data/vendor/snowball/algorithms/italian.sbl +11 -21
  30. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  31. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  32. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  33. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  34. data/vendor/snowball/algorithms/porter.sbl +2 -2
  35. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  36. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  37. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  38. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  39. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  40. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  41. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  42. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  43. data/vendor/snowball/compiler/analyser.c +445 -192
  44. data/vendor/snowball/compiler/driver.c +109 -101
  45. data/vendor/snowball/compiler/generator.c +853 -464
  46. data/vendor/snowball/compiler/generator_ada.c +404 -366
  47. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  48. data/vendor/snowball/compiler/generator_go.c +323 -254
  49. data/vendor/snowball/compiler/generator_java.c +326 -252
  50. data/vendor/snowball/compiler/generator_js.c +362 -252
  51. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  52. data/vendor/snowball/compiler/generator_python.c +257 -240
  53. data/vendor/snowball/compiler/generator_rust.c +423 -251
  54. data/vendor/snowball/compiler/header.h +117 -71
  55. data/vendor/snowball/compiler/space.c +137 -68
  56. data/vendor/snowball/compiler/syswords.h +2 -2
  57. data/vendor/snowball/compiler/tokeniser.c +125 -107
  58. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  59. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  60. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  61. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  62. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  63. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  64. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  65. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  66. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  67. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  68. data/vendor/snowball/examples/stemwords.c +12 -12
  69. data/vendor/snowball/go/env.go +107 -31
  70. data/vendor/snowball/go/util.go +0 -4
  71. data/vendor/snowball/include/libstemmer.h +4 -0
  72. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  73. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  74. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  75. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  76. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  77. data/vendor/snowball/javascript/stemwords.js +3 -6
  78. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  79. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  80. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  81. data/vendor/snowball/libstemmer/modules.txt +13 -10
  82. data/vendor/snowball/libstemmer/test.c +1 -1
  83. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  84. data/vendor/snowball/pascal/generate.pl +13 -13
  85. data/vendor/snowball/python/create_init.py +4 -1
  86. data/vendor/snowball/python/setup.cfg +0 -3
  87. data/vendor/snowball/python/setup.py +8 -3
  88. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  89. data/vendor/snowball/python/stemwords.py +8 -12
  90. data/vendor/snowball/runtime/api.c +10 -5
  91. data/vendor/snowball/runtime/header.h +10 -9
  92. data/vendor/snowball/runtime/utilities.c +9 -9
  93. data/vendor/snowball/rust/build.rs +1 -1
  94. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  95. data/vendor/snowball/tests/stemtest.c +7 -4
  96. metadata +8 -12
  97. data/vendor/snowball/.travis.yml +0 -112
  98. data/vendor/snowball/algorithms/german2.sbl +0 -145
  99. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  100. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -1,4 +1,4 @@
1
-
1
+ #include <assert.h>
2
2
  #include <stdlib.h> /* for exit */
3
3
  #include <string.h> /* for strlen */
4
4
  #include <stdio.h> /* for fprintf etc */
@@ -17,7 +17,6 @@ static int new_label(struct generator * g) {
17
17
  }
18
18
 
19
19
  static struct str * vars_newname(struct generator * g) {
20
-
21
20
  struct str * output;
22
21
  g->var_number++;
23
22
  output = str_new();
@@ -26,11 +25,9 @@ static struct str * vars_newname(struct generator * g) {
26
25
  return output;
27
26
  }
28
27
 
29
-
30
28
  /* Write routines for items from the syntax tree */
31
29
 
32
30
  static void write_varname(struct generator * g, struct name * p) {
33
-
34
31
  switch (p->type) {
35
32
  case t_external:
36
33
  write_char(g, '_');
@@ -45,7 +42,7 @@ static void write_varname(struct generator * g, struct name * p) {
45
42
  break;
46
43
  }
47
44
  }
48
- write_b(g, p->b);
45
+ write_s(g, p->s);
49
46
  }
50
47
 
51
48
  static void write_varref(struct generator * g, struct name * p) {
@@ -53,40 +50,43 @@ static void write_varref(struct generator * g, struct name * p) {
53
50
  write_varname(g, p);
54
51
  }
55
52
 
56
- static void write_hexdigit(struct generator * g, int n) {
57
-
58
- write_char(g, n < 10 ? n + '0' : n - 10 + 'A');
59
- }
60
-
61
- static void write_hex(struct generator * g, int ch) {
62
-
63
- write_string(g, "\\u");
64
- {
65
- int i;
66
- for (i = 12; i >= 0; i -= 4) write_hexdigit(g, ch >> i & 0xf);
67
- }
68
- }
69
-
70
53
  static void write_literal_string(struct generator * g, symbol * p) {
71
-
72
- int i;
73
54
  write_string(g, "u\"");
74
- for (i = 0; i < SIZE(p); i++) {
55
+ for (int i = 0; i < SIZE(p); i++) {
75
56
  int ch = p[i];
76
- if (32 <= ch && ch < 127) {
77
- if (ch == '\"' || ch == '\\') write_string(g, "\\");
78
- write_char(g, ch);
57
+ if (32 <= ch && ch < 0x590 && ch != 127) {
58
+ if (ch == '"' || ch == '\\') write_char(g, '\\');
59
+ // Our Python generator uses ENC_WIDECHARS so we need to convert.
60
+ write_wchar_as_utf8(g, ch);
79
61
  } else {
80
- write_hex(g, ch);
62
+ // Use escapes for anything over 0x590 as a crude way to avoid
63
+ // LTR characters affecting the rendering of source character
64
+ // order in confusing ways.
65
+ write_string(g, "\\u");
66
+ write_hex4(g, ch);
81
67
  }
82
68
  }
83
69
  write_string(g, "\"");
84
70
  }
85
71
 
86
- static void write_margin(struct generator * g) {
72
+ static void write_literal_char(struct generator * g, symbol ch) {
73
+ write_string(g, "u\"");
74
+ if (32 <= ch && ch < 0x590 && ch != 127) {
75
+ if (ch == '"' || ch == '\\') write_char(g, '\\');
76
+ // Python uses ENC_WIDECHARS so we need to convert.
77
+ write_wchar_as_utf8(g, ch);
78
+ } else {
79
+ // Use escapes for anything over 0x590 as a crude way to avoid
80
+ // LTR characters affecting the rendering of source character
81
+ // order in confusing ways.
82
+ write_string(g, "\\u");
83
+ write_hex4(g, ch);
84
+ }
85
+ write_string(g, "\"");
86
+ }
87
87
 
88
- int i;
89
- for (i = 0; i < g->margin; i++) write_string(g, " ");
88
+ static void write_margin(struct generator * g) {
89
+ for (int i = 0; i < g->margin; i++) write_string(g, " ");
90
90
  }
91
91
 
92
92
  static void write_comment(struct generator * g, struct node * p) {
@@ -98,18 +98,15 @@ static void write_comment(struct generator * g, struct node * p) {
98
98
  }
99
99
 
100
100
  static void write_block_start(struct generator * g) {
101
-
102
101
  w(g, "~+~N");
103
102
  }
104
103
 
105
- static void write_block_end(struct generator * g) /* block end */ {
106
-
104
+ static void write_block_end(struct generator * g) {
107
105
  w(g, "~-");
108
106
  }
109
107
 
110
108
  static void write_savecursor(struct generator * g, struct node * p,
111
109
  struct str * savevar) {
112
-
113
110
  g->B[0] = str_data(savevar);
114
111
  g->S[1] = "";
115
112
  if (p->mode != m_forward) g->S[1] = "self.limit - ";
@@ -117,7 +114,6 @@ static void write_savecursor(struct generator * g, struct node * p,
117
114
  }
118
115
 
119
116
  static void restore_string(struct node * p, struct str * out, struct str * savevar) {
120
-
121
117
  str_clear(out);
122
118
  str_append_string(out, "self.cursor = ");
123
119
  if (p->mode != m_forward) str_append_string(out, "self.limit - ");
@@ -126,7 +122,6 @@ static void restore_string(struct node * p, struct str * out, struct str * savev
126
122
 
127
123
  static void write_restorecursor(struct generator * g, struct node * p,
128
124
  struct str * savevar) {
129
-
130
125
  struct str * temp = str_new();
131
126
  write_margin(g);
132
127
  restore_string(p, temp, savevar);
@@ -136,14 +131,12 @@ static void write_restorecursor(struct generator * g, struct node * p,
136
131
  }
137
132
 
138
133
  static void write_inc_cursor(struct generator * g, struct node * p) {
139
-
140
134
  write_margin(g);
141
135
  write_string(g, p->mode == m_forward ? "self.cursor += 1" : "self.cursor -= 1");
142
136
  write_newline(g);
143
137
  }
144
138
 
145
139
  static void wsetlab_begin(struct generator * g) {
146
-
147
140
  w(g, "~Mtry:~N~+");
148
141
  }
149
142
 
@@ -158,7 +151,6 @@ static void wgotol(struct generator * g, int n) {
158
151
  }
159
152
 
160
153
  static void write_failure(struct generator * g) {
161
-
162
154
  if (str_len(g->failure_str) != 0) {
163
155
  write_margin(g);
164
156
  write_str(g, g->failure_str);
@@ -176,8 +168,7 @@ static void write_failure(struct generator * g) {
176
168
  }
177
169
  }
178
170
 
179
- static void write_failure_if(struct generator * g, char * s, struct node * p) {
180
-
171
+ static void write_failure_if(struct generator * g, const char * s, struct node * p) {
181
172
  writef(g, "~Mif ", p);
182
173
  writef(g, s, p);
183
174
  writef(g, ":", p);
@@ -189,7 +180,6 @@ static void write_failure_if(struct generator * g, char * s, struct node * p) {
189
180
 
190
181
  /* if at limit fail */
191
182
  static void write_check_limit(struct generator * g, struct node * p) {
192
-
193
183
  if (p->mode == m_forward) {
194
184
  write_failure_if(g, "self.cursor >= self.limit", p);
195
185
  } else {
@@ -199,18 +189,18 @@ static void write_check_limit(struct generator * g, struct node * p) {
199
189
 
200
190
  /* Formatted write. */
201
191
  static void writef(struct generator * g, const char * input, struct node * p) {
192
+ (void)p;
202
193
  int i = 0;
203
- int l = strlen(input);
204
194
 
205
- while (i < l) {
195
+ while (input[i]) {
206
196
  int ch = input[i++];
207
197
  if (ch != '~') {
208
198
  write_char(g, ch);
209
199
  continue;
210
200
  }
211
- switch (input[i++]) {
212
- default: write_char(g, input[i - 1]); continue;
213
- case 'C': write_comment(g, p); continue;
201
+ ch = input[i++];
202
+ switch (ch) {
203
+ case '~': write_char(g, '~'); continue;
214
204
  case 'f': write_block_start(g);
215
205
  write_failure(g);
216
206
  g->unreachable = false;
@@ -220,21 +210,65 @@ static void writef(struct generator * g, const char * input, struct node * p) {
220
210
  case 'N': write_newline(g); continue;
221
211
  case '{': write_block_start(g); continue;
222
212
  case '}': write_block_end(g); continue;
223
- case 'S': write_string(g, g->S[input[i++] - '0']); continue;
224
- case 'B': write_b(g, g->B[input[i++] - '0']); continue;
225
- case 'I': write_int(g, g->I[input[i++] - '0']); continue;
226
- case 'V': write_varref(g, g->V[input[i++] - '0']); continue;
227
- case 'W': write_varname(g, g->V[input[i++] - '0']); continue;
228
- case 'L': write_literal_string(g, g->L[input[i++] - '0']); continue;
213
+ case 'S': {
214
+ int j = input[i++] - '0';
215
+ if (j < 0 || j > (int)(sizeof(g->S) / sizeof(g->S[0]))) {
216
+ printf("Invalid escape sequence ~%c%c in writef(g, \"%s\", p)\n",
217
+ ch, input[i - 1], input);
218
+ exit(1);
219
+ }
220
+ write_string(g, g->S[j]);
221
+ continue;
222
+ }
223
+ case 'B': {
224
+ int j = input[i++] - '0';
225
+ if (j < 0 || j > (int)(sizeof(g->B) / sizeof(g->B[0])))
226
+ goto invalid_escape2;
227
+ write_s(g, g->B[j]);
228
+ continue;
229
+ }
230
+ case 'I': {
231
+ int j = input[i++] - '0';
232
+ if (j < 0 || j > (int)(sizeof(g->I) / sizeof(g->I[0])))
233
+ goto invalid_escape2;
234
+ write_int(g, g->I[j]);
235
+ continue;
236
+ }
237
+ case 'V':
238
+ case 'W': {
239
+ int j = input[i++] - '0';
240
+ if (j < 0 || j > (int)(sizeof(g->V) / sizeof(g->V[0])))
241
+ goto invalid_escape2;
242
+ if (ch == 'V')
243
+ write_varref(g, g->V[j]);
244
+ else
245
+ write_varname(g, g->V[j]);
246
+ continue;
247
+ }
248
+ case 'L': {
249
+ int j = input[i++] - '0';
250
+ if (j < 0 || j > (int)(sizeof(g->L) / sizeof(g->L[0])))
251
+ goto invalid_escape2;
252
+ write_literal_string(g, g->L[j]);
253
+ continue;
254
+ }
229
255
  case '+': g->margin++; continue;
230
256
  case '-': g->margin--; continue;
231
257
  case 'n': write_string(g, g->options->name); continue;
258
+ default:
259
+ printf("Invalid escape sequence ~%c in writef(g, \"%s\", p)\n",
260
+ ch, input);
261
+ exit(1);
262
+ invalid_escape2:
263
+ printf("Invalid escape sequence ~%c%c in writef(g, \"%s\", p)\n",
264
+ ch, input[i - 1], input);
265
+ exit(1);
232
266
  }
233
267
  }
234
268
  }
235
269
 
236
270
  static void w(struct generator * g, const char * s) {
237
- writef(g, s, 0);
271
+ writef(g, s, NULL);
238
272
  }
239
273
 
240
274
  static void generate_AE(struct generator * g, struct node * p) {
@@ -289,7 +323,6 @@ static void generate_AE(struct generator * g, struct node * p) {
289
323
  }
290
324
 
291
325
  static void generate_bra(struct generator * g, struct node * p) {
292
-
293
326
  write_comment(g, p);
294
327
  p = p->left;
295
328
  while (p) {
@@ -299,7 +332,6 @@ static void generate_bra(struct generator * g, struct node * p) {
299
332
  }
300
333
 
301
334
  static void generate_and(struct generator * g, struct node * p) {
302
-
303
335
  struct str * savevar = vars_newname(g);
304
336
  int keep_c = K_needed(g, p->left);
305
337
 
@@ -311,14 +343,13 @@ static void generate_and(struct generator * g, struct node * p) {
311
343
  while (p) {
312
344
  generate(g, p);
313
345
  if (g->unreachable) break;
314
- if (keep_c && p->right != 0) write_restorecursor(g, p, savevar);
346
+ if (keep_c && p->right != NULL) write_restorecursor(g, p, savevar);
315
347
  p = p->right;
316
348
  }
317
349
  str_delete(savevar);
318
350
  }
319
351
 
320
352
  static void generate_or(struct generator * g, struct node * p) {
321
-
322
353
  struct str * savevar = vars_newname(g);
323
354
  int keep_c = K_needed(g, p->left);
324
355
 
@@ -336,13 +367,13 @@ static void generate_or(struct generator * g, struct node * p) {
336
367
  p = p->left;
337
368
  str_clear(g->failure_str);
338
369
 
339
- if (p == 0) {
340
- /* p should never be 0 after an or: there should be at least two
370
+ if (p == NULL) {
371
+ /* p should never be NULL after an or: there should be at least two
341
372
  * sub nodes. */
342
373
  fprintf(stderr, "Error: \"or\" node without children nodes.");
343
374
  exit(1);
344
375
  }
345
- while (p->right != 0) {
376
+ while (p->right != NULL) {
346
377
  int label = new_label(g);
347
378
  g->failure_label = label;
348
379
  wsetlab_begin(g);
@@ -370,7 +401,6 @@ static void generate_or(struct generator * g, struct node * p) {
370
401
  }
371
402
 
372
403
  static void generate_backwards(struct generator * g, struct node * p) {
373
-
374
404
  write_comment(g, p);
375
405
  writef(g, "~Mself.limit_backward = self.cursor~N"
376
406
  "~Mself.cursor = self.limit~N", p);
@@ -380,7 +410,6 @@ static void generate_backwards(struct generator * g, struct node * p) {
380
410
 
381
411
 
382
412
  static void generate_not(struct generator * g, struct node * p) {
383
-
384
413
  struct str * savevar = vars_newname(g);
385
414
  int keep_c = K_needed(g, p->left);
386
415
 
@@ -414,7 +443,6 @@ static void generate_not(struct generator * g, struct node * p) {
414
443
 
415
444
 
416
445
  static void generate_try(struct generator * g, struct node * p) {
417
-
418
446
  struct str * savevar = vars_newname(g);
419
447
  int keep_c = K_needed(g, p->left);
420
448
  int label = new_label(g);
@@ -436,21 +464,18 @@ static void generate_try(struct generator * g, struct node * p) {
436
464
  }
437
465
 
438
466
  static void generate_set(struct generator * g, struct node * p) {
439
-
440
467
  write_comment(g, p);
441
468
  g->V[0] = p->name;
442
469
  writef(g, "~M~V0 = True~N", p);
443
470
  }
444
471
 
445
472
  static void generate_unset(struct generator * g, struct node * p) {
446
-
447
473
  write_comment(g, p);
448
474
  g->V[0] = p->name;
449
475
  writef(g, "~M~V0 = False~N", p);
450
476
  }
451
477
 
452
478
  static void generate_fail(struct generator * g, struct node * p) {
453
-
454
479
  write_comment(g, p);
455
480
  generate(g, p->left);
456
481
  if (!g->unreachable) write_failure(g);
@@ -459,7 +484,6 @@ static void generate_fail(struct generator * g, struct node * p) {
459
484
  /* generate_test() also implements 'reverse' */
460
485
 
461
486
  static void generate_test(struct generator * g, struct node * p) {
462
-
463
487
  struct str * savevar = vars_newname(g);
464
488
  int keep_c = K_needed(g, p->left);
465
489
 
@@ -480,7 +504,6 @@ static void generate_test(struct generator * g, struct node * p) {
480
504
  }
481
505
 
482
506
  static void generate_do(struct generator * g, struct node * p) {
483
-
484
507
  struct str * savevar = vars_newname(g);
485
508
  int keep_c = K_needed(g, p->left);
486
509
  write_comment(g, p);
@@ -506,17 +529,19 @@ static void generate_do(struct generator * g, struct node * p) {
506
529
  str_delete(savevar);
507
530
  }
508
531
 
509
- static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
532
+ static void generate_next(struct generator * g, struct node * p) {
533
+ write_comment(g, p);
534
+ write_check_limit(g, p);
535
+ write_inc_cursor(g, p);
536
+ }
510
537
 
511
- struct grouping * q = p->name->grouping;
538
+ static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
512
539
  write_comment(g, p);
540
+
513
541
  g->S[0] = p->mode == m_forward ? "" : "_b";
514
542
  g->S[1] = complement ? "in" : "out";
515
- g->S[2] = g->options->encoding == ENC_UTF8 ? "_U" : "";
516
543
  g->V[0] = p->name;
517
- g->I[0] = q->smallest_ch;
518
- g->I[1] = q->largest_ch;
519
- write_failure_if(g, "not self.go_~S1_grouping~S0~S2(~n.~W0, ~I0, ~I1)", p);
544
+ write_failure_if(g, "not self.go_~S1_grouping~S0(~n.~W0)", p);
520
545
  if (!is_goto) {
521
546
  if (p->mode == m_forward)
522
547
  w(g, "~Mself.cursor += 1~N");
@@ -526,42 +551,21 @@ static void generate_GO_grouping(struct generator * g, struct node * p, int is_g
526
551
  }
527
552
 
528
553
  static void generate_GO(struct generator * g, struct node * p, int style) {
529
- int end_unreachable;
530
- struct str * savevar;
531
- int keep_c;
532
-
533
- int a0;
534
- struct str * a1;
535
-
536
- int golab;
537
- int label;
538
-
539
- if (p->left->type == c_grouping || p->left->type == c_non) {
540
- /* Special case for "goto" or "gopast" when used on a grouping or an
541
- * inverted grouping - the movement of c by the matching action is
542
- * exactly what we want! */
543
- #ifdef OPTIMISATION_WARNINGS
544
- printf("Optimising %s %s\n", style ? "goto" : "gopast", p->left->type == c_non ? "non" : "grouping");
545
- #endif
546
- write_comment(g, p);
547
- generate_GO_grouping(g, p->left, style, p->left->type == c_non);
548
- return;
549
- }
554
+ write_comment(g, p);
550
555
 
551
- end_unreachable = false;
552
- savevar = vars_newname(g);
553
- keep_c = style == 1 || repeat_restore(g, p->left);
556
+ int end_unreachable = false;
557
+ struct str * savevar = vars_newname(g);
558
+ int keep_c = style == 1 || repeat_restore(g, p->left);
554
559
 
555
- a0 = g->failure_label;
556
- a1 = str_copy(g->failure_str);
560
+ int a0 = g->failure_label;
561
+ struct str * a1 = str_copy(g->failure_str);
557
562
 
558
- golab = new_label(g);
559
- write_comment(g, p);
563
+ int golab = new_label(g);
560
564
  w(g, "~Mtry:~N~+"
561
565
  "~Mwhile True:~N~+");
562
566
  if (keep_c) write_savecursor(g, p, savevar);
563
567
 
564
- label = new_label(g);
568
+ int label = new_label(g);
565
569
  g->failure_label = label;
566
570
  str_clear(g->failure_str);
567
571
  wsetlab_begin(g);
@@ -595,14 +599,24 @@ static void generate_GO(struct generator * g, struct node * p, int style) {
595
599
  }
596
600
 
597
601
  static void generate_loop(struct generator * g, struct node * p) {
598
-
599
602
  struct str * loopvar = vars_newname(g);
600
603
  write_comment(g, p);
601
604
  g->B[0] = str_data(loopvar);
602
- w(g, "~Mfor ~B0 in range (");
603
- generate_AE(g, p->AE);
604
- g->B[0] = str_data(loopvar);
605
- writef(g, ", 0, -1):~N", p);
605
+ if (p->AE->type == c_number && p->AE->number <= 4) {
606
+ // Use a tuple instead of range() for small constant numbers of
607
+ // iterations.
608
+ w(g, "~Mfor ~B0 in ");
609
+ for (int i = p->AE->number; i > 0; --i) {
610
+ w(g, "0");
611
+ if (i > 1) w(g, ", ");
612
+ }
613
+ writef(g, ":~N", p);
614
+ } else {
615
+ w(g, "~Mfor ~B0 in range(");
616
+ generate_AE(g, p->AE);
617
+ g->B[0] = str_data(loopvar);
618
+ writef(g, "):~N", p);
619
+ }
606
620
  writef(g, "~{", p);
607
621
 
608
622
  generate(g, p->left);
@@ -613,7 +627,6 @@ static void generate_loop(struct generator * g, struct node * p) {
613
627
  }
614
628
 
615
629
  static void generate_repeat_or_atleast(struct generator * g, struct node * p, struct str * loopvar) {
616
-
617
630
  struct str * savevar = vars_newname(g);
618
631
  int keep_c = repeat_restore(g, p->left);
619
632
  int label = new_label(g);
@@ -626,7 +639,7 @@ static void generate_repeat_or_atleast(struct generator * g, struct node * p, st
626
639
  generate(g, p->left);
627
640
 
628
641
  if (!g->unreachable) {
629
- if (loopvar != 0) {
642
+ if (loopvar != NULL) {
630
643
  g->B[0] = str_data(loopvar);
631
644
  w(g, "~M~B0 -= 1~N");
632
645
  }
@@ -649,7 +662,6 @@ static void generate_repeat(struct generator * g, struct node * p) {
649
662
  }
650
663
 
651
664
  static void generate_atleast(struct generator * g, struct node * p) {
652
-
653
665
  struct str * loopvar = vars_newname(g);
654
666
  write_comment(g, p);
655
667
  g->B[0] = str_data(loopvar);
@@ -672,14 +684,12 @@ static void generate_atleast(struct generator * g, struct node * p) {
672
684
  }
673
685
 
674
686
  static void generate_setmark(struct generator * g, struct node * p) {
675
-
676
687
  write_comment(g, p);
677
688
  g->V[0] = p->name;
678
689
  writef(g, "~M~V0 = self.cursor~N", p);
679
690
  }
680
691
 
681
692
  static void generate_tomark(struct generator * g, struct node * p) {
682
-
683
693
  write_comment(g, p);
684
694
  g->S[0] = p->mode == m_forward ? ">" : "<";
685
695
 
@@ -692,7 +702,6 @@ static void generate_tomark(struct generator * g, struct node * p) {
692
702
  }
693
703
 
694
704
  static void generate_atmark(struct generator * g, struct node * p) {
695
-
696
705
  write_comment(g, p);
697
706
  w(g, "~Mif self.cursor != "); generate_AE(g, p->AE); writef(g, ":", p);
698
707
  write_block_start(g);
@@ -702,7 +711,6 @@ static void generate_atmark(struct generator * g, struct node * p) {
702
711
  }
703
712
 
704
713
  static void generate_hop(struct generator * g, struct node * p) {
705
-
706
714
  write_comment(g, p);
707
715
  g->S[0] = p->mode == m_forward ? "+" : "-";
708
716
 
@@ -725,30 +733,19 @@ static void generate_hop(struct generator * g, struct node * p) {
725
733
  }
726
734
 
727
735
  static void generate_delete(struct generator * g, struct node * p) {
728
-
729
736
  write_comment(g, p);
730
737
  writef(g, "~Mif not self.slice_del():~N"
731
738
  "~+~Mreturn False~N~-"
732
739
  "~N", p);
733
740
  }
734
741
 
735
-
736
- static void generate_next(struct generator * g, struct node * p) {
737
-
738
- write_comment(g, p);
739
- write_check_limit(g, p);
740
- write_inc_cursor(g, p);
741
- }
742
-
743
742
  static void generate_tolimit(struct generator * g, struct node * p) {
744
-
745
743
  write_comment(g, p);
746
744
  g->S[0] = p->mode == m_forward ? "self.limit" : "self.limit_backward";
747
745
  writef(g, "~Mself.cursor = ~S0~N", p);
748
746
  }
749
747
 
750
748
  static void generate_atlimit(struct generator * g, struct node * p) {
751
-
752
749
  write_comment(g, p);
753
750
  g->S[0] = p->mode == m_forward ? "self.limit" : "self.limit_backward";
754
751
  g->S[1] = p->mode == m_forward ? "<" : ">";
@@ -756,28 +753,24 @@ static void generate_atlimit(struct generator * g, struct node * p) {
756
753
  }
757
754
 
758
755
  static void generate_leftslice(struct generator * g, struct node * p) {
759
-
760
756
  write_comment(g, p);
761
757
  g->S[0] = p->mode == m_forward ? "self.bra" : "self.ket";
762
758
  writef(g, "~M~S0 = self.cursor~N", p);
763
759
  }
764
760
 
765
761
  static void generate_rightslice(struct generator * g, struct node * p) {
766
-
767
762
  write_comment(g, p);
768
763
  g->S[0] = p->mode == m_forward ? "self.ket" : "self.bra";
769
764
  writef(g, "~M~S0 = self.cursor~N", p);
770
765
  }
771
766
 
772
767
  static void generate_assignto(struct generator * g, struct node * p) {
773
-
774
768
  write_comment(g, p);
775
769
  g->V[0] = p->name;
776
770
  writef(g, "~M~V0 = self.assign_to()~N", p);
777
771
  }
778
772
 
779
773
  static void generate_sliceto(struct generator * g, struct node * p) {
780
-
781
774
  write_comment(g, p);
782
775
  g->V[0] = p->name;
783
776
  writef(g, "~M~V0 = self.slice_to()~N"
@@ -786,9 +779,8 @@ static void generate_sliceto(struct generator * g, struct node * p) {
786
779
  }
787
780
 
788
781
  static void generate_address(struct generator * g, struct node * p) {
789
-
790
782
  symbol * b = p->literalstring;
791
- if (b != 0) {
783
+ if (b != NULL) {
792
784
  write_literal_string(g, b);
793
785
  } else {
794
786
  write_varref(g, p->name);
@@ -796,7 +788,6 @@ static void generate_address(struct generator * g, struct node * p) {
796
788
  }
797
789
 
798
790
  static void generate_insert(struct generator * g, struct node * p, int style) {
799
-
800
791
  int keep_c = style == c_attach;
801
792
  write_comment(g, p);
802
793
  if (p->mode == m_backward) keep_c = !keep_c;
@@ -808,7 +799,6 @@ static void generate_insert(struct generator * g, struct node * p, int style) {
808
799
  }
809
800
 
810
801
  static void generate_assignfrom(struct generator * g, struct node * p) {
811
-
812
802
  int keep_c = p->mode == m_forward; /* like 'attach' */
813
803
 
814
804
  write_comment(g, p);
@@ -823,9 +813,7 @@ static void generate_assignfrom(struct generator * g, struct node * p) {
823
813
  if (keep_c) w(g, "~Mself.cursor = c~N");
824
814
  }
825
815
 
826
-
827
816
  static void generate_slicefrom(struct generator * g, struct node * p) {
828
-
829
817
  write_comment(g, p);
830
818
  w(g, "~Mif not self.slice_from(");
831
819
  generate_address(g, p);
@@ -847,6 +835,7 @@ static void generate_setlimit(struct generator * g, struct node * p) {
847
835
  * restore c.
848
836
  */
849
837
  struct node * q = p->left;
838
+ write_comment(g, q);
850
839
  g->S[0] = q->mode == m_forward ? ">" : "<";
851
840
  w(g, "~Mif self.cursor ~S0 "); generate_AE(g, q->AE); w(g, ":");
852
841
  write_block_start(g);
@@ -913,7 +902,6 @@ static void generate_setlimit(struct generator * g, struct node * p) {
913
902
  /* dollar sets snowball up to operate on a string variable as if it were the
914
903
  * current string */
915
904
  static void generate_dollar(struct generator * g, struct node * p) {
916
-
917
905
  struct str * savevar = vars_newname(g);
918
906
  g->B[0] = str_data(savevar);
919
907
  write_comment(g, p);
@@ -944,48 +932,72 @@ static void generate_dollar(struct generator * g, struct node * p) {
944
932
  str_delete(savevar);
945
933
  }
946
934
 
947
- static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
948
-
935
+ static void generate_integer_assign(struct generator * g, struct node * p, const char * s) {
936
+ write_comment(g, p);
949
937
  g->V[0] = p->name;
950
938
  g->S[0] = s;
951
939
  w(g, "~M~V0 ~S0 "); generate_AE(g, p->AE); w(g, "~N");
952
940
  }
953
941
 
954
- static void generate_integer_test(struct generator * g, struct node * p, char * s) {
955
-
956
- w(g, "~Mif not ");
942
+ static void generate_integer_test(struct generator * g, struct node * p) {
943
+ write_comment(g, p);
944
+ int relop = p->type;
945
+ int optimise_to_return = (g->failure_label == x_return && p->right && p->right->type == c_functionend);
946
+ if (optimise_to_return) {
947
+ w(g, "~Mreturn ");
948
+ p->right = NULL;
949
+ } else {
950
+ w(g, "~Mif ");
951
+ // We want the inverse of the snowball test here.
952
+ relop ^= 1;
953
+ }
957
954
  generate_AE(g, p->left);
958
- write_char(g, ' ');
959
- write_string(g, s);
960
- write_char(g, ' ');
955
+ // Relational operators are the same as C.
956
+ write_c_relop(g, relop);
961
957
  generate_AE(g, p->AE);
962
- w(g, ":");
963
- write_block_start(g);
964
- write_failure(g);
965
- write_block_end(g);
966
- g->unreachable = false;
958
+ if (optimise_to_return) {
959
+ w(g, "~N");
960
+ } else {
961
+ w(g, ":");
962
+ write_block_start(g);
963
+ write_failure(g);
964
+ write_block_end(g);
965
+ g->unreachable = false;
966
+ }
967
967
  }
968
968
 
969
969
  static void generate_call(struct generator * g, struct node * p) {
970
-
970
+ int signals = check_possible_signals_list(g, p->name->definition, c_define, 0);
971
971
  write_comment(g, p);
972
972
  g->V[0] = p->name;
973
- write_failure_if(g, "not ~V0()", p);
973
+ if (g->failure_label == x_return &&
974
+ (signals == 0 || (p->right && p->right->type == c_functionend))) {
975
+ /* Always fails or tail call. */
976
+ writef(g, "~Mreturn ~V0()~N", p);
977
+ return;
978
+ }
979
+ if (signals == 1) {
980
+ /* Always succeeds. */
981
+ writef(g, "~M~V0()~N", p);
982
+ } else if (signals == 0) {
983
+ /* Always fails. */
984
+ writef(g, "~M~V0()~N", p);
985
+ write_failure(g);
986
+ } else {
987
+ write_failure_if(g, "not ~V0()", p);
988
+ }
974
989
  }
975
990
 
976
991
  static void generate_grouping(struct generator * g, struct node * p, int complement) {
992
+ write_comment(g, p);
977
993
 
978
- struct grouping * q = p->name->grouping;
979
994
  g->S[0] = p->mode == m_forward ? "" : "_b";
980
995
  g->S[1] = complement ? "out" : "in";
981
996
  g->V[0] = p->name;
982
- g->I[0] = q->smallest_ch;
983
- g->I[1] = q->largest_ch;
984
- write_failure_if(g, "not self.~S1_grouping~S0(~n.~W0, ~I0, ~I1)", p);
997
+ write_failure_if(g, "not self.~S1_grouping~S0(~n.~W0)", p);
985
998
  }
986
999
 
987
1000
  static void generate_namedstring(struct generator * g, struct node * p) {
988
-
989
1001
  write_comment(g, p);
990
1002
  g->S[0] = p->mode == m_forward ? "" : "_b";
991
1003
  g->V[0] = p->name;
@@ -1002,12 +1014,16 @@ static void generate_literalstring(struct generator * g, struct node * p) {
1002
1014
 
1003
1015
  static void generate_define(struct generator * g, struct node * p) {
1004
1016
  struct name * q = p->name;
1017
+ if (q->type == t_routine && !q->used) return;
1005
1018
 
1006
- struct str * saved_output = g->outbuf;
1019
+ write_newline(g);
1020
+ write_comment(g, p);
1007
1021
 
1008
1022
  g->V[0] = q;
1009
- w(g, "~N~Mdef ~W0(self):~+~N");
1023
+ w(g, "~Mdef ~W0(self):~+~N");
1010
1024
 
1025
+ /* Save output. */
1026
+ struct str * saved_output = g->outbuf;
1011
1027
  g->outbuf = str_new();
1012
1028
 
1013
1029
  g->next_label = 0;
@@ -1016,8 +1032,14 @@ static void generate_define(struct generator * g, struct node * p) {
1016
1032
  str_clear(g->failure_str);
1017
1033
  g->failure_label = x_return;
1018
1034
  g->unreachable = false;
1035
+ int signals = check_possible_signals_list(g, p->left, c_define, 0);
1019
1036
  generate(g, p->left);
1020
- if (!g->unreachable) w(g, "~Mreturn True~N");
1037
+ if (p->left->right) {
1038
+ assert(p->left->right->type == c_functionend);
1039
+ if (signals) {
1040
+ generate(g, p->left->right);
1041
+ }
1042
+ }
1021
1043
  w(g, "~-");
1022
1044
 
1023
1045
  str_append(saved_output, g->outbuf);
@@ -1025,37 +1047,53 @@ static void generate_define(struct generator * g, struct node * p) {
1025
1047
  g->outbuf = saved_output;
1026
1048
  }
1027
1049
 
1050
+ static void generate_functionend(struct generator * g, struct node * p) {
1051
+ (void)p;
1052
+ w(g, "~Mreturn True~N");
1053
+ }
1054
+
1028
1055
  static void generate_substring(struct generator * g, struct node * p) {
1056
+ write_comment(g, p);
1029
1057
 
1030
1058
  struct among * x = p->among;
1031
1059
 
1032
- write_comment(g, p);
1033
-
1034
1060
  g->S[0] = p->mode == m_forward ? "" : "_b";
1035
1061
  g->I[0] = x->number;
1036
1062
 
1037
- if (!x->amongvar_needed) {
1038
- write_failure_if(g, "self.find_among~S0(~n.a_~I0) == 0", p);
1039
- } else {
1063
+ if (x->amongvar_needed) {
1040
1064
  writef(g, "~Mamong_var = self.find_among~S0(~n.a_~I0)~N", p);
1041
- write_failure_if(g, "among_var == 0", p);
1065
+ if (!x->always_matches) {
1066
+ write_failure_if(g, "among_var == 0", p);
1067
+ }
1068
+ } else if (x->always_matches) {
1069
+ writef(g, "~Mself.find_among~S0(~n.a_~I0)~N", p);
1070
+ } else {
1071
+ write_failure_if(g, "self.find_among~S0(~n.a_~I0) == 0", p);
1042
1072
  }
1043
1073
  }
1044
1074
 
1045
1075
  static void generate_among(struct generator * g, struct node * p) {
1046
-
1047
1076
  struct among * x = p->among;
1048
1077
 
1049
- if (x->substring == 0) generate_substring(g, p);
1050
-
1051
- if (x->starter != 0) generate(g, x->starter);
1078
+ if (x->substring == NULL) {
1079
+ generate_substring(g, p);
1080
+ } else {
1081
+ write_comment(g, p);
1082
+ }
1052
1083
 
1053
1084
  if (x->command_count == 1 && x->nocommand_count == 0) {
1054
1085
  /* Only one outcome ("no match" already handled). */
1055
1086
  generate(g, x->commands[0]);
1056
1087
  } else if (x->command_count > 0) {
1057
- int i;
1058
- for (i = 1; i <= x->command_count; i++) {
1088
+ /* We dispatch the integer result in `among_var` with an if-chain,
1089
+ * which is O(n) unless Python has a special optimisation (and
1090
+ * profiling with the `timeit` module suggests it doesn't). There
1091
+ * doesn't appear to be a good alternative in Python (3.10 added
1092
+ * `match` but that seems to be aimed more at pattern matching rather
1093
+ * than O(1) dispatch of an integer and it was actually slower when we
1094
+ * tried generating it here).
1095
+ */
1096
+ for (int i = 1; i <= x->command_count; i++) {
1059
1097
  if (i == x->command_count && x->nocommand_count == 0) {
1060
1098
  w(g, "~Melse:~N~+");
1061
1099
  } else {
@@ -1071,20 +1109,17 @@ static void generate_among(struct generator * g, struct node * p) {
1071
1109
  }
1072
1110
 
1073
1111
  static void generate_booltest(struct generator * g, struct node * p) {
1074
-
1075
1112
  write_comment(g, p);
1076
1113
  g->V[0] = p->name;
1077
1114
  write_failure_if(g, "not ~V0", p);
1078
1115
  }
1079
1116
 
1080
1117
  static void generate_false(struct generator * g, struct node * p) {
1081
-
1082
1118
  write_comment(g, p);
1083
1119
  write_failure(g);
1084
1120
  }
1085
1121
 
1086
1122
  static void generate_debug(struct generator * g, struct node * p) {
1087
-
1088
1123
  write_comment(g, p);
1089
1124
  g->I[0] = g->debug_count++;
1090
1125
  g->I[1] = p->line_number;
@@ -1092,14 +1127,10 @@ static void generate_debug(struct generator * g, struct node * p) {
1092
1127
  }
1093
1128
 
1094
1129
  static void generate(struct generator * g, struct node * p) {
1095
-
1096
- int a0;
1097
- struct str * a1;
1098
-
1099
1130
  if (g->unreachable) return;
1100
1131
 
1101
- a0 = g->failure_label;
1102
- a1 = str_copy(g->failure_str);
1132
+ int a0 = g->failure_label;
1133
+ struct str * a1 = str_copy(g->failure_str);
1103
1134
 
1104
1135
  switch (p->type) {
1105
1136
  case c_define: generate_define(g, p); break;
@@ -1117,6 +1148,11 @@ static void generate(struct generator * g, struct node * p) {
1117
1148
  case c_do: generate_do(g, p); break;
1118
1149
  case c_goto: generate_GO(g, p, 1); break;
1119
1150
  case c_gopast: generate_GO(g, p, 0); break;
1151
+ case c_goto_grouping: generate_GO_grouping(g, p, 1, 0); break;
1152
+ case c_gopast_grouping:
1153
+ generate_GO_grouping(g, p, 0, 0); break;
1154
+ case c_goto_non: generate_GO_grouping(g, p, 1, 1); break;
1155
+ case c_gopast_non: generate_GO_grouping(g, p, 0, 1); break;
1120
1156
  case c_repeat: generate_repeat(g, p); break;
1121
1157
  case c_loop: generate_loop(g, p); break;
1122
1158
  case c_atleast: generate_atleast(g, p); break;
@@ -1154,12 +1190,14 @@ static void generate(struct generator * g, struct node * p) {
1154
1190
  generate_AE(g, p->AE);
1155
1191
  w(g, ")~N");
1156
1192
  break;
1157
- case c_eq: generate_integer_test(g, p, "=="); break;
1158
- case c_ne: generate_integer_test(g, p, "!="); break;
1159
- case c_gr: generate_integer_test(g, p, ">"); break;
1160
- case c_ge: generate_integer_test(g, p, ">="); break;
1161
- case c_ls: generate_integer_test(g, p, "<"); break;
1162
- case c_le: generate_integer_test(g, p, "<="); break;
1193
+ case c_eq:
1194
+ case c_ne:
1195
+ case c_gt:
1196
+ case c_ge:
1197
+ case c_lt:
1198
+ case c_le:
1199
+ generate_integer_test(g, p);
1200
+ break;
1163
1201
  case c_call: generate_call(g, p); break;
1164
1202
  case c_grouping: generate_grouping(g, p, false); break;
1165
1203
  case c_non: generate_grouping(g, p, true); break;
@@ -1171,6 +1209,7 @@ static void generate(struct generator * g, struct node * p) {
1171
1209
  case c_false: generate_false(g, p); break;
1172
1210
  case c_true: break;
1173
1211
  case c_debug: generate_debug(g, p); break;
1212
+ case c_functionend: generate_functionend(g, p); break;
1174
1213
  default: fprintf(stderr, "%d encountered\n", p->type);
1175
1214
  exit(1);
1176
1215
  }
@@ -1181,7 +1220,6 @@ static void generate(struct generator * g, struct node * p) {
1181
1220
  }
1182
1221
 
1183
1222
  static void generate_class_begin(struct generator * g) {
1184
-
1185
1223
  w(g, "from .basestemmer import ");
1186
1224
  w(g, g->options->parent_class_name);
1187
1225
  w(g, "~N"
@@ -1201,82 +1239,60 @@ static void generate_class_begin(struct generator * g) {
1201
1239
  }
1202
1240
 
1203
1241
  static void generate_among_table(struct generator * g, struct among * x) {
1242
+ write_newline(g);
1243
+ write_comment(g, x->node);
1204
1244
 
1205
1245
  struct amongvec * v = x->b;
1206
1246
 
1207
1247
  g->I[0] = x->number;
1208
1248
 
1209
1249
  w(g, "~Ma_~I0 = [~N~+");
1210
- {
1211
- int i;
1212
- for (i = 0; i < x->literalstring_count; i++) {
1213
- g->I[0] = v->i;
1214
- g->I[1] = v->result;
1215
- g->L[0] = v->b;
1216
- g->S[0] = i < x->literalstring_count - 1 ? "," : "";
1217
-
1218
- w(g, "~MAmong(~L0, ~I0, ~I1");
1219
- if (v->function != 0) {
1220
- w(g, ", \"");
1221
- if (v->function->type == t_routine) {
1222
- /* Need to use mangled version of private name here. */
1223
- w(g, "_~n");
1224
- }
1225
- write_varname(g, v->function);
1226
- w(g, "\"");
1227
- }
1228
- w(g, ")~S0~N");
1229
- v++;
1250
+ for (int i = 0; i < x->literalstring_count; i++) {
1251
+ g->I[0] = v[i].i;
1252
+ g->I[1] = v[i].result;
1253
+ g->L[0] = v[i].b;
1254
+ g->S[0] = i < x->literalstring_count - 1 ? "," : "";
1255
+
1256
+ w(g, "~MAmong(~L0, ~I0, ~I1");
1257
+ if (v[i].function != NULL) {
1258
+ w(g, ", ");
1259
+ write_varname(g, v[i].function);
1230
1260
  }
1261
+ w(g, ")~S0~N");
1231
1262
  }
1232
- w(g, "~-~M]~N~N");
1263
+ w(g, "~-~M]~N");
1233
1264
  }
1234
1265
 
1235
1266
  static void generate_amongs(struct generator * g) {
1236
- struct among * x;
1237
- for (x = g->analyser->amongs; x; x = x->next) {
1267
+ for (struct among * x = g->analyser->amongs; x; x = x->next) {
1238
1268
  generate_among_table(g, x);
1239
1269
  }
1240
1270
  }
1241
1271
 
1242
- static void set_bit(symbol * b, int i) { b[i/8] |= 1 << i%8; }
1243
-
1244
1272
  static void generate_grouping_table(struct generator * g, struct grouping * q) {
1245
-
1246
- int range = q->largest_ch - q->smallest_ch + 1;
1247
- int size = (range + 7)/ 8; /* assume 8 bits per symbol */
1248
1273
  symbol * b = q->b;
1249
- symbol * map = create_b(size);
1250
- int i;
1251
- for (i = 0; i < size; i++) map[i] = 0;
1252
-
1253
- /* Using unicode would require revision here */
1254
-
1255
- for (i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
1256
1274
 
1257
1275
  g->V[0] = q->name;
1258
1276
 
1259
- w(g, "~M~W0 = [");
1260
- for (i = 0; i < size; i++) {
1261
- write_int(g, map[i]);
1262
- if (i < size - 1) w(g, ", ");
1277
+ // We could use frozenset, but it seems slightly slower to construct which
1278
+ // adds to startup time.
1279
+ w(g, "~M~W0 = {");
1280
+ for (int i = 0; i < SIZE(b); i++) {
1281
+ if (i > 0) w(g, ", ");
1282
+ write_literal_char(g, b[i]);
1263
1283
  }
1264
- w(g, "]~N~N");
1265
- lose_b(map);
1284
+ w(g, "}~N~N");
1266
1285
  }
1267
1286
 
1268
1287
  static void generate_groupings(struct generator * g) {
1269
- struct grouping * q;
1270
- for (q = g->analyser->groupings; q; q = q->next) {
1288
+ for (struct grouping * q = g->analyser->groupings; q; q = q->next) {
1271
1289
  if (q->name->used)
1272
1290
  generate_grouping_table(g, q);
1273
1291
  }
1274
1292
  }
1275
1293
 
1276
1294
  static void generate_members(struct generator * g) {
1277
-
1278
- struct name * q;
1279
- for (q = g->analyser->names; q; q = q->next) {
1295
+ for (struct name * q = g->analyser->names; q; q = q->next) {
1280
1296
  g->V[0] = q;
1281
1297
  switch (q->type) {
1282
1298
  case t_string:
@@ -1293,9 +1309,8 @@ static void generate_members(struct generator * g) {
1293
1309
  }
1294
1310
 
1295
1311
  static void generate_methods(struct generator * g) {
1296
-
1297
1312
  struct node * p = g->analyser->program;
1298
- while (p != 0) {
1313
+ while (p != NULL) {
1299
1314
  generate(g, p);
1300
1315
  g->unreachable = false;
1301
1316
  p = p->right;
@@ -1304,18 +1319,19 @@ static void generate_methods(struct generator * g) {
1304
1319
 
1305
1320
  static void generate_label_classes(struct generator * g)
1306
1321
  {
1307
- int i;
1308
- for (i = 0; i <= g->max_label; i++) {
1322
+ for (int i = 0; i <= g->max_label; i++) {
1309
1323
  g->I[0] = i;
1310
1324
  w(g, "~N~Nclass lab~I0(BaseException): pass~N");
1311
1325
  }
1312
1326
  }
1313
1327
 
1314
1328
  extern void generate_program_python(struct generator * g) {
1315
-
1316
1329
  g->outbuf = str_new();
1317
1330
  g->failure_str = str_new();
1318
1331
 
1332
+ // Only needed for Python 2, which defaults to ASCII.
1333
+ w(g, "#-*- coding: utf-8 -*-~N");
1334
+
1319
1335
  write_start_comment(g, "# ", NULL);
1320
1336
  if (g->analyser->int_limits_used) {
1321
1337
  /* sys.maxsize is used in the code generated for maxint and minint */
@@ -1323,12 +1339,13 @@ extern void generate_program_python(struct generator * g) {
1323
1339
  }
1324
1340
  generate_class_begin(g);
1325
1341
 
1326
- generate_amongs(g);
1327
1342
  generate_groupings(g);
1328
1343
 
1329
1344
  generate_members(g);
1330
1345
  generate_methods(g);
1331
1346
 
1347
+ generate_amongs(g);
1348
+
1332
1349
  generate_label_classes(g);
1333
1350
 
1334
1351
  output_str(g->options->output_src, g->outbuf);