mittens 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +3 -3
  4. data/lib/mittens/version.rb +1 -1
  5. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  6. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  7. data/vendor/snowball/GNUmakefile +194 -136
  8. data/vendor/snowball/NEWS +798 -3
  9. data/vendor/snowball/README.rst +50 -1
  10. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  11. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  12. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  13. data/vendor/snowball/algorithms/basque.sbl +4 -19
  14. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  15. data/vendor/snowball/algorithms/danish.sbl +1 -1
  16. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  17. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  18. data/vendor/snowball/algorithms/english.sbl +52 -37
  19. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  20. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  21. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  22. data/vendor/snowball/algorithms/french.sbl +42 -16
  23. data/vendor/snowball/algorithms/german.sbl +35 -14
  24. data/vendor/snowball/algorithms/greek.sbl +76 -76
  25. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  26. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  27. data/vendor/snowball/algorithms/italian.sbl +11 -21
  28. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  29. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  30. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  31. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  32. data/vendor/snowball/algorithms/porter.sbl +2 -2
  33. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  34. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  35. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  36. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  37. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  38. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  39. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  40. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  41. data/vendor/snowball/compiler/analyser.c +445 -192
  42. data/vendor/snowball/compiler/driver.c +109 -101
  43. data/vendor/snowball/compiler/generator.c +853 -464
  44. data/vendor/snowball/compiler/generator_ada.c +404 -366
  45. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  46. data/vendor/snowball/compiler/generator_go.c +323 -254
  47. data/vendor/snowball/compiler/generator_java.c +326 -252
  48. data/vendor/snowball/compiler/generator_js.c +362 -252
  49. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  50. data/vendor/snowball/compiler/generator_python.c +257 -240
  51. data/vendor/snowball/compiler/generator_rust.c +423 -251
  52. data/vendor/snowball/compiler/header.h +117 -71
  53. data/vendor/snowball/compiler/space.c +137 -68
  54. data/vendor/snowball/compiler/syswords.h +2 -2
  55. data/vendor/snowball/compiler/tokeniser.c +125 -107
  56. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  57. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  58. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  59. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  60. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  61. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  62. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  63. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  64. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  65. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  66. data/vendor/snowball/examples/stemwords.c +12 -12
  67. data/vendor/snowball/go/env.go +107 -31
  68. data/vendor/snowball/go/util.go +0 -4
  69. data/vendor/snowball/include/libstemmer.h +4 -0
  70. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  71. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  72. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  74. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  75. data/vendor/snowball/javascript/stemwords.js +3 -6
  76. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  77. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  78. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  79. data/vendor/snowball/libstemmer/modules.txt +13 -10
  80. data/vendor/snowball/libstemmer/test.c +1 -1
  81. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  82. data/vendor/snowball/pascal/generate.pl +13 -13
  83. data/vendor/snowball/python/create_init.py +4 -1
  84. data/vendor/snowball/python/setup.cfg +0 -3
  85. data/vendor/snowball/python/setup.py +8 -3
  86. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  87. data/vendor/snowball/python/stemwords.py +8 -12
  88. data/vendor/snowball/runtime/api.c +10 -5
  89. data/vendor/snowball/runtime/header.h +10 -9
  90. data/vendor/snowball/runtime/utilities.c +9 -9
  91. data/vendor/snowball/rust/build.rs +1 -1
  92. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  93. data/vendor/snowball/tests/stemtest.c +7 -4
  94. metadata +7 -7
  95. data/vendor/snowball/.travis.yml +0 -112
  96. data/vendor/snowball/algorithms/german2.sbl +0 -145
  97. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  98. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -1,4 +1,5 @@
1
-
1
+ #include <assert.h>
2
+ #include <limits.h> /* for INT_MAX */
2
3
  #include <stdio.h> /* printf etc */
3
4
  #include <stdlib.h> /* exit */
4
5
  #include <string.h> /* memmove */
@@ -17,7 +18,7 @@ typedef enum {
17
18
  e_empty_among = 18,
18
19
  e_adjacent_bracketed_in_among = 19,
19
20
  e_substring_preceded_by_substring = 20,
20
- /* For codes below here, tokeniser->b is printed before the error. */
21
+ /* For codes below here, tokeniser->s is printed before the error. */
21
22
  e_redeclared = 30,
22
23
  e_undeclared = 31,
23
24
  e_declared_as_different_mode = 32,
@@ -36,38 +37,38 @@ static struct node * C_style(struct analyser * a, const char * s, int token);
36
37
 
37
38
 
38
39
  static void print_node_(struct node * p, int n, const char * s) {
39
-
40
- int i;
41
- for (i = 0; i < n; i++) fputs(i == n - 1 ? s : " ", stdout);
42
- printf("%s ", name_of_token(p->type));
43
- if (p->name) report_b(stdout, p->name->b);
40
+ printf("%*s%s", n * 2, s, name_of_token(p->type));
41
+ if (p->name) {
42
+ putchar(' ');
43
+ report_s(stdout, p->name->s);
44
+ }
44
45
  if (p->literalstring) {
45
- printf("'");
46
+ printf(" '");
46
47
  report_b(stdout, p->literalstring);
47
48
  printf("'");
48
49
  } else if (p->type == c_number) {
49
- printf("%d", p->number);
50
+ printf(" %d", p->number);
50
51
  }
51
52
  printf("\n");
52
53
  if (p->AE) print_node_(p->AE, n+1, "# ");
53
- if (p->left) print_node_(p->left, n+1, " ");
54
+ if (p->left) print_node_(p->left, n+1, "");
54
55
  if (p->aux) print_node_(p->aux, n+1, "@ ");
55
- if (p->right) print_node_(p->right, n, " ");
56
+ if (p->right) print_node_(p->right, n, "");
56
57
  }
57
58
 
58
59
  extern void print_program(struct analyser * a) {
59
- print_node_(a->program, 0, " ");
60
+ print_node_(a->program, 0, "");
60
61
  }
61
62
 
62
63
  static struct node * new_node(struct analyser * a, int type) {
63
64
  NEW(node, p);
64
65
  p->next = a->nodes; a->nodes = p;
65
- p->left = 0;
66
- p->right = 0;
67
- p->aux = 0;
68
- p->AE = 0;
69
- p->name = 0;
70
- p->literalstring = 0;
66
+ p->left = NULL;
67
+ p->right = NULL;
68
+ p->aux = NULL;
69
+ p->AE = NULL;
70
+ p->name = NULL;
71
+ p->literalstring = NULL;
71
72
  p->mode = a->mode;
72
73
  p->line_number = a->tokeniser->line_number;
73
74
  p->type = type;
@@ -78,7 +79,6 @@ static const char * name_of_mode(int n) {
78
79
  switch (n) {
79
80
  case m_backward: return "string backward";
80
81
  case m_forward: return "string forward";
81
- /* case m_integer: return "integer"; */
82
82
  }
83
83
  fprintf(stderr, "Invalid mode %d in name_of_mode()\n", n);
84
84
  exit(1);
@@ -86,6 +86,7 @@ static const char * name_of_mode(int n) {
86
86
 
87
87
  static const char * name_of_type(int n) {
88
88
  switch (n) {
89
+ case 'b': return "boolean";
89
90
  case 's': return "string";
90
91
  case 'i': return "integer";
91
92
  case 'r': return "routine";
@@ -117,9 +118,14 @@ static void count_error(struct analyser * a) {
117
118
 
118
119
  static void error2(struct analyser * a, error_code n, int x) {
119
120
  struct tokeniser * t = a->tokeniser;
121
+ if (n == e_unexpected_token && t->token_reported_as_unexpected) {
122
+ // Avoid duplicate errors if this token was already reported as
123
+ // unexpected and then held.
124
+ return;
125
+ }
120
126
  count_error(a);
121
127
  fprintf(stderr, "%s:%d: ", t->file, t->line_number);
122
- if ((int)n >= (int)e_redeclared) report_b(stderr, t->b);
128
+ if ((int)n >= (int)e_redeclared) report_s(stderr, t->s);
123
129
  switch (n) {
124
130
  case e_token_omitted:
125
131
  fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
@@ -127,12 +133,14 @@ static void error2(struct analyser * a, error_code n, int x) {
127
133
  fprintf(stderr, "in among(...), ");
128
134
  /* fall through */
129
135
  case e_unexpected_token:
136
+ t->token_reported_as_unexpected = true;
130
137
  fprintf(stderr, "unexpected %s", name_of_token(t->token));
131
138
  if (t->token == c_number) fprintf(stderr, " %d", t->number);
132
139
  if (t->token == c_name) {
133
- fprintf(stderr, " ");
134
- report_b(stderr, t->b);
135
- } break;
140
+ t->s[SIZE(t->s)] = 0;
141
+ fprintf(stderr, " %s", t->s);
142
+ }
143
+ break;
136
144
  case e_string_omitted:
137
145
  fprintf(stderr, "string omitted"); break;
138
146
 
@@ -179,9 +187,8 @@ static void error(struct analyser * a, error_code n) { error2(a, n, 0); }
179
187
 
180
188
  static void error4(struct analyser * a, struct name * q) {
181
189
  count_error(a);
182
- fprintf(stderr, "%s:%d: ", a->tokeniser->file, q->used->line_number);
183
- report_b(stderr, q->b);
184
- fprintf(stderr, " undefined\n");
190
+ q->s[SIZE(q->s)] = 0;
191
+ fprintf(stderr, "%s:%d: %s undefined\n", a->tokeniser->file, q->used->line_number, q->s);
185
192
  }
186
193
 
187
194
  static void omission_error(struct analyser * a, int n) {
@@ -198,35 +205,33 @@ static int check_token(struct analyser * a, int code) {
198
205
  static int get_token(struct analyser * a, int code) {
199
206
  struct tokeniser * t = a->tokeniser;
200
207
  read_token(t);
201
- {
202
- int x = check_token(a, code);
203
- if (!x) t->token_held = true;
204
- return x;
205
- }
208
+ int x = check_token(a, code);
209
+ if (!x) hold_token(t);
210
+ return x;
206
211
  }
207
212
 
208
213
  static struct name * look_for_name(struct analyser * a) {
209
- symbol * q = a->tokeniser->b;
214
+ const byte * q = a->tokeniser->s;
210
215
  struct name * p;
211
216
  for (p = a->names; p; p = p->next) {
212
- symbol * b = p->b;
217
+ byte * b = p->s;
213
218
  int n = SIZE(b);
214
- if (n == SIZE(q) && memcmp(q, b, n * sizeof(symbol)) == 0) {
219
+ if (n == SIZE(q) && memcmp(q, b, n) == 0) {
215
220
  p->referenced = true;
216
221
  return p;
217
222
  }
218
223
  }
219
- return 0;
224
+ return NULL;
220
225
  }
221
226
 
222
227
  static struct name * find_name(struct analyser * a) {
223
228
  struct name * p = look_for_name(a);
224
- if (p == 0) error(a, e_undeclared);
229
+ if (p == NULL) error(a, e_undeclared);
225
230
  return p;
226
231
  }
227
232
 
228
233
  static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
229
- if (p->mode < 0) p->mode = mode; else
234
+ if (p->mode == m_unknown) p->mode = mode; else
230
235
  if (p->mode != mode) error2(a, e_misused, mode);
231
236
  }
232
237
 
@@ -265,10 +270,8 @@ static void read_names(struct analyser * a, int type) {
265
270
  * its special meaning, for compatibility with older versions
266
271
  * of snowball.
267
272
  */
268
- static const symbol c_len_lit[] = {
269
- 'l', 'e', 'n'
270
- };
271
- t->b = MOVE_TO_B(t->b, c_len_lit);
273
+ SIZE(t->s) = 0;
274
+ t->s = add_literal_to_s(t->s, "len");
272
275
  goto handle_as_name;
273
276
  }
274
277
  case c_lenof: {
@@ -276,31 +279,29 @@ static void read_names(struct analyser * a, int type) {
276
279
  * its special meaning, for compatibility with older versions
277
280
  * of snowball.
278
281
  */
279
- static const symbol c_lenof_lit[] = {
280
- 'l', 'e', 'n', 'o', 'f'
281
- };
282
- t->b = MOVE_TO_B(t->b, c_lenof_lit);
282
+ SIZE(t->s) = 0;
283
+ t->s = add_literal_to_s(t->s, "lenof");
283
284
  goto handle_as_name;
284
285
  }
285
286
  case c_name:
286
287
  handle_as_name:
287
- if (look_for_name(a) != 0) error(a, e_redeclared); else {
288
+ if (look_for_name(a) != NULL) error(a, e_redeclared); else {
288
289
  NEW(name, p);
289
- p->b = copy_b(t->b);
290
+ p->s = copy_s(t->s);
290
291
  p->type = type;
291
- p->mode = -1; /* routines, externals */
292
+ p->mode = m_unknown; /* used for routines, externals */
292
293
  /* We defer assigning counts until after we've eliminated
293
294
  * variables whose values are never used. */
294
295
  p->count = -1;
295
296
  p->referenced = false;
296
297
  p->used_in_among = false;
297
- p->used = 0;
298
+ p->used = NULL;
298
299
  p->value_used = false;
299
300
  p->initialised = false;
300
301
  p->used_in_definition = false;
301
- p->local_to = 0;
302
- p->grouping = 0;
303
- p->definition = 0;
302
+ p->local_to = NULL;
303
+ p->grouping = NULL;
304
+ p->definition = NULL;
304
305
  p->declaration_line_number = t->line_number;
305
306
  p->next = a->names;
306
307
  a->names = p;
@@ -310,7 +311,7 @@ handle_as_name:
310
311
  }
311
312
  break;
312
313
  default:
313
- if (!check_token(a, c_ket)) t->token_held = true;
314
+ if (!check_token(a, c_ket)) hold_token(t);
314
315
  return;
315
316
  }
316
317
  }
@@ -325,7 +326,6 @@ static symbol * new_literalstring(struct analyser * a) {
325
326
  }
326
327
 
327
328
  static int read_AE_test(struct analyser * a) {
328
-
329
329
  struct tokeniser * t = a->tokeniser;
330
330
  switch (read_token(t)) {
331
331
  case c_assign: return c_mathassign;
@@ -335,11 +335,14 @@ static int read_AE_test(struct analyser * a) {
335
335
  case c_divideassign:
336
336
  case c_eq:
337
337
  case c_ne:
338
- case c_gr:
338
+ case c_gt:
339
339
  case c_ge:
340
- case c_ls:
340
+ case c_lt:
341
341
  case c_le: return t->token;
342
- default: error(a, e_unexpected_token); t->token_held = true; return c_eq;
342
+ default:
343
+ error(a, e_unexpected_token);
344
+ hold_token(t);
345
+ return c_eq;
343
346
  }
344
347
  }
345
348
 
@@ -422,12 +425,16 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
422
425
  case c_number:
423
426
  p = new_node(a, c_number);
424
427
  p->number = t->number;
428
+ p->fixed_constant = true;
425
429
  break;
426
430
  case c_lenof:
427
431
  case c_sizeof: {
428
432
  int token = t->token;
429
433
  p = C_style(a, "S", token);
430
- if (!p->literalstring) break;
434
+ if (!p->literalstring) {
435
+ if (p->name) p->name->value_used = true;
436
+ break;
437
+ }
431
438
 
432
439
  /* Replace lenof or sizeof on a literal string with a numeric
433
440
  * constant.
@@ -449,18 +456,19 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
449
456
  p->type = c_number;
450
457
  p->literalstring = NULL;
451
458
  p->number = result;
459
+ p->fixed_constant = (token == c_lenof);
452
460
  break;
453
461
  }
454
462
  default:
455
463
  error(a, e_unexpected_token);
456
- t->token_held = true;
457
- return 0;
464
+ hold_token(t);
465
+ return NULL;
458
466
  }
459
467
  while (true) {
460
468
  int token = read_token(t);
461
469
  int b = binding(token);
462
470
  if (binding(token) <= B) {
463
- t->token_held = true;
471
+ hold_token(t);
464
472
  return p;
465
473
  }
466
474
  struct node * r = read_AE(a, assigned_to, b);
@@ -478,6 +486,11 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
478
486
  q->number = p->number * r->number;
479
487
  break;
480
488
  case c_divide:
489
+ if (r->number == 0) {
490
+ fprintf(stderr, "%s:%d: Division by zero\n",
491
+ t->file, t->line_number);
492
+ exit(1);
493
+ }
481
494
  q->number = p->number / r->number;
482
495
  break;
483
496
  default:
@@ -485,10 +498,101 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
485
498
  name_of_token(token));
486
499
  exit(1);
487
500
  }
501
+ q->fixed_constant = p->fixed_constant && r->fixed_constant;
502
+ q->line_number = p->line_number;
488
503
  } else {
489
- q = new_node(a, token);
490
- q->left = p;
491
- q->right = r;
504
+ // Check for specific constant or no-op cases.
505
+ q = NULL;
506
+ switch (token) {
507
+ case c_plus:
508
+ // 0 + r is r
509
+ if (p->type == c_number && p->number == 0) {
510
+ q = r;
511
+ break;
512
+ }
513
+ // p + 0 is p
514
+ if (r->type == c_number && r->number == 0) {
515
+ q = p;
516
+ break;
517
+ }
518
+ break;
519
+ case c_minus:
520
+ // 0 - r is -r
521
+ if (p->type == c_number && p->number == 0) {
522
+ q = new_node(a, c_neg);
523
+ q->right = r;
524
+ break;
525
+ }
526
+ // p - 0 is p
527
+ if (r->type == c_number && r->number == 0) {
528
+ q = p;
529
+ break;
530
+ }
531
+ break;
532
+ case c_multiply:
533
+ // 0 * r is 0
534
+ if (p->type == c_number && p->number == 0) {
535
+ q = p;
536
+ break;
537
+ }
538
+ // p * 0 is 0
539
+ if (r->type == c_number && r->number == 0) {
540
+ q = r;
541
+ q->line_number = p->line_number;
542
+ break;
543
+ }
544
+ // -1 * r is -r
545
+ if (p->type == c_number && p->number == -1) {
546
+ q = new_node(a, c_neg);
547
+ q->right = r;
548
+ q->line_number = p->line_number;
549
+ break;
550
+ }
551
+ // p * -1 is -p
552
+ if (r->type == c_number && r->number == -1) {
553
+ q = new_node(a, c_neg);
554
+ q->right = p;
555
+ q->line_number = p->line_number;
556
+ break;
557
+ }
558
+ // 1 * r is r
559
+ if (p->type == c_number && p->number == 1) {
560
+ q = r;
561
+ q->line_number = p->line_number;
562
+ break;
563
+ }
564
+ // p * 1 is p
565
+ if (r->type == c_number && r->number == 1) {
566
+ q = p;
567
+ break;
568
+ }
569
+ break;
570
+ case c_divide:
571
+ // p / 1 is p
572
+ if (r->type == c_number && r->number == 1) {
573
+ q = p;
574
+ break;
575
+ }
576
+ // p / -1 is -p
577
+ if (r->type == c_number && r->number == -1) {
578
+ q = new_node(a, c_neg);
579
+ q->right = p;
580
+ q->line_number = p->line_number;
581
+ break;
582
+ }
583
+ // p / 0 is an error!
584
+ if (r->type == c_number && r->number == 0) {
585
+ fprintf(stderr, "%s:%d: Division by zero\n",
586
+ t->file, t->line_number);
587
+ exit(1);
588
+ }
589
+ break;
590
+ }
591
+ if (!q) {
592
+ q = new_node(a, token);
593
+ q->left = p;
594
+ q->right = r;
595
+ }
492
596
  }
493
597
  p = q;
494
598
  }
@@ -503,30 +607,30 @@ static struct node * read_C_connection(struct analyser * a, struct node * q, int
503
607
  q = read_C(a);
504
608
  p_end->right = q; p_end = q;
505
609
  } while (read_token(t) == op);
506
- t->token_held = true;
610
+ hold_token(t);
507
611
  return p;
508
612
  }
509
613
 
510
614
  static struct node * read_C_list(struct analyser * a) {
511
615
  struct tokeniser * t = a->tokeniser;
512
616
  struct node * p = new_node(a, c_bra);
513
- struct node * p_end = 0;
617
+ struct node * p_end = NULL;
514
618
  while (true) {
515
619
  int token = read_token(t);
516
620
  if (token == c_ket) return p;
517
621
  if (token < 0) { omission_error(a, c_ket); return p; }
518
- t->token_held = true;
622
+ hold_token(t);
519
623
  {
520
624
  struct node * q = read_C(a);
521
625
  while (true) {
522
626
  token = read_token(t);
523
627
  if (token != c_and && token != c_or) {
524
- t->token_held = true;
628
+ hold_token(t);
525
629
  break;
526
630
  }
527
631
  q = read_C_connection(a, q, token);
528
632
  }
529
- if (p_end == 0) p->left = q; else p_end->right = q;
633
+ if (p_end == NULL) p->left = q; else p_end->right = q;
530
634
  p_end = q;
531
635
  }
532
636
  }
@@ -541,7 +645,7 @@ static struct node * C_style(struct analyser * a, const char * s, int token) {
541
645
  case 'D':
542
646
  p->aux = read_C(a); continue;
543
647
  case 'A':
544
- p->AE = read_AE(a, 0, 0); continue;
648
+ p->AE = read_AE(a, NULL, 0); continue;
545
649
  case 'f':
546
650
  get_token(a, c_for); continue;
547
651
  case 'S':
@@ -632,11 +736,10 @@ static int compare_node(const struct node *p, const struct node *q) {
632
736
  PTR_NULL_CHECK(p->name, q->name);
633
737
  if (p->name) {
634
738
  int r;
635
- if (SIZE(p->name->b) != SIZE(q->name->b)) {
636
- return SIZE(p->name->b) - SIZE(q->name->b);
739
+ if (SIZE(p->name->s) != SIZE(q->name->s)) {
740
+ return SIZE(p->name->s) - SIZE(q->name->s);
637
741
  }
638
- r = memcmp(p->name->b, q->name->b,
639
- SIZE(p->name->b) * sizeof(symbol));
742
+ r = memcmp(p->name->s, q->name->s, SIZE(p->name->s));
640
743
  if (r != 0) return r;
641
744
  }
642
745
 
@@ -654,29 +757,34 @@ static int compare_node(const struct node *p, const struct node *q) {
654
757
  return compare_node(p->right, q->right);
655
758
  }
656
759
 
657
- static void make_among(struct analyser * a, struct node * p, struct node * substring) {
658
-
760
+ static struct node * make_among(struct analyser * a, struct node * p, struct node * substring) {
659
761
  NEW(among, x);
660
762
  NEWVEC(amongvec, v, p->number);
661
763
  struct node * q = p->left;
764
+ struct node * starter = NULL;
662
765
  struct amongvec * w0 = v;
663
766
  struct amongvec * w1 = v;
664
767
  int result = 1;
665
768
 
666
- int direction = substring != 0 ? substring->mode : p->mode;
769
+ int direction = substring != NULL ? substring->mode : p->mode;
667
770
  int backward = direction == m_backward;
668
771
 
669
- if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
772
+ if (a->amongs == NULL) a->amongs = x; else a->amongs_end->next = x;
670
773
  a->amongs_end = x;
671
- x->next = 0;
774
+ x->next = NULL;
775
+ x->node = p;
672
776
  x->b = v;
673
777
  x->number = a->among_count++;
674
778
  x->function_count = 0;
675
- x->starter = 0;
676
779
  x->nocommand_count = 0;
677
780
  x->amongvar_needed = false;
781
+ x->always_matches = false;
782
+ x->shortest_size = INT_MAX;
678
783
 
679
- if (q->type == c_bra) { x->starter = q; q = q->right; }
784
+ if (q->type == c_bra) {
785
+ starter = q;
786
+ p->left = q = q->right;
787
+ }
680
788
 
681
789
  while (q) {
682
790
  if (q->type == c_literalstring) {
@@ -694,10 +802,15 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
694
802
  check_routine_mode(a, function, direction);
695
803
  x->function_count++;
696
804
  } else {
697
- w1->function = 0;
805
+ w1->function = NULL;
806
+ if (w1->size == 0) {
807
+ // This among contains the empty string without a gating
808
+ // function so it will always match.
809
+ x->always_matches = true;
810
+ }
698
811
  }
699
812
  w1++;
700
- } else if (q->left == 0) {
813
+ } else if (q->left == NULL) {
701
814
  /* empty command: () */
702
815
  w0 = w1;
703
816
  } else {
@@ -732,7 +845,8 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
732
845
  x->command_count = result - 1;
733
846
  {
734
847
  NEWVEC(node*, commands, x->command_count);
735
- memset(commands, 0, x->command_count * sizeof(struct node*));
848
+ for (int i = 0; i != x->command_count; ++i)
849
+ commands[i] = NULL;
736
850
  for (w0 = v; w0 < w1; w0++) {
737
851
  if (w0->result > 0) {
738
852
  /* result == -1 when there's no command. */
@@ -757,6 +871,8 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
757
871
  int size = w0->size;
758
872
  struct amongvec * w;
759
873
 
874
+ if (size && size < x->shortest_size) x->shortest_size = size;
875
+
760
876
  for (w = w0 - 1; w >= v; w--) {
761
877
  if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
762
878
  w0->i = w - v; /* fill in index of longest substring */
@@ -782,16 +898,29 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
782
898
  x->literalstring_count = p->number;
783
899
  p->among = x;
784
900
 
785
- x->substring = substring;
786
- if (substring != 0) substring->among = x;
787
901
  if (x->command_count > 1 ||
788
- (x->command_count == 1 && x->nocommand_count > 0) ||
789
- x->starter != 0) {
902
+ (x->command_count == 1 && x->nocommand_count > 0)) {
790
903
  /* We need to set among_var rather than just checking if find_among*()
791
904
  * returns zero or not.
792
905
  */
793
906
  x->amongvar_needed = a->amongvar_needed = true;
794
907
  }
908
+ if (starter) {
909
+ starter->right = p;
910
+ if (substring) {
911
+ p = starter;
912
+ } else {
913
+ substring = new_node(a, c_substring);
914
+ substring->right = starter;
915
+ p = substring;
916
+ }
917
+ }
918
+ x->substring = substring;
919
+ if (substring != NULL) substring->among = x;
920
+
921
+ if (x->function_count > 0) ++a->among_with_function_count;
922
+
923
+ return p;
795
924
  }
796
925
 
797
926
  static int
@@ -805,11 +934,11 @@ is_just_true(struct node * q)
805
934
  static struct node * read_among(struct analyser * a) {
806
935
  struct tokeniser * t = a->tokeniser;
807
936
  struct node * p = new_node(a, c_among);
808
- struct node * p_end = 0;
937
+ struct node * p_end = NULL;
809
938
  int previous_token = -1;
810
939
  struct node * substring = a->substring;
811
940
 
812
- a->substring = 0;
941
+ a->substring = NULL;
813
942
  p->number = 0; /* counts the number of literals */
814
943
  if (!get_token(a, c_bra)) return p;
815
944
  while (true) {
@@ -822,8 +951,9 @@ static struct node * read_among(struct analyser * a) {
822
951
  struct node * r = new_node(a, c_name);
823
952
  name_to_node(a, r, 'r');
824
953
  q->left = r;
954
+ } else {
955
+ hold_token(t);
825
956
  }
826
- else t->token_held = true;
827
957
  p->number++; break;
828
958
  case c_bra:
829
959
  if (previous_token == c_bra) error(a, e_adjacent_bracketed_in_among);
@@ -832,7 +962,7 @@ static struct node * read_among(struct analyser * a) {
832
962
  /* Convert anything equivalent to () to () so we handle it
833
963
  * the same way.
834
964
  */
835
- q->left = 0;
965
+ q->left = NULL;
836
966
  }
837
967
  break;
838
968
  default:
@@ -841,19 +971,18 @@ static struct node * read_among(struct analyser * a) {
841
971
  continue;
842
972
  case c_ket:
843
973
  if (p->number == 0) error(a, e_empty_among);
844
- if (t->error_count == 0) make_among(a, p, substring);
974
+ if (t->error_count == 0) p = make_among(a, p, substring);
845
975
  return p;
846
976
  }
847
977
  previous_token = token;
848
- if (p_end == 0) p->left = q; else p_end->right = q;
978
+ if (p_end == NULL) p->left = q; else p_end->right = q;
849
979
  p_end = q;
850
980
  }
851
981
  }
852
982
 
853
983
  static struct node * read_substring(struct analyser * a) {
854
-
855
984
  struct node * p = new_node(a, c_substring);
856
- if (a->substring != 0) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
985
+ if (a->substring != NULL) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
857
986
  a->substring = p;
858
987
  return p;
859
988
  }
@@ -863,6 +992,10 @@ static void check_modifyable(struct analyser * a) {
863
992
  }
864
993
 
865
994
  static int ae_uses_name(struct node * p, struct name * q) {
995
+ if (!p) {
996
+ // AE is NULL after a syntax error, e.g. `$x = $y`
997
+ return 0;
998
+ }
866
999
  switch (p->type) {
867
1000
  case c_name:
868
1001
  case c_lenof:
@@ -925,13 +1058,88 @@ static struct node * read_C(struct analyser * a) {
925
1058
  case c_fail:
926
1059
  case c_test:
927
1060
  case c_do:
928
- case c_goto:
929
- case c_gopast:
930
1061
  case c_repeat:
931
1062
  return C_style(a, "C", token);
932
- case c_loop:
933
- case c_atleast:
934
- return C_style(a, "AC", token);
1063
+ case c_goto:
1064
+ case c_gopast: {
1065
+ struct node * subcommand = read_C(a);
1066
+ if (subcommand->type == c_grouping || subcommand->type == c_non) {
1067
+ /* We synthesise special commands for "goto" or "gopast" when
1068
+ * used on a grouping or an inverted grouping - the movement of
1069
+ * c by the matching action is exactly what we want!
1070
+ *
1071
+ * Adding the tokens happens to give unique values (the code
1072
+ * would fail to compile if it didn't!)
1073
+ */
1074
+ switch (token + subcommand->type) {
1075
+ case c_goto + c_grouping:
1076
+ subcommand->type = c_goto_grouping;
1077
+ break;
1078
+ case c_gopast + c_grouping:
1079
+ subcommand->type = c_gopast_grouping;
1080
+ break;
1081
+ case c_goto + c_non:
1082
+ subcommand->type = c_goto_non;
1083
+ break;
1084
+ case c_gopast + c_non:
1085
+ subcommand->type = c_gopast_non;
1086
+ break;
1087
+ default:
1088
+ fprintf(stderr, "Unexpected go/grouping combination: %s %s",
1089
+ name_of_token(token),
1090
+ name_of_token(subcommand->type));
1091
+ exit(1);
1092
+ }
1093
+ return subcommand;
1094
+ }
1095
+
1096
+ struct node * p = new_node(a, token);
1097
+ p->left = subcommand;
1098
+ return p;
1099
+ }
1100
+ case c_loop: {
1101
+ struct node * n = C_style(a, "AC", token);
1102
+ // n->AE is NULL after a syntax error, e.g. `loop next`.
1103
+ if (n->AE && n->AE->type == c_number) {
1104
+ if (n->AE->number <= 0) {
1105
+ // `loop N C`, where N <= 0 is a no-op.
1106
+ if (n->AE->fixed_constant) {
1107
+ fprintf(stderr,
1108
+ "%s:%d: warning: loop %d C is a no-op\n",
1109
+ t->file, n->AE->line_number, n->AE->number);
1110
+ }
1111
+ n->AE = NULL;
1112
+ n->left = NULL;
1113
+ n->type = c_true;
1114
+ } else if (n->AE->number == 1) {
1115
+ // `loop 1 C` -> `C`.
1116
+ if (n->AE->fixed_constant) {
1117
+ fprintf(stderr,
1118
+ "%s:%d: warning: loop 1 C is just C\n",
1119
+ t->file, n->AE->line_number);
1120
+ }
1121
+ n = n->left;
1122
+ }
1123
+ }
1124
+ return n;
1125
+ }
1126
+ case c_atleast: {
1127
+ struct node * n = C_style(a, "AC", token);
1128
+ // n->AE is NULL after a syntax error, e.g. `atleast next`.
1129
+ if (n->AE && n->AE->type == c_number) {
1130
+ if (n->AE->number <= 0) {
1131
+ // `atleast N C` where N <= 0 -> `repeat C`.
1132
+ if (n->AE->fixed_constant) {
1133
+ fprintf(stderr,
1134
+ "%s:%d: warning: atleast %d C is just repeat C\n",
1135
+ t->file, n->AE->line_number, n->AE->number);
1136
+ }
1137
+ n->AE = NULL;
1138
+ n->type = c_repeat;
1139
+ }
1140
+ }
1141
+ return n;
1142
+ }
935
1143
  case c_setmark: {
936
1144
  struct node * n = C_style(a, "i", token);
937
1145
  if (n->name) n->name->initialised = true;
@@ -942,24 +1150,28 @@ static struct node * read_C(struct analyser * a) {
942
1150
  return C_style(a, "A", token);
943
1151
  case c_hop: {
944
1152
  struct node * n = C_style(a, "A", token);
945
- if (n->AE->type == c_number) {
946
- if (n->AE->number < 0) {
1153
+ // n->AE is NULL after a syntax error, e.g. `hop hop`.
1154
+ if (n->AE && n->AE->type == c_number) {
1155
+ if (n->AE->number == 1) {
1156
+ // Convert `hop 1` to `next`.
1157
+ n->AE = NULL;
1158
+ n->type = c_next;
1159
+ } else if (n->AE->number == 0) {
1160
+ if (n->AE->fixed_constant) {
1161
+ fprintf(stderr,
1162
+ "%s:%d: warning: hop 0 is a no-op\n",
1163
+ t->file, n->AE->line_number);
1164
+ }
1165
+ n->AE = NULL;
1166
+ n->type = c_true;
1167
+ } else if (n->AE->number < 0) {
947
1168
  fprintf(stderr,
948
1169
  "%s:%d: warning: hop %d now signals f (as was "
949
1170
  "always documented) rather than moving the cursor "
950
1171
  "in the opposite direction\n",
951
- a->tokeniser->file,
952
- n->AE->line_number,
953
- n->AE->number);
1172
+ t->file, n->AE->line_number, n->AE->number);
954
1173
  n->AE = NULL;
955
1174
  n->type = c_false;
956
- } else if (n->AE->number == 0) {
957
- fprintf(stderr,
958
- "%s:%d: warning: hop 0 is a no-op\n",
959
- a->tokeniser->file,
960
- n->AE->line_number);
961
- n->AE = NULL;
962
- n->type = c_true;
963
1175
  }
964
1176
  }
965
1177
  return n;
@@ -978,10 +1190,16 @@ static struct node * read_C(struct analyser * a) {
978
1190
  return new_node(a, token);
979
1191
  case c_assignto:
980
1192
  case c_sliceto: {
981
- struct node *n;
982
1193
  check_modifyable(a);
983
- n = C_style(a, "s", token);
1194
+ struct node *n = C_style(a, "s", token);
984
1195
  if (n->name) n->name->initialised = true;
1196
+ if (token == c_assignto) {
1197
+ fprintf(stderr,
1198
+ "%s:%d: warning: Use of `=>` is not recommended, "
1199
+ "see https://snowballstem.org/compiler/snowman.html "
1200
+ "section 13.3 for details\n",
1201
+ t->file, n->line_number);
1202
+ }
985
1203
  return n;
986
1204
  }
987
1205
  case c_assign:
@@ -1003,29 +1221,28 @@ static struct node * read_C(struct analyser * a) {
1003
1221
  return n;
1004
1222
  }
1005
1223
  case c_dollar: {
1006
- struct tokeniser * t = a->tokeniser;
1007
1224
  read_token(t);
1008
1225
  if (t->token == c_bra) {
1009
1226
  /* Handle newer $(AE REL_OP AE) syntax. */
1010
- struct node * n = read_AE(a, 0, 0);
1227
+ struct node * n = read_AE(a, NULL, 0);
1011
1228
  read_token(t);
1012
- int token = t->token;
1229
+ token = t->token;
1013
1230
  switch (token) {
1014
1231
  case c_assign:
1015
1232
  count_error(a);
1016
1233
  fprintf(stderr, "%s:%d: Expected relational operator (did you mean '=='?)\n",
1017
- t->file, t->line_number);
1234
+ t->file, t->line_number);
1018
1235
  /* Assume it was == to try to avoid an error avalanche. */
1019
1236
  token = c_eq;
1020
1237
  /* FALLTHRU */
1021
1238
  case c_eq:
1022
1239
  case c_ne:
1023
- case c_gr:
1240
+ case c_gt:
1024
1241
  case c_ge:
1025
- case c_ls:
1242
+ case c_lt:
1026
1243
  case c_le: {
1027
1244
  struct node * lhs = n;
1028
- struct node * rhs = read_AE(a, 0, 0);
1245
+ struct node * rhs = read_AE(a, NULL, 0);
1029
1246
  if (lhs->type == c_number && rhs->type == c_number) {
1030
1247
  // Evaluate constant numeric test expression.
1031
1248
  int result;
@@ -1036,13 +1253,13 @@ static struct node * read_C(struct analyser * a) {
1036
1253
  case c_ne:
1037
1254
  result = (lhs->number != rhs->number);
1038
1255
  break;
1039
- case c_gr:
1256
+ case c_gt:
1040
1257
  result = (lhs->number > rhs->number);
1041
1258
  break;
1042
1259
  case c_ge:
1043
1260
  result = (lhs->number >= rhs->number);
1044
1261
  break;
1045
- case c_ls:
1262
+ case c_lt:
1046
1263
  result = (lhs->number < rhs->number);
1047
1264
  break;
1048
1265
  case c_le:
@@ -1064,7 +1281,7 @@ static struct node * read_C(struct analyser * a) {
1064
1281
  }
1065
1282
  default:
1066
1283
  error(a, e_unexpected_token);
1067
- t->token_held = true;
1284
+ hold_token(t);
1068
1285
  break;
1069
1286
  }
1070
1287
  return n;
@@ -1104,9 +1321,9 @@ static struct node * read_C(struct analyser * a) {
1104
1321
  switch (p->type) {
1105
1322
  case c_eq:
1106
1323
  case c_ne:
1107
- case c_gr:
1324
+ case c_gt:
1108
1325
  case c_ge:
1109
- case c_ls:
1326
+ case c_lt:
1110
1327
  case c_le:
1111
1328
  p->left = new_node(a, c_name);
1112
1329
  p->left->name = q;
@@ -1136,7 +1353,7 @@ static struct node * read_C(struct analyser * a) {
1136
1353
  }
1137
1354
 
1138
1355
  error(a, e_unexpected_token);
1139
- t->token_held = true;
1356
+ hold_token(t);
1140
1357
  return new_node(a, c_dollar);
1141
1358
  }
1142
1359
  case c_name:
@@ -1181,7 +1398,7 @@ static struct node * read_C(struct analyser * a) {
1181
1398
  return read_literalstring(a);
1182
1399
  case c_among: return read_among(a);
1183
1400
  case c_substring: return read_substring(a);
1184
- default: error(a, e_unexpected_token); return 0;
1401
+ default: error(a, e_unexpected_token); return NULL;
1185
1402
  }
1186
1403
  }
1187
1404
 
@@ -1189,28 +1406,30 @@ static int next_symbol(symbol * p, symbol * W, int utf8) {
1189
1406
  if (utf8) {
1190
1407
  int ch;
1191
1408
  int j = get_utf8(p, & ch);
1192
- W[0] = ch; return j;
1409
+ *W = ch;
1410
+ return j;
1193
1411
  } else {
1194
- W[0] = p[0]; return 1;
1412
+ *W = *p;
1413
+ return 1;
1195
1414
  }
1196
1415
  }
1197
1416
 
1198
1417
  static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
1199
1418
  int j = 0;
1200
- symbol W[1];
1419
+ symbol W;
1201
1420
  int width;
1202
1421
  if (style == c_plus) {
1203
1422
  while (j < SIZE(q)) {
1204
- width = next_symbol(q + j, W, utf8);
1205
- p = add_to_b(p, 1, W);
1423
+ width = next_symbol(q + j, &W, utf8);
1424
+ p = add_symbol_to_b(p, W);
1206
1425
  j += width;
1207
1426
  }
1208
1427
  } else {
1209
1428
  while (j < SIZE(q)) {
1210
1429
  int i;
1211
- width = next_symbol(q + j, W, utf8);
1430
+ width = next_symbol(q + j, &W, utf8);
1212
1431
  for (i = 0; i < SIZE(p); i++) {
1213
- if (p[i] == W[0]) {
1432
+ if (p[i] == W) {
1214
1433
  memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
1215
1434
  SIZE(p)--;
1216
1435
  }
@@ -1226,25 +1445,42 @@ static void read_define_grouping(struct analyser * a, struct name * q) {
1226
1445
  int style = c_plus;
1227
1446
  {
1228
1447
  NEW(grouping, p);
1229
- if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
1448
+ if (a->groupings == NULL) a->groupings = p; else a->groupings_end->next = p;
1230
1449
  a->groupings_end = p;
1231
- if (q) q->grouping = p;
1232
- p->next = 0;
1450
+ if (q) {
1451
+ if (q->grouping != NULL) {
1452
+ error(a, e_redefined);
1453
+ FREE(q->grouping);
1454
+ }
1455
+ q->grouping = p;
1456
+ }
1457
+ p->next = NULL;
1233
1458
  p->name = q;
1234
- p->line_number = a->tokeniser->line_number;
1459
+ p->line_number = t->line_number;
1235
1460
  p->b = create_b(0);
1236
1461
  while (true) {
1237
1462
  switch (read_token(t)) {
1238
- case c_name:
1239
- {
1240
- struct name * r = find_name(a);
1241
- if (r) {
1242
- check_name_type(a, r, 'g');
1243
- p->b = alter_grouping(p->b, r->grouping->b, style, false);
1244
- r->used_in_definition = true;
1245
- }
1463
+ case c_name: {
1464
+ struct name * r = find_name(a);
1465
+ if (!r) break;
1466
+
1467
+ check_name_type(a, r, 'g');
1468
+ if (r == q) {
1469
+ count_error(a);
1470
+ r->s[SIZE(r->s)] = 0;
1471
+ fprintf(stderr, "%s:%d: %s defined in terms of itself\n",
1472
+ t->file, t->line_number, r->s);
1473
+ } else if (!r->grouping) {
1474
+ count_error(a);
1475
+ r->s[SIZE(r->s)] = 0;
1476
+ fprintf(stderr, "%s:%d: %s undefined\n",
1477
+ t->file, t->line_number, r->s);
1478
+ } else {
1479
+ p->b = alter_grouping(p->b, r->grouping->b, style, false);
1246
1480
  }
1481
+ r->used_in_definition = true;
1247
1482
  break;
1483
+ }
1248
1484
  case c_literalstring:
1249
1485
  p->b = alter_grouping(p->b, t->b, style, (a->encoding == ENC_UTF8));
1250
1486
  break;
@@ -1269,7 +1505,7 @@ static void read_define_grouping(struct analyser * a, struct name * q) {
1269
1505
  p->smallest_ch = min;
1270
1506
  if (min == 1<<16) error(a, e_empty_grouping);
1271
1507
  }
1272
- t->token_held = true; return;
1508
+ hold_token(t);
1273
1509
  }
1274
1510
  }
1275
1511
 
@@ -1278,20 +1514,27 @@ static void read_define_routine(struct analyser * a, struct name * q) {
1278
1514
  a->amongvar_needed = false;
1279
1515
  if (q) {
1280
1516
  check_name_type(a, q, 'R');
1281
- if (q->definition != 0) error(a, e_redefined);
1282
- if (q->mode < 0) q->mode = a->mode; else
1517
+ if (q->definition != NULL) error(a, e_redefined);
1518
+ if (q->mode == m_unknown) q->mode = a->mode; else
1283
1519
  if (q->mode != a->mode) error2(a, e_declared_as_different_mode, q->mode);
1284
1520
  }
1285
1521
  p->name = q;
1286
- if (a->program == 0) a->program = p; else a->program_end->right = p;
1522
+ if (a->program == NULL) a->program = p; else a->program_end->right = p;
1287
1523
  a->program_end = p;
1288
1524
  get_token(a, c_as);
1289
1525
  p->left = read_C(a);
1290
1526
  if (q) q->definition = p->left;
1291
-
1292
- if (a->substring != 0) {
1527
+ /* We should get a node with a NULL right pointer from read_C() for the
1528
+ * routine's code. We synthesise a "functionend" node there so
1529
+ * optimisations such as dead code elimination and tail call optimisation
1530
+ * can easily see where the function ends.
1531
+ */
1532
+ assert(p->left->right == NULL);
1533
+ p->left->right = new_node(a, c_functionend);
1534
+
1535
+ if (a->substring != NULL) {
1293
1536
  error2(a, e_unresolved_substring, a->substring->line_number);
1294
- a->substring = 0;
1537
+ a->substring = NULL;
1295
1538
  }
1296
1539
  p->amongvar_needed = a->amongvar_needed;
1297
1540
  }
@@ -1303,15 +1546,19 @@ static void read_define(struct analyser * a) {
1303
1546
  if (q) {
1304
1547
  type = q->type;
1305
1548
  } else {
1306
- /* No declaration, so sniff next token - if it is 'as' then parse
1307
- * as a routine, otherwise as a grouping.
1549
+ /* No declaration so sniff next token - if it is a string or name
1550
+ * we parse as a grouping, otherwise we parse as a routine. This
1551
+ * avoids an avalanche of further errors if `as` is missing from a
1552
+ * routine definition.
1308
1553
  */
1309
- if (read_token(a->tokeniser) == c_as) {
1310
- type = t_routine;
1311
- } else {
1312
- type = t_grouping;
1554
+ switch (peek_token(a->tokeniser)) {
1555
+ case c_literalstring:
1556
+ case c_name:
1557
+ type = t_grouping;
1558
+ break;
1559
+ default:
1560
+ type = t_routine;
1313
1561
  }
1314
- a->tokeniser->token_held = true;
1315
1562
  }
1316
1563
 
1317
1564
  if (type == t_grouping) {
@@ -1396,10 +1643,10 @@ extern void read_program(struct analyser * a) {
1396
1643
  while (q) {
1397
1644
  switch (q->type) {
1398
1645
  case t_external: case t_routine:
1399
- if (q->used && q->definition == 0) error4(a, q);
1646
+ if (q->used && q->definition == NULL) error4(a, q);
1400
1647
  break;
1401
1648
  case t_grouping:
1402
- if (q->used && q->grouping == 0) error4(a, q);
1649
+ if (q->used && q->grouping == NULL) error4(a, q);
1403
1650
  break;
1404
1651
  }
1405
1652
  q = q->next;
@@ -1411,21 +1658,22 @@ extern void read_program(struct analyser * a) {
1411
1658
  struct name ** ptr = &(a->names);
1412
1659
  while (q) {
1413
1660
  if (!q->referenced) {
1414
- fprintf(stderr, "%s:%d: warning: %s '",
1661
+ q->s[SIZE(q->s)] = 0;
1662
+ fprintf(stderr, "%s:%d: warning: %s '%s' ",
1415
1663
  a->tokeniser->file,
1416
1664
  q->declaration_line_number,
1417
- name_of_name_type(q->type));
1418
- report_b(stderr, q->b);
1665
+ name_of_name_type(q->type),
1666
+ q->s);
1419
1667
  if (q->type == t_routine ||
1420
1668
  q->type == t_external ||
1421
1669
  q->type == t_grouping) {
1422
- fprintf(stderr, "' declared but not defined\n");
1670
+ fprintf(stderr, "declared but not defined\n");
1423
1671
  } else {
1424
- fprintf(stderr, "' defined but not used\n");
1425
- q = q->next;
1426
- *ptr = q;
1427
- continue;
1672
+ fprintf(stderr, "defined but not used\n");
1428
1673
  }
1674
+ q = q->next;
1675
+ *ptr = q;
1676
+ continue;
1429
1677
  } else if (q->type == t_routine || q->type == t_grouping) {
1430
1678
  /* It's OK to define a grouping but only use it to define other
1431
1679
  * groupings.
@@ -1437,29 +1685,32 @@ extern void read_program(struct analyser * a) {
1437
1685
  } else {
1438
1686
  line_num = q->grouping->line_number;
1439
1687
  }
1440
- fprintf(stderr, "%s:%d: warning: %s '",
1688
+ q->s[SIZE(q->s)] = 0;
1689
+ fprintf(stderr, "%s:%d: warning: %s '%s' defined but not used\n",
1441
1690
  a->tokeniser->file,
1442
1691
  line_num,
1443
- name_of_name_type(q->type));
1444
- report_b(stderr, q->b);
1445
- fprintf(stderr, "' defined but not used\n");
1692
+ name_of_name_type(q->type),
1693
+ q->s);
1694
+ q = q->next;
1695
+ *ptr = q;
1696
+ continue;
1446
1697
  }
1447
1698
  } else if (q->type == t_external) {
1448
1699
  /* Unused is OK. */
1449
1700
  } else if (!q->initialised) {
1450
- fprintf(stderr, "%s:%d: warning: %s '",
1701
+ q->s[SIZE(q->s)] = 0;
1702
+ fprintf(stderr, "%s:%d: warning: %s '%s' is never initialised\n",
1451
1703
  a->tokeniser->file,
1452
1704
  q->declaration_line_number,
1453
- name_of_name_type(q->type));
1454
- report_b(stderr, q->b);
1455
- fprintf(stderr, "' is never initialised\n");
1705
+ name_of_name_type(q->type),
1706
+ q->s);
1456
1707
  } else if (!q->value_used) {
1457
- fprintf(stderr, "%s:%d: warning: %s '",
1708
+ q->s[SIZE(q->s)] = 0;
1709
+ fprintf(stderr, "%s:%d: warning: %s '%s' is set but never used\n",
1458
1710
  a->tokeniser->file,
1459
1711
  q->declaration_line_number,
1460
- name_of_name_type(q->type));
1461
- report_b(stderr, q->b);
1462
- fprintf(stderr, "' is set but never used\n");
1712
+ name_of_name_type(q->type),
1713
+ q->s);
1463
1714
  remove_dead_assignments(a->program, q);
1464
1715
  q = q->next;
1465
1716
  *ptr = q;
@@ -1485,17 +1736,18 @@ extern void read_program(struct analyser * a) {
1485
1736
  extern struct analyser * create_analyser(struct tokeniser * t) {
1486
1737
  NEW(analyser, a);
1487
1738
  a->tokeniser = t;
1488
- a->nodes = 0;
1489
- a->names = 0;
1490
- a->literalstrings = 0;
1491
- a->program = 0;
1492
- a->amongs = 0;
1739
+ a->nodes = NULL;
1740
+ a->names = NULL;
1741
+ a->literalstrings = NULL;
1742
+ a->program = NULL;
1743
+ a->amongs = NULL;
1493
1744
  a->among_count = 0;
1494
- a->groupings = 0;
1745
+ a->among_with_function_count = 0;
1746
+ a->groupings = NULL;
1495
1747
  a->mode = m_forward;
1496
1748
  a->modifyable = true;
1497
1749
  { int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
1498
- a->substring = 0;
1750
+ a->substring = NULL;
1499
1751
  a->int_limits_used = false;
1500
1752
  return a;
1501
1753
  }
@@ -1513,7 +1765,8 @@ extern void close_analyser(struct analyser * a) {
1513
1765
  struct name * q = a->names;
1514
1766
  while (q) {
1515
1767
  struct name * q_next = q->next;
1516
- lose_b(q->b); FREE(q);
1768
+ lose_s(q->s);
1769
+ FREE(q);
1517
1770
  q = q_next;
1518
1771
  }
1519
1772
  }