mittens 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -3
- data/lib/mittens/version.rb +1 -1
- data/vendor/snowball/.github/workflows/ci.yml +216 -0
- data/vendor/snowball/CONTRIBUTING.rst +111 -62
- data/vendor/snowball/GNUmakefile +194 -136
- data/vendor/snowball/NEWS +798 -3
- data/vendor/snowball/README.rst +50 -1
- data/vendor/snowball/ada/src/stemmer.adb +25 -13
- data/vendor/snowball/ada/src/stemmer.ads +9 -9
- data/vendor/snowball/ada/stemmer_config.gpr +7 -7
- data/vendor/snowball/algorithms/basque.sbl +4 -19
- data/vendor/snowball/algorithms/catalan.sbl +2 -9
- data/vendor/snowball/algorithms/danish.sbl +1 -1
- data/vendor/snowball/algorithms/dutch.sbl +284 -122
- data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
- data/vendor/snowball/algorithms/english.sbl +52 -37
- data/vendor/snowball/algorithms/esperanto.sbl +157 -0
- data/vendor/snowball/algorithms/estonian.sbl +269 -0
- data/vendor/snowball/algorithms/finnish.sbl +2 -3
- data/vendor/snowball/algorithms/french.sbl +42 -16
- data/vendor/snowball/algorithms/german.sbl +35 -14
- data/vendor/snowball/algorithms/greek.sbl +76 -76
- data/vendor/snowball/algorithms/hungarian.sbl +8 -6
- data/vendor/snowball/algorithms/indonesian.sbl +14 -8
- data/vendor/snowball/algorithms/italian.sbl +11 -21
- data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
- data/vendor/snowball/algorithms/lovins.sbl +0 -1
- data/vendor/snowball/algorithms/nepali.sbl +138 -37
- data/vendor/snowball/algorithms/norwegian.sbl +19 -5
- data/vendor/snowball/algorithms/porter.sbl +2 -2
- data/vendor/snowball/algorithms/portuguese.sbl +9 -13
- data/vendor/snowball/algorithms/romanian.sbl +17 -4
- data/vendor/snowball/algorithms/serbian.sbl +467 -468
- data/vendor/snowball/algorithms/spanish.sbl +5 -7
- data/vendor/snowball/algorithms/swedish.sbl +60 -6
- data/vendor/snowball/algorithms/tamil.sbl +207 -176
- data/vendor/snowball/algorithms/turkish.sbl +461 -445
- data/vendor/snowball/algorithms/yiddish.sbl +36 -38
- data/vendor/snowball/compiler/analyser.c +445 -192
- data/vendor/snowball/compiler/driver.c +109 -101
- data/vendor/snowball/compiler/generator.c +853 -464
- data/vendor/snowball/compiler/generator_ada.c +404 -366
- data/vendor/snowball/compiler/generator_csharp.c +297 -260
- data/vendor/snowball/compiler/generator_go.c +323 -254
- data/vendor/snowball/compiler/generator_java.c +326 -252
- data/vendor/snowball/compiler/generator_js.c +362 -252
- data/vendor/snowball/compiler/generator_pascal.c +349 -197
- data/vendor/snowball/compiler/generator_python.c +257 -240
- data/vendor/snowball/compiler/generator_rust.c +423 -251
- data/vendor/snowball/compiler/header.h +117 -71
- data/vendor/snowball/compiler/space.c +137 -68
- data/vendor/snowball/compiler/syswords.h +2 -2
- data/vendor/snowball/compiler/tokeniser.c +125 -107
- data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
- data/vendor/snowball/csharp/Stemwords/App.config +2 -2
- data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
- data/vendor/snowball/doc/libstemmer_c_README +7 -4
- data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
- data/vendor/snowball/doc/libstemmer_java_README +12 -1
- data/vendor/snowball/doc/libstemmer_js_README +6 -4
- data/vendor/snowball/doc/libstemmer_python_README +9 -4
- data/vendor/snowball/examples/stemwords.c +12 -12
- data/vendor/snowball/go/env.go +107 -31
- data/vendor/snowball/go/util.go +0 -4
- data/vendor/snowball/include/libstemmer.h +4 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
- data/vendor/snowball/javascript/base-stemmer.js +186 -2
- data/vendor/snowball/javascript/stemwords.js +3 -6
- data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
- data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
- data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
- data/vendor/snowball/libstemmer/modules.txt +13 -10
- data/vendor/snowball/libstemmer/test.c +1 -1
- data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
- data/vendor/snowball/pascal/generate.pl +13 -13
- data/vendor/snowball/python/create_init.py +4 -1
- data/vendor/snowball/python/setup.cfg +0 -3
- data/vendor/snowball/python/setup.py +8 -3
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
- data/vendor/snowball/python/stemwords.py +8 -12
- data/vendor/snowball/runtime/api.c +10 -5
- data/vendor/snowball/runtime/header.h +10 -9
- data/vendor/snowball/runtime/utilities.c +9 -9
- data/vendor/snowball/rust/build.rs +1 -1
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
- data/vendor/snowball/tests/stemtest.c +7 -4
- metadata +7 -7
- data/vendor/snowball/.travis.yml +0 -112
- data/vendor/snowball/algorithms/german2.sbl +0 -145
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
- data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
#include <assert.h>
|
2
|
+
#include <limits.h> /* for INT_MAX */
|
2
3
|
#include <stdio.h> /* printf etc */
|
3
4
|
#include <stdlib.h> /* exit */
|
4
5
|
#include <string.h> /* memmove */
|
@@ -17,7 +18,7 @@ typedef enum {
|
|
17
18
|
e_empty_among = 18,
|
18
19
|
e_adjacent_bracketed_in_among = 19,
|
19
20
|
e_substring_preceded_by_substring = 20,
|
20
|
-
/* For codes below here, tokeniser->
|
21
|
+
/* For codes below here, tokeniser->s is printed before the error. */
|
21
22
|
e_redeclared = 30,
|
22
23
|
e_undeclared = 31,
|
23
24
|
e_declared_as_different_mode = 32,
|
@@ -36,38 +37,38 @@ static struct node * C_style(struct analyser * a, const char * s, int token);
|
|
36
37
|
|
37
38
|
|
38
39
|
static void print_node_(struct node * p, int n, const char * s) {
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
printf("%*s%s", n * 2, s, name_of_token(p->type));
|
41
|
+
if (p->name) {
|
42
|
+
putchar(' ');
|
43
|
+
report_s(stdout, p->name->s);
|
44
|
+
}
|
44
45
|
if (p->literalstring) {
|
45
|
-
printf("'");
|
46
|
+
printf(" '");
|
46
47
|
report_b(stdout, p->literalstring);
|
47
48
|
printf("'");
|
48
49
|
} else if (p->type == c_number) {
|
49
|
-
printf("%d", p->number);
|
50
|
+
printf(" %d", p->number);
|
50
51
|
}
|
51
52
|
printf("\n");
|
52
53
|
if (p->AE) print_node_(p->AE, n+1, "# ");
|
53
|
-
if (p->left) print_node_(p->left, n+1, "
|
54
|
+
if (p->left) print_node_(p->left, n+1, "");
|
54
55
|
if (p->aux) print_node_(p->aux, n+1, "@ ");
|
55
|
-
if (p->right) print_node_(p->right, n, "
|
56
|
+
if (p->right) print_node_(p->right, n, "");
|
56
57
|
}
|
57
58
|
|
58
59
|
extern void print_program(struct analyser * a) {
|
59
|
-
print_node_(a->program, 0, "
|
60
|
+
print_node_(a->program, 0, "");
|
60
61
|
}
|
61
62
|
|
62
63
|
static struct node * new_node(struct analyser * a, int type) {
|
63
64
|
NEW(node, p);
|
64
65
|
p->next = a->nodes; a->nodes = p;
|
65
|
-
p->left =
|
66
|
-
p->right =
|
67
|
-
p->aux =
|
68
|
-
p->AE =
|
69
|
-
p->name =
|
70
|
-
p->literalstring =
|
66
|
+
p->left = NULL;
|
67
|
+
p->right = NULL;
|
68
|
+
p->aux = NULL;
|
69
|
+
p->AE = NULL;
|
70
|
+
p->name = NULL;
|
71
|
+
p->literalstring = NULL;
|
71
72
|
p->mode = a->mode;
|
72
73
|
p->line_number = a->tokeniser->line_number;
|
73
74
|
p->type = type;
|
@@ -78,7 +79,6 @@ static const char * name_of_mode(int n) {
|
|
78
79
|
switch (n) {
|
79
80
|
case m_backward: return "string backward";
|
80
81
|
case m_forward: return "string forward";
|
81
|
-
/* case m_integer: return "integer"; */
|
82
82
|
}
|
83
83
|
fprintf(stderr, "Invalid mode %d in name_of_mode()\n", n);
|
84
84
|
exit(1);
|
@@ -86,6 +86,7 @@ static const char * name_of_mode(int n) {
|
|
86
86
|
|
87
87
|
static const char * name_of_type(int n) {
|
88
88
|
switch (n) {
|
89
|
+
case 'b': return "boolean";
|
89
90
|
case 's': return "string";
|
90
91
|
case 'i': return "integer";
|
91
92
|
case 'r': return "routine";
|
@@ -117,9 +118,14 @@ static void count_error(struct analyser * a) {
|
|
117
118
|
|
118
119
|
static void error2(struct analyser * a, error_code n, int x) {
|
119
120
|
struct tokeniser * t = a->tokeniser;
|
121
|
+
if (n == e_unexpected_token && t->token_reported_as_unexpected) {
|
122
|
+
// Avoid duplicate errors if this token was already reported as
|
123
|
+
// unexpected and then held.
|
124
|
+
return;
|
125
|
+
}
|
120
126
|
count_error(a);
|
121
127
|
fprintf(stderr, "%s:%d: ", t->file, t->line_number);
|
122
|
-
if ((int)n >= (int)e_redeclared)
|
128
|
+
if ((int)n >= (int)e_redeclared) report_s(stderr, t->s);
|
123
129
|
switch (n) {
|
124
130
|
case e_token_omitted:
|
125
131
|
fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
|
@@ -127,12 +133,14 @@ static void error2(struct analyser * a, error_code n, int x) {
|
|
127
133
|
fprintf(stderr, "in among(...), ");
|
128
134
|
/* fall through */
|
129
135
|
case e_unexpected_token:
|
136
|
+
t->token_reported_as_unexpected = true;
|
130
137
|
fprintf(stderr, "unexpected %s", name_of_token(t->token));
|
131
138
|
if (t->token == c_number) fprintf(stderr, " %d", t->number);
|
132
139
|
if (t->token == c_name) {
|
133
|
-
|
134
|
-
|
135
|
-
}
|
140
|
+
t->s[SIZE(t->s)] = 0;
|
141
|
+
fprintf(stderr, " %s", t->s);
|
142
|
+
}
|
143
|
+
break;
|
136
144
|
case e_string_omitted:
|
137
145
|
fprintf(stderr, "string omitted"); break;
|
138
146
|
|
@@ -179,9 +187,8 @@ static void error(struct analyser * a, error_code n) { error2(a, n, 0); }
|
|
179
187
|
|
180
188
|
static void error4(struct analyser * a, struct name * q) {
|
181
189
|
count_error(a);
|
182
|
-
|
183
|
-
|
184
|
-
fprintf(stderr, " undefined\n");
|
190
|
+
q->s[SIZE(q->s)] = 0;
|
191
|
+
fprintf(stderr, "%s:%d: %s undefined\n", a->tokeniser->file, q->used->line_number, q->s);
|
185
192
|
}
|
186
193
|
|
187
194
|
static void omission_error(struct analyser * a, int n) {
|
@@ -198,35 +205,33 @@ static int check_token(struct analyser * a, int code) {
|
|
198
205
|
static int get_token(struct analyser * a, int code) {
|
199
206
|
struct tokeniser * t = a->tokeniser;
|
200
207
|
read_token(t);
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
return x;
|
205
|
-
}
|
208
|
+
int x = check_token(a, code);
|
209
|
+
if (!x) hold_token(t);
|
210
|
+
return x;
|
206
211
|
}
|
207
212
|
|
208
213
|
static struct name * look_for_name(struct analyser * a) {
|
209
|
-
|
214
|
+
const byte * q = a->tokeniser->s;
|
210
215
|
struct name * p;
|
211
216
|
for (p = a->names; p; p = p->next) {
|
212
|
-
|
217
|
+
byte * b = p->s;
|
213
218
|
int n = SIZE(b);
|
214
|
-
if (n == SIZE(q) && memcmp(q, b, n
|
219
|
+
if (n == SIZE(q) && memcmp(q, b, n) == 0) {
|
215
220
|
p->referenced = true;
|
216
221
|
return p;
|
217
222
|
}
|
218
223
|
}
|
219
|
-
return
|
224
|
+
return NULL;
|
220
225
|
}
|
221
226
|
|
222
227
|
static struct name * find_name(struct analyser * a) {
|
223
228
|
struct name * p = look_for_name(a);
|
224
|
-
if (p ==
|
229
|
+
if (p == NULL) error(a, e_undeclared);
|
225
230
|
return p;
|
226
231
|
}
|
227
232
|
|
228
233
|
static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
|
229
|
-
if (p->mode
|
234
|
+
if (p->mode == m_unknown) p->mode = mode; else
|
230
235
|
if (p->mode != mode) error2(a, e_misused, mode);
|
231
236
|
}
|
232
237
|
|
@@ -265,10 +270,8 @@ static void read_names(struct analyser * a, int type) {
|
|
265
270
|
* its special meaning, for compatibility with older versions
|
266
271
|
* of snowball.
|
267
272
|
*/
|
268
|
-
|
269
|
-
|
270
|
-
};
|
271
|
-
t->b = MOVE_TO_B(t->b, c_len_lit);
|
273
|
+
SIZE(t->s) = 0;
|
274
|
+
t->s = add_literal_to_s(t->s, "len");
|
272
275
|
goto handle_as_name;
|
273
276
|
}
|
274
277
|
case c_lenof: {
|
@@ -276,31 +279,29 @@ static void read_names(struct analyser * a, int type) {
|
|
276
279
|
* its special meaning, for compatibility with older versions
|
277
280
|
* of snowball.
|
278
281
|
*/
|
279
|
-
|
280
|
-
|
281
|
-
};
|
282
|
-
t->b = MOVE_TO_B(t->b, c_lenof_lit);
|
282
|
+
SIZE(t->s) = 0;
|
283
|
+
t->s = add_literal_to_s(t->s, "lenof");
|
283
284
|
goto handle_as_name;
|
284
285
|
}
|
285
286
|
case c_name:
|
286
287
|
handle_as_name:
|
287
|
-
if (look_for_name(a) !=
|
288
|
+
if (look_for_name(a) != NULL) error(a, e_redeclared); else {
|
288
289
|
NEW(name, p);
|
289
|
-
p->
|
290
|
+
p->s = copy_s(t->s);
|
290
291
|
p->type = type;
|
291
|
-
p->mode =
|
292
|
+
p->mode = m_unknown; /* used for routines, externals */
|
292
293
|
/* We defer assigning counts until after we've eliminated
|
293
294
|
* variables whose values are never used. */
|
294
295
|
p->count = -1;
|
295
296
|
p->referenced = false;
|
296
297
|
p->used_in_among = false;
|
297
|
-
p->used =
|
298
|
+
p->used = NULL;
|
298
299
|
p->value_used = false;
|
299
300
|
p->initialised = false;
|
300
301
|
p->used_in_definition = false;
|
301
|
-
p->local_to =
|
302
|
-
p->grouping =
|
303
|
-
p->definition =
|
302
|
+
p->local_to = NULL;
|
303
|
+
p->grouping = NULL;
|
304
|
+
p->definition = NULL;
|
304
305
|
p->declaration_line_number = t->line_number;
|
305
306
|
p->next = a->names;
|
306
307
|
a->names = p;
|
@@ -310,7 +311,7 @@ handle_as_name:
|
|
310
311
|
}
|
311
312
|
break;
|
312
313
|
default:
|
313
|
-
if (!check_token(a, c_ket)) t
|
314
|
+
if (!check_token(a, c_ket)) hold_token(t);
|
314
315
|
return;
|
315
316
|
}
|
316
317
|
}
|
@@ -325,7 +326,6 @@ static symbol * new_literalstring(struct analyser * a) {
|
|
325
326
|
}
|
326
327
|
|
327
328
|
static int read_AE_test(struct analyser * a) {
|
328
|
-
|
329
329
|
struct tokeniser * t = a->tokeniser;
|
330
330
|
switch (read_token(t)) {
|
331
331
|
case c_assign: return c_mathassign;
|
@@ -335,11 +335,14 @@ static int read_AE_test(struct analyser * a) {
|
|
335
335
|
case c_divideassign:
|
336
336
|
case c_eq:
|
337
337
|
case c_ne:
|
338
|
-
case
|
338
|
+
case c_gt:
|
339
339
|
case c_ge:
|
340
|
-
case
|
340
|
+
case c_lt:
|
341
341
|
case c_le: return t->token;
|
342
|
-
default:
|
342
|
+
default:
|
343
|
+
error(a, e_unexpected_token);
|
344
|
+
hold_token(t);
|
345
|
+
return c_eq;
|
343
346
|
}
|
344
347
|
}
|
345
348
|
|
@@ -422,12 +425,16 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
|
|
422
425
|
case c_number:
|
423
426
|
p = new_node(a, c_number);
|
424
427
|
p->number = t->number;
|
428
|
+
p->fixed_constant = true;
|
425
429
|
break;
|
426
430
|
case c_lenof:
|
427
431
|
case c_sizeof: {
|
428
432
|
int token = t->token;
|
429
433
|
p = C_style(a, "S", token);
|
430
|
-
if (!p->literalstring)
|
434
|
+
if (!p->literalstring) {
|
435
|
+
if (p->name) p->name->value_used = true;
|
436
|
+
break;
|
437
|
+
}
|
431
438
|
|
432
439
|
/* Replace lenof or sizeof on a literal string with a numeric
|
433
440
|
* constant.
|
@@ -449,18 +456,19 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
|
|
449
456
|
p->type = c_number;
|
450
457
|
p->literalstring = NULL;
|
451
458
|
p->number = result;
|
459
|
+
p->fixed_constant = (token == c_lenof);
|
452
460
|
break;
|
453
461
|
}
|
454
462
|
default:
|
455
463
|
error(a, e_unexpected_token);
|
456
|
-
t
|
457
|
-
return
|
464
|
+
hold_token(t);
|
465
|
+
return NULL;
|
458
466
|
}
|
459
467
|
while (true) {
|
460
468
|
int token = read_token(t);
|
461
469
|
int b = binding(token);
|
462
470
|
if (binding(token) <= B) {
|
463
|
-
t
|
471
|
+
hold_token(t);
|
464
472
|
return p;
|
465
473
|
}
|
466
474
|
struct node * r = read_AE(a, assigned_to, b);
|
@@ -478,6 +486,11 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
|
|
478
486
|
q->number = p->number * r->number;
|
479
487
|
break;
|
480
488
|
case c_divide:
|
489
|
+
if (r->number == 0) {
|
490
|
+
fprintf(stderr, "%s:%d: Division by zero\n",
|
491
|
+
t->file, t->line_number);
|
492
|
+
exit(1);
|
493
|
+
}
|
481
494
|
q->number = p->number / r->number;
|
482
495
|
break;
|
483
496
|
default:
|
@@ -485,10 +498,101 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
|
|
485
498
|
name_of_token(token));
|
486
499
|
exit(1);
|
487
500
|
}
|
501
|
+
q->fixed_constant = p->fixed_constant && r->fixed_constant;
|
502
|
+
q->line_number = p->line_number;
|
488
503
|
} else {
|
489
|
-
|
490
|
-
q
|
491
|
-
|
504
|
+
// Check for specific constant or no-op cases.
|
505
|
+
q = NULL;
|
506
|
+
switch (token) {
|
507
|
+
case c_plus:
|
508
|
+
// 0 + r is r
|
509
|
+
if (p->type == c_number && p->number == 0) {
|
510
|
+
q = r;
|
511
|
+
break;
|
512
|
+
}
|
513
|
+
// p + 0 is p
|
514
|
+
if (r->type == c_number && r->number == 0) {
|
515
|
+
q = p;
|
516
|
+
break;
|
517
|
+
}
|
518
|
+
break;
|
519
|
+
case c_minus:
|
520
|
+
// 0 - r is -r
|
521
|
+
if (p->type == c_number && p->number == 0) {
|
522
|
+
q = new_node(a, c_neg);
|
523
|
+
q->right = r;
|
524
|
+
break;
|
525
|
+
}
|
526
|
+
// p - 0 is p
|
527
|
+
if (r->type == c_number && r->number == 0) {
|
528
|
+
q = p;
|
529
|
+
break;
|
530
|
+
}
|
531
|
+
break;
|
532
|
+
case c_multiply:
|
533
|
+
// 0 * r is 0
|
534
|
+
if (p->type == c_number && p->number == 0) {
|
535
|
+
q = p;
|
536
|
+
break;
|
537
|
+
}
|
538
|
+
// p * 0 is 0
|
539
|
+
if (r->type == c_number && r->number == 0) {
|
540
|
+
q = r;
|
541
|
+
q->line_number = p->line_number;
|
542
|
+
break;
|
543
|
+
}
|
544
|
+
// -1 * r is -r
|
545
|
+
if (p->type == c_number && p->number == -1) {
|
546
|
+
q = new_node(a, c_neg);
|
547
|
+
q->right = r;
|
548
|
+
q->line_number = p->line_number;
|
549
|
+
break;
|
550
|
+
}
|
551
|
+
// p * -1 is -p
|
552
|
+
if (r->type == c_number && r->number == -1) {
|
553
|
+
q = new_node(a, c_neg);
|
554
|
+
q->right = p;
|
555
|
+
q->line_number = p->line_number;
|
556
|
+
break;
|
557
|
+
}
|
558
|
+
// 1 * r is r
|
559
|
+
if (p->type == c_number && p->number == 1) {
|
560
|
+
q = r;
|
561
|
+
q->line_number = p->line_number;
|
562
|
+
break;
|
563
|
+
}
|
564
|
+
// p * 1 is p
|
565
|
+
if (r->type == c_number && r->number == 1) {
|
566
|
+
q = p;
|
567
|
+
break;
|
568
|
+
}
|
569
|
+
break;
|
570
|
+
case c_divide:
|
571
|
+
// p / 1 is p
|
572
|
+
if (r->type == c_number && r->number == 1) {
|
573
|
+
q = p;
|
574
|
+
break;
|
575
|
+
}
|
576
|
+
// p / -1 is -p
|
577
|
+
if (r->type == c_number && r->number == -1) {
|
578
|
+
q = new_node(a, c_neg);
|
579
|
+
q->right = p;
|
580
|
+
q->line_number = p->line_number;
|
581
|
+
break;
|
582
|
+
}
|
583
|
+
// p / 0 is an error!
|
584
|
+
if (r->type == c_number && r->number == 0) {
|
585
|
+
fprintf(stderr, "%s:%d: Division by zero\n",
|
586
|
+
t->file, t->line_number);
|
587
|
+
exit(1);
|
588
|
+
}
|
589
|
+
break;
|
590
|
+
}
|
591
|
+
if (!q) {
|
592
|
+
q = new_node(a, token);
|
593
|
+
q->left = p;
|
594
|
+
q->right = r;
|
595
|
+
}
|
492
596
|
}
|
493
597
|
p = q;
|
494
598
|
}
|
@@ -503,30 +607,30 @@ static struct node * read_C_connection(struct analyser * a, struct node * q, int
|
|
503
607
|
q = read_C(a);
|
504
608
|
p_end->right = q; p_end = q;
|
505
609
|
} while (read_token(t) == op);
|
506
|
-
t
|
610
|
+
hold_token(t);
|
507
611
|
return p;
|
508
612
|
}
|
509
613
|
|
510
614
|
static struct node * read_C_list(struct analyser * a) {
|
511
615
|
struct tokeniser * t = a->tokeniser;
|
512
616
|
struct node * p = new_node(a, c_bra);
|
513
|
-
struct node * p_end =
|
617
|
+
struct node * p_end = NULL;
|
514
618
|
while (true) {
|
515
619
|
int token = read_token(t);
|
516
620
|
if (token == c_ket) return p;
|
517
621
|
if (token < 0) { omission_error(a, c_ket); return p; }
|
518
|
-
t
|
622
|
+
hold_token(t);
|
519
623
|
{
|
520
624
|
struct node * q = read_C(a);
|
521
625
|
while (true) {
|
522
626
|
token = read_token(t);
|
523
627
|
if (token != c_and && token != c_or) {
|
524
|
-
t
|
628
|
+
hold_token(t);
|
525
629
|
break;
|
526
630
|
}
|
527
631
|
q = read_C_connection(a, q, token);
|
528
632
|
}
|
529
|
-
if (p_end ==
|
633
|
+
if (p_end == NULL) p->left = q; else p_end->right = q;
|
530
634
|
p_end = q;
|
531
635
|
}
|
532
636
|
}
|
@@ -541,7 +645,7 @@ static struct node * C_style(struct analyser * a, const char * s, int token) {
|
|
541
645
|
case 'D':
|
542
646
|
p->aux = read_C(a); continue;
|
543
647
|
case 'A':
|
544
|
-
p->AE = read_AE(a,
|
648
|
+
p->AE = read_AE(a, NULL, 0); continue;
|
545
649
|
case 'f':
|
546
650
|
get_token(a, c_for); continue;
|
547
651
|
case 'S':
|
@@ -632,11 +736,10 @@ static int compare_node(const struct node *p, const struct node *q) {
|
|
632
736
|
PTR_NULL_CHECK(p->name, q->name);
|
633
737
|
if (p->name) {
|
634
738
|
int r;
|
635
|
-
if (SIZE(p->name->
|
636
|
-
return SIZE(p->name->
|
739
|
+
if (SIZE(p->name->s) != SIZE(q->name->s)) {
|
740
|
+
return SIZE(p->name->s) - SIZE(q->name->s);
|
637
741
|
}
|
638
|
-
r = memcmp(p->name->
|
639
|
-
SIZE(p->name->b) * sizeof(symbol));
|
742
|
+
r = memcmp(p->name->s, q->name->s, SIZE(p->name->s));
|
640
743
|
if (r != 0) return r;
|
641
744
|
}
|
642
745
|
|
@@ -654,29 +757,34 @@ static int compare_node(const struct node *p, const struct node *q) {
|
|
654
757
|
return compare_node(p->right, q->right);
|
655
758
|
}
|
656
759
|
|
657
|
-
static
|
658
|
-
|
760
|
+
static struct node * make_among(struct analyser * a, struct node * p, struct node * substring) {
|
659
761
|
NEW(among, x);
|
660
762
|
NEWVEC(amongvec, v, p->number);
|
661
763
|
struct node * q = p->left;
|
764
|
+
struct node * starter = NULL;
|
662
765
|
struct amongvec * w0 = v;
|
663
766
|
struct amongvec * w1 = v;
|
664
767
|
int result = 1;
|
665
768
|
|
666
|
-
int direction = substring !=
|
769
|
+
int direction = substring != NULL ? substring->mode : p->mode;
|
667
770
|
int backward = direction == m_backward;
|
668
771
|
|
669
|
-
if (a->amongs ==
|
772
|
+
if (a->amongs == NULL) a->amongs = x; else a->amongs_end->next = x;
|
670
773
|
a->amongs_end = x;
|
671
|
-
x->next =
|
774
|
+
x->next = NULL;
|
775
|
+
x->node = p;
|
672
776
|
x->b = v;
|
673
777
|
x->number = a->among_count++;
|
674
778
|
x->function_count = 0;
|
675
|
-
x->starter = 0;
|
676
779
|
x->nocommand_count = 0;
|
677
780
|
x->amongvar_needed = false;
|
781
|
+
x->always_matches = false;
|
782
|
+
x->shortest_size = INT_MAX;
|
678
783
|
|
679
|
-
if (q->type == c_bra) {
|
784
|
+
if (q->type == c_bra) {
|
785
|
+
starter = q;
|
786
|
+
p->left = q = q->right;
|
787
|
+
}
|
680
788
|
|
681
789
|
while (q) {
|
682
790
|
if (q->type == c_literalstring) {
|
@@ -694,10 +802,15 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
|
|
694
802
|
check_routine_mode(a, function, direction);
|
695
803
|
x->function_count++;
|
696
804
|
} else {
|
697
|
-
w1->function =
|
805
|
+
w1->function = NULL;
|
806
|
+
if (w1->size == 0) {
|
807
|
+
// This among contains the empty string without a gating
|
808
|
+
// function so it will always match.
|
809
|
+
x->always_matches = true;
|
810
|
+
}
|
698
811
|
}
|
699
812
|
w1++;
|
700
|
-
} else if (q->left ==
|
813
|
+
} else if (q->left == NULL) {
|
701
814
|
/* empty command: () */
|
702
815
|
w0 = w1;
|
703
816
|
} else {
|
@@ -732,7 +845,8 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
|
|
732
845
|
x->command_count = result - 1;
|
733
846
|
{
|
734
847
|
NEWVEC(node*, commands, x->command_count);
|
735
|
-
|
848
|
+
for (int i = 0; i != x->command_count; ++i)
|
849
|
+
commands[i] = NULL;
|
736
850
|
for (w0 = v; w0 < w1; w0++) {
|
737
851
|
if (w0->result > 0) {
|
738
852
|
/* result == -1 when there's no command. */
|
@@ -757,6 +871,8 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
|
|
757
871
|
int size = w0->size;
|
758
872
|
struct amongvec * w;
|
759
873
|
|
874
|
+
if (size && size < x->shortest_size) x->shortest_size = size;
|
875
|
+
|
760
876
|
for (w = w0 - 1; w >= v; w--) {
|
761
877
|
if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
|
762
878
|
w0->i = w - v; /* fill in index of longest substring */
|
@@ -782,16 +898,29 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
|
|
782
898
|
x->literalstring_count = p->number;
|
783
899
|
p->among = x;
|
784
900
|
|
785
|
-
x->substring = substring;
|
786
|
-
if (substring != 0) substring->among = x;
|
787
901
|
if (x->command_count > 1 ||
|
788
|
-
(x->command_count == 1 && x->nocommand_count > 0)
|
789
|
-
x->starter != 0) {
|
902
|
+
(x->command_count == 1 && x->nocommand_count > 0)) {
|
790
903
|
/* We need to set among_var rather than just checking if find_among*()
|
791
904
|
* returns zero or not.
|
792
905
|
*/
|
793
906
|
x->amongvar_needed = a->amongvar_needed = true;
|
794
907
|
}
|
908
|
+
if (starter) {
|
909
|
+
starter->right = p;
|
910
|
+
if (substring) {
|
911
|
+
p = starter;
|
912
|
+
} else {
|
913
|
+
substring = new_node(a, c_substring);
|
914
|
+
substring->right = starter;
|
915
|
+
p = substring;
|
916
|
+
}
|
917
|
+
}
|
918
|
+
x->substring = substring;
|
919
|
+
if (substring != NULL) substring->among = x;
|
920
|
+
|
921
|
+
if (x->function_count > 0) ++a->among_with_function_count;
|
922
|
+
|
923
|
+
return p;
|
795
924
|
}
|
796
925
|
|
797
926
|
static int
|
@@ -805,11 +934,11 @@ is_just_true(struct node * q)
|
|
805
934
|
static struct node * read_among(struct analyser * a) {
|
806
935
|
struct tokeniser * t = a->tokeniser;
|
807
936
|
struct node * p = new_node(a, c_among);
|
808
|
-
struct node * p_end =
|
937
|
+
struct node * p_end = NULL;
|
809
938
|
int previous_token = -1;
|
810
939
|
struct node * substring = a->substring;
|
811
940
|
|
812
|
-
a->substring =
|
941
|
+
a->substring = NULL;
|
813
942
|
p->number = 0; /* counts the number of literals */
|
814
943
|
if (!get_token(a, c_bra)) return p;
|
815
944
|
while (true) {
|
@@ -822,8 +951,9 @@ static struct node * read_among(struct analyser * a) {
|
|
822
951
|
struct node * r = new_node(a, c_name);
|
823
952
|
name_to_node(a, r, 'r');
|
824
953
|
q->left = r;
|
954
|
+
} else {
|
955
|
+
hold_token(t);
|
825
956
|
}
|
826
|
-
else t->token_held = true;
|
827
957
|
p->number++; break;
|
828
958
|
case c_bra:
|
829
959
|
if (previous_token == c_bra) error(a, e_adjacent_bracketed_in_among);
|
@@ -832,7 +962,7 @@ static struct node * read_among(struct analyser * a) {
|
|
832
962
|
/* Convert anything equivalent to () to () so we handle it
|
833
963
|
* the same way.
|
834
964
|
*/
|
835
|
-
q->left =
|
965
|
+
q->left = NULL;
|
836
966
|
}
|
837
967
|
break;
|
838
968
|
default:
|
@@ -841,19 +971,18 @@ static struct node * read_among(struct analyser * a) {
|
|
841
971
|
continue;
|
842
972
|
case c_ket:
|
843
973
|
if (p->number == 0) error(a, e_empty_among);
|
844
|
-
if (t->error_count == 0) make_among(a, p, substring);
|
974
|
+
if (t->error_count == 0) p = make_among(a, p, substring);
|
845
975
|
return p;
|
846
976
|
}
|
847
977
|
previous_token = token;
|
848
|
-
if (p_end ==
|
978
|
+
if (p_end == NULL) p->left = q; else p_end->right = q;
|
849
979
|
p_end = q;
|
850
980
|
}
|
851
981
|
}
|
852
982
|
|
853
983
|
static struct node * read_substring(struct analyser * a) {
|
854
|
-
|
855
984
|
struct node * p = new_node(a, c_substring);
|
856
|
-
if (a->substring !=
|
985
|
+
if (a->substring != NULL) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
|
857
986
|
a->substring = p;
|
858
987
|
return p;
|
859
988
|
}
|
@@ -863,6 +992,10 @@ static void check_modifyable(struct analyser * a) {
|
|
863
992
|
}
|
864
993
|
|
865
994
|
static int ae_uses_name(struct node * p, struct name * q) {
|
995
|
+
if (!p) {
|
996
|
+
// AE is NULL after a syntax error, e.g. `$x = $y`
|
997
|
+
return 0;
|
998
|
+
}
|
866
999
|
switch (p->type) {
|
867
1000
|
case c_name:
|
868
1001
|
case c_lenof:
|
@@ -925,13 +1058,88 @@ static struct node * read_C(struct analyser * a) {
|
|
925
1058
|
case c_fail:
|
926
1059
|
case c_test:
|
927
1060
|
case c_do:
|
928
|
-
case c_goto:
|
929
|
-
case c_gopast:
|
930
1061
|
case c_repeat:
|
931
1062
|
return C_style(a, "C", token);
|
932
|
-
case
|
933
|
-
case
|
934
|
-
|
1063
|
+
case c_goto:
|
1064
|
+
case c_gopast: {
|
1065
|
+
struct node * subcommand = read_C(a);
|
1066
|
+
if (subcommand->type == c_grouping || subcommand->type == c_non) {
|
1067
|
+
/* We synthesise special commands for "goto" or "gopast" when
|
1068
|
+
* used on a grouping or an inverted grouping - the movement of
|
1069
|
+
* c by the matching action is exactly what we want!
|
1070
|
+
*
|
1071
|
+
* Adding the tokens happens to give unique values (the code
|
1072
|
+
* would fail to compile if it didn't!)
|
1073
|
+
*/
|
1074
|
+
switch (token + subcommand->type) {
|
1075
|
+
case c_goto + c_grouping:
|
1076
|
+
subcommand->type = c_goto_grouping;
|
1077
|
+
break;
|
1078
|
+
case c_gopast + c_grouping:
|
1079
|
+
subcommand->type = c_gopast_grouping;
|
1080
|
+
break;
|
1081
|
+
case c_goto + c_non:
|
1082
|
+
subcommand->type = c_goto_non;
|
1083
|
+
break;
|
1084
|
+
case c_gopast + c_non:
|
1085
|
+
subcommand->type = c_gopast_non;
|
1086
|
+
break;
|
1087
|
+
default:
|
1088
|
+
fprintf(stderr, "Unexpected go/grouping combination: %s %s",
|
1089
|
+
name_of_token(token),
|
1090
|
+
name_of_token(subcommand->type));
|
1091
|
+
exit(1);
|
1092
|
+
}
|
1093
|
+
return subcommand;
|
1094
|
+
}
|
1095
|
+
|
1096
|
+
struct node * p = new_node(a, token);
|
1097
|
+
p->left = subcommand;
|
1098
|
+
return p;
|
1099
|
+
}
|
1100
|
+
case c_loop: {
|
1101
|
+
struct node * n = C_style(a, "AC", token);
|
1102
|
+
// n->AE is NULL after a syntax error, e.g. `loop next`.
|
1103
|
+
if (n->AE && n->AE->type == c_number) {
|
1104
|
+
if (n->AE->number <= 0) {
|
1105
|
+
// `loop N C`, where N <= 0 is a no-op.
|
1106
|
+
if (n->AE->fixed_constant) {
|
1107
|
+
fprintf(stderr,
|
1108
|
+
"%s:%d: warning: loop %d C is a no-op\n",
|
1109
|
+
t->file, n->AE->line_number, n->AE->number);
|
1110
|
+
}
|
1111
|
+
n->AE = NULL;
|
1112
|
+
n->left = NULL;
|
1113
|
+
n->type = c_true;
|
1114
|
+
} else if (n->AE->number == 1) {
|
1115
|
+
// `loop 1 C` -> `C`.
|
1116
|
+
if (n->AE->fixed_constant) {
|
1117
|
+
fprintf(stderr,
|
1118
|
+
"%s:%d: warning: loop 1 C is just C\n",
|
1119
|
+
t->file, n->AE->line_number);
|
1120
|
+
}
|
1121
|
+
n = n->left;
|
1122
|
+
}
|
1123
|
+
}
|
1124
|
+
return n;
|
1125
|
+
}
|
1126
|
+
case c_atleast: {
|
1127
|
+
struct node * n = C_style(a, "AC", token);
|
1128
|
+
// n->AE is NULL after a syntax error, e.g. `loop next`.
|
1129
|
+
if (n->AE && n->AE->type == c_number) {
|
1130
|
+
if (n->AE->number <= 0) {
|
1131
|
+
// `atleast N C` where N <= 0 -> `repeat C`.
|
1132
|
+
if (n->AE->fixed_constant) {
|
1133
|
+
fprintf(stderr,
|
1134
|
+
"%s:%d: warning: atleast %d C is just repeat C\n",
|
1135
|
+
t->file, n->AE->line_number, n->AE->number);
|
1136
|
+
}
|
1137
|
+
n->AE = NULL;
|
1138
|
+
n->type = c_repeat;
|
1139
|
+
}
|
1140
|
+
}
|
1141
|
+
return n;
|
1142
|
+
}
|
935
1143
|
case c_setmark: {
|
936
1144
|
struct node * n = C_style(a, "i", token);
|
937
1145
|
if (n->name) n->name->initialised = true;
|
@@ -942,24 +1150,28 @@ static struct node * read_C(struct analyser * a) {
|
|
942
1150
|
return C_style(a, "A", token);
|
943
1151
|
case c_hop: {
|
944
1152
|
struct node * n = C_style(a, "A", token);
|
945
|
-
|
946
|
-
|
1153
|
+
// n->AE is NULL after a syntax error, e.g. `hop hop`.
|
1154
|
+
if (n->AE && n->AE->type == c_number) {
|
1155
|
+
if (n->AE->number == 1) {
|
1156
|
+
// Convert `hop 1` to `next`.
|
1157
|
+
n->AE = NULL;
|
1158
|
+
n->type = c_next;
|
1159
|
+
} else if (n->AE->number == 0) {
|
1160
|
+
if (n->AE->fixed_constant) {
|
1161
|
+
fprintf(stderr,
|
1162
|
+
"%s:%d: warning: hop 0 is a no-op\n",
|
1163
|
+
t->file, n->AE->line_number);
|
1164
|
+
}
|
1165
|
+
n->AE = NULL;
|
1166
|
+
n->type = c_true;
|
1167
|
+
} else if (n->AE->number < 0) {
|
947
1168
|
fprintf(stderr,
|
948
1169
|
"%s:%d: warning: hop %d now signals f (as was "
|
949
1170
|
"always documented) rather than moving the cursor "
|
950
1171
|
"in the opposite direction\n",
|
951
|
-
|
952
|
-
n->AE->line_number,
|
953
|
-
n->AE->number);
|
1172
|
+
t->file, n->AE->line_number, n->AE->number);
|
954
1173
|
n->AE = NULL;
|
955
1174
|
n->type = c_false;
|
956
|
-
} else if (n->AE->number == 0) {
|
957
|
-
fprintf(stderr,
|
958
|
-
"%s:%d: warning: hop 0 is a no-op\n",
|
959
|
-
a->tokeniser->file,
|
960
|
-
n->AE->line_number);
|
961
|
-
n->AE = NULL;
|
962
|
-
n->type = c_true;
|
963
1175
|
}
|
964
1176
|
}
|
965
1177
|
return n;
|
@@ -978,10 +1190,16 @@ static struct node * read_C(struct analyser * a) {
|
|
978
1190
|
return new_node(a, token);
|
979
1191
|
case c_assignto:
|
980
1192
|
case c_sliceto: {
|
981
|
-
struct node *n;
|
982
1193
|
check_modifyable(a);
|
983
|
-
n = C_style(a, "s", token);
|
1194
|
+
struct node *n = C_style(a, "s", token);
|
984
1195
|
if (n->name) n->name->initialised = true;
|
1196
|
+
if (token == c_assignto) {
|
1197
|
+
fprintf(stderr,
|
1198
|
+
"%s:%d: warning: Use of `=>` is not recommended, "
|
1199
|
+
"see https://snowballstem.org/compiler/snowman.html "
|
1200
|
+
"section 13.3 for details\n",
|
1201
|
+
t->file, n->line_number);
|
1202
|
+
}
|
985
1203
|
return n;
|
986
1204
|
}
|
987
1205
|
case c_assign:
|
@@ -1003,29 +1221,28 @@ static struct node * read_C(struct analyser * a) {
|
|
1003
1221
|
return n;
|
1004
1222
|
}
|
1005
1223
|
case c_dollar: {
|
1006
|
-
struct tokeniser * t = a->tokeniser;
|
1007
1224
|
read_token(t);
|
1008
1225
|
if (t->token == c_bra) {
|
1009
1226
|
/* Handle newer $(AE REL_OP AE) syntax. */
|
1010
|
-
struct node * n = read_AE(a,
|
1227
|
+
struct node * n = read_AE(a, NULL, 0);
|
1011
1228
|
read_token(t);
|
1012
|
-
|
1229
|
+
token = t->token;
|
1013
1230
|
switch (token) {
|
1014
1231
|
case c_assign:
|
1015
1232
|
count_error(a);
|
1016
1233
|
fprintf(stderr, "%s:%d: Expected relational operator (did you mean '=='?)\n",
|
1017
|
-
|
1234
|
+
t->file, t->line_number);
|
1018
1235
|
/* Assume it was == to try to avoid an error avalanche. */
|
1019
1236
|
token = c_eq;
|
1020
1237
|
/* FALLTHRU */
|
1021
1238
|
case c_eq:
|
1022
1239
|
case c_ne:
|
1023
|
-
case
|
1240
|
+
case c_gt:
|
1024
1241
|
case c_ge:
|
1025
|
-
case
|
1242
|
+
case c_lt:
|
1026
1243
|
case c_le: {
|
1027
1244
|
struct node * lhs = n;
|
1028
|
-
struct node * rhs = read_AE(a,
|
1245
|
+
struct node * rhs = read_AE(a, NULL, 0);
|
1029
1246
|
if (lhs->type == c_number && rhs->type == c_number) {
|
1030
1247
|
// Evaluate constant numeric test expression.
|
1031
1248
|
int result;
|
@@ -1036,13 +1253,13 @@ static struct node * read_C(struct analyser * a) {
|
|
1036
1253
|
case c_ne:
|
1037
1254
|
result = (lhs->number != rhs->number);
|
1038
1255
|
break;
|
1039
|
-
case
|
1256
|
+
case c_gt:
|
1040
1257
|
result = (lhs->number > rhs->number);
|
1041
1258
|
break;
|
1042
1259
|
case c_ge:
|
1043
1260
|
result = (lhs->number >= rhs->number);
|
1044
1261
|
break;
|
1045
|
-
case
|
1262
|
+
case c_lt:
|
1046
1263
|
result = (lhs->number < rhs->number);
|
1047
1264
|
break;
|
1048
1265
|
case c_le:
|
@@ -1064,7 +1281,7 @@ static struct node * read_C(struct analyser * a) {
|
|
1064
1281
|
}
|
1065
1282
|
default:
|
1066
1283
|
error(a, e_unexpected_token);
|
1067
|
-
t
|
1284
|
+
hold_token(t);
|
1068
1285
|
break;
|
1069
1286
|
}
|
1070
1287
|
return n;
|
@@ -1104,9 +1321,9 @@ static struct node * read_C(struct analyser * a) {
|
|
1104
1321
|
switch (p->type) {
|
1105
1322
|
case c_eq:
|
1106
1323
|
case c_ne:
|
1107
|
-
case
|
1324
|
+
case c_gt:
|
1108
1325
|
case c_ge:
|
1109
|
-
case
|
1326
|
+
case c_lt:
|
1110
1327
|
case c_le:
|
1111
1328
|
p->left = new_node(a, c_name);
|
1112
1329
|
p->left->name = q;
|
@@ -1136,7 +1353,7 @@ static struct node * read_C(struct analyser * a) {
|
|
1136
1353
|
}
|
1137
1354
|
|
1138
1355
|
error(a, e_unexpected_token);
|
1139
|
-
t
|
1356
|
+
hold_token(t);
|
1140
1357
|
return new_node(a, c_dollar);
|
1141
1358
|
}
|
1142
1359
|
case c_name:
|
@@ -1181,7 +1398,7 @@ static struct node * read_C(struct analyser * a) {
|
|
1181
1398
|
return read_literalstring(a);
|
1182
1399
|
case c_among: return read_among(a);
|
1183
1400
|
case c_substring: return read_substring(a);
|
1184
|
-
default: error(a, e_unexpected_token); return
|
1401
|
+
default: error(a, e_unexpected_token); return NULL;
|
1185
1402
|
}
|
1186
1403
|
}
|
1187
1404
|
|
@@ -1189,28 +1406,30 @@ static int next_symbol(symbol * p, symbol * W, int utf8) {
|
|
1189
1406
|
if (utf8) {
|
1190
1407
|
int ch;
|
1191
1408
|
int j = get_utf8(p, & ch);
|
1192
|
-
W
|
1409
|
+
*W = ch;
|
1410
|
+
return j;
|
1193
1411
|
} else {
|
1194
|
-
W
|
1412
|
+
*W = *p;
|
1413
|
+
return 1;
|
1195
1414
|
}
|
1196
1415
|
}
|
1197
1416
|
|
1198
1417
|
static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
|
1199
1418
|
int j = 0;
|
1200
|
-
symbol W
|
1419
|
+
symbol W;
|
1201
1420
|
int width;
|
1202
1421
|
if (style == c_plus) {
|
1203
1422
|
while (j < SIZE(q)) {
|
1204
|
-
width = next_symbol(q + j, W, utf8);
|
1205
|
-
p =
|
1423
|
+
width = next_symbol(q + j, &W, utf8);
|
1424
|
+
p = add_symbol_to_b(p, W);
|
1206
1425
|
j += width;
|
1207
1426
|
}
|
1208
1427
|
} else {
|
1209
1428
|
while (j < SIZE(q)) {
|
1210
1429
|
int i;
|
1211
|
-
width = next_symbol(q + j, W, utf8);
|
1430
|
+
width = next_symbol(q + j, &W, utf8);
|
1212
1431
|
for (i = 0; i < SIZE(p); i++) {
|
1213
|
-
if (p[i] == W
|
1432
|
+
if (p[i] == W) {
|
1214
1433
|
memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
|
1215
1434
|
SIZE(p)--;
|
1216
1435
|
}
|
@@ -1226,25 +1445,42 @@ static void read_define_grouping(struct analyser * a, struct name * q) {
|
|
1226
1445
|
int style = c_plus;
|
1227
1446
|
{
|
1228
1447
|
NEW(grouping, p);
|
1229
|
-
if (a->groupings ==
|
1448
|
+
if (a->groupings == NULL) a->groupings = p; else a->groupings_end->next = p;
|
1230
1449
|
a->groupings_end = p;
|
1231
|
-
if (q)
|
1232
|
-
|
1450
|
+
if (q) {
|
1451
|
+
if (q->grouping != NULL) {
|
1452
|
+
error(a, e_redefined);
|
1453
|
+
FREE(q->grouping);
|
1454
|
+
}
|
1455
|
+
q->grouping = p;
|
1456
|
+
}
|
1457
|
+
p->next = NULL;
|
1233
1458
|
p->name = q;
|
1234
|
-
p->line_number =
|
1459
|
+
p->line_number = t->line_number;
|
1235
1460
|
p->b = create_b(0);
|
1236
1461
|
while (true) {
|
1237
1462
|
switch (read_token(t)) {
|
1238
|
-
case c_name:
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1463
|
+
case c_name: {
|
1464
|
+
struct name * r = find_name(a);
|
1465
|
+
if (!r) break;
|
1466
|
+
|
1467
|
+
check_name_type(a, r, 'g');
|
1468
|
+
if (r == q) {
|
1469
|
+
count_error(a);
|
1470
|
+
r->s[SIZE(r->s)] = 0;
|
1471
|
+
fprintf(stderr, "%s:%d: %s defined in terms of itself\n",
|
1472
|
+
t->file, t->line_number, r->s);
|
1473
|
+
} else if (!r->grouping) {
|
1474
|
+
count_error(a);
|
1475
|
+
r->s[SIZE(r->s)] = 0;
|
1476
|
+
fprintf(stderr, "%s:%d: %s undefined\n",
|
1477
|
+
t->file, t->line_number, r->s);
|
1478
|
+
} else {
|
1479
|
+
p->b = alter_grouping(p->b, r->grouping->b, style, false);
|
1246
1480
|
}
|
1481
|
+
r->used_in_definition = true;
|
1247
1482
|
break;
|
1483
|
+
}
|
1248
1484
|
case c_literalstring:
|
1249
1485
|
p->b = alter_grouping(p->b, t->b, style, (a->encoding == ENC_UTF8));
|
1250
1486
|
break;
|
@@ -1269,7 +1505,7 @@ static void read_define_grouping(struct analyser * a, struct name * q) {
|
|
1269
1505
|
p->smallest_ch = min;
|
1270
1506
|
if (min == 1<<16) error(a, e_empty_grouping);
|
1271
1507
|
}
|
1272
|
-
t
|
1508
|
+
hold_token(t);
|
1273
1509
|
}
|
1274
1510
|
}
|
1275
1511
|
|
@@ -1278,20 +1514,27 @@ static void read_define_routine(struct analyser * a, struct name * q) {
|
|
1278
1514
|
a->amongvar_needed = false;
|
1279
1515
|
if (q) {
|
1280
1516
|
check_name_type(a, q, 'R');
|
1281
|
-
if (q->definition !=
|
1282
|
-
if (q->mode
|
1517
|
+
if (q->definition != NULL) error(a, e_redefined);
|
1518
|
+
if (q->mode == m_unknown) q->mode = a->mode; else
|
1283
1519
|
if (q->mode != a->mode) error2(a, e_declared_as_different_mode, q->mode);
|
1284
1520
|
}
|
1285
1521
|
p->name = q;
|
1286
|
-
if (a->program ==
|
1522
|
+
if (a->program == NULL) a->program = p; else a->program_end->right = p;
|
1287
1523
|
a->program_end = p;
|
1288
1524
|
get_token(a, c_as);
|
1289
1525
|
p->left = read_C(a);
|
1290
1526
|
if (q) q->definition = p->left;
|
1291
|
-
|
1292
|
-
|
1527
|
+
/* We should get a node with a NULL right pointer from read_C() for the
|
1528
|
+
* routine's code. We synthesise a "functionend" node there so
|
1529
|
+
* optimisations such as dead code elimination and tail call optimisation
|
1530
|
+
* can easily see where the function ends.
|
1531
|
+
*/
|
1532
|
+
assert(p->left->right == NULL);
|
1533
|
+
p->left->right = new_node(a, c_functionend);
|
1534
|
+
|
1535
|
+
if (a->substring != NULL) {
|
1293
1536
|
error2(a, e_unresolved_substring, a->substring->line_number);
|
1294
|
-
a->substring =
|
1537
|
+
a->substring = NULL;
|
1295
1538
|
}
|
1296
1539
|
p->amongvar_needed = a->amongvar_needed;
|
1297
1540
|
}
|
@@ -1303,15 +1546,19 @@ static void read_define(struct analyser * a) {
|
|
1303
1546
|
if (q) {
|
1304
1547
|
type = q->type;
|
1305
1548
|
} else {
|
1306
|
-
/* No declaration
|
1307
|
-
* as a
|
1549
|
+
/* No declaration so sniff next token - if it is a string or name
|
1550
|
+
* we parse as a grouping, otherwise we parse as a routine. This
|
1551
|
+
* avoids an avalanche of further errors if `as` is missing from a
|
1552
|
+
* routine definition.
|
1308
1553
|
*/
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1554
|
+
switch (peek_token(a->tokeniser)) {
|
1555
|
+
case c_literalstring:
|
1556
|
+
case c_name:
|
1557
|
+
type = t_grouping;
|
1558
|
+
break;
|
1559
|
+
default:
|
1560
|
+
type = t_routine;
|
1313
1561
|
}
|
1314
|
-
a->tokeniser->token_held = true;
|
1315
1562
|
}
|
1316
1563
|
|
1317
1564
|
if (type == t_grouping) {
|
@@ -1396,10 +1643,10 @@ extern void read_program(struct analyser * a) {
|
|
1396
1643
|
while (q) {
|
1397
1644
|
switch (q->type) {
|
1398
1645
|
case t_external: case t_routine:
|
1399
|
-
if (q->used && q->definition ==
|
1646
|
+
if (q->used && q->definition == NULL) error4(a, q);
|
1400
1647
|
break;
|
1401
1648
|
case t_grouping:
|
1402
|
-
if (q->used && q->grouping ==
|
1649
|
+
if (q->used && q->grouping == NULL) error4(a, q);
|
1403
1650
|
break;
|
1404
1651
|
}
|
1405
1652
|
q = q->next;
|
@@ -1411,21 +1658,22 @@ extern void read_program(struct analyser * a) {
|
|
1411
1658
|
struct name ** ptr = &(a->names);
|
1412
1659
|
while (q) {
|
1413
1660
|
if (!q->referenced) {
|
1414
|
-
|
1661
|
+
q->s[SIZE(q->s)] = 0;
|
1662
|
+
fprintf(stderr, "%s:%d: warning: %s '%s' ",
|
1415
1663
|
a->tokeniser->file,
|
1416
1664
|
q->declaration_line_number,
|
1417
|
-
name_of_name_type(q->type)
|
1418
|
-
|
1665
|
+
name_of_name_type(q->type),
|
1666
|
+
q->s);
|
1419
1667
|
if (q->type == t_routine ||
|
1420
1668
|
q->type == t_external ||
|
1421
1669
|
q->type == t_grouping) {
|
1422
|
-
fprintf(stderr, "
|
1670
|
+
fprintf(stderr, "declared but not defined\n");
|
1423
1671
|
} else {
|
1424
|
-
fprintf(stderr, "
|
1425
|
-
q = q->next;
|
1426
|
-
*ptr = q;
|
1427
|
-
continue;
|
1672
|
+
fprintf(stderr, "defined but not used\n");
|
1428
1673
|
}
|
1674
|
+
q = q->next;
|
1675
|
+
*ptr = q;
|
1676
|
+
continue;
|
1429
1677
|
} else if (q->type == t_routine || q->type == t_grouping) {
|
1430
1678
|
/* It's OK to define a grouping but only use it to define other
|
1431
1679
|
* groupings.
|
@@ -1437,29 +1685,32 @@ extern void read_program(struct analyser * a) {
|
|
1437
1685
|
} else {
|
1438
1686
|
line_num = q->grouping->line_number;
|
1439
1687
|
}
|
1440
|
-
|
1688
|
+
q->s[SIZE(q->s)] = 0;
|
1689
|
+
fprintf(stderr, "%s:%d: warning: %s '%s' defined but not used\n",
|
1441
1690
|
a->tokeniser->file,
|
1442
1691
|
line_num,
|
1443
|
-
name_of_name_type(q->type)
|
1444
|
-
|
1445
|
-
|
1692
|
+
name_of_name_type(q->type),
|
1693
|
+
q->s);
|
1694
|
+
q = q->next;
|
1695
|
+
*ptr = q;
|
1696
|
+
continue;
|
1446
1697
|
}
|
1447
1698
|
} else if (q->type == t_external) {
|
1448
1699
|
/* Unused is OK. */
|
1449
1700
|
} else if (!q->initialised) {
|
1450
|
-
|
1701
|
+
q->s[SIZE(q->s)] = 0;
|
1702
|
+
fprintf(stderr, "%s:%d: warning: %s '%s' is never initialised\n",
|
1451
1703
|
a->tokeniser->file,
|
1452
1704
|
q->declaration_line_number,
|
1453
|
-
name_of_name_type(q->type)
|
1454
|
-
|
1455
|
-
fprintf(stderr, "' is never initialised\n");
|
1705
|
+
name_of_name_type(q->type),
|
1706
|
+
q->s);
|
1456
1707
|
} else if (!q->value_used) {
|
1457
|
-
|
1708
|
+
q->s[SIZE(q->s)] = 0;
|
1709
|
+
fprintf(stderr, "%s:%d: warning: %s '%s' is set but never used\n",
|
1458
1710
|
a->tokeniser->file,
|
1459
1711
|
q->declaration_line_number,
|
1460
|
-
name_of_name_type(q->type)
|
1461
|
-
|
1462
|
-
fprintf(stderr, "' is set but never used\n");
|
1712
|
+
name_of_name_type(q->type),
|
1713
|
+
q->s);
|
1463
1714
|
remove_dead_assignments(a->program, q);
|
1464
1715
|
q = q->next;
|
1465
1716
|
*ptr = q;
|
@@ -1485,17 +1736,18 @@ extern void read_program(struct analyser * a) {
|
|
1485
1736
|
extern struct analyser * create_analyser(struct tokeniser * t) {
|
1486
1737
|
NEW(analyser, a);
|
1487
1738
|
a->tokeniser = t;
|
1488
|
-
a->nodes =
|
1489
|
-
a->names =
|
1490
|
-
a->literalstrings =
|
1491
|
-
a->program =
|
1492
|
-
a->amongs =
|
1739
|
+
a->nodes = NULL;
|
1740
|
+
a->names = NULL;
|
1741
|
+
a->literalstrings = NULL;
|
1742
|
+
a->program = NULL;
|
1743
|
+
a->amongs = NULL;
|
1493
1744
|
a->among_count = 0;
|
1494
|
-
a->
|
1745
|
+
a->among_with_function_count = 0;
|
1746
|
+
a->groupings = NULL;
|
1495
1747
|
a->mode = m_forward;
|
1496
1748
|
a->modifyable = true;
|
1497
1749
|
{ int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
|
1498
|
-
a->substring =
|
1750
|
+
a->substring = NULL;
|
1499
1751
|
a->int_limits_used = false;
|
1500
1752
|
return a;
|
1501
1753
|
}
|
@@ -1513,7 +1765,8 @@ extern void close_analyser(struct analyser * a) {
|
|
1513
1765
|
struct name * q = a->names;
|
1514
1766
|
while (q) {
|
1515
1767
|
struct name * q_next = q->next;
|
1516
|
-
|
1768
|
+
lose_s(q->s);
|
1769
|
+
FREE(q);
|
1517
1770
|
q = q_next;
|
1518
1771
|
}
|
1519
1772
|
}
|