mittens 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,1547 @@
1
+
2
+ #include <stdio.h> /* printf etc */
3
+ #include <stdlib.h> /* exit */
4
+ #include <string.h> /* memmove */
5
+ #include "header.h"
6
+
7
/* Error codes reported by the analyser.
 *
 * The numeric ranges matter: error2() compares a code against
 * e_unresolved_substring and e_redeclared to decide which extra context to
 * print around the message.
 */
typedef enum {
    /* Codes below e_unresolved_substring: error2() appends
     * " after <previous token>" to the message. */
    e_token_omitted = 0,
    e_unexpected_token,
    e_string_omitted,
    e_unexpected_token_in_among,

    /* Codes from e_unresolved_substring up to (but excluding) e_redeclared:
     * the message is printed with no extra context. */
    e_unresolved_substring = 14,
    e_not_allowed_inside_reverse,
    e_empty_grouping,
    e_already_backwards,
    e_empty_among,
    e_adjacent_bracketed_in_among,
    e_substring_preceded_by_substring,

    /* Codes from e_redeclared onwards: tokeniser->b is printed before the
     * message. */
    e_redeclared = 30,
    e_undeclared,
    e_declared_as_different_mode,
    e_not_of_type_x,
    e_not_of_type_string_or_integer,
    e_misplaced,
    e_redefined,
    e_misused
} error_code;
30
+
31
+ /* recursive usage: */
32
+
33
+ static void read_program_(struct analyser * a, int terminator);
34
+ static struct node * read_C(struct analyser * a);
35
+ static struct node * C_style(struct analyser * a, const char * s, int token);
36
+
37
+
38
+ static void print_node_(struct node * p, int n, const char * s) {
39
+
40
+ int i;
41
+ for (i = 0; i < n; i++) fputs(i == n - 1 ? s : " ", stdout);
42
+ printf("%s ", name_of_token(p->type));
43
+ if (p->name) report_b(stdout, p->name->b);
44
+ if (p->literalstring) {
45
+ printf("'");
46
+ report_b(stdout, p->literalstring);
47
+ printf("'");
48
+ } else if (p->type == c_number) {
49
+ printf("%d", p->number);
50
+ }
51
+ printf("\n");
52
+ if (p->AE) print_node_(p->AE, n+1, "# ");
53
+ if (p->left) print_node_(p->left, n+1, " ");
54
+ if (p->aux) print_node_(p->aux, n+1, "@ ");
55
+ if (p->right) print_node_(p->right, n, " ");
56
+ }
57
+
58
+ extern void print_program(struct analyser * a) {
59
+ print_node_(a->program, 0, " ");
60
+ }
61
+
62
+ static struct node * new_node(struct analyser * a, int type) {
63
+ NEW(node, p);
64
+ p->next = a->nodes; a->nodes = p;
65
+ p->left = 0;
66
+ p->right = 0;
67
+ p->aux = 0;
68
+ p->AE = 0;
69
+ p->name = 0;
70
+ p->literalstring = 0;
71
+ p->mode = a->mode;
72
+ p->line_number = a->tokeniser->line_number;
73
+ p->type = type;
74
+ return p;
75
+ }
76
+
77
+ static const char * name_of_mode(int n) {
78
+ switch (n) {
79
+ case m_backward: return "string backward";
80
+ case m_forward: return "string forward";
81
+ /* case m_integer: return "integer"; */
82
+ }
83
+ fprintf(stderr, "Invalid mode %d in name_of_mode()\n", n);
84
+ exit(1);
85
+ }
86
+
87
/* Return a printable name for a one-character type code, as passed to
 * check_name_type():
 *
 *   's' string, 'i' integer, 'b' boolean, 'r' routine,
 *   'R' routine or grouping, 'g' grouping
 *
 * 'b' must be handled here because check_name_type() accepts it and reports
 * a mismatch through error2(), which calls this function; without the case
 * a boolean type mismatch would end in the "Invalid type" exit below instead
 * of a proper diagnostic.
 *
 * Any other code is an internal error: a message is written to stderr and
 * the process exits with status 1.
 */
static const char * name_of_type(int n) {
    switch (n) {
        case 's': return "string";
        case 'i': return "integer";
        case 'b': return "boolean";
        case 'r': return "routine";
        case 'R': return "routine or grouping";
        case 'g': return "grouping";
    }
    fprintf(stderr, "Invalid type %d in name_of_type()\n", n);
    exit(1);
}
98
+
99
+ static const char * name_of_name_type(int code) {
100
+ switch (code) {
101
+ case t_string: return "string";
102
+ case t_boolean: return "boolean";
103
+ case t_integer: return "integer";
104
+ case t_routine: return "routine";
105
+ case t_external: return "external";
106
+ case t_grouping: return "grouping";
107
+ }
108
+ fprintf(stderr, "Invalid type code %d in name_of_name_type()\n", code);
109
+ exit(1);
110
+ }
111
+
112
+ static void count_error(struct analyser * a) {
113
+ struct tokeniser * t = a->tokeniser;
114
+ if (t->error_count >= 20) { fprintf(stderr, "... etc\n"); exit(1); }
115
+ t->error_count++;
116
+ }
117
+
118
+ static void error2(struct analyser * a, error_code n, int x) {
119
+ struct tokeniser * t = a->tokeniser;
120
+ count_error(a);
121
+ fprintf(stderr, "%s:%d: ", t->file, t->line_number);
122
+ if ((int)n >= (int)e_redeclared) report_b(stderr, t->b);
123
+ switch (n) {
124
+ case e_token_omitted:
125
+ fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
126
+ case e_unexpected_token_in_among:
127
+ fprintf(stderr, "in among(...), ");
128
+ /* fall through */
129
+ case e_unexpected_token:
130
+ fprintf(stderr, "unexpected %s", name_of_token(t->token));
131
+ if (t->token == c_number) fprintf(stderr, " %d", t->number);
132
+ if (t->token == c_name) {
133
+ fprintf(stderr, " ");
134
+ report_b(stderr, t->b);
135
+ } break;
136
+ case e_string_omitted:
137
+ fprintf(stderr, "string omitted"); break;
138
+
139
+ case e_unresolved_substring:
140
+ fprintf(stderr, "unresolved substring on line %d", x); break;
141
+ case e_not_allowed_inside_reverse:
142
+ fprintf(stderr, "%s not allowed inside reverse(...)", name_of_token(t->token)); break;
143
+ case e_empty_grouping:
144
+ fprintf(stderr, "empty grouping"); break;
145
+ case e_already_backwards:
146
+ fprintf(stderr, "backwards used when already in this mode"); break;
147
+ case e_empty_among:
148
+ fprintf(stderr, "empty among(...)"); break;
149
+ case e_adjacent_bracketed_in_among:
150
+ fprintf(stderr, "two adjacent bracketed expressions in among(...)"); break;
151
+ case e_substring_preceded_by_substring:
152
+ fprintf(stderr, "substring preceded by another substring on line %d", x); break;
153
+
154
+ case e_redeclared:
155
+ fprintf(stderr, " re-declared"); break;
156
+ case e_undeclared:
157
+ fprintf(stderr, " undeclared"); break;
158
+ case e_declared_as_different_mode:
159
+ fprintf(stderr, " declared as %s mode; used as %s mode",
160
+ name_of_mode(a->mode), name_of_mode(x)); break;
161
+ case e_not_of_type_x:
162
+ fprintf(stderr, " not of type %s", name_of_type(x)); break;
163
+ case e_not_of_type_string_or_integer:
164
+ fprintf(stderr, " not of type string or integer"); break;
165
+ case e_misplaced:
166
+ fprintf(stderr, " misplaced"); break;
167
+ case e_redefined:
168
+ fprintf(stderr, " redefined"); break;
169
+ case e_misused:
170
+ fprintf(stderr, " mis-used as %s mode",
171
+ name_of_mode(x)); break;
172
+ }
173
+ if ((int)n < (int)e_unresolved_substring && t->previous_token > 0)
174
+ fprintf(stderr, " after %s", name_of_token(t->previous_token));
175
+ fprintf(stderr, "\n");
176
+ }
177
+
178
+ static void error(struct analyser * a, error_code n) { error2(a, n, 0); }
179
+
180
+ static void error4(struct analyser * a, struct name * q) {
181
+ count_error(a);
182
+ fprintf(stderr, "%s:%d: ", a->tokeniser->file, q->used->line_number);
183
+ report_b(stderr, q->b);
184
+ fprintf(stderr, " undefined\n");
185
+ }
186
+
187
+ static void omission_error(struct analyser * a, int n) {
188
+ a->tokeniser->omission = n;
189
+ error(a, e_token_omitted);
190
+ }
191
+
192
+ static int check_token(struct analyser * a, int code) {
193
+ struct tokeniser * t = a->tokeniser;
194
+ if (t->token != code) { omission_error(a, code); return false; }
195
+ return true;
196
+ }
197
+
198
+ static int get_token(struct analyser * a, int code) {
199
+ struct tokeniser * t = a->tokeniser;
200
+ read_token(t);
201
+ {
202
+ int x = check_token(a, code);
203
+ if (!x) t->token_held = true;
204
+ return x;
205
+ }
206
+ }
207
+
208
+ static struct name * look_for_name(struct analyser * a) {
209
+ symbol * q = a->tokeniser->b;
210
+ struct name * p;
211
+ for (p = a->names; p; p = p->next) {
212
+ symbol * b = p->b;
213
+ int n = SIZE(b);
214
+ if (n == SIZE(q) && memcmp(q, b, n * sizeof(symbol)) == 0) {
215
+ p->referenced = true;
216
+ return p;
217
+ }
218
+ }
219
+ return 0;
220
+ }
221
+
222
+ static struct name * find_name(struct analyser * a) {
223
+ struct name * p = look_for_name(a);
224
+ if (p == 0) error(a, e_undeclared);
225
+ return p;
226
+ }
227
+
228
+ static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
229
+ if (p->mode < 0) p->mode = mode; else
230
+ if (p->mode != mode) error2(a, e_misused, mode);
231
+ }
232
+
233
+ static void check_name_type(struct analyser * a, struct name * p, int type) {
234
+ switch (type) {
235
+ case 's':
236
+ if (p->type == t_string) return;
237
+ break;
238
+ case 'i':
239
+ if (p->type == t_integer) return;
240
+ break;
241
+ case 'b':
242
+ if (p->type == t_boolean) return;
243
+ break;
244
+ case 'R':
245
+ if (p->type == t_grouping) return;
246
+ /* FALLTHRU */
247
+ case 'r':
248
+ if (p->type == t_routine || p->type == t_external) return;
249
+ break;
250
+ case 'g':
251
+ if (p->type == t_grouping) return;
252
+ break;
253
+ }
254
+ error2(a, e_not_of_type_x, type);
255
+ }
256
+
257
+ static void read_names(struct analyser * a, int type) {
258
+ struct tokeniser * t = a->tokeniser;
259
+ if (!get_token(a, c_bra)) return;
260
+ while (true) {
261
+ int token = read_token(t);
262
+ switch (token) {
263
+ case c_len: {
264
+ /* Context-sensitive token - once declared as a name, it loses
265
+ * its special meaning, for compatibility with older versions
266
+ * of snowball.
267
+ */
268
+ static const symbol c_len_lit[] = {
269
+ 'l', 'e', 'n'
270
+ };
271
+ t->b = MOVE_TO_B(t->b, c_len_lit);
272
+ goto handle_as_name;
273
+ }
274
+ case c_lenof: {
275
+ /* Context-sensitive token - once declared as a name, it loses
276
+ * its special meaning, for compatibility with older versions
277
+ * of snowball.
278
+ */
279
+ static const symbol c_lenof_lit[] = {
280
+ 'l', 'e', 'n', 'o', 'f'
281
+ };
282
+ t->b = MOVE_TO_B(t->b, c_lenof_lit);
283
+ goto handle_as_name;
284
+ }
285
+ case c_name:
286
+ handle_as_name:
287
+ if (look_for_name(a) != 0) error(a, e_redeclared); else {
288
+ NEW(name, p);
289
+ p->b = copy_b(t->b);
290
+ p->type = type;
291
+ p->mode = -1; /* routines, externals */
292
+ /* We defer assigning counts until after we've eliminated
293
+ * variables whose values are never used. */
294
+ p->count = -1;
295
+ p->referenced = false;
296
+ p->used_in_among = false;
297
+ p->used = 0;
298
+ p->value_used = false;
299
+ p->initialised = false;
300
+ p->used_in_definition = false;
301
+ p->local_to = 0;
302
+ p->grouping = 0;
303
+ p->definition = 0;
304
+ p->declaration_line_number = t->line_number;
305
+ p->next = a->names;
306
+ a->names = p;
307
+ if (token != c_name) {
308
+ disable_token(t, token);
309
+ }
310
+ }
311
+ break;
312
+ default:
313
+ if (!check_token(a, c_ket)) t->token_held = true;
314
+ return;
315
+ }
316
+ }
317
+ }
318
+
319
+ static symbol * new_literalstring(struct analyser * a) {
320
+ NEW(literalstring, p);
321
+ p->b = copy_b(a->tokeniser->b);
322
+ p->next = a->literalstrings;
323
+ a->literalstrings = p;
324
+ return p->b;
325
+ }
326
+
327
+ static int read_AE_test(struct analyser * a) {
328
+
329
+ struct tokeniser * t = a->tokeniser;
330
+ switch (read_token(t)) {
331
+ case c_assign: return c_mathassign;
332
+ case c_plusassign:
333
+ case c_minusassign:
334
+ case c_multiplyassign:
335
+ case c_divideassign:
336
+ case c_eq:
337
+ case c_ne:
338
+ case c_gr:
339
+ case c_ge:
340
+ case c_ls:
341
+ case c_le: return t->token;
342
+ default: error(a, e_unexpected_token); t->token_held = true; return c_eq;
343
+ }
344
+ }
345
+
346
+ static int binding(int t) {
347
+ switch (t) {
348
+ case c_plus: case c_minus: return 1;
349
+ case c_multiply: case c_divide: return 2;
350
+ default: return -2;
351
+ }
352
+ }
353
+
354
+ static void mark_used_in(struct analyser * a, struct name * q, struct node * p) {
355
+ if (!q->used) {
356
+ q->used = p;
357
+ q->local_to = a->program_end->name;
358
+ } else if (q->local_to) {
359
+ if (q->local_to != a->program_end->name) {
360
+ /* Used in more than one routine/external. */
361
+ q->local_to = NULL;
362
+ }
363
+ }
364
+ }
365
+
366
+ static void name_to_node(struct analyser * a, struct node * p, int type) {
367
+ struct name * q = find_name(a);
368
+ if (q) {
369
+ check_name_type(a, q, type);
370
+ mark_used_in(a, q, p);
371
+ }
372
+ p->name = q;
373
+ }
374
+
375
+ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int B) {
376
+ struct tokeniser * t = a->tokeniser;
377
+ struct node * p;
378
+ struct node * q;
379
+ switch (read_token(t)) {
380
+ case c_minus: /* monadic */
381
+ q = read_AE(a, assigned_to, 100);
382
+ if (q->type == c_neg) {
383
+ /* Optimise away double negation, which avoids generators
384
+ * having to worry about generating "--" (decrement operator
385
+ * in many languages).
386
+ */
387
+ p = q->right;
388
+ /* Don't free q, it's in the linked list a->nodes. */
389
+ break;
390
+ }
391
+ if (q->type == c_number) {
392
+ /* Negated constant. */
393
+ q->number = -q->number;
394
+ p = q;
395
+ break;
396
+ }
397
+ p = new_node(a, c_neg);
398
+ p->right = q;
399
+ break;
400
+ case c_bra:
401
+ p = read_AE(a, assigned_to, 0);
402
+ get_token(a, c_ket);
403
+ break;
404
+ case c_name:
405
+ p = new_node(a, c_name);
406
+ name_to_node(a, p, 'i');
407
+ if (p->name) {
408
+ // $x = x + 1 shouldn't count as a use of x.
409
+ p->name->value_used = (p->name != assigned_to);
410
+ }
411
+ break;
412
+ case c_maxint:
413
+ case c_minint:
414
+ a->int_limits_used = true;
415
+ /* fall through */
416
+ case c_cursor:
417
+ case c_limit:
418
+ case c_len:
419
+ case c_size:
420
+ p = new_node(a, t->token);
421
+ break;
422
+ case c_number:
423
+ p = new_node(a, c_number);
424
+ p->number = t->number;
425
+ break;
426
+ case c_lenof:
427
+ case c_sizeof: {
428
+ int token = t->token;
429
+ p = C_style(a, "S", token);
430
+ if (!p->literalstring) break;
431
+
432
+ /* Replace lenof or sizeof on a literal string with a numeric
433
+ * constant.
434
+ */
435
+ int result;
436
+ if (token == c_lenof && t->encoding == ENC_UTF8) {
437
+ // UTF-8.
438
+ int i = 0;
439
+ symbol * b = p->literalstring;
440
+ result = 0;
441
+ while (i < SIZE(b)) {
442
+ int dummy;
443
+ i += get_utf8(b + i, &dummy);
444
+ ++result;
445
+ }
446
+ } else {
447
+ result = SIZE(p->literalstring);
448
+ }
449
+ p->type = c_number;
450
+ p->literalstring = NULL;
451
+ p->number = result;
452
+ break;
453
+ }
454
+ default:
455
+ error(a, e_unexpected_token);
456
+ t->token_held = true;
457
+ return 0;
458
+ }
459
+ while (true) {
460
+ int token = read_token(t);
461
+ int b = binding(token);
462
+ if (binding(token) <= B) {
463
+ t->token_held = true;
464
+ return p;
465
+ }
466
+ struct node * r = read_AE(a, assigned_to, b);
467
+ if (p->type == c_number && r->type == c_number) {
468
+ // Evaluate constant sub-expression.
469
+ q = new_node(a, c_number);
470
+ switch (token) {
471
+ case c_plus:
472
+ q->number = p->number + r->number;
473
+ break;
474
+ case c_minus:
475
+ q->number = p->number - r->number;
476
+ break;
477
+ case c_multiply:
478
+ q->number = p->number * r->number;
479
+ break;
480
+ case c_divide:
481
+ q->number = p->number / r->number;
482
+ break;
483
+ default:
484
+ fprintf(stderr, "Unexpected AE operator %s\n",
485
+ name_of_token(token));
486
+ exit(1);
487
+ }
488
+ } else {
489
+ q = new_node(a, token);
490
+ q->left = p;
491
+ q->right = r;
492
+ }
493
+ p = q;
494
+ }
495
+ }
496
+
497
+ static struct node * read_C_connection(struct analyser * a, struct node * q, int op) {
498
+ struct tokeniser * t = a->tokeniser;
499
+ struct node * p = new_node(a, op);
500
+ struct node * p_end = q;
501
+ p->left = q;
502
+ do {
503
+ q = read_C(a);
504
+ p_end->right = q; p_end = q;
505
+ } while (read_token(t) == op);
506
+ t->token_held = true;
507
+ return p;
508
+ }
509
+
510
+ static struct node * read_C_list(struct analyser * a) {
511
+ struct tokeniser * t = a->tokeniser;
512
+ struct node * p = new_node(a, c_bra);
513
+ struct node * p_end = 0;
514
+ while (true) {
515
+ int token = read_token(t);
516
+ if (token == c_ket) return p;
517
+ if (token < 0) { omission_error(a, c_ket); return p; }
518
+ t->token_held = true;
519
+ {
520
+ struct node * q = read_C(a);
521
+ while (true) {
522
+ token = read_token(t);
523
+ if (token != c_and && token != c_or) {
524
+ t->token_held = true;
525
+ break;
526
+ }
527
+ q = read_C_connection(a, q, token);
528
+ }
529
+ if (p_end == 0) p->left = q; else p_end->right = q;
530
+ p_end = q;
531
+ }
532
+ }
533
+ }
534
+
535
+ static struct node * C_style(struct analyser * a, const char * s, int token) {
536
+ int i;
537
+ struct node * p = new_node(a, token);
538
+ for (i = 0; s[i] != 0; i++) switch (s[i]) {
539
+ case 'C':
540
+ p->left = read_C(a); continue;
541
+ case 'D':
542
+ p->aux = read_C(a); continue;
543
+ case 'A':
544
+ p->AE = read_AE(a, 0, 0); continue;
545
+ case 'f':
546
+ get_token(a, c_for); continue;
547
+ case 'S':
548
+ {
549
+ int str_token = read_token(a->tokeniser);
550
+ if (str_token == c_name) name_to_node(a, p, 's'); else
551
+ if (str_token == c_literalstring) p->literalstring = new_literalstring(a);
552
+ else error(a, e_string_omitted);
553
+ }
554
+ continue;
555
+ case 'b':
556
+ case 's':
557
+ case 'i':
558
+ if (get_token(a, c_name)) name_to_node(a, p, s[i]);
559
+ continue;
560
+ }
561
+ return p;
562
+ }
563
+
564
+ static struct node * read_literalstring(struct analyser * a) {
565
+ struct node * p = new_node(a, c_literalstring);
566
+ p->literalstring = new_literalstring(a);
567
+ return p;
568
+ }
569
+
570
+ static void reverse_b(symbol * b) {
571
+ int i = 0; int j = SIZE(b) - 1;
572
+ while (i < j) {
573
+ int ch1 = b[i]; int ch2 = b[j];
574
+ b[i++] = ch2; b[j--] = ch1;
575
+ }
576
+ }
577
+
578
+ static int compare_amongvec(const void *pv, const void *qv) {
579
+ const struct amongvec * p = (const struct amongvec*)pv;
580
+ const struct amongvec * q = (const struct amongvec*)qv;
581
+ symbol * b_p = p->b; int p_size = p->size;
582
+ symbol * b_q = q->b; int q_size = q->size;
583
+ int smaller_size = p_size < q_size ? p_size : q_size;
584
+ int i;
585
+ for (i = 0; i < smaller_size; i++)
586
+ if (b_p[i] != b_q[i]) return b_p[i] - b_q[i];
587
+ if (p_size - q_size)
588
+ return p_size - q_size;
589
+ return p->line_number - q->line_number;
590
+ }
591
+
592
+ #define PTR_NULL_CHECK(P, Q) do {\
593
+ if ((Q) == NULL) {\
594
+ if ((P) != NULL) return 1;\
595
+ } else {\
596
+ if ((P) == NULL) return -1;\
597
+ }\
598
+ } while (0)
599
+
600
+ static int compare_node(const struct node *p, const struct node *q) {
601
+ PTR_NULL_CHECK(p, q);
602
+ if (q == NULL) {
603
+ /* p must be NULL too. */
604
+ return 0;
605
+ }
606
+
607
+ if (p->type != q->type) return p->type > q->type ? 1 : -1;
608
+ if (p->mode != q->mode) return p->mode > q->mode ? 1 : -1;
609
+ if (p->type == c_number) {
610
+ if (p->number != q->number)
611
+ return p->number > q->number ? 1 : -1;
612
+ }
613
+
614
+ PTR_NULL_CHECK(p->left, q->left);
615
+ if (p->left) {
616
+ int r = compare_node(p->left, q->left);
617
+ if (r != 0) return r;
618
+ }
619
+
620
+ PTR_NULL_CHECK(p->AE, q->AE);
621
+ if (p->AE) {
622
+ int r = compare_node(p->AE, q->AE);
623
+ if (r != 0) return r;
624
+ }
625
+
626
+ PTR_NULL_CHECK(p->aux, q->aux);
627
+ if (p->aux) {
628
+ int r = compare_node(p->aux, q->aux);
629
+ if (r != 0) return r;
630
+ }
631
+
632
+ PTR_NULL_CHECK(p->name, q->name);
633
+ if (p->name) {
634
+ int r;
635
+ if (SIZE(p->name->b) != SIZE(q->name->b)) {
636
+ return SIZE(p->name->b) - SIZE(q->name->b);
637
+ }
638
+ r = memcmp(p->name->b, q->name->b,
639
+ SIZE(p->name->b) * sizeof(symbol));
640
+ if (r != 0) return r;
641
+ }
642
+
643
+ PTR_NULL_CHECK(p->literalstring, q->literalstring);
644
+ if (p->literalstring) {
645
+ int r;
646
+ if (SIZE(p->literalstring) != SIZE(q->literalstring)) {
647
+ return SIZE(p->literalstring) - SIZE(q->literalstring);
648
+ }
649
+ r = memcmp(p->literalstring, q->literalstring,
650
+ SIZE(p->literalstring) * sizeof(symbol));
651
+ if (r != 0) return r;
652
+ }
653
+
654
+ return compare_node(p->right, q->right);
655
+ }
656
+
657
+ static void make_among(struct analyser * a, struct node * p, struct node * substring) {
658
+
659
+ NEW(among, x);
660
+ NEWVEC(amongvec, v, p->number);
661
+ struct node * q = p->left;
662
+ struct amongvec * w0 = v;
663
+ struct amongvec * w1 = v;
664
+ int result = 1;
665
+
666
+ int direction = substring != 0 ? substring->mode : p->mode;
667
+ int backward = direction == m_backward;
668
+
669
+ if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
670
+ a->amongs_end = x;
671
+ x->next = 0;
672
+ x->b = v;
673
+ x->number = a->among_count++;
674
+ x->function_count = 0;
675
+ x->starter = 0;
676
+ x->nocommand_count = 0;
677
+ x->amongvar_needed = false;
678
+
679
+ if (q->type == c_bra) { x->starter = q; q = q->right; }
680
+
681
+ while (q) {
682
+ if (q->type == c_literalstring) {
683
+ symbol * b = q->literalstring;
684
+ w1->b = b; /* pointer to case string */
685
+ w1->action = NULL; /* action gets filled in below */
686
+ w1->line_number = q->line_number;
687
+ w1->size = SIZE(b); /* number of characters in string */
688
+ w1->i = -1; /* index of longest substring */
689
+ w1->result = -1; /* number of corresponding case expression */
690
+ if (q->left) {
691
+ struct name * function = q->left->name;
692
+ w1->function = function;
693
+ function->used_in_among = true;
694
+ check_routine_mode(a, function, direction);
695
+ x->function_count++;
696
+ } else {
697
+ w1->function = 0;
698
+ }
699
+ w1++;
700
+ } else if (q->left == 0) {
701
+ /* empty command: () */
702
+ w0 = w1;
703
+ } else {
704
+ /* Check for previous action which is the same as this one and use
705
+ * the same action code if we find one.
706
+ */
707
+ int among_result = -1;
708
+ struct amongvec * w;
709
+ for (w = v; w < w0; ++w) {
710
+ if (w->action && compare_node(w->action->left, q->left) == 0) {
711
+ if (w->result <= 0) {
712
+ printf("Among code %d isn't positive\n", w->result);
713
+ exit(1);
714
+ }
715
+ among_result = w->result;
716
+ break;
717
+ }
718
+ }
719
+ if (among_result < 0) {
720
+ among_result = result++;
721
+ }
722
+
723
+ while (w0 != w1) {
724
+ w0->action = q;
725
+ w0->result = among_result;
726
+ w0++;
727
+ }
728
+ }
729
+ q = q->right;
730
+ }
731
+ if (w1-v != p->number) { fprintf(stderr, "oh! %d %d\n", (int)(w1-v), p->number); exit(1); }
732
+ x->command_count = result - 1;
733
+ {
734
+ NEWVEC(node*, commands, x->command_count);
735
+ memset(commands, 0, x->command_count * sizeof(struct node*));
736
+ for (w0 = v; w0 < w1; w0++) {
737
+ if (w0->result > 0) {
738
+ /* result == -1 when there's no command. */
739
+ if (w0->result > x->command_count) {
740
+ fprintf(stderr, "More among codes than expected\n");
741
+ exit(1);
742
+ }
743
+ if (!commands[w0->result - 1])
744
+ commands[w0->result - 1] = w0->action;
745
+ } else {
746
+ ++x->nocommand_count;
747
+ }
748
+ if (backward) reverse_b(w0->b);
749
+ }
750
+ x->commands = commands;
751
+ }
752
+ qsort(v, w1 - v, sizeof(struct amongvec), compare_amongvec);
753
+
754
+ /* the following loop is O(n squared) */
755
+ for (w0 = w1 - 1; w0 >= v; w0--) {
756
+ symbol * b = w0->b;
757
+ int size = w0->size;
758
+ struct amongvec * w;
759
+
760
+ for (w = w0 - 1; w >= v; w--) {
761
+ if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
762
+ w0->i = w - v; /* fill in index of longest substring */
763
+ break;
764
+ }
765
+ }
766
+ }
767
+ if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
768
+
769
+ for (w0 = v; w0 < w1 - 1; w0++)
770
+ if (w0->size == (w0 + 1)->size &&
771
+ memcmp(w0->b, (w0 + 1)->b, w0->size * sizeof(symbol)) == 0) {
772
+ count_error(a);
773
+ fprintf(stderr, "%s:%d: among(...) has repeated string '",
774
+ a->tokeniser->file, (w0 + 1)->line_number);
775
+ report_b(stderr, (w0 + 1)->b);
776
+ fprintf(stderr, "'\n");
777
+ count_error(a);
778
+ fprintf(stderr, "%s:%d: previously seen here\n",
779
+ a->tokeniser->file, w0->line_number);
780
+ }
781
+
782
+ x->literalstring_count = p->number;
783
+ p->among = x;
784
+
785
+ x->substring = substring;
786
+ if (substring != 0) substring->among = x;
787
+ if (x->command_count > 1 ||
788
+ (x->command_count == 1 && x->nocommand_count > 0) ||
789
+ x->starter != 0) {
790
+ /* We need to set among_var rather than just checking if find_among*()
791
+ * returns zero or not.
792
+ */
793
+ x->amongvar_needed = a->amongvar_needed = true;
794
+ }
795
+ }
796
+
797
+ static int
798
+ is_just_true(struct node * q)
799
+ {
800
+ if (!q) return 1;
801
+ if (q->type != c_bra && q->type != c_true) return 0;
802
+ return is_just_true(q->left) && is_just_true(q->right);
803
+ }
804
+
805
+ static struct node * read_among(struct analyser * a) {
806
+ struct tokeniser * t = a->tokeniser;
807
+ struct node * p = new_node(a, c_among);
808
+ struct node * p_end = 0;
809
+ int previous_token = -1;
810
+ struct node * substring = a->substring;
811
+
812
+ a->substring = 0;
813
+ p->number = 0; /* counts the number of literals */
814
+ if (!get_token(a, c_bra)) return p;
815
+ while (true) {
816
+ struct node * q;
817
+ int token = read_token(t);
818
+ switch (token) {
819
+ case c_literalstring:
820
+ q = read_literalstring(a);
821
+ if (read_token(t) == c_name) {
822
+ struct node * r = new_node(a, c_name);
823
+ name_to_node(a, r, 'r');
824
+ q->left = r;
825
+ }
826
+ else t->token_held = true;
827
+ p->number++; break;
828
+ case c_bra:
829
+ if (previous_token == c_bra) error(a, e_adjacent_bracketed_in_among);
830
+ q = read_C_list(a);
831
+ if (is_just_true(q->left)) {
832
+ /* Convert anything equivalent to () to () so we handle it
833
+ * the same way.
834
+ */
835
+ q->left = 0;
836
+ }
837
+ break;
838
+ default:
839
+ error(a, e_unexpected_token_in_among);
840
+ previous_token = token;
841
+ continue;
842
+ case c_ket:
843
+ if (p->number == 0) error(a, e_empty_among);
844
+ if (t->error_count == 0) make_among(a, p, substring);
845
+ return p;
846
+ }
847
+ previous_token = token;
848
+ if (p_end == 0) p->left = q; else p_end->right = q;
849
+ p_end = q;
850
+ }
851
+ }
852
+
853
+ static struct node * read_substring(struct analyser * a) {
854
+
855
+ struct node * p = new_node(a, c_substring);
856
+ if (a->substring != 0) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
857
+ a->substring = p;
858
+ return p;
859
+ }
860
+
861
+ static void check_modifyable(struct analyser * a) {
862
+ if (!a->modifyable) error(a, e_not_allowed_inside_reverse);
863
+ }
864
+
865
+ static int ae_uses_name(struct node * p, struct name * q) {
866
+ switch (p->type) {
867
+ case c_name:
868
+ case c_lenof:
869
+ case c_sizeof:
870
+ if (p->name == q) return 1;
871
+ break;
872
+ case c_neg:
873
+ return ae_uses_name(p->right, q);
874
+ case c_multiply:
875
+ case c_plus:
876
+ case c_minus:
877
+ case c_divide:
878
+ return ae_uses_name(p->left, q) || ae_uses_name(p->right, q);
879
+ }
880
+ return 0;
881
+ }
882
+
883
+ static struct node * read_C(struct analyser * a) {
884
+ struct tokeniser * t = a->tokeniser;
885
+ int token = read_token(t);
886
+ switch (token) {
887
+ case c_bra: {
888
+ struct node * p = read_C_list(a);
889
+ if (p->type != c_bra) {
890
+ fprintf(stderr, "read_C_list returned unexpected type %s\n",
891
+ name_of_token(p->type));
892
+ exit(1);
893
+ }
894
+ if (p->left && !p->left->right) {
895
+ // Replace a single entry command list with the command it
896
+ // contains in order to make subsequent optimisations easier.
897
+ p = p->left;
898
+ }
899
+ return p;
900
+ }
901
+ case c_backwards:
902
+ {
903
+ int mode = a->mode;
904
+ if (a->mode == m_backward) error(a, e_already_backwards); else a->mode = m_backward;
905
+ { struct node * p = C_style(a, "C", token);
906
+ a->mode = mode;
907
+ return p;
908
+ }
909
+ }
910
+ case c_reverse:
911
+ {
912
+ int mode = a->mode;
913
+ int modifyable = a->modifyable;
914
+ a->modifyable = false;
915
+ a->mode = mode == m_forward ? m_backward : m_forward;
916
+ {
917
+ struct node * p = C_style(a, "C", token);
918
+ a->mode = mode;
919
+ a->modifyable = modifyable;
920
+ return p;
921
+ }
922
+ }
923
+ case c_not:
924
+ case c_try:
925
+ case c_fail:
926
+ case c_test:
927
+ case c_do:
928
+ case c_goto:
929
+ case c_gopast:
930
+ case c_repeat:
931
+ return C_style(a, "C", token);
932
+ case c_loop:
933
+ case c_atleast:
934
+ return C_style(a, "AC", token);
935
+ case c_setmark: {
936
+ struct node * n = C_style(a, "i", token);
937
+ if (n->name) n->name->initialised = true;
938
+ return n;
939
+ }
940
+ case c_tomark:
941
+ case c_atmark:
942
+ return C_style(a, "A", token);
943
+ case c_hop: {
944
+ struct node * n = C_style(a, "A", token);
945
+ if (n->AE->type == c_number) {
946
+ if (n->AE->number < 0) {
947
+ fprintf(stderr,
948
+ "%s:%d: warning: hop %d now signals f (as was "
949
+ "always documented) rather than moving the cursor "
950
+ "in the opposite direction\n",
951
+ a->tokeniser->file,
952
+ n->AE->line_number,
953
+ n->AE->number);
954
+ n->AE = NULL;
955
+ n->type = c_false;
956
+ } else if (n->AE->number == 0) {
957
+ fprintf(stderr,
958
+ "%s:%d: warning: hop 0 is a no-op\n",
959
+ a->tokeniser->file,
960
+ n->AE->line_number);
961
+ n->AE = NULL;
962
+ n->type = c_true;
963
+ }
964
+ }
965
+ return n;
966
+ }
967
+ case c_delete:
968
+ check_modifyable(a);
969
+ /* fall through */
970
+ case c_next:
971
+ case c_tolimit:
972
+ case c_atlimit:
973
+ case c_leftslice:
974
+ case c_rightslice:
975
+ case c_true:
976
+ case c_false:
977
+ case c_debug:
978
+ return new_node(a, token);
979
+ case c_assignto:
980
+ case c_sliceto: {
981
+ struct node *n;
982
+ check_modifyable(a);
983
+ n = C_style(a, "s", token);
984
+ if (n->name) n->name->initialised = true;
985
+ return n;
986
+ }
987
+ case c_assign:
988
+ case c_insert:
989
+ case c_attach:
990
+ case c_slicefrom: {
991
+ struct node *n;
992
+ check_modifyable(a);
993
+ n = C_style(a, "S", token);
994
+ if (n->name) n->name->value_used = true;
995
+ return n;
996
+ }
997
+ case c_setlimit:
998
+ return C_style(a, "CfD", token);
999
+ case c_set:
1000
+ case c_unset: {
1001
+ struct node * n = C_style(a, "b", token);
1002
+ if (n->name) n->name->initialised = true;
1003
+ return n;
1004
+ }
1005
+ case c_dollar: {
1006
+ struct tokeniser * t = a->tokeniser;
1007
+ read_token(t);
1008
+ if (t->token == c_bra) {
1009
+ /* Handle newer $(AE REL_OP AE) syntax. */
1010
+ struct node * n = read_AE(a, 0, 0);
1011
+ read_token(t);
1012
+ int token = t->token;
1013
+ switch (token) {
1014
+ case c_assign:
1015
+ count_error(a);
1016
+ fprintf(stderr, "%s:%d: Expected relational operator (did you mean '=='?)\n",
1017
+ t->file, t->line_number);
1018
+ /* Assume it was == to try to avoid an error avalanche. */
1019
+ token = c_eq;
1020
+ /* FALLTHRU */
1021
+ case c_eq:
1022
+ case c_ne:
1023
+ case c_gr:
1024
+ case c_ge:
1025
+ case c_ls:
1026
+ case c_le: {
1027
+ struct node * lhs = n;
1028
+ struct node * rhs = read_AE(a, 0, 0);
1029
+ if (lhs->type == c_number && rhs->type == c_number) {
1030
+ // Evaluate constant numeric test expression.
1031
+ int result;
1032
+ switch (token) {
1033
+ case c_eq:
1034
+ result = (lhs->number == rhs->number);
1035
+ break;
1036
+ case c_ne:
1037
+ result = (lhs->number != rhs->number);
1038
+ break;
1039
+ case c_gr:
1040
+ result = (lhs->number > rhs->number);
1041
+ break;
1042
+ case c_ge:
1043
+ result = (lhs->number >= rhs->number);
1044
+ break;
1045
+ case c_ls:
1046
+ result = (lhs->number < rhs->number);
1047
+ break;
1048
+ case c_le:
1049
+ result = (lhs->number <= rhs->number);
1050
+ break;
1051
+ default:
1052
+ fprintf(stderr, "Unexpected numeric test operator %s\n",
1053
+ name_of_token(t->token));
1054
+ exit(1);
1055
+ }
1056
+ n = new_node(a, result ? c_true : c_false);
1057
+ } else {
1058
+ n = new_node(a, token);
1059
+ n->left = lhs;
1060
+ n->AE = rhs;
1061
+ }
1062
+ get_token(a, c_ket);
1063
+ break;
1064
+ }
1065
+ default:
1066
+ error(a, e_unexpected_token);
1067
+ t->token_held = true;
1068
+ break;
1069
+ }
1070
+ return n;
1071
+ }
1072
+
1073
+ if (t->token == c_name) {
1074
+ struct node * p;
1075
+ struct name * q = find_name(a);
1076
+ int mode = a->mode;
1077
+ int modifyable = a->modifyable;
1078
+ if (q && q->type == t_string) {
1079
+ /* Assume for now that $ on string both initialises and
1080
+ * uses the string variable. FIXME: Can we do better?
1081
+ */
1082
+ q->initialised = true;
1083
+ q->value_used = true;
1084
+ a->mode = m_forward;
1085
+ a->modifyable = true;
1086
+ p = new_node(a, c_dollar);
1087
+ p->left = read_C(a);
1088
+ p->name = q;
1089
+ } else {
1090
+ if (q && q->type != t_integer) {
1091
+ /* If $ is used on an unknown name or a name which
1092
+ * isn't a string or an integer then we assume the
1093
+ * unknown name is an integer as $ is used more often
1094
+ * on integers than strings, so hopefully this it less
1095
+ * likely to cause an error avalanche.
1096
+ *
1097
+ * For an unknown name, we'll already have reported an
1098
+ * error.
1099
+ */
1100
+ error(a, e_not_of_type_string_or_integer);
1101
+ q = NULL;
1102
+ }
1103
+ p = new_node(a, read_AE_test(a));
1104
+ switch (p->type) {
1105
+ case c_eq:
1106
+ case c_ne:
1107
+ case c_gr:
1108
+ case c_ge:
1109
+ case c_ls:
1110
+ case c_le:
1111
+ p->left = new_node(a, c_name);
1112
+ p->left->name = q;
1113
+ if (q) {
1114
+ q->value_used = true;
1115
+ }
1116
+ p->AE = read_AE(a, NULL, 0);
1117
+ break;
1118
+ default:
1119
+ /* +=, etc don't "initialise" as they only
1120
+ * amend an existing value. Similarly, they
1121
+ * don't count as using the value.
1122
+ */
1123
+ p->name = q;
1124
+ p->AE = read_AE(a, q, 0);
1125
+ if (p->type == c_mathassign && q) {
1126
+ /* $x = x + 1 doesn't initialise x. */
1127
+ q->initialised = !ae_uses_name(p->AE, q);
1128
+ }
1129
+ break;
1130
+ }
1131
+ }
1132
+ if (q) mark_used_in(a, q, p);
1133
+ a->mode = mode;
1134
+ a->modifyable = modifyable;
1135
+ return p;
1136
+ }
1137
+
1138
+ error(a, e_unexpected_token);
1139
+ t->token_held = true;
1140
+ return new_node(a, c_dollar);
1141
+ }
1142
+ case c_name:
1143
+ {
1144
+ struct name * q = find_name(a);
1145
+ struct node * p = new_node(a, c_name);
1146
+ if (q) {
1147
+ mark_used_in(a, q, p);
1148
+ switch (q->type) {
1149
+ case t_boolean:
1150
+ p->type = c_booltest;
1151
+ q->value_used = true;
1152
+ break;
1153
+ case t_integer:
1154
+ error(a, e_misplaced); /* integer name misplaced */
1155
+ break;
1156
+ case t_string:
1157
+ q->value_used = true;
1158
+ break;
1159
+ case t_routine:
1160
+ case t_external:
1161
+ p->type = c_call;
1162
+ check_routine_mode(a, q, a->mode);
1163
+ break;
1164
+ case t_grouping:
1165
+ p->type = c_grouping; break;
1166
+ }
1167
+ }
1168
+ p->name = q;
1169
+ return p;
1170
+ }
1171
+ case c_non:
1172
+ {
1173
+ struct node * p = new_node(a, token);
1174
+ read_token(t);
1175
+ if (t->token == c_minus) read_token(t);
1176
+ if (!check_token(a, c_name)) { omission_error(a, c_name); return p; }
1177
+ name_to_node(a, p, 'g');
1178
+ return p;
1179
+ }
1180
+ case c_literalstring:
1181
+ return read_literalstring(a);
1182
+ case c_among: return read_among(a);
1183
+ case c_substring: return read_substring(a);
1184
+ default: error(a, e_unexpected_token); return 0;
1185
+ }
1186
+ }
1187
+
1188
+ static int next_symbol(symbol * p, symbol * W, int utf8) {
1189
+ if (utf8) {
1190
+ int ch;
1191
+ int j = get_utf8(p, & ch);
1192
+ W[0] = ch; return j;
1193
+ } else {
1194
+ W[0] = p[0]; return 1;
1195
+ }
1196
+ }
1197
+
1198
+ static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
1199
+ int j = 0;
1200
+ symbol W[1];
1201
+ int width;
1202
+ if (style == c_plus) {
1203
+ while (j < SIZE(q)) {
1204
+ width = next_symbol(q + j, W, utf8);
1205
+ p = add_to_b(p, 1, W);
1206
+ j += width;
1207
+ }
1208
+ } else {
1209
+ while (j < SIZE(q)) {
1210
+ int i;
1211
+ width = next_symbol(q + j, W, utf8);
1212
+ for (i = 0; i < SIZE(p); i++) {
1213
+ if (p[i] == W[0]) {
1214
+ memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
1215
+ SIZE(p)--;
1216
+ }
1217
+ }
1218
+ j += width;
1219
+ }
1220
+ }
1221
+ return p;
1222
+ }
1223
+
1224
+ static void read_define_grouping(struct analyser * a, struct name * q) {
1225
+ struct tokeniser * t = a->tokeniser;
1226
+ int style = c_plus;
1227
+ {
1228
+ NEW(grouping, p);
1229
+ if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
1230
+ a->groupings_end = p;
1231
+ if (q) q->grouping = p;
1232
+ p->next = 0;
1233
+ p->name = q;
1234
+ p->line_number = a->tokeniser->line_number;
1235
+ p->b = create_b(0);
1236
+ while (true) {
1237
+ switch (read_token(t)) {
1238
+ case c_name:
1239
+ {
1240
+ struct name * r = find_name(a);
1241
+ if (r) {
1242
+ check_name_type(a, r, 'g');
1243
+ p->b = alter_grouping(p->b, r->grouping->b, style, false);
1244
+ r->used_in_definition = true;
1245
+ }
1246
+ }
1247
+ break;
1248
+ case c_literalstring:
1249
+ p->b = alter_grouping(p->b, t->b, style, (a->encoding == ENC_UTF8));
1250
+ break;
1251
+ default: error(a, e_unexpected_token); return;
1252
+ }
1253
+ switch (read_token(t)) {
1254
+ case c_plus:
1255
+ case c_minus: style = t->token; break;
1256
+ default: goto label0;
1257
+ }
1258
+ }
1259
+ label0:
1260
+ {
1261
+ int i;
1262
+ int max = 0;
1263
+ int min = 1<<16;
1264
+ for (i = 0; i < SIZE(p->b); i++) {
1265
+ if (p->b[i] > max) max = p->b[i];
1266
+ if (p->b[i] < min) min = p->b[i];
1267
+ }
1268
+ p->largest_ch = max;
1269
+ p->smallest_ch = min;
1270
+ if (min == 1<<16) error(a, e_empty_grouping);
1271
+ }
1272
+ t->token_held = true; return;
1273
+ }
1274
+ }
1275
+
1276
+ static void read_define_routine(struct analyser * a, struct name * q) {
1277
+ struct node * p = new_node(a, c_define);
1278
+ a->amongvar_needed = false;
1279
+ if (q) {
1280
+ check_name_type(a, q, 'R');
1281
+ if (q->definition != 0) error(a, e_redefined);
1282
+ if (q->mode < 0) q->mode = a->mode; else
1283
+ if (q->mode != a->mode) error2(a, e_declared_as_different_mode, q->mode);
1284
+ }
1285
+ p->name = q;
1286
+ if (a->program == 0) a->program = p; else a->program_end->right = p;
1287
+ a->program_end = p;
1288
+ get_token(a, c_as);
1289
+ p->left = read_C(a);
1290
+ if (q) q->definition = p->left;
1291
+
1292
+ if (a->substring != 0) {
1293
+ error2(a, e_unresolved_substring, a->substring->line_number);
1294
+ a->substring = 0;
1295
+ }
1296
+ p->amongvar_needed = a->amongvar_needed;
1297
+ }
1298
+
1299
+ static void read_define(struct analyser * a) {
1300
+ if (get_token(a, c_name)) {
1301
+ struct name * q = find_name(a);
1302
+ int type;
1303
+ if (q) {
1304
+ type = q->type;
1305
+ } else {
1306
+ /* No declaration, so sniff next token - if it is 'as' then parse
1307
+ * as a routine, otherwise as a grouping.
1308
+ */
1309
+ if (read_token(a->tokeniser) == c_as) {
1310
+ type = t_routine;
1311
+ } else {
1312
+ type = t_grouping;
1313
+ }
1314
+ a->tokeniser->token_held = true;
1315
+ }
1316
+
1317
+ if (type == t_grouping) {
1318
+ read_define_grouping(a, q);
1319
+ } else {
1320
+ read_define_routine(a, q);
1321
+ }
1322
+ }
1323
+ }
1324
+
1325
+ static void read_backwardmode(struct analyser * a) {
1326
+ int mode = a->mode;
1327
+ a->mode = m_backward;
1328
+ if (get_token(a, c_bra)) {
1329
+ read_program_(a, c_ket);
1330
+ check_token(a, c_ket);
1331
+ }
1332
+ a->mode = mode;
1333
+ }
1334
+
1335
+ static void read_program_(struct analyser * a, int terminator) {
1336
+ struct tokeniser * t = a->tokeniser;
1337
+ while (true) {
1338
+ switch (read_token(t)) {
1339
+ case c_strings: read_names(a, t_string); break;
1340
+ case c_booleans: read_names(a, t_boolean); break;
1341
+ case c_integers: read_names(a, t_integer); break;
1342
+ case c_routines: read_names(a, t_routine); break;
1343
+ case c_externals: read_names(a, t_external); break;
1344
+ case c_groupings: read_names(a, t_grouping); break;
1345
+ case c_define: read_define(a); break;
1346
+ case c_backwardmode:read_backwardmode(a); break;
1347
+ case c_ket:
1348
+ if (terminator == c_ket) return;
1349
+ /* fall through */
1350
+ default:
1351
+ error(a, e_unexpected_token); break;
1352
+ case -1:
1353
+ if (terminator >= 0) omission_error(a, c_ket);
1354
+ return;
1355
+ }
1356
+ }
1357
+ }
1358
+
1359
+ static void remove_dead_assignments(struct node * p, struct name * q) {
1360
+ if (p->name == q) {
1361
+ switch (p->type) {
1362
+ case c_assignto:
1363
+ case c_sliceto:
1364
+ case c_mathassign:
1365
+ case c_plusassign:
1366
+ case c_minusassign:
1367
+ case c_multiplyassign:
1368
+ case c_divideassign:
1369
+ case c_setmark:
1370
+ case c_set:
1371
+ case c_unset:
1372
+ case c_dollar:
1373
+ /* c_true is a no-op. */
1374
+ p->type = c_true;
1375
+ p->AE = NULL;
1376
+ break;
1377
+ default:
1378
+ /* There are no read accesses to this variable, so any
1379
+ * references must be assignments.
1380
+ */
1381
+ fprintf(stderr, "Unhandled type of dead assignment via %s\n",
1382
+ name_of_token(p->type));
1383
+ exit(1);
1384
+ }
1385
+ }
1386
+ if (p->AE) remove_dead_assignments(p->AE, q);
1387
+ if (p->left) remove_dead_assignments(p->left, q);
1388
+ if (p->aux) remove_dead_assignments(p->aux, q);
1389
+ if (p->right) remove_dead_assignments(p->right, q);
1390
+ }
1391
+
1392
+ extern void read_program(struct analyser * a) {
1393
+ read_program_(a, -1);
1394
+ {
1395
+ struct name * q = a->names;
1396
+ while (q) {
1397
+ switch (q->type) {
1398
+ case t_external: case t_routine:
1399
+ if (q->used && q->definition == 0) error4(a, q);
1400
+ break;
1401
+ case t_grouping:
1402
+ if (q->used && q->grouping == 0) error4(a, q);
1403
+ break;
1404
+ }
1405
+ q = q->next;
1406
+ }
1407
+ }
1408
+
1409
+ if (a->tokeniser->error_count == 0) {
1410
+ struct name * q = a->names;
1411
+ struct name ** ptr = &(a->names);
1412
+ while (q) {
1413
+ if (!q->referenced) {
1414
+ fprintf(stderr, "%s:%d: warning: %s '",
1415
+ a->tokeniser->file,
1416
+ q->declaration_line_number,
1417
+ name_of_name_type(q->type));
1418
+ report_b(stderr, q->b);
1419
+ if (q->type == t_routine ||
1420
+ q->type == t_external ||
1421
+ q->type == t_grouping) {
1422
+ fprintf(stderr, "' declared but not defined\n");
1423
+ } else {
1424
+ fprintf(stderr, "' defined but not used\n");
1425
+ q = q->next;
1426
+ *ptr = q;
1427
+ continue;
1428
+ }
1429
+ } else if (q->type == t_routine || q->type == t_grouping) {
1430
+ /* It's OK to define a grouping but only use it to define other
1431
+ * groupings.
1432
+ */
1433
+ if (!q->used && !q->used_in_definition) {
1434
+ int line_num;
1435
+ if (q->type == t_routine) {
1436
+ line_num = q->definition->line_number;
1437
+ } else {
1438
+ line_num = q->grouping->line_number;
1439
+ }
1440
+ fprintf(stderr, "%s:%d: warning: %s '",
1441
+ a->tokeniser->file,
1442
+ line_num,
1443
+ name_of_name_type(q->type));
1444
+ report_b(stderr, q->b);
1445
+ fprintf(stderr, "' defined but not used\n");
1446
+ }
1447
+ } else if (q->type == t_external) {
1448
+ /* Unused is OK. */
1449
+ } else if (!q->initialised) {
1450
+ fprintf(stderr, "%s:%d: warning: %s '",
1451
+ a->tokeniser->file,
1452
+ q->declaration_line_number,
1453
+ name_of_name_type(q->type));
1454
+ report_b(stderr, q->b);
1455
+ fprintf(stderr, "' is never initialised\n");
1456
+ } else if (!q->value_used) {
1457
+ fprintf(stderr, "%s:%d: warning: %s '",
1458
+ a->tokeniser->file,
1459
+ q->declaration_line_number,
1460
+ name_of_name_type(q->type));
1461
+ report_b(stderr, q->b);
1462
+ fprintf(stderr, "' is set but never used\n");
1463
+ remove_dead_assignments(a->program, q);
1464
+ q = q->next;
1465
+ *ptr = q;
1466
+ continue;
1467
+ }
1468
+ ptr = &(q->next);
1469
+ q = q->next;
1470
+ }
1471
+
1472
+ {
1473
+ /* Now we've eliminated variables whose values are never used we
1474
+ * can number the variables, which is used by some generators.
1475
+ */
1476
+ int * name_count = a->name_count;
1477
+ struct name * n;
1478
+ for (n = a->names; n; n = n->next) {
1479
+ n->count = name_count[n->type]++;
1480
+ }
1481
+ }
1482
+ }
1483
+ }
1484
+
1485
+ extern struct analyser * create_analyser(struct tokeniser * t) {
1486
+ NEW(analyser, a);
1487
+ a->tokeniser = t;
1488
+ a->nodes = 0;
1489
+ a->names = 0;
1490
+ a->literalstrings = 0;
1491
+ a->program = 0;
1492
+ a->amongs = 0;
1493
+ a->among_count = 0;
1494
+ a->groupings = 0;
1495
+ a->mode = m_forward;
1496
+ a->modifyable = true;
1497
+ { int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
1498
+ a->substring = 0;
1499
+ a->int_limits_used = false;
1500
+ return a;
1501
+ }
1502
+
1503
+ extern void close_analyser(struct analyser * a) {
1504
+ {
1505
+ struct node * q = a->nodes;
1506
+ while (q) {
1507
+ struct node * q_next = q->next;
1508
+ FREE(q);
1509
+ q = q_next;
1510
+ }
1511
+ }
1512
+ {
1513
+ struct name * q = a->names;
1514
+ while (q) {
1515
+ struct name * q_next = q->next;
1516
+ lose_b(q->b); FREE(q);
1517
+ q = q_next;
1518
+ }
1519
+ }
1520
+ {
1521
+ struct literalstring * q = a->literalstrings;
1522
+ while (q) {
1523
+ struct literalstring * q_next = q->next;
1524
+ lose_b(q->b); FREE(q);
1525
+ q = q_next;
1526
+ }
1527
+ }
1528
+ {
1529
+ struct among * q = a->amongs;
1530
+ while (q) {
1531
+ struct among * q_next = q->next;
1532
+ FREE(q->b);
1533
+ FREE(q->commands);
1534
+ FREE(q);
1535
+ q = q_next;
1536
+ }
1537
+ }
1538
+ {
1539
+ struct grouping * q = a->groupings;
1540
+ while (q) {
1541
+ struct grouping * q_next = q->next;
1542
+ lose_b(q->b); FREE(q);
1543
+ q = q_next;
1544
+ }
1545
+ }
1546
+ FREE(a);
1547
+ }