mittens 0.2.0 → 0.3.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -3
- data/lib/mittens/version.rb +1 -1
- data/vendor/snowball/.github/workflows/ci.yml +216 -0
- data/vendor/snowball/CONTRIBUTING.rst +111 -62
- data/vendor/snowball/GNUmakefile +194 -136
- data/vendor/snowball/NEWS +798 -3
- data/vendor/snowball/README.rst +50 -1
- data/vendor/snowball/ada/src/stemmer.adb +25 -13
- data/vendor/snowball/ada/src/stemmer.ads +9 -9
- data/vendor/snowball/ada/stemmer_config.gpr +7 -7
- data/vendor/snowball/algorithms/basque.sbl +4 -19
- data/vendor/snowball/algorithms/catalan.sbl +2 -9
- data/vendor/snowball/algorithms/danish.sbl +1 -1
- data/vendor/snowball/algorithms/dutch.sbl +284 -122
- data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
- data/vendor/snowball/algorithms/english.sbl +52 -37
- data/vendor/snowball/algorithms/esperanto.sbl +157 -0
- data/vendor/snowball/algorithms/estonian.sbl +269 -0
- data/vendor/snowball/algorithms/finnish.sbl +2 -3
- data/vendor/snowball/algorithms/french.sbl +42 -16
- data/vendor/snowball/algorithms/german.sbl +35 -14
- data/vendor/snowball/algorithms/greek.sbl +76 -76
- data/vendor/snowball/algorithms/hungarian.sbl +8 -6
- data/vendor/snowball/algorithms/indonesian.sbl +14 -8
- data/vendor/snowball/algorithms/italian.sbl +11 -21
- data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
- data/vendor/snowball/algorithms/lovins.sbl +0 -1
- data/vendor/snowball/algorithms/nepali.sbl +138 -37
- data/vendor/snowball/algorithms/norwegian.sbl +19 -5
- data/vendor/snowball/algorithms/porter.sbl +2 -2
- data/vendor/snowball/algorithms/portuguese.sbl +9 -13
- data/vendor/snowball/algorithms/romanian.sbl +17 -4
- data/vendor/snowball/algorithms/serbian.sbl +467 -468
- data/vendor/snowball/algorithms/spanish.sbl +5 -7
- data/vendor/snowball/algorithms/swedish.sbl +60 -6
- data/vendor/snowball/algorithms/tamil.sbl +207 -176
- data/vendor/snowball/algorithms/turkish.sbl +461 -445
- data/vendor/snowball/algorithms/yiddish.sbl +36 -38
- data/vendor/snowball/compiler/analyser.c +445 -192
- data/vendor/snowball/compiler/driver.c +109 -101
- data/vendor/snowball/compiler/generator.c +853 -464
- data/vendor/snowball/compiler/generator_ada.c +404 -366
- data/vendor/snowball/compiler/generator_csharp.c +297 -260
- data/vendor/snowball/compiler/generator_go.c +323 -254
- data/vendor/snowball/compiler/generator_java.c +326 -252
- data/vendor/snowball/compiler/generator_js.c +362 -252
- data/vendor/snowball/compiler/generator_pascal.c +349 -197
- data/vendor/snowball/compiler/generator_python.c +257 -240
- data/vendor/snowball/compiler/generator_rust.c +423 -251
- data/vendor/snowball/compiler/header.h +117 -71
- data/vendor/snowball/compiler/space.c +137 -68
- data/vendor/snowball/compiler/syswords.h +2 -2
- data/vendor/snowball/compiler/tokeniser.c +125 -107
- data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
- data/vendor/snowball/csharp/Stemwords/App.config +2 -2
- data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
- data/vendor/snowball/doc/libstemmer_c_README +7 -4
- data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
- data/vendor/snowball/doc/libstemmer_java_README +12 -1
- data/vendor/snowball/doc/libstemmer_js_README +6 -4
- data/vendor/snowball/doc/libstemmer_python_README +9 -4
- data/vendor/snowball/examples/stemwords.c +12 -12
- data/vendor/snowball/go/env.go +107 -31
- data/vendor/snowball/go/util.go +0 -4
- data/vendor/snowball/include/libstemmer.h +4 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
- data/vendor/snowball/javascript/base-stemmer.js +186 -2
- data/vendor/snowball/javascript/stemwords.js +3 -6
- data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
- data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
- data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
- data/vendor/snowball/libstemmer/modules.txt +13 -10
- data/vendor/snowball/libstemmer/test.c +1 -1
- data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
- data/vendor/snowball/pascal/generate.pl +13 -13
- data/vendor/snowball/python/create_init.py +4 -1
- data/vendor/snowball/python/setup.cfg +0 -3
- data/vendor/snowball/python/setup.py +8 -3
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
- data/vendor/snowball/python/stemwords.py +8 -12
- data/vendor/snowball/runtime/api.c +10 -5
- data/vendor/snowball/runtime/header.h +10 -9
- data/vendor/snowball/runtime/utilities.c +9 -9
- data/vendor/snowball/rust/build.rs +1 -1
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
- data/vendor/snowball/tests/stemtest.c +7 -4
- metadata +7 -7
- data/vendor/snowball/.travis.yml +0 -112
- data/vendor/snowball/algorithms/german2.sbl +0 -145
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
- data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
#include <assert.h>
|
2
2
|
#include <stdlib.h> /* for exit */
|
3
3
|
#include <string.h> /* for strlen */
|
4
4
|
#include <stdio.h> /* for fprintf etc */
|
@@ -15,7 +15,6 @@ static int new_label(struct generator * g) {
|
|
15
15
|
}
|
16
16
|
|
17
17
|
static struct str * vars_newname(struct generator * g) {
|
18
|
-
|
19
18
|
struct str * output;
|
20
19
|
g->var_number++;
|
21
20
|
output = str_new();
|
@@ -24,11 +23,9 @@ static struct str * vars_newname(struct generator * g) {
|
|
24
23
|
return output;
|
25
24
|
}
|
26
25
|
|
27
|
-
|
28
26
|
/* Write routines for items from the syntax tree */
|
29
27
|
|
30
28
|
static void write_varname(struct generator * g, struct name * p) {
|
31
|
-
|
32
29
|
switch (p->type) {
|
33
30
|
case t_external:
|
34
31
|
break;
|
@@ -39,7 +36,7 @@ static void write_varname(struct generator * g, struct name * p) {
|
|
39
36
|
break;
|
40
37
|
}
|
41
38
|
}
|
42
|
-
|
39
|
+
write_s(g, p->s);
|
43
40
|
}
|
44
41
|
|
45
42
|
static void write_varref(struct generator * g, struct name * p) {
|
@@ -47,42 +44,29 @@ static void write_varref(struct generator * g, struct name * p) {
|
|
47
44
|
write_varname(g, p);
|
48
45
|
}
|
49
46
|
|
50
|
-
static void write_hexdigit(struct generator * g, int n) {
|
51
|
-
|
52
|
-
write_char(g, n < 10 ? n + '0' : n - 10 + 'A');
|
53
|
-
}
|
54
|
-
|
55
|
-
static void write_hex(struct generator * g, int ch) {
|
56
|
-
|
57
|
-
write_string(g, "\\u{");
|
58
|
-
{
|
59
|
-
int i;
|
60
|
-
for (i = 12; i >= 0; i -= 4) write_hexdigit(g, ch >> i & 0xf);
|
61
|
-
}
|
62
|
-
write_string(g, "}");
|
63
|
-
}
|
64
|
-
|
65
47
|
static void write_literal_string(struct generator * g, symbol * p) {
|
66
|
-
|
67
48
|
int i = 0;
|
68
49
|
write_string(g, "\"");
|
69
50
|
while (i < SIZE(p)) {
|
70
51
|
int ch;
|
71
52
|
i += get_utf8(p + i, &ch);
|
72
|
-
if (32 <= ch && ch < 127) {
|
73
|
-
if (ch == '
|
74
|
-
|
53
|
+
if (32 <= ch && ch < 0x590 && ch != 127) {
|
54
|
+
if (ch == '"' || ch == '\\') write_char(g, '\\');
|
55
|
+
write_wchar_as_utf8(g, ch);
|
75
56
|
} else {
|
76
|
-
|
57
|
+
// Use escapes for anything over 0x590 as a crude way to avoid
|
58
|
+
// LTR characters affecting the rendering of source character
|
59
|
+
// order in confusing ways.
|
60
|
+
write_string(g, "\\u{");
|
61
|
+
write_hex4(g, ch);
|
62
|
+
write_string(g, "}");
|
77
63
|
}
|
78
64
|
}
|
79
65
|
write_string(g, "\"");
|
80
66
|
}
|
81
67
|
|
82
68
|
static void write_margin(struct generator * g) {
|
83
|
-
|
84
|
-
int i;
|
85
|
-
for (i = 0; i < g->margin; i++) write_string(g, " ");
|
69
|
+
for (int i = 0; i < g->margin; i++) write_string(g, " ");
|
86
70
|
}
|
87
71
|
|
88
72
|
static void write_comment(struct generator * g, struct node * p) {
|
@@ -94,53 +78,41 @@ static void write_comment(struct generator * g, struct node * p) {
|
|
94
78
|
}
|
95
79
|
|
96
80
|
static void write_block_start(struct generator * g) {
|
97
|
-
|
98
81
|
w(g, "~+{~N");
|
99
82
|
}
|
100
83
|
|
101
|
-
static void write_block_end(struct generator * g)
|
102
|
-
|
84
|
+
static void write_block_end(struct generator * g) {
|
103
85
|
w(g, "~-~M}~N");
|
104
86
|
}
|
105
87
|
|
106
88
|
static void write_savecursor(struct generator * g, struct node * p,
|
107
89
|
struct str * savevar) {
|
108
|
-
|
109
90
|
g->B[0] = str_data(savevar);
|
110
91
|
g->S[1] = "";
|
111
92
|
if (p->mode != m_forward) g->S[1] = "env.limit - ";
|
112
93
|
writef(g, "~Mlet ~B0 = ~S1env.cursor;~N", p);
|
113
94
|
}
|
114
95
|
|
115
|
-
static void
|
116
|
-
|
117
|
-
str_clear(out);
|
96
|
+
static void append_restore_string(struct node * p, struct str * out, struct str * savevar) {
|
118
97
|
str_append_string(out, "env.cursor = ");
|
119
98
|
if (p->mode != m_forward) str_append_string(out, "env.limit - ");
|
120
99
|
str_append(out, savevar);
|
121
100
|
str_append_string(out, ";");
|
122
101
|
}
|
123
102
|
|
124
|
-
static void write_restorecursor(struct generator * g, struct node * p,
|
125
|
-
struct str * savevar) {
|
126
|
-
|
127
|
-
struct str * temp = str_new();
|
103
|
+
static void write_restorecursor(struct generator * g, struct node * p, struct str * savevar) {
|
128
104
|
write_margin(g);
|
129
|
-
|
130
|
-
write_str(g, temp);
|
105
|
+
append_restore_string(p, g->outbuf, savevar);
|
131
106
|
write_newline(g);
|
132
|
-
str_delete(temp);
|
133
107
|
}
|
134
108
|
|
135
109
|
static void write_inc_cursor(struct generator * g, struct node * p) {
|
136
|
-
|
137
110
|
write_margin(g);
|
138
111
|
write_string(g, p->mode == m_forward ? "env.next_char();" : "env.previous_char();");
|
139
112
|
write_newline(g);
|
140
113
|
}
|
141
114
|
|
142
115
|
static void wsetlab_begin(struct generator * g, int n) {
|
143
|
-
|
144
116
|
g->I[0] = n;
|
145
117
|
w(g, "~M'lab~I0: loop {~N~+");
|
146
118
|
}
|
@@ -159,7 +131,6 @@ static void wgotol(struct generator * g, int n) {
|
|
159
131
|
}
|
160
132
|
|
161
133
|
static void write_failure(struct generator * g) {
|
162
|
-
|
163
134
|
if (str_len(g->failure_str) != 0) {
|
164
135
|
write_margin(g);
|
165
136
|
write_str(g, g->failure_str);
|
@@ -171,14 +142,14 @@ static void write_failure(struct generator * g) {
|
|
171
142
|
g->unreachable = true;
|
172
143
|
break;
|
173
144
|
default:
|
174
|
-
g
|
175
|
-
|
145
|
+
w(g, "~Mbreak 'lab");
|
146
|
+
write_int(g, g->failure_label);
|
147
|
+
w(g, ";~N");
|
176
148
|
g->unreachable = true;
|
177
149
|
}
|
178
150
|
}
|
179
151
|
|
180
|
-
static void write_failure_if(struct generator * g, char * s, struct node * p) {
|
181
|
-
|
152
|
+
static void write_failure_if(struct generator * g, const char * s, struct node * p) {
|
182
153
|
writef(g, "~Mif ", p);
|
183
154
|
writef(g, s, p);
|
184
155
|
writef(g, " ", p);
|
@@ -190,7 +161,6 @@ static void write_failure_if(struct generator * g, char * s, struct node * p) {
|
|
190
161
|
|
191
162
|
/* if at limit fail */
|
192
163
|
static void write_check_limit(struct generator * g, struct node * p) {
|
193
|
-
|
194
164
|
if (p->mode == m_forward) {
|
195
165
|
write_failure_if(g, "env.cursor >= env.limit", p);
|
196
166
|
} else {
|
@@ -200,18 +170,18 @@ static void write_check_limit(struct generator * g, struct node * p) {
|
|
200
170
|
|
201
171
|
/* Formatted write. */
|
202
172
|
static void writef(struct generator * g, const char * input, struct node * p) {
|
173
|
+
(void)p;
|
203
174
|
int i = 0;
|
204
|
-
int l = strlen(input);
|
205
175
|
|
206
|
-
while (i
|
176
|
+
while (input[i]) {
|
207
177
|
int ch = input[i++];
|
208
178
|
if (ch != '~') {
|
209
179
|
write_char(g, ch);
|
210
180
|
continue;
|
211
181
|
}
|
212
|
-
|
213
|
-
|
214
|
-
case '
|
182
|
+
ch = input[i++];
|
183
|
+
switch (ch) {
|
184
|
+
case '~': write_char(g, '~'); continue;
|
215
185
|
case 'f': write_block_start(g);
|
216
186
|
write_failure(g);
|
217
187
|
g->unreachable = false;
|
@@ -221,21 +191,65 @@ static void writef(struct generator * g, const char * input, struct node * p) {
|
|
221
191
|
case 'N': write_newline(g); continue;
|
222
192
|
case '{': write_block_start(g); continue;
|
223
193
|
case '}': write_block_end(g); continue;
|
224
|
-
case 'S':
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
194
|
+
case 'S': {
|
195
|
+
int j = input[i++] - '0';
|
196
|
+
if (j < 0 || j > (int)(sizeof(g->S) / sizeof(g->S[0]))) {
|
197
|
+
printf("Invalid escape sequence ~%c%c in writef(g, \"%s\", p)\n",
|
198
|
+
ch, input[i - 1], input);
|
199
|
+
exit(1);
|
200
|
+
}
|
201
|
+
write_string(g, g->S[j]);
|
202
|
+
continue;
|
203
|
+
}
|
204
|
+
case 'B': {
|
205
|
+
int j = input[i++] - '0';
|
206
|
+
if (j < 0 || j > (int)(sizeof(g->B) / sizeof(g->B[0])))
|
207
|
+
goto invalid_escape2;
|
208
|
+
write_s(g, g->B[j]);
|
209
|
+
continue;
|
210
|
+
}
|
211
|
+
case 'I': {
|
212
|
+
int j = input[i++] - '0';
|
213
|
+
if (j < 0 || j > (int)(sizeof(g->I) / sizeof(g->I[0])))
|
214
|
+
goto invalid_escape2;
|
215
|
+
write_int(g, g->I[j]);
|
216
|
+
continue;
|
217
|
+
}
|
218
|
+
case 'V':
|
219
|
+
case 'W': {
|
220
|
+
int j = input[i++] - '0';
|
221
|
+
if (j < 0 || j > (int)(sizeof(g->V) / sizeof(g->V[0])))
|
222
|
+
goto invalid_escape2;
|
223
|
+
if (ch == 'V')
|
224
|
+
write_varref(g, g->V[j]);
|
225
|
+
else
|
226
|
+
write_varname(g, g->V[j]);
|
227
|
+
continue;
|
228
|
+
}
|
229
|
+
case 'L': {
|
230
|
+
int j = input[i++] - '0';
|
231
|
+
if (j < 0 || j > (int)(sizeof(g->L) / sizeof(g->L[0])))
|
232
|
+
goto invalid_escape2;
|
233
|
+
write_literal_string(g, g->L[j]);
|
234
|
+
continue;
|
235
|
+
}
|
230
236
|
case '+': g->margin++; continue;
|
231
237
|
case '-': g->margin--; continue;
|
232
238
|
case 'n': write_string(g, g->options->name); continue;
|
239
|
+
default:
|
240
|
+
printf("Invalid escape sequence ~%c in writef(g, \"%s\", p)\n",
|
241
|
+
ch, input);
|
242
|
+
exit(1);
|
243
|
+
invalid_escape2:
|
244
|
+
printf("Invalid escape sequence ~%c%c in writef(g, \"%s\", p)\n",
|
245
|
+
ch, input[i - 1], input);
|
246
|
+
exit(1);
|
233
247
|
}
|
234
248
|
}
|
235
249
|
}
|
236
250
|
|
237
251
|
static void w(struct generator * g, const char * s) {
|
238
|
-
writef(g, s,
|
252
|
+
writef(g, s, NULL);
|
239
253
|
}
|
240
254
|
|
241
255
|
static void generate_AE(struct generator * g, struct node * p) {
|
@@ -284,7 +298,6 @@ static void generate_AE(struct generator * g, struct node * p) {
|
|
284
298
|
}
|
285
299
|
|
286
300
|
static void generate_bra(struct generator * g, struct node * p) {
|
287
|
-
|
288
301
|
write_comment(g, p);
|
289
302
|
p = p->left;
|
290
303
|
while (p) {
|
@@ -294,28 +307,33 @@ static void generate_bra(struct generator * g, struct node * p) {
|
|
294
307
|
}
|
295
308
|
|
296
309
|
static void generate_and(struct generator * g, struct node * p) {
|
297
|
-
|
298
|
-
|
299
|
-
|
310
|
+
struct str * savevar = NULL;
|
311
|
+
if (K_needed(g, p->left)) {
|
312
|
+
savevar = vars_newname(g);
|
313
|
+
}
|
300
314
|
|
301
315
|
write_comment(g, p);
|
302
316
|
|
303
|
-
if (
|
317
|
+
if (savevar) write_savecursor(g, p, savevar);
|
304
318
|
|
305
319
|
p = p->left;
|
306
320
|
while (p) {
|
307
321
|
generate(g, p);
|
308
322
|
if (g->unreachable) break;
|
309
|
-
if (
|
323
|
+
if (savevar && p->right != NULL) write_restorecursor(g, p, savevar);
|
310
324
|
p = p->right;
|
311
325
|
}
|
312
|
-
|
326
|
+
|
327
|
+
if (savevar) {
|
328
|
+
str_delete(savevar);
|
329
|
+
}
|
313
330
|
}
|
314
331
|
|
315
332
|
static void generate_or(struct generator * g, struct node * p) {
|
316
|
-
|
317
|
-
|
318
|
-
|
333
|
+
struct str * savevar = NULL;
|
334
|
+
if (K_needed(g, p->left)) {
|
335
|
+
savevar = vars_newname(g);
|
336
|
+
}
|
319
337
|
|
320
338
|
int a0 = g->failure_label;
|
321
339
|
struct str * a1 = str_copy(g->failure_str);
|
@@ -326,18 +344,18 @@ static void generate_or(struct generator * g, struct node * p) {
|
|
326
344
|
write_comment(g, p);
|
327
345
|
wsetlab_begin(g, out_lab);
|
328
346
|
|
329
|
-
if (
|
347
|
+
if (savevar) write_savecursor(g, p, savevar);
|
330
348
|
|
331
349
|
p = p->left;
|
332
350
|
str_clear(g->failure_str);
|
333
351
|
|
334
|
-
if (p ==
|
335
|
-
/* p should never be
|
352
|
+
if (p == NULL) {
|
353
|
+
/* p should never be NULL after an or: there should be at least two
|
336
354
|
* sub nodes. */
|
337
355
|
fprintf(stderr, "Error: \"or\" node without children nodes.");
|
338
356
|
exit(1);
|
339
357
|
}
|
340
|
-
while (p->right !=
|
358
|
+
while (p->right != NULL) {
|
341
359
|
int label = new_label(g);
|
342
360
|
g->failure_label = label;
|
343
361
|
wsetlab_begin(g, label);
|
@@ -348,7 +366,7 @@ static void generate_or(struct generator * g, struct node * p) {
|
|
348
366
|
}
|
349
367
|
w(g, "~-~M}~N");
|
350
368
|
g->unreachable = false;
|
351
|
-
if (
|
369
|
+
if (savevar) write_restorecursor(g, p, savevar);
|
352
370
|
p = p->right;
|
353
371
|
}
|
354
372
|
|
@@ -361,11 +379,13 @@ static void generate_or(struct generator * g, struct node * p) {
|
|
361
379
|
if (!end_unreachable) {
|
362
380
|
g->unreachable = false;
|
363
381
|
}
|
364
|
-
|
382
|
+
|
383
|
+
if (savevar) {
|
384
|
+
str_delete(savevar);
|
385
|
+
}
|
365
386
|
}
|
366
387
|
|
367
388
|
static void generate_backwards(struct generator * g, struct node * p) {
|
368
|
-
|
369
389
|
write_comment(g, p);
|
370
390
|
writef(g,"~Menv.limit_backward = env.cursor;~N"
|
371
391
|
"~Menv.cursor = env.limit;~N", p);
|
@@ -375,9 +395,10 @@ static void generate_backwards(struct generator * g, struct node * p) {
|
|
375
395
|
|
376
396
|
|
377
397
|
static void generate_not(struct generator * g, struct node * p) {
|
378
|
-
|
379
|
-
|
380
|
-
|
398
|
+
struct str * savevar = NULL;
|
399
|
+
if (K_needed(g, p->left)) {
|
400
|
+
savevar = vars_newname(g);
|
401
|
+
}
|
381
402
|
|
382
403
|
int a0 = g->failure_label;
|
383
404
|
struct str * a1 = str_copy(g->failure_str);
|
@@ -385,7 +406,7 @@ static void generate_not(struct generator * g, struct node * p) {
|
|
385
406
|
g->failure_label = label;
|
386
407
|
|
387
408
|
write_comment(g, p);
|
388
|
-
if (
|
409
|
+
if (savevar) {
|
389
410
|
write_savecursor(g, p, savevar);
|
390
411
|
}
|
391
412
|
|
@@ -404,48 +425,51 @@ static void generate_not(struct generator * g, struct node * p) {
|
|
404
425
|
|
405
426
|
g->unreachable = false;
|
406
427
|
|
407
|
-
if (
|
408
|
-
|
428
|
+
if (savevar) {
|
429
|
+
write_restorecursor(g, p, savevar);
|
430
|
+
str_delete(savevar);
|
431
|
+
}
|
409
432
|
}
|
410
433
|
|
411
434
|
|
412
435
|
static void generate_try(struct generator * g, struct node * p) {
|
436
|
+
struct str * savevar = NULL;
|
437
|
+
if (K_needed(g, p->left)) {
|
438
|
+
savevar = vars_newname(g);
|
439
|
+
}
|
413
440
|
|
414
|
-
struct str * savevar = vars_newname(g);
|
415
|
-
int keep_c = K_needed(g, p->left);
|
416
441
|
int label = new_label(g);
|
417
442
|
g->failure_label = label;
|
418
443
|
str_clear(g->failure_str);
|
419
444
|
|
420
445
|
write_comment(g, p);
|
421
|
-
if (
|
446
|
+
if (savevar) {
|
422
447
|
write_savecursor(g, p, savevar);
|
423
|
-
|
448
|
+
append_restore_string(p, g->failure_str, savevar);
|
424
449
|
}
|
425
450
|
wsetlab_begin(g, label);
|
426
451
|
generate(g, p->left);
|
427
452
|
wsetlab_end(g, label);
|
428
453
|
g->unreachable = false;
|
429
454
|
|
430
|
-
|
455
|
+
if (savevar) {
|
456
|
+
str_delete(savevar);
|
457
|
+
}
|
431
458
|
}
|
432
459
|
|
433
460
|
static void generate_set(struct generator * g, struct node * p) {
|
434
|
-
|
435
461
|
write_comment(g, p);
|
436
462
|
g->V[0] = p->name;
|
437
463
|
writef(g, "~M~V0 = true;~N", p);
|
438
464
|
}
|
439
465
|
|
440
466
|
static void generate_unset(struct generator * g, struct node * p) {
|
441
|
-
|
442
467
|
write_comment(g, p);
|
443
468
|
g->V[0] = p->name;
|
444
469
|
writef(g, "~M~V0 = false;~N", p);
|
445
470
|
}
|
446
471
|
|
447
472
|
static void generate_fail(struct generator * g, struct node * p) {
|
448
|
-
|
449
473
|
write_comment(g, p);
|
450
474
|
generate(g, p->left);
|
451
475
|
if (!g->unreachable) write_failure(g);
|
@@ -454,32 +478,35 @@ static void generate_fail(struct generator * g, struct node * p) {
|
|
454
478
|
/* generate_test() also implements 'reverse' */
|
455
479
|
|
456
480
|
static void generate_test(struct generator * g, struct node * p) {
|
457
|
-
|
458
|
-
|
459
|
-
|
481
|
+
struct str * savevar = NULL;
|
482
|
+
if (K_needed(g, p->left)) {
|
483
|
+
savevar = vars_newname(g);
|
484
|
+
}
|
460
485
|
|
461
486
|
write_comment(g, p);
|
462
487
|
|
463
|
-
if (
|
488
|
+
if (savevar) {
|
464
489
|
write_savecursor(g, p, savevar);
|
465
490
|
}
|
466
491
|
|
467
492
|
generate(g, p->left);
|
468
493
|
|
469
|
-
if (
|
470
|
-
if (
|
494
|
+
if (savevar) {
|
495
|
+
if (!g->unreachable) {
|
471
496
|
write_restorecursor(g, p, savevar);
|
472
497
|
}
|
498
|
+
str_delete(savevar);
|
473
499
|
}
|
474
|
-
str_delete(savevar);
|
475
500
|
}
|
476
501
|
|
477
502
|
static void generate_do(struct generator * g, struct node * p) {
|
503
|
+
struct str * savevar = NULL;
|
504
|
+
if (K_needed(g, p->left)) {
|
505
|
+
savevar = vars_newname(g);
|
506
|
+
}
|
478
507
|
|
479
|
-
struct str * savevar = vars_newname(g);
|
480
|
-
int keep_c = K_needed(g, p->left);
|
481
508
|
write_comment(g, p);
|
482
|
-
if (
|
509
|
+
if (savevar) write_savecursor(g, p, savevar);
|
483
510
|
|
484
511
|
if (p->left->type == c_call) {
|
485
512
|
/* Optimise do <call> */
|
@@ -497,24 +524,50 @@ static void generate_do(struct generator * g, struct node * p) {
|
|
497
524
|
g->unreachable = false;
|
498
525
|
}
|
499
526
|
|
500
|
-
if (
|
501
|
-
|
527
|
+
if (savevar) {
|
528
|
+
write_restorecursor(g, p, savevar);
|
529
|
+
str_delete(savevar);
|
530
|
+
}
|
502
531
|
}
|
503
532
|
|
504
|
-
static void
|
533
|
+
static void generate_next(struct generator * g, struct node * p) {
|
534
|
+
write_comment(g, p);
|
535
|
+
write_check_limit(g, p);
|
536
|
+
write_inc_cursor(g, p);
|
537
|
+
}
|
505
538
|
|
506
|
-
|
507
|
-
|
508
|
-
|
539
|
+
static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
|
540
|
+
write_comment(g, p);
|
541
|
+
|
542
|
+
struct grouping * q = p->name->grouping;
|
543
|
+
g->S[0] = p->mode == m_forward ? "" : "_b";
|
544
|
+
g->S[1] = complement ? "in" : "out";
|
545
|
+
g->V[0] = p->name;
|
546
|
+
g->I[0] = q->smallest_ch;
|
547
|
+
g->I[1] = q->largest_ch;
|
548
|
+
write_failure_if(g, "!env.go_~S1_grouping~S0(~W0, ~I0, ~I1)", p);
|
549
|
+
if (!is_goto) {
|
550
|
+
write_string(g, p->mode == m_forward ? "env.next_char();" : "env.previous_char();");
|
551
|
+
}
|
552
|
+
}
|
553
|
+
|
554
|
+
static void generate_GO(struct generator * g, struct node * p, int style) {
|
555
|
+
write_comment(g, p);
|
509
556
|
|
510
557
|
int a0 = g->failure_label;
|
511
558
|
struct str * a1 = str_copy(g->failure_str);
|
512
559
|
|
560
|
+
int end_unreachable = false;
|
561
|
+
|
513
562
|
int golab = new_label(g);
|
514
563
|
g->I[0] = golab;
|
515
|
-
write_comment(g, p);
|
516
564
|
w(g, "~M'golab~I0: loop {~N~+");
|
517
|
-
|
565
|
+
|
566
|
+
struct str * savevar = NULL;
|
567
|
+
if (style == 1 || repeat_restore(g, p->left)) {
|
568
|
+
savevar = vars_newname(g);
|
569
|
+
write_savecursor(g, p, savevar);
|
570
|
+
}
|
518
571
|
|
519
572
|
g->failure_label = new_label(g);
|
520
573
|
str_clear(g->failure_str);
|
@@ -533,7 +586,10 @@ static void generate_GO(struct generator * g, struct node * p, int style) {
|
|
533
586
|
}
|
534
587
|
g->unreachable = false;
|
535
588
|
w(g, "~-~M}~N");
|
536
|
-
if (
|
589
|
+
if (savevar) {
|
590
|
+
write_restorecursor(g, p, savevar);
|
591
|
+
str_delete(savevar);
|
592
|
+
}
|
537
593
|
|
538
594
|
g->failure_label = a0;
|
539
595
|
str_delete(g->failure_str);
|
@@ -542,13 +598,10 @@ static void generate_GO(struct generator * g, struct node * p, int style) {
|
|
542
598
|
write_check_limit(g, p);
|
543
599
|
write_inc_cursor(g, p);
|
544
600
|
write_block_end(g);
|
545
|
-
|
546
|
-
str_delete(savevar);
|
547
601
|
g->unreachable = end_unreachable;
|
548
602
|
}
|
549
603
|
|
550
604
|
static void generate_loop(struct generator * g, struct node * p) {
|
551
|
-
|
552
605
|
struct str * loopvar = vars_newname(g);
|
553
606
|
write_comment(g, p);
|
554
607
|
w(g, "~Mfor _ in 0..");
|
@@ -563,14 +616,15 @@ static void generate_loop(struct generator * g, struct node * p) {
|
|
563
616
|
}
|
564
617
|
|
565
618
|
static void generate_repeat_or_atleast(struct generator * g, struct node * p, struct str * loopvar) {
|
566
|
-
|
567
|
-
struct str * savevar = vars_newname(g);
|
568
|
-
int keep_c = repeat_restore(g, p->left);
|
569
619
|
int replab = new_label(g);
|
570
620
|
g->I[0] = replab;
|
571
621
|
writef(g, "~M'replab~I0: loop{~N~+", p);
|
572
622
|
|
573
|
-
|
623
|
+
struct str * savevar = NULL;
|
624
|
+
if (repeat_restore(g, p->left)) {
|
625
|
+
savevar = vars_newname(g);
|
626
|
+
write_savecursor(g, p, savevar);
|
627
|
+
}
|
574
628
|
|
575
629
|
g->failure_label = new_label(g);
|
576
630
|
str_clear(g->failure_str);
|
@@ -579,7 +633,7 @@ static void generate_repeat_or_atleast(struct generator * g, struct node * p, st
|
|
579
633
|
generate(g, p->left);
|
580
634
|
|
581
635
|
if (!g->unreachable) {
|
582
|
-
if (loopvar !=
|
636
|
+
if (loopvar != NULL) {
|
583
637
|
g->B[0] = str_data(loopvar);
|
584
638
|
w(g, "~M~B0 -= 1;~N");
|
585
639
|
}
|
@@ -590,11 +644,13 @@ static void generate_repeat_or_atleast(struct generator * g, struct node * p, st
|
|
590
644
|
w(g, "~-~M}~N");
|
591
645
|
g->unreachable = false;
|
592
646
|
|
593
|
-
if (
|
647
|
+
if (savevar) {
|
648
|
+
write_restorecursor(g, p, savevar);
|
649
|
+
str_delete(savevar);
|
650
|
+
}
|
594
651
|
|
595
652
|
g->I[0] = replab;
|
596
653
|
w(g, "~Mbreak 'replab~I0;~N~-~M}~N");
|
597
|
-
str_delete(savevar);
|
598
654
|
}
|
599
655
|
|
600
656
|
static void generate_repeat(struct generator * g, struct node * p) {
|
@@ -603,8 +659,8 @@ static void generate_repeat(struct generator * g, struct node * p) {
|
|
603
659
|
}
|
604
660
|
|
605
661
|
static void generate_atleast(struct generator * g, struct node * p) {
|
606
|
-
|
607
662
|
struct str * loopvar = vars_newname(g);
|
663
|
+
|
608
664
|
write_comment(g, p);
|
609
665
|
g->B[0] = str_data(loopvar);
|
610
666
|
w(g, "~Mlet mut ~B0 = ");
|
@@ -626,14 +682,12 @@ static void generate_atleast(struct generator * g, struct node * p) {
|
|
626
682
|
}
|
627
683
|
|
628
684
|
static void generate_setmark(struct generator * g, struct node * p) {
|
629
|
-
|
630
685
|
write_comment(g, p);
|
631
686
|
g->V[0] = p->name;
|
632
687
|
writef(g, "~M~V0 = env.cursor;~N", p);
|
633
688
|
}
|
634
689
|
|
635
690
|
static void generate_tomark(struct generator * g, struct node * p) {
|
636
|
-
|
637
691
|
write_comment(g, p);
|
638
692
|
g->S[0] = p->mode == m_forward ? ">" : "<";
|
639
693
|
|
@@ -647,10 +701,8 @@ static void generate_tomark(struct generator * g, struct node * p) {
|
|
647
701
|
}
|
648
702
|
|
649
703
|
static void generate_atmark(struct generator * g, struct node * p) {
|
650
|
-
|
651
704
|
write_comment(g, p);
|
652
|
-
w(g, "~Mif env.cursor != "); generate_AE(g, p->AE);
|
653
|
-
writef(g, " ", p);
|
705
|
+
w(g, "~Mif env.cursor != "); generate_AE(g, p->AE); writef(g, " ", p);
|
654
706
|
write_block_start(g);
|
655
707
|
write_failure(g);
|
656
708
|
write_block_end(g);
|
@@ -674,30 +726,19 @@ static void generate_hop(struct generator * g, struct node * p) {
|
|
674
726
|
}
|
675
727
|
|
676
728
|
static void generate_delete(struct generator * g, struct node * p) {
|
677
|
-
|
678
729
|
write_comment(g, p);
|
679
730
|
writef(g, "~Mif !env.slice_del() {~N"
|
680
731
|
"~+~Mreturn false;~N~-"
|
681
732
|
"~M}~N", p);
|
682
733
|
}
|
683
734
|
|
684
|
-
|
685
|
-
static void generate_next(struct generator * g, struct node * p) {
|
686
|
-
|
687
|
-
write_comment(g, p);
|
688
|
-
write_check_limit(g, p);
|
689
|
-
write_inc_cursor(g, p);
|
690
|
-
}
|
691
|
-
|
692
735
|
static void generate_tolimit(struct generator * g, struct node * p) {
|
693
|
-
|
694
736
|
write_comment(g, p);
|
695
737
|
g->S[0] = p->mode == m_forward ? "env.limit" : "env.limit_backward";
|
696
738
|
writef(g, "~Menv.cursor = ~S0;~N", p);
|
697
739
|
}
|
698
740
|
|
699
741
|
static void generate_atlimit(struct generator * g, struct node * p) {
|
700
|
-
|
701
742
|
write_comment(g, p);
|
702
743
|
g->S[0] = p->mode == m_forward ? "env.limit" : "env.limit_backward";
|
703
744
|
g->S[1] = p->mode == m_forward ? "<" : ">";
|
@@ -705,28 +746,24 @@ static void generate_atlimit(struct generator * g, struct node * p) {
|
|
705
746
|
}
|
706
747
|
|
707
748
|
static void generate_leftslice(struct generator * g, struct node * p) {
|
708
|
-
|
709
749
|
write_comment(g, p);
|
710
750
|
g->S[0] = p->mode == m_forward ? "env.bra" : "env.ket";
|
711
751
|
writef(g, "~M~S0 = env.cursor;~N", p);
|
712
752
|
}
|
713
753
|
|
714
754
|
static void generate_rightslice(struct generator * g, struct node * p) {
|
715
|
-
|
716
755
|
write_comment(g, p);
|
717
756
|
g->S[0] = p->mode == m_forward ? "env.ket" : "env.bra";
|
718
757
|
writef(g, "~M~S0 = env.cursor;~N", p);
|
719
758
|
}
|
720
759
|
|
721
760
|
static void generate_assignto(struct generator * g, struct node * p) {
|
722
|
-
|
723
761
|
write_comment(g, p);
|
724
762
|
g->V[0] = p->name;
|
725
763
|
writef(g, "~M~V0 = env.assign_to();~N", p);
|
726
764
|
}
|
727
765
|
|
728
766
|
static void generate_sliceto(struct generator * g, struct node * p) {
|
729
|
-
|
730
767
|
write_comment(g, p);
|
731
768
|
g->V[0] = p->name;
|
732
769
|
writef(g, "~M~V0 = env.slice_to();~N"
|
@@ -735,13 +772,12 @@ static void generate_sliceto(struct generator * g, struct node * p) {
|
|
735
772
|
}
|
736
773
|
|
737
774
|
static void generate_address(struct generator * g, struct node * p) {
|
738
|
-
|
739
775
|
/* If we deal with a string variable which is of type String we need to
|
740
776
|
* pass it by reference not by value. Literalstrings on the other hand are
|
741
777
|
* of type &'static str so we can pass them by value.
|
742
778
|
*/
|
743
779
|
symbol * b = p->literalstring;
|
744
|
-
if (b !=
|
780
|
+
if (b != NULL) {
|
745
781
|
write_literal_string(g, b);
|
746
782
|
} else {
|
747
783
|
write_char(g, '&');
|
@@ -750,7 +786,6 @@ static void generate_address(struct generator * g, struct node * p) {
|
|
750
786
|
}
|
751
787
|
|
752
788
|
static void generate_insert(struct generator * g, struct node * p, int style) {
|
753
|
-
|
754
789
|
int keep_c = style == c_attach;
|
755
790
|
write_comment(g, p);
|
756
791
|
if (p->mode == m_backward) keep_c = !keep_c;
|
@@ -763,7 +798,6 @@ static void generate_insert(struct generator * g, struct node * p, int style) {
|
|
763
798
|
}
|
764
799
|
|
765
800
|
static void generate_assignfrom(struct generator * g, struct node * p) {
|
766
|
-
|
767
801
|
int keep_c = p->mode == m_forward; /* like 'attach' */
|
768
802
|
|
769
803
|
write_comment(g, p);
|
@@ -782,9 +816,7 @@ static void generate_assignfrom(struct generator * g, struct node * p) {
|
|
782
816
|
if (keep_c) w(g, "~Menv.cursor = c;~N");
|
783
817
|
}
|
784
818
|
|
785
|
-
|
786
819
|
static void generate_slicefrom(struct generator * g, struct node * p) {
|
787
|
-
|
788
820
|
write_comment(g, p);
|
789
821
|
w(g, "~Mif !env.slice_from(");
|
790
822
|
generate_address(g, p);
|
@@ -793,7 +825,6 @@ static void generate_slicefrom(struct generator * g, struct node * p) {
|
|
793
825
|
}
|
794
826
|
|
795
827
|
static void generate_setlimit(struct generator * g, struct node * p) {
|
796
|
-
struct str * savevar = vars_newname(g);
|
797
828
|
struct str * varname = vars_newname(g);
|
798
829
|
write_comment(g, p);
|
799
830
|
if (p->left && p->left->type == c_tomark) {
|
@@ -806,6 +837,7 @@ static void generate_setlimit(struct generator * g, struct node * p) {
|
|
806
837
|
* restore c.
|
807
838
|
*/
|
808
839
|
struct node * q = p->left;
|
840
|
+
write_comment(g, q);
|
809
841
|
g->S[0] = q->mode == m_forward ? ">" : "<";
|
810
842
|
w(g, "~Mif env.cursor ~S0 "); generate_AE(g, q->AE); w(g, " ");
|
811
843
|
write_block_start(g);
|
@@ -821,7 +853,8 @@ static void generate_setlimit(struct generator * g, struct node * p) {
|
|
821
853
|
w(g, "~Mlet ~B0 = env.limit_backward;~N");
|
822
854
|
w(g, "~Menv.limit_backward = ");
|
823
855
|
}
|
824
|
-
generate_AE(g, q->AE);
|
856
|
+
generate_AE(g, q->AE);
|
857
|
+
writef(g, ";~N", q);
|
825
858
|
|
826
859
|
if (p->mode == m_forward) {
|
827
860
|
str_assign(g->failure_str, "env.limit += ");
|
@@ -833,7 +866,9 @@ static void generate_setlimit(struct generator * g, struct node * p) {
|
|
833
866
|
str_append_string(g->failure_str, ";");
|
834
867
|
}
|
835
868
|
} else {
|
869
|
+
struct str * savevar = vars_newname(g);
|
836
870
|
write_savecursor(g, p, savevar);
|
871
|
+
|
837
872
|
generate(g, p->left);
|
838
873
|
|
839
874
|
if (!g->unreachable) {
|
@@ -857,6 +892,7 @@ static void generate_setlimit(struct generator * g, struct node * p) {
|
|
857
892
|
str_append_string(g->failure_str, ";");
|
858
893
|
}
|
859
894
|
}
|
895
|
+
str_delete(savevar);
|
860
896
|
}
|
861
897
|
|
862
898
|
if (!g->unreachable) {
|
@@ -869,17 +905,16 @@ static void generate_setlimit(struct generator * g, struct node * p) {
|
|
869
905
|
}
|
870
906
|
}
|
871
907
|
str_delete(varname);
|
872
|
-
str_delete(savevar);
|
873
908
|
}
|
874
909
|
|
875
910
|
/* dollar sets snowball up to operate on a string variable as if it were the
|
876
911
|
* current string */
|
877
912
|
static void generate_dollar(struct generator * g, struct node * p) {
|
878
|
-
|
879
|
-
struct str * savevar_env = vars_newname(g);
|
880
913
|
write_comment(g, p);
|
914
|
+
|
915
|
+
struct str * savevar = vars_newname(g);
|
881
916
|
g->V[0] = p->name;
|
882
|
-
g->B[0] = str_data(
|
917
|
+
g->B[0] = str_data(savevar);
|
883
918
|
writef(g, "~Mlet ~B0 = env.clone();~N"
|
884
919
|
"~Menv.set_current_s(~V0.clone());~N"
|
885
920
|
"~Menv.cursor = 0;~N"
|
@@ -887,45 +922,72 @@ static void generate_dollar(struct generator * g, struct node * p) {
|
|
887
922
|
generate(g, p->left);
|
888
923
|
if (!g->unreachable) {
|
889
924
|
g->V[0] = p->name;
|
890
|
-
g->B[0] = str_data(
|
925
|
+
g->B[0] = str_data(savevar);
|
891
926
|
/* Update string variable. */
|
892
927
|
w(g, "~M~V0 = env.current.clone().into_owned();~N");
|
893
928
|
/* Reset env */
|
894
929
|
w(g, "~M*env = ~B0;~N");
|
895
930
|
}
|
896
|
-
str_delete(
|
931
|
+
str_delete(savevar);
|
897
932
|
}
|
898
933
|
|
899
|
-
static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
|
900
|
-
|
934
|
+
static void generate_integer_assign(struct generator * g, struct node * p, const char * s) {
|
935
|
+
write_comment(g, p);
|
901
936
|
g->V[0] = p->name;
|
902
937
|
g->S[0] = s;
|
903
938
|
w(g, "~M~V0 ~S0 "); generate_AE(g, p->AE); w(g, ";~N");
|
904
939
|
}
|
905
940
|
|
906
|
-
static void generate_integer_test(struct generator * g, struct node * p
|
907
|
-
|
908
|
-
|
941
|
+
static void generate_integer_test(struct generator * g, struct node * p) {
|
942
|
+
write_comment(g, p);
|
943
|
+
int relop = p->type;
|
944
|
+
int optimise_to_return = (g->failure_label == x_return && p->right && p->right->type == c_functionend);
|
945
|
+
if (optimise_to_return) {
|
946
|
+
w(g, "~Mreturn ");
|
947
|
+
p->right = NULL;
|
948
|
+
} else {
|
949
|
+
w(g, "~Mif ");
|
950
|
+
// We want the inverse of the snowball test here.
|
951
|
+
relop ^= 1;
|
952
|
+
}
|
909
953
|
generate_AE(g, p->left);
|
910
|
-
|
911
|
-
|
912
|
-
write_char(g, ' ');
|
954
|
+
// Relational operators are the same as C.
|
955
|
+
write_c_relop(g, relop);
|
913
956
|
generate_AE(g, p->AE);
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
957
|
+
if (optimise_to_return) {
|
958
|
+
w(g, "~N");
|
959
|
+
} else {
|
960
|
+
write_block_start(g);
|
961
|
+
write_failure(g);
|
962
|
+
write_block_end(g);
|
963
|
+
g->unreachable = false;
|
964
|
+
}
|
919
965
|
}
|
920
966
|
|
921
967
|
static void generate_call(struct generator * g, struct node * p) {
|
922
|
-
|
968
|
+
int signals = check_possible_signals_list(g, p->name->definition, c_define, 0);
|
923
969
|
write_comment(g, p);
|
924
970
|
g->V[0] = p->name;
|
925
|
-
|
971
|
+
if (g->failure_label == x_return &&
|
972
|
+
(signals == 0 || (p->right && p->right->type == c_functionend))) {
|
973
|
+
/* Always fails or tail call. */
|
974
|
+
writef(g, "~Mreturn ~W0(env, context);~N", p);
|
975
|
+
return;
|
976
|
+
}
|
977
|
+
if (signals == 1) {
|
978
|
+
/* Always succeeds. */
|
979
|
+
writef(g, "~M~W0(env, context);~N", p);
|
980
|
+
} else if (signals == 0) {
|
981
|
+
/* Always fails. */
|
982
|
+
writef(g, "~M~W0(env, context);~N", p);
|
983
|
+
write_failure(g);
|
984
|
+
} else {
|
985
|
+
write_failure_if(g, "!~W0(env, context)", p);
|
986
|
+
}
|
926
987
|
}
|
927
988
|
|
928
989
|
static void generate_grouping(struct generator * g, struct node * p, int complement) {
|
990
|
+
write_comment(g, p);
|
929
991
|
|
930
992
|
struct grouping * q = p->name->grouping;
|
931
993
|
g->S[0] = p->mode == m_forward ? "" : "_b";
|
@@ -937,7 +999,6 @@ static void generate_grouping(struct generator * g, struct node * p, int complem
|
|
937
999
|
}
|
938
1000
|
|
939
1001
|
static void generate_namedstring(struct generator * g, struct node * p) {
|
940
|
-
|
941
1002
|
write_comment(g, p);
|
942
1003
|
g->S[0] = p->mode == m_forward ? "" : "_b";
|
943
1004
|
g->V[0] = p->name;
|
@@ -953,10 +1014,8 @@ static void generate_literalstring(struct generator * g, struct node * p) {
|
|
953
1014
|
}
|
954
1015
|
|
955
1016
|
static void generate_setup_context(struct generator * g) {
|
956
|
-
|
957
|
-
struct name * q;
|
958
1017
|
w(g, "~Mlet mut context = &mut Context {~+~N");
|
959
|
-
for (q = g->analyser->names; q; q = q->next) {
|
1018
|
+
for (struct name * q = g->analyser->names; q; q = q->next) {
|
960
1019
|
g->V[0] = q;
|
961
1020
|
switch (q->type) {
|
962
1021
|
case t_string:
|
@@ -975,18 +1034,22 @@ static void generate_setup_context(struct generator * g) {
|
|
975
1034
|
|
976
1035
|
static void generate_define(struct generator * g, struct node * p) {
|
977
1036
|
struct name * q = p->name;
|
1037
|
+
if (q->type == t_routine && !q->used) return;
|
978
1038
|
|
979
|
-
|
1039
|
+
write_newline(g);
|
1040
|
+
write_comment(g, p);
|
980
1041
|
|
981
1042
|
g->V[0] = q;
|
982
|
-
|
983
1043
|
if (q->type == t_routine) {
|
984
|
-
w(g, "~
|
1044
|
+
w(g, "~Mfn ~W0(env: &mut SnowballEnv, context: &mut Context) -> bool {~+~N");
|
985
1045
|
} else {
|
986
|
-
w(g, "~
|
1046
|
+
w(g, "~Mpub fn ~W0(env: &mut SnowballEnv) -> bool {~+~N");
|
987
1047
|
generate_setup_context(g);
|
988
1048
|
}
|
989
1049
|
if (p->amongvar_needed) w(g, "~Mlet mut among_var;~N");
|
1050
|
+
|
1051
|
+
/* Save output. */
|
1052
|
+
struct str * saved_output = g->outbuf;
|
990
1053
|
g->outbuf = str_new();
|
991
1054
|
|
992
1055
|
g->next_label = 0;
|
@@ -995,8 +1058,16 @@ static void generate_define(struct generator * g, struct node * p) {
|
|
995
1058
|
str_clear(g->failure_str);
|
996
1059
|
g->failure_label = x_return;
|
997
1060
|
g->unreachable = false;
|
1061
|
+
int signals = check_possible_signals_list(g, p->left, c_define, 0);
|
1062
|
+
|
1063
|
+
/* Generate function body. */
|
998
1064
|
generate(g, p->left);
|
999
|
-
if (
|
1065
|
+
if (p->left->right) {
|
1066
|
+
assert(p->left->right->type == c_functionend);
|
1067
|
+
if (signals) {
|
1068
|
+
generate(g, p->left->right);
|
1069
|
+
}
|
1070
|
+
}
|
1000
1071
|
w(g, "~-~M}~N");
|
1001
1072
|
|
1002
1073
|
str_append(saved_output, g->outbuf);
|
@@ -1004,64 +1075,175 @@ static void generate_define(struct generator * g, struct node * p) {
|
|
1004
1075
|
g->outbuf = saved_output;
|
1005
1076
|
}
|
1006
1077
|
|
1078
|
+
static void generate_functionend(struct generator * g, struct node * p) {
|
1079
|
+
(void)p;
|
1080
|
+
w(g, "~Mreturn true~N");
|
1081
|
+
}
|
1082
|
+
|
1007
1083
|
static void generate_substring(struct generator * g, struct node * p) {
|
1084
|
+
write_comment(g, p);
|
1008
1085
|
|
1009
1086
|
struct among * x = p->among;
|
1010
|
-
|
1011
|
-
|
1087
|
+
int block = -1;
|
1088
|
+
unsigned int bitmap = 0;
|
1089
|
+
struct amongvec * among_cases = x->b;
|
1090
|
+
int empty_case = -1;
|
1091
|
+
int n_cases = 0;
|
1092
|
+
symbol cases[2];
|
1093
|
+
int shortest_size = x->shortest_size;
|
1094
|
+
int block_opened = 0;
|
1012
1095
|
|
1013
1096
|
g->S[0] = p->mode == m_forward ? "" : "_b";
|
1014
1097
|
g->I[0] = x->number;
|
1098
|
+
g->I[1] = x->literalstring_count;
|
1099
|
+
|
1100
|
+
/* In forward mode with non-ASCII UTF-8 characters, the first byte
|
1101
|
+
* of the string will often be the same, so instead look at the last
|
1102
|
+
* common byte position.
|
1103
|
+
*
|
1104
|
+
* In backward mode, we can't match if there are fewer characters before
|
1105
|
+
* the current position than the minimum length.
|
1106
|
+
*/
|
1107
|
+
for (int c = 0; c < x->literalstring_count; ++c) {
|
1108
|
+
symbol ch;
|
1109
|
+
if (among_cases[c].size == 0) {
|
1110
|
+
empty_case = c;
|
1111
|
+
continue;
|
1112
|
+
}
|
1113
|
+
if (p->mode == m_forward) {
|
1114
|
+
ch = among_cases[c].b[shortest_size - 1];
|
1115
|
+
} else {
|
1116
|
+
ch = among_cases[c].b[among_cases[c].size - 1];
|
1117
|
+
}
|
1118
|
+
if (n_cases == 0) {
|
1119
|
+
block = ch >> 5;
|
1120
|
+
} else if (ch >> 5 != block) {
|
1121
|
+
block = -1;
|
1122
|
+
if (n_cases > 2) break;
|
1123
|
+
}
|
1124
|
+
if (block == -1) {
|
1125
|
+
if (n_cases > 0 && ch == cases[0]) continue;
|
1126
|
+
if (n_cases < 2) {
|
1127
|
+
cases[n_cases++] = ch;
|
1128
|
+
} else if (ch != cases[1]) {
|
1129
|
+
++n_cases;
|
1130
|
+
break;
|
1131
|
+
}
|
1132
|
+
} else {
|
1133
|
+
if ((bitmap & (1u << (ch & 0x1f))) == 0) {
|
1134
|
+
bitmap |= 1u << (ch & 0x1f);
|
1135
|
+
if (n_cases < 2)
|
1136
|
+
cases[n_cases] = ch;
|
1137
|
+
++n_cases;
|
1138
|
+
}
|
1139
|
+
}
|
1140
|
+
}
|
1141
|
+
|
1142
|
+
if (block != -1 || n_cases <= 2) {
|
1143
|
+
char buf[64];
|
1144
|
+
g->I[2] = block;
|
1145
|
+
g->I[3] = bitmap;
|
1146
|
+
g->I[4] = shortest_size - 1;
|
1147
|
+
if (p->mode == m_forward) {
|
1148
|
+
sprintf(buf, "env.current.as_bytes()[(env.cursor + %d) as usize]", shortest_size - 1);
|
1149
|
+
g->S[1] = buf;
|
1150
|
+
if (shortest_size == 1) {
|
1151
|
+
writef(g, "~Mif (env.cursor >= env.limit", p);
|
1152
|
+
} else {
|
1153
|
+
writef(g, "~Mif (env.cursor + ~I4 >= env.limit", p);
|
1154
|
+
}
|
1155
|
+
} else {
|
1156
|
+
g->S[1] = "env.current.as_bytes()[(env.cursor - 1) as usize]";
|
1157
|
+
if (shortest_size == 1) {
|
1158
|
+
writef(g, "~Mif (env.cursor <= env.limit_backward", p);
|
1159
|
+
} else {
|
1160
|
+
writef(g, "~Mif (env.cursor - ~I4 <= env.limit_backward", p);
|
1161
|
+
}
|
1162
|
+
}
|
1163
|
+
if (n_cases == 0) {
|
1164
|
+
/* We get this for the degenerate case: among ( '' )
|
1165
|
+
* This doesn't seem to be a useful construct, but it is
|
1166
|
+
* syntactically valid.
|
1167
|
+
*/
|
1168
|
+
} else if (n_cases == 1) {
|
1169
|
+
g->I[4] = cases[0];
|
1170
|
+
writef(g, " || ~S1 as u8 != ~I4 as u8", p);
|
1171
|
+
} else if (n_cases == 2) {
|
1172
|
+
g->I[4] = cases[0];
|
1173
|
+
g->I[5] = cases[1];
|
1174
|
+
writef(g, " || (~S1 as u8 != ~I4 as u8 && ~S1 as u8 != ~I5 as u8)", p);
|
1175
|
+
} else {
|
1176
|
+
writef(g, " || ~S1 as u8 >> 5 != ~I2 as u8 || ((~I3 as i32 >> (~S1 as u8 & 0x1f)) & 1) == 0", p);
|
1177
|
+
}
|
1178
|
+
write_string(g, ") ");
|
1179
|
+
if (empty_case != -1) {
|
1180
|
+
/* If the among includes the empty string, it can never fail
|
1181
|
+
* so not matching the bitmap means we match the empty string.
|
1182
|
+
*/
|
1183
|
+
g->I[4] = among_cases[empty_case].result;
|
1184
|
+
writef(g, "{among_var = ~I4;}~N~Melse ", p);
|
1185
|
+
write_block_start(g);
|
1186
|
+
block_opened = 1;
|
1187
|
+
} else {
|
1188
|
+
writef(g, "~f~N", p);
|
1189
|
+
}
|
1190
|
+
} else {
|
1191
|
+
#ifdef OPTIMISATION_WARNINGS
|
1192
|
+
printf("Couldn't shortcut among %d\n", x->number);
|
1193
|
+
#endif
|
1194
|
+
}
|
1015
1195
|
|
1016
|
-
if (
|
1017
|
-
|
1196
|
+
if (x->amongvar_needed) {
|
1197
|
+
writef(g, "~Mamong_var = env.find_among~S0(A_~I0, context);~N", p);
|
1198
|
+
if (!x->always_matches) {
|
1199
|
+
write_failure_if(g, "among_var == 0", p);
|
1200
|
+
}
|
1201
|
+
} else if (x->always_matches) {
|
1202
|
+
writef(g, "~Menv.find_among~S0(A_~I0, context);~N", p);
|
1018
1203
|
} else {
|
1019
|
-
|
1020
|
-
write_failure_if(g, "among_var == 0", p);
|
1204
|
+
write_failure_if(g, "env.find_among~S0(A_~I0, context) == 0", p);
|
1021
1205
|
}
|
1206
|
+
if (block_opened) write_block_end(g);
|
1022
1207
|
}
|
1023
1208
|
|
1024
1209
|
static void generate_among(struct generator * g, struct node * p) {
|
1025
|
-
|
1026
1210
|
struct among * x = p->among;
|
1027
1211
|
|
1028
|
-
if (x->substring ==
|
1029
|
-
|
1030
|
-
|
1212
|
+
if (x->substring == NULL) {
|
1213
|
+
generate_substring(g, p);
|
1214
|
+
} else {
|
1215
|
+
write_comment(g, p);
|
1216
|
+
}
|
1031
1217
|
|
1032
1218
|
if (x->command_count == 1 && x->nocommand_count == 0) {
|
1033
1219
|
/* Only one outcome ("no match" already handled). */
|
1034
1220
|
generate(g, x->commands[0]);
|
1035
1221
|
} else if (x->command_count > 0) {
|
1036
|
-
|
1037
|
-
|
1038
|
-
for (i = 1; i <= x->command_count; i++) {
|
1222
|
+
w(g, "~Mmatch among_var {~N~+");
|
1223
|
+
for (int i = 1; i <= x->command_count; i++) {
|
1039
1224
|
g->I[0] = i;
|
1040
|
-
|
1041
|
-
w(g, "if among_var == ~I0 {~N~+");
|
1225
|
+
w(g, "~M~I0 => {~N~+");
|
1042
1226
|
generate(g, x->commands[i - 1]);
|
1043
|
-
w(g, "~-~M}");
|
1227
|
+
w(g, "~-~M}~N");
|
1044
1228
|
g->unreachable = false;
|
1045
1229
|
}
|
1046
|
-
w(g, "~N");
|
1230
|
+
w(g, "~M_ => ()~N");
|
1231
|
+
w(g, "~-~M}~N");
|
1047
1232
|
}
|
1048
1233
|
}
|
1049
1234
|
|
1050
1235
|
static void generate_booltest(struct generator * g, struct node * p) {
|
1051
|
-
|
1052
1236
|
write_comment(g, p);
|
1053
1237
|
g->V[0] = p->name;
|
1054
1238
|
write_failure_if(g, "!~V0", p);
|
1055
1239
|
}
|
1056
1240
|
|
1057
1241
|
static void generate_false(struct generator * g, struct node * p) {
|
1058
|
-
|
1059
1242
|
write_comment(g, p);
|
1060
1243
|
write_failure(g);
|
1061
1244
|
}
|
1062
1245
|
|
1063
1246
|
static void generate_debug(struct generator * g, struct node * p) {
|
1064
|
-
|
1065
1247
|
write_comment(g, p);
|
1066
1248
|
g->I[0] = g->debug_count++;
|
1067
1249
|
g->I[1] = p->line_number;
|
@@ -1069,14 +1251,10 @@ static void generate_debug(struct generator * g, struct node * p) {
|
|
1069
1251
|
}
|
1070
1252
|
|
1071
1253
|
static void generate(struct generator * g, struct node * p) {
|
1072
|
-
|
1073
|
-
int a0;
|
1074
|
-
struct str * a1;
|
1075
|
-
|
1076
1254
|
if (g->unreachable) return;
|
1077
1255
|
|
1078
|
-
a0 = g->failure_label;
|
1079
|
-
a1 = str_copy(g->failure_str);
|
1256
|
+
int a0 = g->failure_label;
|
1257
|
+
struct str * a1 = str_copy(g->failure_str);
|
1080
1258
|
|
1081
1259
|
switch (p->type) {
|
1082
1260
|
case c_define: generate_define(g, p); break;
|
@@ -1094,6 +1272,11 @@ static void generate(struct generator * g, struct node * p) {
|
|
1094
1272
|
case c_do: generate_do(g, p); break;
|
1095
1273
|
case c_goto: generate_GO(g, p, 1); break;
|
1096
1274
|
case c_gopast: generate_GO(g, p, 0); break;
|
1275
|
+
case c_goto_grouping: generate_GO_grouping(g, p, 1, 0); break;
|
1276
|
+
case c_gopast_grouping:
|
1277
|
+
generate_GO_grouping(g, p, 0, 0); break;
|
1278
|
+
case c_goto_non: generate_GO_grouping(g, p, 1, 1); break;
|
1279
|
+
case c_gopast_non: generate_GO_grouping(g, p, 0, 1); break;
|
1097
1280
|
case c_repeat: generate_repeat(g, p); break;
|
1098
1281
|
case c_loop: generate_loop(g, p); break;
|
1099
1282
|
case c_atleast: generate_atleast(g, p); break;
|
@@ -1120,12 +1303,14 @@ static void generate(struct generator * g, struct node * p) {
|
|
1120
1303
|
case c_minusassign: generate_integer_assign(g, p, "-="); break;
|
1121
1304
|
case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
|
1122
1305
|
case c_divideassign: generate_integer_assign(g, p, "/="); break;
|
1123
|
-
case c_eq:
|
1124
|
-
case c_ne:
|
1125
|
-
case
|
1126
|
-
case c_ge:
|
1127
|
-
case
|
1128
|
-
case c_le:
|
1306
|
+
case c_eq:
|
1307
|
+
case c_ne:
|
1308
|
+
case c_gt:
|
1309
|
+
case c_ge:
|
1310
|
+
case c_lt:
|
1311
|
+
case c_le:
|
1312
|
+
generate_integer_test(g, p);
|
1313
|
+
break;
|
1129
1314
|
case c_call: generate_call(g, p); break;
|
1130
1315
|
case c_grouping: generate_grouping(g, p, false); break;
|
1131
1316
|
case c_non: generate_grouping(g, p, true); break;
|
@@ -1137,6 +1322,7 @@ static void generate(struct generator * g, struct node * p) {
|
|
1137
1322
|
case c_false: generate_false(g, p); break;
|
1138
1323
|
case c_true: break;
|
1139
1324
|
case c_debug: generate_debug(g, p); break;
|
1325
|
+
case c_functionend: generate_functionend(g, p); break;
|
1140
1326
|
default: fprintf(stderr, "%d encountered\n", p->type);
|
1141
1327
|
exit(1);
|
1142
1328
|
}
|
@@ -1151,7 +1337,6 @@ static void generate(struct generator * g, struct node * p) {
|
|
1151
1337
|
/* To allow warning free compilation of generated code and */
|
1152
1338
|
/* consistency with snowball variable namings we allow some kind of warnings here */
|
1153
1339
|
static void generate_allow_warnings(struct generator * g) {
|
1154
|
-
|
1155
1340
|
w(g, "#![allow(non_snake_case)]~N");
|
1156
1341
|
w(g, "#![allow(non_upper_case_globals)]~N");
|
1157
1342
|
w(g, "#![allow(unused_mut)]~N");
|
@@ -1160,7 +1345,6 @@ static void generate_allow_warnings(struct generator * g) {
|
|
1160
1345
|
}
|
1161
1346
|
|
1162
1347
|
static void generate_class_begin(struct generator * g) {
|
1163
|
-
|
1164
1348
|
w(g, "use snowball::SnowballEnv;~N");
|
1165
1349
|
if (g->analyser->among_count > 0) {
|
1166
1350
|
w(g, "use snowball::Among;~N~N");
|
@@ -1168,6 +1352,7 @@ static void generate_class_begin(struct generator * g) {
|
|
1168
1352
|
}
|
1169
1353
|
|
1170
1354
|
static void generate_among_table(struct generator * g, struct among * x) {
|
1355
|
+
write_comment(g, x->node);
|
1171
1356
|
|
1172
1357
|
struct amongvec * v = x->b;
|
1173
1358
|
|
@@ -1175,32 +1360,27 @@ static void generate_among_table(struct generator * g, struct among * x) {
|
|
1175
1360
|
g->I[1] = x->literalstring_count;
|
1176
1361
|
|
1177
1362
|
w(g, "~Mstatic A_~I0: &'static [Among<Context>; ~I1] = &[~N~+");
|
1178
|
-
{
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1182
|
-
|
1183
|
-
|
1184
|
-
|
1185
|
-
|
1186
|
-
w(g, "
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
} else {
|
1192
|
-
w(g, "None");
|
1193
|
-
}
|
1194
|
-
w(g, ")~S0~N");
|
1195
|
-
v++;
|
1363
|
+
for (int i = 0; i < x->literalstring_count; i++) {
|
1364
|
+
g->I[0] = v[i].i;
|
1365
|
+
g->I[1] = v[i].result;
|
1366
|
+
g->L[0] = v[i].b;
|
1367
|
+
g->S[0] = ",";
|
1368
|
+
|
1369
|
+
w(g, "~MAmong(~L0, ~I0, ~I1, ");
|
1370
|
+
if (v[i].function != NULL) {
|
1371
|
+
w(g, "Some(&");
|
1372
|
+
write_varname(g, v[i].function);
|
1373
|
+
w(g, ")");
|
1374
|
+
} else {
|
1375
|
+
w(g, "None");
|
1196
1376
|
}
|
1377
|
+
w(g, ")~S0~N");
|
1197
1378
|
}
|
1198
1379
|
w(g, "~-~M];~N~N");
|
1199
1380
|
}
|
1200
1381
|
|
1201
1382
|
static void generate_amongs(struct generator * g) {
|
1202
|
-
struct among * x;
|
1203
|
-
for (x = g->analyser->amongs; x; x = x->next) {
|
1383
|
+
for (struct among * x = g->analyser->amongs; x; x = x->next) {
|
1204
1384
|
generate_among_table(g, x);
|
1205
1385
|
}
|
1206
1386
|
}
|
@@ -1208,42 +1388,38 @@ static void generate_amongs(struct generator * g) {
|
|
1208
1388
|
static void set_bit(symbol * b, int i) { b[i/8] |= 1 << i%8; }
|
1209
1389
|
|
1210
1390
|
static void generate_grouping_table(struct generator * g, struct grouping * q) {
|
1211
|
-
|
1212
1391
|
int range = q->largest_ch - q->smallest_ch + 1;
|
1213
1392
|
int size = (range + 7)/ 8; /* assume 8 bits per symbol */
|
1214
1393
|
symbol * b = q->b;
|
1215
1394
|
symbol * map = create_b(size);
|
1216
|
-
int i;
|
1217
|
-
for (i = 0; i < size; i++) map[i] = 0;
|
1218
1395
|
|
1219
|
-
for (i = 0; i <
|
1396
|
+
for (int i = 0; i < size; i++) map[i] = 0;
|
1397
|
+
|
1398
|
+
for (int i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
|
1220
1399
|
|
1221
1400
|
g->V[0] = q->name;
|
1222
1401
|
g->I[0] = size;
|
1223
1402
|
w(g, "~Mstatic ~W0: &'static [u8; ~I0] = &[");
|
1224
|
-
for (i = 0; i < size; i++) {
|
1403
|
+
for (int i = 0; i < size; i++) {
|
1225
1404
|
write_int(g, map[i]);
|
1226
1405
|
if (i < size - 1) w(g, ", ");
|
1227
1406
|
}
|
1228
1407
|
w(g, "];~N~N");
|
1408
|
+
|
1229
1409
|
lose_b(map);
|
1230
1410
|
}
|
1231
1411
|
|
1232
1412
|
static void generate_groupings(struct generator * g) {
|
1233
|
-
struct grouping * q;
|
1234
|
-
for (q = g->analyser->groupings; q; q = q->next) {
|
1413
|
+
for (struct grouping * q = g->analyser->groupings; q; q = q->next) {
|
1235
1414
|
if (q->name->used)
|
1236
1415
|
generate_grouping_table(g, q);
|
1237
1416
|
}
|
1238
1417
|
}
|
1239
1418
|
|
1240
|
-
|
1241
1419
|
static void generate_members(struct generator * g) {
|
1242
|
-
|
1243
|
-
struct name * q;
|
1244
1420
|
w(g, "#[derive(Clone)]~N");
|
1245
1421
|
w(g, "struct Context {~+~N");
|
1246
|
-
for (q = g->analyser->names; q; q = q->next) {
|
1422
|
+
for (struct name * q = g->analyser->names; q; q = q->next) {
|
1247
1423
|
g->V[0] = q;
|
1248
1424
|
switch (q->type) {
|
1249
1425
|
case t_string:
|
@@ -1261,17 +1437,13 @@ static void generate_members(struct generator * g) {
|
|
1261
1437
|
}
|
1262
1438
|
|
1263
1439
|
static void generate_methods(struct generator * g) {
|
1264
|
-
|
1265
|
-
struct node * p = g->analyser->program;
|
1266
|
-
while (p != 0) {
|
1440
|
+
for (struct node * p = g->analyser->program; p; p = p->right) {
|
1267
1441
|
generate(g, p);
|
1268
1442
|
g->unreachable = false;
|
1269
|
-
p = p->right;
|
1270
1443
|
}
|
1271
1444
|
}
|
1272
1445
|
|
1273
1446
|
extern void generate_program_rust(struct generator * g) {
|
1274
|
-
|
1275
1447
|
g->outbuf = str_new();
|
1276
1448
|
g->failure_str = str_new();
|
1277
1449
|
|