@ohm-js/wasm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mise.toml +2 -0
- package/AGENT.md +25 -0
- package/LICENSE +21 -0
- package/Makefile +23 -0
- package/README.md +34 -0
- package/TODO.md +28 -0
- package/package.json +32 -0
- package/runtime/ohmRuntime.ts +252 -0
- package/scripts/bundlewasm.ts +49 -0
- package/scripts/modparse.ts +397 -0
- package/src/cli.js +36 -0
- package/src/index.js +1195 -0
- package/test/data/_book-review.liquid +257 -0
- package/test/data/_es5.js +1057 -0
- package/test/data/_es5.wasm +0 -0
- package/test/data/_html5shiv-3.7.3.js +326 -0
- package/test/data/_liquid-html.ohm +605 -0
- package/test/go/README.md +67 -0
- package/test/go/cst.go +164 -0
- package/test/go/go.mod +5 -0
- package/test/go/go.sum +2 -0
- package/test/go/matcher.go +370 -0
- package/test/go/testmain.go +161 -0
- package/test/test-es5.js +104 -0
- package/test/test-liquid-html.js +27 -0
- package/test/test-wasm.js +764 -0
|
@@ -0,0 +1,764 @@
|
|
|
1
|
+
import test from 'ava';
|
|
2
|
+
import * as ohm from 'ohm-js';
|
|
3
|
+
import {performance} from 'perf_hooks';
|
|
4
|
+
|
|
5
|
+
import {ConstantsForTesting as Constants, WasmMatcher} from '../src/index.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Loads `str` into the matcher and immediately runs a match.
 *
 * @param {object} m - Matcher exposing `setInput(str)` and `match()`.
 * @param {string} str - Input text to match.
 * @returns {*} Whatever `m.match()` returns (the tests below compare it to 1 or 0).
 */
const matchWithInput = (m, str) => {
  m.setInput(str);
  return m.match();
};
|
|
8
|
+
|
|
9
|
+
// Width in bytes of one 32-bit unsigned integer. The memoization tests use it
// as the per-rule stride when reading memo-table entries with `getUint32`.
const SIZEOF_UINT32 = 4;
|
|
10
|
+
|
|
11
|
+
/**
 * Returns `x` unchanged unless it is `null` or `undefined`.
 *
 * @param {*} x - Value to check. Falsy values such as `0` and `''` are accepted.
 * @param {string} [msg] - Message for the thrown error.
 * @returns {*} The same value that was passed in.
 * @throws {Error} If `x` is `null` or `undefined`.
 */
function checkNotNull(x, msg = 'unexpected null value') {
  if (x === null || x === undefined) {
    throw new Error(msg);
  }
  return x;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Rebuilds the matched text by visiting the matcher's CST depth-first and
 * concatenating the input slice covered by each terminal node, in order.
 *
 * @param {object} m - Matcher exposing `getInput()` and `getCstRoot()`.
 * @param {*} [root] - Accepted but not used; the walk always starts from
 *     `m.getCstRoot()`.
 * @returns {string} The concatenated text of all terminal nodes.
 */
function unparse(m, root) {
  const source = m.getInput();
  const pieces = [];
  let offset = 0;

  const visit = node => {
    if (node.isTerminal()) {
      // Only terminals advance the offset into the input.
      const end = offset + node.matchLength;
      pieces.push(source.slice(offset, end));
      offset = end;
    }
    for (const child of node.children) {
      visit(child);
    }
  };

  visit(m.getCstRoot());
  return pieces.join('');
}
|
|
30
|
+
|
|
31
|
+
// const dumpMemoTable = pos => {
|
|
32
|
+
// const arr = [];
|
|
33
|
+
// for (let i = 0; i < 6; i++) {
|
|
34
|
+
// arr.push(view.getUint32(pos * Constants.MEMO_COL_SIZE_BYTES + i * 4, true));
|
|
35
|
+
// }
|
|
36
|
+
// console.log(arr.map(v => v.toString(16).padStart(8, '0')).join(' '));
|
|
37
|
+
// };
|
|
38
|
+
|
|
39
|
+
// After a match attempt, the input's character codes must be readable from
// the Wasm module's exported memory.
test('input in memory', async t => {
  const matcher = await WasmMatcher.fromGrammar(ohm.grammar('G { start = "x" }'));
  const text = 'ohm';
  matcher.setInput(text);
  matcher.match(); // Result is ignored; the call is what triggers fillInputBuffer.

  // The view starts 64 KiB into the exported memory — presumably the start of
  // the input buffer (TODO confirm against the matcher implementation).
  const view = new DataView(matcher._instance.exports.memory.buffer, 64 * 1024);
  for (let i = 0; i < text.length; i++) {
    t.is(view.getUint8(i), text.charCodeAt(i));
  }
});
|
|
50
|
+
|
|
51
|
+
// Checks the CST shape for a plain alternation and for a sequence that
// contains a rule application.
test('cst returns', async t => {
  // Every terminal in this test covers one character, has no children, and
  // reports `_type` -1.
  const assertSingleCharTerminal = node => {
    t.is(node.children.length, 0);
    t.is(node.matchLength, 1);
    t.is(node._type, -1);
  };

  // Grammar 1: start = "a" | "b"
  const altMatcher = await WasmMatcher.fromGrammar(ohm.grammar('G { start = "a" | "b" }'));
  t.is(matchWithInput(altMatcher, 'a'), 1);

  const altRoot = altMatcher.getCstRoot();
  t.is(altRoot.children.length, 1);
  t.is(altRoot.matchLength, 1);
  t.is(altRoot.ruleName, 'start');
  assertSingleCharTerminal(altRoot.children[0]); // "a"

  // Grammar 2: start = "a" b, with b = "b"
  const seqMatcher = await WasmMatcher.fromGrammar(ohm.grammar('G { start = "a" b\nb = "b" }'));
  t.is(matchWithInput(seqMatcher, 'ab'), 1);

  const seqRoot = seqMatcher.getCstRoot();
  t.is(seqRoot.children.length, 2);
  t.is(seqRoot.matchLength, 2);
  t.is(seqRoot.ruleName, 'start');

  const [terminalA, applyB] = seqRoot.children;
  assertSingleCharTerminal(terminalA); // "a"

  // Nonterminal node for the application of `b`
  t.is(applyB.children.length, 1);
  t.is(applyB.matchLength, 1);
  t.is(applyB.ruleName, 'b');

  // Terminal node for "b"
  assertSingleCharTerminal(applyB.children[0]);
});
|
|
96
|
+
|
|
97
|
+
// The `~space` lookahead contributes no CST node of its own, so the tree for
// input "a" is expected to be:
//
// - apply(x)
//   - any
//     - "a"
test('cst with lookahead', async t => {
  const matcher = await WasmMatcher.fromGrammar(ohm.grammar('G {x = ~space any}'));
  t.is(matchWithInput(matcher, 'a'), 1);

  // x
  const xNode = matcher.getCstRoot();
  t.is(xNode.matchLength, 1);
  t.is(xNode.children.length, 1);
  t.is(xNode.ruleName, 'x');

  // any
  const anyNode = xNode.children[0];
  t.is(anyNode.matchLength, 1);
  t.is(anyNode.children.length, 1);
  t.is(anyNode.ruleName, 'any');

  // Terminal
  const leaf = anyNode.children[0];
  t.is(leaf.matchLength, 1);
  t.is(leaf.children.length, 0);
  t.is(leaf.isTerminal(), true);
});
|
|
126
|
+
|
|
127
|
+
// A range expression produces a single terminal child under the rule node.
test('cst for range', async t => {
  const grammar = ohm.grammar('G {x = "a".."z"}');
  const matcher = await WasmMatcher.fromGrammar(grammar);
  t.is(matchWithInput(matcher, 'b'), 1);

  // x
  const xNode = matcher.getCstRoot();
  t.is(xNode.matchLength, 1);
  t.is(xNode.children.length, 1);
  t.is(xNode.ruleName, 'x');

  // Terminal
  const [leaf] = xNode.children;
  t.is(leaf.matchLength, 1);
  t.is(leaf.children.length, 0);
  t.is(leaf.isTerminal(), true);
});
|
|
144
|
+
|
|
145
|
+
// An optional expression always yields an iter node (`_type` -2) under the
// rule: with one terminal child when the option matched, and with no
// children when it did not.
test('cst for opt', async t => {
  const grammarSource = 'G {x = "a"?}';

  // Case 1: the optional terminal is present.
  const presentMatcher = await WasmMatcher.fromGrammar(ohm.grammar(grammarSource));
  t.is(matchWithInput(presentMatcher, 'a'), 1);

  const presentRoot = presentMatcher.getCstRoot(); // x
  t.is(presentRoot.matchLength, 1);
  t.is(presentRoot.ruleName, 'x');
  t.is(presentRoot.children.length, 1);

  const presentIter = presentRoot.children[0]; // iter
  t.is(presentIter.matchLength, 1);
  t.is(presentIter._type, -2);
  t.is(presentIter.children.length, 1);
  t.is(presentIter.children[0].isTerminal(), true);
  t.is(presentIter.children[0].matchLength, 1);

  // Case 2: empty input, using a freshly built matcher for the same grammar.
  const absentMatcher = await WasmMatcher.fromGrammar(ohm.grammar(grammarSource));
  t.is(matchWithInput(absentMatcher, ''), 1);

  const absentRoot = absentMatcher.getCstRoot(); // x
  t.is(absentRoot.matchLength, 0);
  t.is(absentRoot.ruleName, 'x');
  t.is(absentRoot.children.length, 1);

  const absentIter = absentRoot.children[0]; // iter
  t.is(absentIter.matchLength, 0);
  t.is(absentIter._type, -2);
  t.is(absentIter.children.length, 0);
});
|
|
180
|
+
|
|
181
|
+
// A `+` repetition matched once yields an iter node (`_type` -2) holding one
// terminal.
test('cst for plus', async t => {
  const grammar = ohm.grammar('G {x = "a"+}');
  const matcher = await WasmMatcher.fromGrammar(grammar);
  t.is(matchWithInput(matcher, 'a'), 1);

  // x
  const xNode = matcher.getCstRoot();
  t.is(xNode.matchLength, 1);
  t.is(xNode.ruleName, 'x');
  t.is(xNode.children.length, 1);

  // iter
  const iterNode = xNode.children[0];
  t.is(iterNode.matchLength, 1);
  t.is(iterNode._type, -2);
  t.is(iterNode.children.length, 1);

  // The single repetition
  const leaf = iterNode.children[0];
  t.is(leaf.isTerminal(), true);
  t.is(leaf.matchLength, 1);
});
|
|
202
|
+
|
|
203
|
+
// Expected tree for input "aaa":
//
// - apply(x)
//   - iter
//     - "a"
//     - "a"
//     - "a"
test('cst with (small) repetition', async t => {
  const grammar = ohm.grammar('G {x = "a"*}');
  const matcher = await WasmMatcher.fromGrammar(grammar);
  t.is(matchWithInput(matcher, 'aaa'), 1);

  // x
  const xNode = matcher.getCstRoot();
  t.is(xNode.matchLength, 3);
  t.is(xNode.children.length, 1);
  t.is(xNode.ruleName, 'x');

  // iter
  const iterNode = xNode.children[0];
  t.is(iterNode.matchLength, 3);
  t.is(iterNode.children.length, 3);
  t.is(iterNode._type, -2);

  // Terminal children: exactly the first three entries are inspected.
  const repetitions = iterNode.children;
  for (let i = 0; i < 3; i++) {
    const leaf = repetitions[i];
    t.is(leaf.isTerminal(), true);
    t.is(leaf.matchLength, 1);
  }
});
|
|
236
|
+
|
|
237
|
+
// Match-only check for a negative lookahead inside a repetition; the tree
// shape is not inspected here.
test('repetition and lookahead', async t => {
  const grammar = ohm.grammar('G {x = (~space any)*}');
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const outcome = matchWithInput(matcher, 'abc');
  t.is(outcome, 1);
});
|
|
241
|
+
|
|
242
|
+
// Checks the CST shape for `(~space any)*`, then checks that a grammar mixing
// that pattern with `spaces` and `any+` also matches.
//
// Expected tree for input "abc":
//
// - apply(x)
//   - iter
//     - apply(any) - terminal
//     - apply(any) - terminal
//     - apply(any) - terminal
test('cst with repetition and lookahead', async t => {
  // Each repetition is a one-character nonterminal node (`_type` 0) wrapping a
  // single terminal.
  const assertAnyApplication = node => {
    t.is(node.matchLength, 1);
    t.is(node.children.length, 1);
    t.is(node._type, 0);
    t.is(node.children[0].isTerminal(), true);
    t.is(node.children[0].matchLength, 1);
  };

  let matcher = await WasmMatcher.fromGrammar(ohm.grammar('G {x = (~space any)*}'));
  let input = 'abc';
  t.is(matchWithInput(matcher, input), 1);

  // x
  const xNode = matcher.getCstRoot();
  t.is(xNode.matchLength, 3);
  t.is(xNode.children.length, 1);
  t.is(xNode._type, 0);

  // iter
  const iterNode = xNode.children[0];
  t.is(iterNode.matchLength, 3);
  t.is(iterNode.children.length, 3);
  t.is(iterNode._type, -2);

  const [childA, childB, childC] = iterNode.children;
  assertAnyApplication(childA);
  assertAnyApplication(childB);
  assertAnyApplication(childC);

  // Previously this grammar was compiled and `input` reassigned, but nothing
  // was ever matched, so the second half of the test asserted nothing.
  matcher = await WasmMatcher.fromGrammar(ohm.grammar('G {x = (~space any)+ spaces any+}'));
  input = '/ab xy';
  t.is(matchWithInput(matcher, input), 1);
});
|
|
286
|
+
|
|
287
|
+
// A single-character terminal matches exactly that character.
test('wasm: one-char terminals', async t => {
  const source = `
G {
start = "1"
}
`;
  const matcher = await WasmMatcher.fromGrammar(ohm.grammar(source));
  const outcome = matchWithInput(matcher, '1');
  t.is(outcome, 1);
});
|
|
296
|
+
// A multi-character terminal matches the full literal.
test('wasm: multi-char terminals', async t => {
  const source = `
G {
start = "123"
}
`;
  const matcher = await WasmMatcher.fromGrammar(ohm.grammar(source));
  const outcome = matchWithInput(matcher, '123');
  t.is(outcome, 1);
});
|
|
305
|
+
|
|
306
|
+
// Input with trailing characters beyond what the start rule consumes must be
// reported as a failed match (0).
test('wasm: handle end', async t => {
  const source = `
G {
start = "1"
}
`;
  const matcher = await WasmMatcher.fromGrammar(ohm.grammar(source));
  const outcome = matchWithInput(matcher, '123');
  t.is(outcome, 0);
});
|
|
315
|
+
|
|
316
|
+
// Alternation between single-character terminals. Each case is
// [input, expected result of match()].
test('wasm: choice', async t => {
  const grammar = ohm.grammar(`
G {
start = "1" | "2"
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['2', 1],
    ['1', 1],
    ['3', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
327
|
+
|
|
328
|
+
// Alternation between multi-character terminals sharing a prefix. Each case
// is [input, expected result of match()].
test('wasm: more choice', async t => {
  const grammar = ohm.grammar(`
G {
start = "12" | "13" | "14"
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['14', 1],
    ['13', 1],
    ['15', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
339
|
+
|
|
340
|
+
// Sequences, including one that ends in an empty terminal. Each case is
// [input, expected result of match()].
test('wasm: sequence', async t => {
  const grammar = ohm.grammar(`
G {
start = "1" "2"
| "130" ""
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['12', 1],
    ['130', 1],
    ['13', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
352
|
+
|
|
353
|
+
// An alternation nested inside a sequence, plus a fallback alternative. Each
// case is [input, expected result of match()].
test('wasm: choice + sequence', async t => {
  const grammar = ohm.grammar(`
G {
start = "1" ("2" | "3")
| "14" ""
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['12', 1],
    ['13', 1],
    ['14', 1],
    ['15', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
366
|
+
|
|
367
|
+
// Rules that apply other rules, including a named case (`-- x`). Each case is
// [input, expected result of match()].
test('wasm: rule application', async t => {
  const grammar = ohm.grammar(`
G {
start = one two -- x
| three
one = "1"
two = "II" | "2"
three = "3"
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['12', 1],
    ['1II', 1],
    ['3', 1],
    ['13', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
383
|
+
|
|
384
|
+
// Kleene star, alone and followed by a terminal that shares its prefix. Each
// case is [input, expected result of match()].
test('wasm: star', async t => {
  const starOnly = await WasmMatcher.fromGrammar(
    ohm.grammar(`
G {
start = "1"*
}
`),
  );
  const starOnlyCases = [
    ['111', 1],
    ['1', 1],
    ['', 1],
    ['2', 0],
  ];
  for (const [input, expected] of starOnlyCases) {
    t.is(matchWithInput(starOnly, input), expected);
  }

  const starThenTerminal = await WasmMatcher.fromGrammar(
    ohm.grammar(`
G {
start = "123"* "1"
}
`),
  );
  const starThenTerminalCases = [
    ['1', 1],
    ['1231', 1],
    ['', 0],
    ['2', 0],
  ];
  for (const [input, expected] of starThenTerminalCases) {
    t.is(matchWithInput(starThenTerminal, input), expected);
  }
});
|
|
407
|
+
|
|
408
|
+
// One-or-more repetition: unlike star, empty input must fail. Each case is
// [input, expected result of match()].
test('wasm: plus', async t => {
  const grammar = ohm.grammar(`
G {
start = "1"+
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['111', 1],
    ['1', 1],
    ['', 0],
    ['2', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
420
|
+
|
|
421
|
+
// Positive lookahead (`&`) followed by the same terminal. Each case is
// [input, expected result of match()].
test('wasm: lookahead', async t => {
  const grammar = ohm.grammar(`
G {
start = &"1" "1"
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['1', 1],
    ['2', 0],
    ['', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
432
|
+
|
|
433
|
+
// Negative lookahead (`~`) guarding a terminal. Each case is
// [input, expected result of match()].
test('wasm: negative lookahead', async t => {
  const grammar = ohm.grammar(`
G {
start = ~"1" "2"
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['2', 1],
    ['12', 0],
    ['', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
444
|
+
|
|
445
|
+
// Optional terminal followed by a required one. Each case is
// [input, expected result of match()].
test('wasm: opt', async t => {
  const grammar = ohm.grammar(`
G {
start = "1"? "2"
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['12', 1],
    ['2', 1],
    ['', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
456
|
+
|
|
457
|
+
// Character range: both endpoints and an interior character match;
// characters outside the range do not. Each case is
// [input, expected result of match()].
test('wasm: range', async t => {
  const grammar = ohm.grammar(`
G {
start = "a".."z"
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['a', 1],
    ['m', 1],
    ['z', 1],
    ['A', 0],
    ['1', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
470
|
+
|
|
471
|
+
// `any` consumes exactly one character; `any*` additionally accepts empty
// input. Each case is [input, expected result of match()].
test('wasm: any', async t => {
  const single = await WasmMatcher.fromGrammar(ohm.grammar('G { start = any }'));
  const singleCases = [
    ['a', 1],
    ['1', 1],
    [' ', 1],
    ['', 0],
  ];
  for (const [input, expected] of singleCases) {
    t.is(matchWithInput(single, input), expected);
  }

  const repeated = await WasmMatcher.fromGrammar(ohm.grammar('G { start = any* }'));
  const repeatedCases = [
    ['a', 1],
    ['', 1],
  ];
  for (const [input, expected] of repeatedCases) {
    t.is(matchWithInput(repeated, input), expected);
  }
});
|
|
484
|
+
|
|
485
|
+
// Explicit `end` after a terminal. Each case is
// [input, expected result of match()].
test('wasm: end', async t => {
  const grammar = ohm.grammar(`
G {
start = "a" end
}
`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const cases = [
    ['a', 1],
    ['ab', 0],
    ['', 0],
  ];
  for (const [input, expected] of cases) {
    t.is(matchWithInput(matcher, input), expected);
  }
});
|
|
496
|
+
|
|
497
|
+
// Runs a larger grammar against a one-line message and a 200-line input, and
// logs how long the plain Ohm match and the Wasm match take on the long input.
// Only the Wasm results are asserted; the Ohm match result is not checked.
test('real-world grammar', async t => {
  const g = ohm.grammar(String.raw`
Test {
Msgs = Msg*
Msg = description? spaces (Head spaces Params spaces)

lower := "a".."z"
upper := "A".."Z"

description = "#" (~nl any)* nl?
Head = msgTarget spaces msgName
msgTarget
= (~space any)*
msgName
= letter (alnum | "_" | "-" | ":" | "." | "+" | "/")* -- literal
| templateString -- templateString
| interpolation -- interpolation
Params = Param*
Param = key spaces ":" spaces Json
Json
= "{" Param ("," Param)* ","? "}" -- object1
| "{" "}" -- object0
| "[" Json ("," Json)* ","? "]" -- array1
| "[" "]" -- array0
| string -- string
| templateString -- templateString
| number -- number
| boolean -- boolean
| interpolation -- interpolation
ident = letter (alnum | "_")*
key = ident | string
boolean = "true" | "false"
number
= digit* "." digit+ -- fract
| digit+ -- whole
string = "\"" doubleStringCharacter* "\""
doubleStringCharacter
= "\\" any -- escaped
| ~"\"" any -- nonEscaped
fieldSelector
= "." ident -- dot
| "[" ident "]" -- bracketIdent
| "[" number "]" -- bracketNumber
| "[" string "]" -- bracketString
fieldReference = ident fieldSelector*
interpolation = "$" "{" fieldReference "}"
templateString = "\u{0060}" templateStringCharacter* "\u{0060}"
templateStringCharacter
= "\\" any -- escaped
| interpolation -- interpolation
| ~"\u{0060}" any -- nonEscaped
comment
= "//" (~nl any)* nl -- cppComment
| "/*" (~"*/" any)* "*/" -- cComment
empty =
// space += comment
nl = "\n"
}
`);
  const message = '/quickjs eval source: "1 + 1"';
  // 200 copies of the message, each terminated by a newline.
  const longInput = `${message}\n`.repeat(200);

  const ohmStart = performance.now();
  g.match(longInput);
  t.log(`Ohm match time: ${(performance.now() - ohmStart).toFixed(2)}ms`);

  const matcher = await WasmMatcher.fromGrammar(g);
  t.is(matchWithInput(matcher, message), 1);

  const wasmStart = performance.now();
  t.is(matchWithInput(matcher, longInput), 1);
  t.log(`Wasm match time: ${(performance.now() - wasmStart).toFixed(2)}ms`);
});
|
|
570
|
+
|
|
571
|
+
// After matching "ab", the memo table must hold an entry for `b` at position
// 1 and for `start` at position 0, each equal to the `_base` of the
// corresponding CST node.
test('basic memoization', async t => {
  const grammar = ohm.grammar('G { start = "a" b\nb = "b" }');
  const matcher = await WasmMatcher.fromGrammar(grammar);
  t.is(matchWithInput(matcher, 'ab'), 1);

  const memoView = matcher.memoTableViewForTesting();

  // Reads the little-endian 32-bit memo entry for `ruleName` at input
  // position `pos`: one column per position, one uint32 slot per rule id.
  const readMemo = (pos, ruleName) => {
    const ruleId = checkNotNull(matcher._ruleIds.get(ruleName));
    const byteOffset = pos * Constants.MEMO_COL_SIZE_BYTES + SIZEOF_UINT32 * ruleId;
    return memoView.getUint32(byteOffset, true);
  };

  // start
  const startNode = matcher.getCstRoot();
  t.is(startNode.matchLength, 2);
  t.is(startNode.children.length, 2);
  t.is(startNode._type, 0);

  const [terminalA, applyB] = startNode.children;

  // "a"
  t.is(terminalA.isTerminal(), true);
  t.is(terminalA.matchLength, 1);

  // b
  t.is(applyB.matchLength, 1);
  t.is(applyB.children.length, 1);
  t.is(applyB._type, 0);

  // "b"
  const terminalB = applyB.children[0];
  t.is(terminalB.isTerminal(), true);
  t.is(terminalB.matchLength, 1);

  // Expect memo for `b` at position 1, and `start` at position 0.
  t.is(readMemo(1, 'b'), applyB._base);
  t.is(readMemo(0, 'start'), startNode._base);
});
|
|
613
|
+
|
|
614
|
+
// With `start = b "a" | b b` and input "bb", the memo entries for `b` at
// positions 0 and 1 must equal the `_base` of the two `b` nodes in the CST.
test('more memoization', async t => {
  const grammar = ohm.grammar('G { start = b "a" | b b\nb = "b" }');
  const matcher = await WasmMatcher.fromGrammar(grammar);
  t.is(matchWithInput(matcher, 'bb'), 1);

  const memoView = matcher.memoTableViewForTesting();

  // Reads the little-endian 32-bit memo entry for `ruleName` at input
  // position `pos`: one column per position, one uint32 slot per rule id.
  const readMemo = (pos, ruleName) => {
    const ruleId = checkNotNull(matcher._ruleIds.get(ruleName));
    const byteOffset = pos * Constants.MEMO_COL_SIZE_BYTES + SIZEOF_UINT32 * ruleId;
    return memoView.getUint32(byteOffset, true);
  };

  // start
  const startNode = matcher.getCstRoot();
  t.is(startNode.matchLength, 2);
  t.is(startNode.children.length, 2);
  t.is(startNode.ruleName, 'start');

  const [firstB, secondB] = startNode.children;

  // Both applications of `b` have the same shape; check them in order.
  for (const bNode of [firstB, secondB]) {
    t.is(bNode.matchLength, 1);
    t.is(bNode.children.length, 1);
    t.is(bNode.ruleName, 'b');
    t.is(bNode.children[0].isTerminal(), true);
    t.is(bNode.children[0].matchLength, 1);
  }

  // Expect memo for `b` at position 0 and 1.
  t.is(readMemo(0, 'b'), firstB._base);
  t.is(readMemo(1, 'b'), secondB._base);
});
|
|
653
|
+
|
|
654
|
+
// Parameterized rules whose arguments are plain rule applications.
test('parameterized rules (easy)', async t => {
  // The parameter is applied twice in a sequence.
  const twiceGrammar = ohm.grammar(`
G {
start = twice<x>
x = "x"
twice<exp> = exp exp
}`);
  const twiceMatcher = await WasmMatcher.fromGrammar(twiceGrammar);
  t.is(matchWithInput(twiceMatcher, 'xx'), 1);

  // The same parameterized rule is applied with two different arguments,
  // once under a negative lookahead.
  const narfGrammar = ohm.grammar(`
G {
start = ~narf<x> narf<y>
narf<thing> = thing
x = "x"
y = "y"

}`);
  const narfMatcher = await WasmMatcher.fromGrammar(narfGrammar);
  t.is(matchWithInput(narfMatcher, 'y'), 1);
});
|
|
675
|
+
|
|
676
|
+
// Parameters forwarded through an intermediate parameterized rule.
test('parameterized rules (hard)', async t => {
  // The argument is a rule application.
  const viaRule = ohm.grammar(`
G {
start = indirect<x>
indirect<e> = twice<e>
twice<exp> = exp exp
x = "x"
}`);
  const viaRuleMatcher = await WasmMatcher.fromGrammar(viaRule);
  t.is(matchWithInput(viaRuleMatcher, 'xx'), 1);

  // The argument is a terminal.
  const viaTerminal = ohm.grammar(`
G {
start = indirect<"x">
indirect<e> = twice<e>
twice<exp> = exp exp
}`);
  const viaTerminalMatcher = await WasmMatcher.fromGrammar(viaTerminal);
  t.is(matchWithInput(viaTerminalMatcher, 'xx'), 1);
});
|
|
696
|
+
|
|
697
|
+
// A directly left-recursive rule must still match its base case.
test('basic left recursion', async t => {
  const grammar = ohm.grammar(`
G {
number = number "1" -- rec
| "1"
}`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  const outcome = matchWithInput(matcher, '1');
  t.is(outcome, 1);
});
|
|
706
|
+
|
|
707
|
+
// A left-recursive rule with two recursive alternatives. For each input, the
// match must succeed and the CST's terminals must reproduce the input.
test('tricky left recursion', async t => {
  const grammar = ohm.grammar(`
G {
number = number "1" -- rec
| number "2" -- rec2
| "1"
}`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  for (const input of ['1', '12', '11212']) {
    t.is(matchWithInput(matcher, input), 1);
    t.is(unparse(matcher), input);
  }
});
|
|
722
|
+
|
|
723
|
+
// Left recursion in `number` combined with an overridden (`:=`) `digit` rule
// that is itself left-recursive. For each input, the match must succeed and
// the CST's terminals must reproduce the input.
test('tricky left recursion #2', async t => {
  const grammar = ohm.grammar(`
G {
number = number digit -- rec
| number "2" -- rec2
| digit
digit := digit "1" -- rec
| "1"
}`);
  const matcher = await WasmMatcher.fromGrammar(grammar);
  for (const input of ['1', '11', '1112111']) {
    t.is(matchWithInput(matcher, input), 1);
    t.is(unparse(matcher), input);
  }
});
|
|
742
|
+
|
|
743
|
+
// A left-recursive arithmetic grammar: matches a compound expression, checks
// that the CST's terminals reproduce it, then matches a bare number on the
// same matcher.
test('arithmetic', async t => {
  const grammar = ohm.grammar(`
Arithmetic {
addExp = addExp "+" mulExp -- plus
| addExp "-" mulExp -- minus
| mulExp

mulExp = mulExp "*" priExp -- times
| mulExp "/" priExp -- divide
| priExp

priExp = "(" addExp ")" -- paren
| number

number = number digit -- rec
| digit
}`);
  const matcher = await WasmMatcher.fromGrammar(grammar);

  const expression = '1+276*(3+4)';
  t.is(matchWithInput(matcher, expression), 1);
  t.is(unparse(matcher), expression);

  t.is(matchWithInput(matcher, '1'), 1);
});
|