@ohm-js/wasm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js ADDED
@@ -0,0 +1,1195 @@
1
+ /* global TextDecoder, TextEncoder, WebAssembly */
2
+
3
+ import * as w from '@wasmgroundup/emit';
4
+ import {pexprs} from 'ohm-js';
5
+ // import wabt from 'wabt';
6
+
7
+ import * as prebuilt from '../build/ohmRuntime.wasm_sections.ts';
8
+
9
// Size in bytes of one page of WebAssembly linear memory.
const WASM_PAGE_SIZE = 0x10000;

// Code generation switches:
// - DEBUG: reserve debug imports and turn string labels into calls to them.
// - FAST_SAVE_BINDINGS / FAST_RESTORE_BINDINGS: read/write the bindings array
//   length directly in memory instead of calling the prebuilt helpers.
const DEBUG = false;
const FAST_SAVE_BINDINGS = true;
const FAST_RESTORE_BINDINGS = true;

// Shorthand for the instruction table of the emit library.
const instr = w.instr;
16
+
17
+ function assert(cond, msg) {
18
+ if (!cond) {
19
+ throw new Error(msg ?? 'assertion failed');
20
+ }
21
+ }
22
+
23
/**
 * Return `x` unchanged, asserting first that it is neither null nor undefined.
 * @param {*} x - The value to check.
 * @param {string} [msg] - Message used when the check fails.
 * @returns {*} `x`.
 */
function checkNotNull(x, msg = 'unexpected null value') {
  const isPresent = x !== null && x !== undefined;
  assert(isPresent, msg);
  return x;
}
27
+
28
/**
 * Return `arr` unchanged, asserting first that it contains no `undefined`.
 * @param {Array} arr - The array to check.
 * @returns {Array} `arr`.
 */
function checkNoUndefined(arr) {
  const firstUndefined = arr.indexOf(undefined);
  assert(firstUndefined === -1, `found undefined @ ${firstUndefined}`);
  return arr;
}
32
+
33
/**
 * Pick a name not yet present in `names`, add it to the set, and return it.
 * `str` itself is used when free; otherwise the first free `${str}_${i}` for
 * i in 0..999.
 * @param {Set<string>} names - Names already taken; mutated.
 * @param {string} str - The preferred name.
 * @returns {string} The name that was added to `names`.
 * @throws {Error} When `str` and all 1000 suffixed candidates are taken.
 */
function uniqueName(names, str) {
  if (!names.has(str)) {
    names.add(str);
    return str;
  }
  for (let suffix = 0; suffix < 1000; suffix++) {
    const candidate = `${str}_${suffix}`;
    if (!names.has(candidate)) {
      names.add(candidate);
      return candidate;
    }
  }
  throw new Error(`Unique name generation failed for ${str}`);
}
45
+
46
/**
 * Build a label of the form `<display string>@<start index>` for `exp`.
 * The start index is -1 when `exp` has no `source`.
 * @param {object} exp - An object with `toDisplayString()` and optional `source`.
 * @returns {string} The label.
 */
function getDebugLabel(exp) {
  let loc = -1;
  if (exp.source) {
    loc = exp.source.startIdx;
  }
  const display = exp.toDisplayString();
  return `${display}@${loc}`;
}
50
+
51
// Look up the function index of the prebuilt function called `nm`; throws if
// there is no entry for that name.
const prebuiltFuncidx = nm => {
  const idx = prebuilt.funcidxByName[nm];
  return checkNotNull(idx);
};
52
+
53
// Build a section whose entries are those of the corresponding prebuilt
// section followed by `els`. The merge is naive: the entry count is summed
// and the contents are concatenated; no type or function indices are rewritten.
function mergeSections(sectionId, prebuilt, els) {
  const {entryCount, contents} = prebuilt;
  const total = entryCount + els.length;
  return w.section(sectionId, [w.u32(total), contents, els]);
}
59
+
60
/**
 * Render a function type as a string such as `[i32,i32][i32]`, suitable for
 * use as a map key.
 * @param {number[]} paramTypes - Value type codes of the parameters.
 * @param {number[]} resultTypes - Value type codes of the results.
 * @returns {string} `[<params>][<results>]`, each list comma-separated.
 */
function functypeToString(paramTypes, resultTypes) {
  // Indexed by the type code's distance from `w.valtype.f64`.
  const valtypeNames = ['f64', 'f32', 'i64', 'i32'];
  const nameOf = t => checkNotNull(valtypeNames[t - w.valtype.f64]);
  const paramStr = paramTypes.map(nameOf).join(',');
  const resultStr = resultTypes.map(nameOf).join(',');
  return `[${paramStr}][${resultStr}]`;
}
66
+
67
/**
 * A deduplicating registry of function types. Each distinct signature is
 * assigned an index, starting at `startIdx` and increasing in insertion order.
 */
class TypeMap {
  /**
   * @param {number} [startIdx] - Index assigned to the first type added.
   */
  constructor(startIdx = 0) {
    // Maps the signature string to a pair [index, encoded functype].
    this._map = new Map();
    this._startIdx = startIdx;
  }

  /**
   * Register a signature (if it is new) and return its index.
   */
  add(paramTypes, resultTypes) {
    const key = functypeToString(paramTypes, resultTypes);
    const existing = this._map.get(key);
    if (existing !== undefined) {
      return existing[0];
    }
    const idx = this._startIdx + this._map.size;
    const encoded = w.functype(paramTypes, resultTypes);
    this._map.set(key, [idx, encoded]);
    return idx;
  }

  /**
   * Register the signature of every declaration in `decls`.
   */
  addDecls(decls) {
    for (const decl of decls) {
      this.add(decl.paramTypes, decl.resultTypes);
    }
  }

  /**
   * Return the index of an already-registered signature; throws if unknown.
   */
  getIdx(paramTypes, resultTypes) {
    const entry = this._map.get(functypeToString(paramTypes, resultTypes));
    return checkNotNull(entry)[0];
  }

  /**
   * Return the index of the signature of `decl`; throws if unknown.
   */
  getIdxForDecl(decl) {
    const {paramTypes, resultTypes} = decl;
    return this.getIdx(paramTypes, resultTypes);
  }

  /**
   * Return the encoded function types, in index order.
   */
  getTypes() {
    return Array.from(this._map.values(), entry => entry[1]);
  }
}
102
+
103
/*
  Offers a higher-level interface for generating WebAssembly code and
  constructing a module.

  Code is accumulated into an internal buffer by the `emit`-style helpers;
  `addFunction` and `addGlobal` package that buffer into declarations.
*/
class Assembler {
  /**
   * @param {TypeMap} typeMap - Registry used to resolve block type indices.
   */
  constructor(typeMap) {
    // Maps global name -> {idx, type, mut, initExpr}.
    this._globals = new Map();

    this._functionDecls = [];
    this._importDecls = [];

    // Keep track of loops/blocks to make it easier (and safer) to generate
    // breaks to the correct index.
    this._blockStack = [];

    // State for the current function being generated.
    this._code = [];
    this._locals = undefined; // Maps local name -> index while in addFunction.

    this._typeMap = typeMap;
  }

  // Register a block signature so that `blocktype` can resolve it later.
  addBlocktype(paramTypes, resultTypes) {
    this._typeMap.add(paramTypes, resultTypes);
  }

  // Return the encoded block type for a previously registered signature.
  // Throws (inside `getIdx`) if the signature was never registered.
  blocktype(paramTypes, resultTypes) {
    const idx = this._typeMap.getIdx(paramTypes, resultTypes);
    assert(idx !== -1, `Unknown blocktype: '${functypeToString(paramTypes, resultTypes)}'`);

    // From the spec: "The type index in a block type is encoded as a
    // positive signed integer, so that its signed LEB128 bit pattern cannot
    // collide with the encoding of value types or the special code 0x40."
    return w.i32(idx);
  }

  // Run `thunk` against a fresh code buffer and return what it emitted,
  // terminated by `end`. The previous buffer is restored afterwards.
  doEmit(thunk) {
    const oldCode = this._code;
    this._code = [];
    thunk();
    const body = [...this._code, instr.end];
    this._code = oldCode;
    return body;
  }

  // Declare a new global whose initializer is the code emitted by
  // `initThunk`. Returns the index of the global.
  addGlobal(name, type, mut, initThunk) {
    assert(!this._globals.has(name), `Global '${name}' already exists`);
    const idx = this._globals.size;
    const initExpr = this.doEmit(initThunk);
    this._globals.set(name, {idx, type, mut, initExpr});
    return idx;
  }

  // Declare a named i32 local in the function currently being generated.
  // Returns the index of the local.
  addLocal(name, type) {
    assert(!this._locals.has(name), `Local '${name}' already exists`);
    assert(type === w.valtype.i32, `invalid local type: ${type}`);
    const idx = this._locals.size;
    this._locals.set(name, idx);
    return idx;
  }

  // Generate a function. Parameters are addressable as locals named
  // `__arg0`, `__arg1`, …; `bodyFn` is invoked with this assembler to emit
  // the body and may add further locals. The resulting declaration is
  // appended to `_functionDecls`.
  addFunction(name, paramTypes, resultTypes, bodyFn) {
    this._locals = new Map();
    paramTypes.forEach((t, i) => {
      this.addLocal(`__arg${i}`, t);
    });
    bodyFn(this);

    // `_locals` also contains the parameters (they occupy the first indices),
    // but parameters are not part of a function's local declarations, so
    // only the remainder is declared here.
    const numDeclaredLocals = this._locals.size - paramTypes.length;
    this._functionDecls.push({
      name,
      paramTypes,
      resultTypes,
      locals: [w.locals(numDeclaredLocals, w.valtype.i32)], // TODO: Support other types?
      body: [...this._code, instr.end],
    });
    this._code = [];
    this._locals = undefined;
  }

  // Pure codegen helpers (used to generate the function bodies).

  // Index of the global called `name`; throws if it does not exist.
  globalidx(name) {
    const {idx} = checkNotNull(this._globals.get(name), `Unknown global: ${name}`);
    return idx;
  }

  // Index of the local called `name` in the current function; throws if it
  // does not exist.
  localidx(name) {
    return checkNotNull(this._locals.get(name), `Unknown local: ${name}`);
  }

  // Append the given (arbitrarily nested) values to the current code buffer.
  emit(...bytes) {
    this._code.push(...checkNoUndefined(bytes.flat(Infinity)));
  }

  // Emit `block <bt> … end`, with the body produced by `bodyThunk`.
  block(bt, bodyThunk) {
    this.emit(w.instr.block, bt);
    this._blockStack.push('block');
    bodyThunk();
    this._blockStack.pop();
    this.emit(w.instr.end);
  }

  // Emit `loop <bt> … end`, with the body produced by `bodyThunk`.
  loop(bt, bodyThunk) {
    this.emit(w.instr.loop, bt);
    this._blockStack.push('loop');
    bodyThunk();
    this._blockStack.pop();
    this.emit(w.instr.end);
  }

  // Emit `if <bt> … end` with no else branch.
  if(bt, bodyThunk) {
    this.ifElse(bt, bodyThunk);
  }

  // Emit `if <bt> … [else …] end`; the else branch is omitted when
  // `elseThunk` is not given.
  ifElse(bt, thenThunk, elseThunk = undefined) {
    this.emit(w.instr.if, bt);
    this._blockStack.push('if');
    thenThunk();
    if (elseThunk) {
      this.emit(w.instr.else);
      elseThunk();
    }
    this._blockStack.pop();
    this.emit(w.instr.end);
  }

  // Like `if`, but the body runs when the condition on the stack is zero.
  ifFalse(bt, bodyThunk) {
    this.emit(instr.i32.eqz);
    this.if(bt, bodyThunk);
  }

  // Emit an unchecked `br`; prefer `break`/`continue`, which validate the target.
  br(depth) {
    this.emit(instr.br, w.labelidx(depth));
  }

  i32Add() {
    this.emit(instr.i32.add);
  }

  i32Const(value) {
    this.emit(instr.i32.const, w.i32(value));
  }

  i32Load(offset = 0) {
    this.emit(instr.i32.load, w.memarg(Assembler.ALIGN_4_BYTES, offset));
  }

  i32Load8u(offset = 0) {
    this.emit(instr.i32.load8_u, w.memarg(Assembler.ALIGN_1_BYTE, offset));
  }

  i32Mul() {
    this.emit(instr.i32.mul);
  }

  i32Ne() {
    this.emit(instr.i32.ne);
  }

  // Store [addr:i32, val:i32] -> []
  i32Store(offset = 0) {
    this.emit(instr.i32.store, w.memarg(Assembler.ALIGN_4_BYTES, offset));
  }

  i32Sub() {
    this.emit(instr.i32.sub);
  }

  globalGet(name) {
    this.emit(instr.global.get, this.globalidx(name));
  }

  globalSet(name) {
    this.emit(instr.global.set, this.globalidx(name));
  }

  localGet(name) {
    this.emit(instr.local.get, this.localidx(name));
  }

  localSet(name) {
    this.emit(instr.local.set, this.localidx(name));
  }

  localTee(name) {
    this.emit(instr.local.tee, this.localidx(name));
  }

  // Emit a `br` out of the enclosing construct at `depth`, which must be a
  // `block` or an `if`.
  break(depth) {
    const what = this._blockStack.at(-(depth + 1));
    assert(what === 'block' || what === 'if', 'Invalid break');
    this.emit(instr.br, w.labelidx(depth));
  }

  // Conditional break -- emits a `br_if` for the given depth.
  condBreak(depth) {
    const what = this._blockStack.at(-(depth + 1));
    assert(what === 'block' || what === 'if', 'Invalid condBreak');
    this.emit(instr.br_if, w.labelidx(depth));
  }

  // Emit a `br` to the enclosing construct at `depth`, which must be a `loop`.
  continue(depth) {
    const what = this._blockStack.at(-(depth + 1));
    assert(what === 'loop', 'Invalid continue');
    this.emit(instr.br, w.labelidx(depth));
  }

  // "Macros" -- codegen helpers specific to Ohm.

  // [i32] -> [i32]
  i32Inc() {
    this.i32Const(1);
    this.i32Add();
  }

  // [i32] -> [i32]
  i32Dec() {
    this.i32Const(1);
    this.i32Sub();
  }

  // Duplicate the top of the stack, using the local named 'tmp'.
  // [i32] -> [i32, i32]
  dup() {
    this.localTee('tmp');
    this.localGet('tmp');
  }

  // Push the byte at the current input position.
  // [] -> [i32]
  currCharCode() {
    this.globalGet('pos');
    this.i32Load8u(Compiler.INPUT_BUFFER_OFFSET);
  }

  // Push the byte at the current input position, then advance the position.
  // [] -> [i32]
  nextCharCode() {
    this.currCharCode();
    this.incPos();
  }

  // Set the local named 'ret' to the constant `val`.
  setRet(val) {
    this.i32Const(val);
    this.localSet('ret');
  }

  // Reserve a new stack frame (by decreasing `sp`) and save the current
  // input position and number of bindings into it.
  pushStackFrame() {
    this.globalGet('sp');
    this.i32Const(Assembler.STACK_FRAME_SIZE_BYTES);
    this.i32Sub();
    this.globalSet('sp');
    this.savePos();
    this.saveNumBindings();
  }

  // Release the current stack frame (by increasing `sp`).
  popStackFrame() {
    this.i32Const(Assembler.STACK_FRAME_SIZE_BYTES);
    this.globalGet('sp');
    this.i32Add();
    this.globalSet('sp');
  }

  // Save the current input position.
  savePos() {
    // stack[sp] = pos
    this.globalGet('sp');
    this.globalGet('pos');
    this.i32Store();
  }

  // Load the saved input position onto the stack.
  getSavedPos() {
    this.globalGet('sp');
    this.i32Load();
  }

  // Reset the input position to the one saved in the current stack frame.
  restorePos() {
    this.getSavedPos();
    this.globalSet('pos');
  }

  // Save the current number of bindings at offset 4 of the current frame.
  saveNumBindings() {
    this.globalGet('sp');
    if (FAST_SAVE_BINDINGS) {
      this.globalGet('bindings');
      this.i32Load(12); // Array<i32>.length_
    } else {
      this.callPrebuiltFunc('getBindingsLength');
    }
    this.i32Store(4);
  }

  // Load the saved number of bindings onto the stack.
  getSavedNumBindings() {
    this.globalGet('sp');
    this.i32Load(4);
  }

  // Reset the number of bindings to the one saved in the current stack frame.
  restoreBindingsLength() {
    if (FAST_RESTORE_BINDINGS) {
      // It's safe to directly set the length as long as it's shrinking.
      this.globalGet('bindings');
      this.getSavedNumBindings();
      this.i32Store(12); // Array<i32>.length_
    } else {
      this.getSavedNumBindings();
      this.callPrebuiltFunc('setBindingsLength');
    }
  }

  // Increment the current input position by 1.
  // [] -> []
  incPos() {
    this.globalGet('pos');
    this.i32Inc();
    this.globalSet('pos');
  }

  // Emit a call to the prebuilt function called `name`.
  callPrebuiltFunc(name) {
    this.emit(instr.call, w.funcidx(prebuiltFuncidx(name)));
  }

  // Call the prebuilt `newIterationNode` with the saved position, the current
  // position, and the saved number of bindings.
  newIterNodeWithSavedPosAndBindings() {
    this.getSavedPos();
    this.globalGet('pos');
    this.getSavedNumBindings();
    this.callPrebuiltFunc('newIterationNode');
  }

  // Call the prebuilt `newTerminalNode` with the saved and current positions.
  newTerminalNodeWithSavedPos() {
    this.getSavedPos();
    this.globalGet('pos');
    this.callPrebuiltFunc('newTerminalNode');
  }

  // Consume a condition from the stack and set 'ret' to a new terminal node
  // if it is non-zero, or to 0 otherwise.
  // [i32] -> []
  maybeReturnTerminalNodeWithSavedPos() {
    this.ifElse(
      w.blocktype.i32,
      () => this.newTerminalNodeWithSavedPos(),
      () => this.i32Const(0),
    );
    this.localSet('ret');
  }
}
438
Object.assign(Assembler, {
  // Alignment hints passed to `w.memarg` for loads and stores.
  ALIGN_1_BYTE: 0,
  ALIGN_4_BYTES: 2,

  CST_NODE_HEADER_SIZE_BYTES: 8,

  // A "memo column" holds the info for one input position, i.e. one char.
  MEMO_COL_SIZE_BYTES: 4 * 256,

  // A stack frame holds the saved input position (offset 0) and the saved
  // number of bindings (offset 4).
  STACK_FRAME_SIZE_BYTES: 8,
});
446
+
447
/**
 * Compiles a grammar to a WebAssembly module. Each rule becomes a function
 * in the generated module, which is merged with the prebuilt runtime sections.
 */
export class Compiler {
  /**
   * @param {object} grammar - The grammar to compile. It must provide `rules`,
   *     `defaultStartRule` and (optionally) `superGrammar`.
   */
  constructor(grammar) {
    this.importDecls = [
      {
        module: 'env',
        name: 'abort',
        // (i32, i32, i32, i32) -> void
        // NOTE(review): presumably the runtime's abort hook (message,
        // file name, line, column) — confirm against the prebuilt runtime.
        paramTypes: [w.valtype.i32, w.valtype.i32, w.valtype.i32, w.valtype.i32],
        resultTypes: [],
      },
      {
        module: 'env',
        name: 'fillInputBuffer',
        // (offset: i32, maxLen: i32) -> i32
        // Returns the actual number of bytes read.
        paramTypes: [w.valtype.i32, w.valtype.i32],
        resultTypes: [w.valtype.i32],
      },
      {
        module: 'env',
        name: 'printI32',
        // (val: i32) -> void
        paramTypes: [w.valtype.i32],
        resultTypes: [],
      },
    ];
    this.grammar = grammar;

    // The rule ID is a 0-based index that's mapped to the name.
    // It is *not* the same as the function index of the rule's eval function.
    // Ensure that the default start rule always has id 0.
    this.ruleIdByName = new Map([[grammar.defaultStartRule, 0]]);
    for (const name of Object.keys(grammar.rules)) {
      if (name !== grammar.defaultStartRule) {
        this.ruleIdByName.set(name, this.ruleIdByName.size);
      }
    }
  }

  // Return the rule info (an object with `body` and `formals`) for the given
  // rule name, searching `grammar` and then its supergrammars. Names starting
  // with '$term$' denote a synthetic rule matching the terminal that follows
  // the prefix. Throws if the rule cannot be found.
  lookUpRule(ruleName, grammar = this.grammar) {
    // TODO: Find a cleaner way to handle terminals as parameters.
    // We should support any kind of single-arity parsing expression as
    // a parameter, not just terminals.
    if (ruleName.startsWith('$term$')) {
      return {
        body: new pexprs.Terminal(ruleName.substring(6)),
        formals: [],
      };
    }
    if (ruleName in grammar.rules) {
      return grammar.rules[ruleName];
    }
    if (grammar.superGrammar) {
      return this.lookUpRule(ruleName, grammar.superGrammar);
    }
    throw new Error(
      `Rule '${ruleName}' not found in this grammar or any of its supergrammars`,
    );
  }

  // Make sure that `name` has a rule ID. Rules that are not defined in this
  // grammar -- inherited rules and synthetic '$term$' rules -- are lazily
  // added to the map the first time they are referenced.
  // Throws if the name is not known at all.
  recordRule(name) {
    if (this.ruleIdByName.has(name)) return;

    // Search the whole supergrammar chain (like `lookUpRule` does), and
    // tolerate a grammar that has no supergrammar.
    const isInherited = () => {
      for (let g = this.grammar.superGrammar; g; g = g.superGrammar) {
        if (name in g.rules) return true;
      }
      return false;
    };

    if (!name.startsWith('$term$') && !isInherited()) {
      throw new Error(`Unknown rule: ${name}`);
    }
    this.ruleIdByName.set(name, this.ruleIdByName.size);
  }

  // Return a funcidx corresponding to the eval function for the given rule.
  ruleEvalFuncIdx(name) {
    const offset = this.importDecls.length + prebuilt.funcsec.entryCount;
    return w.funcidx(checkNotNull(this.ruleIdByName.get(name)) + offset);
  }

  // Return an object implementing all of the debug imports. Each one calls
  // `log` with the import's name and its argument.
  getDebugImports(log) {
    const ans = {};
    for (const decl of this.importDecls.filter(d => d.module === 'debug')) {
      const {name} = decl;
      ans[name] = arg => {
        log(name, arg);
      };
    }
    return ans;
  }

  // Compile the grammar and return the bytes of the resulting module.
  compile() {
    const typeMap = (this.typeMap = new TypeMap(prebuilt.typesec.entryCount));
    const asm = (this.asm = new Assembler(typeMap));
    asm.addBlocktype([w.valtype.i32], []);
    asm.addBlocktype([w.valtype.i32], [w.valtype.i32]);
    asm.addBlocktype([], [w.valtype.i32]); // Rule eval

    // The globals below mirror those of the prebuilt runtime:
    // (global $runtime/ohmRuntime/pos (mut i32) (i32.const 0))
    // (global $runtime/ohmRuntime/sp (mut i32) (i32.const 0))
    // (global $~lib/shared/runtime/Runtime.Stub i32 (i32.const 0))
    // (global $~lib/shared/runtime/Runtime.Minimal i32 (i32.const 1))
    // (global $~lib/shared/runtime/Runtime.Incremental i32 (i32.const 2))
    // (global $~lib/rt/stub/startOffset (mut i32) (i32.const 0))
    // (global $~lib/rt/stub/offset (mut i32) (i32.const 0))
    // (global $~lib/native/ASC_RUNTIME i32 (i32.const 0))
    // (global $runtime/ohmRuntime/bindings (mut i32) (i32.const 0))
    // (global $~lib/memory/__heap_base i32 (i32.const 1179884))
    asm.addGlobal('pos', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
    asm.addGlobal('sp', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
    asm.addGlobal('__Runtime.Stub', w.valtype.i32, w.mut.const, () => asm.i32Const(0));
    asm.addGlobal('__Runtime.Minimal', w.valtype.i32, w.mut.const, () => asm.i32Const(1));
    asm.addGlobal('__Runtime.Incremental', w.valtype.i32, w.mut.const, () => asm.i32Const(2));
    asm.addGlobal('__startOffset', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
    asm.addGlobal('__offset', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
    asm.addGlobal('__ASC_RUNTIME', w.valtype.i32, w.mut.const, () => asm.i32Const(0));
    asm.addGlobal('bindings', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
    asm.addGlobal('__heap_base', w.valtype.i32, w.mut.var, () => asm.i32Const(67240172));

    // Reserve a fixed number of imports for debug labels.
    // NOTE(review): the first reserved debug import lands at index
    // `importDecls.length`, so the `+ 1` leaves `debug0` unclaimed --
    // confirm whether that is intentional.
    const debugBaseFuncIdx = this.importDecls.length + 1;
    if (DEBUG) {
      for (let i = 0; i < 5000; i++) {
        this.importDecls.push({
          module: 'debug',
          name: `debug${i}`,
          paramTypes: [],
          resultTypes: [],
        });
      }
    }

    const functionDecls = this.functionDecls();
    this.rewriteDebugLabels(functionDecls, debugBaseFuncIdx);
    return this.buildModule(typeMap, functionDecls);
  }

  // Generate the eval function for a single rule and return its declaration.
  // The function takes one i32 per formal parameter and returns an i32.
  compileRule(name, ruleInfo) {
    const {asm} = this;
    const paramTypes = ruleInfo.formals.map(_ => w.valtype.i32);
    asm.addFunction(`$${name}`, paramTypes, [w.valtype.i32], () => {
      asm.addLocal('ret', w.valtype.i32);
      asm.addLocal('tmp', w.valtype.i32);

      this.emitPExpr(ruleInfo.body);
      asm.localGet('ret');
    });
    return this.asm._functionDecls.at(-1);
  }

  buildRuleNamesSection(ruleNames) {
    // A custom section that allows the clients to look up rule IDs by name.
    // They're simply encoded as a vec(name), and the client can turn this
    // into a list/array and use the ruleId as the index.
    return w.custom(w.name('ruleNames'), w.vec(ruleNames.map(n => w.name(n))));
  }

  // Assemble the final module from the prebuilt sections plus the given
  // function declarations, and return its bytes as a Uint8Array.
  buildModule(typeMap, functionDecls) {
    const {importDecls} = this;
    assert(this.importDecls.length === prebuilt.destImportCount, 'import count mismatch');

    const ruleNames = [...this.ruleIdByName.keys()];

    // Ensure that `ruleNames` is in the correct order.
    ruleNames.forEach((n, i) => assert(i === this.ruleIdByName.get(n)));

    typeMap.addDecls(importDecls);
    typeMap.addDecls(functionDecls);

    const globals = [];
    const imports = importDecls.map(f =>
      w.import_(f.module, f.name, w.importdesc.func(typeMap.getIdxForDecl(f))),
    );
    const funcs = functionDecls.map(f => w.typeidx(typeMap.getIdxForDecl(f)));
    const codes = functionDecls.map(f => w.code(w.func(f.locals, f.body)));

    // Generated functions come after the imports and the prebuilt functions.
    const exportOffset = importDecls.length + prebuilt.funcsec.entryCount;
    const exports = functionDecls.map((f, i) =>
      w.export_(f.name, w.exportdesc.func(i + exportOffset)),
    );
    exports.push(w.export_('memory', w.exportdesc.mem(0)));
    exports.push(w.export_('match', w.exportdesc.func(prebuiltFuncidx('match'))));
    exports.push(w.export_('getCstRoot', w.exportdesc.func(prebuiltFuncidx('getCstRoot'))));

    // Process globals.
    for (const [name, {type, mut, initExpr}] of this.asm._globals.entries()) {
      globals.push(w.global(w.globaltype(type, mut), initExpr));

      // Export all of the globals so they get a name for debugging.
      // TODO: Handle this instead via the name section.
      exports.push(w.export_(name, [0x03, this.asm.globalidx(name)]));
    }
    // The module will have a table containing references to all of the rule eval functions.
    // The table declaration goes in the table section; the data in the element section.
    // Note that the rule ID can be used directly as the table index.
    const numRules = this.ruleIdByName.size;
    const table = w.table(
      w.tabletype(w.elemtype.funcref, w.limits.minmax(numRules, numRules)),
    );
    const tableData = ruleNames.map(name => this.ruleEvalFuncIdx(name));
    assert(numRules === tableData.length, 'Invalid rule count');

    // Determine the index of the start function.
    const indexOfStart = functionDecls.findIndex(f => f.name === 'start');
    assert(indexOfStart !== -1, 'No start function found');
    const startFuncidx = imports.length + prebuilt.funcsec.entryCount + indexOfStart;

    // Note: globals are *not* merged; they are assumed to be shared.
    const mod = w.module([
      mergeSections(w.SECTION_ID_TYPE, prebuilt.typesec, typeMap.getTypes()),
      w.importsec(imports),
      mergeSections(w.SECTION_ID_FUNCTION, prebuilt.funcsec, funcs),
      w.tablesec([table]),
      w.memsec([w.mem(w.memtype(w.limits.min(1024 + 24)))]),
      w.globalsec(globals),
      w.exportsec(exports),
      w.startsec(w.start(startFuncidx)),
      w.elemsec([w.elem(w.tableidx(0), [instr.i32.const, w.i32(0), instr.end], tableData)]),
      mergeSections(w.SECTION_ID_CODE, prebuilt.codesec, codes),
      w.customsec(this.buildRuleNamesSection(ruleNames)),
    ]);
    return Uint8Array.from(mod.flat(Infinity));
  }

  // A *brilliant* way to add arbitrary labels to the generated code.
  // Goes through the body of all functions in `decls`, and replaces any
  // strings with a call to a dummy function with the same name.
  // Ensures that there are no duplicate dummy function names, but does not
  // guarantee that there are no collisions with other functions.
  // The function bodies in `decls` are updated in place (as are the claimed
  // entries of `importDecls`); nothing is returned. When DEBUG is off, the
  // strings are simply removed.
  rewriteDebugLabels(decls, baseFuncIdx) {
    let nextFuncIdx = baseFuncIdx;
    const names = new Set();
    for (let i = 0; i < decls.length; i++) {
      const entry = decls[i];
      entry.body = entry.body.flatMap(x => {
        if (typeof x !== 'string') return x;

        // If debugging is disabled, just drop the string altogether.
        if (!DEBUG) return [];

        // Claim one of the reserved debug functions…
        const decl = this.importDecls[nextFuncIdx];
        assert(decl, 'Too few debug functions!');
        assert(decl.module === 'debug');
        decl.name = uniqueName(names, x);

        // Labels starting with 'END' additionally pass local 0 as an argument.
        let pushArg = [];
        if (x.startsWith('END')) {
          decl.paramTypes = [w.valtype.i32];
          pushArg = [instr.local.get, w.localidx(0)];
        }

        // …and replace the string with a call to that function.
        return [...pushArg, instr.call, w.funcidx(nextFuncIdx++)].flat(Infinity);
      });
    }
  }

  // Compile all the rules, plus the 'start' function, and return the
  // resulting function declarations (rules in rule ID order, then 'start').
  functionDecls() {
    // This is a bit messy. By default, we include all the rules in the
    // grammar itself, but only inherited rules if they are referenced.
    // So, `ruleIdByName` can grow while we're iterating (as we reference
    // inherited rules for the first time). That's fine: a Map iterator also
    // visits entries that are added while the iteration is in progress.
    const ruleDecls = [];
    for (const name of this.ruleIdByName.keys()) {
      ruleDecls.push(this.compileRule(name, this.lookUpRule(name)));
    }
    const {asm} = this;
    asm.addFunction('start', [], [], () => {
      asm.emit(instr.call, w.funcidx(prebuilt.startFuncidx));
    });
    ruleDecls.push(asm._functionDecls.at(-1));
    return ruleDecls;
  }

  // Emit the code to evaluate `exp`, leaving the result in the 'ret' local.
  // Contract: emitPExpr always means we're going deeper in the PExpr tree.
  emitPExpr(exp) {
    const {asm} = this;

    // Applications get no stack frame or debug labels of their own.
    if (exp.constructor === pexprs.Apply) {
      this.emitApply(exp);
      return;
    }

    const debugLabel = getDebugLabel(exp);
    asm.emit(`BEGIN ${debugLabel}`);
    asm.pushStackFrame();

    // Wrap the body in a block, which is useful for two reasons:
    // - it allows early returns.
    // - it makes sure that the generated code doesn't have stack effects.
    asm.block(w.blocktype.empty, () => {
      // prettier-ignore
      switch (exp.constructor) {
        case pexprs.Alt: this.emitAlt(exp); break;
        case pexprs.Extend: this.emitExtend(exp); break;
        case pexprs.Lookahead: this.emitLookahead(exp, true); break;
        case pexprs.Not: this.emitLookahead(exp, false); break;
        case pexprs.Seq: this.emitSeq(exp); break;
        case pexprs.Star: this.emitStar(exp); break;
        case pexprs.Opt: this.emitOpt(exp); break;
        case pexprs.Range: this.emitRange(exp); break;
        case pexprs.Param: this.emitParam(exp); break;
        case pexprs.Plus: this.emitPlus(exp); break;
        case pexprs.Terminal: this.emitTerminal(exp); break;
        case pexprs.UnicodeChar: this.emitFail(); break; // TODO: Handle this properly
        default:
          if (exp === pexprs.any) {
            this.emitAny();
          } else if (exp === pexprs.end) {
            this.emitEnd();
          } else {
            throw new Error(`not handled: ${exp.constructor.name}`);
          }
      }
    });
    asm.popStackFrame();
    asm.emit(`END ${debugLabel}`);
  }

  // Try each term in order; stop at the first one that succeeds. After a
  // failed term, the input position and bindings are reset.
  emitAlt(exp) {
    const {asm} = this;
    asm.block(w.blocktype.empty, () => {
      for (const term of exp.terms) {
        this.emitPExpr(term);
        asm.localGet('ret');
        asm.condBreak(0); // return if succeeded
        asm.restorePos();
        asm.restoreBindingsLength();
      }
    });
  }

  // Consume one char; succeeds unless that char is 0xff.
  emitAny() {
    const {asm} = this;
    asm.i32Const(0xff);
    asm.nextCharCode();
    asm.i32Ne();
    asm.maybeReturnTerminalNodeWithSavedPos();
  }

  // Emit a rule application: push the rule ID and up to three arguments,
  // then call the matching prebuilt `evalApply*` function.
  emitApply(exp) {
    const {asm} = this;
    this.recordRule(exp.ruleName);

    const argCount = exp.args.length;
    assert(argCount <= 3, 'Too many arguments to rule application');

    const pushRuleId = name => {
      this.recordRule(name);
      asm.i32Const(checkNotNull(this.ruleIdByName.get(name), `Unknown rule: ${name}`));
    };

    pushRuleId(exp.ruleName);
    exp.args.forEach(arg => {
      switch (arg.constructor) {
        case pexprs.Apply:
          pushRuleId(arg.ruleName);
          break;
        case pexprs.Param:
          asm.localGet(`__arg${arg.index}`);
          break;
        case pexprs.Terminal:
          // Terminals are passed as synthetic rules; see `lookUpRule`.
          pushRuleId(`$term$${arg.obj}`);
          break;
        default:
          throw new Error(`not supported: ${arg.constructor.name}`);
      }
    });
    // TODO: Handle this at grammar parse time, not here.
    if (exp.ruleName.includes('_')) {
      asm.callPrebuiltFunc(`evalApplyNoMemo${argCount}`);
    } else {
      asm.callPrebuiltFunc(`evalApply${argCount}`);
    }
    asm.localSet('ret');
  }

  // Succeeds if the char at the current position is 0xff; consumes nothing.
  emitEnd() {
    const {asm} = this;
    asm.i32Const(0xff);
    // Careful! We shouldn't move the pos here. Or does it matter?
    asm.currCharCode();
    asm.emit(instr.i32.eq);
    asm.maybeReturnTerminalNodeWithSavedPos();
  }

  // An extended rule is an alternation of the new body and the body of the
  // rule with the same name in the supergrammar.
  emitExtend(exp) {
    this.emitAlt({
      terms: [exp.body, exp.superGrammar.rules[exp.name].body],
    });
  }

  // Unconditionally fail.
  emitFail() {
    const {asm} = this;
    asm.i32Const(0);
    asm.localSet('ret');
  }

  // Positive (`shouldMatch`) or negative lookahead: evaluate `expr`, negate
  // the result for negative lookahead, then reset the bindings and position.
  emitLookahead({expr}, shouldMatch = true) {
    const {asm} = this;

    // TODO: Should positive lookahead record a CST?
    this.emitPExpr(expr);
    if (!shouldMatch) {
      asm.localGet('ret');
      asm.emit(instr.i32.eqz);
      asm.localSet('ret');
    }
    asm.restoreBindingsLength();
    asm.restorePos();
  }

  // Optional: always produces an iteration node; if `expr` failed, the
  // position and bindings are reset first.
  emitOpt({expr}) {
    const {asm} = this;
    this.emitPExpr(expr);
    asm.localGet('ret');
    asm.ifFalse(w.blocktype.empty, () => {
      asm.restorePos();
      asm.restoreBindingsLength();
    });
    asm.newIterNodeWithSavedPosAndBindings();
    asm.localSet('ret');
  }

  // Evaluate the rule whose ID was passed as the parameter with the given index.
  emitParam({index}) {
    const {asm} = this;
    asm.localGet(`__arg${index}`);
    asm.callPrebuiltFunc('evalApply0');
    asm.localSet('ret');
  }

  // One or more: evaluate `expr` once and, only if that succeeded, continue
  // as for `Star`.
  emitPlus(plusExp) {
    const {asm} = this;
    this.emitPExpr(plusExp.expr);
    asm.localGet('ret');
    asm.if(w.blocktype.empty, () => {
      this.emitStar(plusExp);
    });
  }

  // Consume one char and succeed if its code is within [from, to].
  emitRange({from, to}) {
    assert(from.length === 1 && to.length === 1);

    const lo = from.charCodeAt(0);
    const hi = to.charCodeAt(0);

    // TODO: Do we disallow 0xff in the range?
    const {asm} = this;
    asm.nextCharCode();

    // if (c > hi) return 0;
    asm.dup();
    asm.i32Const(hi);
    asm.emit(instr.i32.gt_u);
    asm.if(w.blocktype.empty, () => {
      asm.setRet(0);
      asm.break(1);
    });

    // if (c >= lo)
    asm.i32Const(lo);
    asm.emit(instr.i32.ge_u);
    asm.maybeReturnTerminalNodeWithSavedPos();
  }

  // Evaluate the factors in order, bailing out at the first failure.
  emitSeq(exp) {
    const {asm} = this;

    // An empty sequence always succeeds.
    if (exp.factors.length === 0) {
      asm.setRet(1);
      return;
    }

    for (const factor of exp.factors) {
      this.emitPExpr(factor);
      asm.localGet('ret');
      asm.emit(instr.i32.eqz);
      asm.condBreak(0);
    }
  }

  // Zero or more: evaluate `expr` repeatedly until it fails, then produce an
  // iteration node. The second parameter is currently unused.
  emitStar({expr}, {reuseStackFrame} = {}) {
    const {asm} = this;

    // We push another stack frame because we need to save and restore
    // the position just before the last (failed) expression.
    asm.pushStackFrame();
    asm.block(w.blocktype.empty, () => {
      asm.loop(w.blocktype.empty, () => {
        asm.savePos();
        asm.saveNumBindings();
        this.emitPExpr(expr);
        asm.localGet('ret');
        asm.emit(instr.i32.eqz);
        asm.condBreak(1);
        asm.continue(0);
      });
    });
    asm.restorePos();
    asm.restoreBindingsLength();
    asm.popStackFrame();

    asm.newIterNodeWithSavedPosAndBindings();
    asm.localSet('ret');
  }

  // Match the terminal char by char, failing at the first mismatch.
  emitTerminal(exp) {
    // TODO:
    // - proper UTF-8!
    // - handle longer terminals with a loop

    const {asm} = this;
    asm.emit('Terminal');
    for (const c of [...exp.obj]) {
      // Compare next char
      asm.i32Const(c.charCodeAt(0));
      asm.currCharCode();
      asm.emit(instr.i32.ne);
      asm.if(w.blocktype.empty, () => {
        asm.setRet(0);
        asm.break(1);
      });
      asm.incPos();
    }
    asm.newTerminalNodeWithSavedPos();
    asm.localSet('ret');
  }
}
1000
+ // Memory layout:
1001
+ // - First page is for the PExpr stack (origPos, etc.), growing downwards.
1002
+ // - 2nd page is for input buffer (max 64k for now).
1003
+ // - Pages 3-1026 (incl.) for memo table (256 entries per char, 4 bytes each).
1004
+ // - Remainder (>1026) is for CST (growing upwards).
1005
Object.assign(Compiler, {
  // Starting offset of the stack.
  STACK_START_OFFSET: WASM_PAGE_SIZE,

  // Offset of the input buffer in memory.
  INPUT_BUFFER_OFFSET: WASM_PAGE_SIZE,

  // For now, 1k *pages* for the memo table.
  // That's 1/64 page per char:
  // - 4 bytes per entry
  // - 256 entries per column
  // - 1 column per char
  // - 64k input length.
  // Starting offset of memo records.
  MEMO_START_OFFSET: 2 * WASM_PAGE_SIZE,

  // Starting offset of CST records.
  CST_START_OFFSET: (1024 + 2) * WASM_PAGE_SIZE,
});
1016
+
1017
/**
 * Matches an input string using a grammar that was compiled to WebAssembly.
 *
 * Instances are created with `WasmMatcher.fromBytes()` (precompiled module)
 * or `WasmMatcher.fromGrammar()` (compile, then instantiate). The module
 * must contain a `ruleNames` custom section, from which rule ids are read.
 */
export class WasmMatcher {
  constructor() {
    this._instance = undefined;
    // Start with an empty input and a zero read position so that getInput()
    // and _fillInputBuffer() never see `undefined`.
    this._input = '';
    this._pos = 0;
    // Functions imported by the Wasm module under the `env` namespace.
    this._env = {
      abort() {
        throw new Error('abort');
      },
      printI32(val) {
        // eslint-disable-next-line no-console
        console.log(val);
      },
      fillInputBuffer: this._fillInputBuffer.bind(this),
    };
    // Maps rule name -> index of the name within the `ruleNames` section.
    this._ruleIds = new Map();
  }

  /**
   * Populate `_ruleIds` from the module's `ruleNames` custom section.
   * The section is decoded as a ULEB128 entry count followed, for each
   * entry, by a ULEB128 byte length and that many bytes of UTF-8.
   *
   * @param {WebAssembly.Module} module
   * @throws {Error} If the section is missing or malformed.
   */
  _extractRuleIds(module) {
    const sections = WebAssembly.Module.customSections(module, 'ruleNames');
    if (sections.length === 0) {
      throw new Error('No ruleNames section found in module');
    }

    const data = new Uint8Array(sections[0]);
    let offset = 0;

    // Decode one unsigned LEB128 value of up to 32 bits (at most 5 bytes).
    const parseU32 = () => {
      let value = 0;
      for (let shift = 0; shift < 35; shift += 7) {
        assert(offset < data.length, 'Unexpected end of ruleNames section');
        const byte = data[offset++];
        value |= (byte & 0x7f) << shift;
        if ((byte & 0x80) === 0) {
          // `>>> 0` reinterprets the result as unsigned.
          return value >>> 0;
        }
      }
      throw new Error('Invalid ULEB128 value in ruleNames section');
    };

    const decoder = new TextDecoder('utf-8');
    const numEntries = parseU32();
    for (let i = 0; i < numEntries; i++) {
      const stringLen = parseU32();
      assert(offset + stringLen <= data.length, 'Unexpected end of ruleNames section');
      const bytes = data.subarray(offset, offset + stringLen);
      offset += stringLen;
      this._ruleIds.set(decoder.decode(bytes), i);
    }
  }

  /**
   * Instantiate `source` with this matcher's imports and read its rule ids.
   *
   * @param {BufferSource} source - Bytes of the Wasm module.
   * @param {object} [debugImports] - Functions exposed as the `debug` namespace.
   * @returns {Promise<WasmMatcher>} This matcher.
   */
  async _instantiate(source, debugImports = {}) {
    const {module, instance} = await WebAssembly.instantiate(source, {
      env: this._env,
      debug: debugImports,
    });
    this._instance = instance;
    this._extractRuleIds(module);
    return this;
  }

  /**
   * Create a matcher from the bytes of an already compiled module.
   *
   * @param {BufferSource} source
   * @returns {Promise<WasmMatcher>}
   */
  static async fromBytes(source) {
    return new WasmMatcher()._instantiate(source);
  }

  /**
   * Compile `grammar` to Wasm and create a matcher for it.
   *
   * @param {object} grammar - Grammar handed to `Compiler`.
   * @returns {Promise<WasmMatcher>}
   */
  static async fromGrammar(grammar) {
    const compiler = new Compiler(grammar);
    const bytes = compiler.compile();

    const m = new WasmMatcher();

    // The debug callback is deliberately a no-op; add tracing of
    // (label, ret) here when debugging generated code.
    const debugImports = compiler.getDebugImports((label, ret) => {});
    return m._instantiate(bytes, debugImports);
  }

  /** @returns {string} The current input string. */
  getInput() {
    return this._input;
  }

  /**
   * Replace the input string.
   *
   * @param {string} str
   * @returns {WasmMatcher} This matcher, for chaining.
   */
  setInput(str) {
    if (this._input !== str) {
      this._input = str;
      // Nothing of the new input has been handed to the module yet.
      this._pos = 0;
    }
    return this;
  }

  /** Not supported yet; always throws. */
  replaceInputRange(startIdx, endIdx, str) {
    throw new Error('Not implemented');
  }

  /**
   * Run the module's exported `match` function with argument 0.
   *
   * @returns {number} Whatever the exported function returns.
   */
  match() {
    // Every match reads the input from the beginning.
    this._pos = 0;
    return this._instance.exports.match(0);
  }

  /** @returns {CstNode} A view of the CST node at the address reported by the module. */
  getCstRoot() {
    const {buffer} = this._instance.exports.memory;
    const addr = this._instance.exports.getCstRoot();
    const ruleNames = [...this._ruleIds.keys()];
    return new CstNode(ruleNames, new DataView(buffer), addr);
  }

  /**
   * Import called by the module to obtain input. Encodes the not yet
   * consumed part of the input as UTF-8 at `INPUT_BUFFER_OFFSET + offset`
   * and appends a 0xff end marker.
   *
   * @param {number} offset - Byte offset within the input buffer.
   * @param {number} maxLen - Currently ignored; the fixed 64 KiB limit applies.
   * @returns {number} Number of bytes written (excluding the end marker).
   * @throws {Error} 'Input too long' if the encoded input needs 64 KiB or more.
   */
  _fillInputBuffer(offset, maxLen) {
    const MAX_INPUT_BYTES = 64 * 1024;
    const {memory} = this._instance.exports;
    // Bound the view so that oversized input is rejected without first
    // overwriting whatever follows the input buffer in memory.
    const buf = new Uint8Array(
      memory.buffer,
      Compiler.INPUT_BUFFER_OFFSET + offset,
      MAX_INPUT_BYTES,
    );
    const pending = this._input.substring(this._pos);
    const {read, written} = new TextEncoder().encodeInto(pending, buf);
    // `read` falls short when the encoded text did not fit into the view;
    // `written` must stay below the limit to leave room for the marker.
    assert(read === pending.length && written < MAX_INPUT_BYTES, 'Input too long');
    this._pos += read;
    buf[written] = 0xff; // Mark end of input with an invalid UTF-8 character.
    return written;
  }

  /** @returns {DataView} A view of memory starting at the memo table. */
  memoTableViewForTesting() {
    const {buffer} = this._instance.exports.memory;
    return new DataView(buffer, Compiler.MEMO_START_OFFSET);
  }
}
1141
+
1142
/**
 * Read-only view of one CST node stored in the Wasm module's memory.
 *
 * Layout as read by this class (all fields little-endian, relative to the
 * node's base offset):
 * - +0:  uint32 child count
 * - +4:  uint32 match length
 * - +8:  int32 type: -1 for a terminal, -2 for an iteration node, and a
 *        non-negative index into the rule names otherwise
 * - +12: one uint32 per child, holding the offset of the child node
 */
class CstNode {
  /**
   * @param {string[]} ruleNames - Rule names, indexed by rule id.
   * @param {DataView} dataView - View of the memory holding the node.
   * @param {number} offset - Offset of the node within `dataView`.
   */
  constructor(ruleNames, dataView, offset) {
    this._ruleNames = ruleNames;
    this._view = dataView;
    this._base = offset;
  }

  /** @returns {boolean} True if the type field is a (non-negative) rule id. */
  isNonterminal() {
    // `_type` collapses every rule id to 0 and keeps negative tags as-is.
    return this._type >= 0;
  }

  /** @returns {boolean} True if the type field is -1. */
  isTerminal() {
    return this._type === -1;
  }

  /** @returns {boolean} True if the type field is -2. */
  isIter() {
    return this._type === -2;
  }

  /**
   * @returns {string|undefined} The rule name selected by the type field;
   *   undefined when the field is not a valid index (e.g. for terminals).
   */
  get ruleName() {
    const id = this._view.getInt32(this._base + 8, true);
    return this._ruleNames[id];
  }

  /** @returns {number} Number of children. */
  get count() {
    return this._view.getUint32(this._base, true);
  }

  /** @returns {number} Value of the match length field. */
  get matchLength() {
    return this._view.getUint32(this._base + 4, true);
  }

  /** @returns {number} The negative tag for terminal/iter nodes, else 0. */
  get _type() {
    const t = this._view.getInt32(this._base + 8, true);
    return t < 0 ? t : 0;
  }

  /** @returns {CstNode[]} Freshly created views of the child nodes. */
  get children() {
    const {count} = this;
    const children = [];
    for (let i = 0; i < count; i++) {
      const slotOffset = this._base + 12 + i * 4;
      children.push(
        new CstNode(this._ruleNames, this._view, this._view.getUint32(slotOffset, true)),
      );
    }
    return children;
  }
}
1190
+
1191
// Layout constants re-exported for tests. Each value is passed through
// checkNotNull, so a missing constant fails at module load time.
export const ConstantsForTesting = Object.fromEntries(
  [
    ['CST_NODE_SIZE_BYTES', Assembler.CST_NODE_HEADER_SIZE_BYTES],
    ['CST_START_OFFSET', Compiler.CST_START_OFFSET],
    ['MEMO_COL_SIZE_BYTES', Assembler.MEMO_COL_SIZE_BYTES],
  ].map(([key, value]) => [key, checkNotNull(value)]),
);