@ohm-js/wasm 0.4.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1559 @@
1
+ /* global process */
2
+ import * as w from '@wasmgroundup/emit';
3
+ import * as ohm from 'ohm-js';
4
+ // import wabt from 'wabt';
5
+ import * as ir from "./ir.js";
6
+ import * as prebuilt from "../build/ohmRuntime.wasm_sections.js";
7
// Size in bytes of one WebAssembly linear-memory page.
const WASM_PAGE_SIZE = 64 * 1024;
// Debug mode is switched on by setting the OHM_DEBUG environment variable to '1'.
const DEBUG = process.env.OHM_DEBUG === '1';
// Codegen toggles. When true, the bindings array length is read/written
// directly in linear memory instead of calling the prebuilt
// getBindingsLength/setBindingsLength functions.
const FAST_SAVE_BINDINGS = true;
const FAST_RESTORE_BINDINGS = true;
// NOTE(review): not referenced in the portion of the file visible here;
// presumably controls implicit whitespace skipping in syntactic rules.
const IMPLICIT_SPACE_SKIPPING = true;
// When specializing rules, should we emit a generalized version that
// handles the specific cases? If false, code size will be larger.
// This doesn't seem to make a big performance difference either way.
const EMIT_GENERALIZED_RULES = false;
// A sentinel value representing "end of input".
// This could be anything > 0xffff, really.
const CHAR_CODE_END = 0xffffffff;
const { instr } = w;
const { pexprs } = ohm;
// Constants for Wasm 3.0 (stuff not in @wasmgroundup/emit).
const wasm3 = {
  valtype: { externref: 0x6f },
  instr: { ref: { null: 0xd0 } },
};
// Imports declared in addition to the prebuilt runtime's own imports.
const defaultImports = [
  // func charCodeAt(string: externref, index: i32) -> i32
  {
    module: 'wasm:js-string',
    name: 'charCodeAt',
    paramTypes: [wasm3.valtype.externref, w.valtype.i32],
    resultTypes: [w.valtype.i32],
  },
];
35
// Predicate: true for every value except `null` and `undefined`.
const isNonNull = x => x !== null && x !== undefined;
36
/**
 * Throw an Error unless `cond` is truthy.
 * @param {*} cond - Condition that is expected to hold.
 * @param {string} [msg] - Error message; defaults to 'assertion failed'.
 * @throws {Error} When `cond` is falsy.
 */
function assert(cond, msg) {
  if (cond) {
    return;
  }
  throw new Error(msg ?? 'assertion failed');
}
41
/**
 * Assert that `x` is neither `null` nor `undefined`, then hand it back.
 * Other falsy values (0, '', false) are accepted.
 * @param {*} x - Value to check.
 * @param {string} [msg] - Message used if the check fails.
 * @returns {*} `x`, unchanged.
 */
function checkNotNull(x, msg = 'unexpected null value') {
  const isPresent = x !== null && x !== undefined;
  assert(isPresent, msg);
  return x;
}
45
/**
 * Assert that `arr` has no `undefined` element, then hand it back.
 * @param {Array} arr - Array to inspect.
 * @returns {Array} `arr`, unchanged.
 */
function checkNoUndefined(arr) {
  const firstUndefinedAt = arr.indexOf(undefined);
  assert(firstUndefinedAt === -1, `found undefined @ ${firstUndefinedAt}`);
  return arr;
}
49
/**
 * Look up `key` in `map`; if it is absent, first store the value produced by
 * `makeDefaultVal()` under that key.
 * @param {Map} map
 * @param {*} key
 * @param {function(): *} makeDefaultVal - Only called when `key` is missing.
 * @returns {*} The value stored under `key`.
 */
function setdefault(map, key, makeDefaultVal) {
  const isMissing = !map.has(key);
  if (isMissing) {
    map.set(key, makeDefaultVal());
  }
  return map.get(key);
}
54
// Fetch `k` from `map`, failing with "not found: '<k>'" when the lookup
// yields null/undefined.
const getNotNull = (map, k) => {
  const found = map.get(k);
  return checkNotNull(found, `not found: '${k}'`);
};
55
// Tell whether `exp` is an Apply or a Param node.
// (Both are implemented with a call when compiling to Wasm.)
const isApplyLike = exp =>
  [pexprs.Apply, pexprs.Param].some(ctor => exp instanceof ctor);
58
/**
 * Reserve a name in `names` that is based on `str`.
 *
 * `str` itself is used when it is free; otherwise the first free name among
 * `str_0` … `str_999` is used. The chosen name is added to `names`.
 * @param {Set<string>} names - Names already taken (mutated).
 * @param {string} str - Preferred name.
 * @returns {string} The reserved name.
 * @throws {Error} If `str` and all 1000 suffixed candidates are taken.
 */
function uniqueName(names, str) {
  const pickFreeName = () => {
    if (!names.has(str)) {
      return str;
    }
    for (let suffix = 0; suffix < 1000; suffix++) {
      const candidate = `${str}_${suffix}`;
      if (!names.has(candidate)) {
        return candidate;
      }
    }
    throw new Error(`Unique name generation failed for ${str}`);
  };
  const name = pickFreeName();
  names.add(name);
  return name;
}
71
/**
 * A rule counts as syntactic when its first character is unchanged by
 * upper-casing. Names starting with '$' are rejected.
 * @param {string} ruleName
 * @returns {boolean}
 */
function isSyntacticRule(ruleName) {
  const firstChar = ruleName[0];
  assert(firstChar !== '$', ruleName);
  return firstChar.toUpperCase() === firstChar;
}
75
// One-character strings for char codes 0..127, indexed by char code.
const asciiChars = Array.from({ length: 128 }, (_, code) => String.fromCharCode(code));
76
/**
 * An insertion-ordered set that assigns each distinct item a 0-based index
 * equal to the number of items that were present when it was added.
 */
class IndexedSet {
  constructor() {
    // item -> index
    this._map = new Map();
  }

  /**
   * Add `item` if it is new and return its index; for an item already
   * present, return the index it was given originally.
   */
  add(item) {
    const knownIdx = this._map.get(item);
    if (knownIdx !== undefined) {
      return knownIdx;
    }
    const idx = this._map.size;
    this._map.set(checkNotNull(item), idx);
    return idx;
  }

  /** Index of `item`, or `undefined` if it was never added. */
  getIndex(item) {
    return this._map.get(item);
  }

  has(item) {
    return this._map.has(item);
  }

  get size() {
    return this._map.size;
  }

  /** The items, in insertion order, as a fresh array. */
  keys() {
    return Array.from(this._map.keys());
  }

  /** Same as `keys()`: the items themselves, not their indices. */
  values() {
    return Array.from(this._map.keys());
  }

  /** Iterates `[item, index]` pairs. */
  [Symbol.iterator]() {
    return this._map.entries();
  }
}
107
/**
 * Collect the distinct `Param` nodes referenced anywhere inside `exp`, in
 * order of first occurrence (depth-first, left to right).
 *
 * @param {object} exp - A parsing expression (an instance of one of the
 *     `pexprs` classes).
 * @param {Set<number>} [seen] - Param indices already collected; shared
 *     across the recursion so that each index is reported only once.
 * @returns {Array} The first `Param` node found for each distinct index.
 * @throws {Error} If `exp` is of a kind that is not handled.
 */
function collectParams(exp, seen = new Set()) {
  switch (exp.constructor) {
    case pexprs.Param:
      if (!seen.has(exp.index)) {
        seen.add(exp.index);
        return [exp];
      }
      return [];
    case pexprs.Alt:
      return exp.terms.flatMap(e => collectParams(e, seen));
    case pexprs.Apply:
      return exp.args.flatMap(e => collectParams(e, seen));
    case pexprs.Seq:
      return exp.factors.flatMap(e => collectParams(e, seen));
    // Single-child wrappers keep their operand in `expr` (not `factors`),
    // so they must not share the Seq branch.
    case pexprs.Lex:
    case pexprs.Lookahead:
    case pexprs.Not:
    case pexprs.Opt:
    case pexprs.Plus:
    case pexprs.Star:
      return collectParams(exp.expr, seen);
    case pexprs.Range:
    case pexprs.Terminal:
    case pexprs.UnicodeChar:
      return [];
    default:
      throw new Error(`not handled: ${exp.constructor.name}`);
  }
}
135
// Look up the function index of a prebuilt runtime function by name;
// fails if the name is not in `prebuilt.funcidxByName`.
const prebuiltFuncidx = nm => {
  const idx = prebuilt.funcidxByName[nm];
  return checkNotNull(idx);
};
136
// Build a section whose entries are the prebuilt section's entries followed
// by `els`. The merge is naive: the entry count is summed and the contents
// are concatenated; no type or function indices are rewritten.
function mergeSections(sectionId, prebuiltSec, els) {
  const totalEntries = prebuiltSec.entryCount + els.length;
  const payload = [w.u32(totalEntries), prebuiltSec.contents, els];
  return w.section(sectionId, payload);
}
142
/**
 * Render a function type as a string key of the form `[params][results]`,
 * e.g. `[i32,externref][i32]`.
 * @param {number[]} paramTypes - Value-type codes of the parameters.
 * @param {number[]} resultTypes - Value-type codes of the results.
 * @returns {string}
 * @throws {Error} If a type code is not externref, f64, f32, i64 or i32.
 */
function functypeToString(paramTypes, resultTypes) {
  // Indexed by (code - f64 code): the numeric value-type codes are
  // consecutive, starting with f64.
  const numericNames = ['f64', 'f32', 'i64', 'i32'];
  const nameOf = t => {
    if (t === wasm3.valtype.externref) {
      return 'externref';
    }
    return checkNotNull(numericNames[t - w.valtype.f64]);
  };
  const listOf = types => types.map(t => nameOf(t)).join(',');
  return `[${listOf(paramTypes)}][${listOf(resultTypes)}]`;
}
152
/**
 * Deduplicating registry of function types. Each distinct
 * (paramTypes, resultTypes) signature receives an index, numbered upward
 * from `startIdx` in order of first registration.
 */
class TypeMap {
  constructor(startIdx = 0) {
    // signature string -> [index, encoded functype]
    this._map = new Map();
    this._startIdx = startIdx;
  }

  /** Register the signature if it is new; return its index either way. */
  add(paramTypes, resultTypes) {
    const key = functypeToString(paramTypes, resultTypes);
    const existing = this._map.get(key);
    if (existing !== undefined) {
      return existing[0];
    }
    const idx = this._startIdx + this._map.size;
    this._map.set(key, [idx, w.functype(paramTypes, resultTypes)]);
    return idx;
  }

  /** Register the signature of every declaration in `decls`. */
  addDecls(decls) {
    for (const decl of decls) {
      this.add(decl.paramTypes, decl.resultTypes);
    }
  }

  /** Index of an already-registered signature; fails if it is unknown. */
  getIdx(paramTypes, resultTypes) {
    const key = functypeToString(paramTypes, resultTypes);
    const entry = checkNotNull(this._map.get(key));
    return entry[0];
  }

  getIdxForDecl(decl) {
    return this.getIdx(decl.paramTypes, decl.resultTypes);
  }

  /** The encoded functypes, in registration order. */
  getTypes() {
    return Array.from(this._map.values(), entry => entry[1]);
  }
}
182
/*
  Offers a higher-level interface for generating WebAssembly code and
  constructing a module.

  The assembler accumulates the instruction stream of the function currently
  being generated in `_code`, and records finished functions in
  `_functionDecls`. Most methods simply append instructions; the comments
  below describe the emitted sequence rather than any runtime behaviour.
*/
class Assembler {
  // `typeMap` is the registry used to resolve multi-value block types.
  constructor(typeMap) {
    // name -> { idx, type, mut, initExpr }
    this._globals = new Map();
    this._functionDecls = [];
    // Keep track of loops/blocks to make it easier (and safer) to generate
    // breaks to the correct index.
    this._blockStack = [];
    // State for the current function being generated.
    this._code = [];
    // name -> local index; only defined while inside `addFunction`.
    this._locals = undefined;
    this._typeMap = typeMap;
  }
  // Register a block type so that `blocktype` can later resolve it.
  addBlocktype(paramTypes, resultTypes) {
    this._typeMap.add(paramTypes, resultTypes);
  }
  // Return the encoded type index for a previously registered block type.
  blocktype(paramTypes, resultTypes) {
    const idx = this._typeMap.getIdx(paramTypes, resultTypes);
    assert(idx !== -1, `Unknown blocktype: '${functypeToString(paramTypes, resultTypes)}'`);
    // From the spec: "The type index in a block type is encoded as a
    // positive signed integer, so that its signed LEB128 bit pattern cannot
    // collide with the encoding of value types or the special code 0x40."
    return w.i32(idx);
  }
  // Run `thunk` against a temporary, empty code buffer and return what it
  // emitted, terminated with `end`. The previous buffer is restored after.
  doEmit(thunk) {
    const oldCode = this._code;
    this._code = [];
    thunk();
    const body = [...this._code, instr.end];
    this._code = oldCode;
    return body;
  }
  // Declare a global; its index is its declaration order. `initThunk` emits
  // the initializer expression. Returns the new global's index.
  addGlobal(name, type, mut, initThunk) {
    assert(!this._globals.has(name), `Global '${name}' already exists`);
    const idx = this._globals.size;
    const initExpr = this.doEmit(initThunk);
    this._globals.set(name, { idx, type, mut, initExpr });
    return idx;
  }
  // Declare an i32 local in the current function and return its index.
  // Only valid while a function is being generated (see `addFunction`).
  addLocal(name, type) {
    assert(!this._locals.has(name), `Local '${name}' already exists`);
    assert(type === w.valtype.i32, `invalid local type: ${type}`);
    const idx = this._locals.size;
    this._locals.set(name, idx);
    return idx;
  }
  // Generate one function: parameters become locals named `__arg0`,
  // `__arg1`, …; `bodyFn` emits the body; the result is appended to
  // `_functionDecls` and the per-function state is reset.
  addFunction(name, paramTypes, resultTypes, bodyFn) {
    this._locals = new Map();
    paramTypes.forEach((t, i) => {
      this.addLocal(`__arg${i}`, t);
    });
    bodyFn(this);
    this._functionDecls.push({
      name,
      paramTypes,
      resultTypes,
      // NOTE(review): `_locals.size` also counts the `__argN` parameter
      // entries added above, so this declares more locals than were named.
      locals: [w.locals(this._locals.size, w.valtype.i32)], // TODO: Support other types?
      body: [...this._code, instr.end],
    });
    this._code = [];
    this._locals = undefined;
  }
  // Pure codegen helpers (used to generate the function bodies).
  globalidx(name) {
    const { idx } = checkNotNull(this._globals.get(name), `Unknown global: ${name}`);
    return idx;
  }
  localidx(name) {
    return checkNotNull(this._locals.get(name), `Unknown local: ${name}`);
  }
  // Append to the current code buffer. Arguments may be arbitrarily nested
  // arrays; they are flattened, and `undefined` entries are rejected.
  emit(...bytes) {
    this._code.push(...checkNoUndefined(bytes.flat(Infinity)));
  }
  // Emit `block bt … end` around whatever `bodyThunk` emits.
  block(bt, bodyThunk, label = '') {
    this._blockOnly(bt, label);
    bodyThunk();
    this._endBlock();
  }
  // Prefer to use `block`, but for some cases it's more convenient to emit
  // the block and the end separately.
  // Note: `label` (if specified) is not unique (e.g., 'pexprEnd').
  _blockOnly(bt, label) {
    this.emit(instr.block, bt);
    this._blockStack.push(label ? `block:${label}` : 'block');
  }
  // This should always be paired with `_blockOnly`.
  _endBlock() {
    const what = this._blockStack.pop().split(':')[0];
    assert(what === 'block', 'Invalid endBlock');
    this.emit(instr.end);
  }
  // Emit `loop bt … end` around whatever `bodyThunk` emits.
  loop(bt, bodyThunk) {
    this.emit(instr.loop, bt);
    this._blockStack.push('loop');
    bodyThunk();
    this._blockStack.pop();
    this.emit(instr.end);
  }
  // Emit an `if` without an `else` branch.
  if(bt, bodyThunk) {
    this.ifElse(bt, bodyThunk);
  }
  // Emit `if bt … [else …] end`; the `else` is omitted when no `elseThunk`
  // is given.
  ifElse(bt, thenThunk, elseThunk = undefined) {
    this.emit(instr.if, bt);
    this._blockStack.push('if');
    thenThunk();
    if (elseThunk) {
      this.emit(instr.else);
      elseThunk();
    }
    this._blockStack.pop();
    this.emit(instr.end);
  }
  // Like `if`, but the body runs when the value on the stack is zero
  // (an `i32.eqz` is emitted first).
  ifFalse(bt, bodyThunk) {
    this.emit(instr.i32.eqz);
    this.if(bt, bodyThunk);
  }
  // Raw `br`, with no check against the block stack (cf. `break`/`continue`).
  br(depth) {
    this.emit(instr.br, w.labelidx(depth));
  }
  i32Add() {
    this.emit(instr.i32.add);
  }
  i32Const(value) {
    this.emit(instr.i32.const, w.i32(value));
  }
  i32Load(offset = 0) {
    this.emit(instr.i32.load, w.memarg(Assembler.ALIGN_4_BYTES, offset));
  }
  i32Load8u(offset = 0) {
    this.emit(instr.i32.load8_u, w.memarg(Assembler.ALIGN_1_BYTE, offset));
  }
  i32Mul() {
    this.emit(instr.i32.mul);
  }
  i32Eq() {
    this.emit(instr.i32.eq);
  }
  i32Ne() {
    this.emit(instr.i32.ne);
  }
  // Store [addr:i32, val:i32] -> []
  i32Store(offset = 0) {
    this.emit(instr.i32.store, w.memarg(Assembler.ALIGN_4_BYTES, offset));
  }
  i32Sub() {
    this.emit(instr.i32.sub);
  }
  globalGet(name) {
    this.emit(instr.global.get, this.globalidx(name));
  }
  globalSet(name) {
    this.emit(instr.global.set, this.globalidx(name));
  }
  localGet(name) {
    this.emit(instr.local.get, this.localidx(name));
  }
  localSet(name) {
    this.emit(instr.local.set, this.localidx(name));
  }
  localTee(name) {
    this.emit(instr.local.tee, this.localidx(name));
  }
  // Checked `br`: the target at `depth` must be a `block` or an `if`.
  break(depth) {
    const what = this._blockStack.at(-(depth + 1)).split(':')[0];
    assert(what === 'block' || what === 'if', 'Invalid break');
    this.emit(instr.br, w.labelidx(depth));
  }
  // Conditional break -- emits a `br_if` for the given depth.
  condBreak(depth) {
    const what = this._blockStack.at(-(depth + 1)).split(':')[0];
    assert(what === 'block' || what === 'if', 'Invalid condBreak');
    this.emit(instr.br_if, w.labelidx(depth));
  }
  // Checked `br` to a loop header: the target at `depth` must be a `loop`.
  continue(depth) {
    const what = this._blockStack.at(-(depth + 1)).split(':')[0];
    assert(what === 'loop', 'Invalid continue');
    this.emit(instr.br, w.labelidx(depth));
  }
  brTable(labels, defaultLabelidx) {
    this.emit(instr.br_table, w.vec(labels), defaultLabelidx);
  }
  return() {
    this.emit(instr.return);
  }
  // Emit a dense jump table (switch-like) using br_table.
  //
  // `discrimThunk` emits the code that pushes the case index; `caseCb(i,
  // depth)` emits the body of case `i`, where `depth` is the label depth of
  // the end of the whole switch at that point.
  // NOTE(review): `defaultThunk` is never invoked here; the br_table default
  // label targets the outermost case block, i.e. the end of the switch.
  switch(bt, discrimThunk, numCases, caseCb, defaultThunk) {
    const startStackHeight = this._blockStack.length;
    const labels = [];
    // Emit one block per case…
    for (let i = 0; i < numCases; i++) {
      this._blockOnly(bt);
      labels.push(w.labelidx(i));
    }
    // …and one inner block containing the condition and the br_table.
    this.block(w.blocktype.empty, () => {
      discrimThunk();
      this.brTable(labels, w.labelidx(labels.length));
    });
    for (let i = 0; i < numCases; i++) {
      const depth = labels.length - (i + 1);
      caseCb(i, depth);
      this.break(depth); // Jump to end.
      this._endBlock();
    }
    assert(this._blockStack.length === startStackHeight);
  }
  refNull(valtype) {
    this.emit(wasm3.instr.ref.null, valtype);
  }
  // "Macros" -- codegen helpers specific to Ohm.
  // [i32] -> [i32]: add 1 to the value on top of the stack.
  i32Inc() {
    this.i32Const(1);
    this.i32Add();
  }
  // [i32] -> [i32]: subtract 1 from the value on top of the stack.
  i32Dec() {
    this.i32Const(1);
    this.i32Sub();
  }
  // [i32] -> [i32, i32]: duplicate the top of the stack via the local `tmp`,
  // which the current function must have declared.
  dup() {
    this.localTee('tmp');
    this.localGet('tmp');
  }
  // [] -> [i32]: if pos < endPos (unsigned), call the import at function
  // index `prebuilt.importsec.entryCount` with (input, pos); otherwise push
  // CHAR_CODE_END.
  currCharCode() {
    this.globalGet('pos');
    this.globalGet('endPos');
    this.emit(instr.i32.lt_u);
    this.ifElse(w.blocktype.i32, () => {
      this.globalGet('input');
      this.globalGet('pos');
      this.emit(instr.call, w.funcidx(prebuilt.importsec.entryCount));
    }, () => {
      this.i32Const(CHAR_CODE_END);
    });
  }
  // [] -> [i32]: like `currCharCode`, and also advances `pos` by one.
  nextCharCode() {
    this.currCharCode();
    this.incPos();
  }
  // Set the local `ret` to the constant `val`.
  setRet(val) {
    this.i32Const(val);
    this.localSet('ret');
  }
  // Decrement `sp` by one frame, then fill the frame: with `saveThunk` if
  // given, otherwise with the current pos (offset 0) and the current number
  // of bindings (offset 4).
  pushStackFrame(saveThunk) {
    this.globalGet('sp');
    this.i32Const(Assembler.STACK_FRAME_SIZE_BYTES);
    this.i32Sub();
    this.globalSet('sp');
    if (saveThunk) {
      saveThunk();
    }
    else {
      this.savePos();
      this.saveNumBindings();
    }
  }
  // Increment `sp` by one frame.
  popStackFrame() {
    this.i32Const(Assembler.STACK_FRAME_SIZE_BYTES);
    this.globalGet('sp');
    this.i32Add();
    this.globalSet('sp');
  }
  // Save the current input position.
  savePos() {
    // stack[sp] = pos
    this.globalGet('sp');
    this.globalGet('pos');
    this.i32Store();
  }
  // Load the saved input position onto the stack.
  getSavedPos() {
    this.globalGet('sp');
    this.i32Load();
  }
  // pos = stack[sp]
  restorePos() {
    this.getSavedPos();
    this.globalSet('pos');
  }
  // stack[sp + 4] = current length of the bindings array.
  saveNumBindings() {
    this.globalGet('sp');
    if (FAST_SAVE_BINDINGS) {
      this.globalGet('bindings');
      this.i32Load(12); // Array<i32>.length_
    }
    else {
      this.callPrebuiltFunc('getBindingsLength');
    }
    this.i32Store(4);
  }
  // Load the saved number of bindings (stack[sp + 4]) onto the stack.
  getSavedNumBindings() {
    this.globalGet('sp');
    this.i32Load(4);
  }
  // Reset the bindings array's length to the saved value.
  restoreBindingsLength() {
    if (FAST_RESTORE_BINDINGS) {
      // It's safe to directly set the length as long as it's shrinking.
      this.globalGet('bindings');
      this.getSavedNumBindings();
      this.i32Store(12); // Array<i32>.length_
    }
    else {
      this.getSavedNumBindings();
      this.callPrebuiltFunc('setBindingsLength');
    }
  }
  // stack[sp] = failurePos (local). Note: same frame slot as `savePos`.
  saveFailurePos() {
    this.globalGet('sp');
    this.localGet('failurePos');
    this.i32Store();
  }
  // failurePos (local) = stack[sp]
  restoreFailurePos() {
    this.globalGet('sp');
    this.i32Load();
    this.localSet('failurePos');
  }
  // stack[sp + 4] = rightmostFailurePos. Note: same frame slot as
  // `saveNumBindings`.
  saveGlobalFailurePos() {
    this.globalGet('sp');
    this.globalGet('rightmostFailurePos');
    this.i32Store(4);
  }
  // rightmostFailurePos = stack[sp + 4]
  restoreGlobalFailurePos() {
    this.globalGet('sp');
    this.i32Load(4);
    this.globalSet('rightmostFailurePos');
  }
  updateGlobalFailurePos() {
    // rightmostFailurePos = max(rightmostFailurePos, failurePos)
    this.i32Max(() => this.globalGet('rightmostFailurePos'), () => this.localGet('failurePos'));
    this.globalSet('rightmostFailurePos');
  }
  updateLocalFailurePos(origPosThunk) {
    // failurePos = max(failurePos, origPos)
    this.i32Max(() => this.localGet('failurePos'), origPosThunk);
    this.localSet('failurePos');
  }
  // Increment the current input position by 1.
  // [] -> [] (no net effect on the operand stack)
  incPos() {
    this.globalGet('pos');
    this.i32Inc();
    this.globalSet('pos');
  }
  // Emit a call to the prebuilt runtime function with the given name.
  callPrebuiltFunc(name) {
    this.emit(instr.call, w.funcidx(prebuiltFuncidx(name)));
  }
  // Call the prebuilt `newIterationNode` with
  // (savedPos, pos, savedNumBindings, arity, isOpt ? 1 : 0).
  newIterNodeWithSavedPosAndBindings(arity, isOpt = false) {
    this.getSavedPos();
    this.globalGet('pos');
    this.getSavedNumBindings();
    this.i32Const(arity);
    this.i32Const(isOpt ? 1 : 0);
    this.callPrebuiltFunc('newIterationNode');
  }
  // Call the prebuilt `newNonterminalNode` with
  // (savedPos, pos, ruleId, bindings.length - 1, -1).
  newCaseInsensitiveNode(ruleId) {
    this.getSavedPos();
    this.globalGet('pos');
    this.i32Const(ruleId);
    // Ensure we get exactly one binding.
    this.globalGet('bindings');
    this.i32Load(12); // Array<i32>.length_
    this.i32Const(1);
    this.i32Sub();
    this.i32Const(-1); // By def'n, it cannot have contributed to failurePos.
    this.callPrebuiltFunc('newNonterminalNode');
  }
  // Call the prebuilt `newTerminalNode` with (postSpacesPos, pos).
  // NOTE(review): the previous annotation here read
  // `[startIdx: i32] -> [ptr: i32]`, but this sequence consumes nothing from
  // the operand stack; the callee's result type is not visible in this file.
  newTerminalNode() {
    this.localGet('postSpacesPos');
    this.globalGet('pos');
    this.callPrebuiltFunc('newTerminalNode');
  }
  // [] -> [i32]: signed max of the values produced by the two thunks.
  // Each thunk is emitted twice (a, b, a, b, i32.gt_s, select), so the
  // thunks must emit side-effect-free code.
  i32Max(aThunk, bThunk) {
    aThunk();
    bThunk();
    aThunk();
    bThunk();
    this.emit(instr.i32.gt_s, instr.select);
  }
  // Return the depth of the block with the given label.
  // If several open blocks share the label, the innermost one wins.
  depthOf(label) {
    const i = this._blockStack.findLastIndex(what => what === `block:${label}`);
    assert(i !== -1, `Unknown label: ${label}`);
    return this._blockStack.length - i - 1;
  }
  // [] -> [i32]: pack the rule result as (failurePos << 1) | (ret != 0).
  ruleEvalReturn() {
    // Convert the value in `ret` to a single bit in position 0.
    this.localGet('ret');
    this.emit(instr.i32.eqz, instr.i32.eqz);
    // The remaining upper 31 bits hold the (signed) failurePos.
    this.localGet('failurePos');
    this.i32Const(1);
    this.emit(instr.i32.shl);
    this.emit(instr.i32.or);
  }
}
// Alignment hints for memargs, as log2 of the byte alignment.
Assembler.ALIGN_1_BYTE = 0;
Assembler.ALIGN_4_BYTES = 2;
Assembler.CST_NODE_HEADER_SIZE_BYTES = 8;
// A "memo column" holds the info for one input position, i.e. one char.
Assembler.MEMO_COL_SIZE_BYTES = 4 * 256;
// Two i32 slots per frame (offsets 0 and 4; see pushStackFrame).
Assembler.STACK_FRAME_SIZE_BYTES = 8;
585
+ export class Compiler {
586
/**
 * @param {object} grammar - An Ohm grammar. If it was created by a different
 *     copy of ohm-js, it is re-instantiated from its source text.
 * @throws {Error} If `grammar` is not a grammar, or comes from another copy
 *     of ohm-js and carries no source.
 */
constructor(grammar) {
  assert(grammar && 'superGrammar' in grammar, 'Not a valid grammar: ' + grammar);
  // Detect the so-called "dual package hazard". Since we use the identity
  // of the pexpr constructors when compiling the grammar, it gets confusing
  // if there are multiple copies of Ohm.
  if (!(grammar instanceof ohm.ohmGrammar.constructor)) {
    // If we have the source, recover by instantiating the grammar anew.
    // Fail otherwise.
    assert(!!grammar.source, 'Grammar smells fishy. Do you have multiple instances of ohm-js?');
    grammar = ohm.grammar(grammar.source.contents);
  }
  this.grammar = grammar;
  // For any additional imports outside the prebuilt ones.
  this.importDecls = [...defaultImports];
  // The rule ID is a 0-based index that's mapped to the name.
  // It is *not* the same as the function index of the rule's eval function.
  this.ruleIdByName = new IndexedSet();
  // For non-memoized rules, we defer assigning IDs until all memoized
  // rule names have been assigned.
  this._deferredRuleIds = new Set();
  this._maxMemoizedRuleId = -1;
  // Ensure default start rule has id 0; $term, 1; and $spaces, 2.
  // (IDs are handed out in call order, so these three calls must stay first
  // and in this order.)
  this._ensureRuleId(grammar.defaultStartRule);
  this._ensureRuleId('$term');
  this._ensureRuleId('$spaces');
  // Populated by `simplifyApplications`; undefined until then.
  this.rules = undefined;
  // Counter used to name lifted rules (`$lifted0`, `$lifted1`, …).
  this._nextLiftedId = 0;
  // Keeps track of whether we're in a lexical or syntactic context.
  this._lexContextStack = [];
  this._applySpacesImplicit = ir.apply('$spaces');
}
617
+ importCount() {
618
+ return prebuilt.importsec.entryCount + this.importDecls.length;
619
+ }
620
+ ruleId(name) {
621
+ return checkNotNull(this.ruleIdByName.getIndex(name), `Unknown rule: ${name}`);
622
+ }
623
+ // This should be the only place where we assign rule IDs!
624
+ _ensureRuleId(name, { notMemoized } = {}) {
625
+ const idx = this.ruleIdByName.add(name);
626
+ assert(notMemoized || idx < 256, `too many rules: ${idx}`);
627
+ return idx;
628
+ }
629
+ _deferRuleId(name) {
630
+ this._deferredRuleIds.add(name);
631
+ }
632
+ inLexicalContext() {
633
+ return checkNotNull(this._lexContextStack.at(-1));
634
+ }
635
+ liftPExpr(exp, isSyntactic) {
636
+ assert(!(exp instanceof pexprs.Terminal));
637
+ // Note: the same expression might appear in more than one place, and
638
+ // when lifting, we could in theory avoid creating a duplicate function.
639
+ // But, we have to be careful where Params are involved: `"a" | blah`
640
+ // could mean something different depending on context.
641
+ const name = `$lifted${this._nextLiftedId++}`;
642
+ // Replace "free variables" (Params from the outer scope) with Params
643
+ // for the lifted, to-be-defined rule.
644
+ const freeVars = collectParams(exp);
645
+ let body = exp;
646
+ let formals = [];
647
+ const maxIndex = freeVars.reduce((acc, param) => Math.max(acc, param.index), -1);
648
+ if (freeVars.length > 0) {
649
+ // `substituteParams` usually takes an array of _actual params_,
650
+ // [arg0, arg1, ...]. We use it instead to replace Params from the
651
+ // original scope with ones for the new scope. Since the lifted pexpr
652
+ // might only reference a subset of the params, the array can be holey.
653
+ // E.g., in `doc<a, b, c> = a tail<(b|c)>`, when we lift `b|c`.
654
+ const newParams = new Array(maxIndex + 1);
655
+ freeVars.forEach((p, i) => {
656
+ newParams[p.index] = new pexprs.Param(i);
657
+ });
658
+ body = exp.substituteParams(newParams);
659
+ // We make up some names for the parameters; they don't currently matter.
660
+ formals = newParams.filter(isNonNull).map(p => `__${p.index}`);
661
+ }
662
+ const actuals = freeVars.filter(isNonNull);
663
+ const ruleInfo = { body, formals, isSyntactic, source: exp.source };
664
+ return [name, ruleInfo, actuals];
665
+ }
666
+ // Return a funcidx corresponding to the eval function for the given rule.
667
+ ruleEvalFuncIdx(name) {
668
+ const offset = this.importCount() + prebuilt.funcsec.entryCount;
669
+ return w.funcidx(this.ruleId(name) + offset);
670
+ }
671
+ // Return an object implementing all of the debug imports.
672
+ getDebugImports(log) {
673
+ const ans = {};
674
+ for (const decl of this.importDecls.filter(d => d.module === 'debug')) {
675
+ const { name } = decl;
676
+ ans[name] = arg => {
677
+ log(name, arg);
678
+ };
679
+ }
680
+ return ans;
681
+ }
682
+ normalize() {
683
+ assert(!this.rules, 'already normalized');
684
+ this.simplifyApplications();
685
+ this.specializeApplications();
686
+ }
687
// Compile the grammar: normalize it, set up the type map, assembler and
// globals, generate the function declarations, and return the result of
// `buildModule`.
compile() {
  this.normalize();
  // Type indices for our own types start after the prebuilt type section.
  const typeMap = (this.typeMap = new TypeMap(prebuilt.typesec.entryCount));
  const asm = (this.asm = new Assembler(typeMap));
  asm.addBlocktype([w.valtype.i32], []);
  asm.addBlocktype([w.valtype.i32], [w.valtype.i32]);
  asm.addBlocktype([], [w.valtype.i32]); // Rule eval
  // The globals below are declared in the same order as this listing
  // (presumably the globals of the prebuilt runtime, whose code refers to
  // them by index — do not reorder).
  // (global $runtime/ohmRuntime/pos (mut i32) (i32.const 0))
  // (global $runtime/ohmRuntime/endPos (mut i32) (i32.const 0))
  // (global $runtime/ohmRuntime/input (mut externref) (ref.null noextern))
  // (global $runtime/ohmRuntime/memoBase (mut i32) (i32.const 0))
  // (global $runtime/ohmRuntime/rightmostFailurePos (mut i32) (i32.const 0))
  // (global $runtime/ohmRuntime/sp (mut i32) (i32.const 0))
  // (global $~lib/shared/runtime/Runtime.Stub i32 (i32.const 0))
  // (global $~lib/shared/runtime/Runtime.Minimal i32 (i32.const 1))
  // (global $~lib/shared/runtime/Runtime.Incremental i32 (i32.const 2))
  // (global $~lib/rt/stub/startOffset (mut i32) (i32.const 0))
  // (global $~lib/rt/stub/offset (mut i32) (i32.const 0))
  // (global $~lib/native/ASC_RUNTIME i32 (i32.const 0))
  // (global $runtime/ohmRuntime/bindings (mut i32) (i32.const 0))
  // (global $~lib/memory/__heap_base i32 (i32.const 65900))
  asm.addGlobal('pos', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
  asm.addGlobal('endPos', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
  asm.addGlobal('input', wasm3.valtype.externref, w.mut.var, () => asm.refNull(wasm3.valtype.externref));
  // Unlike in the listing above, memoBase starts at two pages and
  // rightmostFailurePos at -1.
  asm.addGlobal('memoBase', w.valtype.i32, w.mut.var, () => asm.i32Const(2 * WASM_PAGE_SIZE));
  asm.addGlobal('rightmostFailurePos', w.valtype.i32, w.mut.var, () => asm.i32Const(-1));
  asm.addGlobal('sp', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
  asm.addGlobal('__Runtime.Stub', w.valtype.i32, w.mut.const, () => asm.i32Const(0));
  asm.addGlobal('__Runtime.Minimal', w.valtype.i32, w.mut.const, () => asm.i32Const(1));
  asm.addGlobal('__Runtime.Incremental', w.valtype.i32, w.mut.const, () => asm.i32Const(2));
  asm.addGlobal('__startOffset', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
  asm.addGlobal('__offset', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
  asm.addGlobal('__ASC_RUNTIME', w.valtype.i32, w.mut.const, () => asm.i32Const(0));
  asm.addGlobal('bindings', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
  // NOTE(review): the listing above shows __heap_base as immutable, but it
  // is declared mutable here — confirm whether that is intentional.
  asm.addGlobal('__heap_base', w.valtype.i32, w.mut.var, () => asm.i32Const(65900));
  // Reserve a fixed number of imports for debug labels.
  if (DEBUG) {
    for (let i = 0; i < 5000; i++) {
      this.importDecls.push({
        module: 'debug',
        name: `debug${i}`,
        paramTypes: [],
        resultTypes: [],
      });
    }
  }
  const functionDecls = this.functionDecls();
  this.rewriteDebugLabels(functionDecls);
  return this.buildModule(typeMap, functionDecls);
}
737
+ simplifyApplications() {
738
+ const { grammar } = this;
739
+ const lookUpRule = name => ({
740
+ ...checkNotNull(grammar.rules[name]),
741
+ isSyntactic: isSyntacticRule(name),
742
+ });
743
+ // Begin with all the rules directly defined in the grammar.
744
+ const ownRuleNames = Object.keys(grammar.rules).filter(name => Object.hasOwn(grammar.rules, name));
745
+ const rules = ownRuleNames.map(name => [name, lookUpRule(name)]);
746
+ // Ensure the certain rules are always included. (The default start rule
747
+ // might be inherited from the supergrammar, so not there yet.)
748
+ for (const name of ['spaces', grammar.defaultStartRule]) {
749
+ rules.push([name, lookUpRule(name)]);
750
+ }
751
+ const liftedTerminals = new IndexedSet();
752
+ const liftTerminal = ({ obj }) => {
753
+ const id = liftedTerminals.add(obj);
754
+ assert(id >= 0 && id < 0xffff, 'too many terminals!');
755
+ return ir.liftedTerminal(id);
756
+ };
757
+ // If `exp` is not an Apply or Param, lift it into its own rule and return
758
+ // a new application of that rule.
759
+ const simplifyArg = (exp, isSyntactic) => {
760
+ if (isApplyLike(exp)) {
761
+ return simplify(exp, isSyntactic);
762
+ }
763
+ if (exp instanceof pexprs.Terminal) {
764
+ return liftTerminal(exp);
765
+ }
766
+ const [name, info, env] = this.liftPExpr(exp, isSyntactic);
767
+ const args = env.map(p => {
768
+ assert(p instanceof pexprs.Param, 'Expected Param');
769
+ return ir.param(p.index);
770
+ });
771
+ rules.push([name, info]);
772
+ return ir.apply(name, args);
773
+ };
774
+ const simplify = (exp, isSyntactic) => {
775
+ if (exp instanceof pexprs.Alt) {
776
+ return ir.alt(exp.terms.map(e => simplify(e, isSyntactic)));
777
+ }
778
+ if (exp === pexprs.any)
779
+ return ir.any();
780
+ if (exp === pexprs.end)
781
+ return ir.end();
782
+ switch (exp.constructor) {
783
+ case pexprs.Apply: {
784
+ const ruleInfo = lookUpRule(exp.ruleName);
785
+ // Replace an application of the built-in caseInsensitive rule with
786
+ // an inlined case-insensitive terminal.
787
+ if (ruleInfo.body instanceof pexprs.CaseInsensitiveTerminal) {
788
+ assert(exp.args.length === 1 && exp.args[0] instanceof pexprs.Terminal);
789
+ return ir.caseInsensitive(exp.args[0].obj);
790
+ }
791
+ rules.push([exp.ruleName, ruleInfo]);
792
+ return ir.apply(exp.ruleName, exp.args.map(arg => simplifyArg(arg, isSyntactic)));
793
+ }
794
+ case pexprs.Lex:
795
+ return ir.lex(simplify(exp.expr, true));
796
+ case pexprs.Lookahead:
797
+ return ir.lookahead(simplify(exp.expr, isSyntactic));
798
+ case pexprs.Not:
799
+ return ir.not(simplify(exp.expr, isSyntactic));
800
+ case pexprs.Opt:
801
+ return ir.opt(simplify(exp.expr, isSyntactic));
802
+ case pexprs.Plus:
803
+ return ir.plus(simplify(exp.expr, isSyntactic));
804
+ case pexprs.Seq:
805
+ return ir.seq(exp.factors.map(e => simplify(e, isSyntactic)));
806
+ case pexprs.Star:
807
+ return ir.star(simplify(exp.expr, isSyntactic));
808
+ case pexprs.Param:
809
+ return ir.param(exp.index);
810
+ case pexprs.Range:
811
+ return ir.range(exp.from, exp.to);
812
+ case pexprs.Terminal:
813
+ return ir.terminal(exp.obj);
814
+ case pexprs.UnicodeChar:
815
+ // Support older versions of ohm-js (<= 17.2.1).
816
+ // TODO: Remove this eventually.
817
+ return ir.unicodeChar(exp.categoryOrProp ?? exp['category']);
818
+ default:
819
+ throw new Error(`not handled: ${exp.constructor.name}`);
820
+ }
821
+ };
822
+ const newRules = new Map();
823
+ // Go over all the rules and simplify them.
824
+ // Note that `rules` can grow during this process; when we hit the end
825
+ // of the array, there's no more work to be done.
826
+ for (let i = 0; i < rules.length; i++) {
827
+ const [name, info] = rules[i];
828
+ if (!newRules.has(name)) {
829
+ newRules.set(name, {
830
+ ...info,
831
+ body: simplify(info.body, info.isSyntactic),
832
+ });
833
+ }
834
+ }
835
+ this.rules = newRules;
836
+ this.liftedTerminals = liftedTerminals;
837
+ }
838
// Generate the function `$<name>`, which takes one i32 argument — the index
// of a lifted terminal — and dispatches on it to the code matching that
// terminal. Returns the newly added function declaration.
compileTerminalRule(name) {
  const { asm } = this;
  // Terminals are always matched in a lexical context.
  this.beginLexContext(true);
  asm.addFunction(`$${name}`, [w.valtype.i32], [w.valtype.i32], () => {
    asm.addLocal('ret', w.valtype.i32);
    asm.addLocal('tmp', w.valtype.i32);
    asm.addLocal('postSpacesPos', w.valtype.i32);
    asm.addLocal('failurePos', w.valtype.i32);
    // failurePos = -1
    asm.i32Const(-1);
    asm.localSet('failurePos');
    const values = this.liftedTerminals.values();
    // One switch case per lifted terminal, selected by the argument.
    asm.switch(w.blocktype.empty, () => asm.localGet('__arg0'), values.length, i => this.emitTerminal(ir.terminal(values[i])), () => asm.emit(instr.unreachable));
    // Note: unlike a regular rule evaluation, this function just returns
    // the raw result of PExpr evaluation.
    asm.localGet('ret');
  });
  this.endLexContext();
  return this.asm._functionDecls.at(-1);
}
857
+ beginLexContext(initialVal) {
858
+ assert(this._lexContextStack.length === 0);
859
+ this._lexContextStack.push(initialVal);
860
+ }
861
+ endLexContext() {
862
+ this._lexContextStack.pop();
863
+ assert(this._lexContextStack.length === 0);
864
+ }
865
// Generate the eval function `$<name>` for the given rule and return the
// newly added function declaration. The function returns the packed value
// produced by `asm.ruleEvalReturn()`.
compileRule(name) {
  const { asm } = this;
  const ruleInfo = getNotNull(this.rules, name);
  // Rules that have `patterns` take a single i32 parameter.
  let paramTypes = [];
  if (ruleInfo.patterns) {
    paramTypes = [w.valtype.i32];
  }
  // const preHook = () => {
  //   if (['alnum'].includes(name)) {
  //     this.emitSingleCharFastPath('alnum');
  //   }
  // };
  // The implicit-space-skipping rule restores both failure positions to
  // the value `rightmostFailurePos` had on entry.
  const restoreFailurePos = name === this._applySpacesImplicit.ruleName;
  this.beginLexContext(!ruleInfo.isSyntactic);
  asm.addFunction(`$${name}`, paramTypes, [w.valtype.i32], () => {
    asm.addLocal('ret', w.valtype.i32);
    asm.addLocal('tmp', w.valtype.i32);
    asm.addLocal('postSpacesPos', w.valtype.i32);
    asm.addLocal('failurePos', w.valtype.i32);
    // failurePos = rightmostFailurePos
    // NOTE(review): compileTerminalRule initializes failurePos to -1
    // instead — confirm that the difference is intended.
    asm.globalGet('rightmostFailurePos');
    asm.localSet('failurePos');
    // TODO: Find a simpler way to do this.
    if (restoreFailurePos) {
      asm.addLocal('origFailurePos', w.valtype.i32);
      asm.globalGet('rightmostFailurePos');
      asm.localSet('origFailurePos');
    }
    // String markers are emitted into the code stream here (presumably
    // processed by `rewriteDebugLabels` — not visible in this chunk).
    asm.emit(`BEGIN eval:${name}`);
    this.emitPExpr(ruleInfo.body);
    if (restoreFailurePos) {
      // rightmostFailurePos = failurePos = origFailurePos
      asm.localGet('origFailurePos');
      asm.dup();
      asm.globalSet('rightmostFailurePos');
      asm.localSet('failurePos');
    }
    asm.ruleEvalReturn();
    asm.emit(`END eval:${name}`);
  });
  this.endLexContext();
  return this.asm._functionDecls.at(-1);
}
906
+ // Beginning with the default start rule, recursively visit all reachable
907
+ // parsing expressions. For all parameterized rules, create a specialized
908
+ // version of that rule for every possible set of actual parameters.
909
+ // At the end, there are no more applications with arguments.
910
+ specializeApplications() {
911
+ const newRules = new Map();
912
+ const { rules } = this;
913
+ const patternsByRule = new Map();
914
+ let hasCaseInsensitiveTerminals = false;
915
+ const specialize = exp => ir.rewrite(exp, {
916
+ Apply: app => {
917
+ const { ruleName, children } = app;
918
+ const ruleInfo = getNotNull(rules, ruleName);
919
+ const specializedName = ir.specializedName(app);
920
+ if (['liquidRawTagImpl', 'liquidTagRule', 'anyExceptStar', 'anyExceptPlus'].includes(ruleName)) {
921
+ this._deferRuleId(specializedName);
922
+ }
923
+ else {
924
+ this._ensureRuleId(specializedName);
925
+ }
926
+ // If not yet seen, recursively visit the body of the specialized
927
+ // rule. Note that this also applies to non-parameterized rules!
928
+ if (!newRules.has(specializedName)) {
929
+ newRules.set(specializedName, {}); // Prevent infinite recursion.
930
+ // Visit the body with the parameter substituted, to ensure we
931
+ // discover all possible applications that can occur at runtime.
932
+ let body = specialize(ir.substituteParams(ruleInfo.body, children));
933
+ // If there are any args, replace the body with an application of
934
+ // the generalized rule.
935
+ if (children.length > 0) {
936
+ // This is the first time we've seen this pattern; record it.
937
+ const rulePatterns = setdefault(patternsByRule, ruleName, () => new Map());
938
+ rulePatterns.set(specializedName, children);
939
+ if (EMIT_GENERALIZED_RULES) {
940
+ // Note that we deliberately *don't* visit this application yet,
941
+ // so it won't be assigned a rule ID.
942
+ const caseIdx = rulePatterns.size - 1;
943
+ body = ir.applyGeneralized(ruleName, caseIdx);
944
+ }
945
+ }
946
+ newRules.set(specializedName, { ...ruleInfo, body, formals: [] });
947
+ }
948
+ // Replace with an application of the specialized rule.
949
+ return ir.apply(specializedName);
950
+ },
951
+ CaseInsensitive: exp => {
952
+ hasCaseInsensitiveTerminals = true;
953
+ return exp;
954
+ },
955
+ });
956
+ specialize(ir.apply(this.grammar.defaultStartRule));
957
+ this._maxMemoizedRuleId = this.ruleIdByName.size;
958
+ this._deferredRuleIds.forEach(name => this._ensureRuleId(name, { notMemoized: true }));
959
+ // Make a special rule for implicit space skipping, with the same body
960
+ // as the real `spaces` rule.
961
+ const spacesInfo = getNotNull(rules, 'spaces');
962
+ newRules.set('$spaces', {
963
+ ...spacesInfo,
964
+ body: specialize(spacesInfo.body),
965
+ });
966
+ // We inline applications of caseInsensitive, but they still produce a
967
+ // nonterminal node as if they weren't inlined. Because of that, we need
968
+ // to assign a rule ID and ensure that the rule eval function exists.
969
+ if (hasCaseInsensitiveTerminals) {
970
+ assert(!newRules.has('caseInsensitive'));
971
+ this._ensureRuleId('caseInsensitive', { notMemoized: true });
972
+ newRules.set('caseInsensitive', {
973
+ name: 'caseInsensitive',
974
+ body: ir.seq([ir.not(ir.end()), ir.end()]), // ~end end
975
+ formals: [],
976
+ description: '',
977
+ });
978
+ }
979
+ this.rules = newRules;
980
+ if (EMIT_GENERALIZED_RULES) {
981
+ const insertDispatches = (exp, patterns) => ir.rewrite(exp, {
982
+ Apply: app => (app.children.length === 0 ? app : ir.dispatch(app, patterns)),
983
+ Param: p => ir.dispatch(p, patterns),
984
+ });
985
+ // Save the observed patterns of the parameterized rules.
986
+ // All non-parameterized & specialized rules have been discovered and
987
+ // assigned IDs; any rule IDs assigned here won't be memoized.
988
+ for (const [name, patterns] of patternsByRule.entries()) {
989
+ this._ensureRuleId(name, { notMemoized: true });
990
+ const ruleInfo = getNotNull(rules, name);
991
+ const patternsArr = [...patterns.values()];
992
+ newRules.set(name, {
993
+ ...ruleInfo,
994
+ body: insertDispatches(ruleInfo.body, patternsArr),
995
+ patterns: patternsArr,
996
+ });
997
+ }
998
+ }
999
+ }
1000
+ buildRuleNamesSection(ruleNames) {
1001
+ // A custom section that allows the clients to look up rule IDs by name.
1002
+ // They're simply encoded as a vec(name), and the client can turn this
1003
+ // into a list/array and use the ruleId as the index.
1004
+ return w.custom(w.name('ruleNames'), w.vec(ruleNames.map((n, i) => w.name(n))));
1005
+ }
1006
+ buildModule(typeMap, functionDecls) {
1007
+ const ruleNames = this.ruleIdByName.values();
1008
+ assert(this.importCount() === prebuilt.destImportCount, 'import count mismatch');
1009
+ // Ensure that `ruleNames` is in the correct order.
1010
+ ruleNames.forEach((n, i) => assert(i === this.ruleIdByName.getIndex(n), `out of order: ${n}`));
1011
+ typeMap.addDecls(this.importDecls);
1012
+ typeMap.addDecls(functionDecls);
1013
+ const globals = [];
1014
+ const imports = this.importDecls.map((f, i) => w.import_(f.module, f.name, w.importdesc.func(typeMap.getIdxForDecl(f))));
1015
+ const funcs = functionDecls.map((f, i) => w.typeidx(typeMap.getIdxForDecl(f)));
1016
+ const codes = functionDecls.map(f => w.code(w.func(f.locals, f.body)));
1017
+ const exportOffset = this.importCount() + prebuilt.funcsec.entryCount;
1018
+ const exports = functionDecls.map((f, i) => w.export_(f.name, w.exportdesc.func(i + exportOffset)));
1019
+ exports.push(w.export_('memory', w.exportdesc.mem(0)));
1020
+ exports.push(w.export_('resetHeap', w.exportdesc.func(prebuiltFuncidx('resetHeap'))));
1021
+ exports.push(w.export_('match', w.exportdesc.func(prebuiltFuncidx('match'))));
1022
+ exports.push(w.export_('bindingsAt', w.exportdesc.func(prebuiltFuncidx('bindingsAt'))));
1023
+ exports.push(w.export_('getBindingsLength', w.exportdesc.func(prebuiltFuncidx('getBindingsLength'))));
1024
+ // Process globals.
1025
+ for (const [name, { type, mut, initExpr }] of this.asm._globals.entries()) {
1026
+ globals.push(w.global(w.globaltype(type, mut), initExpr));
1027
+ // Export all of the globals so they get a name for debugging.
1028
+ // TODO: Handle this instead via the name section.
1029
+ exports.push(w.export_(name, [0x03, this.asm.globalidx(name)]));
1030
+ }
1031
+ // The module will have a table containing references to all of the rule eval functions.
1032
+ // The table declaration goes in the table section; the data in the element section.
1033
+ // Note that the rule ID can be used directly as the table index.
1034
+ const numRules = this.ruleIdByName.size;
1035
+ const table = w.table(w.tabletype(w.elemtype.funcref, w.limits.minmax(numRules, numRules)));
1036
+ const tableData = ruleNames.map(name => this.ruleEvalFuncIdx(name));
1037
+ assert(numRules === tableData.length, 'Invalid rule count');
1038
+ // Determine the index of the start function.
1039
+ const indexOfStart = functionDecls.findIndex(f => f.name === 'start');
1040
+ assert(indexOfStart !== -1, 'No start function found');
1041
+ const startFuncidx = this.importCount() + prebuilt.funcsec.entryCount + indexOfStart;
1042
+ // Note: globals are *not* merged; they are assumed to be shared.
1043
+ const mod = w.module([
1044
+ mergeSections(w.SECTION_ID_TYPE, prebuilt.typesec, typeMap.getTypes()),
1045
+ mergeSections(w.SECTION_ID_IMPORT, prebuilt.importsec, imports),
1046
+ mergeSections(w.SECTION_ID_FUNCTION, prebuilt.funcsec, funcs),
1047
+ w.tablesec([table]),
1048
+ w.memsec([w.mem(w.memtype(w.limits.min(1)))]),
1049
+ w.globalsec(globals),
1050
+ w.exportsec(exports),
1051
+ w.startsec(w.start(startFuncidx)),
1052
+ w.elemsec([w.elem(w.tableidx(0), [instr.i32.const, w.i32(0), instr.end], tableData)]),
1053
+ mergeSections(w.SECTION_ID_CODE, prebuilt.codesec, codes),
1054
+ w.customsec(this.buildRuleNamesSection(ruleNames)),
1055
+ w.namesec(w.namedata(w.modulenamesubsec(this.grammar.name))),
1056
+ ]);
1057
+ const bytes = Uint8Array.from(mod.flat(Infinity));
1058
+ // (async () => {
1059
+ // const {readWasm} = await wabt();
1060
+ // const m = readWasm(bytes, {check: true});
1061
+ // m.validate();
1062
+ // })();
1063
+ // DEBUG
1064
+ // import('fs').then(fs => {
1065
+ // const filename = `out-${new Date().getTime()}.wasm`;
1066
+ // fs.writeFileSync(`/Users/pdubroy/${filename}`, bytes);
1067
+ // console.log(` wrote ${filename}`);
1068
+ // });
1069
+ // END DEBUG
1070
+ return bytes;
1071
+ }
1072
+ // A *brilliant* way to add arbitrary labels to the generated code.
1073
+ // Goes through the body of all functions in `decls`, and replaces any
1074
+ // strings with a call to a dummy function with the same name.
1075
+ // Ensures that there are no duplicate dummy function names, but does not
1076
+ // guarantee that there are no collisions with other functions.
1077
+ // Returns the list of dummy functions that need to be added to the module.
1078
+ rewriteDebugLabels(decls) {
1079
+ let nextIdx = defaultImports.length;
1080
+ const intoFuncidx = i => w.funcidx(prebuilt.importsec.entryCount + defaultImports.length + i);
1081
+ const names = new Set();
1082
+ for (let i = 0; i < decls.length; i++) {
1083
+ const entry = decls[i];
1084
+ entry.body = entry.body.flatMap(x => {
1085
+ if (typeof x !== 'string')
1086
+ return x;
1087
+ // If debugging is disabled, just drop the string altogether.
1088
+ if (!DEBUG)
1089
+ return [];
1090
+ // Claim one of the reserved debug functions…
1091
+ const idx = nextIdx++;
1092
+ const decl = checkNotNull(this.importDecls[idx], 'Too few debug functions!');
1093
+ assert(decl.module === 'debug');
1094
+ decl.name = uniqueName(names, x);
1095
+ let pushArg = [];
1096
+ if (x.startsWith('END')) {
1097
+ decl.paramTypes = [w.valtype.i32];
1098
+ // We want to pass 'ret', but to figure out its index, we need to
1099
+ // account for the number of parameters.
1100
+ const retIdx = entry.paramTypes.length;
1101
+ pushArg = [instr.local.get, w.localidx(retIdx)];
1102
+ }
1103
+ // …and replace the string with a call to that function.
1104
+ return [...pushArg, instr.call, intoFuncidx(idx)].flat(Infinity);
1105
+ });
1106
+ }
1107
+ }
1108
+ functionDecls() {
1109
+ const ruleDecls = [];
1110
+ for (const name of this.ruleIdByName.keys()) {
1111
+ if (name === '$term') {
1112
+ ruleDecls.push(this.compileTerminalRule(name));
1113
+ }
1114
+ else {
1115
+ assert(!name.startsWith('$term'));
1116
+ ruleDecls.push(this.compileRule(name));
1117
+ }
1118
+ }
1119
+ const { asm } = this;
1120
+ asm.addFunction('start', [], [], () => {
1121
+ asm.emit(instr.call, w.funcidx(prebuilt.startFuncidx));
1122
+ });
1123
+ ruleDecls.push(asm._functionDecls.at(-1));
1124
+ return ruleDecls;
1125
+ }
1126
+ // Handle an application-like expression (i.e. an actual Apply, or a Param)
1127
+ // in the *body* of the generalized version of a parameterized rule.
1128
+ // Generalized rules can behave like a specific specialized version of the
1129
+ // rule; they take an i32 `caseIdx` argument that selects the behaviour.
1130
+ // Then, for any Param -- or Apply that involves a Param -- we dynamically
1131
+ // dispatch to the correct specialized version of the rule.
1132
+ emitDispatch({ child: exp, patterns }) {
1133
+ const { asm } = this;
1134
+ const handleCase = i => {
1135
+ // Substitute the params to get the concrete expression that
1136
+ // needs to be inserted here.
1137
+ let newExp = ir.substituteParams(exp, patterns[i]);
1138
+ if (newExp.type === 'Apply') {
1139
+ // If the application has arguments, we need to dispatch to the
1140
+ // correct specialized version of the rule.
1141
+ newExp = ir.apply(ir.specializedName(newExp));
1142
+ }
1143
+ this.emitPExpr(newExp);
1144
+ };
1145
+ if (patterns.length === 1) {
1146
+ handleCase(0); // No need for a switch.
1147
+ return;
1148
+ }
1149
+ assert(patterns.length > 1);
1150
+ asm.switch(w.blocktype.empty, () => asm.localGet('__arg0'), patterns.length, handleCase, () => {
1151
+ asm.emit(instr.unreachable);
1152
+ });
1153
+ }
1154
+ // Contract: emitPExpr always means we're going deeper in the PExpr tree.
1155
+ emitPExpr(exp, { preHook, postHook } = {}) {
1156
+ const { asm } = this;
1157
+ const allowFastApply = !preHook && !postHook;
1158
+ // Note that after specializeApplications, there are two classes of rule:
1159
+ // - specialized rules, which contain no Params, and only have
1160
+ // applications without args
1161
+ // - generalized rules, which may contain Params and apps w/ args.
1162
+ assert(!(exp.type === 'Apply' && exp.children.length > 0));
1163
+ if (exp.type === 'Apply' && allowFastApply) {
1164
+ asm.emit(`BEGIN apply:${exp.ruleName}`);
1165
+ this.emitApply(exp);
1166
+ asm.emit(`END apply:${exp.ruleName}`);
1167
+ return;
1168
+ }
1169
+ if (exp.type === 'ApplyGeneralized') {
1170
+ assert(EMIT_GENERALIZED_RULES && allowFastApply);
1171
+ asm.emit(`BEGIN applyGeneralized:${exp.ruleName}`);
1172
+ this.emitApplyGeneralized(exp);
1173
+ asm.emit(`END applyGeneralized:${exp.ruleName}`);
1174
+ return;
1175
+ }
1176
+ const debugLabel = ir.toString(exp);
1177
+ asm.emit(`BEGIN ${debugLabel}`);
1178
+ asm.pushStackFrame();
1179
+ // Wrap the body in a block, which is useful for two reasons:
1180
+ // - it allows early returns.
1181
+ // - it makes sure that the generated code doesn't have stack effects.
1182
+ asm.block(w.blocktype.empty, () => {
1183
+ if (preHook)
1184
+ preHook();
1185
+ switch (exp.type) {
1186
+ case 'Alt':
1187
+ this.emitAlt(exp);
1188
+ break;
1189
+ case 'Any':
1190
+ this.emitAny();
1191
+ break;
1192
+ case 'CaseInsensitive':
1193
+ this.emitCaseInsensitive(exp);
1194
+ break;
1195
+ case 'Dispatch':
1196
+ this.emitDispatch(exp);
1197
+ break;
1198
+ case 'End':
1199
+ this.emitEnd();
1200
+ break;
1201
+ case 'Lex':
1202
+ this.emitLex(exp);
1203
+ break;
1204
+ case 'LiftedTerminal':
1205
+ this.emitApplyTerm(exp);
1206
+ break;
1207
+ case 'Lookahead':
1208
+ this.emitLookahead(exp);
1209
+ break;
1210
+ case 'Not':
1211
+ this.emitNot(exp);
1212
+ break;
1213
+ case 'Seq':
1214
+ this.emitSeq(exp);
1215
+ break;
1216
+ case 'Star':
1217
+ this.emitStar(exp);
1218
+ break;
1219
+ case 'Opt':
1220
+ this.emitOpt(exp);
1221
+ break;
1222
+ case 'Range':
1223
+ this.emitRange(exp);
1224
+ break;
1225
+ case 'Plus':
1226
+ this.emitPlus(exp);
1227
+ break;
1228
+ case 'Terminal':
1229
+ this.emitTerminal(exp);
1230
+ break;
1231
+ case 'UnicodeChar':
1232
+ this.emitUnicodeChar(exp);
1233
+ break;
1234
+ case 'Param':
1235
+ // Fall through (Params should not exist at codegen time).
1236
+ default:
1237
+ throw new Error(`not handled: ${exp.type}`);
1238
+ }
1239
+ }, 'pexprEnd');
1240
+ if (postHook)
1241
+ postHook();
1242
+ asm.popStackFrame();
1243
+ asm.emit(`END ${debugLabel}`);
1244
+ }
1245
+ emitAlt(exp) {
1246
+ const { asm } = this;
1247
+ asm.block(w.blocktype.empty, () => {
1248
+ for (const term of exp.children) {
1249
+ this.emitPExpr(term);
1250
+ asm.localGet('ret');
1251
+ asm.condBreak(asm.depthOf('pexprEnd'));
1252
+ asm.restorePos();
1253
+ asm.restoreBindingsLength();
1254
+ }
1255
+ });
1256
+ }
1257
+ emitAny() {
1258
+ const { asm } = this;
1259
+ this.wrapTerminalLike(() => {
1260
+ asm.i32Const(CHAR_CODE_END);
1261
+ asm.nextCharCode();
1262
+ asm.i32Eq();
1263
+ asm.condBreak(asm.depthOf('failure'));
1264
+ });
1265
+ }
1266
+ emitApplyTerm({ terminalId }) {
1267
+ const { asm } = this;
1268
+ this.maybeEmitSpaceSkipping();
1269
+ // Save the original position.
1270
+ asm.globalGet('pos');
1271
+ asm.localSet('tmp');
1272
+ // Call the terminal rule, and use its result as ours.
1273
+ asm.i32Const(terminalId);
1274
+ asm.emit(instr.call, this.ruleEvalFuncIdx('$term'));
1275
+ asm.localTee('ret');
1276
+ // Update the failure position if necessary.
1277
+ asm.ifFalse(w.blocktype.empty, () => {
1278
+ asm.updateLocalFailurePos(() => asm.localGet('tmp'));
1279
+ });
1280
+ }
1281
+ // Emit an application of the generalized version of a parameterized rule.
1282
+ // Need to know which case we're applying!
1283
+ emitApplyGeneralized(exp) {
1284
+ const { asm } = this;
1285
+ asm.i32Const(this.ruleId(exp.ruleName));
1286
+ asm.i32Const(exp.caseIdx);
1287
+ asm.callPrebuiltFunc('evalApplyGeneralized');
1288
+ asm.localSet('ret');
1289
+ }
1290
+ emitApply(exp) {
1291
+ assert(exp.children.length === 0);
1292
+ // Avoid infinite recursion.
1293
+ if (exp !== this._applySpacesImplicit) {
1294
+ this.maybeEmitSpaceSkipping();
1295
+ }
1296
+ const { asm } = this;
1297
+ const ruleId = this.ruleId(exp.ruleName);
1298
+ asm.i32Const(ruleId);
1299
+ // TODO: Should lifted expressions be memoized?
1300
+ // TODO: Handle this at grammar parse time, not here.
1301
+ if (exp.ruleName.includes('_') || ruleId >= this._maxMemoizedRuleId) {
1302
+ asm.callPrebuiltFunc('evalApplyNoMemo0');
1303
+ }
1304
+ else {
1305
+ asm.callPrebuiltFunc('evalApply0');
1306
+ }
1307
+ // The application may have updated rightmostFailurePos; if so, we may
1308
+ // need to update the local failure position.
1309
+ asm.updateLocalFailurePos(() => asm.globalGet('rightmostFailurePos'));
1310
+ asm.localSet('ret');
1311
+ }
1312
+ emitEnd() {
1313
+ const { asm } = this;
1314
+ this.wrapTerminalLike(() => {
1315
+ asm.i32Const(CHAR_CODE_END);
1316
+ // Careful! We shouldn't move the pos here. Or does it matter?
1317
+ asm.currCharCode();
1318
+ asm.i32Ne();
1319
+ asm.condBreak(asm.depthOf('failure'));
1320
+ });
1321
+ }
1322
+ emitFail() {
1323
+ const { asm } = this;
1324
+ asm.i32Const(0);
1325
+ asm.localSet('ret');
1326
+ }
1327
+ emitLex({ child }) {
1328
+ this._lexContextStack.push(true);
1329
+ this.emitPExpr(child);
1330
+ this._lexContextStack.pop();
1331
+ }
1332
+ emitLookahead({ child }) {
1333
+ const { asm } = this;
1334
+ // TODO: Should positive lookahead record a CST?
1335
+ this.emitPExpr(child);
1336
+ asm.restoreBindingsLength();
1337
+ asm.restorePos();
1338
+ }
1339
+ emitNot({ child }) {
1340
+ const { asm } = this;
1341
+ // Push an inner stack frame with the failure positions.
1342
+ asm.pushStackFrame(() => {
1343
+ asm.saveFailurePos();
1344
+ asm.saveGlobalFailurePos();
1345
+ });
1346
+ this.emitPExpr(child);
1347
+ // Invert the result.
1348
+ asm.localGet('ret');
1349
+ asm.emit(instr.i32.eqz);
1350
+ asm.localSet('ret');
1351
+ // Restore all global and local state.
1352
+ asm.restoreGlobalFailurePos();
1353
+ asm.restoreFailurePos();
1354
+ asm.popStackFrame(); // Pop inner frame.
1355
+ asm.restoreBindingsLength();
1356
+ asm.restorePos();
1357
+ }
1358
+ emitOpt({ child }) {
1359
+ const { asm } = this;
1360
+ this.emitPExpr(child);
1361
+ asm.localGet('ret');
1362
+ asm.ifFalse(w.blocktype.empty, () => {
1363
+ asm.restorePos();
1364
+ asm.restoreBindingsLength();
1365
+ });
1366
+ asm.newIterNodeWithSavedPosAndBindings(ir.outArity(child), true);
1367
+ asm.localSet('ret');
1368
+ }
1369
+ emitPlus(plusExp) {
1370
+ const { asm } = this;
1371
+ this.emitPExpr(plusExp.child);
1372
+ asm.localGet('ret');
1373
+ asm.if(w.blocktype.empty, () => {
1374
+ this.emitStar(plusExp);
1375
+ });
1376
+ }
1377
+ emitRange(exp) {
1378
+ assert(exp.lo.length === 1 && exp.hi.length === 1);
1379
+ const lo = exp.lo.charCodeAt(0);
1380
+ const hi = exp.hi.charCodeAt(0);
1381
+ // TODO: Do we disallow 0xff in the range?
1382
+ const { asm } = this;
1383
+ this.wrapTerminalLike(() => {
1384
+ asm.nextCharCode();
1385
+ // if (c > hi) return 0;
1386
+ asm.dup();
1387
+ asm.i32Const(hi);
1388
+ asm.emit(instr.i32.gt_u);
1389
+ asm.condBreak(asm.depthOf('failure'));
1390
+ // if (c >= lo) return 0;
1391
+ asm.i32Const(lo);
1392
+ asm.emit(instr.i32.lt_u);
1393
+ asm.condBreak(asm.depthOf('failure'));
1394
+ });
1395
+ }
1396
+ emitSeq({ children }) {
1397
+ const { asm } = this;
1398
+ // An empty sequence always succeeds.
1399
+ if (children.length === 0) {
1400
+ asm.setRet(1);
1401
+ return;
1402
+ }
1403
+ for (const c of children) {
1404
+ this.emitPExpr(c);
1405
+ asm.localGet('ret');
1406
+ asm.emit(instr.i32.eqz);
1407
+ asm.condBreak(asm.depthOf('pexprEnd'));
1408
+ }
1409
+ }
1410
+ maybeEmitSpaceSkipping() {
1411
+ if (IMPLICIT_SPACE_SKIPPING && !this.inLexicalContext()) {
1412
+ this.asm.emit('BEGIN space skipping');
1413
+ this.emitApply(this._applySpacesImplicit);
1414
+ this.asm.emit('END space skipping');
1415
+ }
1416
+ }
1417
+ emitStar({ child }, { reuseStackFrame } = {}) {
1418
+ const { asm } = this;
1419
+ // We push another stack frame because we need to save and restore
1420
+ // the position just before the last (failed) expression.
1421
+ asm.pushStackFrame();
1422
+ asm.block(w.blocktype.empty, () => {
1423
+ asm.loop(w.blocktype.empty, () => {
1424
+ asm.savePos();
1425
+ asm.saveNumBindings();
1426
+ this.emitPExpr(child);
1427
+ asm.localGet('ret');
1428
+ asm.emit(instr.i32.eqz);
1429
+ asm.condBreak(asm.depthOf('done'));
1430
+ asm.continue(0);
1431
+ });
1432
+ }, 'done');
1433
+ asm.restorePos();
1434
+ asm.restoreBindingsLength();
1435
+ asm.popStackFrame();
1436
+ asm.newIterNodeWithSavedPosAndBindings(ir.outArity(child));
1437
+ asm.localSet('ret');
1438
+ }
1439
+ wrapTerminalLike(thunk) {
1440
+ const { asm } = this;
1441
+ this.maybeEmitSpaceSkipping();
1442
+ // With space skipping, the startIdx of the terminal node is not
1443
+ // necessarily the same as the saved pos. So we save the new pos before
1444
+ // actually matching.
1445
+ asm.globalGet('pos');
1446
+ asm.localSet('postSpacesPos');
1447
+ asm.block(w.blocktype.empty, () => {
1448
+ asm.block(w.blocktype.empty, () => {
1449
+ thunk();
1450
+ asm.newTerminalNode();
1451
+ asm.localSet('ret');
1452
+ asm.break(asm.depthOf('_done'));
1453
+ }, 'failure');
1454
+ asm.updateLocalFailurePos(() => asm.localGet('postSpacesPos'));
1455
+ asm.setRet(0);
1456
+ }, '_done');
1457
+ }
1458
+ emitCaseInsensitive({ value }) {
1459
+ const { asm } = this;
1460
+ assert([...value].every(c => c <= '\x7f'), 'not supported: case-insensitive Unicode');
1461
+ const str = value.toLowerCase();
1462
+ asm.emit(JSON.stringify(`caseInsensitive:${value}`));
1463
+ this.wrapTerminalLike(() => {
1464
+ // TODO:
1465
+ // - proper UTF-8!
1466
+ // - handle longer terminals with a loop
1467
+ // - SIMD
1468
+ for (const c of [...str]) {
1469
+ asm.i32Const(c.charCodeAt(0));
1470
+ asm.currCharCode();
1471
+ if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) {
1472
+ // Cute trick: the diff between upper and lower case is bit 5.
1473
+ asm.i32Const(0x20);
1474
+ asm.emit(instr.i32.or);
1475
+ }
1476
+ asm.emit(instr.i32.ne);
1477
+ asm.condBreak(asm.depthOf('failure'));
1478
+ asm.incPos();
1479
+ }
1480
+ });
1481
+ // Case-insensitive terminals are inlined, but should appear in the CST
1482
+ // as if they are actual applications.
1483
+ asm.localGet('ret');
1484
+ asm.if(w.blocktype.empty, () => {
1485
+ asm.newCaseInsensitiveNode(this.ruleId('caseInsensitive'));
1486
+ asm.localSet('ret');
1487
+ });
1488
+ }
1489
+ emitTerminal(exp) {
1490
+ const { asm } = this;
1491
+ asm.emit(JSON.stringify(exp.value));
1492
+ this.wrapTerminalLike(() => {
1493
+ // TODO:
1494
+ // - handle longer terminals with a loop?
1495
+ // - SIMD
1496
+ for (const c of exp.value) {
1497
+ asm.i32Const(c.charCodeAt(0));
1498
+ asm.currCharCode();
1499
+ asm.emit(instr.i32.ne);
1500
+ asm.condBreak(asm.depthOf('failure'));
1501
+ asm.incPos();
1502
+ }
1503
+ });
1504
+ }
1505
+ emitUnicodeChar(exp) {
1506
+ const { asm } = this;
1507
+ // TODO: Add support for more categories, by calling out to the host.
1508
+ assert(['Ll', 'Lu', 'Ltmo'].includes(exp.categoryOrProp));
1509
+ const makeLabels = () => asciiChars.map(c => {
1510
+ const isLowercase = 'a' <= c && c <= 'z';
1511
+ const isUppercase = 'A' <= c && c <= 'Z';
1512
+ if ((exp.categoryOrProp === 'Lu' && isUppercase) ||
1513
+ (exp.categoryOrProp === 'Ll' && isLowercase)) {
1514
+ return w.labelidx(asm.depthOf('fastSuccess'));
1515
+ }
1516
+ return w.labelidx(asm.depthOf('failure'));
1517
+ });
1518
+ this.wrapTerminalLike(() => {
1519
+ asm.block(w.blocktype.empty, () => {
1520
+ asm.block(w.blocktype.empty, () => {
1521
+ // Fast path: a jump table for ASCII characters.
1522
+ asm.block(w.blocktype.empty, () => {
1523
+ asm.currCharCode();
1524
+ asm.brTable(makeLabels(), w.labelidx(asm.depthOf('default')));
1525
+ }, 'default');
1526
+ // Fall through: not an ASCII character.
1527
+ // Push the arg: a bitmap indicating the categories.
1528
+ switch (exp.categoryOrProp) {
1529
+ case 'Lu':
1530
+ asm.i32Const(1 << 1);
1531
+ break;
1532
+ case 'Ll':
1533
+ asm.i32Const(1 << 2);
1534
+ break;
1535
+ case 'Ltmo':
1536
+ asm.i32Const((1 << 3) | (1 << 4) | (1 << 5));
1537
+ break;
1538
+ default:
1539
+ assert(false, 'not handled');
1540
+ }
1541
+ asm.callPrebuiltFunc('doMatchUnicodeChar');
1542
+ asm.ifElse(w.blocktype.empty, () => asm.break(asm.depthOf('slowSuccess')), () => asm.break(asm.depthOf('failure')));
1543
+ }, 'fastSuccess');
1544
+ asm.incPos();
1545
+ }, 'slowSuccess');
1546
+ });
1547
+ }
1548
+ }
1549
// Memory layout:
// - First page is for the PExpr stack (origPos, etc.), growing downwards.
// - 2nd page is for input buffer (max 64k for now).
// - Pages 3-18 (incl.) for memo table (4 entries per char, 4 bytes each).
// - Remainder (>18) is for CST (growing upwards).
// The stack's starting offset is WASM_PAGE_SIZE (64 KiB), i.e. the end of
// the first page.
Compiler.STACK_START_OFFSET = WASM_PAGE_SIZE; // Starting offset of the stack.
// Assembler constants exposed for tests. `checkNotNull` throws while this
// module is being loaded if either constant is null or undefined.
// Note that CST_NODE_SIZE_BYTES is taken from
// Assembler.CST_NODE_HEADER_SIZE_BYTES.
export const ConstantsForTesting = {
    CST_NODE_SIZE_BYTES: checkNotNull(Assembler.CST_NODE_HEADER_SIZE_BYTES),
    MEMO_COL_SIZE_BYTES: checkNotNull(Assembler.MEMO_COL_SIZE_BYTES),
};
1559
+ //# sourceMappingURL=Compiler.js.map