@ohm-js/wasm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mise.toml +2 -0
- package/AGENT.md +25 -0
- package/LICENSE +21 -0
- package/Makefile +23 -0
- package/README.md +34 -0
- package/TODO.md +28 -0
- package/package.json +32 -0
- package/runtime/ohmRuntime.ts +252 -0
- package/scripts/bundlewasm.ts +49 -0
- package/scripts/modparse.ts +397 -0
- package/src/cli.js +36 -0
- package/src/index.js +1195 -0
- package/test/data/_book-review.liquid +257 -0
- package/test/data/_es5.js +1057 -0
- package/test/data/_es5.wasm +0 -0
- package/test/data/_html5shiv-3.7.3.js +326 -0
- package/test/data/_liquid-html.ohm +605 -0
- package/test/go/README.md +67 -0
- package/test/go/cst.go +164 -0
- package/test/go/go.mod +5 -0
- package/test/go/go.sum +2 -0
- package/test/go/matcher.go +370 -0
- package/test/go/testmain.go +161 -0
- package/test/test-es5.js +104 -0
- package/test/test-liquid-html.js +27 -0
- package/test/test-wasm.js +764 -0
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
import {decodeULEB128, decodeSLEB128} from '@thi.ng/leb128';
|
|
2
|
+
import * as w from '@wasmgroundup/emit';
|
|
3
|
+
|
|
4
|
+
import assert from 'node:assert';
|
|
5
|
+
|
|
6
|
+
// For sanity checking, assume that the number of locals is never
|
|
7
|
+
// above a certain number. (We can raise this if necessary.)
|
|
8
|
+
// Note: the limit is applied to each local-declaration group of a function
// body individually (with a strict `<`), not to the function's total.
const MAX_LOCALS = 64;
|
|
9
|
+
|
|
10
|
+
// Binary encodings of the Wasm number types, per the WebAssembly binary
// format: f64 (0x7c), f32 (0x7d), i64 (0x7e), i32 (0x7f).
const WASM_NUMTYPES = [0x7c, 0x7d, 0x7e, 0x7f];
|
|
11
|
+
// Binary encoding of the Wasm vector type (v128), per the WebAssembly binary format.
const WASM_VECTYPE = 0x7b;
|
|
12
|
+
// Binary encodings of the Wasm reference types, per the WebAssembly binary
// format: externref (0x6f), funcref (0x70).
const WASM_REFTYPES = [0x6f, 0x70];
|
|
13
|
+
|
|
14
|
+
/**
 * Returns `x` unchanged after asserting that it is neither `null` nor
 * `undefined`.
 *
 * The declared return type is `NonNullable<T>`, which excludes `undefined` as
 * well as `null`, so both are rejected at runtime. (Callers in this file pass
 * values such as `string.codePointAt(0)` and optional section variables whose
 * "missing" state is `undefined`, not `null`.)
 *
 * @param x - the value to check.
 * @returns `x`, narrowed to exclude `null` and `undefined`.
 * @throws AssertionError if `x` is `null` or `undefined`.
 */
function checkNotNull<T>(x: T): NonNullable<T> {
  // `!= null` (loose) deliberately matches both null and undefined.
  assert(x != null, 'unexpected null value');
  return x as NonNullable<T>;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Validates the 8-byte module preamble at the start of `bytes`: the magic
 * number `\0asm` followed by the little-endian version number 1.
 *
 * Despite the name, this function does not advance any position; it only
 * checks the bytes and throws on the first mismatch.
 *
 * @param bytes - the module binary to check.
 * @throws AssertionError naming the offset, the expected byte and the actual
 *     byte (`undefined` if `bytes` is too short).
 */
function skipPreamble(bytes: Uint8Array): void {
  // prettier-ignore
  const preamble = Uint8Array.of(
    0x00, 0x61, 0x73, 0x6d, // "\0asm"
    0x01, 0x00, 0x00, 0x00, // version 1
  );
  preamble.forEach((want, i) => {
    const got = bytes[i];
    assert(got === want, `bad preamble @${i}: expected ${want}, got ${got}`);
  });
}
|
|
32
|
+
|
|
33
|
+
/**
 * Reports whether `t` is one of the value-type encodings listed in
 * `WASM_NUMTYPES`, `WASM_VECTYPE` or `WASM_REFTYPES`.
 *
 * @param t - a single type byte.
 * @returns `true` if `t` matches a known number, vector or reference type.
 */
function isValtype(t: number): boolean {
  if (WASM_NUMTYPES.includes(t)) {
    return true;
  }
  if (t === WASM_VECTYPE) {
    return true;
  }
  return WASM_REFTYPES.includes(t);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Asserts that `t` is a recognized value-type byte and returns it unchanged.
 *
 * @param t - a single type byte.
 * @returns `t`.
 * @throws AssertionError (with the byte formatted as two hex digits) if
 *     `isValtype(t)` is false.
 */
function checkValtype(t: number): number {
  const hex = t.toString(16).padStart(2, '0');
  const recognized = isValtype(t);
  assert(recognized, `unrecognized valtype: 0x${hex}`);
  return t;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Converts a bigint to a number after asserting that it fits in an unsigned
 * 32-bit integer.
 *
 * @param b - the decoded value.
 * @returns `b` as a number.
 * @throws AssertionError if `b` is negative or greater than 2^32 - 1.
 */
function checkU32(b: bigint): number {
  const fitsInU32 = 0n <= b && b <= 0xffffffffn;
  assert(fitsInU32, `not a valid U32 value: ${b}`);
  return Number(b);
}
|
|
46
|
+
|
|
47
|
+
/** An unparsed vec: its declared element count plus the bytes that follow. */
export type VecContents = {
  /** The bytes of the vec after its element-count prefix. */
  contents: Uint8Array;
  /** The element count read from the vec's length prefix. */
  entryCount: number;
};
|
|
51
|
+
|
|
52
|
+
/** Maps an export name to a (possibly adjusted) index. */
export type ExportSection = Record<string, number>;
|
|
55
|
+
|
|
56
|
+
/**
 * A bare byte payload with no element count attached.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * consumed by importers of this module.
 */
export type RawContents = {
  /** The raw bytes. */
  contents: Uint8Array;
};
|
|
59
|
+
|
|
60
|
+
/** Options accepted by `extractSections`. */
interface ExtractOptions {
  /**
   * Number of imports in the destination module; treated as 0 when omitted.
   * Used to shift function indices from the source module's numbering.
   */
  destImportCount?: number;
}
|
|
63
|
+
|
|
64
|
+
// Extracts the type, import, function, global, export, start, and code sections from a Wasm module.
|
|
65
|
+
/**
 * Walks the sections of a Wasm module binary and extracts the ones needed to
 * splice its functions into another ("destination") module.
 *
 * The type, import, function, global and code sections are returned as opaque
 * vecs (element count + raw bytes). The export section is parsed into a map of
 * function name to function index, and the start section into a single
 * function index. Function indices in the exports, the start section and the
 * `call` instructions of the code section are shifted by
 * `destImportCount - <source import count>` so that they are valid in the
 * destination module. All other sections are skipped.
 *
 * NOTE(review): the source import count is the entry count of the whole import
 * section, so this presumably assumes every import is a function import —
 * confirm against the modules this is used with.
 *
 * @param bytes - the complete module binary, starting with the 8-byte preamble.
 * @param opts - `destImportCount`: number of imports in the destination module
 *     (default 0).
 * @returns `typesec`, `importsec`, `funcsec`, `globalsec` and `codesec` as
 *     `VecContents`; `funcidxByName` (undefined if there is no export
 *     section); and `startFuncidx` (undefined if there is no start section).
 *     `importsec` and `globalsec` default to empty vecs when absent.
 * @throws AssertionError on a bad preamble, on out-of-order sections, on a
 *     section whose declared size runs past the end of `bytes`, or when the
 *     export section's contents do not match its declared size.
 */
export function extractSections(bytes: Uint8Array, opts: ExtractOptions = {}) {
  skipPreamble(bytes);

  const destImportCount = opts.destImportCount ?? 0;
  const nameDecoder = new TextDecoder();

  // Read cursor; starts just past the 8-byte preamble.
  let pos = 8;

  let typesec: VecContents | undefined;
  let importsec: VecContents = {entryCount: 0, contents: new Uint8Array()};
  let funcsec: VecContents | undefined;
  let globalsec: VecContents = {entryCount: 0, contents: new Uint8Array()};
  let exports: ExportSection | undefined;
  let codesec: VecContents | undefined;
  let startFuncidx: number | undefined;

  const parseU32 = (): number => {
    const [val, count] = decodeULEB128(bytes, pos);
    pos += count;
    return checkU32(val);
  };

  // Difference between the number of dest imports and source imports. The
  // import section precedes every section that needs this, so by the time it
  // is called `importsec` is final.
  const funcidxAdjustment = (): number => destImportCount - importsec.entryCount;

  // Position of a section id in the prescribed section order. The data count
  // section (id 12) sits between the element section (9) and the code
  // section (10); every other id is ordered by its numeric value.
  const sectionRank = (id: number): number => (id === 12 ? 9.5 : id);

  // Consume the section id byte and check that it is the one expected.
  function expectSectionId(expectedId: number): void {
    const id = bytes[pos++];
    assert(id === expectedId, `expected section with id ${expectedId}, got ${id}`);
  }

  function skipSection(): void {
    pos++; // section id
    const size = parseU32();
    pos += size;
  }

  // Parse the export section, and return a map of {funcName: newIdx}.
  // `adjustment` is the difference between the number of dest imports
  // and source imports.
  function parseExportSection(expectedId: number, adjustment: number): ExportSection {
    expectSectionId(expectedId);
    const size = parseU32();
    const startPos = pos;
    const count = parseU32();
    const funcs: ExportSection = {};

    for (let i = 0; i < count; i++) {
      // Name: length-prefixed UTF-8 string.
      const nameLen = parseU32();
      const name = nameDecoder.decode(bytes.subarray(pos, pos + nameLen));
      pos += nameLen;

      // Kind is a single byte, followed by the index.
      const kind = bytes[pos++];
      const index = parseU32();

      // Only collect functions (kind 0x00).
      if (kind === 0x00) {
        funcs[name] = index + adjustment;
      }
    }

    assert(
      pos - startPos === size,
      `Export section parsing mismatch: expected ${size} bytes, parsed ${pos - startPos}`
    );
    return funcs;
  }

  // Parse the start section, which contains a single function index.
  function parseStartSection(expectedId: number): number {
    expectSectionId(expectedId);
    parseU32(); // section size; not needed
    return parseU32();
  }

  // Parse a vec without examining its contents.
  // Return the length (# of elements) and the raw contents.
  function parseVecOpaque(size: number): VecContents {
    const end = pos + size;
    // `slice` would silently truncate; fail loudly on a short input instead.
    assert(end <= bytes.length, `section @${pos} extends past end of input`);
    const entryCount = parseU32();
    const contents = bytes.slice(pos, end);
    pos = end;
    return {entryCount, contents};
  }

  function parseVecSectionOpaque(expectedId: number): VecContents {
    expectSectionId(expectedId);
    const size = parseU32();
    return parseVecOpaque(size);
  }

  let lastId = -1;
  while (pos < bytes.length) {
    const id = bytes[pos];

    // Custom sections (id 0) can appear anywhere, so they neither take part
    // in nor reset the ordering check.
    if (id === 0) {
      skipSection();
      continue;
    }

    // All other sections must appear in the prescribed order.
    assert(sectionRank(lastId) < sectionRank(id), `@${pos} expected id > ${lastId}, got ${id}`);
    lastId = id;

    switch (id) {
      case 1:
        typesec = parseVecSectionOpaque(id);
        break;
      case 2:
        importsec = parseVecSectionOpaque(id);
        break;
      case 3:
        funcsec = parseVecSectionOpaque(id);
        break;
      case 6:
        globalsec = parseVecSectionOpaque(id);
        break;
      case 7:
        exports = parseExportSection(id, funcidxAdjustment());
        break;
      case 8:
        startFuncidx = parseStartSection(id) + funcidxAdjustment();
        break;
      case 10: {
        const rawCode = parseVecSectionOpaque(id);
        // Rewrite the code section to account for the number of imports that
        // will exist in the final module.
        codesec = {
          entryCount: rawCode.entryCount,
          contents: rewriteCodesecContents(rawCode.contents, importsec.entryCount, destImportCount)
        };
        break;
      }
      default:
        skipSection();
    }
  }

  return {
    typesec: checkNotNull(typesec),
    importsec,
    funcsec: checkNotNull(funcsec),
    globalsec,
    funcidxByName: exports,
    codesec: checkNotNull(codesec),
    startFuncidx
  };
}
|
|
209
|
+
|
|
210
|
+
/**
 * Rewrites one code-section entry (a function body: locals followed by the
 * instruction sequence) so that the function index of every `call`
 * instruction is valid in the destination module.
 *
 * The body is scanned instruction by instruction. Only `call` immediates are
 * changed; everything else is copied through verbatim. An index that refers to
 * a user-defined function (`idx >= srcImportCount`) is shifted by
 * `destImportCount - srcImportCount`; an index that refers to an import is
 * left as is.
 *
 * @param bytes - the entry's bytes, without its size prefix.
 * @param srcImportCount - number of imports in the source module.
 * @param destImportCount - number of imports in the destination module.
 * @returns the rewritten entry as an array of byte values.
 * @throws AssertionError on malformed locals, block types, nesting or
 *     immediates; Error on any opcode this scanner does not know how to skip.
 */
function rewriteCodeEntry(
  bytes: Uint8Array,
  srcImportCount: number,
  destImportCount: number
): number[] {
  const {instr} = w;
  let pos = 0;

  const parseU32 = (): number => {
    const [val, count] = decodeULEB128(bytes, pos);
    pos += count;
    return checkU32(val);
  };

  // Skip one LEB128-encoded integer (signed or unsigned) without interpreting
  // its value: every byte but the last has the high (continuation) bit set.
  function skipLEB128(maxBytes: number, what: string): void {
    const start = pos;
    for (;;) {
      assert(pos < bytes.length, `truncated ${what} @${start}`);
      if ((bytes[pos++] & 0x80) === 0) break;
    }
    const count = pos - start;
    assert(count <= maxBytes, `too many bytes (${count}) for ${what} @${start}`);
  }

  function skipLocals(): void {
    const groupCount = parseU32();
    for (let i = 0; i < groupCount; i++) {
      const count = parseU32();
      assert(count < MAX_LOCALS, `too many locals: ${count} @${pos}`);
      checkValtype(bytes[pos++]);
    }
  }

  // See https://webassembly.github.io/spec/core/bikeshed/#binary-blocktype
  function skipBlocktype(): void {
    const b = bytes[pos];
    if (b === 0x40 || isValtype(b)) {
      pos += 1;
      return;
    }
    // From the spec:
    // > Unlike any other occurrence, the type index in a block type is encoded
    // > as a positive signed integer, so that its signed LEB128 bit pattern
    // > cannot collide with the encoding of value types or the special code
    // > 0x40, which correspond to the LEB128 encoding of negative integers.
    const [idx, count] = decodeSLEB128(bytes.slice(pos));
    pos += count;
    assert(idx >= 0, `unexpected typeidx in blocktype: ${idx}`);
  }

  skipLocals();

  const result: number[] = [];
  // Start of the not-yet-copied region of `bytes`.
  let sliceStart = 0;
  let nesting = 1;

  // Copy bytes[from, to) to the output. A plain loop is used rather than
  // `push(...slice)` because spreading a large function body can exceed the
  // engine's limit on the number of call arguments.
  const copyRange = (from: number, to: number): void => {
    for (let i = from; i < to; i++) {
      result.push(bytes[i]);
    }
  };

  // Walk through the function's bytecode.
  while (pos < bytes.length) {
    const bc = bytes[pos++];

    // The cases here are ordered by ascending opcode.
    // See https://pengowray.github.io/wasm-ops/ for an overview.
    switch (bc) {
      case instr.unreachable:
      case instr.nop:
        break;
      case instr.block:
      case instr.loop:
      case instr.if:
        skipBlocktype();
        ++nesting;
        break;
      case instr.else:
        break;
      case instr.end:
        assert(--nesting >= 0, `bad nesting @${pos - 1}`);
        break;
      case instr.br:
      case instr.br_if:
        parseU32();
        break;
      case instr.br_table: {
        // vec(labelidx) followed by the default labelidx.
        const labelCount = parseU32();
        for (let i = 0; i <= labelCount; i++) {
          parseU32();
        }
        break;
      }
      case instr.return:
        break;
      case instr.call: {
        // Rewrite `call` instructions so that the index is valid for the
        // target module. First flush everything up to and including the
        // opcode itself.
        copyRange(sliceStart, pos);
        let idx = parseU32();

        // Function indices in a Wasm bundle are automatically assigned.
        // First come the imports, then the user-defined functions.
        // Since the dest module has additional imports, we need to rewrite
        // the funcidx if and only if it referred to a user function.
        if (idx >= srcImportCount) {
          idx += destImportCount - srcImportCount;
        }
        result.push(...w.u32(idx));
        // Resume copying after the original (now replaced) immediate.
        sliceStart = pos;
        break;
      }
      case instr.call_indirect:
        parseU32();
        parseU32();
        break;
      case instr.drop:
      case instr.select:
        break;
      case instr.local.get:
      case instr.local.set:
      case instr.local.tee:
      case instr.global.get:
      case instr.global.set:
        parseU32();
        break;
      case instr.i32.const:
        // The immediate is a *signed* LEB128 value. Reading it as a U32 would
        // reject negative constants that need all five bytes (e.g. -2^31),
        // so just skip over it.
        skipLEB128(5, 'i32');
        break;
      case instr.i64.const:
        skipLEB128(10, 'i64');
        break;
      case instr.f32.const:
        pos += 4;
        break;
      case instr.f64.const:
        pos += 8;
        break;
      case 0xfc: {
        // Multi-byte opcodes: 0xfc followed by a u32 sub-opcode.
        const bc2 = parseU32();
        if (bc2 <= 7) {
          // i32.trunc_sat_XXX / i64.trunc_sat_XXX: no immediates.
        } else if (bc2 === 0x0a) {
          // memory.copy
          parseU32();
          parseU32();
        } else if (bc2 === 0x0b) {
          // memory.fill
          parseU32();
        } else {
          throw new Error(`unhandled multibyte ${bc2.toString(16)} @${pos - 1}`);
        }
        break;
      }
      default:
        if (instr.i32.load <= bc && bc <= instr.i64.store32) {
          // Loads and stores carry a memarg: alignment and offset.
          parseU32();
          parseU32();
        } else if (instr.memory.size <= bc && bc <= instr.memory.grow) {
          parseU32();
        } else if (instr.i32.eqz <= bc && bc <= instr.f64.reinterpret_i64) {
          // Plain numeric instructions: no immediates.
        } else {
          throw new Error(`unhandled bytecode 0x${bc.toString(16)} @${pos - 1}`);
        }
        break;
    }
  }
  copyRange(sliceStart, bytes.length);
  return result;
}
|
|
366
|
+
|
|
367
|
+
// Rewrite the contents of the prebuilt code section, changing the funcidx of
|
|
368
|
+
// `call` instructions to account for the correct number of imports in the
|
|
369
|
+
// final module.
|
|
370
|
+
/**
 * Rewrites the contents of the prebuilt code section, changing the funcidx of
 * `call` instructions to account for the correct number of imports in the
 * final module.
 *
 * `bytes` is a sequence of size-prefixed code entries (the vec's element
 * count has already been stripped). Each entry is rewritten with
 * `rewriteCodeEntry` and re-emitted with a fresh size prefix, since the
 * rewritten entry may differ in length from the original.
 *
 * @param bytes - the code section's entries, without the leading entry count.
 * @param srcImportCount - number of imports in the source module.
 * @param destImportCount - number of imports in the destination module.
 * @returns the rewritten entries.
 * @throws AssertionError if an entry's declared size runs past the end of
 *     `bytes`, plus anything thrown by `rewriteCodeEntry`.
 */
function rewriteCodesecContents(
  bytes: Uint8Array,
  srcImportCount: number,
  destImportCount: number
): Uint8Array {
  let pos = 0;

  const parseU32 = (): number => {
    const [val, count] = decodeULEB128(bytes, pos);
    pos += count;
    return checkU32(val);
  };

  const newBytes: number[] = [];

  while (pos < bytes.length) {
    const size = parseU32();
    const end = pos + size;
    // `slice` would silently truncate a short entry; fail loudly instead.
    assert(end <= bytes.length, `code entry @${pos} extends past end of code section`);

    const newEntry = rewriteCodeEntry(bytes.slice(pos, end), srcImportCount, destImportCount);

    // The size prefix is at most a few bytes, so spreading it is safe. The
    // entry itself is appended with a loop: spreading a large function body
    // into `push` can exceed the engine's limit on call arguments.
    newBytes.push(...w.u32(newEntry.length));
    for (const b of newEntry) {
      newBytes.push(b);
    }
    pos = end;
  }

  return new Uint8Array(newBytes);
}
|
package/src/cli.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/* global process */
|
|
4
|
+
|
|
5
|
+
import * as ohm from 'ohm-js';
|
|
6
|
+
import fs from 'node:fs';
|
|
7
|
+
import {basename} from 'node:path';
|
|
8
|
+
|
|
9
|
+
import {Compiler} from './index.js';
|
|
10
|
+
|
|
11
|
+
// Compile an Ohm grammar file (.ohm) to WebAssembly (.wasm).
|
|
12
|
+
/**
 * CLI entry point: compiles the Ohm grammar file named by the single
 * command-line argument and writes the resulting Wasm module next to it.
 *
 * The output path is the input path with a trailing `.ohm` replaced by
 * `.wasm`. If the input does not end in `.ohm`, `.wasm` is appended instead,
 * so the output can never be the same path as the input grammar.
 *
 * Prints usage and exits with status 1 unless exactly one argument is given.
 */
function main() {
  const args = process.argv.slice(2);

  // Check if we have exactly one argument
  if (args.length !== 1) {
    printUsage();
    process.exit(1);
  }

  const filename = args[0];
  const g = ohm.grammar(fs.readFileSync(filename, 'utf8'));
  const bytes = new Compiler(g).compile();

  // Only strip `.ohm` when it is the actual extension: replacing the first
  // occurrence anywhere would mangle paths like `my.ohm.d/foo.ohm`, and a
  // no-op replace would make us overwrite the input file.
  const ohmExt = '.ohm';
  const stem = filename.endsWith(ohmExt) ? filename.slice(0, -ohmExt.length) : filename;
  const outFilename = `${stem}.wasm`;

  fs.writeFileSync(outFilename, bytes);
  // eslint-disable-next-line no-console
  console.log(`Wrote Wasm to ${outFilename}`);
}
|
|
29
|
+
|
|
30
|
+
// Print usage information
|
|
31
|
+
/**
 * Writes a one-line usage message to stdout, using the base name of the
 * running script as the command name.
 */
function printUsage() {
  const scriptName = basename(process.argv[1]);
  const usage = `usage: ${scriptName} <ohm-grammar-file>`;
  // eslint-disable-next-line no-console
  console.log(usage);
}
|
|
35
|
+
|
|
36
|
+
// Run the CLI as soon as the module is loaded.
main();
|