watr 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/compile.js CHANGED
@@ -1,55 +1,87 @@
1
1
  import * as encode from './encode.js'
2
2
  import { uleb, i32, i64 } from './encode.js'
3
- import { SECTION, TYPE, KIND, INSTR, HEAPTYPE, DEFTYPE, RECTYPE, REFTYPE } from './const.js'
3
+ import { SECTION, TYPE, KIND, INSTR, DEFTYPE } from './const.js'
4
4
  import parse from './parse.js'
5
- import { clone, err } from './util.js'
5
+ import { err, unescape, str } from './util.js'
6
6
 
7
- // build instructions index
8
- INSTR.forEach((op, i) => INSTR[op] = i >= 0x133 ? [0xfd, i - 0x133] : i >= 0x11b ? [0xfc, i - 0x11b] : i >= 0xfb ? [0xfb, i - 0xfb] : [i]);
7
+
8
+ /**
9
+ * Clean up AST: remove comments, normalize quoted ids, convert strings to bytes.
10
+ * Preserves @custom and @metadata.code.* annotations. Preserves .i for error reporting.
11
+ *
12
+ * @param {any} node - AST node
13
+ * @param {Array} [result] - Internal accumulator
14
+ * @returns {any} Cleaned node
15
+ */
16
+ const cleanup = (node, result) => !Array.isArray(node) ? (
17
+ typeof node !== 'string' ? node :
18
+ // skip comments: ;; ... or (; ... ;)
19
+ node[0] === ';' || node[1] === ';' ? null :
20
+ // normalize quoted ids: $"name" -> $name (if no escapes), else $unescaped
21
+ node[0] === '$' && node[1] === '"' ? (node.includes('\\') ? '$' + unescape(node.slice(1)) : '$' + node.slice(2, -1)) :
22
+ // convert string literals to byte arrays with valueOf
23
+ node[0] === '"' ? str(node) :
24
+ node
25
+ ) :
26
+ // remove annotations like (@name ...) except @custom and @metadata.code.*
27
+ node[0]?.[0] === '@' && node[0] !== '@custom' && !node[0]?.startsWith?.('@metadata.code.') ? null :
28
+ // unwrap single-element array containing module (after removing comments), preserve .i
29
+ (result = node.map(cleanup).filter(n => n != null), result.i = node.i, result.length === 1 && result[0]?.[0] === 'module' ? result[0] : result)
9
30
 
10
31
 
11
32
  /**
12
33
  * Converts a WebAssembly Text Format (WAT) tree to a WebAssembly binary format (WASM).
13
34
  *
14
35
  * @param {string|Array} nodes - The WAT tree or string to be compiled to WASM binary.
15
- * @param {Object} opt - opt.fullSize for fixed-width uleb encoding
16
36
  * @returns {Uint8Array} The compiled WASM binary data.
17
37
  */
18
- export default function watr(nodes) {
38
+ export default function compile(nodes) {
19
39
  // normalize to (module ...) form
20
- if (typeof nodes === 'string') nodes = parse(nodes);
21
- else nodes = clone(nodes)
40
+ if (typeof nodes === 'string') err.src = nodes, nodes = parse(nodes) || []
41
+ else err.src = '' // clear source if AST passed directly
42
+ err.i = 0
43
+
44
+ nodes = cleanup(nodes) || []
45
+
46
+ let idx = 0
22
47
 
23
48
  // module abbr https://webassembly.github.io/spec/core/text/modules.html#id10
24
- if (nodes[0] === 'module') nodes.shift(), nodes[0]?.[0] === '$' && nodes.shift()
49
+ if (nodes[0] === 'module') idx++, isId(nodes[idx]) && idx++
25
50
  // single node, not module
26
51
  else if (typeof nodes[0] === 'string') nodes = [nodes]
27
52
 
28
53
  // binary abbr "\00" "\0x61" ...
29
- if (nodes[0] === 'binary') {
30
- nodes.shift()
31
- return Uint8Array.from(str(nodes.map(i => i.slice(1, -1)).join('')))
32
- }
54
+ if (nodes[idx] === 'binary') return Uint8Array.from(nodes.slice(++idx).flat())
55
+
33
56
  // quote "a" "b"
34
- else if (nodes[0] === 'quote') {
35
- nodes.shift()
36
- return watr(nodes.map(i => i.slice(1, -1)).join(''))
37
- }
57
+ if (nodes[idx] === 'quote') return compile(nodes.slice(++idx).map(v => v.valueOf().slice(1, -1)).flat().join(''))
38
58
 
39
59
  // scopes are aliased by key as well, eg. section.func.$name = section[SECTION.func] = idx
40
60
  const ctx = []
41
61
  for (let kind in SECTION) (ctx[SECTION[kind]] = ctx[kind] = []).name = kind
62
+ ctx.metadata = {} // code metadata storage: { type: [[funcIdx, [[pos, data]...]]] }
42
63
 
43
64
  // initialize types
44
- nodes.filter(([kind, ...node]) => {
65
+ nodes.slice(idx).filter((n) => {
66
+ if (!Array.isArray(n)) {
67
+ let pos = err.src?.indexOf(n, err.i)
68
+ if (pos >= 0) err.i = pos
69
+ err(`Unexpected token ${n}`)
70
+ }
71
+ let [kind, ...node] = n
72
+ err.i = n.i // track position for errors
73
+ // (@custom "name" placement? data) - custom section support
74
+ if (kind === '@custom') {
75
+ ctx.custom.push(node)
76
+ }
45
77
  // (rec (type $a (sub final? $sup* (func ...))...) (type $b ...)) -> save subtypes
46
- if (kind === 'rec') {
78
+ else if (kind === 'rec') {
47
79
  // node contains a list of subtypes, (type ...) or (type (sub final? ...))
48
80
  // convert rec type into regular type (first subtype) with stashed subtypes length
49
81
  // add rest of subtypes as regular type nodes with subtype flag
50
82
  for (let i = 0; i < node.length; i++) {
51
- let [,...subnode] = node[i]
52
- alias(subnode, ctx.type);
83
+ let [, ...subnode] = node[i]
84
+ name(subnode, ctx.type);
53
85
  (subnode = typedef(subnode, ctx)).push(i ? true : [ctx.type.length, node.length])
54
86
  ctx.type.push(subnode)
55
87
  }
@@ -59,7 +91,7 @@ export default function watr(nodes) {
59
91
  // (type (struct (field a)*))
60
92
  // (type (sub final? $nm* (struct|array|func ...)))
61
93
  else if (kind === 'type') {
62
- alias(node, ctx.type);
94
+ name(node, ctx.type);
63
95
  ctx.type.push(typedef(node, ctx));
64
96
  }
65
97
  // other sections may have id
@@ -68,58 +100,70 @@ export default function watr(nodes) {
68
100
  else return true
69
101
  })
70
102
 
71
- // prepare/normalize nodes
72
- .forEach(([kind, ...node]) => {
73
- let imported // if node needs to be imported
74
-
75
- // import abbr
76
- // (import m n (table|memory|global|func id? type)) -> (table|memory|global|func id? (import m n) type)
77
- if (kind === 'import') [kind, ...node] = (imported = node).pop()
78
-
79
- // index, alias
80
- let items = ctx[kind];
81
- let name = alias(node, items);
82
-
83
- // export abbr
84
- // (table|memory|global|func id? (export n)* ...) -> (table|memory|global|func id ...) (export n (table|memory|global|func id))
85
- while (node[0]?.[0] === 'export') ctx.export.push([node.shift()[1], [kind, items.length]])
86
-
87
- // for import nodes - redirect output to import
88
- if (node[0]?.[0] === 'import') [, ...imported] = node.shift()
103
+ // prepare/normalize nodes
104
+ .forEach((n) => {
105
+ let [kind, ...node] = n
106
+ err.i = n.i // track position for errors
107
+ let imported // if node needs to be imported
108
+
109
+ // import abbr
110
+ // (import m n (table|memory|global|func id? type)) -> (table|memory|global|func id? (import m n) type)
111
+ if (kind === 'import') [kind, ...node] = (imported = node).pop()
112
+
113
+ // index, alias
114
+ let items = ctx[kind];
115
+ if (!items) err(`Unknown section ${kind}`)
116
+ name(node, items);
117
+
118
+ // export abbr
119
+ // (table|memory|global|func|tag id? (export n)* ...) -> (table|memory|global|func|tag id ...) (export n (table|memory|global|func|tag id))
120
+ while (node[0]?.[0] === 'export') ctx.export.push([node.shift()[1], [kind, items?.length]])
121
+
122
+ // for import nodes - redirect output to import
123
+ if (node[0]?.[0] === 'import') [, ...imported] = node.shift()
124
+
125
+ // table abbr: (table id? i64? reftype (elem ...)) -> (table id? i64? n n reftype) + (elem ...)
126
+ if (kind === 'table') {
127
+ const is64 = node[0] === 'i64', idx = is64 ? 1 : 0
128
+ if (node[idx + 1]?.[0] === 'elem') {
129
+ let [reftype, [, ...els]] = [node[idx], node[idx + 1]]
130
+ node = is64 ? ['i64', els.length, els.length, reftype] : [els.length, els.length, reftype]
131
+ ctx.elem.push([['table', items.length], ['offset', [is64 ? 'i64.const' : 'i32.const', is64 ? 0n : 0]], reftype, ...els])
132
+ }
133
+ }
89
134
 
90
- // table abbr
91
- if (kind === 'table') {
92
- // (table id? reftype (elem ...{n})) -> (table id? n n reftype) (elem (table id) (i32.const 0) reftype ...)
93
- if (node[1]?.[0] === 'elem') {
94
- let [reftype, [, ...els]] = node
95
- node = [els.length, els.length, reftype]
96
- ctx.elem.push([['table', name || items.length], ['i32.const', '0'], reftype, ...els])
135
+ // data abbr: (memory id? i64? (data str)) -> (memory id? i64? n n) + (data ...)
136
+ else if (kind === 'memory') {
137
+ const is64 = node[0] === 'i64', idx = is64 ? 1 : 0
138
+ if (node[idx]?.[0] === 'data') {
139
+ let [, ...data] = node.splice(idx, 1)[0], m = '' + Math.ceil(data.reduce((s, d) => s + d.length, 0) / 65536)
140
+ ctx.data.push([['memory', items.length], [is64 ? 'i64.const' : 'i32.const', is64 ? 0n : 0], ...data])
141
+ node = is64 ? ['i64', m, m] : [m, m]
142
+ }
97
143
  }
98
- }
99
144
 
100
- // data abbr
101
- // (memory id? (data str)) -> (memory id? n n) (data (memory id) (i32.const 0) str)
102
- else if (kind === 'memory' && node[0]?.[0] === 'data') {
103
- let [, ...data] = node.shift(), m = '' + Math.ceil(data.map(s => s.slice(1, -1)).join('').length / 65536) // FIXME: figure out actual data size
104
- ctx.data.push([['memory', items.length], ['i32.const', 0], ...data])
105
- node = [m, m]
106
- }
145
+ // dupe to code section, save implicit type
146
+ else if (kind === 'func') {
147
+ let [idx, param, result] = typeuse(node, ctx);
148
+ idx ??= regtype(param, result, ctx)
107
149
 
108
- // dupe to code section, save implicit type
109
- else if (kind === 'func') {
110
- let [idx, param, result] = typeuse(node, ctx);
111
- idx ??= regtype(param, result, ctx)
150
+ // flatten + normalize function body
151
+ !imported && ctx.code.push([[idx, param, result], ...normalize(node, ctx)])
152
+ node = [['type', idx]]
153
+ }
112
154
 
113
- // we save idx because type can be defined after
114
- !imported && ctx.code.push([[idx, param, result], ...plain(node, ctx)]) // pass param since they may have names
115
- node.unshift(['type', idx])
116
- }
155
+ // tag has a type similar to func
156
+ else if (kind === 'tag') {
157
+ let [idx, param] = typeuse(node, ctx);
158
+ idx ??= regtype(param, [], ctx)
159
+ node = [['type', idx]]
160
+ }
117
161
 
118
- // import writes to import section and adds placeholder for (kind) section
119
- if (imported) ctx.import.push([...imported, [kind, ...node]]), node = null
162
+ // import writes to import section and adds placeholder for (kind) section
163
+ if (imported) ctx.import.push([...imported, [kind, ...node]]), node = null
120
164
 
121
- items.push(node)
122
- })
165
+ items.push(node)
166
+ })
123
167
 
124
168
  // convert nodes to bytes
125
169
  const bin = (kind, count = true) => {
@@ -128,9 +172,26 @@ export default function watr(nodes) {
128
172
  .map(item => build[kind](item, ctx))
129
173
  .filter(Boolean) // filter out unrenderable things (subtype or data.length)
130
174
 
175
+ // Custom sections - each is output as separate section with own header
176
+ if (kind === SECTION.custom) return items.flatMap(content => [kind, ...vec(content)])
177
+
131
178
  return !items.length ? [] : [kind, ...vec(count ? vec(items) : items)]
132
179
  }
133
180
 
181
+ // Generate metadata custom sections
182
+ const binMeta = () => {
183
+ const sections = []
184
+ for (const type in ctx.metadata) {
185
+ const name = vec(str(`"metadata.code.${type}"`))
186
+ const content = vec(ctx.metadata[type].map(([funcIdx, instances]) =>
187
+ [...uleb(funcIdx), ...vec(instances.map(([pos, data]) => [...uleb(pos), ...vec(data)]))]
188
+ ))
189
+ sections.push(0, ...vec([...name, ...content]))
190
+ }
191
+ return sections
192
+ }
193
+
194
+
134
195
  // build final binary
135
196
  return Uint8Array.from([
136
197
  0x00, 0x61, 0x73, 0x6d, // magic
@@ -141,215 +202,250 @@ export default function watr(nodes) {
141
202
  ...bin(SECTION.func),
142
203
  ...bin(SECTION.table),
143
204
  ...bin(SECTION.memory),
205
+ ...bin(SECTION.tag),
144
206
  ...bin(SECTION.global),
145
207
  ...bin(SECTION.export),
146
208
  ...bin(SECTION.start, false),
147
209
  ...bin(SECTION.elem),
148
210
  ...bin(SECTION.datacount, false),
149
211
  ...bin(SECTION.code),
212
+ ...binMeta(),
150
213
  ...bin(SECTION.data)
151
214
  ])
152
215
  }
153
216
 
154
- // consume name eg. $t ...
155
- const alias = (node, list) => {
156
- let name = (node[0]?.[0] === '$' || node[0]?.[0] == null) && node.shift();
157
- if (name) name in list ? err(`Duplicate ${list.name} ${name}`) : list[name] = list.length; // save alias
158
- return name
159
- }
160
-
161
- // (type $id? (func param* result*))
162
- // (type $id? (array (mut i8)))
163
- // (type $id? (struct (field a)*)
164
- // (type $id? (sub final? $nm* (struct|array|func ...)))
165
- const typedef = ([dfn], ctx) => {
166
- let subkind = 'subfinal', supertypes = [], compkind
167
- if (dfn[0] === 'sub') {
168
- subkind = dfn.shift(), dfn[0] === 'final' && (subkind += dfn.shift())
169
- dfn = (supertypes = dfn).pop() // last item is definition
170
- }
171
-
172
- [compkind, ...dfn] = dfn // composite type kind
173
-
174
- if (compkind === 'func') dfn = paramres(dfn), ctx.type['$' + dfn.join('>')] ??= ctx.type.length
175
- else if (compkind === 'struct') dfn = fieldseq(dfn, 'field', true)
176
- else if (compkind === 'array') [dfn] = dfn
177
-
178
- return [compkind, dfn, subkind, supertypes]
179
- }
180
-
181
- // register (implicit) type
182
- const regtype = (param, result, ctx, idx='$' + param + '>' + result) => (
183
- (ctx.type[idx] ??= ctx.type.push(['func', [param, result]]) - 1),
184
- idx
185
- )
186
-
187
- // consume typeuse nodes, return type index/params, or null idx if no type
188
- // https://webassembly.github.io/spec/core/text/modules.html#type-uses
189
- const typeuse = (nodes, ctx, names) => {
190
- let idx, param, result
191
-
192
- // explicit type (type 0|$name)
193
- if (nodes[0]?.[0] === 'type') {
194
- [, idx] = nodes.shift();
195
- [param, result] = paramres(nodes, names);
196
-
197
- const [,srcParamRes] = ctx.type[id(idx, ctx.type)] ?? err(`Unknown type ${idx}`)
198
-
199
- // check type consistency (excludes forward refs)
200
- if ((param.length || result.length) && srcParamRes.join('>') !== param + '>' + result) err(`Type ${idx} mismatch`)
201
-
202
- return [idx, ...srcParamRes]
203
- }
204
-
205
- // implicit type (param i32 i32)(result i32)
206
- return [idx, ...paramres(nodes, names)]
207
- }
208
-
209
- // consume (param t+)* (result t+)* sequence
210
- const paramres = (nodes, names = true) => {
211
- // let param = [], result = []
212
-
213
- // collect param (param i32 i64) (param $x? i32)
214
- let param = fieldseq(nodes, 'param', names)
215
-
216
- // collect result eg. (result f64 f32)(result i32)
217
- let result = fieldseq(nodes, 'result')
218
217
 
219
- if (nodes[0]?.[0] === 'param') err(`Unexpected param`)
220
-
221
- return [param, result]
222
- }
223
-
224
- // collect sequence of field, eg. (param a) (param b c), (field a) (field b c) or (result a b) (result c)
225
- // optionally allow or not names
226
- const fieldseq = (nodes, field, names = false) => {
227
- let seq = []
228
- // collect field eg. (field f64 f32)(field i32)
229
- while (nodes[0]?.[0] === field) {
230
- let [, ...args] = nodes.shift()
231
- let name = args[0]?.[0] === '$' && args.shift()
232
- // expose name refs, if allowed
233
- if (name) {
234
- if (names) name in seq ? err(`Duplicate ${field} ${name}`) : seq[name] = seq.length
235
- else err(`Unexpected ${field} name ${name}`)
236
- }
237
- seq.push(...args)
238
- }
239
- return seq
240
- }
241
-
242
- // consume blocktype - makes sure either type or single result is returned
243
- const blocktype = (nodes, ctx) => {
244
- let [idx, param, result] = typeuse(nodes, ctx, 0)
245
-
246
- // get type - can be either idx or valtype (numtype | reftype)
247
- if (!param.length && !result.length) return
248
-
249
- // (result i32) - doesn't require registering type
250
- if (!param.length && result.length === 1) return ['result', ...result]
251
-
252
- // register implicit type
253
- idx ??= regtype(param, result, ctx)
254
-
255
- return ['type', idx]
256
- }
257
-
258
- // abbr blocks, loops, ifs; collect implicit types via typeuses; resolve optional immediates
259
- // https://webassembly.github.io/spec/core/text/instructions.html#folded-instructions
260
- const plain = (nodes, ctx) => {
261
- let out = [], stack = [], label
218
+ /** Check if node is a valid index reference ($name or number) */
219
+ const isIdx = n => n?.[0] === '$' || !isNaN(n)
220
+ /** Check if node is an identifier (starts with $) */
221
+ const isId = n => n?.[0] === '$'
222
+ /** Check if node is align/offset memory parameter */
223
+ const isMemParam = n => n?.[0] === 'a' || n?.[0] === 'o'
262
224
 
225
+ /**
226
+ * Normalize and flatten function body to stack form.
227
+ * Converts folded S-expressions to linear instruction sequence.
228
+ * Handles blocks, if/then/else, try_table, and metadata annotations.
229
+ *
230
+ * @param {Array} nodes - Function body nodes
231
+ * @param {Object} ctx - Compilation context with type info
232
+ * @returns {Array} Flattened instruction sequence
233
+ */
234
+ function normalize(nodes, ctx) {
235
+ const out = []
236
+ nodes = [...nodes]
263
237
  while (nodes.length) {
264
238
  let node = nodes.shift()
265
-
266
- // lookup is slower than sequence of known ifs
267
239
  if (typeof node === 'string') {
268
240
  out.push(node)
269
-
270
- // block typeuse?
271
241
  if (node === 'block' || node === 'if' || node === 'loop') {
272
- // (loop $l?)
273
- if (nodes[0]?.[0] === '$') label = nodes.shift(), out.push(label), stack.push(label)
274
-
242
+ if (isId(nodes[0])) out.push(nodes.shift())
275
243
  out.push(blocktype(nodes, ctx))
276
244
  }
277
-
278
- // else $label
279
- // end $label - make sure it matches block label
280
245
  else if (node === 'else' || node === 'end') {
281
- if (nodes[0]?.[0] === '$') (node === 'end' ? stack.pop() : label) !== (label = nodes.shift()) && err(`Mismatched label ${label}`)
246
+ if (isId(nodes[0])) nodes.shift()
282
247
  }
283
-
284
- // select (result i32 i32 i32)?
285
- else if (node === 'select') {
286
- out.push(paramres(nodes, 0)[1])
287
- }
288
-
289
- // call_indirect $table? $typeidx
290
- // return_call_indirect $table? $typeidx
248
+ else if (node === 'select') out.push(paramres(nodes)[1])
291
249
  else if (node.endsWith('call_indirect')) {
292
- let tableidx = nodes[0]?.[0] === '$' || !isNaN(nodes[0]) ? nodes.shift() : 0
293
- let [idx, param, result] = typeuse(nodes, ctx, 0)
250
+ let tableidx = isIdx(nodes[0]) ? nodes.shift() : 0, [idx, param, result] = typeuse(nodes, ctx)
294
251
  out.push(tableidx, ['type', idx ?? regtype(param, result, ctx)])
295
252
  }
253
+ else if (node === 'table.init') out.push(isIdx(nodes[1]) ? nodes.shift() : 0, nodes.shift())
254
+ else if (node === 'table.copy' || node === 'memory.copy') out.push(isIdx(nodes[0]) ? nodes.shift() : 0, isIdx(nodes[0]) ? nodes.shift() : 0)
255
+ else if (node.startsWith('table.')) out.push(isIdx(nodes[0]) ? nodes.shift() : 0)
256
+ else if (node === 'memory.init') {
257
+ out.push(...(isIdx(nodes[1]) ? [nodes.shift(), nodes.shift()].reverse() : [nodes.shift(), 0]))
258
+ ctx.datacount && (ctx.datacount[0] = true)
259
+ }
260
+ else if (node === 'data.drop' || node === 'array.new_data' || node === 'array.init_data') {
261
+ node === 'data.drop' && out.push(nodes.shift())
262
+ ctx.datacount && (ctx.datacount[0] = true)
263
+ }
264
+ // memory.* instructions and load/store with optional memory index
265
+ else if ((node.startsWith('memory.') || node.endsWith('load') || node.endsWith('store')) && isIdx(nodes[0])) out.push(nodes.shift())
266
+ }
267
+ else if (Array.isArray(node)) {
268
+ const op = node[0]
269
+ node.i != null && (err.i = node.i) // track position for errors
270
+
271
+ // code metadata annotations - pass through as marker with metadata type and data
272
+ // (@metadata.code.<type> data:str)
273
+ if (op?.startsWith?.('@metadata.code.')) {
274
+ let type = op.slice(15) // remove '@metadata.code.' prefix
275
+ out.push(['@metadata', type, node[1]])
276
+ continue
277
+ }
296
278
 
297
- // mark datacount section as required
298
- else if (node === 'memory.init' || node === 'data.drop' || node === 'array.new_data' || node === 'array.init_data') {
299
- ctx.datacount[0] = true
279
+ // Check if node is a valid instruction (string with opcode in INSTR)
280
+ if (typeof op !== 'string' || !Array.isArray(INSTR[op])) { out.push(node); continue }
281
+ const parts = node.slice(1)
282
+ if (op === 'block' || op === 'loop') {
283
+ out.push(op)
284
+ if (isId(parts[0])) out.push(parts.shift())
285
+ out.push(blocktype(parts, ctx), ...normalize(parts, ctx), 'end')
286
+ }
287
+ else if (op === 'if') {
288
+ let then = [], els = []
289
+ if (parts.at(-1)?.[0] === 'else') els = normalize(parts.pop().slice(1), ctx)
290
+ if (parts.at(-1)?.[0] === 'then') then = normalize(parts.pop().slice(1), ctx)
291
+ let immed = [op]
292
+ if (isId(parts[0])) immed.push(parts.shift())
293
+ immed.push(blocktype(parts, ctx))
294
+ out.push(...normalize(parts, ctx), ...immed, ...then)
295
+ els.length && out.push('else', ...els)
296
+ out.push('end')
297
+ }
298
+ else if (op === 'try_table') {
299
+ out.push(op)
300
+ if (isId(parts[0])) out.push(parts.shift())
301
+ out.push(blocktype(parts, ctx))
302
+ // Collect catch clauses
303
+ while (parts[0]?.[0] === 'catch' || parts[0]?.[0] === 'catch_ref' || parts[0]?.[0] === 'catch_all' || parts[0]?.[0] === 'catch_all_ref') {
304
+ out.push(parts.shift())
305
+ }
306
+ out.push(...normalize(parts, ctx), 'end')
307
+ }
308
+ else {
309
+ const imm = []
310
+ // Collect immediate operands (non-arrays or special forms like type/param/result/ref)
311
+ while (parts.length && (!Array.isArray(parts[0]) || 'type,param,result,ref'.includes(parts[0][0]))) imm.push(parts.shift())
312
+ out.push(...normalize(parts, ctx), op, ...imm)
313
+ nodes.unshift(...out.splice(out.length - 1 - imm.length))
300
314
  }
315
+ } else out.push(node)
316
+ }
317
+ return out
318
+ }
301
319
 
302
- // table.init tableidx? elemidx -> table.init tableidx elemidx
303
- else if (node === 'table.init') out.push((nodes[1][0] === '$' || !isNaN(nodes[1])) ? nodes.shift() : 0, nodes.shift())
320
+ /**
321
+ * Register implicit function type, return type index.
322
+ * Creates canonical name like '$i32,i32>i32' for deduplication.
323
+ *
324
+ * @param {string[]} param - Parameter types
325
+ * @param {string[]} result - Result types
326
+ * @param {Object} ctx - Compilation context
327
+ * @param {string} [idx] - Type identifier
328
+ * @returns {string} Type index/identifier
329
+ */
330
+ const regtype = (param, result, ctx, idx = '$' + param + '>' + result) => (ctx.type[idx] ??= ctx.type.push(['func', [param, result]]) - 1, idx)
304
331
 
305
- // table.* tableidx?
306
- else if (node.startsWith('table.')) {
307
- out.push(nodes[0]?.[0] === '$' || !isNaN(nodes[0]) ? nodes.shift() : 0)
332
+ /**
333
+ * Collect field sequence: (field a) (field b c) → [a, b, c].
334
+ * Tracks named fields for index lookup.
335
+ *
336
+ * @param {Array} nodes - Nodes to consume from
337
+ * @param {string} field - Field keyword ('param', 'result', 'field')
338
+ * @returns {Array} Collected values with named indices
339
+ */
340
+ const fieldseq = (nodes, field) => {
341
+ let seq = []
342
+ while (nodes[0]?.[0] === field) {
343
+ let [, ...args] = nodes.shift(), nm = isId(args[0]) && args.shift()
344
+ if (nm) nm in seq ? (() => { throw Error(`Duplicate ${field} ${nm}`) })() : seq[nm] = seq.length
345
+ seq.push(...args)
346
+ }
347
+ return seq
348
+ }
308
349
 
309
- // table.copy tableidx? tableidx?
310
- if (node === 'table.copy') out.push(nodes[0][0] === '$' || !isNaN(nodes[0]) ? nodes.shift() : 0)
311
- }
312
- }
350
+ /**
351
+ * Consume (param ...)* (result ...)* from nodes.
352
+ *
353
+ * @param {Array} nodes - Nodes to consume from
354
+ * @returns {[string[], string[]]} [params, results]
355
+ */
356
+ const paramres = (nodes) => {
357
+ let param = fieldseq(nodes, 'param'), result = fieldseq(nodes, 'result')
358
+ if (nodes[0]?.[0] === 'param') throw Error('Unexpected param')
359
+ return [param, result]
360
+ }
313
361
 
314
- else {
315
- // (block ...) -> block ... end
316
- if (node[0] === 'block' || node[0] === 'loop') {
317
- out.push(...plain(node, ctx), 'end')
318
- }
362
+ /**
363
+ * Consume typeuse: (type idx)? (param ...)* (result ...)*.
364
+ * Resolves type reference or returns inline signature.
365
+ *
366
+ * @param {Array} nodes - Nodes to consume from
367
+ * @param {Object} ctx - Compilation context with type table
368
+ * @returns {[string|undefined, string[], string[]]} [typeIdx, params, results]
369
+ */
370
+ const typeuse = (nodes, ctx) => {
371
+ if (nodes[0]?.[0] !== 'type') return [, ...paramres(nodes)]
372
+ let [, idx] = nodes.shift(), [param, result] = paramres(nodes)
373
+ const entry = ctx.type[(typeof idx === 'string' && isNaN(idx)) ? ctx.type[idx] : +idx]
374
+ if (!entry) throw Error(`Unknown type ${idx}`)
375
+ if ((param.length || result.length) && entry[1].join('>') !== param + '>' + result) throw Error(`Type ${idx} mismatch`)
376
+ return [idx, ...entry[1]]
377
+ }
319
378
 
320
- // (if ...) -> if ... end
321
- else if (node[0] === 'if') {
322
- let then = [], els = [], immed = [node.shift()]
323
- // (if label? blocktype? cond*? (then instr*) (else instr*)?) -> cond*? if label? blocktype? instr* else instr*? end
324
- // https://webassembly.github.io/spec/core/text/instructions.html#control-instructions
325
- if (node[node.length - 1]?.[0] === 'else') {
326
- els = plain(node.pop(), ctx)
327
- // ignore empty else
328
- // https://webassembly.github.io/spec/core/text/instructions.html#abbreviations
329
- if (els.length === 1) els.length = 0
330
- }
331
- if (node[node.length - 1]?.[0] === 'then') then = plain(node.pop(), ctx)
379
+ /**
380
+ * Resolve blocktype: void | (result t) | (type idx).
381
+ * Returns abbreviated form when possible.
382
+ *
383
+ * @param {Array} nodes - Nodes to consume from
384
+ * @param {Object} ctx - Compilation context
385
+ * @returns {Array|undefined} Blocktype node or undefined for void
386
+ */
387
+ const blocktype = (nodes, ctx) => {
388
+ let [idx, param, result] = typeuse(nodes, ctx)
389
+ if (!param.length && !result.length) return
390
+ if (!param.length && result.length === 1) return ['result', ...result]
391
+ return ['type', idx ?? regtype(param, result, ctx)]
392
+ }
332
393
 
333
- // label?
334
- if (node[0]?.[0] === '$') immed.push(node.shift())
335
394
 
336
- // blocktype?
337
- immed.push(blocktype(node, ctx))
338
395
 
339
- if (typeof node[0] === 'string') err('Unfolded condition')
396
+ /**
397
+ * Consume and register section item name (e.g., $foo).
398
+ * Stores alias in list for later index resolution.
399
+ *
400
+ * @param {Array} node - Node array (mutated)
401
+ * @param {Array} list - Section list with name property
402
+ * @returns {string|false} Name if found, false otherwise
403
+ */
404
+ const name = (node, list) => {
405
+ let nm = isId(node[0]) && node.shift();
406
+ if (nm) nm in list ? err(`Duplicate ${list.name} ${nm}`) : list[nm] = list.length; // save alias
407
+ return nm
408
+ }
340
409
 
341
- out.push(...plain(node, ctx), ...immed, ...then, ...els, 'end')
342
- }
343
- else out.push(plain(node, ctx))
344
- }
410
+ /**
411
+ * Parse type definition: func, array, struct, or sub(type).
412
+ * Handles recursive types and subtyping.
413
+ *
414
+ * @param {Array} node - [definition] where definition is func/array/struct/sub
415
+ * @param {Object} ctx - Compilation context
416
+ * @returns {[string, any, string, string[]]} [kind, fields, subkind, supertypes]
417
+ */
418
+ const typedef = ([dfn], ctx) => {
419
+ let subkind = 'subfinal', supertypes = [], compkind
420
+ if (dfn[0] === 'sub') {
421
+ subkind = dfn.shift(), dfn[0] === 'final' && (subkind += dfn.shift())
422
+ dfn = (supertypes = dfn).pop() // last item is definition
345
423
  }
346
424
 
347
- return out
425
+ [compkind, ...dfn] = dfn // composite type kind
426
+
427
+ if (compkind === 'func') dfn = paramres(dfn), ctx.type['$' + dfn.join('>')] ??= ctx.type.length
428
+ else if (compkind === 'struct') dfn = fieldseq(dfn, 'field')
429
+ else if (compkind === 'array') [dfn] = dfn
430
+
431
+ return [compkind, dfn, subkind, supertypes]
348
432
  }
349
433
 
350
434
 
351
435
  // build section binary [by section codes] (non consuming)
352
- const build = [,
436
+ const build = [
437
+ // (@custom "name" placement? data) - custom section builder
438
+ ([name, ...rest], ctx) => {
439
+ // Check if second arg is placement directive (before|after section)
440
+ let data = rest
441
+ if (rest[0]?.[0] === 'before' || rest[0]?.[0] === 'after') {
442
+ // Skip placement for now - would need more complex section ordering
443
+ data = rest.slice(1)
444
+ }
445
+ // Custom section format: name (vec string) + raw content bytes
446
+ // parse already returns strings as byte arrays, so just vec them
447
+ return [...vec(name), ...data.flat()]
448
+ },
353
449
  // type kinds
354
450
  // (func params result)
355
451
  // (array i8)
@@ -383,7 +479,7 @@ const build = [,
383
479
  return [DEFTYPE[kind], ...details]
384
480
  },
385
481
 
386
- // (import "math" "add" (func|table|global|memory dfn?))
482
+ // (import "math" "add" (func|table|global|memory|tag dfn?))
387
483
  ([mod, field, [kind, ...dfn]], ctx) => {
388
484
  let details
389
485
 
@@ -392,6 +488,10 @@ const build = [,
392
488
  let [[, typeidx]] = dfn
393
489
  details = uleb(id(typeidx, ctx.type))
394
490
  }
491
+ else if (kind === 'tag') {
492
+ let [[, typeidx]] = dfn
493
+ details = [0x00, ...uleb(id(typeidx, ctx.type))]
494
+ }
395
495
  else if (kind === 'memory') {
396
496
  details = limits(dfn)
397
497
  }
@@ -403,7 +503,7 @@ const build = [,
403
503
  }
404
504
  else err(`Unknown kind ${kind}`)
405
505
 
406
- return ([...vec(str(mod.slice(1, -1))), ...vec(str(field.slice(1, -1))), KIND[kind], ...details])
506
+ return ([...vec(mod), ...vec(field), KIND[kind], ...details])
407
507
  },
408
508
 
409
509
  // (func $name? ...params result ...body)
@@ -421,8 +521,8 @@ const build = [,
421
521
  // (global $id? (mut i32) (i32.const 42))
422
522
  ([t, init], ctx) => [...fieldtype(t, ctx), ...expr(init, ctx)],
423
523
 
424
- // (export "name" (func|table|mem $name|idx))
425
- ([nm, [kind, l]], ctx) => ([...vec(str(nm.slice(1, -1))), KIND[kind], ...uleb(id(l, ctx[kind]))]),
524
+ // (export "name" (func|table|mem $name|idx))
525
+ ([nm, [kind, l]], ctx) => ([...vec(nm), KIND[kind], ...uleb(id(l, ctx[kind]))]),
426
526
 
427
527
  // (start $main)
428
528
  ([l], ctx) => uleb(id(l, ctx.func)),
@@ -438,10 +538,15 @@ const build = [,
438
538
  if (parts[0] === 'declare') parts.shift(), declare = 1
439
539
 
440
540
  // table?
441
- if (parts[0][0] === 'table') {
541
+ if (parts[0]?.[0] === 'table') {
442
542
  [, tabidx] = parts.shift()
443
543
  tabidx = id(tabidx, ctx.table)
444
544
  }
545
+ // Handle abbreviated form: (elem tableidx (offset ...) ...) where tableidx is directly a number/identifier
546
+ else if ((typeof parts[0] === 'string' || typeof parts[0] === 'number') &&
547
+ (parts[1]?.[0] === 'offset' || (Array.isArray(parts[1]) && parts[1][0] !== 'item' && !parts[1][0]?.startsWith('ref')))) {
548
+ tabidx = id(parts.shift(), ctx.table)
549
+ }
445
550
 
446
551
  // (offset expr)|expr
447
552
  if (parts[0]?.[0] === 'offset' || (Array.isArray(parts[0]) && parts[0][0] !== 'item' && !parts[0][0].startsWith('ref'))) {
@@ -453,15 +558,17 @@ const build = [,
453
558
  else if (!declare) passive = 1
454
559
 
455
560
  // funcref|externref|(ref ...)
456
- if (REFTYPE[parts[0]] || parts[0]?.[0] === 'ref') rt = reftype(parts.shift(), ctx)
561
+ if (TYPE[parts[0]] || parts[0]?.[0] === 'ref') rt = reftype(parts.shift(), ctx)
457
562
  // func ... abbr https://webassembly.github.io/function-references/core/text/modules.html#id7
458
- else if (parts[0] === 'func') rt = [HEAPTYPE[parts.shift()]]
563
+ else if (parts[0] === 'func') rt = [TYPE[parts.shift()]]
459
564
  // or anything else
460
- else rt = [HEAPTYPE.func]
565
+ else rt = [TYPE.func]
461
566
 
462
567
  // deabbr els sequence, detect expr usage
463
568
  parts = parts.map(el => {
464
- if (el[0] === 'item') [, ...el] = el
569
+ // (item ref.func $f) or (item (ref.func $f)) → $f
570
+ if (el[0] === 'item') el = el.length === 3 && el[1] === 'ref.func' ? el[2] : el[1]
571
+ // (ref.func $f) → $f
465
572
  if (el[0] === 'ref.func') [, el] = el
466
573
  // (ref.null func) and other expressions turn expr els mode
467
574
  if (typeof el !== 'string') elexpr = 1
@@ -470,7 +577,7 @@ const build = [,
470
577
 
471
578
  // reftype other than (ref null? func) forces table index via nofunc flag
472
579
  // also it forces elexpr
473
- if (rt[0] !== REFTYPE.funcref) nofunc = 1, elexpr = 1
580
+ if (rt[0] !== TYPE.funcref) nofunc = 1, elexpr = 1
474
581
 
475
582
  // mode:
476
583
  // bit 0 indicates a passive or declarative segment
@@ -523,27 +630,35 @@ const build = [,
523
630
  ctx.local.name = 'local'
524
631
  ctx.block.name = 'block'
525
632
 
633
+ // Track current code index for code metadata
634
+ if (ctx._codeIdx === undefined) ctx._codeIdx = 0
635
+ let codeIdx = ctx._codeIdx++
636
+
526
637
  // collect locals
527
638
  while (body[0]?.[0] === 'local') {
528
639
  let [, ...types] = body.shift()
529
- if (types[0]?.[0] === '$') {
530
- let name = types.shift()
531
- if (name in ctx.local) err(`Duplicate local ${name}`)
532
- else ctx.local[name] = ctx.local.length
640
+ if (isId(types[0])) {
641
+ let nm = types.shift()
642
+ if (nm in ctx.local) err(`Duplicate local ${nm}`)
643
+ else ctx.local[nm] = ctx.local.length
533
644
  }
534
645
  ctx.local.push(...types)
535
646
  }
536
647
 
537
- const bytes = []
538
- while (body.length) bytes.push(...instr(body, ctx))
539
- bytes.push(0x0b)
648
+ // Setup metadata tracking for this function
649
+ ctx.meta = {}
650
+ const bytes = instr(body, ctx)
651
+
652
+ // Store collected metadata for this function
653
+ const funcIdx = ctx.import.filter(imp => imp[2][0] === 'func').length + codeIdx
654
+ for (const type in ctx.meta) ((ctx.metadata ??= {})[type] ??= []).push([funcIdx, ctx.meta[type]])
540
655
 
541
656
  // squash locals into (n:u32 t:valtype)*, n is number and t is type
542
657
  // we skip locals provided by params
543
658
  let loctypes = ctx.local.slice(param.length).reduce((a, type) => (type == a[a.length - 1]?.[1] ? a[a.length - 1][0]++ : a.push([1, type]), a), [])
544
659
 
545
660
  // cleanup tmp state
546
- ctx.local = ctx.block = null
661
+ ctx.local = ctx.block = ctx.meta = null
547
662
 
548
663
  // https://webassembly.github.io/spec/core/binary/modules.html#code-section
549
664
  return vec([...vec(loctypes.map(([n, t]) => [...uleb(n), ...reftype(t, ctx)])), ...bytes])
@@ -560,9 +675,14 @@ const build = [,
560
675
  [, memidx] = inits.shift()
561
676
  memidx = id(memidx, ctx.memory)
562
677
  }
678
+ // Handle abbreviated form: (data memidx (offset ...) ...) where memidx is directly a number/identifier
679
+ else if ((typeof inits[0] === 'string' || typeof inits[0] === 'number') &&
680
+ (inits[1]?.[0] === 'offset' || (Array.isArray(inits[1]) && typeof inits[1][0] === 'string'))) {
681
+ memidx = id(inits.shift(), ctx.memory)
682
+ }
563
683
 
564
684
  // (offset (i32.const 0)) or (i32.const 0)
565
- if (typeof inits[0] !== 'string') {
685
+ if (Array.isArray(inits[0]) && typeof inits[0]?.[0] === 'string') {
566
686
  offset = inits.shift()
567
687
  if (offset[0] === 'offset') [, offset] = offset
568
688
  offset ?? err('Bad offset', offset)
@@ -577,20 +697,24 @@ const build = [,
577
697
  // passive: 1
578
698
  [1]
579
699
  ),
580
- ...vec(str(inits.map(i => i.slice(1, -1)).join('')))
700
+ ...vec(inits.flat())
581
701
  ])
582
702
  },
583
703
 
584
704
  // datacount
585
- (nodes, ctx) => uleb(ctx.data.length)
705
+ (nodes, ctx) => uleb(ctx.data.length),
706
+
707
+ // (tag $name? (type idx))
708
+ ([[, typeidx]], ctx) => [0x00, ...uleb(id(typeidx, ctx.type))]
586
709
  ]
587
710
 
588
- // build reftype, either direct absheaptype or wrapped heaptype https://webassembly.github.io/gc/core/binary/types.html#reference-types
711
+ // Build reference type encoding (ref/refnull forms, not related to regtype which handles func types)
712
+ // https://webassembly.github.io/gc/core/binary/types.html#reference-types
589
713
  const reftype = (t, ctx) => (
590
714
  t[0] === 'ref' ?
591
715
  t[1] == 'null' ?
592
- HEAPTYPE[t[2]] ? [HEAPTYPE[t[2]]] : [REFTYPE.refnull, ...uleb(id(t[t.length - 1], ctx.type))] :
593
- [TYPE.ref, ...uleb(HEAPTYPE[t[t.length - 1]] || id(t[t.length - 1], ctx.type))] :
716
+ TYPE[t[2]] ? [TYPE[t[2]]] : [TYPE.refnull, ...uleb(id(t[t.length - 1], ctx.type))] :
717
+ [TYPE.ref, ...uleb(TYPE[t[t.length - 1]] || id(t[t.length - 1], ctx.type))] :
594
718
  // abbrs
595
719
  [TYPE[t] ?? err(`Unknown type ${t}`)]
596
720
  );
@@ -600,287 +724,176 @@ const fieldtype = (t, ctx, mut = t[0] === 'mut' ? 1 : 0) => [...reftype(mut ? t[
600
724
 
601
725
 
602
726
 
603
- // consume one instruction from nodes sequence
604
- const instr = (nodes, ctx) => {
605
- if (!nodes?.length) return []
606
727
 
607
- let out = [], op = nodes.shift(), immed, code
608
728
 
609
- // consume group
610
- if (Array.isArray(op)) {
611
- immed = instr(op, ctx)
612
- while (op.length) out.push(...instr(op, ctx))
613
- out.push(...immed)
614
- return out
615
- }
616
-
617
- [...immed] = isNaN(op[0]) && INSTR[op] || err(`Unknown instruction ${op}`)
618
- code = immed[0]
619
-
620
- // gc-related
621
- // https://webassembly.github.io/gc/core/binary/instructions.html#reference-instructions
622
- if (code === 0x0fb) {
623
- [, code] = immed
624
-
625
- // struct.new $t ... array.set $t
626
- if ((code >= 0 && code <= 14) || (code >= 16 && code <= 19)) {
627
- let tidx = id(nodes.shift(), ctx.type)
628
- immed.push(...uleb(tidx))
629
-
630
- // struct.get|set* $t $f - read field by index from struct definition (ctx.type[structidx][dfnidx])
631
- if (code >= 2 && code <= 5) immed.push(...uleb(id(nodes.shift(), ctx.type[tidx][1])))
632
- // array.new_fixed $t n
633
- else if (code === 8) immed.push(...uleb(nodes.shift()))
634
- // array.new_data|init_data $t $d
635
- else if (code === 9 || code === 18) immed.push(...uleb(id(nodes.shift(), ctx.data)))
636
- // array.new_elem|init_elem $t $e
637
- else if (code === 10 || code === 19) immed.push(...uleb(id(nodes.shift(), ctx.elem)))
638
- // array.copy $t $t
639
- else if (code === 17) immed.push(...uleb(id(nodes.shift(), ctx.type)))
640
- }
641
- // ref.test|cast (ref null? $t|heaptype)
642
- else if (code >= 20 && code <= 23) {
643
- let ht = reftype(nodes.shift(), ctx)
644
- if (ht[0] !== REFTYPE.ref) immed.push(code = immed.pop()+1) // ref.test|cast (ref null $t) is next op
645
- if (ht.length > 1) ht.shift() // pop ref
646
- immed.push(...ht)
647
- }
648
- // br_on_cast[_fail] $l? (ref null? ht1) (ref null? ht2)
649
- else if (code === 24 || code === 25) {
650
- let i = blockid(nodes.shift(), ctx.block),
651
- ht1 = reftype(nodes.shift(), ctx),
652
- ht2 = reftype(nodes.shift(), ctx),
653
- castflags = ((ht2[0] !== REFTYPE.ref) << 1) | (ht1[0] !== REFTYPE.ref)
654
- immed.push(castflags, ...uleb(i), ht1.pop(), ht2.pop()) // we take only abstype or
655
- }
729
+ // Pre-defined instruction handlers
730
+ const IMM = {
731
+ null: () => [],
732
+ reversed: (n, c) => { let t = n.shift(), e = n.shift(); return [...uleb(id(e, c.elem)), ...uleb(id(t, c.table))] },
733
+ block: (n, c) => {
734
+ c.block.push(1)
735
+ isId(n[0]) && (c.block[n.shift()] = c.block.length)
736
+ let t = n.shift()
737
+ return !t ? [TYPE.void] : t[0] === 'result' ? reftype(t[1], c) : uleb(id(t[1], c.type))
738
+ },
739
+ try_table: (n, c) => {
740
+ isId(n[0]) && (c.block[n.shift()] = c.block.length + 1)
741
+ let blocktype = n.shift()
742
+ let result = !blocktype ? [TYPE.void] : blocktype[0] === 'result' ? reftype(blocktype[1], c) : uleb(id(blocktype[1], c.type))
743
+ // Collect catch clauses BEFORE pushing try_table to block stack (catch labels are relative to outer blocks)
744
+ let catches = [], count = 0
745
+ while (n[0]?.[0] === 'catch' || n[0]?.[0] === 'catch_ref' || n[0]?.[0] === 'catch_all' || n[0]?.[0] === 'catch_all_ref') {
746
+ let clause = n.shift()
747
+ let kind = clause[0] === 'catch' ? 0x00 : clause[0] === 'catch_ref' ? 0x01 : clause[0] === 'catch_all' ? 0x02 : 0x03
748
+ if (kind <= 0x01) catches.push(kind, ...uleb(id(clause[1], c.tag)), ...uleb(blockid(clause[2], c.block)))
749
+ else catches.push(kind, ...uleb(blockid(clause[1], c.block)))
750
+ count++
751
+ }
752
+ c.block.push(1) // NOW push try_table to block stack after processing catches
753
+ return [...result, ...uleb(count), ...catches]
754
+ },
755
+ end: (_n, c) => (c.block.pop(), []),
756
+ call_indirect: (n, c) => { let t = n.shift(), [, idx] = n.shift(); return [...uleb(id(idx, c.type)), ...uleb(id(t, c.table))] },
757
+ br_table: (n, c) => {
758
+ let labels = [], count = 0
759
+ while (n[0] && (!isNaN(n[0]) || isId(n[0]))) (labels.push(...uleb(blockid(n.shift(), c.block))), count++)
760
+ return [...uleb(count - 1), ...labels]
761
+ },
762
+ select: (n, c) => { let r = n.shift() || []; return r.length ? vec(r.map(t => reftype(t, c))) : [] },
763
+ ref_null: (n, c) => { let t = n.shift(); return TYPE[t] ? [TYPE[t]] : uleb(id(t, c.type)) },
764
+ memarg: (n, c, op) => memargEnc(n, op, isIdx(n[0]) && !isMemParam(n[0]) ? id(n.shift(), c.memory) : 0),
765
+ opt_memory: (n, c) => uleb(id(isIdx(n[0]) ? n.shift() : 0, c.memory)),
766
+ reftype: (n, c) => { let ht = reftype(n.shift(), c); return ht.length > 1 ? ht.slice(1) : ht },
767
+ reftype2: (n, c) => { let b = blockid(n.shift(), c.block), h1 = reftype(n.shift(), c), h2 = reftype(n.shift(), c); return [((h2[0] !== TYPE.ref) << 1) | (h1[0] !== TYPE.ref), ...uleb(b), h1.pop(), h2.pop()] },
768
+ v128const: (n) => {
769
+ let [t, num] = n.shift().split('x'), bits = +t.slice(1), stride = bits >>> 3; num = +num
770
+ if (t[0] === 'i') {
771
+ let arr = num === 16 ? new Uint8Array(16) : num === 8 ? new Uint16Array(8) : num === 4 ? new Uint32Array(4) : new BigUint64Array(2)
772
+ for (let j = 0; j < num; j++) arr[j] = encode[t].parse(n.shift())
773
+ return [...new Uint8Array(arr.buffer)]
774
+ }
775
+ let arr = new Uint8Array(16)
776
+ for (let j = 0; j < num; j++) arr.set(encode[t](n.shift()), j * stride)
777
+ return [...arr]
778
+ },
779
+ shuffle: (n) => {
780
+ let result = []
781
+ for (let j = 0; j < 16; j++) result.push(parseUint(n.shift(), 32))
782
+ if (typeof n[0] === 'string' && !isNaN(n[0])) err(`invalid lane length`)
783
+ return result
784
+ },
785
+ memlane: (n, c, op) => {
786
+ // SIMD lane: [memidx?] [offset/align]* laneidx - memidx present if isId OR (isIdx AND (next is memParam OR isIdx))
787
+ const memIdx = isId(n[0]) || (isIdx(n[0]) && (isMemParam(n[1]) || isIdx(n[1]))) ? id(n.shift(), c.memory) : 0
788
+ return [...memargEnc(n, op, memIdx), ...uleb(parseUint(n.shift()))]
789
+ },
790
+ '*': (n) => uleb(n.shift()),
791
+
792
+ // *idx types
793
+ labelidx: (n, c) => uleb(blockid(n.shift(), c.block)),
794
+ laneidx: (n) => [parseUint(n.shift(), 0xff)],
795
+ funcidx: (n, c) => uleb(id(n.shift(), c.func)),
796
+ typeidx: (n, c) => uleb(id(n.shift(), c.type)),
797
+ tableidx: (n, c) => uleb(id(n.shift(), c.table)),
798
+ memoryidx: (n, c) => uleb(id(n.shift(), c.memory)),
799
+ globalidx: (n, c) => uleb(id(n.shift(), c.global)),
800
+ localidx: (n, c) => uleb(id(n.shift(), c.local)),
801
+ dataidx: (n, c) => uleb(id(n.shift(), c.data)),
802
+ elemidx: (n, c) => uleb(id(n.shift(), c.elem)),
803
+ tagidx: (n, c) => uleb(id(n.shift(), c.tag)),
804
+ 'memoryidx?': (n, c) => uleb(id(isIdx(n[0]) ? n.shift() : 0, c.memory)),
805
+
806
+ // Value type
807
+ i32: (n) => encode.i32(n.shift()),
808
+ i64: (n) => encode.i64(n.shift()),
809
+ f32: (n) => encode.f32(n.shift()),
810
+ f64: (n) => encode.f64(n.shift()),
811
+ v128: (n) => encode.v128(n.shift()),
812
+
813
+ // Combinations
814
+ typeidx_field: (n, c) => { let typeId = id(n.shift(), c.type); return [...uleb(typeId), ...uleb(id(n.shift(), c.type[typeId][1]))] },
815
+ typeidx_multi: (n, c) => [...uleb(id(n.shift(), c.type)), ...uleb(n.shift())],
816
+ typeidx_dataidx: (n, c) => [...uleb(id(n.shift(), c.type)), ...uleb(id(n.shift(), c.data))],
817
+ typeidx_elemidx: (n, c) => [...uleb(id(n.shift(), c.type)), ...uleb(id(n.shift(), c.elem))],
818
+ typeidx_typeidx: (n, c) => [...uleb(id(n.shift(), c.type)), ...uleb(id(n.shift(), c.type))],
819
+ dataidx_memoryidx: (n, c) => [...uleb(id(n.shift(), c.data)), ...uleb(id(n.shift(), c.memory))],
820
+ memoryidx_memoryidx: (n, c) => [...uleb(id(n.shift(), c.memory)), ...uleb(id(n.shift(), c.memory))],
821
+ tableidx_tableidx: (n, c) => [...uleb(id(n.shift(), c.table)), ...uleb(id(n.shift(), c.table))]
822
+ };
823
+
824
+ // per-op imm handlers
825
+ const HANDLER = {};
826
+
827
+
828
+ // Populate INSTR (opcode bytes per op name) and HANDLER (per-op immediate encoder looked up from IMM)
829
+ (function populate(items, pre) {
830
+ for (let op = 0, item, nm, imm; op < items.length; op++) if (item = items[op]) {
831
+ // Nested array (0xfb, 0xfc, 0xfd opcodes)
832
+ if (Array.isArray(item)) populate(item, op)
833
+ else [nm, imm] = item.split(' '), INSTR[nm] = pre ? [pre, ...uleb(op)] : [op], imm && (HANDLER[nm] = IMM[imm])
656
834
  }
835
+ })(INSTR);
657
836
 
658
- // bulk memory: (memory.init) (memory.copy) (data.drop) (memory.fill)
659
- // table ops: (table.init|copy|grow|size|fill) (elem.drop)
660
- // https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md#instruction-encoding
661
- else if (code == 0xfc) {
662
- [, code] = immed
663
837
 
664
- // memory.init idx, data.drop idx,
665
- if (code === 0x08 || code === 0x09) {
666
- immed.push(...uleb(id(nodes.shift(), ctx.data)))
667
- }
838
+ // instruction encoder
839
+ const instr = (nodes, ctx) => {
840
+ let out = [], meta = []
668
841
 
669
- // memory placeholders
670
- if (code == 0x08 || code == 0x0b) immed.push(0)
671
- else if (code === 0x0a) immed.push(0, 0)
842
+ while (nodes?.length) {
843
+ let op = nodes.shift()
672
844
 
673
- // elem.drop elemidx
674
- if (code === 0x0d) {
675
- immed.push(...uleb(id(nodes.shift(), ctx.elem)))
676
- }
677
- // table.init tableidx elemidx -> 0xfc 0x0c elemidx tableidx
678
- else if (code === 0x0c) {
679
- immed.push(...uleb(id(nodes[1], ctx.elem)), ...uleb(id(nodes.shift(), ctx.table)))
680
- nodes.shift()
681
- }
682
- // table.* tableidx?
683
- // abbrs https://webassembly.github.io/spec/core/text/instructions.html#id1
684
- else if (code >= 0x0c && code < 0x13) {
685
- immed.push(...uleb(id(nodes.shift(), ctx.table)))
686
- // table.copy tableidx? tableidx?
687
- if (code === 0x0e) immed.push(...uleb(id(nodes.shift(), ctx.table)))
845
+ // Handle code metadata marker - store for next instruction
846
+ // ['@metadata', type, data]
847
+ if (op?.[0] === '@metadata') {
848
+ meta.push(op.slice(1))
849
+ continue
688
850
  }
689
- }
690
851
 
691
- // v128s: (v128.load x) etc
692
- // https://github.com/WebAssembly/simd/blob/master/proposals/simd/BinarySIMD.md
693
- else if (code === 0xfd) {
694
- [, code] = immed
695
- immed = [0xfd, ...uleb(code)]
696
- // (v128.load offset? align?)
697
- if (code <= 0x0b) {
698
- const [a, o] = memarg(nodes)
699
- immed.push(...uleb((a ?? align(op))), ...uleb(o ?? 0))
700
- }
701
- // (v128.load_lane offset? align? idx)
702
- else if (code >= 0x54 && code <= 0x5d) {
703
- const [a, o] = memarg(nodes)
704
- immed.push(...uleb((a ?? align(op))), ...uleb(o ?? 0))
705
- // (v128.load_lane_zero)
706
- if (code <= 0x5b) immed.push(...uleb(nodes.shift()))
852
+ // Array = unknown instruction passed through from normalize
853
+ if (Array.isArray(op)) {
854
+ op.i != null && (err.i = op.i)
855
+ err(`Unknown instruction ${op[0]}`)
707
856
  }
708
- // (i8x16.shuffle 0 1 ... 15 a b)
709
- else if (code === 0x0d) {
710
- // i8, i16, i32 - bypass the encoding
711
- for (let i = 0; i < 16; i++) immed.push(parseUint(nodes.shift(), 32))
712
- }
713
- // (v128.const i32x4 1 2 3 4)
714
- else if (code === 0x0c) {
715
- let [t, n] = nodes.shift().split('x'),
716
- bits = +t.slice(1),
717
- stride = bits >>> 3 // i16 -> 2, f32 -> 4
718
- n = +n
719
- // i8, i16, i32 - bypass the encoding
720
- if (t[0] === 'i') {
721
- let arr = n === 16 ? new Uint8Array(16) : n === 8 ? new Uint16Array(8) : n === 4 ? new Uint32Array(4) : new BigUint64Array(2)
722
- for (let i = 0; i < n; i++) {
723
- let s = nodes.shift(), v = encode[t].parse(s)
724
- arr[i] = v
725
- }
726
- immed.push(...(new Uint8Array(arr.buffer)))
727
- }
728
- // f32, f64 - encode
729
- else {
730
- let arr = new Uint8Array(16)
731
- for (let i = 0; i < n; i++) {
732
- let s = nodes.shift(), v = encode[t](s)
733
- arr.set(v, i * stride)
734
- }
735
- immed.push(...arr)
736
- }
737
- }
738
- // (i8x16.extract_lane_s 0 ...)
739
- else if (code >= 0x15 && code <= 0x22) {
740
- immed.push(...uleb(parseUint(nodes.shift())))
741
- }
742
- }
743
-
744
- // control block abbrs
745
- // block ..., loop ..., if ...
746
- else if (code === 2 || code === 3 || code === 4) {
747
- ctx.block.push(code)
748
-
749
- // (block $x) (loop $y) - save label pointer
750
- if (nodes[0]?.[0] === '$') ctx.block[nodes.shift()] = ctx.block.length
751
-
752
- let t = nodes.shift();
753
-
754
- // void
755
- if (!t) immed.push(TYPE.void)
756
- // (result i32) - doesn't require registering type
757
- // FIXME: Make sure it is signed positive integer (leb, not uleb) https://webassembly.github.io/gc/core/binary/instructions.html#control-instructions
758
- else if (t[0] === 'result') immed.push(...reftype(t[1], ctx))
759
- // (type idx)
760
- else immed.push(...uleb(id(t[1], ctx.type)))
761
- }
762
- // else
763
- else if (code === 5) { }
764
- // then
765
- else if (code === 6) immed = [] // ignore
766
-
767
- // local.get $id, local.tee $id x
768
- else if (code == 0x20 || code == 0x21 || code == 0x22) {
769
- immed.push(...uleb(id(nodes.shift(), ctx.local)))
770
- }
771
-
772
- // global.get $id, global.set $id
773
- else if (code == 0x23 || code == 0x24) {
774
- immed.push(...uleb(id(nodes.shift(), ctx.global)))
775
- }
776
-
777
- // call $func ...nodes
778
- // return_call $func
779
- else if (code == 0x10 || code == 0x12) {
780
- immed.push(...uleb(id(nodes.shift(), ctx.func)))
781
- }
782
-
783
- // call_indirect $table (type $typeName) ...nodes
784
- // return_call_indirect $table (type $typeName) ... nodes
785
- else if (code == 0x11 || code == 0x13) {
786
- immed.push(
787
- ...uleb(id(nodes[1][1], ctx.type)),
788
- ...uleb(id(nodes.shift(), ctx.table))
789
- )
790
- nodes.shift()
791
- }
792
-
793
- // call_ref $type
794
- // return_call_ref $type
795
- else if (code == 0x14 || code == 0x15) {
796
- immed.push(...uleb(id(nodes.shift(), ctx.type)))
797
- }
798
857
 
799
- // end
800
- else if (code == 0x0b) ctx.block.pop()
858
+ let [...bytes] = INSTR[op] || err(`Unknown instruction ${op}`)
801
859
 
802
- // br $label result?
803
- // br_if $label cond result?
804
- // br_on_null $l, br_on_non_null $l
805
- else if (code == 0x0c || code == 0x0d || code == 0xd5 || code == 0xd6) {
806
- immed.push(...uleb(blockid(nodes.shift(), ctx.block)))
807
- }
808
-
809
- // br_table 1 2 3 4 0 selector result?
810
- else if (code == 0x0e) {
811
- let args = []
812
- while (nodes[0] && (!isNaN(nodes[0]) || nodes[0][0] === '$')) {
813
- args.push(...uleb(blockid(nodes.shift(), ctx.block)))
860
+ // special op handlers
861
+ if (HANDLER[op]) {
862
+ // select: becomes typed select (opcode+1) if next node is an array with result types
863
+ if (op === 'select' && nodes[0]?.length) bytes[0]++
864
+ // ref.test|cast: opcode+1 if type is nullable: (ref null $t) or (funcref, anyref, etc.)
865
+ else if (HANDLER[op] === IMM.reftype && (nodes[0][1] === 'null' || nodes[0][0] !== 'ref')) {
866
+ bytes[bytes.length - 1]++
867
+ }
868
+ bytes.push(...HANDLER[op](nodes, ctx, op))
814
869
  }
815
- args.unshift(...uleb(args.length - 1))
816
- immed.push(...args)
817
- }
818
-
819
- // select (result t+)
820
- else if (code == 0x1b) {
821
- let result = nodes.shift()
822
- // 0x1b -> 0x1c
823
- if (result.length) immed.push(immed.pop() + 1, ...vec(result.map(t => reftype(t, ctx))))
824
- }
825
-
826
- // ref.func $id
827
- else if (code == 0xd2) {
828
- immed.push(...uleb(id(nodes.shift(), ctx.func)))
829
- }
830
-
831
- // ref.null func
832
- else if (code == 0xd0) {
833
- let t = nodes.shift()
834
- immed.push(...(HEAPTYPE[t] ? [HEAPTYPE[t]] : uleb(id(t, ctx.type)))) // func->funcref, extern->externref
835
- }
836
-
837
- // binary/unary (i32.add a b) - no immed
838
- else if (code >= 0x45) { }
839
-
840
- // i32.store align=n offset=m
841
- else if (code >= 0x28 && code <= 0x3e) {
842
- let [a, o] = memarg(nodes)
843
- immed.push(...uleb((a ?? align(op))), ...uleb(o ?? 0))
844
- }
845
-
846
- // i32.const 123, f32.const 123.45
847
- else if (code >= 0x41 && code <= 0x44) {
848
- immed.push(...encode[op.split('.')[0]](nodes.shift()))
849
- }
850
870
 
851
- // memory.grow|size $idx - mandatory 0x00
852
- // https://webassembly.github.io/spec/core/binary/instructions.html#memory-instructions
853
- else if (code == 0x3f || code == 0x40) {
854
- immed.push(0)
855
- }
871
+ // Record metadata at current byte position
872
+ for (const [type, data] of meta) ((ctx.meta[type] ??= []).push([out.length, data]))
856
873
 
857
- // table.get|set $id
858
- else if (code == 0x25 || code == 0x26) {
859
- immed.push(...uleb(id(nodes.shift(), ctx.table)))
874
+ out.push(...bytes)
860
875
  }
861
876
 
862
- out.push(...immed)
863
-
864
- return out
877
+ return out.push(0x0b), out
865
878
  }
866
879
 
867
- // instantiation time value initializer (consuming) - we redirect to instr
868
- const expr = (node, ctx) => [...instr([node], ctx), 0x0b]
880
+ // instantiation time value initializer (consuming) - normalize then encode + add end byte
881
+ const expr = (node, ctx) => instr(normalize([node], ctx), ctx)
869
882
 
870
883
  // deref id node to numeric idx
871
- const id = (nm, list, n) => (n = nm[0] === '$' ? list[nm] : +nm, n in list ? n : err(`Unknown ${list.name} ${nm}`))
884
+ const id = (nm, list, n) => (n = isId(nm) ? list[nm] : +nm, n in list ? n : err(`Unknown ${list.name} ${nm}`))
872
885
 
873
886
  // block id - same as id but for block
874
887
  // index indicates how many block items to pop
875
888
  const blockid = (nm, block, i) => (
876
- i = nm?.[0] === '$' ? block.length - block[nm] : +nm,
889
+ i = isId(nm) ? block.length - block[nm] : +nm,
877
890
  isNaN(i) || i > block.length ? err(`Bad label ${nm}`) : i
878
891
  )
879
892
 
880
893
  // consume align/offset params
881
894
  const memarg = (args) => {
882
895
  let align, offset, k, v
883
- while (args[0]?.includes('=')) [k, v] = args.shift().split('='), k === 'offset' ? offset = +v : k === 'align' ? align = +v : err(`Unknown param ${k}=${v}`)
896
+ while (isMemParam(args[0])) [k, v] = args.shift().split('='), k === 'offset' ? offset = +v : k === 'align' ? align = +v : err(`Unknown param ${k}=${v}`)
884
897
 
885
898
  if (offset < 0 || offset > 0xffffffff) err(`Bad offset ${offset}`)
886
899
  if (align <= 0 || align > 0xffffffff) err(`Bad align ${align}`)
@@ -888,6 +901,13 @@ const memarg = (args) => {
888
901
  return [align, offset]
889
902
  }
890
903
 
904
+ // Encode memarg (align + offset) with default values based on instruction
905
+ // If memIdx is non-zero, set bit 6 in alignment flags and insert memIdx after align
906
+ const memargEnc = (nodes, op, memIdx = 0) => {
907
+ const [a, o] = memarg(nodes), alignVal = (a ?? align(op)) | (memIdx && 0x40)
908
+ return memIdx ? [...uleb(alignVal), ...uleb(memIdx), ...uleb(o ?? 0)] : [...uleb(alignVal), ...uleb(o ?? 0)]
909
+ }
910
+
891
911
  // const ALIGN = {
892
912
  // 'i32.load': 4, 'i64.load': 8, 'f32.load': 4, 'f64.load': 8,
893
913
  // 'i32.load8_s': 1, 'i32.load8_u': 1, 'i32.load16_s': 2, 'i32.load16_u': 2,
@@ -897,35 +917,43 @@ const memarg = (args) => {
897
917
  // 'v128.load': 16, 'v128.load8_lane': 1, 'v128.load16_lane': 2, 'v128.load32_lane': 4, 'v128.load64_lane': 8, 'v128.store8_lane': 1, 'v128.store16_lane': 2, 'v128.store32_lane': 4, 'v128.store64_lane': 8, 'v128.load32_zero': 4, 'v128.load64_zero': 8
898
918
  // }
899
919
  const align = (op) => {
900
- let [group, opname] = op.split('.'); // v128.load8x8_u -> group = v128, opname = load8x8_u
901
- let [lsize] = (opname[0] === 'l' ? opname.slice(4) : opname.slice(5)).split('_') // load8x8_u -> lsize = 8x8
902
- let [size, x] = lsize ? lsize.split('x') : [group.slice(1)] // 8x8 -> size = 8
903
- return Math.log2(x ? 8 : +size / 8)
920
+ let i = op.indexOf('.', 3) + 1, group = op.slice(1, op[0] === 'v' ? 4 : 3) // type: i32->32, v128->128
921
+ if (op[i] === 'a') i = op.indexOf('.', i) + 1 // skip 'atomic.'
922
+ if (op[0] === 'm') return op.includes('64') ? 3 : 2 // memory.*.wait64 vs wait32/notify
923
+ if (op[i] === 'r') { // rmw: extract size from rmw##
924
+ let m = op.slice(i, i + 6).match(/\d+/)
925
+ return m ? Math.log2(m[0] / 8) : Math.log2(+group / 8)
926
+ }
927
+ // load/store: extract size after operation name
928
+ let k = op[i] === 'l' ? i + 4 : i + 5, m = op.slice(k).match(/(\d+)(x|_|$)/)
929
+ return Math.log2(m ? (m[2] === 'x' ? 8 : m[1] / 8) : +group / 8)
904
930
  }
905
931
 
906
932
  // build limits sequence (consuming)
907
- const limits = (node) => (
908
- isNaN(parseInt(node[1])) ? [0, ...uleb(parseUint(node.shift()))] : [node[2] === 'shared' ? 3 : 1, ...uleb(parseUint(node.shift())), ...uleb(parseUint(node.shift()))]
909
- )
933
+ // Memory64: i64 index type uses flags 0x04-0x07 (bit 2 = is_64)
934
+ const limits = (node) => {
935
+ const is64 = node[0] === 'i64' && node.shift()
936
+ const shared = node[node.length - 1] === 'shared' && node.pop()
937
+ const hasMax = !isNaN(parseInt(node[1]))
938
+ const flag = (is64 ? 4 : 0) | (shared ? 2 : 0) | (hasMax ? 1 : 0)
939
+ // For i64, parse as unsigned BigInt (limits are always unsigned)
940
+ const parse = is64 ? v => {
941
+ if (typeof v === 'bigint') return v
942
+ const str = typeof v === 'string' ? v.replaceAll('_', '') : String(v)
943
+ return BigInt(str)
944
+ } : parseUint
945
+
946
+ return hasMax
947
+ ? [flag, ...uleb(parse(node.shift())), ...uleb(parse(node.shift()))]
948
+ : [flag, ...uleb(parse(node.shift()))]
949
+ }
910
950
 
911
951
  // check if node is valid int in a range
912
- // we put extra condition for index ints for tests complacency
913
- const parseUint = (v, max = 0xFFFFFFFF) => (typeof v === 'string' && v[0] !== '+' ? (typeof max === 'bigint' ? i64 : i32).parse(v) : typeof v === 'number' ? v : err(`Bad int ${v}`)) > max ? err(`Value out of range ${v}`) : v
914
-
915
-
916
- // escape codes
917
- const escape = { n: 10, r: 13, t: 9, v: 1, '"': 34, "'": 39, '\\': 92 }
918
-
919
- // build string binary
920
- const str = str => {
921
- let res = [], i = 0, c, BSLASH = 92
922
- // https://webassembly.github.io/spec/core/text/values.html#strings
923
- for (; i < str.length;) {
924
- c = str.charCodeAt(i++)
925
- res.push(c === BSLASH ? escape[str[i++]] || parseInt(str.slice(i - 1, ++i), 16) : c)
926
- }
927
- return res
952
+ const parseUint = (v, max = 0xFFFFFFFF) => {
953
+ const n = typeof v === 'string' && v[0] !== '+' ? i32.parse(v) : typeof v === 'number' ? v : err(`Bad int ${v}`)
954
+ return n > max ? err(`Value out of range ${v}`) : n
928
955
  }
929
956
 
957
+
930
958
  // serialize binary array
931
959
  const vec = a => [...uleb(a.length), ...a.flat()]