watr 3.0.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/compile.js CHANGED
@@ -1,657 +1,934 @@
1
1
  import * as encode from './encode.js'
2
- import { uleb } from './encode.js'
3
- import { SECTION, ALIGN, TYPE, KIND, INSTR } from './const.js'
2
+ import { uleb, i32, i64 } from './encode.js'
3
+ import { SECTION, TYPE, KIND, INSTR, HEAPTYPE, DEFTYPE, RECTYPE, REFTYPE } from './const.js'
4
4
  import parse from './parse.js'
5
+ import { clone, err } from './util.js'
5
6
 
6
7
  // build instructions index
7
// Register every mnemonic as a key on INSTR itself, mapped to the bytes of its opcode.
// Positions below 0xfb map to the single byte [i]; positions starting at 0xfb, 0x11b and
// 0x133 map to a prefix byte (0xfb, 0xfc, 0xfd) followed by the offset from that start.
INSTR.forEach((op, i) => {
  let code
  if (i >= 0x133) code = [0xfd, i - 0x133]
  else if (i >= 0x11b) code = [0xfc, i - 0x11b]
  else if (i >= 0xfb) code = [0xfb, i - 0xfb]
  else code = [i]
  INSTR[op] = code
})
20
10
 
21
11
  /**
22
12
  * Converts a WebAssembly Text Format (WAT) tree to a WebAssembly binary format (WASM).
23
13
  *
24
14
  * @param {string|Array} nodes - The WAT tree or string to be compiled to WASM binary.
15
+ * @param {Object} [opt] - Documented as opt.fullSize (fixed-width uleb encoding); the signature below declares only `nodes`, so this argument is currently not read.
25
16
  * @returns {Uint8Array} The compiled WASM binary data.
26
17
  */
27
- export default (nodes) => {
18
export default function watr(nodes) {
  // Compiles a WAT source string, or an already parsed node tree, into wasm binary bytes.
  // Stages: normalize the module form, distribute nodes into per-section lists on `ctx`
  // while expanding abbreviations, register implicit function types, serialize sections.
  // Only `nodes` is read; this signature accepts no options argument.
  // Returns a Uint8Array.

  // normalize to (module ...) form; a tree is passed through clone() first
  // (presumably so the destructive shift/pop calls below do not touch the caller's tree)
  if (typeof nodes === 'string') nodes = parse(nodes)
  else nodes = clone(nodes)

  // module abbr https://webassembly.github.io/spec/core/text/modules.html#id10
  // drop the `module` keyword and an optional $name
  if (nodes[0] === 'module') nodes.shift(), nodes[0]?.[0] === '$' && nodes.shift()
  // single node, not module
  else if (typeof nodes[0] === 'string') nodes = [nodes]

  // binary abbr "\00" "\0x61" ... - unquote the chunks and return their bytes directly
  if (nodes[0] === 'binary') {
    nodes.shift()
    return Uint8Array.from(str(nodes.map(i => i.slice(1, -1)).join('')))
  }
  // quote "a" "b" - unquote the chunks and compile the joined text as source
  if (nodes[0] === 'quote') {
    nodes.shift()
    return watr(nodes.map(i => i.slice(1, -1)).join(''))
  }

  // scopes are aliased by key as well, eg. section.func.$name = section[SECTION.func] = idx
  const ctx = []
  for (let kind in SECTION) (ctx[SECTION[kind]] = ctx[kind] = []).name = kind
  ctx._ = {} // implicit types, keyed by '$param>result'

  let subc // current subtype count: how many upcoming type nodes still belong to a rec group

  // prepare/normalize nodes
  while (nodes.length) {
    let [kind, ...node] = nodes.shift()
    let imported // [mod, field] if node needs to be imported
    let rec // number of subtypes under rec type

    // (rec (type $a (sub final? $sup* (func ...))...) (type $b ...)) -> save subtypes
    if (kind === 'rec') {
      // node contains a list of subtypes, (type ...) or (type (sub final? ...))
      // convert rec type into regular type (first subtype) with stashed subtypes length
      // add rest of subtypes as regular type nodes with subtype flag
      if (node.length > 1) {
        rec = subc = node.length
        nodes.unshift(...node)
        node = nodes.shift()
        kind = node.shift()
      }
      else kind = (node = node[0]).shift()
    }

    // import abbr
    // (import m n (table|memory|global|func id? type)) -> (table|memory|global|func id? (import m n) type)
    else if (kind === 'import') [kind, ...node] = (imported = node).pop()

    // index, alias
    let items = ctx[kind]
    let name = alias(node, items)

    // export abbr
    // (table|memory|global|func id? (export n)* ...) -> (table|memory|global|func id ...) (export n (table|memory|global|func id))
    while (node[0]?.[0] === 'export') ctx.export.push([node.shift()[1], [kind, items.length]])

    // for import nodes - redirect output to import
    if (node[0]?.[0] === 'import') [, ...imported] = node.shift()

    // table abbr
    if (kind === 'table') {
      // (table id? reftype (elem ...{n})) -> (table id? n n reftype) (elem (table id) (i32.const 0) reftype ...)
      if (node[1]?.[0] === 'elem') {
        let [reftype, [, ...els]] = node
        node = [els.length, els.length, reftype]
        ctx.elem.push([['table', name || items.length], ['i32.const', '0'], reftype, ...els])
      }
    }

    // data abbr
    // (memory id? (data str)) -> (memory id? n n) (data (memory id) (i32.const 0) str)
    else if (kind === 'memory' && node[0]?.[0] === 'data') {
      let [, ...data] = node.shift()
      // Size the memory from the decoded byte length rather than the escaped source length:
      // an escape such as "\00" is several source characters but a single data byte.
      // str() is the same decoder the binary abbreviation above feeds into Uint8Array.from.
      let m = '' + Math.ceil(str(data.map(s => s.slice(1, -1)).join('')).length / 65536)
      ctx.data.push([['memory', items.length], ['i32.const', 0], ...data])
      node = [m, m]
    }

    // keep start name (alias() consumed it above, the start builder needs it back)
    else if (kind === 'start') name && node.push(name)

    // normalize type definition to (func|array|struct dfn) form
    // (type (func param* result*))
    // (type (array (mut i8)))
    // (type (struct (field a)*))
    // (type (sub final? $nm* (struct|array|func ...)))
    else if (kind === 'type') {
      let [dfn] = node
      let issub = subc-- > 0
      let subkind = issub && 'subfinal', supertypes = []
      if (dfn[0] === 'sub') {
        subkind = dfn.shift(), dfn[0] === 'final' && (subkind += dfn.shift())
        dfn = (supertypes = dfn).pop() // last item is definition
      }

      let ckind = dfn.shift() // composite type kind
      // func signatures also get a '$param>result' alias; the first type with that signature wins
      if (ckind === 'func') dfn = paramres(dfn), ctx.type['$' + dfn.join('>')] ??= ctx.type.length
      else if (ckind === 'struct') dfn = fieldseq(dfn, 'field', true)
      else if (ckind === 'array') dfn = dfn.shift()

      // last slot: [firstIndex, count] on the head of a rec group, true on its other members
      node = [ckind, dfn, subkind, supertypes, rec ? [ctx.type.length, rec] : issub]
    }

    // dupe to code section, save implicit type
    else if (kind === 'func') {
      let [idx, param, result] = typeuse(node, ctx)
      if (idx == null) ctx._[idx = '$' + param + '>' + result] = [param, result]
      // we save idx because type can be defined after
      if (!imported) nodes.push(['code', [idx, param, result], ...plain(node, ctx)]) // pass param since they may have names
      node.unshift(['type', idx])
    }

    // import writes to import section and adds placeholder for (kind) section
    if (imported) ctx.import.push([...imported, [kind, ...node]]), node = null

    items.push(node)
  }

  // add implicit types - main types receive aliases, implicit types are added if no explicit types exist
  for (let n in ctx._) ctx.type[n] ??= (ctx.type.push(['func', ctx._[n]]) - 1)

  // FIXME: datacount is not patched here when there is no data; the datacount builder
  // is expected to return empty instead, since bin() filters builder output as well

  // convert one section's nodes to bytes: [sectionCode, ...sized payload], or [] if nothing renders
  const bin = (kind, count = true) => {
    const items = ctx[kind]
      .filter(Boolean) // filter out (type, imported) placeholders
      .map(item => build[kind](item, ctx))
      .filter(Boolean) // filter out unrenderable things (subtype or data.length)

    return !items.length ? [] : [kind, ...vec(count ? vec(items) : items)]
  }

  // build final binary
  return Uint8Array.from([
    0x00, 0x61, 0x73, 0x6d, // magic
    0x01, 0x00, 0x00, 0x00, // version
    ...bin(SECTION.custom),
    ...bin(SECTION.type),
    ...bin(SECTION.import),
    ...bin(SECTION.func),
    ...bin(SECTION.table),
    ...bin(SECTION.memory),
    ...bin(SECTION.global),
    ...bin(SECTION.export),
    ...bin(SECTION.start, false),
    ...bin(SECTION.elem),
    ...bin(SECTION.datacount, false),
    ...bin(SECTION.code),
    ...bin(SECTION.data)
  ])
}
158
170
 
159
- ctx.func[idx] = uleb(typeidx)
171
// Take an optional leading name (eg. $t) off `node` and register it on `list`.
// The head item is consumed when it starts with '$' or has no first character at all
// (which includes an empty node); otherwise nothing is consumed and false is returned.
// A truthy name is stored as list[name] = list.length; a repeated name is reported via err().
// Returns the consumed item.
const alias = (node, list) => {
  const head = node[0]?.[0]
  if (head !== '$' && head != null) return false

  const name = node.shift()
  if (!name) return name

  if (name in list) err(`Duplicate ${list.name} ${name}`)
  else list[name] = list.length // save alias
  return name
}
160
177
 
161
- // build code section
162
- let blocks = [] // control instructions / blocks stack
163
- let locals = [] // list of local variables
178
// abbr blocks, loops, ifs; collect implicit types via typeuses; resolve optional immediates
// https://webassembly.github.io/spec/core/text/instructions.html#folded-instructions
//
// Consumes `nodes` (an instruction sequence, flat and/or folded) and returns a new list in
// which folded block/loop/if forms are flattened to `... end` sequences, block types are
// normalized via blocktype(), and omitted table indices are filled in with 0.
// Other folded instructions are kept as nested lists. Implicit types are recorded on ctx._
// and ctx.datacount[0] is set when an instruction that needs the datacount section is seen.
// Label mismatches and unfolded `if` conditions are reported through err().
const plain = (nodes, ctx) => {
  let out = [], stack = [], label

  // true when `v` looks like an index immediate: a $name or something numeric.
  // Optional chaining keeps this safe when the instruction is the last token, which the
  // unguarded nodes[1][0] / nodes[0][0] reads previously turned into a TypeError.
  const isidx = v => v?.[0] === '$' || !isNaN(v)

  while (nodes.length) {
    let node = nodes.shift()

    // lookup is slower than sequence of known ifs
    if (typeof node === 'string') {
      out.push(node)

      // block typeuse?
      if (node === 'block' || node === 'if' || node === 'loop') {
        // (loop $l?)
        if (nodes[0]?.[0] === '$') label = nodes.shift(), out.push(label), stack.push(label)

        out.push(blocktype(nodes, ctx))
      }

      // else $label
      // end $label - make sure it matches block label
      else if (node === 'else' || node === 'end') {
        if (nodes[0]?.[0] === '$') (node === 'end' ? stack.pop() : label) !== (label = nodes.shift()) && err(`Mismatched label ${label}`)
      }

      // select (result i32 i32 i32)?
      else if (node === 'select') {
        out.push(paramres(nodes, 0)[1])
      }

      // call_indirect $table? $typeidx
      // return_call_indirect $table? $typeidx
      else if (node.endsWith('call_indirect')) {
        let tableidx = isidx(nodes[0]) ? nodes.shift() : 0
        let [idx, param, result] = typeuse(nodes, ctx, 0)
        out.push(tableidx, ['type', idx ?? (ctx._[idx = '$' + param + '>' + result] = [param, result], idx)])
      }

      // mark datacount section as required
      else if (node === 'memory.init' || node === 'data.drop' || node === 'array.new_data' || node === 'array.init_data') {
        ctx.datacount[0] = true
      }

      // table.init tableidx? elemidx -> table.init tableidx elemidx
      // the table index is present only when a second index immediate follows the first
      else if (node === 'table.init') out.push(isidx(nodes[1]) ? nodes.shift() : 0, nodes.shift())

      // table.* tableidx?
      else if (node.startsWith('table.')) {
        out.push(isidx(nodes[0]) ? nodes.shift() : 0)

        // table.copy tableidx? tableidx?
        if (node === 'table.copy') out.push(isidx(nodes[0]) ? nodes.shift() : 0)
      }
    }

    else {
      // (block ...) -> block ... end
      if (node[0] === 'block' || node[0] === 'loop') {
        out.push(...plain(node, ctx), 'end')
      }

      // (if ...) -> if ... end
      else if (node[0] === 'if') {
        let then = [], els = [], immed = [node.shift()]
        // (if label? blocktype? cond*? (then instr*) (else instr*)?) -> cond*? if label? blocktype? instr* else instr*? end
        // https://webassembly.github.io/spec/core/text/instructions.html#control-instructions
        if (node[node.length - 1]?.[0] === 'else') {
          els = plain(node.pop(), ctx)
          // ignore empty else (only the `else` keyword itself came back)
          // https://webassembly.github.io/spec/core/text/instructions.html#abbreviations
          if (els.length === 1) els.length = 0
        }
        if (node[node.length - 1]?.[0] === 'then') then = plain(node.pop(), ctx)

        // label?
        if (node[0]?.[0] === '$') immed.push(node.shift())

        // blocktype?
        immed.push(blocktype(node, ctx))

        // whatever is left must be folded condition expressions
        if (typeof node[0] === 'string') err('Unfolded condition')

        out.push(...plain(node, ctx), ...immed, ...then, ...els, 'end')
      }
      // any other folded instruction stays a nested list
      else out.push(plain(node, ctx))
    }
  }

  return out
}
285
269
 
286
- // (block $x) (loop $y)
287
- if (args[0]?.[0] === '$') (blocks[args.shift()] = blocks.length)
270
// Consume a typeuse from the head of `nodes`: an optional (type idx) followed by
// (param ...)* (result ...)*. `names` is forwarded to paramres().
// With an explicit (type ...) the result is [idx]; inline params/results, when present and
// the type is already known to ctx.type, are checked against it and a difference is
// reported via err(). Without (type ...) the result is [<empty>, param, result].
// https://webassembly.github.io/spec/core/text/modules.html#type-uses
const typeuse = (nodes, ctx, names) => {
  const explicit = nodes[0]?.[0] === 'type'
  const idx = explicit ? nodes.shift()[1] : undefined
  const [param, result] = paramres(nodes, names)

  // implicit type (param i32 i32)(result i32)
  if (!explicit) return [, param, result]

  // check type consistency (excludes forward refs, which are not in ctx.type yet)
  const inline = param.length || result.length
  if (inline && idx in ctx.type && ctx.type[id(idx, ctx.type)][1].join('>') !== param + '>' + result) {
    err(`Type ${idx} mismatch`)
  }

  return [idx]
}
349
292
 
350
- // (br_table 1 2 3 4 0 selector result?)
351
- else if (opCode == 0x0e) {
352
- immed = []
353
- while (args[0] && !Array.isArray(args[0])) {
354
- id = args.shift()
355
- immed.push(...uleb(id[0][0] === '$' ? blocks.length - blocks[id] : id))
356
- }
357
- immed.unshift(...uleb(immed.length - 1))
358
- }
293
// Consume a (param t+)* (result t+)* run from the head of `nodes`.
// Returns [param, result] as built by fieldseq(); param names are accepted only when
// `names` is truthy (default), result names never. A (param ...) that shows up after the
// results is reported via err().
const paramres = (nodes, names = true) => {
  const signature = [
    fieldseq(nodes, 'param', names), // eg. (param i32 i64) (param $x? i32)
    fieldseq(nodes, 'result') // eg. (result f64 f32)(result i32)
  ]

  const [next] = nodes
  if (next?.[0] === 'param') err(`Unexpected param`)

  return signature
}
376
307
 
377
- // if group (cmd im1 im2 arg1 arg2) - insert any remaining args first: arg1 arg2
378
- // because inline case has them in stack already
379
- if (group) while (args.length) consume(args, out)
308
// Collect a run of same-keyword groups from the head of `nodes`, eg. (param a) (param b c),
// (field a) (field b c) or (result a b) (result c), into one flat list of their items.
// A group may start with a $name: when `names` is truthy the name is stored on the list as
// seq[name] = position of the item that follows it (a repeated name is reported via err());
// when `names` is falsy any name is reported via err().
// Consumed groups are removed from `nodes`; the groups themselves are left unmodified.
const fieldseq = (nodes, field, names = false) => {
  const seq = []

  for (let group = nodes[0]; group?.[0] === field; group = nodes[0]) {
    nodes.shift()
    const items = group.slice(1)

    if (items[0]?.[0] === '$') {
      const name = items.shift()
      // expose name refs, if allowed
      if (!names) err(`Unexpected ${field} name ${name}`)
      else if (name in seq) err(`Duplicate ${field} ${name}`)
      else seq[name] = seq.length
    }

    seq.push(...items)
  }

  return seq
}
325
+
326
// Consume a block type from the head of `nodes` and return its normalized form:
//   ['type', idx]       - explicit (type idx), or an implicit multi-value / param signature
//   ['result', valtype] - exactly one result and no params (needs no registered type)
//   undefined           - no params and no results
// Implicit signatures are recorded on ctx._ under the key '$param>result'.
const blocktype = (nodes, ctx) => {
  const [typeidx, params, results] = typeuse(nodes, ctx, 0)

  // direct idx (no params/result needed)
  if (typeidx != null) return ['type', typeidx]

  if (params.length === 0) {
    if (results.length === 0) return
    if (results.length === 1) return ['result', ...results]
  }

  // (param i32 i32)? (result i32 i32) - implicit type
  const key = '$' + params + '>' + results
  ctx._[key] = [params, results]
  return ['type', key]
}
343
+
344
+
345
+ // build section binary [by section codes] (non consuming)
346
+ const build = [,
347
+ // type kinds
348
+ // (func params result)
349
+ // (array i8)
350
+ // (struct ...fields)
351
+ ([kind, fields, subkind, supertypes, rec], ctx) => {
352
+ if (rec === true) return // ignore rec subtypes cept for 1st one
353
+
354
+ let details
355
+ // (rec (sub ...)*)
356
+ if (rec) {
357
+ // FIXME: rec of one type
358
+ kind = 'rec'
359
+ let [from, length] = rec, subtypes = Array.from({ length }, (_, i) => build[SECTION.type](ctx.type[from + i].slice(0, 4), ctx))
360
+ details = vec(subtypes)
361
+ }
362
+ // (sub final? sups* (type...))
363
+ else if (subkind === 'sub' || supertypes?.length) {
364
+ details = [...vec(supertypes.map(n => id(n, ctx.type))), ...build[SECTION.type]([kind, fields], ctx)]
365
+ kind = subkind
383
366
  }
384
367
 
385
- const bytes = []
386
- // FIXME: avoid passing bytes from outside, push result instead
387
- while (node.length) consume(node, bytes)
388
- bytes.push(0x0b)
368
+ else if (kind === 'func') {
369
+ details = [...vec(fields[0].map(t => reftype(t, ctx))), ...vec(fields[1].map(t => reftype(t, ctx)))]
370
+ }
371
+ else if (kind === 'array') {
372
+ details = fieldtype(fields, ctx)
373
+ }
374
+ else if (kind === 'struct') {
375
+ details = vec(fields.map(t => fieldtype(t, ctx)))
376
+ }
389
377
 
390
- // squash locals into (n:u32 t:valtype)*, n is number and t is type
391
- let loctypes = locals.reduce((a, type) => (type == a[a.length - 1]?.[1] ? a[a.length - 1][0]++ : a.push([1, type]), a), [])
378
+ return [DEFTYPE[kind], ...details]
379
+ },
392
380
 
393
- // https://webassembly.github.io/spec/core/binary/modules.html#code-section
394
- ctx.code[idx] = vec([...uleb(loctypes.length), ...loctypes.flatMap(([n, t]) => [...uleb(n), t]), ...bytes])
381
+ // (import "math" "add" (func|table|global|memory typedef?))
382
+ ([mod, field, [kind, ...dfn]], ctx) => {
383
+ let details
384
+
385
+ if (kind === 'func') {
386
+ // we track imported funcs in func section to share namespace, and skip them on final build
387
+ let [[, typeidx]] = dfn
388
+ details = uleb(id(typeidx, ctx.type))
389
+ }
390
+ else if (kind === 'memory') {
391
+ details = limits(dfn)
392
+ }
393
+ else if (kind === 'global') {
394
+ details = fieldtype(dfn[0], ctx)
395
+ }
396
+ else if (kind === 'table') {
397
+ details = [...reftype(dfn.pop(), ctx), ...limits(dfn)]
398
+ }
399
+ else err(`Unknown kind ${kind}`)
400
+
401
+ return ([...vec(str(mod.slice(1, -1))), ...vec(str(field.slice(1, -1))), KIND[kind], ...details])
395
402
  },
396
403
 
397
- // (table id? 1 2? funcref)
398
- table(idx, [...node], ctx) {
399
- ctx.table[idx] = [TYPE[node.pop()], ...limits(node)]
404
+ // (func $name? ...params result ...body)
405
+ ([[, typeidx]], ctx) => (uleb(id(typeidx, ctx.type))),
406
+
407
+ // (table 1 2 funcref)
408
+ (node, ctx) => {
409
+ let lims = limits(node), t = reftype(node.shift(), ctx), [init] = node
410
+ return init ? [0x40, 0x00, ...t, ...lims, ...expr(init, ctx)] : [...t, ...lims]
400
411
  },
401
412
 
402
413
  // (memory id? export* min max shared)
403
- memory(idx, [...node], ctx) {
404
- ctx.memory[idx] = limits(node)
405
- },
414
+ (node, ctx) => limits(node),
406
415
 
407
416
  // (global $id? (mut i32) (i32.const 42))
408
- global(idx, [...node], ctx) {
409
- let [type] = node, mut = type[0] === 'mut' ? 1 : 0
410
-
411
- let [, [...init]] = node
412
- ctx.global[idx] = [TYPE[mut ? type[1] : type], mut, ...expr(init, ctx), 0x0b]
413
- },
417
+ ([t, init], ctx) => [...fieldtype(t, ctx), ...expr(init, ctx)],
414
418
 
415
419
  // (export "name" (func|table|mem $name|idx))
416
- export(_, [nm, [kind, id]], ctx) {
417
- // put placeholder to future-init
418
- let idx = id[0] === '$' ? ctx[kind][id] : +id
419
- ctx.export.push([...str(nm), KIND[kind], ...uleb(idx)])
420
- },
420
+ ([nm, [kind, l]], ctx) => ([...vec(str(nm.slice(1, -1))), KIND[kind], ...uleb(id(l, ctx[kind]))]),
421
421
 
422
422
  // (start $main)
423
- start(_,[id], ctx) {
424
- id = id[0] === '$' ? ctx.func[id] : +id
425
- ctx.start[0] = uleb(id)
426
- },
423
+ ([l], ctx) => uleb(id(l, ctx.func)),
427
424
 
425
+ // (elem elem*) - passive
426
+ // (elem declare elem*) - declarative
427
+ // (elem (table idx)? (offset expr)|(expr) elem*) - active
428
428
  // ref: https://webassembly.github.io/spec/core/binary/modules.html#element-section
429
- // passive: (elem elem*)
430
- // declarative: (elem declare elem*)
431
- // active: (elem (table idx)? (offset expr)|(expr) elem*)
432
- // elems: funcref|externref (item expr)|expr (item expr)|expr
433
- // idxs: func? $id0 $id1
434
- elem(idx,[...parts], ctx) {
435
- let tabidx, offset, mode = 0b000, reftype
429
+ (parts, ctx) => {
430
+ let passive = 0, declare = 0, elexpr = 0, nofunc = 0, tabidx, offset, rt
436
431
 
437
432
  // declare?
438
- if (parts[0] === 'declare') parts.shift(), mode |= 0b010
433
+ if (parts[0] === 'declare') parts.shift(), declare = 1
439
434
 
440
435
  // table?
441
436
  if (parts[0][0] === 'table') {
442
437
  [, tabidx] = parts.shift()
443
- tabidx = tabidx[0] === '$' ? ctx.table[tabidx] : +tabidx
444
- // ignore table=0
445
- if (tabidx) mode |= 0b010
438
+ tabidx = id(tabidx, ctx.table)
446
439
  }
447
440
 
448
441
  // (offset expr)|expr
449
442
  if (parts[0]?.[0] === 'offset' || (Array.isArray(parts[0]) && parts[0][0] !== 'item' && !parts[0][0].startsWith('ref'))) {
450
- [...offset] = parts.shift()
451
- if (offset[0] === 'offset') [, [...offset]] = offset
443
+ offset = parts.shift()
444
+ if (offset[0] === 'offset') [, offset] = offset
445
+ offset = expr(offset, ctx)
452
446
  }
453
- else mode |= 0b001 // passive
447
+ // no offset = passive
448
+ else if (!declare) passive = 1
454
449
 
455
- // funcref|externref|func
456
- if (parts[0]?.[0]!=='$') reftype = parts.shift()
457
- // externref makes explicit table index
458
- if (reftype === 'externref') offset ||= ['i32.const', 0], mode = 0b110
450
+ // funcref|externref|(ref ...)
451
+ if (REFTYPE[parts[0]] || parts[0]?.[0] === 'ref') rt = reftype(parts.shift(), ctx)
452
+ // func ... abbr https://webassembly.github.io/function-references/core/text/modules.html#id7
453
+ else if (parts[0] === 'func') rt = [HEAPTYPE[parts.shift()]]
454
+ // or anything else
455
+ else rt = [HEAPTYPE.func]
459
456
 
460
- // reset to simplest mode if no actual elements
461
- if (!parts.length) mode &= 0b011
462
-
463
- // simplify els
457
+ // deabbr els sequence, detect expr usage
464
458
  parts = parts.map(el => {
465
- if (el[0] === 'item') [, el] = el
459
+ if (el[0] === 'item') [, ...el] = el
466
460
  if (el[0] === 'ref.func') [, el] = el
467
- // (ref.null func) and other expressions
468
- if (typeof el !== 'string') mode |= 0b100
461
+ // (ref.null func) and any other non-index element switch the segment to element-expression mode
462
+ if (typeof el !== 'string') elexpr = 1
469
463
  return el
470
464
  })
471
465
 
472
- ctx.elem[idx] = ([
466
+ // reftype other than (ref null? func) forces table index via nofunc flag
467
+ // also it forces elexpr
468
+ if (rt[0] !== REFTYPE.funcref) nofunc = 1, elexpr = 1
469
+
470
+ // mode:
471
+ // bit 0 indicates a passive or declarative segment
472
+ // bit 1 indicates the presence of an explicit table index for an active segment
473
+ // and otherwise distinguishes passive from declarative segments
474
+ // bit 2 indicates the use of element type and element expressions instead of elemkind=0x00 and element indices.
475
+ let mode = (elexpr << 2) | ((passive || declare ? declare : (!!tabidx || nofunc)) << 1) | (passive || declare);
476
+
477
+ return ([
473
478
  mode,
474
479
  ...(
475
- // 0b000 e:expr y*:vec(funcidx) | type=funcref, init ((ref.func y)end)*, active (table=0,offset=e)
476
- mode === 0b000 ? [...expr(offset, ctx), 0x0b] :
480
+ // 0b000 e:expr y*:vec(funcidx) | type=(ref func), init ((ref.func y)end)*, active (table=0,offset=e)
481
+ mode === 0b000 ? offset :
477
482
  // 0b001 et:elkind y*:vec(funcidx) | type=0x00, init ((ref.func y)end)*, passive
478
483
  mode === 0b001 ? [0x00] :
479
484
  // 0b010 x:tabidx e:expr et:elkind y*:vec(funcidx) | type=0x00, init ((ref.func y)end)*, active (table=x,offset=e)
480
- mode === 0b010 ? [...uleb(tabidx || 0), ...expr(offset), 0x0b, 0x00] :
485
+ mode === 0b010 ? [...uleb(tabidx || 0), ...offset, 0x00] :
481
486
  // 0b011 et:elkind y*:vec(funcidx) | type=0x00, init ((ref.func y)end)*, passive declare
482
487
  mode === 0b011 ? [0x00] :
483
- // 0b100 e:expr el*:vec(expr) | type=funcref, init el*, active (table=0, offset=e)
484
- mode === 0b100 ? [...expr(offset, ctx), 0x0b] :
488
+ // 0b100 e:expr el*:vec(expr) | type=(ref null func), init el*, active (table=0, offset=e)
489
+ mode === 0b100 ? offset :
485
490
  // 0b101 et:reftype el*:vec(expr) | type=et, init el*, passive
486
- mode === 0b101 ? [TYPE[reftype]] :
491
+ mode === 0b101 ? rt :
487
492
  // 0b110 x:tabidx e:expr et:reftype el*:vec(expr) | type=et, init el*, active (table=x, offset=e)
488
- mode === 0b110 ? [...uleb(tabidx || 0), ...expr(offset), 0x0b, TYPE[reftype]] :
493
+ mode === 0b110 ? [...uleb(tabidx || 0), ...offset, ...rt] :
489
494
  // 0b111 et:reftype el*:vec(expr) | type=et, init el*, passive declare
490
- [TYPE[reftype]]
495
+ rt
491
496
  ),
492
- ...uleb(parts.length),
493
- ...parts.flatMap(mode & 0b100 ?
494
- // ((ref.func y)end)*
495
- el => [...expr(typeof el === 'string' ? ['ref.func', el] : [...el], ctx), 0x0b] :
496
- // el*
497
- el => uleb(el[0] === '$' ? ctx.func[el] : +el)
497
+ ...vec(
498
+ parts.map(elexpr ?
499
+ // ((ref.func y)end)*
500
+ el => expr(typeof el === 'string' ? ['ref.func', el] : el, ctx) :
501
+ // el*
502
+ el => uleb(id(el, ctx.func))
503
+ )
498
504
  )
499
505
  ])
500
506
  },
501
507
 
508
+ // (code)
509
+ (body, ctx) => {
510
+ let [typeidx, param] = body.shift()
511
+ if (!param) [, [param]] = ctx.type[id(typeidx, ctx.type)]
512
+
513
+ // provide param/local in ctx
514
+ ctx.local = Object.create(param) // list of local variables - some of them are params
515
+ ctx.block = [] // control instructions / blocks stack
516
+
517
+ // display names for error messages
518
+ ctx.local.name = 'local'
519
+ ctx.block.name = 'block'
520
+
521
+ // collect locals
522
+ while (body[0]?.[0] === 'local') {
523
+ let [, ...types] = body.shift()
524
+ if (types[0]?.[0] === '$') {
525
+ let name = types.shift()
526
+ if (name in ctx.local) err(`Duplicate local ${name}`)
527
+ else ctx.local[name] = ctx.local.length
528
+ }
529
+ ctx.local.push(...types)
530
+ }
531
+
532
+ const bytes = []
533
+ while (body.length) bytes.push(...instr(body, ctx))
534
+ bytes.push(0x0b)
535
+
536
+ // squash consecutive locals of the same type into (n:u32 t:valtype)*, where n is the run length and t is the type
537
+ // we skip locals provided by params
538
+ let loctypes = ctx.local.slice(param.length).reduce((a, type) => (type == a[a.length - 1]?.[1] ? a[a.length - 1][0]++ : a.push([1, type]), a), [])
539
+
540
+ // cleanup tmp state
541
+ ctx.local = ctx.block = null
542
+
543
+ // https://webassembly.github.io/spec/core/binary/modules.html#code-section
544
+ return vec([...vec(loctypes.map(([n, t]) => [...uleb(n), ...reftype(t, ctx)])), ...bytes])
545
+ },
546
+
502
547
  // (data (i32.const 0) "\aa" "\bb"?)
503
548
  // (data (memory ref) (offset (i32.const 0)) "\aa" "\bb"?)
504
549
  // (data (global.get $x) "\aa" "\bb"?)
505
- data(idx, [...inits], ctx) {
506
- let offset, mem = [0]
550
+ (inits, ctx) => {
551
+ let offset, memidx = 0
507
552
 
508
553
  // (memory ref)?
509
554
  if (inits[0]?.[0] === 'memory') {
510
- [, mem] = inits.shift()
511
- mem = mem[0] === '$' ? ctx.memory[mem] : +mem
512
- mem = !mem ? [0] : [2, ...uleb(mem)]
555
+ [, memidx] = inits.shift()
556
+ memidx = id(memidx, ctx.memory)
513
557
  }
514
558
 
515
559
  // (offset (i32.const 0)) or (i32.const 0)
516
560
  if (typeof inits[0] !== 'string') {
517
561
  offset = inits.shift()
518
562
  if (offset[0] === 'offset') [, offset] = offset
563
+ offset ?? err('Bad offset', offset)
519
564
  }
520
- else offset = ['i32.const', 0]
521
- ctx.data[idx] = [...mem, ...expr([...offset], ctx), 0x0b, ...str(inits.map(i => i.slice(1, -1)).join(''))]
522
- }
523
- }
524
565
 
525
- // serialize binary array
526
- const vec = a => [...uleb(a.length), ...a]
566
+ return ([
567
+ ...(
568
+ // active: 2, x=memidx, e=expr
569
+ memidx ? [2, ...uleb(memidx), ...expr(offset, ctx)] :
570
+ // active: 0, e=expr
571
+ offset ? [0, ...expr(offset, ctx)] :
572
+ // passive: 1
573
+ [1]
574
+ ),
575
+ ...vec(str(inits.map(i => i.slice(1, -1)).join('')))
576
+ ])
577
+ },
578
+
579
+ // datacount
580
+ (nodes, ctx) => uleb(ctx.data.length)
581
+ ]
582
+
583
+ // build reftype, either direct absheaptype or wrapped heaptype https://webassembly.github.io/gc/core/binary/types.html#reference-types
584
+ const reftype = (t, ctx) => (
585
+ t[0] === 'ref' ?
586
+ t[1] == 'null' ?
587
+ HEAPTYPE[t[2]] ? [HEAPTYPE[t[2]]] : [REFTYPE.refnull, ...uleb(id(t[t.length - 1], ctx.type))] :
588
+ [TYPE.ref, ...uleb(HEAPTYPE[t[t.length - 1]] || id(t[t.length - 1], ctx.type))] :
589
+ // abbrs
590
+ [TYPE[t] ?? err(`Unknown type ${t}`)]
591
+ );
527
592
 
528
- // instantiation time const initializer (consuming)
529
- const expr = (node, ctx) => {
530
- let op = node.shift(), [type, cmd] = op.split('.')
593
+ // build type with mutable flag (mut t) or t
594
+ const fieldtype = (t, ctx, mut = t[0] === 'mut' ? 1 : 0) => [...reftype(mut ? t[1] : t, ctx), mut];
531
595
 
532
- // (global.get idx)
533
- if (type === 'global') return [0x23, ...uleb(node[0][0] === '$' ? ctx.global[node[0]] : +node)]
534
596
 
535
- // (v128.const i32x4 1 2 3 4)
536
- if (type === 'v128') return [0xfd, 0x0c, ...v128(node)]
537
597
 
538
- // (i32.const 1)
539
- if (cmd === 'const') return [0x41 + ['i32', 'i64', 'f32', 'f64'].indexOf(type), ...encode[type](node[0])]
598
+ // consume one instruction from nodes sequence
599
+ const instr = (nodes, ctx) => {
600
+ if (!nodes?.length) return []
540
601
 
541
- // (ref.func $x) or (ref.null func|extern)
542
- if (type === 'ref') {
543
- return cmd === 'func' ?
544
- [0xd2, ...uleb(node[0][0] === '$' ? ctx.func[node[0]] : +node)] :
545
- // heaptype
546
- [0xd0, TYPE[node[0] + 'ref']] // func->funcref, extern->externref
602
+ let out = [], op = nodes.shift(), immed, code
603
+
604
+ // consume group
605
+ if (Array.isArray(op)) {
606
+ immed = instr(op, ctx)
607
+ while (op.length) out.push(...instr(op, ctx))
608
+ out.push(...immed)
609
+ return out
547
610
  }
548
611
 
549
- // (i32.add a b), (i32.mult a b) etc
550
- return [
551
- ...expr(node.shift(), ctx),
552
- ...expr(node.shift(), ctx),
553
- INSTR[op]
554
- ]
555
- }
612
+ [...immed] = isNaN(op[0]) && INSTR[op] || err(`Unknown instruction ${op}`)
613
+ code = immed[0]
614
+
615
+ // gc-related
616
+ // https://webassembly.github.io/gc/core/binary/instructions.html#reference-instructions
617
+ if (code === 0x0fb) {
618
+ [, code] = immed
619
+
620
+ // struct.new $t ... array.set $t
621
+ if ((code >= 0 && code <= 14) || (code >= 16 && code <= 19)) {
622
+ let tidx = id(nodes.shift(), ctx.type)
623
+ immed.push(...uleb(tidx))
624
+
625
+ // struct.get|set* $t $f - read field by index from struct definition (ctx.type[structidx][dfnidx])
626
+ if (code >= 2 && code <= 5) immed.push(...uleb(id(nodes.shift(), ctx.type[tidx][1])))
627
+ // array.new_fixed $t n
628
+ else if (code === 8) immed.push(...uleb(nodes.shift()))
629
+ // array.new_data|init_data $t $d
630
+ else if (code === 9 || code === 18) immed.push(...uleb(id(nodes.shift(), ctx.data)))
631
+ // array.new_elem|init_elem $t $e
632
+ else if (code === 10 || code === 19) immed.push(...uleb(id(nodes.shift(), ctx.elem)))
633
+ // array.copy $t $t
634
+ else if (code === 17) immed.push(...uleb(id(nodes.shift(), ctx.type)))
635
+ }
636
+ // ref.test|cast (ref null? $t|heaptype)
637
+ else if (code >= 20 && code <= 23) {
638
+ // FIXME: normalizer is supposed to resolve this
639
+ let ht = reftype(nodes.shift(), ctx)
640
+ if (ht[0] !== REFTYPE.ref) immed.push(code = immed.pop()+1) // ref.test|cast (ref null $t) is next op
641
+ if (ht.length > 1) ht.shift() // pop ref
642
+ immed.push(...ht)
643
+ }
644
+ // br_on_cast[_fail] $l? (ref null? ht1) (ref null? ht2)
645
+ // FIXME: normalizer should resolve anyref|etc to (ref null any|etc)
646
+ else if (code === 24 || code === 25) {
647
+ let i = blockid(nodes.shift(), ctx.block),
648
+ ht1 = reftype(nodes.shift(), ctx),
649
+ ht2 = reftype(nodes.shift(), ctx),
650
+ castflags = ((ht2[0] !== REFTYPE.ref) << 1) | (ht1[0] !== REFTYPE.ref)
651
+ immed.push(castflags, ...uleb(i), ht1.pop(), ht2.pop()) // we take only abstype or
652
+ }
653
+ }
654
+
655
+ // bulk memory: (memory.init) (memory.copy) (data.drop) (memory.fill)
656
+ // table ops: (table.init|copy|grow|size|fill) (elem.drop)
657
+ // https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md#instruction-encoding
658
+ else if (code == 0xfc) {
659
+ [, code] = immed
660
+
661
+ // memory.init idx, data.drop idx,
662
+ if (code === 0x08 || code === 0x09) {
663
+ immed.push(...uleb(id(nodes.shift(), ctx.data)))
664
+ }
556
665
 
557
- // (v128.const i32x4 1 2 3 4)
558
- const v128 = (args) => {
559
- let [t, n] = args.shift().split('x'),
560
- stride = t.slice(1) >>> 3 // i16 -> 2, f32 -> 4
666
+ // memory placeholders
667
+ if (code == 0x08 || code == 0x0b) immed.push(0)
668
+ else if (code === 0x0a) immed.push(0, 0)
561
669
 
562
- n = +n
670
+ // elem.drop elemidx
671
+ if (code === 0x0d) {
672
+ immed.push(...uleb(id(nodes.shift(), ctx.elem)))
673
+ }
674
+ // table.init tableidx elemidx -> 0xfc 0x0c elemidx tableidx
675
+ else if (code === 0x0c) {
676
+ immed.push(...uleb(id(nodes[1], ctx.elem)), ...uleb(id(nodes.shift(), ctx.table)))
677
+ nodes.shift()
678
+ }
679
+ // table.* tableidx?
680
+ // abbrs https://webassembly.github.io/spec/core/text/instructions.html#id1
681
+ else if (code >= 0x0c && code < 0x13) {
682
+ immed.push(...uleb(id(nodes.shift(), ctx.table)))
683
+ // table.copy tableidx? tableidx?
684
+ if (code === 0x0e) immed.push(...uleb(id(nodes.shift(), ctx.table)))
685
+ }
686
+ }
687
+
688
+ // v128s: (v128.load x) etc
689
+ // https://github.com/WebAssembly/simd/blob/master/proposals/simd/BinarySIMD.md
690
+ else if (code === 0xfd) {
691
+ [, code] = immed
692
+ immed = [0xfd, ...uleb(code)]
693
+ // (v128.load offset? align?)
694
+ if (code <= 0x0b) {
695
+ const [a, o] = memarg(nodes)
696
+ immed.push(...uleb((a ?? align(op))), ...uleb(o ?? 0))
697
+ }
698
+ // (v128.load_lane offset? align? idx)
699
+ else if (code >= 0x54 && code <= 0x5d) {
700
+ const [a, o] = memarg(nodes)
701
+ immed.push(...uleb((a ?? align(op))), ...uleb(o ?? 0))
702
+ // lane index immediate: *_lane ops only (v128.load32_zero|load64_zero, 0x5c-0x5d, take none)
703
+ if (code <= 0x5b) immed.push(...uleb(nodes.shift()))
704
+ }
705
+ // (i8x16.shuffle 0 1 ... 15 a b)
706
+ else if (code === 0x0d) {
707
+ // 16 lane indices pushed as raw bytes (no leb encoding); parseUint rejects values above 32
708
+ for (let i = 0; i < 16; i++) immed.push(parseUint(nodes.shift(), 32))
709
+ }
710
+ // (v128.const i32x4 1 2 3 4)
711
+ else if (code === 0x0c) {
712
+ let [t, n] = nodes.shift().split('x'),
713
+ bits = +t.slice(1),
714
+ stride = bits >>> 3 // i16 -> 2, f32 -> 4
715
+ n = +n
716
+ // i8, i16, i32 - bypass the encoding
717
+ if (t[0] === 'i') {
718
+ let arr = n === 16 ? new Uint8Array(16) : n === 8 ? new Uint16Array(8) : n === 4 ? new Uint32Array(4) : new BigUint64Array(2)
719
+ for (let i = 0; i < n; i++) {
720
+ let s = nodes.shift(), v = encode[t].parse(s)
721
+ arr[i] = v
722
+ }
723
+ immed.push(...(new Uint8Array(arr.buffer)))
724
+ }
725
+ // f32, f64 - encode
726
+ else {
727
+ let arr = new Uint8Array(16)
728
+ for (let i = 0; i < n; i++) {
729
+ let s = nodes.shift(), v = encode[t](s)
730
+ arr.set(v, i * stride)
731
+ }
732
+ immed.push(...arr)
733
+ }
734
+ }
735
+ // (i8x16.extract_lane_s 0 ...)
736
+ else if (code >= 0x15 && code <= 0x22) {
737
+ immed.push(...uleb(parseUint(nodes.shift())))
738
+ }
739
+ }
563
740
 
564
- // i8, i16, i32 - bypass the encoding
565
- if (t[0] === 'i') {
566
- let arr = n === 16 ? new Uint8Array(16) : n === 8 ? new Uint16Array(8) : n === 4 ? new Uint32Array(4) : new BigInt64Array(2)
567
- for (let i = 0; i < n; i++) {
568
- arr[i] = encode[t].parse(args.shift())
741
+ // control block abbrs
742
+ // block ..., loop ..., if ...
743
+ else if (code === 2 || code === 3 || code === 4) {
744
+ ctx.block.push(code)
745
+
746
+ // (block $x) (loop $y) - save label pointer
747
+ // FIXME: do in normalizer
748
+ if (nodes[0]?.[0] === '$') ctx.block[nodes.shift()] = ctx.block.length
749
+
750
+ let t = nodes.shift();
751
+
752
+ // void
753
+ if (!t) immed.push(TYPE.void)
754
+ // (result i32) - doesn't require registering type
755
+ // FIXME: Make sure it is signed positive integer (leb, not uleb) https://webassembly.github.io/gc/core/binary/instructions.html#control-instructions
756
+ else if (t[0] === 'result') immed.push(...reftype(t[1], ctx))
757
+ else {
758
+ let typeidx = id(t[1], ctx.type), [param, result] = ctx.type[typeidx][1]
759
+ // (type $idx (func (result i32)))
760
+ if (!param?.length && result.length === 1) immed.push(...reftype(result[0], ctx))
761
+ // (type idx)
762
+ else immed.push(...uleb(typeidx))
569
763
  }
570
- return new Uint8Array(arr.buffer)
764
+ }
765
+ // else
766
+ else if (code === 5) { }
767
+ // then
768
+ else if (code === 6) immed = [] // ignore
769
+
770
+ // local.get $id, local.set $id, local.tee $id x
771
+ else if (code == 0x20 || code == 0x21 || code == 0x22) {
772
+ immed.push(...uleb(id(nodes.shift(), ctx.local)))
571
773
  }
572
774
 
573
- // f32, f64 - encode
574
- let arr = new Uint8Array(16)
575
- for (let i = 0; i < n; i++) {
576
- arr.set(encode[t](args.shift()), i * stride)
775
+ // global.get $id, global.set $id
776
+ else if (code == 0x23 || code == 0x24) {
777
+ immed.push(...uleb(id(nodes.shift(), ctx.global)))
577
778
  }
578
779
 
579
- return arr
580
- }
780
+ // call $func ...nodes
781
+ // return_call $func
782
+ else if (code == 0x10 || code == 0x12) {
783
+ immed.push(...uleb(id(nodes.shift(), ctx.func)))
784
+ }
581
785
 
582
- // https://webassembly.github.io/spec/core/text/modules.html#type-uses
583
- // consume (type $id|id) (param t+)* (result t+)*
584
- const typeuse = (nodes, ctx) => {
585
- let idx, param, result, alias
786
+ // call_indirect $table (type $typeName) ...nodes
787
+ // return_call_indirect $table (type $typeName) ... nodes
788
+ else if (code == 0x11 || code == 0x13) {
789
+ immed.push(
790
+ ...uleb(id(nodes[1][1], ctx.type)),
791
+ ...uleb(id(nodes.shift(), ctx.table))
792
+ )
793
+ nodes.shift()
794
+ }
586
795
 
587
- // existing/new type (type 0|$name)
588
- if (nodes[0]?.[0] === 'type') {
589
- [, idx] = nodes.shift();
796
+ // call_ref $type
797
+ // return_call_ref $type
798
+ else if (code == 0x14 || code == 0x15) {
799
+ immed.push(...uleb(id(nodes.shift(), ctx.type)))
800
+ }
801
+
802
+ // end
803
+ else if (code == 0x0b) ctx.block.pop()
590
804
 
591
- // (type 0), (type $n) - existing type
592
- if (ctx.type[idx] != null) {
593
- paramres(nodes);
594
- if (idx[0] === '$') idx = ctx.type[idx];
595
- ({ param, result } = ctx.type[idx] ?? err('Bad type ' + idx));
596
- return [+idx, param, result]
805
+ // br $label result?
806
+ // br_if $label cond result?
807
+ // br_on_null $l, br_on_non_null $l
808
+ else if (code == 0x0c || code == 0x0d || code == 0xd5 || code == 0xd6) {
809
+ immed.push(...uleb(blockid(nodes.shift(), ctx.block)))
810
+ }
811
+
812
+ // br_table 1 2 3 4 0 selector result?
813
+ else if (code == 0x0e) {
814
+ let args = []
815
+ while (nodes[0] && (!isNaN(nodes[0]) || nodes[0][0] === '$')) {
816
+ args.push(...uleb(blockid(nodes.shift(), ctx.block)))
597
817
  }
818
+ args.unshift(...uleb(args.length - 1))
819
+ immed.push(...args)
598
820
  }
599
821
 
600
- // if new type - find existing match
601
- ;[param, result] = paramres(nodes), alias = param + '>' + result
602
- // or register new type
603
- if (ctx.type[alias] == null) {
604
- build.type(ctx.type.length, [[, ['param', ...param], ['result', ...result]]], ctx)
822
+ // select (result t+)
823
+ else if (code == 0x1b) {
824
+ let result = nodes.shift()
825
+ // 0x1b -> 0x1c
826
+ if (result.length) immed.push(immed.pop() + 1, ...vec(result.map(t => reftype(t, ctx))))
605
827
  }
606
828
 
607
- return [ctx.type[alias], param, result]
608
- }
829
+ // ref.func $id
830
+ else if (code == 0xd2) {
831
+ immed.push(...uleb(id(nodes.shift(), ctx.func)))
832
+ }
609
833
 
610
- // consume (param t+)* (result t+)* sequence
611
- const paramres = (nodes) => {
612
- let param = [], result = []
834
+ // ref.null func
835
+ else if (code == 0xd0) {
836
+ let t = nodes.shift()
837
+ immed.push(...(HEAPTYPE[t] ? [HEAPTYPE[t]] : uleb(id(t, ctx.type)))) // func->funcref, extern->externref
838
+ }
613
839
 
614
- // collect param (param i32 i64) (param $x? i32)
615
- while (nodes[0]?.[0] === 'param') {
616
- let [, ...args] = nodes.shift()
617
- let name = args[0]?.[0] === '$' && args.shift()
618
- if (name) param[name] = param.length // expose name refs
619
- param.push(...args)
840
+ // binary/unary (i32.add a b) - no immed
841
+ else if (code >= 0x45) { }
842
+
843
+ // i32.store align=n offset=m
844
+ else if (code >= 0x28 && code <= 0x3e) {
845
+ let [a, o] = memarg(nodes)
846
+ immed.push(...uleb((a ?? align(op))), ...uleb(o ?? 0))
620
847
  }
621
848
 
622
- // collect result eg. (result f64 f32)(result i32)
623
- while (nodes[0]?.[0] === 'result') {
624
- let [, ...args] = nodes.shift()
625
- result.push(...args)
849
+ // i32.const 123, f32.const 123.45
850
+ else if (code >= 0x41 && code <= 0x44) {
851
+ immed.push(...encode[op.split('.')[0]](nodes.shift()))
626
852
  }
627
853
 
628
- return [param, result]
854
+ // memory.grow|size $idx - mandatory 0x00
855
+ // https://webassembly.github.io/spec/core/binary/instructions.html#memory-instructions
856
+ else if (code == 0x3f || code == 0x40) {
857
+ immed.push(0)
858
+ }
859
+
860
+ // table.get|set $id
861
+ else if (code == 0x25 || code == 0x26) {
862
+ immed.push(...uleb(id(nodes.shift(), ctx.table)))
863
+ }
864
+
865
+ out.push(...immed)
866
+
867
+ return out
629
868
  }
630
869
 
631
- // consume align/offset/etc params
870
+ // instantiation time value initializer (consuming) - we redirect to instr
871
+ const expr = (node, ctx) => [...instr([node], ctx), 0x0b]
872
+
873
+ // deref id node to numeric idx
874
+ const id = (nm, list, n) => (n = nm[0] === '$' ? list[nm] : +nm, n in list ? n : err(`Unknown ${list.name} ${nm}`))
875
+
876
+ // block id - same as id but for block
877
+ // index indicates how many block items to pop
878
+ const blockid = (nm, block, i) => (
879
+ i = nm?.[0] === '$' ? block.length - block[nm] : +nm,
880
+ isNaN(i) || i > block.length ? err(`Bad label ${nm}`) : i
881
+ )
882
+
883
+ // consume align/offset params
632
884
  const memarg = (args) => {
633
- let ao = {}, kv
634
- while (args[0]?.includes('=')) kv = args.shift().split('='), ao[kv[0]] = Number(kv[1])
635
- return ao
885
+ let align, offset, k, v
886
+ while (args[0]?.includes('=')) [k, v] = args.shift().split('='), k === 'offset' ? offset = +v : k === 'align' ? align = +v : err(`Unknown param ${k}=${v}`)
887
+
888
+ if (offset < 0 || offset > 0xffffffff) err(`Bad offset ${offset}`)
889
+ if (align <= 0 || align > 0xffffffff) err(`Bad align ${align}`)
890
+ if (align) ((align = Math.log2(align)) % 1) && err(`Bad align ${align}`)
891
+ return [align, offset]
892
+ }
893
+
894
+ // const ALIGN = {
895
+ // 'i32.load': 4, 'i64.load': 8, 'f32.load': 4, 'f64.load': 8,
896
+ // 'i32.load8_s': 1, 'i32.load8_u': 1, 'i32.load16_s': 2, 'i32.load16_u': 2,
897
+ // 'i64.load8_s': 1, 'i64.load8_u': 1, 'i64.load16_s': 2, 'i64.load16_u': 2, 'i64.load32_s': 4, 'i64.load32_u': 4, 'i32.store': 4,
898
+ // 'i64.store': 8, 'f32.store': 4, 'f64.store': 8, 'i32.store8': 1, 'i32.store16': 2, 'i64.store8': 1, 'i64.store16': 2, 'i64.store32': 4,
899
+ // 'v128.load': 16, 'v128.load8x8_s': 8, 'v128.load8x8_u': 8, 'v128.load16x4_s': 8, 'v128.load16x4_u': 8, 'v128.load32x2_s': 8, 'v128.load32x2_u': 8, 'v128.load8_splat': 1, 'v128.load16_splat': 2, 'v128.load32_splat': 4, 'v128.load64_splat': 8, 'v128.store': 16,
900
+ // 'v128.load': 16, 'v128.load8_lane': 1, 'v128.load16_lane': 2, 'v128.load32_lane': 4, 'v128.load64_lane': 8, 'v128.store8_lane': 1, 'v128.store16_lane': 2, 'v128.store32_lane': 4, 'v128.store64_lane': 8, 'v128.load32_zero': 4, 'v128.load64_zero': 8
901
+ // }
902
+ const align = (op) => {
903
+ let [group, opname] = op.split('.'); // v128.load8x8_u -> group = v128, opname = load8x8_u
904
+ let [lsize] = (opname[0] === 'l' ? opname.slice(4) : opname.slice(5)).split('_') // load8x8_u -> lsize = 8x8
905
+ let [size, x] = lsize ? lsize.split('x') : [group.slice(1)] // 8x8 -> size = 8
906
+ return Math.log2(x ? 8 : +size / 8)
636
907
  }
637
908
 
909
+ // build limits sequence (consuming)
910
+ const limits = (node) => (
911
+ isNaN(parseInt(node[1])) ? [0, ...uleb(parseUint(node.shift()))] : [node[2] === 'shared' ? 3 : 1, ...uleb(parseUint(node.shift())), ...uleb(parseUint(node.shift()))]
912
+ )
913
+
914
+ // check if node is valid int in a range
915
+ // the extra condition (rejecting a leading '+') on index ints is there for test-suite compliance
916
+ const parseUint = (v, max = 0xFFFFFFFF) => (typeof v === 'string' && v[0] !== '+' ? (typeof max === 'bigint' ? i64 : i32).parse(v) : typeof v === 'number' ? v : err(`Bad int ${v}`)) > max ? err(`Value out of range ${v}`) : v
917
+
918
+
638
919
  // escape codes
639
- const escape = { n: 10, r: 13, t: 9, v: 1, '\\': 92 }
920
+ const escape = { n: 10, r: 13, t: 9, v: 1, '"': 34, "'": 39, '\\': 92 }
640
921
 
641
922
  // build string binary
642
923
  const str = str => {
643
- str = str[0] === '"' ? str.slice(1, -1) : str
644
924
  let res = [], i = 0, c, BSLASH = 92
645
925
  // https://webassembly.github.io/spec/core/text/values.html#strings
646
926
  for (; i < str.length;) {
647
927
  c = str.charCodeAt(i++)
648
928
  res.push(c === BSLASH ? escape[str[i++]] || parseInt(str.slice(i - 1, ++i), 16) : c)
649
929
  }
650
-
651
- return vec(res)
930
+ return res
652
931
  }
653
932
 
654
- // build limits sequence (non-consuming)
655
- const limits = ([min, max, shared]) => isNaN(parseInt(max)) ? [0, ...uleb(min)] : [shared === 'shared' ? 3 : 1, ...uleb(min), ...uleb(max)]
656
-
657
- const err = text => { throw Error(text) }
933
+ // serialize binary array
934
+ const vec = a => [...uleb(a.length), ...a.flat()]