watr 4.5.3 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/optimize.js CHANGED
@@ -18,13 +18,17 @@ const OPTS = {
18
18
  identity: true, // remove identity ops (x + 0 → x)
19
19
  strength: true, // strength reduction (x * 2 → x << 1)
20
20
  branch: true, // simplify constant branches
21
- propagate: false, // constant propagation can duplicate expressions
21
+ propagate: true, // forward-propagate single-use locals & tiny consts (never inflates)
22
22
  inline: false, // inline tiny functions — can duplicate bodies
23
+ inlineOnce: true, // inline single-call functions into their lone caller (never duplicates)
23
24
  vacuum: true, // remove nops, drop-of-pure, empty branches
25
+ mergeBlocks: true, // unwrap `(block $L …)` whose label is never targeted
26
+ coalesce: true, // share local slots between same-type non-overlapping locals
24
27
  peephole: true, // x-x→0, x&0→0, etc.
25
28
  globals: true, // propagate immutable global constants
26
29
  offset: true, // fold add+const into load/store offset
27
30
  unbranch: true, // remove redundant br at end of own block
31
+ loopify: true, // collapse block+loop+brif while-idiom into loop+if
28
32
  stripmut: true, // strip mut from never-written globals
29
33
  brif: true, // if-then-br → br_if
30
34
  foldarms: false, // merge identical trailing if arms — can add block wrapper
@@ -318,6 +322,18 @@ const treeshake = (ast) => {
318
322
  /** IEEE 754 roundTiesToEven (bankers' rounding) */
319
323
  const roundEven = (x) => x - Math.floor(x) !== 0.5 ? Math.round(x) : 2 * Math.round(x / 2)
320
324
 
325
+ // Bit-exact reinterpret helpers (preserve NaN payloads).
326
+ const _rb8 = new ArrayBuffer(8)
327
+ const _rf64 = new Float64Array(_rb8)
328
+ const _ri64 = new BigInt64Array(_rb8)
329
+ const _rb4 = new ArrayBuffer(4)
330
+ const _rf32 = new Float32Array(_rb4)
331
+ const _ri32 = new Int32Array(_rb4)
332
+ const i64FromF64 = (x) => { _rf64[0] = x; return _ri64[0] }
333
+ const f64FromI64 = (x) => { _ri64[0] = BigInt.asIntN(64, x); return _rf64[0] }
334
+ const i32FromF32 = (x) => { _rf32[0] = x; return _ri32[0] }
335
+ const f32FromI32 = (x) => { _ri32[0] = x | 0; return _rf32[0] }
336
+
321
337
  /** Build i32 comparison folder: returns 1/0 */
322
338
  const i32c = (fn) => (a, b) => fn(a, b) ? 1 : 0
323
339
  /** Build unsigned i32 comparison folder */
@@ -421,6 +437,24 @@ const FOLDABLE = {
421
437
  'f64.floor': [Math.floor, 'f64'],
422
438
  'f64.trunc': [Math.trunc, 'f64'],
423
439
  'f64.nearest': [roundEven, 'f64'],
440
+
441
+ // Bit-exact reinterprets (preserve NaN payloads)
442
+ 'i32.reinterpret_f32': [i32FromF32, 'i32'],
443
+ 'f32.reinterpret_i32': [f32FromI32, 'f32'],
444
+ 'i64.reinterpret_f64': [i64FromF64, 'i64'],
445
+ 'f64.reinterpret_i64': [f64FromI64, 'f64'],
446
+
447
+ // Numeric conversions (value-preserving where representable)
448
+ 'f32.convert_i32_s': [(a) => Math.fround(a | 0), 'f32'],
449
+ 'f32.convert_i32_u': [(a) => Math.fround(a >>> 0), 'f32'],
450
+ 'f32.convert_i64_s': [(a) => Math.fround(Number(BigInt.asIntN(64, a))), 'f32'],
451
+ 'f32.convert_i64_u': [(a) => Math.fround(Number(BigInt.asUintN(64, a))), 'f32'],
452
+ 'f64.convert_i32_s': [(a) => (a | 0), 'f64'],
453
+ 'f64.convert_i32_u': [(a) => (a >>> 0), 'f64'],
454
+ 'f64.convert_i64_s': [(a) => Number(BigInt.asIntN(64, a)), 'f64'],
455
+ 'f64.convert_i64_u': [(a) => Number(BigInt.asUintN(64, a)), 'f64'],
456
+ 'f32.demote_f64': [(a) => Math.fround(a), 'f32'],
457
+ 'f64.promote_f32': [(a) => Math.fround(a), 'f64'],
424
458
  }
425
459
 
426
460
  /**
@@ -855,8 +889,33 @@ const countLocalUses = (node) => {
855
889
  return counts
856
890
  }
857
891
 
858
- /** Can this tracked value be substituted for a local.get? */
859
- const canSubst = (k) => getConst(k.val) || (k.pure && k.singleUse)
892
+ /** A constant whose inlined form (opcode + immediate) is no wider than the ~2 B
893
+ * `local.get` it would replace so propagating it to every use is byte-neutral
894
+ * at worst, and still drops the `local.set` + the `local` decl. f32/f64 consts
895
+ * (5/9 B) lose on reuse, so only narrow i32/i64 literals qualify. */
896
+ const isTinyConst = (node) => {
897
+ const c = getConst(node)
898
+ if (!c) return false
899
+ if (c.type === 'i32') { const v = c.value | 0; return v >= -64 && v <= 63 }
900
+ if (c.type === 'i64') { const v = typeof c.value === 'bigint' ? c.value : BigInt(c.value); return v >= -64n && v <= 63n }
901
+ return false
902
+ }
903
+
904
+ /** Can this tracked value be substituted for a local.get?
905
+ * - single use of a pure value: always shrinks (drops the set, the lone get, the decl);
906
+ * - any use of a tiny constant: byte-neutral at worst, still drops the set + decl.
907
+ * Anything else (a wide constant reused many times, an impure expr) could inflate
908
+ * or reorder side effects, so it's left alone. */
909
+ const canSubst = (k) => (k.pure && k.singleUse) || isTinyConst(k.val)
910
+
911
+ /** Drop tracked values that read `$name`: rewriting `$name` makes them stale. */
912
+ const purgeRefs = (known, name) => {
913
+ for (const [key, tracked] of known) {
914
+ let refs = false
915
+ walk(tracked.val, n => { if (Array.isArray(n) && (n[0] === 'local.get' || n[0] === 'local.tee') && n[1] === name) refs = true })
916
+ if (refs) known.delete(key)
917
+ }
918
+ }
860
919
 
861
920
  /** Try substitute local.get nodes with known values */
862
921
  const substGets = (node, known) => walkPost(node, n => {
@@ -889,6 +948,7 @@ const forwardPropagate = (funcNode, params, useCounts) => {
889
948
  if ((op === 'local.set' || op === 'local.tee') && instr.length === 3 && typeof instr[1] === 'string') {
890
949
  substGets(instr[2], known) // substitute known values in RHS
891
950
  const uses = getUseCount(instr[1])
951
+ purgeRefs(known, instr[1]) // entries that read this local just went stale
892
952
  known.set(instr[1], {
893
953
  val: instr[2], pure: isPure(instr[2]),
894
954
  singleUse: uses.gets <= 1 && uses.sets <= 1 && uses.tees === 0
@@ -923,7 +983,7 @@ const forwardPropagate = (funcNode, params, useCounts) => {
923
983
  // (untracked) value, not the stale constant.
924
984
  walk(instr, n => {
925
985
  if (Array.isArray(n) && (n[0] === 'local.set' || n[0] === 'local.tee') && typeof n[1] === 'string')
926
- known.delete(n[1])
986
+ { known.delete(n[1]); purgeRefs(known, n[1]) }
927
987
  })
928
988
  }
929
989
  }
@@ -1024,6 +1084,10 @@ const eliminateDeadStores = (funcNode, params, useCounts) => {
1024
1084
  * Constants propagate to all uses; pure single-use exprs inline into get site.
1025
1085
  * Multi-pass with batch counting for convergence.
1026
1086
  */
1087
+ /** Block-like nodes whose body is a straight-line instruction list (after any header). */
1088
+ const isScopeNode = (n) => Array.isArray(n) &&
1089
+ (n[0] === 'func' || n[0] === 'block' || n[0] === 'loop' || n[0] === 'then' || n[0] === 'else')
1090
+
1027
1091
  const propagate = (ast) => {
1028
1092
  walk(ast, (funcNode) => {
1029
1093
  if (!Array.isArray(funcNode) || funcNode[0] !== 'func') return
@@ -1032,16 +1096,30 @@ const propagate = (ast) => {
1032
1096
  for (const sub of funcNode)
1033
1097
  if (Array.isArray(sub) && sub[0] === 'param' && typeof sub[1] === 'string') params.add(sub[1])
1034
1098
 
1035
- // useCounts must be refreshed before every sub-pass: each mutation
1036
- // (substitution, set/get pair removal, tee creation, dead-store removal)
1037
- // changes the gets/sets/tees totals that downstream sub-passes rely on.
1038
- for (let pass = 0; pass < 4; pass++) {
1039
- let changed = false
1040
- if (forwardPropagate(funcNode, params, countLocalUses(funcNode))) changed = true
1041
- if (eliminateSetGetPairs(funcNode, params, countLocalUses(funcNode))) changed = true
1042
- if (createLocalTees(funcNode, params, countLocalUses(funcNode))) changed = true
1043
- if (eliminateDeadStores(funcNode, params, countLocalUses(funcNode))) changed = true
1044
- if (!changed) break
1099
+ // Propagation runs per straight-line scope: the function body and every nested
1100
+ // `block`/`loop`/`then`/`else` (including ones embedded in an expression, e.g. the
1101
+ // `(block (result i32) …)` an inlined call leaves behind). Collect scopes deepest-
1102
+ // first so inner simplifications shrink the use-counts the outer scopes see.
1103
+ // Use-counts are always whole-function — a set/get pair or dead store is only
1104
+ // touched when it's globally the sole occurrence, so per-scope work stays sound.
1105
+ const scopes = []
1106
+ walkPost(funcNode, n => { if (isScopeNode(n)) scopes.push(n) })
1107
+
1108
+ // One use-count per round, shared by every scope: substitutions only ever
1109
+ // *drop* gets, so a stale count can only make a sub-pass act more cautiously
1110
+ // (skip a not-yet-provably-dead store, decline a not-yet-provably-single use) —
1111
+ // never wrongly. The next round re-counts and mops up. (Recounting per sub-pass
1112
+ // per scope is O(scopes·funcSize) and crippling on big modules.)
1113
+ for (let round = 0; round < 6; round++) {
1114
+ const useCounts = countLocalUses(funcNode)
1115
+ let progressed = false
1116
+ for (const scope of scopes) {
1117
+ if (forwardPropagate(scope, params, useCounts)) progressed = true
1118
+ if (eliminateSetGetPairs(scope, params, useCounts)) progressed = true
1119
+ if (createLocalTees(scope, params, useCounts)) progressed = true
1120
+ if (eliminateDeadStores(scope, params, useCounts)) progressed = true
1121
+ }
1122
+ if (!progressed) break
1045
1123
  }
1046
1124
  })
1047
1125
 
@@ -1154,6 +1232,407 @@ const inline = (ast) => {
1154
1232
  return ast
1155
1233
  }
1156
1234
 
1235
+ // ==================== INLINE-ONCE ====================
1236
+
1237
+ let inlineUid = 0
1238
+
1239
+ /**
1240
+ * Inline functions that are called from exactly one place into their lone caller,
1241
+ * then delete them. Unlike {@link inline} (which duplicates tiny stateless bodies),
1242
+ * this never duplicates code and never inflates: each inlined function drops a
1243
+ * function-section entry, a type-section entry (if now unused), and a `call`
1244
+ * instruction, paying back only a `block`/`local.set` wrapper. This is what
1245
+ * `wasm-opt -Oz` does — collapsing helper chains down to a couple of functions —
1246
+ * and it's the bulk of the gap between hand-tuned WASM and naive codegen.
1247
+ *
1248
+ * A function `$f` qualifies when it is, all of:
1249
+ * • named, with named params and locals (numeric indices can't be safely renamed);
1250
+ * • referenced exactly once across the whole module, by a plain `call` (no
1251
+ * `return_call`, `ref.func`, `elem`, `export`, or `start` reference, and not
1252
+ * recursive);
1253
+ * • single-result or void (a multi-value result can't be modeled as `(block (result …))`);
1254
+ * • free of numeric (depth-relative) branch labels — those would shift under the
1255
+ * extra block nesting — and of `return_call*` in its body.
1256
+ *
1257
+ * `(call $f a0 a1 …)` becomes
1258
+ * (block $__inlN (result T)?
1259
+ * (local.set $__inlN_p0 a0) (local.set $__inlN_p1 a1) … ;; args evaluated once, in order
1260
+ * …body, params/locals renamed to $__inlN_*, `return X` → `br $__inlN X`…)
1261
+ * and the renamed params+locals are appended to the caller's `local` decls; the
1262
+ * body's own block/loop/if labels are renamed too so they can't shadow the caller's.
1263
+ * Runs to a fixpoint so helper chains fully collapse.
1264
+ *
1265
+ * @param {Array} ast
1266
+ * @returns {Array}
1267
+ */
1268
+ const inlineOnce = (ast) => {
1269
+ if (!Array.isArray(ast) || ast[0] !== 'module') return ast
1270
+
1271
+ const HEAD = new Set(['export', 'type', 'param', 'result', 'local'])
1272
+ const bodyStart = (fn) => {
1273
+ let i = 2
1274
+ while (i < fn.length && (typeof fn[i] === 'string' || (Array.isArray(fn[i]) && HEAD.has(fn[i][0])))) i++
1275
+ return i
1276
+ }
1277
+ const isBranch = op => op === 'br' || op === 'br_if' || op === 'br_table'
1278
+ // A subtree we can't lift into a (block …): depth-relative branch labels (shift
1279
+ // under added nesting) or tail calls (would escape the wrapping block).
1280
+ const unsafe = (n) => {
1281
+ if (!Array.isArray(n)) return false
1282
+ const op = n[0]
1283
+ if (op === 'return_call' || op === 'return_call_indirect' || op === 'return_call_ref') return true
1284
+ if (op === 'try' || op === 'try_table' || op === 'delegate' || op === 'rethrow') return true // exception labels — not handled by the relabeler below
1285
+ if (isBranch(op)) for (let i = 1; i < n.length; i++) if (typeof n[i] === 'number' || (typeof n[i] === 'string' && /^\d+$/.test(n[i]))) return true
1286
+ for (let i = 1; i < n.length; i++) if (unsafe(n[i])) return true
1287
+ return false
1288
+ }
1289
+ const callsSelf = (n, name) => {
1290
+ if (!Array.isArray(n)) return false
1291
+ if ((n[0] === 'call' || n[0] === 'return_call') && n[1] === name) return true
1292
+ for (let i = 1; i < n.length; i++) if (callsSelf(n[i], name)) return true
1293
+ return false
1294
+ }
1295
+
1296
+ // Module-level references that pin a function (can't be removed/inlined-away).
1297
+ const collectPinned = (n, pinned) => {
1298
+ if (!Array.isArray(n)) return
1299
+ const op = n[0]
1300
+ if (op === 'export' && Array.isArray(n[2]) && n[2][0] === 'func' && typeof n[2][1] === 'string') pinned.add(n[2][1])
1301
+ else if (op === 'start' && typeof n[1] === 'string') pinned.add(n[1])
1302
+ else if (op === 'ref.func' && typeof n[1] === 'string') pinned.add(n[1])
1303
+ else if (op === 'elem') for (const c of n) if (typeof c === 'string' && c[0] === '$') pinned.add(c)
1304
+ for (const c of n) collectPinned(c, pinned)
1305
+ }
1306
+
1307
+ for (let round = 0; round < 16; round++) {
1308
+ const funcs = ast.filter(n => Array.isArray(n) && n[0] === 'func')
1309
+ const funcByName = new Map()
1310
+ for (const n of funcs) if (typeof n[1] === 'string') funcByName.set(n[1], n)
1311
+
1312
+ // Count plain-call references across the WHOLE module (anonymous exported funcs
1313
+ // call helpers too); flag any non-call reference (return_call etc.).
1314
+ const callRefs = new Map(), otherRef = new Set()
1315
+ const countRefs = (n) => {
1316
+ if (!Array.isArray(n)) return
1317
+ const op = n[0]
1318
+ if (op === 'call' && typeof n[1] === 'string') callRefs.set(n[1], (callRefs.get(n[1]) || 0) + 1)
1319
+ else if (op === 'return_call' && typeof n[1] === 'string') otherRef.add(n[1])
1320
+ for (let i = 1; i < n.length; i++) countRefs(n[i])
1321
+ }
1322
+ countRefs(ast)
1323
+ const pinned = new Set()
1324
+ for (const n of ast) if (!Array.isArray(n) || n[0] !== 'func') collectPinned(n, pinned)
1325
+ // a func may carry its own (export "name") — the signature scan below rejects those too
1326
+
1327
+ // Pick a callee.
1328
+ let calleeName = null
1329
+ for (const [name, fn] of funcByName) {
1330
+ if (pinned.has(name) || otherRef.has(name)) continue
1331
+ if (callRefs.get(name) !== 1) continue
1332
+ if (callsSelf(fn, name)) continue
1333
+ // named params/locals only; collect signature
1334
+ let ok = true, nResult = 0
1335
+ for (let i = 2; i < fn.length; i++) {
1336
+ const c = fn[i]
1337
+ if (typeof c === 'string') continue
1338
+ if (!Array.isArray(c)) { ok = false; break }
1339
+ if (c[0] === 'param' || c[0] === 'local') { if (typeof c[1] !== 'string' || c[1][0] !== '$') { ok = false; break } }
1340
+ else if (c[0] === 'result') nResult += c.length - 1
1341
+ else if (c[0] === 'export') { ok = false; break }
1342
+ else if (c[0] === 'type') continue
1343
+ else break
1344
+ }
1345
+ if (!ok || nResult > 1) continue
1346
+ let bad = false
1347
+ for (let i = bodyStart(fn); i < fn.length; i++) if (unsafe(fn[i])) { bad = true; break }
1348
+ if (bad) continue
1349
+ calleeName = name; break
1350
+ }
1351
+ if (!calleeName) break
1352
+
1353
+ const callee = funcByName.get(calleeName)
1354
+ const params = [], locals = []
1355
+ let resultType = null
1356
+ for (let i = 2; i < callee.length; i++) {
1357
+ const c = callee[i]
1358
+ if (typeof c === 'string' || !Array.isArray(c)) continue
1359
+ if (c[0] === 'param') params.push({ name: c[1], type: c[2] })
1360
+ else if (c[0] === 'result') { if (c.length > 1) resultType = c[1] }
1361
+ else if (c[0] === 'local') locals.push({ name: c[1], type: c[2] })
1362
+ else if (c[0] === 'export' || c[0] === 'type') continue
1363
+ else break
1364
+ }
1365
+ const cBody = callee.slice(bodyStart(callee))
1366
+
1367
+ const uid = ++inlineUid
1368
+ const exit = `$__inl${uid}`
1369
+ const rename = new Map()
1370
+ for (const p of params) rename.set(p.name, `$__inl${uid}_${p.name.slice(1)}`)
1371
+ for (const l of locals) rename.set(l.name, `$__inl${uid}_${l.name.slice(1)}`)
1372
+ // The callee's own block/loop/if labels would shadow same-named labels in the
1373
+ // caller after nesting (and break depth resolution) — give them fresh names too.
1374
+ const isBlockLabel = op => op === 'block' || op === 'loop' || op === 'if'
1375
+ const labelRename = new Map()
1376
+ const collectLabels = (n) => {
1377
+ if (!Array.isArray(n)) return
1378
+ if (isBlockLabel(n[0]) && typeof n[1] === 'string' && n[1][0] === '$' && !labelRename.has(n[1]))
1379
+ labelRename.set(n[1], `$__inl${uid}L_${n[1].slice(1)}`)
1380
+ for (let i = 1; i < n.length; i++) collectLabels(n[i])
1381
+ }
1382
+ for (const n of cBody) collectLabels(n)
1383
+ const sub = (n) => {
1384
+ if (!Array.isArray(n)) return n
1385
+ const op = n[0]
1386
+ if ((op === 'local.get' || op === 'local.set' || op === 'local.tee') && typeof n[1] === 'string' && rename.has(n[1]))
1387
+ return [op, rename.get(n[1]), ...n.slice(2).map(sub)]
1388
+ if (op === 'return') return ['br', exit, ...n.slice(1).map(sub)]
1389
+ if (isBlockLabel(op) && typeof n[1] === 'string' && labelRename.has(n[1]))
1390
+ return [op, labelRename.get(n[1]), ...n.slice(2).map(sub)]
1391
+ if (isBranch(op)) return [op, ...n.slice(1).map(c => (typeof c === 'string' && labelRename.has(c)) ? labelRename.get(c) : sub(c))]
1392
+ return n.map((c, i) => i === 0 ? c : sub(c))
1393
+ }
1394
+
1395
+ // Splice into the (unique) caller (which may be an anonymous exported func).
1396
+ let done = false
1397
+ for (const fn of funcs) {
1398
+ if (fn === callee || done) continue
1399
+ const start = bodyStart(fn)
1400
+ for (let i = start; i < fn.length; i++) {
1401
+ const replaced = walkPost(fn[i], (n) => {
1402
+ if (done || !Array.isArray(n) || n[0] !== 'call' || n[1] !== calleeName) return
1403
+ const args = n.slice(2)
1404
+ if (args.length !== params.length) return // arity mismatch — leave it
1405
+ const setup = params.map((p, k) => ['local.set', rename.get(p.name), args[k]])
1406
+ const inner = cBody.map(sub)
1407
+ done = true
1408
+ return resultType
1409
+ ? ['block', exit, ['result', resultType], ...setup, ...inner]
1410
+ : ['block', exit, ...setup, ...inner]
1411
+ })
1412
+ if (replaced !== fn[i]) fn[i] = replaced
1413
+ if (done) {
1414
+ const decls = [...params, ...locals].map(p => ['local', rename.get(p.name), p.type])
1415
+ if (decls.length) fn.splice(bodyStart(fn), 0, ...decls)
1416
+ break
1417
+ }
1418
+ }
1419
+ if (done) break
1420
+ }
1421
+ if (!done) break // call site not found inside a func body — give up
1422
+
1423
+ const idx = ast.indexOf(callee)
1424
+ if (idx >= 0) ast.splice(idx, 1)
1425
+ }
1426
+
1427
+ return ast
1428
+ }
1429
+
1430
+ // ==================== MERGE BLOCKS ====================
1431
+
1432
+ /**
1433
+ * Does `body` contain a branch instruction targeting `label`, ignoring inner
1434
+ * blocks/loops that re-bind the same label?
1435
+ */
1436
+ const targetsLabel = (body, label) => {
1437
+ let found = false
1438
+ const search = (n, shadowed) => {
1439
+ if (found || !Array.isArray(n)) return
1440
+ const op = n[0]
1441
+ let inner = shadowed
1442
+ if ((op === 'block' || op === 'loop') && typeof n[1] === 'string' && n[1] === label) inner = true
1443
+ if (!shadowed) {
1444
+ if (op === 'br' || op === 'br_if' || op === 'br_on_null' || op === 'br_on_non_null' ||
1445
+ op === 'br_on_cast' || op === 'br_on_cast_fail') {
1446
+ if (n[1] === label) { found = true; return }
1447
+ } else if (op === 'br_table') {
1448
+ for (let j = 1; j < n.length; j++) {
1449
+ if (typeof n[j] === 'string') { if (n[j] === label) { found = true; return } }
1450
+ else break
1451
+ }
1452
+ }
1453
+ }
1454
+ for (let i = 1; i < n.length; i++) search(n[i], inner)
1455
+ }
1456
+ for (const node of body) search(node, false)
1457
+ return found
1458
+ }
1459
+
1460
+ /**
1461
+ * Unwrap redundant blocks whose label is never targeted. The block's stack
1462
+ * effect is determined entirely by its body, so removing the `block`/`end`
1463
+ * framing is sound as long as no `br` reaches into the block from inside.
1464
+ *
1465
+ * Two complementary patterns:
1466
+ *
1467
+ * 1. **Block at scope level** (sibling in `func`/`block`/`loop`/`then`/`else`):
1468
+ * splice body into the parent scope. Works for untyped, `(result T)`-typed,
1469
+ * or even `(param …)`-typed blocks — in all cases the body produces the
1470
+ * same net stack effect as the framed block did, at the same position.
1471
+ * 2. **Result-typed block in expression position** (`(block (result T) expr)`
1472
+ * as the value of some operand): collapse to `expr` if the body is a
1473
+ * single value expression. Catches the wrappers jz codegen leaves around
1474
+ * arena allocations once `propagate` has folded the intermediate
1475
+ * set/get pairs to a single call.
1476
+ *
1477
+ * Pattern 2 runs first (post-order) so pattern 1 sees the cleaned-up parents.
1478
+ * @param {Array} ast
1479
+ * @returns {Array}
1480
+ */
1481
+ const mergeBlocks = (ast) => {
1482
+ walkPost(ast, (node) => {
1483
+ if (!Array.isArray(node) || node[0] !== 'block') return
1484
+ let bi = 1, label = null
1485
+ if (typeof node[1] === 'string' && node[1][0] === '$') { label = node[1]; bi = 2 }
1486
+ let hasResult = false
1487
+ while (bi < node.length) {
1488
+ const c = node[bi]
1489
+ if (Array.isArray(c) && (c[0] === 'param' || c[0] === 'type')) { bi++; continue }
1490
+ if (Array.isArray(c) && c[0] === 'result') { hasResult = true; bi++; continue }
1491
+ break
1492
+ }
1493
+ const body = node.slice(bi)
1494
+ if (!hasResult || body.length !== 1) return
1495
+ const only = body[0]
1496
+ if (!Array.isArray(only)) return
1497
+ if (label && targetsLabel(body, label)) return
1498
+ node.length = 0
1499
+ for (const tok of only) node.push(tok)
1500
+ })
1501
+
1502
+ walk(ast, (node) => {
1503
+ if (!isScopeNode(node)) return
1504
+ let i = 1
1505
+ while (i < node.length) {
1506
+ const child = node[i]
1507
+ if (!Array.isArray(child) || child[0] !== 'block') { i++; continue }
1508
+ let bi = 1, label = null
1509
+ if (typeof child[1] === 'string' && child[1][0] === '$') { label = child[1]; bi = 2 }
1510
+ // Skip leading typing annotations; they describe the block's stack effect,
1511
+ // which the body already produces verbatim, so they're discarded on splice.
1512
+ while (bi < child.length) {
1513
+ const c = child[bi]
1514
+ if (Array.isArray(c) && (c[0] === 'param' || c[0] === 'result' || c[0] === 'type')) { bi++; continue }
1515
+ break
1516
+ }
1517
+ const body = child.slice(bi)
1518
+ if (label && targetsLabel(body, label)) { i++; continue }
1519
+ node.splice(i, 1, ...body)
1520
+ i += body.length
1521
+ }
1522
+ })
1523
+ return ast
1524
+ }
1525
+
1526
+ // ==================== COALESCE LOCALS ====================
1527
+
1528
+ /**
1529
+ * Share local slots between same-type locals with non-overlapping live ranges.
1530
+ * Live range = [first pos, last pos] of any local.get/set/tee, extended over
1531
+ * any loop containing a reference (so a value read across loop iterations stays
1532
+ * intact). Greedy slot assignment by start position. Params and unnamed/numeric
1533
+ * references are left alone; `localReuse` later removes the renamed-away decls.
1534
+ *
1535
+ * Soundness: WASM zero-initializes locals at function entry, so a local whose
1536
+ * first reference (in walk order) is a `local.get` *relies* on that implicit
1537
+ * zero — coalescing it into a slot whose previous user left a non-zero residue
1538
+ * would silently change behavior (e.g. a `for (let i=0; …)` loop counter
1539
+ * inheriting `N*4` from a sibling temp). Such "read-first" locals can still
1540
+ * serve as a slot's *primary* (the slot then keeps the function's zero start),
1541
+ * but can never be a donor merged into an existing slot.
1542
+ * @param {Array} ast
1543
+ * @returns {Array}
1544
+ */
1545
+ const coalesceLocals = (ast) => {
1546
+ walk(ast, (funcNode) => {
1547
+ if (!Array.isArray(funcNode) || funcNode[0] !== 'func') return
1548
+
1549
+ const decls = new Map()
1550
+ for (const sub of funcNode) {
1551
+ if (Array.isArray(sub) && sub[0] === 'local' &&
1552
+ typeof sub[1] === 'string' && sub[1][0] === '$' && typeof sub[2] === 'string') {
1553
+ decls.set(sub[1], sub[2])
1554
+ }
1555
+ }
1556
+ if (decls.size < 2) return
1557
+
1558
+ const uses = new Map()
1559
+ const loopStack = []
1560
+ let pos = 0, abort = false, condDepth = 0
1561
+
1562
+ const visit = (n) => {
1563
+ if (abort || !Array.isArray(n)) return
1564
+ const op = n[0]
1565
+ const isLoop = op === 'loop'
1566
+ if (isLoop) loopStack.push({ start: pos, end: pos })
1567
+ const isSet = op === 'local.set' || op === 'local.tee'
1568
+
1569
+ if (isSet || op === 'local.get') {
1570
+ const name = n[1]
1571
+ if (typeof name !== 'string' || name[0] !== '$') { abort = true; return }
1572
+ // Execution order: evaluate set/tee value BEFORE recording the write,
1573
+ // so a `(local.set $x (… (local.get $x) …))` is correctly seen as a
1574
+ // read-then-write of $x (firstOp = local.get).
1575
+ if (isSet) for (let i = 2; i < n.length; i++) visit(n[i])
1576
+ const here = pos++
1577
+ if (decls.has(name)) {
1578
+ let u = uses.get(name)
1579
+ if (!u) { u = { start: here, end: here, firstOp: op, firstCond: condDepth > 0, loops: new Set() }; uses.set(name, u) }
1580
+ if (here > u.end) u.end = here
1581
+ for (const ls of loopStack) u.loops.add(ls)
1582
+ }
1583
+ } else {
1584
+ pos++
1585
+ const isIf = op === 'if'
1586
+ for (let i = 1; i < n.length; i++) {
1587
+ const c = n[i]
1588
+ const cond = isIf && Array.isArray(c) && (c[0] === 'then' || c[0] === 'else')
1589
+ if (cond) condDepth++
1590
+ visit(c)
1591
+ if (cond) condDepth--
1592
+ }
1593
+ }
1594
+
1595
+ if (isLoop) { const ls = loopStack.pop(); ls.end = pos }
1596
+ }
1597
+ visit(funcNode)
1598
+ if (abort) return
1599
+
1600
+ // A use inside a loop must stay live for the whole loop — the next
1601
+ // iteration could read what this iteration wrote.
1602
+ for (const u of uses.values()) {
1603
+ for (const ls of u.loops) {
1604
+ if (ls.start < u.start) u.start = ls.start
1605
+ if (ls.end > u.end) u.end = ls.end
1606
+ }
1607
+ }
1608
+
1609
+ const ordered = [...uses.entries()].sort((a, b) => a[1].start - b[1].start)
1610
+ const rename = new Map()
1611
+ const slots = []
1612
+ for (const [name, range] of ordered) {
1613
+ // Read-first locals depend on the implicit zero; locals first seen inside
1614
+ // an if/else branch may be skipped on the alternate path — either way
1615
+ // they'd observe a prior slot's residue if reused. They may *start* a
1616
+ // fresh slot (the function's zero init), but never *join* one.
1617
+ const readsZero = range.firstOp === 'local.get' || range.firstCond
1618
+ const type = decls.get(name)
1619
+ const slot = readsZero ? null : slots.find(s => s.type === type && s.end < range.start)
1620
+ if (slot) { rename.set(name, slot.primary); if (range.end > slot.end) slot.end = range.end }
1621
+ else slots.push({ primary: name, type, end: range.end })
1622
+ }
1623
+ if (rename.size === 0) return
1624
+
1625
+ walk(funcNode, (n) => {
1626
+ if (Array.isArray(n) &&
1627
+ (n[0] === 'local.get' || n[0] === 'local.set' || n[0] === 'local.tee') &&
1628
+ rename.has(n[1])) {
1629
+ n[1] = rename.get(n[1])
1630
+ }
1631
+ })
1632
+ })
1633
+ return ast
1634
+ }
1635
+
1157
1636
  // ==================== VACUUM ====================
1158
1637
 
1159
1638
  /**
@@ -1300,52 +1779,99 @@ const peephole = (ast) => {
1300
1779
 
1301
1780
  // ==================== GLOBAL CONSTANT PROPAGATION ====================
1302
1781
 
1782
+ /** Bytes a signed-LEB128 integer encodes to. */
1783
+ const slebSize = (v) => {
1784
+ let x = typeof v === 'bigint' ? v : BigInt(Math.trunc(Number(v) || 0))
1785
+ let n = 1
1786
+ while (true) {
1787
+ const b = x & 0x7fn
1788
+ x >>= 7n
1789
+ if ((x === 0n && (b & 0x40n) === 0n) || (x === -1n && (b & 0x40n) !== 0n)) return n
1790
+ n++
1791
+ }
1792
+ }
1793
+ /** Encoded byte size of a constant init instruction (opcode + immediate). */
1794
+ const constInstrSize = (node) => {
1795
+ if (!Array.isArray(node)) return 4
1796
+ switch (node[0]) {
1797
+ case 'i32.const': case 'i64.const': return 1 + slebSize(node[1])
1798
+ case 'f32.const': return 5
1799
+ case 'f64.const': return 9
1800
+ case 'v128.const': return 18
1801
+ default: return 4 // ref.null/ref.func/global.get — conservative
1802
+ }
1803
+ }
1804
+ const GLOBAL_GET_SIZE = 2 // 0x23 opcode + 1-byte globalidx (typical)
1805
+
1303
1806
  /**
1304
- * Replace global.get of immutable globals with their constant init values.
1807
+ * Replace `global.get` of an immutable, const-initialised global with the
1808
+ * constant — but only when it doesn't grow the module. A `global.get` costs
1809
+ * ~2 B; an `i32.const 12345` costs 4 B; an `f64.const` costs 9 B. Naively
1810
+ * inlining a big constant read from many sites trades a few cheap reads + one
1811
+ * global decl for many fat immediates — pure bloat (and the node-count size
1812
+ * guard can't see it: same number of AST nodes). So we only propagate a global
1813
+ * when `refs·constSize ≤ refs·2 + declSize`; when every read is replaced and
1814
+ * the global isn't exported, its now-dead decl is dropped here too.
1305
1815
  * @param {Array} ast
1306
1816
  * @returns {Array}
1307
1817
  */
1308
1818
  const globals = (ast) => {
1309
1819
  if (!Array.isArray(ast) || ast[0] !== 'module') return ast
1310
1820
 
1311
- // Find immutable globals with const init
1312
- const constGlobals = new Map() // name → const node
1313
- const mutableGlobals = new Set()
1821
+ // Immutable globals with a constant init: name → init node.
1822
+ const constGlobals = new Map()
1823
+ const exported = new Set() // globals pinned by an export — keep the decl
1314
1824
 
1315
1825
  for (const node of ast.slice(1)) {
1316
- if (!Array.isArray(node) || node[0] !== 'global') continue
1826
+ if (!Array.isArray(node)) continue
1827
+ if (node[0] === 'export' && Array.isArray(node[2]) && node[2][0] === 'global' && typeof node[2][1] === 'string') { exported.add(node[2][1]); continue }
1828
+ if (node[0] !== 'global') continue
1317
1829
  const name = typeof node[1] === 'string' && node[1][0] === '$' ? node[1] : null
1318
1830
  if (!name) continue
1319
-
1320
- // Check mutability: (global $g (mut i32) init) vs (global $g i32 init)
1321
- const hasName = typeof node[1] === 'string' && node[1][0] === '$'
1322
- const initIdx = hasName ? 3 : 2
1323
-
1324
- // Skip mutable globals
1325
- const typeSlot = hasName ? node[2] : node[1]
1326
- if (Array.isArray(typeSlot) && typeSlot[0] === 'mut') continue
1327
-
1328
- const init = node[initIdx]
1831
+ // (global $g (export "x") …) inline export → pinned
1832
+ if (node.some(c => Array.isArray(c) && c[0] === 'export')) exported.add(name)
1833
+ const typeSlot = node[2]
1834
+ if (Array.isArray(typeSlot) && typeSlot[0] === 'mut') continue // mutable
1835
+ if (Array.isArray(typeSlot) && typeSlot[0] === 'import') continue // imported
1836
+ const init = node[3]
1329
1837
  if (getConst(init)) constGlobals.set(name, init)
1330
1838
  }
1839
+ if (constGlobals.size === 0) return ast
1331
1840
 
1332
- // Also mark any global that is ever written as mutable
1841
+ // Drop any global that is ever written (defensive — an immutable global can't
1842
+ // be, but a malformed module might) and tally read counts.
1843
+ const reads = new Map()
1333
1844
  walk(ast, (n) => {
1334
- if (!Array.isArray(n) || n[0] !== 'global.set') return
1845
+ if (!Array.isArray(n)) return
1335
1846
  const ref = n[1]
1336
- if (typeof ref === 'string' && ref[0] === '$') mutableGlobals.add(ref)
1847
+ if (typeof ref !== 'string' || ref[0] !== '$') return
1848
+ if (n[0] === 'global.set') constGlobals.delete(ref)
1849
+ else if (n[0] === 'global.get') reads.set(ref, (reads.get(ref) || 0) + 1)
1337
1850
  })
1338
1851
 
1339
- // Remove mutable ones from propagation set
1340
- for (const name of mutableGlobals) constGlobals.delete(name)
1341
- if (constGlobals.size === 0) return ast
1852
+ // Keep only globals where propagation is size-neutral or better.
1853
+ const propagate = new Set()
1854
+ for (const [name, init] of constGlobals) {
1855
+ const r = reads.get(name) || 0
1856
+ if (r === 0) continue // dead anyway — leave to treeshake
1857
+ const cs = constInstrSize(init)
1858
+ const declSize = cs + 2 // valtype + mutability byte + init expr + `end`
1859
+ const before = r * GLOBAL_GET_SIZE + declSize
1860
+ const after = r * cs + (exported.has(name) ? declSize : 0)
1861
+ if (after <= before) propagate.add(name)
1862
+ }
1863
+ if (propagate.size === 0) return ast
1342
1864
 
1343
- // Substitute global.get with const
1344
- return walkPost(ast, (node) => {
1865
+ walkPost(ast, (node) => {
1345
1866
  if (!Array.isArray(node) || node[0] !== 'global.get' || node.length !== 2) return
1346
- const ref = node[1]
1347
- if (constGlobals.has(ref)) return clone(constGlobals.get(ref))
1867
+ if (propagate.has(node[1])) return clone(constGlobals.get(node[1]))
1348
1868
  })
1869
+ // Their reads are all gone now — remove the decls we're free to remove.
1870
+ for (let i = ast.length - 1; i >= 1; i--) {
1871
+ const n = ast[i]
1872
+ if (Array.isArray(n) && n[0] === 'global' && typeof n[1] === 'string' && propagate.has(n[1]) && !exported.has(n[1])) ast.splice(i, 1)
1873
+ }
1874
+ return ast
1349
1875
  }
1350
1876
 
1351
1877
  // ==================== LOAD/STORE OFFSET FOLDING ====================
@@ -1458,13 +1984,92 @@ const unbranch = (ast) => {
1458
1984
 
1459
1985
  const last = node[lastIdx]
1460
1986
  if (Array.isArray(last) && last[0] === 'br' && last[1] === label) {
1461
- node.splice(lastIdx, 1)
1987
+ // `(br $L v…)` as a block's last instruction just leaves v… as the block's
1988
+ // result — splice the value operand(s) in its place (none → plain removal).
1989
+ node.splice(lastIdx, 1, ...last.slice(2))
1462
1990
  }
1463
1991
  })
1464
1992
 
1465
1993
  return ast
1466
1994
  }
1467
1995
 
1996
// ==================== WHILE-LOOP CANONICALIZATION ====================

/**
 * Collapse the `while`-emit idiom into a single loop.
 *
 *   (block $A
 *     (loop $B
 *       (br_if $A (i32.eqz cond)) ;; exit when cond is false
 *       …body…
 *       (br $B)))                 ;; continue
 *
 * becomes
 *
 *   (loop $B
 *     (if cond (then …body… (br $B))))
 *
 * Saves ~3 B per while-loop (drop the outer block framing + the `i32.eqz`,
 * trade `br_if`→`if`). Safe only when:
 *  - the block contains nothing but the loop (plus optional `type` slot),
 *  - block / loop are void (no param/result),
 *  - $A is never targeted from within the body OR the exit condition — only
 *    the head `br_if` itself may use it (the condition is evaluated inside
 *    block $A, so a folded condition expression could legally branch to it;
 *    removing the block would leave such a branch dangling).
 *
 * @param {Array} ast
 * @returns {Array} same ast, rewritten in place
 */
const loopify = (ast) => {
  walk(ast, (node) => {
    if (!Array.isArray(node) || node[0] !== 'block') return

    // Outer block must carry a label — otherwise no br_if can target it.
    let bi = 1, label = null
    if (typeof node[1] === 'string' && node[1][0] === '$') { label = node[1]; bi = 2 }
    if (!label) return

    // Skip an optional `(type …)` slot; explicit param/result means the block
    // produces/consumes values, and the rewrite would change its type — bail.
    while (bi < node.length) {
      const c = node[bi]
      if (Array.isArray(c) && c[0] === 'type') { bi++; continue }
      if (Array.isArray(c) && (c[0] === 'param' || c[0] === 'result')) return // typed → skip
      break
    }

    // The block must contain exactly one instruction: the loop.
    if (node.length - bi !== 1) return
    const loop = node[bi]
    if (!Array.isArray(loop) || loop[0] !== 'loop') return

    let li = 1, loopLabel = null
    if (typeof loop[1] === 'string' && loop[1][0] === '$') { loopLabel = loop[1]; li = 2 }

    // Preserve the loop's own `(type …)` header; again bail on typed loops.
    const loopHeader = []
    while (li < loop.length) {
      const c = loop[li]
      if (Array.isArray(c) && c[0] === 'type') { loopHeader.push(c); li++; continue }
      if (Array.isArray(c) && (c[0] === 'param' || c[0] === 'result')) return // typed → skip
      break
    }

    const body = loop.slice(li)
    if (body.length < 2) return // need at least the head br_if and the tail br

    const head = body[0]
    const tail = body[body.length - 1]
    if (!Array.isArray(head) || head[0] !== 'br_if' || head[1] !== label || head.length !== 3) return
    if (!Array.isArray(tail) || tail[0] !== 'br' || tail[1] !== loopLabel || tail.length !== 2) return

    const inner = body.slice(1, -1)
    if (targetsLabel(inner, label)) return
    // The exit condition itself may contain control flow (a folded block
    // expression) that branches to $A — removing the outer block would leave
    // that branch dangling, so the idiom doesn't apply.
    if (targetsLabel([head[2]], label)) return

    // br_if exits when `cond` is non-zero — `if`'s then-arm runs when its
    // condition is non-zero. So the if-condition is the negation. Strip a
    // wrapping `i32.eqz` if present; otherwise wrap.
    let cond = head[2]
    if (Array.isArray(cond) && cond[0] === 'i32.eqz' && cond.length === 2) cond = cond[1]
    else cond = ['i32.eqz', cond]

    const newLoop = ['loop']
    if (loopLabel) newLoop.push(loopLabel)
    for (const h of loopHeader) newLoop.push(h)
    newLoop.push(['if', cond, ['then', ...inner, tail]])

    // Rewrite the block node in place into the bare loop.
    node.length = 0
    for (const tok of newLoop) node.push(tok)
  })
  return ast
}
2072
+
1468
2073
  // ==================== STRIP MUT FROM GLOBALS ====================
1469
2074
 
1470
2075
  /**
@@ -2092,9 +2697,15 @@ export default function optimize(ast, opts = true) {
2092
2697
  ast = clone(ast)
2093
2698
  let beforeRound = null
2094
2699
 
2700
+ // Size guard works on encoded bytes, not AST node count: passes like
2701
+ // `globals` / `inlineOnce` are node-count-neutral yet move real bytes
2702
+ // (a `global.get` ↔ a fat `f64.const`; a `call` ↔ an inlined body), so a
2703
+ // node-count guard can't tell when a round bloated — or shrank. `binarySize`
2704
+ // also returns Infinity if a round produced invalid wat, so a broken round
2705
+ // reverts instead of escaping.
2095
2706
  for (let round = 0; round < 3; round++) {
2096
2707
  beforeRound = clone(ast)
2097
- const sizeBefore = count(ast)
2708
+ const sizeBefore = binarySize(ast)
2098
2709
 
2099
2710
  if (opts.stripmut) ast = stripmut(ast)
2100
2711
  if (opts.globals) ast = globals(ast)
@@ -2104,13 +2715,17 @@ export default function optimize(ast, opts = true) {
2104
2715
  if (opts.strength) ast = strength(ast)
2105
2716
  if (opts.branch) ast = branch(ast)
2106
2717
  if (opts.propagate) ast = propagate(ast)
2718
+ if (opts.inlineOnce) ast = inlineOnce(ast)
2107
2719
  if (opts.inline) ast = inline(ast)
2108
2720
  if (opts.offset) ast = offset(ast)
2109
2721
  if (opts.unbranch) ast = unbranch(ast)
2722
+ if (opts.loopify) ast = loopify(ast)
2110
2723
  if (opts.brif) ast = brif(ast)
2111
2724
  if (opts.foldarms) ast = foldarms(ast)
2112
2725
  if (opts.deadcode) ast = deadcode(ast)
2113
2726
  if (opts.vacuum) ast = vacuum(ast)
2727
+ if (opts.mergeBlocks) ast = mergeBlocks(ast)
2728
+ if (opts.coalesce) ast = coalesceLocals(ast)
2114
2729
  if (opts.locals) ast = localReuse(ast)
2115
2730
  if (opts.dedupe) ast = dedupe(ast)
2116
2731
  if (opts.dedupTypes) ast = dedupTypes(ast)
@@ -2118,19 +2733,25 @@ export default function optimize(ast, opts = true) {
2118
2733
  if (opts.reorder) ast = reorder(ast)
2119
2734
  if (opts.treeshake) ast = treeshake(ast)
2120
2735
  if (opts.minifyImports) ast = minifyImports(ast)
2121
-
2122
- const sizeAfter = count(ast)
2736
+ // Second propagate sweep: `inlineOnce`/`inline` (above) leave fresh
2737
+ // `(local.set $p arg) … (local.get $p)` wrappers around each inlined call;
2738
+ // re-running propagation collapses them within this same round, so the size
2739
+ // guard scores the cleaned result instead of waiting a round (which it may
2740
+ // never get if `equal()` declares a fixpoint first).
2741
+ if (opts.propagate && (opts.inlineOnce || opts.inline)) ast = propagate(ast)
2742
+
2743
+ const sizeAfter = binarySize(ast)
2123
2744
  const delta = sizeAfter - sizeBefore
2124
2745
 
2125
2746
  if (verbose || delta !== 0) {
2126
- log(` round ${round + 1}: ${delta > 0 ? '+' : ''}${delta} nodes`, delta)
2747
+ log(` round ${round + 1}: ${delta > 0 ? '+' : ''}${delta} bytes`, delta)
2127
2748
  }
2128
2749
 
2129
- // Size guard: default optimize must never inflate. Explicit passes
2130
- // get leniency (+5 nodes) so inline/propagate/foldarms can chain.
2131
- const tolerance = strictGuard ? 0 : 5
2750
+ // Size guard: default optimize must never inflate. Explicit passes get a
2751
+ // little leniency (a round may grow a few bytes setting up a bigger win).
2752
+ const tolerance = strictGuard ? 0 : 16
2132
2753
  if (delta > tolerance) {
2133
- if (verbose) log(` ⚠ round ${round + 1} inflated by ${delta}, reverting`, delta)
2754
+ if (verbose) log(` ⚠ round ${round + 1} inflated by ${delta} bytes, reverting`, delta)
2134
2755
  ast = beforeRound
2135
2756
  break
2136
2757
  }
@@ -2143,4 +2764,4 @@ export default function optimize(ast, opts = true) {
2143
2764
 
2144
2765
  /** Count AST nodes (fast size heuristic). */
2145
2766
  export { count as size, count, binarySize }
2146
- export { optimize, treeshake, fold, deadcode, localReuse, identity, strength, branch, propagate, inline, normalize, OPTS, vacuum, peephole, globals, offset, unbranch, stripmut, brif, foldarms, dedupe, reorder, dedupTypes, packData, minifyImports }
2767
+ export { optimize, treeshake, fold, deadcode, localReuse, identity, strength, branch, propagate, inline, inlineOnce, normalize, OPTS, vacuum, peephole, globals, offset, unbranch, loopify, stripmut, brif, foldarms, dedupe, reorder, dedupTypes, packData, minifyImports, mergeBlocks, coalesceLocals }