watr 4.5.3 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/optimize.js CHANGED
@@ -18,9 +18,12 @@ const OPTS = {
18
18
  identity: true, // remove identity ops (x + 0 → x)
19
19
  strength: true, // strength reduction (x * 2 → x << 1)
20
20
  branch: true, // simplify constant branches
21
- propagate: false, // constant propagation can duplicate expressions
21
+ propagate: true, // forward-propagate single-use locals & tiny consts (never inflates)
22
22
  inline: false, // inline tiny functions — can duplicate bodies
23
+ inlineOnce: true, // inline single-call functions into their lone caller (never duplicates)
23
24
  vacuum: true, // remove nops, drop-of-pure, empty branches
25
+ mergeBlocks: true, // unwrap `(block $L …)` whose label is never targeted
26
+ coalesce: true, // share local slots between same-type non-overlapping locals
24
27
  peephole: true, // x-x→0, x&0→0, etc.
25
28
  globals: true, // propagate immutable global constants
26
29
  offset: true, // fold add+const into load/store offset
@@ -855,8 +858,33 @@ const countLocalUses = (node) => {
855
858
  return counts
856
859
  }
857
860
 
858
- /** Can this tracked value be substituted for a local.get? */
859
- const canSubst = (k) => getConst(k.val) || (k.pure && k.singleUse)
/**
 * Is `node` a constant cheap enough to copy to every use site?
 *
 * Only narrow i32/i64 literals in the range [-64, 63] qualify: the range check
 * below is what keeps the inlined constant small, so substituting it for a
 * `local.get` is meant to be byte-neutral at worst. Float constants never
 * qualify.
 *
 * @param {*} node - candidate AST node, handed to `getConst`
 * @returns {boolean} true when the node is an i32/i64 constant within [-64, 63]
 */
const isTinyConst = (node) => {
  const lit = getConst(node)
  if (!lit) return false
  switch (lit.type) {
    case 'i32': {
      // `| 0` folds the value into signed 32-bit range before comparing
      const n = lit.value | 0
      return -64 <= n && n <= 63
    }
    case 'i64': {
      const n = typeof lit.value === 'bigint' ? lit.value : BigInt(lit.value)
      return -64n <= n && n <= 63n
    }
    default:
      return false
  }
}
872
+
/**
 * Decide whether a tracked value may replace a `local.get`.
 *
 * Two cases are accepted:
 *  - the value is pure and the local is used exactly once, or
 *  - the value is a tiny constant (see `isTinyConst`), whatever the use count.
 * Everything else is left in place.
 *
 * @param {{val: *, pure: boolean, singleUse: boolean}} k - tracked entry
 * @returns {boolean}
 */
const canSubst = (k) => {
  const { pure, singleUse, val } = k
  return (pure && singleUse) || isTinyConst(val)
}
879
+
/**
 * Forget every tracked value whose expression mentions local `name` via
 * `local.get` or `local.tee`: once `name` is rewritten those values are stale.
 *
 * @param {Map<string, {val: *}>} known - tracked values, mutated in place
 * @param {string} name - the local that is about to change
 */
const purgeRefs = (known, name) => {
  const touchesName = (n) =>
    Array.isArray(n) && (n[0] === 'local.get' || n[0] === 'local.tee') && n[1] === name

  // Deleting the entry currently being visited is safe while iterating a Map.
  for (const [key, { val }] of known) {
    let stale = false
    walk(val, (n) => { if (touchesName(n)) stale = true })
    if (stale) known.delete(key)
  }
}
860
888
 
861
889
  /** Try substitute local.get nodes with known values */
862
890
  const substGets = (node, known) => walkPost(node, n => {
@@ -889,6 +917,7 @@ const forwardPropagate = (funcNode, params, useCounts) => {
889
917
  if ((op === 'local.set' || op === 'local.tee') && instr.length === 3 && typeof instr[1] === 'string') {
890
918
  substGets(instr[2], known) // substitute known values in RHS
891
919
  const uses = getUseCount(instr[1])
920
+ purgeRefs(known, instr[1]) // entries that read this local just went stale
892
921
  known.set(instr[1], {
893
922
  val: instr[2], pure: isPure(instr[2]),
894
923
  singleUse: uses.gets <= 1 && uses.sets <= 1 && uses.tees === 0
@@ -923,7 +952,7 @@ const forwardPropagate = (funcNode, params, useCounts) => {
923
952
  // (untracked) value, not the stale constant.
924
953
  walk(instr, n => {
925
954
  if (Array.isArray(n) && (n[0] === 'local.set' || n[0] === 'local.tee') && typeof n[1] === 'string')
926
- known.delete(n[1])
955
+ { known.delete(n[1]); purgeRefs(known, n[1]) }
927
956
  })
928
957
  }
929
958
  }
@@ -1024,6 +1053,10 @@ const eliminateDeadStores = (funcNode, params, useCounts) => {
1024
1053
  * Constants propagate to all uses; pure single-use exprs inline into get site.
1025
1054
  * Multi-pass with batch counting for convergence.
1026
1055
  */
/**
 * True for the block-like nodes (`func`, `block`, `loop`, `then`, `else`)
 * whose children form a straight-line instruction list after any header.
 *
 * @param {*} n - candidate AST node
 * @returns {boolean}
 */
const isScopeNode = (n) => {
  if (!Array.isArray(n)) return false
  switch (n[0]) {
    case 'func':
    case 'block':
    case 'loop':
    case 'then':
    case 'else':
      return true
    default:
      return false
  }
}
1059
+
1027
1060
  const propagate = (ast) => {
1028
1061
  walk(ast, (funcNode) => {
1029
1062
  if (!Array.isArray(funcNode) || funcNode[0] !== 'func') return
@@ -1032,16 +1065,30 @@ const propagate = (ast) => {
1032
1065
  for (const sub of funcNode)
1033
1066
  if (Array.isArray(sub) && sub[0] === 'param' && typeof sub[1] === 'string') params.add(sub[1])
1034
1067
 
1035
- // useCounts must be refreshed before every sub-pass: each mutation
1036
- // (substitution, set/get pair removal, tee creation, dead-store removal)
1037
- // changes the gets/sets/tees totals that downstream sub-passes rely on.
1038
- for (let pass = 0; pass < 4; pass++) {
1039
- let changed = false
1040
- if (forwardPropagate(funcNode, params, countLocalUses(funcNode))) changed = true
1041
- if (eliminateSetGetPairs(funcNode, params, countLocalUses(funcNode))) changed = true
1042
- if (createLocalTees(funcNode, params, countLocalUses(funcNode))) changed = true
1043
- if (eliminateDeadStores(funcNode, params, countLocalUses(funcNode))) changed = true
1044
- if (!changed) break
1068
+ // Propagation runs per straight-line scope: the function body and every nested
1069
+ // `block`/`loop`/`then`/`else` (including ones embedded in an expression, e.g. the
1070
+ // `(block (result i32) …)` an inlined call leaves behind). Collect scopes deepest-
1071
+ // first so inner simplifications shrink the use-counts the outer scopes see.
1072
+ // Use-counts are always whole-function — a set/get pair or dead store is only
1073
+ // touched when it's globally the sole occurrence, so per-scope work stays sound.
1074
+ const scopes = []
1075
+ walkPost(funcNode, n => { if (isScopeNode(n)) scopes.push(n) })
1076
+
1077
+ // One use-count per round, shared by every scope: substitutions only ever
1078
+ // *drop* gets, so a stale count can only make a sub-pass act more cautiously
1079
+ // (skip a not-yet-provably-dead store, decline a not-yet-provably-single use) —
1080
+ // never wrongly. The next round re-counts and mops up. (Recounting per sub-pass
1081
+ // per scope is O(scopes·funcSize) and crippling on big modules.)
1082
+ for (let round = 0; round < 6; round++) {
1083
+ const useCounts = countLocalUses(funcNode)
1084
+ let progressed = false
1085
+ for (const scope of scopes) {
1086
+ if (forwardPropagate(scope, params, useCounts)) progressed = true
1087
+ if (eliminateSetGetPairs(scope, params, useCounts)) progressed = true
1088
+ if (createLocalTees(scope, params, useCounts)) progressed = true
1089
+ if (eliminateDeadStores(scope, params, useCounts)) progressed = true
1090
+ }
1091
+ if (!progressed) break
1045
1092
  }
1046
1093
  })
1047
1094
 
@@ -1154,6 +1201,373 @@ const inline = (ast) => {
1154
1201
  return ast
1155
1202
  }
1156
1203
 
1204
+ // ==================== INLINE-ONCE ====================
1205
+
// Module-wide counter so every inlined call gets unique `$__inlN…` names,
// even across repeated invocations of the pass.
let inlineUid = 0

/**
 * Inline functions that are called from exactly one place into their lone
 * caller, then delete them. Nothing is duplicated: each inlined function drops
 * its own declaration and the `call`, paying back only a `block` wrapper plus
 * the `local.set`s that bind the arguments.
 *
 * A function `$f` qualifies when it is, all of:
 *   - named, with named params and locals of a type whose default value is
 *     known (numeric, v128, nullable/non-nullable refs);
 *   - referenced exactly once across the whole module, by a plain `call`
 *     (no `return_call`, `ref.func`, `elem`, `export`, `start` reference,
 *     not an inline `import`, and not directly recursive);
 *   - single-result or void;
 *   - free of numeric (depth-relative) branch labels, tail calls and
 *     exception-handling constructs in its body.
 *
 * `(call $f a0 a1 …)` becomes
 *     (block $__inlN (result T)?
 *       (local.set $__inlN_p0 a0) …        ;; args evaluated once, in order
 *       (local.set $__inlN_tmp (T.const 0)) ;; only when the call sits in a loop
 *       …body, params/locals/labels renamed, `return X` → `br $__inlN X`…)
 * and the renamed params + locals are added to the caller's `local` decls.
 *
 * Soundness notes:
 *   - A function's locals start at zero on every call. Once inlined they are
 *     caller locals, so a call site inside a caller `loop` would otherwise see
 *     the previous iteration's values; such sites re-zero the callee's locals.
 *   - Deleting a function shifts every later function index, so the pass does
 *     nothing at all when the module refers to any function by number.
 *
 * At most 16 functions are inlined per invocation (one per round); a callee
 * whose call site cannot be rewritten is skipped and the next one is tried.
 *
 * @param {Array} ast - module AST; mutated in place
 * @returns {Array} the same `ast`
 */
const inlineOnce = (ast) => {
  if (!Array.isArray(ast) || ast[0] !== 'module') return ast

  const HEAD = new Set(['export', 'type', 'param', 'result', 'local'])
  const BRANCH_OPS = new Set([
    'br', 'br_if', 'br_table', 'br_on_null', 'br_on_non_null', 'br_on_cast', 'br_on_cast_fail'
  ])
  const FUNC_REF_OPS = new Set(['call', 'return_call', 'ref.func', 'start'])
  // Shorthand reference types → heap type accepted by `ref.null`.
  const NULL_HEAP = new Map([
    ['funcref', 'func'], ['externref', 'extern'], ['anyref', 'any'], ['eqref', 'eq'],
    ['i31ref', 'i31'], ['structref', 'struct'], ['arrayref', 'array'], ['exnref', 'exn'],
    ['nullref', 'none'], ['nullfuncref', 'nofunc'], ['nullexternref', 'noextern'], ['nullexnref', 'noexn']
  ])

  const isIndex = (t) => typeof t === 'number' || (typeof t === 'string' && /^\d+$/.test(t))
  const isBranch = (op) => BRANCH_OPS.has(op)
  const isBlockLabel = (op) => op === 'block' || op === 'loop' || op === 'if'

  // Index of the first body instruction. Starts at 1 so that an anonymous
  // function without any header (`(func (call $f))`) is handled too.
  const bodyStart = (fn) => {
    let i = 1
    while (i < fn.length && (typeof fn[i] === 'string' || (Array.isArray(fn[i]) && HEAD.has(fn[i][0])))) i++
    return i
  }

  // Instruction producing the default value of a local of `type`:
  // a node, `null` when the type has no default (non-nullable ref — such a
  // local is always written before it is read), `undefined` when unknown.
  const zeroInit = (type) => {
    if (type === 'i32' || type === 'i64' || type === 'f32' || type === 'f64') return [`${type}.const`, '0']
    if (type === 'v128') return ['v128.const', 'i32x4', '0', '0', '0', '0']
    if (typeof type === 'string' && NULL_HEAP.has(type)) return ['ref.null', NULL_HEAP.get(type)]
    if (Array.isArray(type) && type[0] === 'ref') return type[1] === 'null' ? ['ref.null', type[2]] : null
    return undefined
  }

  // Does the module refer to any function by numeric index?
  const usesFuncIndex = (n) => {
    if (!Array.isArray(n)) return false
    const op = n[0]
    if (FUNC_REF_OPS.has(op) && isIndex(n[1])) return true
    if (op === 'export' && Array.isArray(n[2]) && n[2][0] === 'func' && isIndex(n[2][1])) return true
    if (op === 'elem') for (const c of n) if (isIndex(c)) return true // conservative: may be a table index
    for (let i = 1; i < n.length; i++) if (usesFuncIndex(n[i])) return true
    return false
  }

  // A subtree we can't lift into a (block …): depth-relative branch labels
  // (they shift under added nesting), tail calls (would escape the wrapping
  // block) or exception constructs (their labels are not relabelled below).
  const unsafe = (n) => {
    if (!Array.isArray(n)) return false
    const op = n[0]
    if (op === 'return_call' || op === 'return_call_indirect' || op === 'return_call_ref') return true
    if (op === 'try' || op === 'try_table' || op === 'delegate' || op === 'rethrow') return true
    if (isBranch(op)) for (let i = 1; i < n.length; i++) if (isIndex(n[i])) return true
    for (let i = 1; i < n.length; i++) if (unsafe(n[i])) return true
    return false
  }

  const callsSelf = (n, name) => {
    if (!Array.isArray(n)) return false
    if ((n[0] === 'call' || n[0] === 'return_call') && n[1] === name) return true
    for (let i = 1; i < n.length; i++) if (callsSelf(n[i], name)) return true
    return false
  }

  // Module-level references that pin a function (can't be removed/inlined away).
  const collectPinned = (n, pinned) => {
    if (!Array.isArray(n)) return
    const op = n[0]
    if (op === 'export' && Array.isArray(n[2]) && n[2][0] === 'func' && typeof n[2][1] === 'string') pinned.add(n[2][1])
    else if (op === 'start' && typeof n[1] === 'string') pinned.add(n[1])
    else if (op === 'ref.func' && typeof n[1] === 'string') pinned.add(n[1])
    else if (op === 'elem') for (const c of n) if (typeof c === 'string' && c[0] === '$') pinned.add(c)
    for (const c of n) collectPinned(c, pinned)
  }

  // Removing a function renumbers the ones after it; bail out rather than
  // silently retarget a numeric `call`/`start`/`elem`/`export` reference.
  if (usesFuncIndex(ast)) return ast

  const failed = new Set() // callees whose call site could not be rewritten

  for (let round = 0; round < 16; round++) {
    const funcs = ast.filter(n => Array.isArray(n) && n[0] === 'func')
    const funcByName = new Map()
    for (const n of funcs) if (typeof n[1] === 'string') funcByName.set(n[1], n)

    // Count plain-call references across the WHOLE module (anonymous exported
    // funcs call helpers too); flag any non-call reference (return_call).
    const callRefs = new Map(), otherRef = new Set()
    const countRefs = (n) => {
      if (!Array.isArray(n)) return
      const op = n[0]
      if (op === 'call' && typeof n[1] === 'string') callRefs.set(n[1], (callRefs.get(n[1]) || 0) + 1)
      else if (op === 'return_call' && typeof n[1] === 'string') otherRef.add(n[1])
      for (let i = 1; i < n.length; i++) countRefs(n[i])
    }
    countRefs(ast)
    const pinned = new Set()
    for (const n of ast) if (!Array.isArray(n) || n[0] !== 'func') collectPinned(n, pinned)

    // Pick a callee.
    let calleeName = null
    for (const [name, fn] of funcByName) {
      if (failed.has(name) || pinned.has(name) || otherRef.has(name)) continue
      if (callRefs.get(name) !== 1) continue
      if (callsSelf(fn, name)) continue
      // Header scan: named params/locals only, known local types, ≤ 1 result,
      // no inline export/import.
      let ok = true, nResult = 0
      for (let i = 2; i < fn.length; i++) {
        const c = fn[i]
        if (typeof c === 'string') continue
        if (!Array.isArray(c)) { ok = false; break }
        if (c[0] === 'param' || c[0] === 'local') {
          if (typeof c[1] !== 'string' || c[1][0] !== '$') { ok = false; break }
          if (c[0] === 'local' && zeroInit(c[2]) === undefined) { ok = false; break }
        }
        else if (c[0] === 'result') nResult += c.length - 1
        else if (c[0] === 'export' || c[0] === 'import') { ok = false; break }
        else if (c[0] === 'type') continue
        else break
      }
      if (!ok || nResult > 1) continue
      let bad = false
      for (let i = bodyStart(fn); i < fn.length; i++) if (unsafe(fn[i])) { bad = true; break }
      if (bad) continue
      calleeName = name; break
    }
    if (!calleeName) break

    const callee = funcByName.get(calleeName)
    const params = [], locals = []
    let resultType = null
    for (let i = 2; i < callee.length; i++) {
      const c = callee[i]
      if (typeof c === 'string' || !Array.isArray(c)) continue
      if (c[0] === 'param') params.push({ name: c[1], type: c[2] })
      else if (c[0] === 'result') { if (c.length > 1) resultType = c[1] }
      else if (c[0] === 'local') locals.push({ name: c[1], type: c[2] })
      else if (c[0] === 'export' || c[0] === 'type') continue
      else break
    }
    const cBody = callee.slice(bodyStart(callee))

    const uid = ++inlineUid
    const exit = `$__inl${uid}`
    const rename = new Map()
    for (const p of params) rename.set(p.name, `$__inl${uid}_${p.name.slice(1)}`)
    for (const l of locals) rename.set(l.name, `$__inl${uid}_${l.name.slice(1)}`)

    // The callee's own block/loop/if labels would shadow same-named labels in
    // the caller after nesting — give them fresh names too.
    const labelRename = new Map()
    const collectLabels = (n) => {
      if (!Array.isArray(n)) return
      if (isBlockLabel(n[0]) && typeof n[1] === 'string' && n[1][0] === '$' && !labelRename.has(n[1]))
        labelRename.set(n[1], `$__inl${uid}L_${n[1].slice(1)}`)
      for (let i = 1; i < n.length; i++) collectLabels(n[i])
    }
    for (const n of cBody) collectLabels(n)

    const relabel = (c) => (typeof c === 'string' && labelRename.has(c)) ? labelRename.get(c) : sub(c)
    // Rewrite one callee node: rename locals and labels, turn `return` into a
    // branch to the wrapping block. Returns a fresh node; never mutates input.
    const sub = (n) => {
      if (!Array.isArray(n)) return n
      const op = n[0]
      if ((op === 'local.get' || op === 'local.set' || op === 'local.tee') && typeof n[1] === 'string' && rename.has(n[1]))
        return [op, rename.get(n[1]), ...n.slice(2).map(sub)]
      if (op === 'return') return ['br', exit, ...n.slice(1).map(sub)]
      if (isBlockLabel(op) && typeof n[1] === 'string' && labelRename.has(n[1]))
        return [op, labelRename.get(n[1]), ...n.slice(2).map(sub)]
      if (isBranch(op)) {
        // br_table lists several labels; every other branch op has exactly
        // one, in the first operand slot (later operands may be type names).
        return op === 'br_table'
          ? [op, ...n.slice(1).map(relabel)]
          : [op, ...n.slice(1).map((c, k) => (k === 0 ? relabel(c) : sub(c)))]
      }
      return n.map((c, i) => i === 0 ? c : sub(c))
    }

    // Splice into the (unique) caller, children first, remembering whether
    // the call site is nested inside a caller `loop`.
    let done = false
    const inlineAt = (n, inLoop) => {
      if (done || !Array.isArray(n)) return n
      const nested = inLoop || n[0] === 'loop'
      for (let i = 1; i < n.length && !done; i++) n[i] = inlineAt(n[i], nested)
      if (done || n[0] !== 'call' || n[1] !== calleeName) return n
      const args = n.slice(2)
      if (args.length !== params.length) return n // arity mismatch — leave it
      const setup = params.map((p, k) => ['local.set', rename.get(p.name), args[k]])
      // Inside a loop the inlined locals survive from one iteration to the
      // next; restore the fresh-call zero state the callee was written for.
      const zeroes = []
      if (inLoop) {
        for (const l of locals) {
          const zero = zeroInit(l.type)
          if (zero) zeroes.push(['local.set', rename.get(l.name), zero])
        }
      }
      const inner = cBody.map(sub)
      done = true
      return resultType
        ? ['block', exit, ['result', resultType], ...setup, ...zeroes, ...inner]
        : ['block', exit, ...setup, ...zeroes, ...inner]
    }

    for (const fn of funcs) {
      if (fn === callee) continue
      for (let i = bodyStart(fn); i < fn.length && !done; i++) fn[i] = inlineAt(fn[i], false)
      if (done) {
        const decls = [...params, ...locals].map(p => ['local', rename.get(p.name), p.type])
        if (decls.length) fn.splice(bodyStart(fn), 0, ...decls)
        break
      }
    }
    // Call site not rewritable (e.g. unfolded call form): keep the callee and
    // move on to the next candidate instead of abandoning the whole pass.
    if (!done) { failed.add(calleeName); continue }

    const idx = ast.indexOf(callee)
    if (idx >= 0) ast.splice(idx, 1)
  }

  return ast
}
1398
+
1399
+ // ==================== MERGE BLOCKS ====================
1400
+
/**
 * Does `body` contain an instruction that targets `label`?
 *
 * Recognised targets:
 *   - `br`, `br_if`, `br_on_null`, `br_on_non_null`, `br_on_cast`,
 *     `br_on_cast_fail`, `delegate`, `rethrow` — label in the first operand;
 *   - `br_table` — any of its leading label operands;
 *   - `try_table` handler clauses — `(catch $tag $label)`,
 *     `(catch_ref $tag $label)`, `(catch_all $label)`, `(catch_all_ref $label)`.
 *
 * An inner `block`/`loop` that re-binds the same label hides everything inside
 * it. A labelled `if` is deliberately NOT treated as re-binding: its folded
 * condition lives inside the `if` node but outside the label's scope, so
 * treating the whole node as shadowed could miss a real target. Erring this way
 * only ever reports a target that isn't one.
 *
 * @param {Array} body - instruction list to search
 * @param {string} label - label name, e.g. `$exit`
 * @returns {boolean}
 */
const targetsLabel = (body, label) => {
  const LABEL_FIRST = new Set([
    'br', 'br_if', 'br_on_null', 'br_on_non_null', 'br_on_cast', 'br_on_cast_fail',
    'delegate', 'rethrow', 'catch_all', 'catch_all_ref'
  ])
  const LABEL_SECOND = new Set(['catch', 'catch_ref']) // (catch $tag $label)

  const scan = (n) => {
    if (!Array.isArray(n)) return false
    const op = n[0]
    // Re-bound label: nothing in this subtree can reach the outer one.
    if ((op === 'block' || op === 'loop') && typeof n[1] === 'string' && n[1] === label) return false

    if (op === 'br_table') {
      for (let j = 1; j < n.length && typeof n[j] === 'string'; j++) if (n[j] === label) return true
    } else if (LABEL_FIRST.has(op)) {
      if (n[1] === label) return true
    } else if (LABEL_SECOND.has(op)) {
      if (n[2] === label) return true
    }

    for (let i = 1; i < n.length; i++) if (scan(n[i])) return true
    return false
  }

  for (const node of body) if (scan(node)) return true
  return false
}
1428
+
/**
 * Unwrap redundant `(block $L? body…)` nodes, splicing the body into the
 * surrounding scope (`func`/`block`/`loop`/`then`/`else`).
 *
 * A block is left alone when:
 *   - it carries a `param`/`result`/`type` annotation (its stack effect would
 *     change);
 *   - its label is targeted from inside (see `targetsLabel`);
 *   - its body contains any branch that names its target by numeric depth —
 *     removing one level of nesting would silently retarget such a branch,
 *     whether or not the block itself is labelled.
 *
 * Spliced-in children are re-examined in place, so a chain such as
 * `(block (block (block …)))` collapses in a single pass.
 *
 * @param {Array} ast - AST to rewrite; mutated in place
 * @returns {Array} the same `ast`
 */
const mergeBlocks = (ast) => {
  const LABEL_OPS = new Set([
    'br', 'br_if', 'br_table', 'br_on_null', 'br_on_non_null', 'br_on_cast', 'br_on_cast_fail',
    'delegate', 'rethrow', 'catch', 'catch_ref', 'catch_all', 'catch_all_ref'
  ])
  const isDepth = (t) => typeof t === 'number' || (typeof t === 'string' && /^\d+$/.test(t))
  // Any depth-relative label anywhere below `n`? Conservative: a numeric type
  // index on a branch op also counts.
  const hasDepthBranch = (n) => {
    if (!Array.isArray(n)) return false
    if (LABEL_OPS.has(n[0])) for (let i = 1; i < n.length; i++) if (isDepth(n[i])) return true
    for (let i = 1; i < n.length; i++) if (hasDepthBranch(n[i])) return true
    return false
  }
  const isAnnotation = (c) => Array.isArray(c) && (c[0] === 'param' || c[0] === 'result' || c[0] === 'type')

  walk(ast, (node) => {
    if (!isScopeNode(node)) return
    let i = 1
    while (i < node.length) {
      const child = node[i]
      if (!Array.isArray(child) || child[0] !== 'block') { i++; continue }

      const label = typeof child[1] === 'string' && child[1][0] === '$' ? child[1] : null
      const body = child.slice(label ? 2 : 1)
      const keep = body.some(isAnnotation) ||
        (label !== null && targetsLabel(body, label)) ||
        body.some(hasDepthBranch)
      if (keep) { i++; continue }

      // Replace the block by its body and look at the same index again: the
      // first spliced child may itself be an unwrappable block.
      node.splice(i, 1, ...body)
    }
  })
  return ast
}
1460
+
1461
+ // ==================== COALESCE LOCALS ====================
1462
+
/**
 * Share one local slot between same-type locals whose live ranges don't
 * overlap, by renaming later locals to an earlier one. Declarations that end
 * up unused are left in place for a later pass to remove.
 *
 * Live range = [first, last] position of any `local.get`/`set`/`tee` in walk
 * order, widened to cover every loop that contains a reference (a value
 * written in one iteration may be read in the next). Slots are assigned
 * greedily by range start. Params are never touched; a function containing a
 * numeric local reference is skipped entirely.
 *
 * Soundness: locals start at zero, so a local may only *join* an existing slot
 * when its first reference is a write that is certain to execute before any
 * read. It must start its own slot instead when its first reference
 *   - is a `local.get`, or
 *   - sits inside an `if` arm, or
 *   - follows, inside the same block, a forward branch out of that block
 *     (`br`/`br_if`/`br_table`/`br_on_*`) — the write may be jumped over and a
 *     later read would then see the previous user's residue instead of zero.
 * Branches back to an enclosing `loop` cannot skip forward and are ignored.
 * Functions using `try`/`try_table` are skipped: a throw can bypass any write.
 *
 * @param {Array} ast - AST to rewrite; mutated in place
 * @returns {Array} the same `ast`
 */
const coalesceLocals = (ast) => {
  const BRANCH_OPS = new Set([
    'br', 'br_if', 'br_table', 'br_on_null', 'br_on_non_null', 'br_on_cast', 'br_on_cast_fail'
  ])
  const isDepth = (t) => typeof t === 'number' || (typeof t === 'string' && /^\d+$/.test(t))
  const labelOf = (n) => (typeof n[1] === 'string' && n[1][0] === '$' ? n[1] : null)

  walk(ast, (funcNode) => {
    if (!Array.isArray(funcNode) || funcNode[0] !== 'func') return

    // name → type of every named, single-type local declaration
    const decls = new Map()
    for (const sub of funcNode) {
      if (Array.isArray(sub) && sub[0] === 'local' &&
          typeof sub[1] === 'string' && sub[1][0] === '$' && typeof sub[2] === 'string') {
        decls.set(sub[1], sub[2])
      }
    }
    if (decls.size < 2) return

    const uses = new Map()
    const loopStack = []
    // Enclosing label scopes, innermost last. `skip` becomes true once a
    // forward branch to that scope has been seen: the rest of it may not run.
    const frames = []
    let pos = 0, abort = false, condDepth = 0

    // Record the effect of a branch to `target` on the enclosing scopes.
    const markTarget = (target) => {
      let frame
      if (isDepth(target)) {
        const depth = Number(target)
        if (depth === frames.length) return // function-level label: leaves the function
        frame = frames[frames.length - 1 - depth]
      } else {
        for (let i = frames.length - 1; i >= 0 && !frame; i--) if (frames[i].label === target) frame = frames[i]
      }
      if (!frame) { abort = true; return } // unresolvable target — don't guess
      if (!frame.loop) frame.skip = true
    }

    const visit = (n) => {
      if (abort || !Array.isArray(n)) return
      const op = n[0]
      if (op === 'try' || op === 'try_table') { abort = true; return }

      const isLoop = op === 'loop'
      const scoped = isLoop || op === 'block'
      if (isLoop) loopStack.push({ start: pos, end: pos })
      if (scoped) frames.push({ label: labelOf(n), loop: isLoop, skip: false })
      const isSet = op === 'local.set' || op === 'local.tee'

      if (isSet || op === 'local.get') {
        const name = n[1]
        if (typeof name !== 'string' || name[0] !== '$') { abort = true; return }
        // Execution order: the value operand runs BEFORE the write, so
        // `(local.set $x (… (local.get $x) …))` counts as read-then-write.
        if (isSet) for (let i = 2; i < n.length; i++) visit(n[i])
        const here = pos++
        if (decls.has(name)) {
          let u = uses.get(name)
          if (!u) {
            const skippable = condDepth > 0 || frames.some(f => f.skip)
            u = { start: here, end: here, firstOp: op, firstCond: skippable, loops: new Set() }
            uses.set(name, u)
          }
          if (here > u.end) u.end = here
          for (const ls of loopStack) u.loops.add(ls)
        }
      } else {
        pos++
        const isIf = op === 'if'
        const ifLabel = isIf ? labelOf(n) : null
        for (let i = 1; i < n.length; i++) {
          const c = n[i]
          // Only the arms are inside the `if`'s label scope, not the condition.
          const arm = isIf && Array.isArray(c) && (c[0] === 'then' || c[0] === 'else')
          if (arm) { condDepth++; frames.push({ label: ifLabel, loop: false, skip: false }) }
          visit(c)
          if (arm) { frames.pop(); condDepth-- }
        }
        // Operands run before the branch itself, so mark only afterwards.
        if (!abort && BRANCH_OPS.has(op)) {
          if (op === 'br_table') {
            for (let i = 1; i < n.length && !Array.isArray(n[i]); i++) markTarget(n[i])
          } else {
            markTarget(n[1])
          }
        }
      }

      if (scoped) frames.pop()
      if (isLoop) { const ls = loopStack.pop(); ls.end = pos }
    }
    visit(funcNode)
    if (abort) return

    // A use inside a loop must stay live for the whole loop — the next
    // iteration could read what this iteration wrote.
    for (const u of uses.values()) {
      for (const ls of u.loops) {
        if (ls.start < u.start) u.start = ls.start
        if (ls.end > u.end) u.end = ls.end
      }
    }

    const ordered = [...uses.entries()].sort((a, b) => a[1].start - b[1].start)
    const rename = new Map()
    const slots = []
    for (const [name, range] of ordered) {
      // Locals that may be read before a guaranteed write can *start* a slot
      // (it keeps the function's zero init) but never *join* one.
      const readsZero = range.firstOp === 'local.get' || range.firstCond
      const type = decls.get(name)
      const slot = readsZero ? null : slots.find(s => s.type === type && s.end < range.start)
      if (slot) { rename.set(name, slot.primary); if (range.end > slot.end) slot.end = range.end }
      else slots.push({ primary: name, type, end: range.end })
    }
    if (rename.size === 0) return

    walk(funcNode, (n) => {
      if (Array.isArray(n) &&
          (n[0] === 'local.get' || n[0] === 'local.set' || n[0] === 'local.tee') &&
          rename.has(n[1])) {
        n[1] = rename.get(n[1])
      }
    })
  })
  return ast
}
1570
+
1157
1571
  // ==================== VACUUM ====================
1158
1572
 
1159
1573
  /**
@@ -1300,52 +1714,99 @@ const peephole = (ast) => {
1300
1714
 
1301
1715
  // ==================== GLOBAL CONSTANT PROPAGATION ====================
1302
1716
 
/**
 * Number of bytes a value occupies when encoded as signed LEB128.
 *
 * Accepts a bigint, a number, or an integer literal as text. Text literals may
 * use the WAT spellings — `_` digit separators, a leading sign, and `0x` hex —
 * and are parsed exactly (no float round-trip), so wide i64 literals keep
 * their precision. Anything that isn't a finite integer (NaN, ±Infinity,
 * unparseable text) is sized as 0 → 1 byte; fractional numbers are truncated.
 *
 * @param {bigint|number|string} v
 * @returns {number} encoded length in bytes (≥ 1)
 */
const slebSize = (v) => {
  let x = null
  if (typeof v === 'bigint') {
    x = v
  } else if (typeof v === 'string') {
    const text = v.replace(/_/g, '')
    const negative = text.startsWith('-')
    const digits = negative || text.startsWith('+') ? text.slice(1) : text
    if (/^(?:0x[0-9a-f]+|\d+)$/i.test(digits)) x = negative ? -BigInt(digits) : BigInt(digits)
  }
  if (x === null) {
    // Numbers, and text that isn't a plain integer literal.
    const num = Math.trunc(Number(v))
    x = Number.isFinite(num) ? BigInt(num) : 0n
  }

  let n = 1
  while (true) {
    const b = x & 0x7fn
    x >>= 7n
    // Done once the remaining bits are pure sign extension of this group.
    if ((x === 0n && (b & 0x40n) === 0n) || (x === -1n && (b & 0x40n) !== 0n)) return n
    n++
  }
}
/**
 * Encoded byte size of a constant init instruction (opcode + immediate).
 * Integer constants are 1 opcode byte plus their signed-LEB128 immediate;
 * float and vector constants have fixed sizes. Anything else — a non-array,
 * `ref.null`, `ref.func`, `global.get` — is estimated at 4 bytes.
 *
 * @param {*} node - init expression node
 * @returns {number} size in bytes
 */
const constInstrSize = (node) => {
  if (!Array.isArray(node)) return 4
  const op = node[0]
  if (op === 'i32.const' || op === 'i64.const') return 1 + slebSize(node[1])
  if (op === 'f32.const') return 5
  if (op === 'f64.const') return 9
  if (op === 'v128.const') return 18
  return 4 // ref.null/ref.func/global.get — conservative
}

/** Assumed size of a `global.get`: 0x23 opcode + a 1-byte global index (typical). */
const GLOBAL_GET_SIZE = 2
1740
+
1303
1741
  /**
1304
- * Replace global.get of immutable globals with their constant init values.
1742
+ * Replace `global.get` of an immutable, const-initialised global with the
1743
+ * constant — but only when it doesn't grow the module. A `global.get` costs
1744
+ * ~2 B; an `i32.const 12345` costs 4 B; an `f64.const` costs 9 B. Naively
1745
+ * inlining a big constant read from many sites trades a few cheap reads + one
1746
+ * global decl for many fat immediates — pure bloat (and the node-count size
1747
+ * guard can't see it: same number of AST nodes). So we only propagate a global
1748
+ * when `refs·constSize ≤ refs·2 + declSize`; when every read is replaced and
1749
+ * the global isn't exported, its now-dead decl is dropped here too.
1305
1750
  * @param {Array} ast
1306
1751
  * @returns {Array}
1307
1752
  */
1308
1753
  const globals = (ast) => {
1309
1754
  if (!Array.isArray(ast) || ast[0] !== 'module') return ast
1310
1755
 
1311
- // Find immutable globals with const init
1312
- const constGlobals = new Map() // name → const node
1313
- const mutableGlobals = new Set()
1756
+ // Immutable globals with a constant init: name → init node.
1757
+ const constGlobals = new Map()
1758
+ const exported = new Set() // globals pinned by an export — keep the decl
1314
1759
 
1315
1760
  for (const node of ast.slice(1)) {
1316
- if (!Array.isArray(node) || node[0] !== 'global') continue
1761
+ if (!Array.isArray(node)) continue
1762
+ if (node[0] === 'export' && Array.isArray(node[2]) && node[2][0] === 'global' && typeof node[2][1] === 'string') { exported.add(node[2][1]); continue }
1763
+ if (node[0] !== 'global') continue
1317
1764
  const name = typeof node[1] === 'string' && node[1][0] === '$' ? node[1] : null
1318
1765
  if (!name) continue
1319
-
1320
- // Check mutability: (global $g (mut i32) init) vs (global $g i32 init)
1321
- const hasName = typeof node[1] === 'string' && node[1][0] === '$'
1322
- const initIdx = hasName ? 3 : 2
1323
-
1324
- // Skip mutable globals
1325
- const typeSlot = hasName ? node[2] : node[1]
1326
- if (Array.isArray(typeSlot) && typeSlot[0] === 'mut') continue
1327
-
1328
- const init = node[initIdx]
1766
+ // (global $g (export "x") …) inline export → pinned
1767
+ if (node.some(c => Array.isArray(c) && c[0] === 'export')) exported.add(name)
1768
+ const typeSlot = node[2]
1769
+ if (Array.isArray(typeSlot) && typeSlot[0] === 'mut') continue // mutable
1770
+ if (Array.isArray(typeSlot) && typeSlot[0] === 'import') continue // imported
1771
+ const init = node[3]
1329
1772
  if (getConst(init)) constGlobals.set(name, init)
1330
1773
  }
1774
+ if (constGlobals.size === 0) return ast
1331
1775
 
1332
- // Also mark any global that is ever written as mutable
1776
+ // Drop any global that is ever written (defensive — an immutable global can't
1777
+ // be, but a malformed module might) and tally read counts.
1778
+ const reads = new Map()
1333
1779
  walk(ast, (n) => {
1334
- if (!Array.isArray(n) || n[0] !== 'global.set') return
1780
+ if (!Array.isArray(n)) return
1335
1781
  const ref = n[1]
1336
- if (typeof ref === 'string' && ref[0] === '$') mutableGlobals.add(ref)
1782
+ if (typeof ref !== 'string' || ref[0] !== '$') return
1783
+ if (n[0] === 'global.set') constGlobals.delete(ref)
1784
+ else if (n[0] === 'global.get') reads.set(ref, (reads.get(ref) || 0) + 1)
1337
1785
  })
1338
1786
 
1339
- // Remove mutable ones from propagation set
1340
- for (const name of mutableGlobals) constGlobals.delete(name)
1341
- if (constGlobals.size === 0) return ast
1787
+ // Keep only globals where propagation is size-neutral or better.
1788
+ const propagate = new Set()
1789
+ for (const [name, init] of constGlobals) {
1790
+ const r = reads.get(name) || 0
1791
+ if (r === 0) continue // dead anyway — leave to treeshake
1792
+ const cs = constInstrSize(init)
1793
+ const declSize = cs + 2 // valtype + mutability byte + init expr + `end`
1794
+ const before = r * GLOBAL_GET_SIZE + declSize
1795
+ const after = r * cs + (exported.has(name) ? declSize : 0)
1796
+ if (after <= before) propagate.add(name)
1797
+ }
1798
+ if (propagate.size === 0) return ast
1342
1799
 
1343
- // Substitute global.get with const
1344
- return walkPost(ast, (node) => {
1800
+ walkPost(ast, (node) => {
1345
1801
  if (!Array.isArray(node) || node[0] !== 'global.get' || node.length !== 2) return
1346
- const ref = node[1]
1347
- if (constGlobals.has(ref)) return clone(constGlobals.get(ref))
1802
+ if (propagate.has(node[1])) return clone(constGlobals.get(node[1]))
1348
1803
  })
1804
+ // Their reads are all gone now — remove the decls we're free to remove.
1805
+ for (let i = ast.length - 1; i >= 1; i--) {
1806
+ const n = ast[i]
1807
+ if (Array.isArray(n) && n[0] === 'global' && typeof n[1] === 'string' && propagate.has(n[1]) && !exported.has(n[1])) ast.splice(i, 1)
1808
+ }
1809
+ return ast
1349
1810
  }
1350
1811
 
1351
1812
  // ==================== LOAD/STORE OFFSET FOLDING ====================
@@ -1458,7 +1919,9 @@ const unbranch = (ast) => {
1458
1919
 
1459
1920
  const last = node[lastIdx]
1460
1921
  if (Array.isArray(last) && last[0] === 'br' && last[1] === label) {
1461
- node.splice(lastIdx, 1)
1922
+ // `(br $L v…)` as a block's last instruction just leaves v… as the block's
1923
+ // result — splice the value operand(s) in its place (none → plain removal).
1924
+ node.splice(lastIdx, 1, ...last.slice(2))
1462
1925
  }
1463
1926
  })
1464
1927
 
@@ -2092,9 +2555,15 @@ export default function optimize(ast, opts = true) {
2092
2555
  ast = clone(ast)
2093
2556
  let beforeRound = null
2094
2557
 
2558
+ // Size guard works on encoded bytes, not AST node count: passes like
2559
+ // `globals` / `inlineOnce` are node-count-neutral yet move real bytes
2560
+ // (a `global.get` ↔ a fat `f64.const`; a `call` ↔ an inlined body), so a
2561
+ // node-count guard can't tell when a round bloated — or shrank. `binarySize`
2562
+ // also returns Infinity if a round produced invalid wat, so a broken round
2563
+ // reverts instead of escaping.
2095
2564
  for (let round = 0; round < 3; round++) {
2096
2565
  beforeRound = clone(ast)
2097
- const sizeBefore = count(ast)
2566
+ const sizeBefore = binarySize(ast)
2098
2567
 
2099
2568
  if (opts.stripmut) ast = stripmut(ast)
2100
2569
  if (opts.globals) ast = globals(ast)
@@ -2104,6 +2573,7 @@ export default function optimize(ast, opts = true) {
2104
2573
  if (opts.strength) ast = strength(ast)
2105
2574
  if (opts.branch) ast = branch(ast)
2106
2575
  if (opts.propagate) ast = propagate(ast)
2576
+ if (opts.inlineOnce) ast = inlineOnce(ast)
2107
2577
  if (opts.inline) ast = inline(ast)
2108
2578
  if (opts.offset) ast = offset(ast)
2109
2579
  if (opts.unbranch) ast = unbranch(ast)
@@ -2111,6 +2581,8 @@ export default function optimize(ast, opts = true) {
2111
2581
  if (opts.foldarms) ast = foldarms(ast)
2112
2582
  if (opts.deadcode) ast = deadcode(ast)
2113
2583
  if (opts.vacuum) ast = vacuum(ast)
2584
+ if (opts.mergeBlocks) ast = mergeBlocks(ast)
2585
+ if (opts.coalesce) ast = coalesceLocals(ast)
2114
2586
  if (opts.locals) ast = localReuse(ast)
2115
2587
  if (opts.dedupe) ast = dedupe(ast)
2116
2588
  if (opts.dedupTypes) ast = dedupTypes(ast)
@@ -2118,19 +2590,25 @@ export default function optimize(ast, opts = true) {
2118
2590
  if (opts.reorder) ast = reorder(ast)
2119
2591
  if (opts.treeshake) ast = treeshake(ast)
2120
2592
  if (opts.minifyImports) ast = minifyImports(ast)
2121
-
2122
- const sizeAfter = count(ast)
2593
+ // Second propagate sweep: `inlineOnce`/`inline` (above) leave fresh
2594
+ // `(local.set $p arg) … (local.get $p)` wrappers around each inlined call;
2595
+ // re-running propagation collapses them within this same round, so the size
2596
+ // guard scores the cleaned result instead of waiting a round (which it may
2597
+ // never get if `equal()` declares a fixpoint first).
2598
+ if (opts.propagate && (opts.inlineOnce || opts.inline)) ast = propagate(ast)
2599
+
2600
+ const sizeAfter = binarySize(ast)
2123
2601
  const delta = sizeAfter - sizeBefore
2124
2602
 
2125
2603
  if (verbose || delta !== 0) {
2126
- log(` round ${round + 1}: ${delta > 0 ? '+' : ''}${delta} nodes`, delta)
2604
+ log(` round ${round + 1}: ${delta > 0 ? '+' : ''}${delta} bytes`, delta)
2127
2605
  }
2128
2606
 
2129
- // Size guard: default optimize must never inflate. Explicit passes
2130
- // get leniency (+5 nodes) so inline/propagate/foldarms can chain.
2131
- const tolerance = strictGuard ? 0 : 5
2607
+ // Size guard: default optimize must never inflate. Explicit passes get a
2608
+ // little leniency (a round may grow a few bytes setting up a bigger win).
2609
+ const tolerance = strictGuard ? 0 : 16
2132
2610
  if (delta > tolerance) {
2133
- if (verbose) log(` ⚠ round ${round + 1} inflated by ${delta}, reverting`, delta)
2611
+ if (verbose) log(` ⚠ round ${round + 1} inflated by ${delta} bytes, reverting`, delta)
2134
2612
  ast = beforeRound
2135
2613
  break
2136
2614
  }
@@ -2143,4 +2621,4 @@ export default function optimize(ast, opts = true) {
2143
2621
 
2144
2622
  /** Count AST nodes (fast size heuristic). */
2145
2623
  export { count as size, count, binarySize }
2146
- export { optimize, treeshake, fold, deadcode, localReuse, identity, strength, branch, propagate, inline, normalize, OPTS, vacuum, peephole, globals, offset, unbranch, stripmut, brif, foldarms, dedupe, reorder, dedupTypes, packData, minifyImports }
2624
+ export { optimize, treeshake, fold, deadcode, localReuse, identity, strength, branch, propagate, inline, inlineOnce, normalize, OPTS, vacuum, peephole, globals, offset, unbranch, stripmut, brif, foldarms, dedupe, reorder, dedupTypes, packData, minifyImports, mergeBlocks, coalesceLocals }