watr 4.5.3 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/watr.js +441 -43
- package/dist/watr.min.js +6 -6
- package/dist/watr.wasm +0 -0
- package/package.json +8 -3
- package/readme.md +1 -1
- package/src/optimize.js +527 -49
- package/types/src/optimize.d.ts +68 -6
- package/types/src/optimize.d.ts.map +1 -1
package/src/optimize.js
CHANGED
|
@@ -18,9 +18,12 @@ const OPTS = {
|
|
|
18
18
|
identity: true, // remove identity ops (x + 0 → x)
|
|
19
19
|
strength: true, // strength reduction (x * 2 → x << 1)
|
|
20
20
|
branch: true, // simplify constant branches
|
|
21
|
-
propagate:
|
|
21
|
+
propagate: true, // forward-propagate single-use locals & tiny consts (never inflates)
|
|
22
22
|
inline: false, // inline tiny functions — can duplicate bodies
|
|
23
|
+
inlineOnce: true, // inline single-call functions into their lone caller (never duplicates)
|
|
23
24
|
vacuum: true, // remove nops, drop-of-pure, empty branches
|
|
25
|
+
mergeBlocks: true, // unwrap `(block $L …)` whose label is never targeted
|
|
26
|
+
coalesce: true, // share local slots between same-type non-overlapping locals
|
|
24
27
|
peephole: true, // x-x→0, x&0→0, etc.
|
|
25
28
|
globals: true, // propagate immutable global constants
|
|
26
29
|
offset: true, // fold add+const into load/store offset
|
|
@@ -855,8 +858,33 @@ const countLocalUses = (node) => {
|
|
|
855
858
|
return counts
|
|
856
859
|
}
|
|
857
860
|
|
|
858
|
-
/**
|
|
859
|
-
|
|
861
|
+
/** A constant whose inlined form (opcode + immediate) is no wider than the ~2 B
|
|
862
|
+
* `local.get` it would replace — so propagating it to every use is byte-neutral
|
|
863
|
+
* at worst, and still drops the `local.set` + the `local` decl. f32/f64 consts
|
|
864
|
+
* (5/9 B) lose on reuse, so only narrow i32/i64 literals qualify. */
|
|
865
|
+
const isTinyConst = (node) => {
|
|
866
|
+
const c = getConst(node)
|
|
867
|
+
if (!c) return false
|
|
868
|
+
if (c.type === 'i32') { const v = c.value | 0; return v >= -64 && v <= 63 }
|
|
869
|
+
if (c.type === 'i64') { const v = typeof c.value === 'bigint' ? c.value : BigInt(c.value); return v >= -64n && v <= 63n }
|
|
870
|
+
return false
|
|
871
|
+
}
|
|
872
|
+
|
|
873
|
+
/** Can this tracked value be substituted for a local.get?
|
|
874
|
+
* - single use of a pure value: always shrinks (drops the set, the lone get, the decl);
|
|
875
|
+
* - any use of a tiny constant: byte-neutral at worst, still drops the set + decl.
|
|
876
|
+
* Anything else (a wide constant reused many times, an impure expr) could inflate
|
|
877
|
+
* or reorder side effects, so it's left alone. */
|
|
878
|
+
const canSubst = (k) => (k.pure && k.singleUse) || isTinyConst(k.val)
|
|
879
|
+
|
|
880
|
+
/** Drop tracked values that read `$name`: rewriting `$name` makes them stale. */
|
|
881
|
+
const purgeRefs = (known, name) => {
|
|
882
|
+
for (const [key, tracked] of known) {
|
|
883
|
+
let refs = false
|
|
884
|
+
walk(tracked.val, n => { if (Array.isArray(n) && (n[0] === 'local.get' || n[0] === 'local.tee') && n[1] === name) refs = true })
|
|
885
|
+
if (refs) known.delete(key)
|
|
886
|
+
}
|
|
887
|
+
}
|
|
860
888
|
|
|
861
889
|
/** Try substitute local.get nodes with known values */
|
|
862
890
|
const substGets = (node, known) => walkPost(node, n => {
|
|
@@ -889,6 +917,7 @@ const forwardPropagate = (funcNode, params, useCounts) => {
|
|
|
889
917
|
if ((op === 'local.set' || op === 'local.tee') && instr.length === 3 && typeof instr[1] === 'string') {
|
|
890
918
|
substGets(instr[2], known) // substitute known values in RHS
|
|
891
919
|
const uses = getUseCount(instr[1])
|
|
920
|
+
purgeRefs(known, instr[1]) // entries that read this local just went stale
|
|
892
921
|
known.set(instr[1], {
|
|
893
922
|
val: instr[2], pure: isPure(instr[2]),
|
|
894
923
|
singleUse: uses.gets <= 1 && uses.sets <= 1 && uses.tees === 0
|
|
@@ -923,7 +952,7 @@ const forwardPropagate = (funcNode, params, useCounts) => {
|
|
|
923
952
|
// (untracked) value, not the stale constant.
|
|
924
953
|
walk(instr, n => {
|
|
925
954
|
if (Array.isArray(n) && (n[0] === 'local.set' || n[0] === 'local.tee') && typeof n[1] === 'string')
|
|
926
|
-
known.delete(n[1])
|
|
955
|
+
{ known.delete(n[1]); purgeRefs(known, n[1]) }
|
|
927
956
|
})
|
|
928
957
|
}
|
|
929
958
|
}
|
|
@@ -1024,6 +1053,10 @@ const eliminateDeadStores = (funcNode, params, useCounts) => {
|
|
|
1024
1053
|
* Constants propagate to all uses; pure single-use exprs inline into get site.
|
|
1025
1054
|
* Multi-pass with batch counting for convergence.
|
|
1026
1055
|
*/
|
|
1056
|
+
/** Block-like nodes whose body is a straight-line instruction list (after any header). */
|
|
1057
|
+
const isScopeNode = (n) => Array.isArray(n) &&
|
|
1058
|
+
(n[0] === 'func' || n[0] === 'block' || n[0] === 'loop' || n[0] === 'then' || n[0] === 'else')
|
|
1059
|
+
|
|
1027
1060
|
const propagate = (ast) => {
|
|
1028
1061
|
walk(ast, (funcNode) => {
|
|
1029
1062
|
if (!Array.isArray(funcNode) || funcNode[0] !== 'func') return
|
|
@@ -1032,16 +1065,30 @@ const propagate = (ast) => {
|
|
|
1032
1065
|
for (const sub of funcNode)
|
|
1033
1066
|
if (Array.isArray(sub) && sub[0] === 'param' && typeof sub[1] === 'string') params.add(sub[1])
|
|
1034
1067
|
|
|
1035
|
-
//
|
|
1036
|
-
// (
|
|
1037
|
-
//
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1068
|
+
// Propagation runs per straight-line scope: the function body and every nested
|
|
1069
|
+
// `block`/`loop`/`then`/`else` (including ones embedded in an expression, e.g. the
|
|
1070
|
+
// `(block (result i32) …)` an inlined call leaves behind). Collect scopes deepest-
|
|
1071
|
+
// first so inner simplifications shrink the use-counts the outer scopes see.
|
|
1072
|
+
// Use-counts are always whole-function — a set/get pair or dead store is only
|
|
1073
|
+
// touched when it's globally the sole occurrence, so per-scope work stays sound.
|
|
1074
|
+
const scopes = []
|
|
1075
|
+
walkPost(funcNode, n => { if (isScopeNode(n)) scopes.push(n) })
|
|
1076
|
+
|
|
1077
|
+
// One use-count per round, shared by every scope: substitutions only ever
|
|
1078
|
+
// *drop* gets, so a stale count can only make a sub-pass act more cautiously
|
|
1079
|
+
// (skip a not-yet-provably-dead store, decline a not-yet-provably-single use) —
|
|
1080
|
+
// never wrongly. The next round re-counts and mops up. (Recounting per sub-pass
|
|
1081
|
+
// per scope is O(scopes·funcSize) and crippling on big modules.)
|
|
1082
|
+
for (let round = 0; round < 6; round++) {
|
|
1083
|
+
const useCounts = countLocalUses(funcNode)
|
|
1084
|
+
let progressed = false
|
|
1085
|
+
for (const scope of scopes) {
|
|
1086
|
+
if (forwardPropagate(scope, params, useCounts)) progressed = true
|
|
1087
|
+
if (eliminateSetGetPairs(scope, params, useCounts)) progressed = true
|
|
1088
|
+
if (createLocalTees(scope, params, useCounts)) progressed = true
|
|
1089
|
+
if (eliminateDeadStores(scope, params, useCounts)) progressed = true
|
|
1090
|
+
}
|
|
1091
|
+
if (!progressed) break
|
|
1045
1092
|
}
|
|
1046
1093
|
})
|
|
1047
1094
|
|
|
@@ -1154,6 +1201,373 @@ const inline = (ast) => {
|
|
|
1154
1201
|
return ast
|
|
1155
1202
|
}
|
|
1156
1203
|
|
|
1204
|
+
// ==================== INLINE-ONCE ====================
|
|
1205
|
+
|
|
1206
|
+
let inlineUid = 0
|
|
1207
|
+
|
|
1208
|
+
/**
|
|
1209
|
+
* Inline functions that are called from exactly one place into their lone caller,
|
|
1210
|
+
* then delete them. Unlike {@link inline} (which duplicates tiny stateless bodies),
|
|
1211
|
+
* this never duplicates code and never inflates: each inlined function drops a
|
|
1212
|
+
* function-section entry, a type-section entry (if now unused), and a `call`
|
|
1213
|
+
* instruction, paying back only a `block`/`local.set` wrapper. This is what
|
|
1214
|
+
* `wasm-opt -Oz` does — collapsing helper chains down to a couple of functions —
|
|
1215
|
+
* and it's the bulk of the gap between hand-tuned WASM and naive codegen.
|
|
1216
|
+
*
|
|
1217
|
+
* A function `$f` qualifies when it is, all of:
|
|
1218
|
+
* • named, with named params and locals (numeric indices can't be safely renamed);
|
|
1219
|
+
* • referenced exactly once across the whole module, by a plain `call` (no
|
|
1220
|
+
* `return_call`, `ref.func`, `elem`, `export`, or `start` reference, and not
|
|
1221
|
+
* recursive);
|
|
1222
|
+
* • single-result or void (a multi-value result can't be modeled as `(block (result …))`);
|
|
1223
|
+
* • free of numeric (depth-relative) branch labels — those would shift under the
|
|
1224
|
+
* extra block nesting — and of `return_call*` in its body.
|
|
1225
|
+
*
|
|
1226
|
+
* `(call $f a0 a1 …)` becomes
|
|
1227
|
+
* (block $__inlN (result T)?
|
|
1228
|
+
* (local.set $__inlN_p0 a0) (local.set $__inlN_p1 a1) … ;; args evaluated once, in order
|
|
1229
|
+
* …body, params/locals renamed to $__inlN_*, `return X` → `br $__inlN X`…)
|
|
1230
|
+
* and the renamed params+locals are appended to the caller's `local` decls; the
|
|
1231
|
+
* body's own block/loop/if labels are renamed too so they can't shadow the caller's.
|
|
1232
|
+
* Runs to a fixpoint so helper chains fully collapse.
|
|
1233
|
+
*
|
|
1234
|
+
* @param {Array} ast
|
|
1235
|
+
* @returns {Array}
|
|
1236
|
+
*/
|
|
1237
|
+
const inlineOnce = (ast) => {
|
|
1238
|
+
if (!Array.isArray(ast) || ast[0] !== 'module') return ast
|
|
1239
|
+
|
|
1240
|
+
const HEAD = new Set(['export', 'type', 'param', 'result', 'local'])
|
|
1241
|
+
const bodyStart = (fn) => {
|
|
1242
|
+
let i = 2
|
|
1243
|
+
while (i < fn.length && (typeof fn[i] === 'string' || (Array.isArray(fn[i]) && HEAD.has(fn[i][0])))) i++
|
|
1244
|
+
return i
|
|
1245
|
+
}
|
|
1246
|
+
const isBranch = op => op === 'br' || op === 'br_if' || op === 'br_table'
|
|
1247
|
+
// A subtree we can't lift into a (block …): depth-relative branch labels (shift
|
|
1248
|
+
// under added nesting) or tail calls (would escape the wrapping block).
|
|
1249
|
+
const unsafe = (n) => {
|
|
1250
|
+
if (!Array.isArray(n)) return false
|
|
1251
|
+
const op = n[0]
|
|
1252
|
+
if (op === 'return_call' || op === 'return_call_indirect' || op === 'return_call_ref') return true
|
|
1253
|
+
if (op === 'try' || op === 'try_table' || op === 'delegate' || op === 'rethrow') return true // exception labels — not handled by the relabeler below
|
|
1254
|
+
if (isBranch(op)) for (let i = 1; i < n.length; i++) if (typeof n[i] === 'number' || (typeof n[i] === 'string' && /^\d+$/.test(n[i]))) return true
|
|
1255
|
+
for (let i = 1; i < n.length; i++) if (unsafe(n[i])) return true
|
|
1256
|
+
return false
|
|
1257
|
+
}
|
|
1258
|
+
const callsSelf = (n, name) => {
|
|
1259
|
+
if (!Array.isArray(n)) return false
|
|
1260
|
+
if ((n[0] === 'call' || n[0] === 'return_call') && n[1] === name) return true
|
|
1261
|
+
for (let i = 1; i < n.length; i++) if (callsSelf(n[i], name)) return true
|
|
1262
|
+
return false
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1265
|
+
// Module-level references that pin a function (can't be removed/inlined-away).
|
|
1266
|
+
const collectPinned = (n, pinned) => {
|
|
1267
|
+
if (!Array.isArray(n)) return
|
|
1268
|
+
const op = n[0]
|
|
1269
|
+
if (op === 'export' && Array.isArray(n[2]) && n[2][0] === 'func' && typeof n[2][1] === 'string') pinned.add(n[2][1])
|
|
1270
|
+
else if (op === 'start' && typeof n[1] === 'string') pinned.add(n[1])
|
|
1271
|
+
else if (op === 'ref.func' && typeof n[1] === 'string') pinned.add(n[1])
|
|
1272
|
+
else if (op === 'elem') for (const c of n) if (typeof c === 'string' && c[0] === '$') pinned.add(c)
|
|
1273
|
+
for (const c of n) collectPinned(c, pinned)
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
for (let round = 0; round < 16; round++) {
|
|
1277
|
+
const funcs = ast.filter(n => Array.isArray(n) && n[0] === 'func')
|
|
1278
|
+
const funcByName = new Map()
|
|
1279
|
+
for (const n of funcs) if (typeof n[1] === 'string') funcByName.set(n[1], n)
|
|
1280
|
+
|
|
1281
|
+
// Count plain-call references across the WHOLE module (anonymous exported funcs
|
|
1282
|
+
// call helpers too); flag any non-call reference (return_call etc.).
|
|
1283
|
+
const callRefs = new Map(), otherRef = new Set()
|
|
1284
|
+
const countRefs = (n) => {
|
|
1285
|
+
if (!Array.isArray(n)) return
|
|
1286
|
+
const op = n[0]
|
|
1287
|
+
if (op === 'call' && typeof n[1] === 'string') callRefs.set(n[1], (callRefs.get(n[1]) || 0) + 1)
|
|
1288
|
+
else if (op === 'return_call' && typeof n[1] === 'string') otherRef.add(n[1])
|
|
1289
|
+
for (let i = 1; i < n.length; i++) countRefs(n[i])
|
|
1290
|
+
}
|
|
1291
|
+
countRefs(ast)
|
|
1292
|
+
const pinned = new Set()
|
|
1293
|
+
for (const n of ast) if (!Array.isArray(n) || n[0] !== 'func') collectPinned(n, pinned)
|
|
1294
|
+
// a func may carry its own (export "name") — the signature scan below rejects those too
|
|
1295
|
+
|
|
1296
|
+
// Pick a callee.
|
|
1297
|
+
let calleeName = null
|
|
1298
|
+
for (const [name, fn] of funcByName) {
|
|
1299
|
+
if (pinned.has(name) || otherRef.has(name)) continue
|
|
1300
|
+
if (callRefs.get(name) !== 1) continue
|
|
1301
|
+
if (callsSelf(fn, name)) continue
|
|
1302
|
+
// named params/locals only; collect signature
|
|
1303
|
+
let ok = true, nResult = 0
|
|
1304
|
+
for (let i = 2; i < fn.length; i++) {
|
|
1305
|
+
const c = fn[i]
|
|
1306
|
+
if (typeof c === 'string') continue
|
|
1307
|
+
if (!Array.isArray(c)) { ok = false; break }
|
|
1308
|
+
if (c[0] === 'param' || c[0] === 'local') { if (typeof c[1] !== 'string' || c[1][0] !== '$') { ok = false; break } }
|
|
1309
|
+
else if (c[0] === 'result') nResult += c.length - 1
|
|
1310
|
+
else if (c[0] === 'export') { ok = false; break }
|
|
1311
|
+
else if (c[0] === 'type') continue
|
|
1312
|
+
else break
|
|
1313
|
+
}
|
|
1314
|
+
if (!ok || nResult > 1) continue
|
|
1315
|
+
let bad = false
|
|
1316
|
+
for (let i = bodyStart(fn); i < fn.length; i++) if (unsafe(fn[i])) { bad = true; break }
|
|
1317
|
+
if (bad) continue
|
|
1318
|
+
calleeName = name; break
|
|
1319
|
+
}
|
|
1320
|
+
if (!calleeName) break
|
|
1321
|
+
|
|
1322
|
+
const callee = funcByName.get(calleeName)
|
|
1323
|
+
const params = [], locals = []
|
|
1324
|
+
let resultType = null
|
|
1325
|
+
for (let i = 2; i < callee.length; i++) {
|
|
1326
|
+
const c = callee[i]
|
|
1327
|
+
if (typeof c === 'string' || !Array.isArray(c)) continue
|
|
1328
|
+
if (c[0] === 'param') params.push({ name: c[1], type: c[2] })
|
|
1329
|
+
else if (c[0] === 'result') { if (c.length > 1) resultType = c[1] }
|
|
1330
|
+
else if (c[0] === 'local') locals.push({ name: c[1], type: c[2] })
|
|
1331
|
+
else if (c[0] === 'export' || c[0] === 'type') continue
|
|
1332
|
+
else break
|
|
1333
|
+
}
|
|
1334
|
+
const cBody = callee.slice(bodyStart(callee))
|
|
1335
|
+
|
|
1336
|
+
const uid = ++inlineUid
|
|
1337
|
+
const exit = `$__inl${uid}`
|
|
1338
|
+
const rename = new Map()
|
|
1339
|
+
for (const p of params) rename.set(p.name, `$__inl${uid}_${p.name.slice(1)}`)
|
|
1340
|
+
for (const l of locals) rename.set(l.name, `$__inl${uid}_${l.name.slice(1)}`)
|
|
1341
|
+
// The callee's own block/loop/if labels would shadow same-named labels in the
|
|
1342
|
+
// caller after nesting (and break depth resolution) — give them fresh names too.
|
|
1343
|
+
const isBlockLabel = op => op === 'block' || op === 'loop' || op === 'if'
|
|
1344
|
+
const labelRename = new Map()
|
|
1345
|
+
const collectLabels = (n) => {
|
|
1346
|
+
if (!Array.isArray(n)) return
|
|
1347
|
+
if (isBlockLabel(n[0]) && typeof n[1] === 'string' && n[1][0] === '$' && !labelRename.has(n[1]))
|
|
1348
|
+
labelRename.set(n[1], `$__inl${uid}L_${n[1].slice(1)}`)
|
|
1349
|
+
for (let i = 1; i < n.length; i++) collectLabels(n[i])
|
|
1350
|
+
}
|
|
1351
|
+
for (const n of cBody) collectLabels(n)
|
|
1352
|
+
const sub = (n) => {
|
|
1353
|
+
if (!Array.isArray(n)) return n
|
|
1354
|
+
const op = n[0]
|
|
1355
|
+
if ((op === 'local.get' || op === 'local.set' || op === 'local.tee') && typeof n[1] === 'string' && rename.has(n[1]))
|
|
1356
|
+
return [op, rename.get(n[1]), ...n.slice(2).map(sub)]
|
|
1357
|
+
if (op === 'return') return ['br', exit, ...n.slice(1).map(sub)]
|
|
1358
|
+
if (isBlockLabel(op) && typeof n[1] === 'string' && labelRename.has(n[1]))
|
|
1359
|
+
return [op, labelRename.get(n[1]), ...n.slice(2).map(sub)]
|
|
1360
|
+
if (isBranch(op)) return [op, ...n.slice(1).map(c => (typeof c === 'string' && labelRename.has(c)) ? labelRename.get(c) : sub(c))]
|
|
1361
|
+
return n.map((c, i) => i === 0 ? c : sub(c))
|
|
1362
|
+
}
|
|
1363
|
+
|
|
1364
|
+
// Splice into the (unique) caller (which may be an anonymous exported func).
|
|
1365
|
+
let done = false
|
|
1366
|
+
for (const fn of funcs) {
|
|
1367
|
+
if (fn === callee || done) continue
|
|
1368
|
+
const start = bodyStart(fn)
|
|
1369
|
+
for (let i = start; i < fn.length; i++) {
|
|
1370
|
+
const replaced = walkPost(fn[i], (n) => {
|
|
1371
|
+
if (done || !Array.isArray(n) || n[0] !== 'call' || n[1] !== calleeName) return
|
|
1372
|
+
const args = n.slice(2)
|
|
1373
|
+
if (args.length !== params.length) return // arity mismatch — leave it
|
|
1374
|
+
const setup = params.map((p, k) => ['local.set', rename.get(p.name), args[k]])
|
|
1375
|
+
const inner = cBody.map(sub)
|
|
1376
|
+
done = true
|
|
1377
|
+
return resultType
|
|
1378
|
+
? ['block', exit, ['result', resultType], ...setup, ...inner]
|
|
1379
|
+
: ['block', exit, ...setup, ...inner]
|
|
1380
|
+
})
|
|
1381
|
+
if (replaced !== fn[i]) fn[i] = replaced
|
|
1382
|
+
if (done) {
|
|
1383
|
+
const decls = [...params, ...locals].map(p => ['local', rename.get(p.name), p.type])
|
|
1384
|
+
if (decls.length) fn.splice(bodyStart(fn), 0, ...decls)
|
|
1385
|
+
break
|
|
1386
|
+
}
|
|
1387
|
+
}
|
|
1388
|
+
if (done) break
|
|
1389
|
+
}
|
|
1390
|
+
if (!done) break // call site not found inside a func body — give up
|
|
1391
|
+
|
|
1392
|
+
const idx = ast.indexOf(callee)
|
|
1393
|
+
if (idx >= 0) ast.splice(idx, 1)
|
|
1394
|
+
}
|
|
1395
|
+
|
|
1396
|
+
return ast
|
|
1397
|
+
}
|
|
1398
|
+
|
|
1399
|
+
// ==================== MERGE BLOCKS ====================
|
|
1400
|
+
|
|
1401
|
+
/**
|
|
1402
|
+
* Does `body` contain a branch instruction targeting `label`, ignoring inner
|
|
1403
|
+
* blocks/loops that re-bind the same label?
|
|
1404
|
+
*/
|
|
1405
|
+
const targetsLabel = (body, label) => {
|
|
1406
|
+
let found = false
|
|
1407
|
+
const search = (n, shadowed) => {
|
|
1408
|
+
if (found || !Array.isArray(n)) return
|
|
1409
|
+
const op = n[0]
|
|
1410
|
+
let inner = shadowed
|
|
1411
|
+
if ((op === 'block' || op === 'loop') && typeof n[1] === 'string' && n[1] === label) inner = true
|
|
1412
|
+
if (!shadowed) {
|
|
1413
|
+
if (op === 'br' || op === 'br_if' || op === 'br_on_null' || op === 'br_on_non_null' ||
|
|
1414
|
+
op === 'br_on_cast' || op === 'br_on_cast_fail') {
|
|
1415
|
+
if (n[1] === label) { found = true; return }
|
|
1416
|
+
} else if (op === 'br_table') {
|
|
1417
|
+
for (let j = 1; j < n.length; j++) {
|
|
1418
|
+
if (typeof n[j] === 'string') { if (n[j] === label) { found = true; return } }
|
|
1419
|
+
else break
|
|
1420
|
+
}
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1423
|
+
for (let i = 1; i < n.length; i++) search(n[i], inner)
|
|
1424
|
+
}
|
|
1425
|
+
for (const node of body) search(node, false)
|
|
1426
|
+
return found
|
|
1427
|
+
}
|
|
1428
|
+
|
|
1429
|
+
/**
|
|
1430
|
+
* Unwrap redundant `(block $L body)` whose label is never targeted, splicing
|
|
1431
|
+
* the body into the surrounding scope. Skips blocks with `param`/`result`/`type`
|
|
1432
|
+
* annotations (their stack effect would change). Each unwrap drops the
|
|
1433
|
+
* `block`+`end` framing bytes; iterates by walk so chained blocks collapse.
|
|
1434
|
+
* @param {Array} ast
|
|
1435
|
+
* @returns {Array}
|
|
1436
|
+
*/
|
|
1437
|
+
const mergeBlocks = (ast) => {
|
|
1438
|
+
walk(ast, (node) => {
|
|
1439
|
+
if (!isScopeNode(node)) return
|
|
1440
|
+
let i = 1
|
|
1441
|
+
while (i < node.length) {
|
|
1442
|
+
const child = node[i]
|
|
1443
|
+
if (!Array.isArray(child) || child[0] !== 'block') { i++; continue }
|
|
1444
|
+
let bi = 1, label = null
|
|
1445
|
+
if (typeof child[1] === 'string' && child[1][0] === '$') { label = child[1]; bi = 2 }
|
|
1446
|
+
let typed = false
|
|
1447
|
+
for (let j = bi; j < child.length; j++) {
|
|
1448
|
+
const c = child[j]
|
|
1449
|
+
if (Array.isArray(c) && (c[0] === 'param' || c[0] === 'result' || c[0] === 'type')) { typed = true; break }
|
|
1450
|
+
}
|
|
1451
|
+
if (typed) { i++; continue }
|
|
1452
|
+
const body = child.slice(bi)
|
|
1453
|
+
if (label && targetsLabel(body, label)) { i++; continue }
|
|
1454
|
+
node.splice(i, 1, ...body)
|
|
1455
|
+
i += body.length
|
|
1456
|
+
}
|
|
1457
|
+
})
|
|
1458
|
+
return ast
|
|
1459
|
+
}
|
|
1460
|
+
|
|
1461
|
+
// ==================== COALESCE LOCALS ====================
|
|
1462
|
+
|
|
1463
|
+
/**
|
|
1464
|
+
* Share local slots between same-type locals with non-overlapping live ranges.
|
|
1465
|
+
* Live range = [first pos, last pos] of any local.get/set/tee, extended over
|
|
1466
|
+
* any loop containing a reference (so a value read across loop iterations stays
|
|
1467
|
+
* intact). Greedy slot assignment by start position. Params and unnamed/numeric
|
|
1468
|
+
* references are left alone; `localReuse` later removes the renamed-away decls.
|
|
1469
|
+
*
|
|
1470
|
+
* Soundness: WASM zero-initializes locals at function entry, so a local whose
|
|
1471
|
+
* first reference (in walk order) is a `local.get` *relies* on that implicit
|
|
1472
|
+
* zero — coalescing it into a slot whose previous user left a non-zero residue
|
|
1473
|
+
* would silently change behavior (e.g. a `for (let i=0; …)` loop counter
|
|
1474
|
+
* inheriting `N*4` from a sibling temp). Such "read-first" locals can still
|
|
1475
|
+
* serve as a slot's *primary* (the slot then keeps the function's zero start),
|
|
1476
|
+
* but can never be a donor merged into an existing slot.
|
|
1477
|
+
* @param {Array} ast
|
|
1478
|
+
* @returns {Array}
|
|
1479
|
+
*/
|
|
1480
|
+
const coalesceLocals = (ast) => {
|
|
1481
|
+
walk(ast, (funcNode) => {
|
|
1482
|
+
if (!Array.isArray(funcNode) || funcNode[0] !== 'func') return
|
|
1483
|
+
|
|
1484
|
+
const decls = new Map()
|
|
1485
|
+
for (const sub of funcNode) {
|
|
1486
|
+
if (Array.isArray(sub) && sub[0] === 'local' &&
|
|
1487
|
+
typeof sub[1] === 'string' && sub[1][0] === '$' && typeof sub[2] === 'string') {
|
|
1488
|
+
decls.set(sub[1], sub[2])
|
|
1489
|
+
}
|
|
1490
|
+
}
|
|
1491
|
+
if (decls.size < 2) return
|
|
1492
|
+
|
|
1493
|
+
const uses = new Map()
|
|
1494
|
+
const loopStack = []
|
|
1495
|
+
let pos = 0, abort = false, condDepth = 0
|
|
1496
|
+
|
|
1497
|
+
const visit = (n) => {
|
|
1498
|
+
if (abort || !Array.isArray(n)) return
|
|
1499
|
+
const op = n[0]
|
|
1500
|
+
const isLoop = op === 'loop'
|
|
1501
|
+
if (isLoop) loopStack.push({ start: pos, end: pos })
|
|
1502
|
+
const isSet = op === 'local.set' || op === 'local.tee'
|
|
1503
|
+
|
|
1504
|
+
if (isSet || op === 'local.get') {
|
|
1505
|
+
const name = n[1]
|
|
1506
|
+
if (typeof name !== 'string' || name[0] !== '$') { abort = true; return }
|
|
1507
|
+
// Execution order: evaluate set/tee value BEFORE recording the write,
|
|
1508
|
+
// so a `(local.set $x (… (local.get $x) …))` is correctly seen as a
|
|
1509
|
+
// read-then-write of $x (firstOp = local.get).
|
|
1510
|
+
if (isSet) for (let i = 2; i < n.length; i++) visit(n[i])
|
|
1511
|
+
const here = pos++
|
|
1512
|
+
if (decls.has(name)) {
|
|
1513
|
+
let u = uses.get(name)
|
|
1514
|
+
if (!u) { u = { start: here, end: here, firstOp: op, firstCond: condDepth > 0, loops: new Set() }; uses.set(name, u) }
|
|
1515
|
+
if (here > u.end) u.end = here
|
|
1516
|
+
for (const ls of loopStack) u.loops.add(ls)
|
|
1517
|
+
}
|
|
1518
|
+
} else {
|
|
1519
|
+
pos++
|
|
1520
|
+
const isIf = op === 'if'
|
|
1521
|
+
for (let i = 1; i < n.length; i++) {
|
|
1522
|
+
const c = n[i]
|
|
1523
|
+
const cond = isIf && Array.isArray(c) && (c[0] === 'then' || c[0] === 'else')
|
|
1524
|
+
if (cond) condDepth++
|
|
1525
|
+
visit(c)
|
|
1526
|
+
if (cond) condDepth--
|
|
1527
|
+
}
|
|
1528
|
+
}
|
|
1529
|
+
|
|
1530
|
+
if (isLoop) { const ls = loopStack.pop(); ls.end = pos }
|
|
1531
|
+
}
|
|
1532
|
+
visit(funcNode)
|
|
1533
|
+
if (abort) return
|
|
1534
|
+
|
|
1535
|
+
// A use inside a loop must stay live for the whole loop — the next
|
|
1536
|
+
// iteration could read what this iteration wrote.
|
|
1537
|
+
for (const u of uses.values()) {
|
|
1538
|
+
for (const ls of u.loops) {
|
|
1539
|
+
if (ls.start < u.start) u.start = ls.start
|
|
1540
|
+
if (ls.end > u.end) u.end = ls.end
|
|
1541
|
+
}
|
|
1542
|
+
}
|
|
1543
|
+
|
|
1544
|
+
const ordered = [...uses.entries()].sort((a, b) => a[1].start - b[1].start)
|
|
1545
|
+
const rename = new Map()
|
|
1546
|
+
const slots = []
|
|
1547
|
+
for (const [name, range] of ordered) {
|
|
1548
|
+
// Read-first locals depend on the implicit zero; locals first seen inside
|
|
1549
|
+
// an if/else branch may be skipped on the alternate path — either way
|
|
1550
|
+
// they'd observe a prior slot's residue if reused. They may *start* a
|
|
1551
|
+
// fresh slot (the function's zero init), but never *join* one.
|
|
1552
|
+
const readsZero = range.firstOp === 'local.get' || range.firstCond
|
|
1553
|
+
const type = decls.get(name)
|
|
1554
|
+
const slot = readsZero ? null : slots.find(s => s.type === type && s.end < range.start)
|
|
1555
|
+
if (slot) { rename.set(name, slot.primary); if (range.end > slot.end) slot.end = range.end }
|
|
1556
|
+
else slots.push({ primary: name, type, end: range.end })
|
|
1557
|
+
}
|
|
1558
|
+
if (rename.size === 0) return
|
|
1559
|
+
|
|
1560
|
+
walk(funcNode, (n) => {
|
|
1561
|
+
if (Array.isArray(n) &&
|
|
1562
|
+
(n[0] === 'local.get' || n[0] === 'local.set' || n[0] === 'local.tee') &&
|
|
1563
|
+
rename.has(n[1])) {
|
|
1564
|
+
n[1] = rename.get(n[1])
|
|
1565
|
+
}
|
|
1566
|
+
})
|
|
1567
|
+
})
|
|
1568
|
+
return ast
|
|
1569
|
+
}
|
|
1570
|
+
|
|
1157
1571
|
// ==================== VACUUM ====================
|
|
1158
1572
|
|
|
1159
1573
|
/**
|
|
@@ -1300,52 +1714,99 @@ const peephole = (ast) => {
|
|
|
1300
1714
|
|
|
1301
1715
|
// ==================== GLOBAL CONSTANT PROPAGATION ====================
|
|
1302
1716
|
|
|
1717
|
+
/** Bytes a signed-LEB128 integer encodes to. */
|
|
1718
|
+
const slebSize = (v) => {
|
|
1719
|
+
let x = typeof v === 'bigint' ? v : BigInt(Math.trunc(Number(v) || 0))
|
|
1720
|
+
let n = 1
|
|
1721
|
+
while (true) {
|
|
1722
|
+
const b = x & 0x7fn
|
|
1723
|
+
x >>= 7n
|
|
1724
|
+
if ((x === 0n && (b & 0x40n) === 0n) || (x === -1n && (b & 0x40n) !== 0n)) return n
|
|
1725
|
+
n++
|
|
1726
|
+
}
|
|
1727
|
+
}
|
|
1728
|
+
/** Encoded byte size of a constant init instruction (opcode + immediate). */
|
|
1729
|
+
const constInstrSize = (node) => {
|
|
1730
|
+
if (!Array.isArray(node)) return 4
|
|
1731
|
+
switch (node[0]) {
|
|
1732
|
+
case 'i32.const': case 'i64.const': return 1 + slebSize(node[1])
|
|
1733
|
+
case 'f32.const': return 5
|
|
1734
|
+
case 'f64.const': return 9
|
|
1735
|
+
case 'v128.const': return 18
|
|
1736
|
+
default: return 4 // ref.null/ref.func/global.get — conservative
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
const GLOBAL_GET_SIZE = 2 // 0x23 opcode + 1-byte globalidx (typical)
|
|
1740
|
+
|
|
1303
1741
|
/**
|
|
1304
|
-
* Replace global.get of immutable
|
|
1742
|
+
* Replace `global.get` of an immutable, const-initialised global with the
|
|
1743
|
+
* constant — but only when it doesn't grow the module. A `global.get` costs
|
|
1744
|
+
* ~2 B; an `i32.const 12345` costs 4 B; an `f64.const` costs 9 B. Naively
|
|
1745
|
+
* inlining a big constant read from many sites trades a few cheap reads + one
|
|
1746
|
+
* global decl for many fat immediates — pure bloat (and the node-count size
|
|
1747
|
+
* guard can't see it: same number of AST nodes). So we only propagate a global
|
|
1748
|
+
* when `refs·constSize ≤ refs·2 + declSize`; when every read is replaced and
|
|
1749
|
+
* the global isn't exported, its now-dead decl is dropped here too.
|
|
1305
1750
|
* @param {Array} ast
|
|
1306
1751
|
* @returns {Array}
|
|
1307
1752
|
*/
|
|
1308
1753
|
const globals = (ast) => {
|
|
1309
1754
|
if (!Array.isArray(ast) || ast[0] !== 'module') return ast
|
|
1310
1755
|
|
|
1311
|
-
//
|
|
1312
|
-
const constGlobals = new Map()
|
|
1313
|
-
const
|
|
1756
|
+
// Immutable globals with a constant init: name → init node.
|
|
1757
|
+
const constGlobals = new Map()
|
|
1758
|
+
const exported = new Set() // globals pinned by an export — keep the decl
|
|
1314
1759
|
|
|
1315
1760
|
for (const node of ast.slice(1)) {
|
|
1316
|
-
if (!Array.isArray(node)
|
|
1761
|
+
if (!Array.isArray(node)) continue
|
|
1762
|
+
if (node[0] === 'export' && Array.isArray(node[2]) && node[2][0] === 'global' && typeof node[2][1] === 'string') { exported.add(node[2][1]); continue }
|
|
1763
|
+
if (node[0] !== 'global') continue
|
|
1317
1764
|
const name = typeof node[1] === 'string' && node[1][0] === '$' ? node[1] : null
|
|
1318
1765
|
if (!name) continue
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
const
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
const typeSlot = hasName ? node[2] : node[1]
|
|
1326
|
-
if (Array.isArray(typeSlot) && typeSlot[0] === 'mut') continue
|
|
1327
|
-
|
|
1328
|
-
const init = node[initIdx]
|
|
1766
|
+
// (global $g (export "x") …) inline export → pinned
|
|
1767
|
+
if (node.some(c => Array.isArray(c) && c[0] === 'export')) exported.add(name)
|
|
1768
|
+
const typeSlot = node[2]
|
|
1769
|
+
if (Array.isArray(typeSlot) && typeSlot[0] === 'mut') continue // mutable
|
|
1770
|
+
if (Array.isArray(typeSlot) && typeSlot[0] === 'import') continue // imported
|
|
1771
|
+
const init = node[3]
|
|
1329
1772
|
if (getConst(init)) constGlobals.set(name, init)
|
|
1330
1773
|
}
|
|
1774
|
+
if (constGlobals.size === 0) return ast
|
|
1331
1775
|
|
|
1332
|
-
//
|
|
1776
|
+
// Drop any global that is ever written (defensive — an immutable global can't
|
|
1777
|
+
// be, but a malformed module might) and tally read counts.
|
|
1778
|
+
const reads = new Map()
|
|
1333
1779
|
walk(ast, (n) => {
|
|
1334
|
-
if (!Array.isArray(n)
|
|
1780
|
+
if (!Array.isArray(n)) return
|
|
1335
1781
|
const ref = n[1]
|
|
1336
|
-
if (typeof ref
|
|
1782
|
+
if (typeof ref !== 'string' || ref[0] !== '$') return
|
|
1783
|
+
if (n[0] === 'global.set') constGlobals.delete(ref)
|
|
1784
|
+
else if (n[0] === 'global.get') reads.set(ref, (reads.get(ref) || 0) + 1)
|
|
1337
1785
|
})
|
|
1338
1786
|
|
|
1339
|
-
//
|
|
1340
|
-
|
|
1341
|
-
|
|
1787
|
+
// Keep only globals where propagation is size-neutral or better.
|
|
1788
|
+
const propagate = new Set()
|
|
1789
|
+
for (const [name, init] of constGlobals) {
|
|
1790
|
+
const r = reads.get(name) || 0
|
|
1791
|
+
if (r === 0) continue // dead anyway — leave to treeshake
|
|
1792
|
+
const cs = constInstrSize(init)
|
|
1793
|
+
const declSize = cs + 2 // valtype + mutability byte + init expr + `end`
|
|
1794
|
+
const before = r * GLOBAL_GET_SIZE + declSize
|
|
1795
|
+
const after = r * cs + (exported.has(name) ? declSize : 0)
|
|
1796
|
+
if (after <= before) propagate.add(name)
|
|
1797
|
+
}
|
|
1798
|
+
if (propagate.size === 0) return ast
|
|
1342
1799
|
|
|
1343
|
-
|
|
1344
|
-
return walkPost(ast, (node) => {
|
|
1800
|
+
walkPost(ast, (node) => {
|
|
1345
1801
|
if (!Array.isArray(node) || node[0] !== 'global.get' || node.length !== 2) return
|
|
1346
|
-
|
|
1347
|
-
if (constGlobals.has(ref)) return clone(constGlobals.get(ref))
|
|
1802
|
+
if (propagate.has(node[1])) return clone(constGlobals.get(node[1]))
|
|
1348
1803
|
})
|
|
1804
|
+
// Their reads are all gone now — remove the decls we're free to remove.
|
|
1805
|
+
for (let i = ast.length - 1; i >= 1; i--) {
|
|
1806
|
+
const n = ast[i]
|
|
1807
|
+
if (Array.isArray(n) && n[0] === 'global' && typeof n[1] === 'string' && propagate.has(n[1]) && !exported.has(n[1])) ast.splice(i, 1)
|
|
1808
|
+
}
|
|
1809
|
+
return ast
|
|
1349
1810
|
}
|
|
1350
1811
|
|
|
1351
1812
|
// ==================== LOAD/STORE OFFSET FOLDING ====================
|
|
@@ -1458,7 +1919,9 @@ const unbranch = (ast) => {
|
|
|
1458
1919
|
|
|
1459
1920
|
const last = node[lastIdx]
|
|
1460
1921
|
if (Array.isArray(last) && last[0] === 'br' && last[1] === label) {
|
|
1461
|
-
|
|
1922
|
+
// `(br $L v…)` as a block's last instruction just leaves v… as the block's
|
|
1923
|
+
// result — splice the value operand(s) in its place (none → plain removal).
|
|
1924
|
+
node.splice(lastIdx, 1, ...last.slice(2))
|
|
1462
1925
|
}
|
|
1463
1926
|
})
|
|
1464
1927
|
|
|
@@ -2092,9 +2555,15 @@ export default function optimize(ast, opts = true) {
|
|
|
2092
2555
|
ast = clone(ast)
|
|
2093
2556
|
let beforeRound = null
|
|
2094
2557
|
|
|
2558
|
+
// Size guard works on encoded bytes, not AST node count: passes like
|
|
2559
|
+
// `globals` / `inlineOnce` are node-count-neutral yet move real bytes
|
|
2560
|
+
// (a `global.get` ↔ a fat `f64.const`; a `call` ↔ an inlined body), so a
|
|
2561
|
+
// node-count guard can't tell when a round bloated — or shrank. `binarySize`
|
|
2562
|
+
// also returns Infinity if a round produced invalid wat, so a broken round
|
|
2563
|
+
// reverts instead of escaping.
|
|
2095
2564
|
for (let round = 0; round < 3; round++) {
|
|
2096
2565
|
beforeRound = clone(ast)
|
|
2097
|
-
const sizeBefore =
|
|
2566
|
+
const sizeBefore = binarySize(ast)
|
|
2098
2567
|
|
|
2099
2568
|
if (opts.stripmut) ast = stripmut(ast)
|
|
2100
2569
|
if (opts.globals) ast = globals(ast)
|
|
@@ -2104,6 +2573,7 @@ export default function optimize(ast, opts = true) {
|
|
|
2104
2573
|
if (opts.strength) ast = strength(ast)
|
|
2105
2574
|
if (opts.branch) ast = branch(ast)
|
|
2106
2575
|
if (opts.propagate) ast = propagate(ast)
|
|
2576
|
+
if (opts.inlineOnce) ast = inlineOnce(ast)
|
|
2107
2577
|
if (opts.inline) ast = inline(ast)
|
|
2108
2578
|
if (opts.offset) ast = offset(ast)
|
|
2109
2579
|
if (opts.unbranch) ast = unbranch(ast)
|
|
@@ -2111,6 +2581,8 @@ export default function optimize(ast, opts = true) {
|
|
|
2111
2581
|
if (opts.foldarms) ast = foldarms(ast)
|
|
2112
2582
|
if (opts.deadcode) ast = deadcode(ast)
|
|
2113
2583
|
if (opts.vacuum) ast = vacuum(ast)
|
|
2584
|
+
if (opts.mergeBlocks) ast = mergeBlocks(ast)
|
|
2585
|
+
if (opts.coalesce) ast = coalesceLocals(ast)
|
|
2114
2586
|
if (opts.locals) ast = localReuse(ast)
|
|
2115
2587
|
if (opts.dedupe) ast = dedupe(ast)
|
|
2116
2588
|
if (opts.dedupTypes) ast = dedupTypes(ast)
|
|
@@ -2118,19 +2590,25 @@ export default function optimize(ast, opts = true) {
|
|
|
2118
2590
|
if (opts.reorder) ast = reorder(ast)
|
|
2119
2591
|
if (opts.treeshake) ast = treeshake(ast)
|
|
2120
2592
|
if (opts.minifyImports) ast = minifyImports(ast)
|
|
2121
|
-
|
|
2122
|
-
|
|
2593
|
+
// Second propagate sweep: `inlineOnce`/`inline` (above) leave fresh
|
|
2594
|
+
// `(local.set $p arg) … (local.get $p)` wrappers around each inlined call;
|
|
2595
|
+
// re-running propagation collapses them within this same round, so the size
|
|
2596
|
+
// guard scores the cleaned result instead of waiting a round (which it may
|
|
2597
|
+
// never get if `equal()` declares a fixpoint first).
|
|
2598
|
+
if (opts.propagate && (opts.inlineOnce || opts.inline)) ast = propagate(ast)
|
|
2599
|
+
|
|
2600
|
+
const sizeAfter = binarySize(ast)
|
|
2123
2601
|
const delta = sizeAfter - sizeBefore
|
|
2124
2602
|
|
|
2125
2603
|
if (verbose || delta !== 0) {
|
|
2126
|
-
log(` round ${round + 1}: ${delta > 0 ? '+' : ''}${delta}
|
|
2604
|
+
log(` round ${round + 1}: ${delta > 0 ? '+' : ''}${delta} bytes`, delta)
|
|
2127
2605
|
}
|
|
2128
2606
|
|
|
2129
|
-
// Size guard: default optimize must never inflate. Explicit passes
|
|
2130
|
-
//
|
|
2131
|
-
const tolerance = strictGuard ? 0 :
|
|
2607
|
+
// Size guard: default optimize must never inflate. Explicit passes get a
|
|
2608
|
+
// little leniency (a round may grow a few bytes setting up a bigger win).
|
|
2609
|
+
const tolerance = strictGuard ? 0 : 16
|
|
2132
2610
|
if (delta > tolerance) {
|
|
2133
|
-
if (verbose) log(` ⚠ round ${round + 1} inflated by ${delta}, reverting`, delta)
|
|
2611
|
+
if (verbose) log(` ⚠ round ${round + 1} inflated by ${delta} bytes, reverting`, delta)
|
|
2134
2612
|
ast = beforeRound
|
|
2135
2613
|
break
|
|
2136
2614
|
}
|
|
@@ -2143,4 +2621,4 @@ export default function optimize(ast, opts = true) {
|
|
|
2143
2621
|
|
|
2144
2622
|
/** Count AST nodes (fast size heuristic). */
|
|
2145
2623
|
export { count as size, count, binarySize }
|
|
2146
|
-
export { optimize, treeshake, fold, deadcode, localReuse, identity, strength, branch, propagate, inline, normalize, OPTS, vacuum, peephole, globals, offset, unbranch, stripmut, brif, foldarms, dedupe, reorder, dedupTypes, packData, minifyImports }
|
|
2624
|
+
export { optimize, treeshake, fold, deadcode, localReuse, identity, strength, branch, propagate, inline, inlineOnce, normalize, OPTS, vacuum, peephole, globals, offset, unbranch, stripmut, brif, foldarms, dedupe, reorder, dedupTypes, packData, minifyImports, mergeBlocks, coalesceLocals }
|