npm - @holoscript/engine - Versions diffs - 6.0.3 → 6.0.4 - Mend

@holoscript/engine 6.0.3 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192) hide show

package/dist/AutoMesher-CK47F6AV.js +17 -0
package/dist/GPUBuffers-2LHBCD7X.js +9 -0
package/dist/WebGPUContext-TNEUYU2Y.js +11 -0
package/dist/animation/index.cjs +38 -38
package/dist/animation/index.d.cts +1 -1
package/dist/animation/index.d.ts +1 -1
package/dist/animation/index.js +1 -1
package/dist/audio/index.cjs +16 -6
package/dist/audio/index.d.cts +1 -1
package/dist/audio/index.d.ts +1 -1
package/dist/audio/index.js +1 -1
package/dist/camera/index.cjs +23 -23
package/dist/camera/index.d.cts +1 -1
package/dist/camera/index.d.ts +1 -1
package/dist/camera/index.js +1 -1
package/dist/character/index.cjs +6 -4
package/dist/character/index.js +1 -1
package/dist/choreography/index.cjs +1194 -0
package/dist/choreography/index.d.cts +687 -0
package/dist/choreography/index.d.ts +687 -0
package/dist/choreography/index.js +1156 -0
package/dist/chunk-2CSNRI2N.js +217 -0
package/dist/chunk-33T2WINR.js +266 -0
package/dist/chunk-35R73OFM.js +1257 -0
package/dist/chunk-4MMDSUNP.js +1256 -0
package/dist/chunk-5V6HOU72.js +319 -0
package/dist/chunk-6QOP6PYF.js +1038 -0
package/dist/chunk-7KMJVHIL.js +8944 -0
package/dist/chunk-7VPUC62U.js +1106 -0
package/dist/chunk-A2Y6RCAT.js +1878 -0
package/dist/chunk-AHM42MK6.js +8944 -0
package/dist/chunk-BL7IDTHE.js +218 -0
package/dist/chunk-CITOMSWL.js +10462 -0
package/dist/chunk-CXDPKW2K.js +8944 -0
package/dist/chunk-CXZPLD4S.js +223 -0
package/dist/chunk-CZYJE7IH.js +5169 -0
package/dist/chunk-D2OP7YC7.js +6325 -0
package/dist/chunk-EDRVQHUU.js +1544 -0
package/dist/chunk-EJSLOOW2.js +3589 -0
package/dist/chunk-F53SFGW5.js +1878 -0
package/dist/chunk-HCFPELPY.js +919 -0
package/dist/chunk-HNEE36PY.js +93 -0
package/dist/chunk-HYXNV36F.js +1256 -0
package/dist/chunk-IB7KHVFY.js +821 -0
package/dist/chunk-IBBO7YYG.js +690 -0
package/dist/chunk-ILIBGINU.js +5470 -0
package/dist/chunk-IS4MHLKN.js +5479 -0
package/dist/chunk-JT2PFKWD.js +5479 -0
package/dist/chunk-K4CUB4NY.js +1038 -0
package/dist/chunk-KATDQXRJ.js +10462 -0
package/dist/chunk-KBQE6ZFJ.js +8944 -0
package/dist/chunk-KBVD5K7E.js +560 -0
package/dist/chunk-KCDPVQRY.js +4088 -0
package/dist/chunk-KN4QJPKN.js +8944 -0
package/dist/chunk-KWJ3ROSI.js +8944 -0
package/dist/chunk-L45VF6DD.js +919 -0
package/dist/chunk-LY4T37YK.js +307 -0
package/dist/chunk-MDN5WZXA.js +1544 -0
package/dist/chunk-MGCDP6VU.js +928 -0
package/dist/chunk-NCX7X6G2.js +8681 -0
package/dist/chunk-OF54BPVD.js +913 -0
package/dist/chunk-OWSN2Q3Q.js +690 -0
package/dist/chunk-PRRB5TTA.js +406 -0
package/dist/chunk-PXWVQF76.js +4086 -0
package/dist/chunk-PYCOIDT2.js +812 -0
package/dist/chunk-PZCSADOV.js +928 -0
package/dist/chunk-Q2XBVS2K.js +1038 -0
package/dist/chunk-QDZRXWN5.js +1776 -0
package/dist/chunk-RNWOZ6WQ.js +913 -0
package/dist/chunk-ROLFT4CJ.js +1693 -0
package/dist/chunk-SLTJRZ2N.js +266 -0
package/dist/chunk-SRUS5XSU.js +4088 -0
package/dist/chunk-TKCA3WZ5.js +5409 -0
package/dist/chunk-TNRMXYI2.js +1650 -0
package/dist/chunk-TQB3GJGM.js +9763 -0
package/dist/chunk-TUFGXG6K.js +510 -0
package/dist/chunk-U6KMTGQJ.js +632 -0
package/dist/chunk-VMGJQST6.js +8681 -0
package/dist/chunk-X4F4TCG4.js +5470 -0
package/dist/chunk-ZIFROE75.js +1544 -0
package/dist/chunk-ZIJQYHSQ.js +1204 -0
package/dist/combat/index.cjs +4 -4
package/dist/combat/index.d.cts +1 -1
package/dist/combat/index.d.ts +1 -1
package/dist/combat/index.js +1 -1
package/dist/ecs/index.cjs +1 -1
package/dist/ecs/index.js +1 -1
package/dist/environment/index.cjs +14 -14
package/dist/environment/index.d.cts +1 -1
package/dist/environment/index.d.ts +1 -1
package/dist/environment/index.js +1 -1
package/dist/gpu/index.cjs +4810 -0
package/dist/gpu/index.js +3714 -0
package/dist/hologram/index.cjs +27 -1
package/dist/hologram/index.js +1 -1
package/dist/index-B2PIsAmR.d.cts +2180 -0
package/dist/index-B2PIsAmR.d.ts +2180 -0
package/dist/index-BHySEPX7.d.cts +2921 -0
package/dist/index-BJV21zuy.d.cts +341 -0
package/dist/index-BJV21zuy.d.ts +341 -0
package/dist/index-BQutTphC.d.cts +790 -0
package/dist/index-ByIq2XrS.d.cts +3910 -0
package/dist/index-BysHjDSO.d.cts +224 -0
package/dist/index-BysHjDSO.d.ts +224 -0
package/dist/index-CKwAJGck.d.ts +455 -0
package/dist/index-CUl3QstQ.d.cts +3006 -0
package/dist/index-CUl3QstQ.d.ts +3006 -0
package/dist/index-CmYtNiI-.d.cts +953 -0
package/dist/index-CmYtNiI-.d.ts +953 -0
package/dist/index-CnRzWxi_.d.cts +522 -0
package/dist/index-CnRzWxi_.d.ts +522 -0
package/dist/index-CwRWbSC7.d.ts +2921 -0
package/dist/index-CxKIBstO.d.ts +790 -0
package/dist/index-DJ6-R8vh.d.cts +455 -0
package/dist/index-DQKisbcI.d.cts +4968 -0
package/dist/index-DQKisbcI.d.ts +4968 -0
package/dist/index-DRT2zJez.d.ts +3910 -0
package/dist/index-DfNLiAka.d.cts +192 -0
package/dist/index-DfNLiAka.d.ts +192 -0
package/dist/index-nMvkoRm8.d.cts +405 -0
package/dist/index-nMvkoRm8.d.ts +405 -0
package/dist/index-s9yOFU37.d.cts +604 -0
package/dist/index-s9yOFU37.d.ts +604 -0
package/dist/index.cjs +22966 -6960
package/dist/index.d.cts +864 -20
package/dist/index.d.ts +864 -20
package/dist/index.js +3062 -48
package/dist/input/index.cjs +1 -1
package/dist/input/index.js +1 -1
package/dist/orbital/index.cjs +3 -3
package/dist/orbital/index.d.cts +1 -1
package/dist/orbital/index.d.ts +1 -1
package/dist/orbital/index.js +1 -1
package/dist/particles/index.cjs +16 -16
package/dist/particles/index.d.cts +1 -1
package/dist/particles/index.d.ts +1 -1
package/dist/particles/index.js +1 -1
package/dist/physics/index.cjs +2377 -21
package/dist/physics/index.d.cts +1 -1
package/dist/physics/index.d.ts +1 -1
package/dist/physics/index.js +35 -1
package/dist/postfx/index.cjs +3491 -0
package/dist/postfx/index.js +93 -0
package/dist/procedural/index.cjs +1 -1
package/dist/procedural/index.js +1 -1
package/dist/puppeteer-5VF6KDVO.js +52197 -0
package/dist/puppeteer-IZVZ3SG4.js +52197 -0
package/dist/rendering/index.cjs +33 -32
package/dist/rendering/index.d.cts +1 -1
package/dist/rendering/index.d.ts +1 -1
package/dist/rendering/index.js +8 -6
package/dist/runtime/index.cjs +23 -13
package/dist/runtime/index.d.cts +1 -1
package/dist/runtime/index.d.ts +1 -1
package/dist/runtime/index.js +8 -6
package/dist/runtime/protocols/index.cjs +349 -0
package/dist/runtime/protocols/index.js +15 -0
package/dist/scene/index.cjs +8 -8
package/dist/scene/index.d.cts +1 -1
package/dist/scene/index.d.ts +1 -1
package/dist/scene/index.js +1 -1
package/dist/shader/index.cjs +3087 -0
package/dist/shader/index.js +3044 -0
package/dist/simulation/index.cjs +10680 -0
package/dist/simulation/index.d.cts +3 -0
package/dist/simulation/index.d.ts +3 -0
package/dist/simulation/index.js +307 -0
package/dist/spatial/index.cjs +2443 -0
package/dist/spatial/index.d.cts +1545 -0
package/dist/spatial/index.d.ts +1545 -0
package/dist/spatial/index.js +2400 -0
package/dist/terrain/index.cjs +1 -1
package/dist/terrain/index.d.cts +1 -1
package/dist/terrain/index.d.ts +1 -1
package/dist/terrain/index.js +1 -1
package/dist/transformers.node-4NKAPD5U.js +45620 -0
package/dist/vm/index.cjs +7 -8
package/dist/vm/index.d.cts +1 -1
package/dist/vm/index.d.ts +1 -1
package/dist/vm/index.js +1 -1
package/dist/vm-bridge/index.cjs +2 -2
package/dist/vm-bridge/index.d.cts +2 -2
package/dist/vm-bridge/index.d.ts +2 -2
package/dist/vm-bridge/index.js +1 -1
package/dist/vr/index.cjs +6 -6
package/dist/vr/index.js +1 -1
package/dist/world/index.cjs +3 -3
package/dist/world/index.d.cts +1 -1
package/dist/world/index.d.ts +1 -1
package/dist/world/index.js +1 -1
package/package.json +53 -21
package/LICENSE +0 -21

package/dist/chunk-TUFGXG6K.js ADDED Viewed

@@ -0,0 +1,510 @@
+// wgsl-raw:C:\Users\josep\Documents\GitHub\HoloScript\packages\engine\src\gpu\shaders\cg_kernels.wgsl
+var cg_kernels_default = "/**\n * Conjugate Gradient Kernels \u2014 Sparse Linear Algebra on WebGPU\n *\n * Unified bind group layout:\n *   group(0): CSR matrix (SpMV only)\n *   group(1): Vectors (vec_in read, vec_out read_write)\n *   group(2): SolverArgs uniform\n *   group(3): Reduction workspace (dot/final_reduce only)\n *\n * Each entry point references only the groups it needs.\n * With layout:'auto', each pipeline gets a layout derived from\n * only the bindings its entry point actually accesses.\n */\n\n// \u2500\u2500 Shared Types \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nstruct SolverArgs {\n  num_rows: u32,\n  vector_width: u32,\n  n: u32,\n  alpha: f32,\n};\n\n// \u2500\u2500 Group 0: CSR Matrix \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n@group(0) @binding(0) var<storage, read> csr_val: array<f32>;\n@group(0) @binding(1) var<storage, read> csr_col: array<u32>;\n@group(0) @binding(2) var<storage, read> csr_row: array<u32>;\n\n// \u2500\u2500 Group 1: Vectors \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n@group(1) @binding(0) var<storage, read> vec_in: array<f32>;\n@group(1) @binding(1) var<storage, read_write> vec_out: array<f32>;\n\n// \u2500\u2500 Group 2: Solver Arguments 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n@group(2) @binding(0) var<uniform> args: SolverArgs;\n\n// \u2500\u2500 Group 3: Reduction Workspace \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n@group(3) @binding(0) var<storage, read_write> partial_sums: array<f32>;\n@group(3) @binding(1) var<storage, read_write> scalar_result: array<f32>;\n\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n// 1. 
SpMV \u2014 CSR-Vector (multi-thread per row)\n//    Assigns vector_width threads per row for irregular TET10 sparsity.\n//    Uses: groups 0, 1, 2\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n\nvar<workgroup> spmv_shared: array<f32, 256>;\n\n@compute @workgroup_size(256)\nfn spmv_vector(\n    @builtin(global_invocation_id) global_id: vec3<u32>,\n    @builtin(local_invocation_id) local_id: vec3<u32>\n) {\n    let tid = local_id.x;\n    let gid = global_id.x;\n    let threads_per_row = args.vector_width;\n    let row = gid / threads_per_row;\n    let lane = gid % threads_per_row;\n\n    if (row >= args.num_rows) {\n        return;\n    }\n\n    let row_start = csr_row[row];\n    let row_end = csr_row[row + 1];\n\n    var sum: f32 = 0.0;\n    for (var i = row_start + lane; i < row_end; i = i + threads_per_row) {\n        sum += csr_val[i] * vec_in[csr_col[i]];\n    }\n\n    spmv_shared[tid] = sum;\n    workgroupBarrier();\n\n    for (var s = threads_per_row / 2u; s > 0u; s >>= 1u) {\n        if (lane < s) {\n            spmv_shared[tid] += spmv_shared[tid + s];\n        }\n        workgroupBarrier();\n    }\n\n    if (lane == 0u) {\n        vec_out[row] = spmv_shared[tid];\n    }\n}\n\n// Legacy scalar SpMV (1 thread per row, for small/regular matrices)\n// Uses: groups 0, 1, 2\n@compute @workgroup_size(64)\nfn spmv(@builtin(global_invocation_id) global_id: vec3<u32>) {\n    let row = global_id.x;\n    if (row >= args.num_rows) {\n        return;\n    }\n\n    let row_start = csr_row[row];\n    let row_end = csr_row[row + 1];\n\n    var sum: f32 = 0.0;\n    for (var i = row_start; i < row_end; i = i 
+ 1u) {\n        sum += csr_val[i] * vec_in[csr_col[i]];\n    }\n\n    vec_out[row] = sum;\n}\n\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n// 2. SAXPY: vec_out = alpha * vec_in + vec_out\n//    Uses: groups 1, 2\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n\n@compute @workgroup_size(256)\nfn saxpy(@builtin(global_invocation_id) global_id: vec3<u32>) {\n    let idx = global_id.x;\n    if (idx >= args.n) {\n        return;\n    }\n    vec_out[idx] = args.alpha * vec_in[idx] + vec_out[idx];\n}\n\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n// 3. 
Fused CG Update: p = r + beta * p\n//    vec_in = r (read), vec_out = p (read_write), args.alpha = beta\n//    Uses: groups 1, 2\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n\n@compute @workgroup_size(256)\nfn p_update(@builtin(global_invocation_id) global_id: vec3<u32>) {\n    let idx = global_id.x;\n    if (idx >= args.n) {\n        return;\n    }\n    vec_out[idx] = vec_in[idx] + args.alpha * vec_out[idx];\n}\n\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n// 4. 
Vector Copy: vec_out = vec_in\n//    Uses: groups 1, 2\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n\n@compute @workgroup_size(256)\nfn vec_copy(@builtin(global_invocation_id) global_id: vec3<u32>) {\n    let idx = global_id.x;\n    if (idx >= args.n) {\n        return;\n    }\n    vec_out[idx] = vec_in[idx];\n}\n\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n// 5. 
Vector Zero: vec_out = 0\n//    Uses: groups 1 (binding 1 only), 2\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n\n@compute @workgroup_size(256)\nfn vec_zero(@builtin(global_invocation_id) global_id: vec3<u32>) {\n    let idx = global_id.x;\n    if (idx >= args.n) {\n        return;\n    }\n    vec_out[idx] = 0.0;\n}\n\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n// 6. 
Dot Product \u2014 Phase 1: per-workgroup partial sums\n//    result[wg_id] = sum of vec_in[i] * vec_out[i] for this workgroup\n//    Uses: groups 1, 2, 3 (binding 0)\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n\nvar<workgroup> dot_shared: array<f32, 256>;\n\n@compute @workgroup_size(256)\nfn dot_product(\n    @builtin(global_invocation_id) global_id: vec3<u32>,\n    @builtin(local_invocation_id) local_id: vec3<u32>,\n    @builtin(workgroup_id) workgroup_id: vec3<u32>\n) {\n    let idx = global_id.x;\n    let tid = local_id.x;\n\n    if (idx < args.n) {\n        dot_shared[tid] = vec_in[idx] * vec_out[idx];\n    } else {\n        dot_shared[tid] = 0.0;\n    }\n\n    workgroupBarrier();\n\n    for (var s = 128u; s > 0u; s >>= 1u) {\n        if (tid < s) {\n            dot_shared[tid] += dot_shared[tid + s];\n        }\n        workgroupBarrier();\n    }\n\n    if (tid == 0u) {\n        partial_sums[workgroup_id.x] = dot_shared[0];\n    }\n}\n\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n// 7. 
Final Reduce \u2014 Phase 2: sum partial_sums \u2192 scalar_result[0]\n//    args.n = number of partial sums to reduce\n//    Uses: groups 2, 3 (bindings 0 and 1)\n// \u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\n\nvar<workgroup> reduce_shared: array<f32, 256>;\n\n@compute @workgroup_size(256)\nfn final_reduce(@builtin(local_invocation_id) local_id: vec3<u32>) {\n    let tid = local_id.x;\n    let count = args.n;\n\n    var acc: f32 = 0.0;\n    var i = tid;\n    loop {\n        if (i >= count) {\n            break;\n        }\n        acc += partial_sums[i];\n        i += 256u;\n    }\n    reduce_shared[tid] = acc;\n\n    workgroupBarrier();\n\n    for (var s = 128u; s > 0u; s >>= 1u) {\n        if (tid < s) {\n            reduce_shared[tid] += reduce_shared[tid + s];\n        }\n        workgroupBarrier();\n    }\n\n    if (tid == 0u) {\n        scalar_result[0] = reduce_shared[0];\n    }\n}\n";
+// src/gpu/SparseLinearSolver.ts
+// Host-side workgroup width used to size dispatches (e.g. Math.ceil(n / WG_SIZE)).
+// The WGSL kernels in cg_kernels_default declare @workgroup_size(256), except the
+// legacy scalar `spmv` entry point, which declares @workgroup_size(64).
+var WG_SIZE = 256;
+var SparseLinearSolver = class {
+  constructor(context) {
+    this.context = context;
+    this.device = context.getDevice();
+  }
+  // Context object passed to the constructor.
+  context;
+  // Result of context.getDevice(); used for every buffer, pipeline and queue call.
+  device;
+  // Shader module compiled from cg_kernels_default by initialize().
+  shaderModule;
+  // Compute pipelines, one per WGSL entry point; all assigned by initialize().
+  spmvPipeline;
+  spmvVectorPipeline;
+  saxpyPipeline;
+  dotPipeline;
+  finalReducePipeline;
+  vecCopyPipeline;
+  vecZeroPipeline;
+  pUpdatePipeline;
+  // Set to true at the end of initialize(); solveCG() refuses to run while false.
+  initialized = false;
+  /** Compile shaders and create all compute pipelines */
+  async initialize() {
+    if (this.initialized) return;
+    this.shaderModule = this.device.createShaderModule({
+      label: "CG Kernels",
+      code: cg_kernels_default
+    });
+    const [spmv, spmvVec, saxpy, dot, finalReduce, vecCopy, vecZero, pUpdate] = await Promise.all([
+      this.device.createComputePipelineAsync({
+        label: "SpMV Scalar",
+        layout: "auto",
+        compute: { module: this.shaderModule, entryPoint: "spmv" }
+      }),
+      this.device.createComputePipelineAsync({
+        label: "SpMV Vector",
+        layout: "auto",
+        compute: { module: this.shaderModule, entryPoint: "spmv_vector" }
+      }),
+      this.device.createComputePipelineAsync({
+        label: "SAXPY",
+        layout: "auto",
+        compute: { module: this.shaderModule, entryPoint: "saxpy" }
+      }),
+      this.device.createComputePipelineAsync({
+        label: "Dot Product",
+        layout: "auto",
+        compute: { module: this.shaderModule, entryPoint: "dot_product" }
+      }),
+      this.device.createComputePipelineAsync({
+        label: "Final Reduce",
+        layout: "auto",
+        compute: { module: this.shaderModule, entryPoint: "final_reduce" }
+      }),
+      this.device.createComputePipelineAsync({
+        label: "Vec Copy",
+        layout: "auto",
+        compute: { module: this.shaderModule, entryPoint: "vec_copy" }
+      }),
+      this.device.createComputePipelineAsync({
+        label: "Vec Zero",
+        layout: "auto",
+        compute: { module: this.shaderModule, entryPoint: "vec_zero" }
+      }),
+      this.device.createComputePipelineAsync({
+        label: "P-Update",
+        layout: "auto",
+        compute: { module: this.shaderModule, entryPoint: "p_update" }
+      })
+    ]);
+    this.spmvPipeline = spmv;
+    this.spmvVectorPipeline = spmvVec;
+    this.saxpyPipeline = saxpy;
+    this.dotPipeline = dot;
+    this.finalReducePipeline = finalReduce;
+    this.vecCopyPipeline = vecCopy;
+    this.vecZeroPipeline = vecZero;
+    this.pUpdatePipeline = pUpdate;
+    this.initialized = true;
+  }
+  /**
+   * Solve Ax = b using Conjugate Gradient on the GPU.
+   *
+   * Algorithm (Hestenes-Stiefel):
+   *   r₀ = b - A·x₀
+   *   p₀ = r₀
+   *   for k = 0, 1, 2, ...
+   *     Ap = A·p
+   *     α = (r·r) / (p·Ap)
+   *     x = x + α·p
+   *     r = r - α·Ap
+   *     if ||r||² < tol: break
+   *     β = (r_new·r_new) / (r·r)
+   *     p = r + β·p           ← fused kernel
+   */
+  async solveCG(A, b, xGuess, options = {}) {
+    if (!this.initialized) {
+      throw new Error("SparseLinearSolver not initialized. Call initialize() first.");
+    }
+    const {
+      maxIterations = 1e3,
+      toleranceSq = 1e-10,
+      convergenceCheckInterval = 50,
+      onProgress
+    } = options;
+    const n = A.num_rows;
+    const vectorWidth = 16;
+    const numWgSpmvVec = Math.ceil(n * vectorWidth / WG_SIZE);
+    const numWgVec = Math.ceil(n / WG_SIZE);
+    const numWgDot = Math.ceil(n / WG_SIZE);
+    const csrVal = this.uploadStorage(A.val, "csr-val");
+    const csrCol = this.uploadStorage(A.col_ind, "csr-col");
+    const csrRow = this.uploadStorage(A.row_ptr, "csr-row");
+    const bufB = this.uploadStorage(b, "vec-b");
+    const bufX = this.uploadStorage(xGuess, "vec-x");
+    const bufR = this.emptyVec(n, "vec-r");
+    const bufP = this.emptyVec(n, "vec-p");
+    const bufAp = this.emptyVec(n, "vec-Ap");
+    const bufPartials = this.emptyVec(numWgDot, "partial-sums");
+    const bufScalar = this.emptyVec(1, "scalar-result");
+    const bufStaging = this.device.createBuffer({
+      label: "staging",
+      size: 4,
+      usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
+    });
+    const bufArgs = this.device.createBuffer({
+      label: "solver-args",
+      size: 16,
+      usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
+    });
+    const allBuffers = [csrVal, csrCol, csrRow, bufB, bufX, bufR, bufP, bufAp, bufPartials, bufScalar, bufStaging, bufArgs];
+    {
+      this.writeArgs(bufArgs, n, vectorWidth, n, 0);
+      const enc = this.device.createCommandEncoder({ label: "init-spmv" });
+      this.dispatchSpmv(enc, csrVal, csrCol, csrRow, bufX, bufAp, bufArgs, numWgSpmvVec, true);
+      this.device.queue.submit([enc.finish()]);
+      await this.device.queue.onSubmittedWorkDone();
+    }
+    {
+      const enc = this.device.createCommandEncoder({ label: "init-residual" });
+      enc.copyBufferToBuffer(bufB, 0, bufR, 0, n * 4);
+      this.device.queue.submit([enc.finish()]);
+      await this.device.queue.onSubmittedWorkDone();
+    }
+    {
+      this.writeArgs(bufArgs, n, vectorWidth, n, -1);
+      const enc = this.device.createCommandEncoder({ label: "init-saxpy" });
+      this.dispatchSaxpy(enc, bufAp, bufR, bufArgs, numWgVec);
+      this.device.queue.submit([enc.finish()]);
+      await this.device.queue.onSubmittedWorkDone();
+    }
+    {
+      this.writeArgs(bufArgs, n, vectorWidth, n, 0);
+      const enc = this.device.createCommandEncoder({ label: "init-copy-p" });
+      this.dispatchVecCopy(enc, bufR, bufP, bufArgs, numWgVec);
+      this.device.queue.submit([enc.finish()]);
+      await this.device.queue.onSubmittedWorkDone();
+    }
+    let rDotR = await this.dotProduct(bufR, bufR, bufPartials, bufScalar, bufStaging, bufArgs, n, numWgDot);
+    if (rDotR < toleranceSq) {
+      const x = await this.readback(bufX, n);
+      this.cleanup(allBuffers);
+      return { x, iterations: 0, residualNormSq: rDotR, converged: true };
+    }
+    let iteration = 0;
+    let converged = false;
+    for (iteration = 0; iteration < maxIterations; iteration++) {
+      {
+        this.writeArgs(bufArgs, n, vectorWidth, n, 0);
+        const enc = this.device.createCommandEncoder();
+        this.dispatchSpmv(enc, csrVal, csrCol, csrRow, bufP, bufAp, bufArgs, numWgSpmvVec, true);
+        this.device.queue.submit([enc.finish()]);
+      }
+      const pAp = await this.dotProduct(bufP, bufAp, bufPartials, bufScalar, bufStaging, bufArgs, n, numWgDot);
+      if (Math.abs(pAp) < 1e-30) {
+        converged = rDotR < toleranceSq;
+        break;
+      }
+      const alpha = rDotR / pAp;
+      {
+        this.writeArgs(bufArgs, n, vectorWidth, n, alpha);
+        const enc = this.device.createCommandEncoder();
+        this.dispatchSaxpy(enc, bufP, bufX, bufArgs, numWgVec);
+        this.device.queue.submit([enc.finish()]);
+        await this.device.queue.onSubmittedWorkDone();
+      }
+      {
+        this.writeArgs(bufArgs, n, vectorWidth, n, -alpha);
+        const enc = this.device.createCommandEncoder();
+        this.dispatchSaxpy(enc, bufAp, bufR, bufArgs, numWgVec);
+        this.device.queue.submit([enc.finish()]);
+        await this.device.queue.onSubmittedWorkDone();
+      }
+      const rNewDotRNew = await this.dotProduct(bufR, bufR, bufPartials, bufScalar, bufStaging, bufArgs, n, numWgDot);
+      if (rNewDotRNew < toleranceSq) {
+        rDotR = rNewDotRNew;
+        converged = true;
+        iteration++;
+        onProgress?.(iteration, rNewDotRNew);
+        break;
+      }
+      if (onProgress && iteration % convergenceCheckInterval === 0) {
+        onProgress(iteration, rNewDotRNew);
+      }
+      const beta = rNewDotRNew / rDotR;
+      {
+        this.writeArgs(bufArgs, n, vectorWidth, n, beta);
+        const enc = this.device.createCommandEncoder();
+        this.dispatchPUpdate(enc, bufR, bufP, bufArgs, numWgVec);
+        this.device.queue.submit([enc.finish()]);
+        await this.device.queue.onSubmittedWorkDone();
+      }
+      rDotR = rNewDotRNew;
+    }
+    const solution = await this.readback(bufX, n);
+    this.cleanup(allBuffers);
+    return { x: solution, iterations: iteration, residualNormSq: rDotR, converged };
+  }
+  /**
+   * solveCGDirect — Direct GPU-to-GPU Conjugate Gradient solve.
+   *
+   * Same as solveCG but avoids CPU readback of the solution vector.
+   * Returns the live GPUBuffer containing the result.
+   *
+   * @warning Caller is responsible for destroying the returned xBuffer.
+   */
+  async solveCGDirect(A, b, x0, options = {}) {
+    const n = A.num_rows;
+    const maxIterations = options.maxIterations ?? 1e3;
+    const toleranceSq = options.toleranceSq ?? 1e-10;
+    const xExtraUsage = options.xExtraUsage ?? 0;
+    const valBuffer = this.uploadStorage(A.val, "val");
+    const colIndBuffer = this.uploadStorage(new Uint32Array(A.col_ind), "col_ind");
+    const rowPtrBuffer = this.uploadStorage(new Uint32Array(A.row_ptr), "row_ptr");
+    const bBuffer = this.uploadStorage(b, "b");
+    const xBuffer = this.uploadStorage(x0, "x", xExtraUsage);
+    const rBuffer = this.emptyVec(n, "r");
+    const pBuffer = this.emptyVec(n, "p");
+    const ApBuffer = this.emptyVec(n, "Ap");
+    const rDotRBuffer = this.emptyVec(1, "rDotR");
+    const rDotRStagingBuffer = this.device.createBuffer({
+      size: 4,
+      usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
+    });
+    const numWgVec = Math.ceil(n / WG_SIZE);
+    const numWgDot = Math.ceil(n / WG_SIZE);
+    const bufArgs = this.device.createBuffer({ size: 16, usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST });
+    const partials = this.emptyVec(numWgDot, "partials");
+    {
+      const enc = this.device.createCommandEncoder();
+      this.dispatchVecCopy(enc, bBuffer, rBuffer, bufArgs, numWgVec);
+      this.dispatchSpmv(enc, valBuffer, colIndBuffer, rowPtrBuffer, xBuffer, ApBuffer, bufArgs, numWgVec, true);
+      this.device.queue.submit([enc.finish()]);
+    }
+    {
+      this.writeArgs(bufArgs, n, 0, n, -1);
+      const enc = this.device.createCommandEncoder();
+      this.dispatchSaxpy(enc, ApBuffer, rBuffer, bufArgs, numWgVec);
+      this.device.queue.submit([enc.finish()]);
+    }
+    {
+      const enc = this.device.createCommandEncoder();
+      this.dispatchVecCopy(enc, rBuffer, pBuffer, bufArgs, numWgVec);
+      this.device.queue.submit([enc.finish()]);
+    }
+    let iteration = 0;
+    let converged = false;
+    let rDotR = await this.dotProduct(rBuffer, rBuffer, partials, rDotRBuffer, rDotRStagingBuffer, bufArgs, n, numWgDot);
+    for (iteration = 0; iteration < maxIterations; iteration++) {
+      if (rDotR < toleranceSq) {
+        converged = true;
+        break;
+      }
+      {
+        const enc = this.device.createCommandEncoder();
+        this.dispatchSpmv(enc, valBuffer, colIndBuffer, rowPtrBuffer, pBuffer, ApBuffer, bufArgs, numWgVec, true);
+        this.device.queue.submit([enc.finish()]);
+      }
+      const pAp = await this.dotProduct(pBuffer, ApBuffer, partials, rDotRBuffer, rDotRStagingBuffer, bufArgs, n, numWgDot);
+      const alpha = rDotR / (pAp + 1e-20);
+      {
+        this.writeArgs(bufArgs, n, 0, n, alpha);
+        const enc = this.device.createCommandEncoder();
+        this.dispatchSaxpy(enc, pBuffer, xBuffer, bufArgs, numWgVec);
+        this.device.queue.submit([enc.finish()]);
+      }
+      {
+        this.writeArgs(bufArgs, n, 0, n, -alpha);
+        const enc = this.device.createCommandEncoder();
+        this.dispatchSaxpy(enc, ApBuffer, rBuffer, bufArgs, numWgVec);
+        this.device.queue.submit([enc.finish()]);
+      }
+      const oldRDotR = rDotR;
+      rDotR = await this.dotProduct(rBuffer, rBuffer, partials, rDotRBuffer, rDotRStagingBuffer, bufArgs, n, numWgDot);
+      const beta = rDotR / (oldRDotR + 1e-20);
+      {
+        this.writeArgs(bufArgs, n, 0, n, beta);
+        const enc = this.device.createCommandEncoder();
+        this.dispatchPUpdate(enc, rBuffer, pBuffer, bufArgs, numWgVec);
+        this.device.queue.submit([enc.finish()]);
+      }
+    }
+    this.cleanup([
+      valBuffer,
+      colIndBuffer,
+      rowPtrBuffer,
+      bBuffer,
+      rBuffer,
+      pBuffer,
+      ApBuffer,
+      rDotRBuffer,
+      rDotRStagingBuffer
+    ]);
+    return { xBuffer, iterations: iteration, residualNormSq: rDotR, converged };
+  }
+  // ═══════════════════════════════════════════════════════════════════
+  // Dispatch helpers — each sets the bind groups its entry point needs
+  // ═══════════════════════════════════════════════════════════════════
+  /** SpMV: groups 0 (CSR), 1 (vecs), 2 (args) */
+  dispatchSpmv(enc, val, col, row, x, y, args, numWgs, useVector) {
+    const pipeline = useVector ? this.spmvVectorPipeline : this.spmvPipeline;
+    const pass = enc.beginComputePass({ label: "spmv" });
+    pass.setPipeline(pipeline);
+    pass.setBindGroup(0, this.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [
+        { binding: 0, resource: { buffer: val } },
+        { binding: 1, resource: { buffer: col } },
+        { binding: 2, resource: { buffer: row } }
+      ]
+    }));
+    pass.setBindGroup(1, this.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(1),
+      entries: [
+        { binding: 0, resource: { buffer: x } },
+        { binding: 1, resource: { buffer: y } }
+      ]
+    }));
+    pass.setBindGroup(2, this.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(2),
+      entries: [{ binding: 0, resource: { buffer: args } }]
+    }));
+    pass.dispatchWorkgroups(numWgs);
+    pass.end();
+  }
+  /** SAXPY: groups 1 (vecs), 2 (args) */
+  dispatchSaxpy(enc, x, y, args, numWgs) {
+    const pass = enc.beginComputePass({ label: "saxpy" });
+    pass.setPipeline(this.saxpyPipeline);
+    pass.setBindGroup(1, this.device.createBindGroup({
+      layout: this.saxpyPipeline.getBindGroupLayout(1),
+      entries: [
+        { binding: 0, resource: { buffer: x } },
+        { binding: 1, resource: { buffer: y } }
+      ]
+    }));
+    pass.setBindGroup(2, this.device.createBindGroup({
+      layout: this.saxpyPipeline.getBindGroupLayout(2),
+      entries: [{ binding: 0, resource: { buffer: args } }]
+    }));
+    pass.dispatchWorkgroups(numWgs);
+    pass.end();
+  }
+  /** Fused p = r + beta*p: groups 1 (vecs), 2 (args) */
+  dispatchPUpdate(enc, r, p, args, numWgs) {
+    const pass = enc.beginComputePass({ label: "p-update" });
+    pass.setPipeline(this.pUpdatePipeline);
+    pass.setBindGroup(1, this.device.createBindGroup({
+      layout: this.pUpdatePipeline.getBindGroupLayout(1),
+      entries: [
+        { binding: 0, resource: { buffer: r } },
+        { binding: 1, resource: { buffer: p } }
+      ]
+    }));
+    pass.setBindGroup(2, this.device.createBindGroup({
+      layout: this.pUpdatePipeline.getBindGroupLayout(2),
+      entries: [{ binding: 0, resource: { buffer: args } }]
+    }));
+    pass.dispatchWorkgroups(numWgs);
+    pass.end();
+  }
+  /** Vec copy: groups 1 (vecs), 2 (args) */
+  dispatchVecCopy(enc, src, dst, args, numWgs) {
+    const pass = enc.beginComputePass({ label: "vec-copy" });
+    pass.setPipeline(this.vecCopyPipeline);
+    pass.setBindGroup(1, this.device.createBindGroup({
+      layout: this.vecCopyPipeline.getBindGroupLayout(1),
+      entries: [
+        { binding: 0, resource: { buffer: src } },
+        { binding: 1, resource: { buffer: dst } }
+      ]
+    }));
+    pass.setBindGroup(2, this.device.createBindGroup({
+      layout: this.vecCopyPipeline.getBindGroupLayout(2),
+      entries: [{ binding: 0, resource: { buffer: args } }]
+    }));
+    pass.dispatchWorkgroups(numWgs);
+    pass.end();
+  }
+  /**
+   * Full dot product: v1·v2
+   *   Phase 1: dot_product kernel → partial_sums (per-workgroup)
+   *   Phase 2: final_reduce → scalar_result[0]
+   *   Readback: staging mapAsync → CPU f32
+   */
+  async dotProduct(v1, v2, partials, scalar, staging, args, n, numWgDot) {
+    {
+      this.writeArgs(args, n, 0, n, 0);
+      const enc = this.device.createCommandEncoder({ label: "dot-phase1" });
+      const pass = enc.beginComputePass();
+      pass.setPipeline(this.dotPipeline);
+      pass.setBindGroup(1, this.device.createBindGroup({
+        layout: this.dotPipeline.getBindGroupLayout(1),
+        entries: [
+          { binding: 0, resource: { buffer: v1 } },
+          { binding: 1, resource: { buffer: v2 } }
+        ]
+      }));
+      pass.setBindGroup(2, this.device.createBindGroup({
+        layout: this.dotPipeline.getBindGroupLayout(2),
+        entries: [{ binding: 0, resource: { buffer: args } }]
+      }));
+      pass.setBindGroup(3, this.device.createBindGroup({
+        layout: this.dotPipeline.getBindGroupLayout(3),
+        entries: [{ binding: 0, resource: { buffer: partials } }]
+      }));
+      pass.dispatchWorkgroups(numWgDot);
+      pass.end();
+      this.device.queue.submit([enc.finish()]);
+    }
+    {
+      this.writeArgs(args, numWgDot, 0, numWgDot, 0);
+      const enc = this.device.createCommandEncoder({ label: "dot-phase2" });
+      const pass = enc.beginComputePass();
+      pass.setPipeline(this.finalReducePipeline);
+      pass.setBindGroup(2, this.device.createBindGroup({
+        layout: this.finalReducePipeline.getBindGroupLayout(2),
+        entries: [{ binding: 0, resource: { buffer: args } }]
+      }));
+      pass.setBindGroup(3, this.device.createBindGroup({
+        layout: this.finalReducePipeline.getBindGroupLayout(3),
+        entries: [
+          { binding: 0, resource: { buffer: partials } },
+          { binding: 1, resource: { buffer: scalar } }
+        ]
+      }));
+      pass.dispatchWorkgroups(1);
+      pass.end();
+      enc.copyBufferToBuffer(scalar, 0, staging, 0, 4);
+      this.device.queue.submit([enc.finish()]);
+    }
+    await staging.mapAsync(GPUMapMode.READ);
+    const value = new Float32Array(staging.getMappedRange())[0];
+    staging.unmap();
+    return value;
+  }
+  // ═══════════════════════════════════════════════════════════════════
+  // Buffer helpers
+  // ═══════════════════════════════════════════════════════════════════
+  writeArgs(buf, numRows, vectorWidth, n, alpha) {
+    const data = new ArrayBuffer(16);
+    new Uint32Array(data, 0, 3).set([numRows, vectorWidth, n]);
+    new Float32Array(data, 12, 1).set([alpha]);
+    this.device.queue.writeBuffer(buf, 0, data);
+  }
+  uploadStorage(data, label, extraUsage = 0) {
+    const buf = this.device.createBuffer({
+      label,
+      size: data.byteLength,
+      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | extraUsage,
+      mappedAtCreation: true
+    });
+    if (data instanceof Float32Array) new Float32Array(buf.getMappedRange()).set(data);
+    else new Uint32Array(buf.getMappedRange()).set(data);
+    buf.unmap();
+    return buf;
+  }
+  emptyVec(n, label, extraUsage = 0) {
+    return this.device.createBuffer({
+      label,
+      size: Math.max(4, n * 4),
+      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | extraUsage
+    });
+  }
+  async readback(buf, n) {
+    const staging = this.device.createBuffer({
+      size: n * 4,
+      usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
+    });
+    const enc = this.device.createCommandEncoder();
+    enc.copyBufferToBuffer(buf, 0, staging, 0, n * 4);
+    this.device.queue.submit([enc.finish()]);
+    await staging.mapAsync(GPUMapMode.READ);
+    const result = new Float32Array(staging.getMappedRange()).slice();
+    staging.unmap();
+    staging.destroy();
+    return result;
+  }
+  cleanup(buffers) {
+    for (const b of buffers) b.destroy();
+  }
+  destroy() { // Mark this solver as no longer initialized.
+    this.initialized = false; // only the flag is reset here; nothing else is released in this method
+  }
+};
+export {
+  SparseLinearSolver
+};