@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -3,6 +3,7 @@
3
3
  import { getDevice } from '../../gpu/device.js';
4
4
  import { allowReadback } from '../../gpu/perf-guards.js';
5
5
  import { log } from '../../debug/index.js';
6
+ import { readBuffer } from '../../memory/buffer-pool.js';
6
7
  import {
7
8
  isContiguousLayer,
8
9
  isPagedLayer,
@@ -815,8 +816,52 @@ export class KVCache {
815
816
  }
816
817
  }
817
818
 
819
+ _destroyGpuBuffer(buffer) {
820
+ if (!buffer) return;
821
+ try {
822
+ buffer.destroy();
823
+ } catch {
824
+ // Ignore already-destroyed buffers during rollback.
825
+ }
826
+ }
827
+
828
+ _snapshotLayerGpuState(layer) {
829
+ return {
830
+ keysGPU: layer.keysGPU ?? null,
831
+ valuesGPU: layer.valuesGPU ?? null,
832
+ pageTableGPU: layer.pageTableGPU ?? null,
833
+ };
834
+ }
835
+
836
+ _rollbackMigratedLayers(snapshots) {
837
+ for (let l = 0; l < this.numLayers; l++) {
838
+ const layer = this.layers[l];
839
+ const snapshot = snapshots[l];
840
+ if (!snapshot) continue;
841
+
842
+ if (layer.keysGPU && layer.keysGPU !== snapshot.keysGPU) {
843
+ this._destroyGpuBuffer(layer.keysGPU);
844
+ }
845
+ if (layer.valuesGPU && layer.valuesGPU !== snapshot.valuesGPU) {
846
+ this._destroyGpuBuffer(layer.valuesGPU);
847
+ }
848
+ if (layer.pageTableGPU && layer.pageTableGPU !== snapshot.pageTableGPU) {
849
+ this._destroyGpuBuffer(layer.pageTableGPU);
850
+ }
851
+
852
+ layer.keysGPU = snapshot.keysGPU;
853
+ layer.valuesGPU = snapshot.valuesGPU;
854
+ if ('pageTableGPU' in layer) {
855
+ layer.pageTableGPU = snapshot.pageTableGPU;
856
+ }
857
+ }
858
+ }
859
+
818
860
 
819
861
  _migrateToGPU(device) {
862
+ const snapshots = this.layers.map((layer) => this._snapshotLayerGpuState(layer));
863
+
864
+ try {
820
865
  if (this.layout === 'paged') {
821
866
  log.info('KVCache', `Migrating ${this.currentSeqLen} positions to GPU (paged)...`);
822
867
  const numPages = Math.ceil(this.maxSeqLen / this.pageSize);
@@ -826,56 +871,66 @@ export class KVCache {
826
871
 
827
872
  for (let l = 0; l < this.numLayers; l++) {
828
873
  const layer = (this.layers[l]);
829
-
830
- if (!layer.keysGPU) {
831
- layer.keysGPU = device.createBuffer({
874
+ let keysGPU = null;
875
+ let valuesGPU = null;
876
+ let pageTableGPU = null;
877
+ try {
878
+ keysGPU = device.createBuffer({
832
879
  label: `kv_cache_keys_paged_layer_${l}`,
833
880
  size: bytesPerLayer,
834
881
  usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
835
882
  });
836
- }
837
- if (!layer.valuesGPU) {
838
- layer.valuesGPU = device.createBuffer({
883
+ valuesGPU = device.createBuffer({
839
884
  label: `kv_cache_values_paged_layer_${l}`,
840
885
  size: bytesPerLayer,
841
886
  usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
842
887
  });
843
- }
844
888
 
845
- if (!layer.pageTable) {
846
- layer.pageTable = new Uint32Array(numPages);
847
- for (let i = 0; i < numPages; i++) {
848
- layer.pageTable[i] = i;
889
+ if (!layer.pageTable) {
890
+ layer.pageTable = new Uint32Array(numPages);
891
+ for (let i = 0; i < numPages; i++) {
892
+ layer.pageTable[i] = i;
893
+ }
849
894
  }
850
- }
851
- if (!layer.pageTableGPU) {
852
- layer.pageTableGPU = device.createBuffer({
895
+ pageTableGPU = device.createBuffer({
853
896
  label: `kv_cache_page_table_layer_${l}`,
854
897
  size: pageTableBytes,
855
898
  usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
856
899
  });
857
- }
858
- device.queue.writeBuffer(layer.pageTableGPU, 0, layer.pageTable);
859
-
860
- const allocatedPages = layer.allocatedPages ?? 0;
861
- if (allocatedPages > 0) {
862
- const pageElems = this.pageSize * this.kvSize;
863
- const pageBytes = pageElems * this.bytesPerElem;
864
- for (let p = 0; p < allocatedPages; p++) {
865
- const keyPage = layer.keyPages?.[p];
866
- const valuePage = layer.valuePages?.[p];
867
- if (!keyPage || !valuePage) continue;
868
- const byteOffset = p * pageBytes;
869
- if (this.kvDtype === 'f16') {
870
- const keysF16 = f32ToF16Array(keyPage);
871
- const valuesF16 = f32ToF16Array(valuePage);
872
- device.queue.writeBuffer(layer.keysGPU, byteOffset, keysF16);
873
- device.queue.writeBuffer(layer.valuesGPU, byteOffset, valuesF16);
874
- } else {
875
- device.queue.writeBuffer(layer.keysGPU, byteOffset, keyPage);
876
- device.queue.writeBuffer(layer.valuesGPU, byteOffset, valuePage);
900
+ device.queue.writeBuffer(pageTableGPU, 0, layer.pageTable);
901
+
902
+ const allocatedPages = layer.allocatedPages ?? 0;
903
+ if (allocatedPages > 0) {
904
+ const pageElems = this.pageSize * this.kvSize;
905
+ const pageBytes = pageElems * this.bytesPerElem;
906
+ for (let p = 0; p < allocatedPages; p++) {
907
+ const keyPage = layer.keyPages?.[p];
908
+ const valuePage = layer.valuePages?.[p];
909
+ if (!keyPage || !valuePage) continue;
910
+ const byteOffset = p * pageBytes;
911
+ if (this.kvDtype === 'f16') {
912
+ const keysF16 = f32ToF16Array(keyPage);
913
+ const valuesF16 = f32ToF16Array(valuePage);
914
+ device.queue.writeBuffer(keysGPU, byteOffset, keysF16);
915
+ device.queue.writeBuffer(valuesGPU, byteOffset, valuesF16);
916
+ } else {
917
+ device.queue.writeBuffer(keysGPU, byteOffset, keyPage);
918
+ device.queue.writeBuffer(valuesGPU, byteOffset, valuePage);
919
+ }
877
920
  }
878
921
  }
922
+
923
+ this._destroyGpuBuffer(layer.keysGPU);
924
+ this._destroyGpuBuffer(layer.valuesGPU);
925
+ this._destroyGpuBuffer(layer.pageTableGPU);
926
+ layer.keysGPU = keysGPU;
927
+ layer.valuesGPU = valuesGPU;
928
+ layer.pageTableGPU = pageTableGPU;
929
+ } catch (error) {
930
+ this._destroyGpuBuffer(keysGPU);
931
+ this._destroyGpuBuffer(valuesGPU);
932
+ this._destroyGpuBuffer(pageTableGPU);
933
+ throw error;
879
934
  }
880
935
  }
881
936
 
@@ -890,53 +945,64 @@ export class KVCache {
890
945
 
891
946
  for (let l = 0; l < this.numLayers; l++) {
892
947
  const layer = (this.layers[l]);
893
-
894
- // Create GPU buffers if they don't exist
895
- if (!layer.keysGPU) {
896
- layer.keysGPU = device.createBuffer({
948
+ let keysGPU = null;
949
+ let valuesGPU = null;
950
+ try {
951
+ keysGPU = device.createBuffer({
897
952
  label: `kv_cache_keys_layer_${l}`,
898
953
  size: bytesPerLayer,
899
954
  usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
900
955
  });
901
- }
902
- if (!layer.valuesGPU) {
903
- layer.valuesGPU = device.createBuffer({
956
+ valuesGPU = device.createBuffer({
904
957
  label: `kv_cache_values_layer_${l}`,
905
958
  size: bytesPerLayer,
906
959
  usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
907
960
  });
908
- }
909
961
 
910
- // Upload existing CPU data to GPU
911
- const usedElems = layer.seqLen * this.kvSize;
912
- const usedSize = usedElems * this.bytesPerElem;
913
- if (usedSize > 0) {
914
- if (this.kvDtype === 'f16') {
915
- const keysF16 = f32ToF16Array(layer.keys.subarray(0, usedElems));
916
- const valuesF16 = f32ToF16Array(layer.values.subarray(0, usedElems));
917
- device.queue.writeBuffer(layer.keysGPU, 0, keysF16);
918
- device.queue.writeBuffer(layer.valuesGPU, 0, valuesF16);
919
- } else {
920
- device.queue.writeBuffer(
921
- layer.keysGPU,
922
- 0,
923
- layer.keys.buffer,
924
- layer.keys.byteOffset,
925
- usedSize
926
- );
927
- device.queue.writeBuffer(
928
- layer.valuesGPU,
929
- 0,
930
- layer.values.buffer,
931
- layer.values.byteOffset,
932
- usedSize
933
- );
962
+ // Upload existing CPU data to GPU
963
+ const usedElems = layer.seqLen * this.kvSize;
964
+ const usedSize = usedElems * this.bytesPerElem;
965
+ if (usedSize > 0) {
966
+ if (this.kvDtype === 'f16') {
967
+ const keysF16 = f32ToF16Array(layer.keys.subarray(0, usedElems));
968
+ const valuesF16 = f32ToF16Array(layer.values.subarray(0, usedElems));
969
+ device.queue.writeBuffer(keysGPU, 0, keysF16);
970
+ device.queue.writeBuffer(valuesGPU, 0, valuesF16);
971
+ } else {
972
+ device.queue.writeBuffer(
973
+ keysGPU,
974
+ 0,
975
+ layer.keys.buffer,
976
+ layer.keys.byteOffset,
977
+ usedSize
978
+ );
979
+ device.queue.writeBuffer(
980
+ valuesGPU,
981
+ 0,
982
+ layer.values.buffer,
983
+ layer.values.byteOffset,
984
+ usedSize
985
+ );
986
+ }
934
987
  }
988
+
989
+ this._destroyGpuBuffer(layer.keysGPU);
990
+ this._destroyGpuBuffer(layer.valuesGPU);
991
+ layer.keysGPU = keysGPU;
992
+ layer.valuesGPU = valuesGPU;
993
+ } catch (error) {
994
+ this._destroyGpuBuffer(keysGPU);
995
+ this._destroyGpuBuffer(valuesGPU);
996
+ throw error;
935
997
  }
936
998
  }
937
999
 
938
1000
  this.useGPU = true;
939
1001
  log.info('KVCache', 'Migration complete');
1002
+ } catch (error) {
1003
+ this._rollbackMigratedLayers(snapshots);
1004
+ throw error;
1005
+ }
940
1006
  }
941
1007
 
942
1008
 
@@ -962,44 +1028,24 @@ export class KVCache {
962
1028
  layer.values = new Float32Array(sizePerLayer);
963
1029
  }
964
1030
 
965
- // Create staging buffers for readback
966
- const keysStaging = device.createBuffer({
967
- size: usedSize,
968
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
969
- });
970
- const valuesStaging = device.createBuffer({
971
- size: usedSize,
972
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
973
- });
974
-
975
- // Copy from GPU cache to staging
976
- const encoder = device.createCommandEncoder({ label: 'kv_cache_sync' });
977
- encoder.copyBufferToBuffer(layer.keysGPU, 0, keysStaging, 0, usedSize);
978
- encoder.copyBufferToBuffer(layer.valuesGPU, 0, valuesStaging, 0, usedSize);
979
- device.queue.submit([encoder.finish()]);
980
-
981
- // Map and copy to CPU arrays
982
- await keysStaging.mapAsync(GPUMapMode.READ);
983
- await valuesStaging.mapAsync(GPUMapMode.READ);
1031
+ const [keysBytes, valuesBytes] = await Promise.all([
1032
+ readBuffer(layer.keysGPU, usedSize),
1033
+ readBuffer(layer.valuesGPU, usedSize),
1034
+ ]);
984
1035
 
985
1036
  if (this.kvDtype === 'f16') {
986
- const keysRaw = new Uint16Array(keysStaging.getMappedRange().slice(0));
987
- const valuesRaw = new Uint16Array(valuesStaging.getMappedRange().slice(0));
1037
+ const keysRaw = new Uint16Array(keysBytes);
1038
+ const valuesRaw = new Uint16Array(valuesBytes);
988
1039
  const keysData = f16ToF32Array(keysRaw);
989
1040
  const valuesData = f16ToF32Array(valuesRaw);
990
1041
  layer.keys.set(keysData);
991
1042
  layer.values.set(valuesData);
992
1043
  } else {
993
- const keysData = new Float32Array(keysStaging.getMappedRange().slice(0));
994
- const valuesData = new Float32Array(valuesStaging.getMappedRange().slice(0));
1044
+ const keysData = new Float32Array(keysBytes);
1045
+ const valuesData = new Float32Array(valuesBytes);
995
1046
  layer.keys.set(keysData);
996
1047
  layer.values.set(valuesData);
997
1048
  }
998
-
999
- keysStaging.unmap();
1000
- valuesStaging.unmap();
1001
- keysStaging.destroy();
1002
- valuesStaging.destroy();
1003
1049
  }
1004
1050
  }
1005
1051
 
@@ -60,7 +60,7 @@ export class TieredKVCache {
60
60
  : (tiering.mode === 'int4' ? 'int4' : 'none');
61
61
  this.compression = tiering.compression ?? { mode: defaultCompressionMode, blockSize: 1 };
62
62
 
63
- this.gating = tiering.gating ?? { mode: 'auto', minAluBwRatio: 0.0 };
63
+ this.gating = tiering.gating ?? { mode: 'force_off', minAluBwRatio: 0.0 };
64
64
 
65
65
  this.currentSeqLen = 0;
66
66
 
@@ -145,8 +145,10 @@ export class TieredKVCache {
145
145
  if (gating?.mode === 'force_off') return 'none';
146
146
  if (gating?.mode === 'force_on') return requested;
147
147
  if (gating?.mode === 'auto' && gating.minAluBwRatio > 0) {
148
- const ratio = 1.0;
149
- if (ratio < gating.minAluBwRatio) return 'none';
148
+ throw new Error(
149
+ 'TieredKVCache auto compression gating requires an explicit measured ALU/BW ratio. ' +
150
+ 'Use gating.mode="force_on"/"force_off" or set minAluBwRatio to 0.'
151
+ );
150
152
  }
151
153
  return requested;
152
154
  }
@@ -8,6 +8,9 @@ import { createTensor } from '../gpu/tensor.js';
8
8
  import { f16ToF32Array } from './kv-cache/types.js';
9
9
  import { selectRuleValue } from '../rules/rule-registry.js';
10
10
 
11
+ function isGpuBufferInstance(value) {
12
+ return typeof GPUBuffer !== 'undefined' && value instanceof GPUBuffer;
13
+ }
11
14
 
12
15
 
13
16
 
@@ -84,6 +87,12 @@ export class MoERouter {
84
87
 
85
88
 
86
89
  loadWeights(weights, bias = null) {
90
+ if (this._gateBiasGPU) {
91
+ this._gateBiasGPU.destroy();
92
+ }
93
+ if (this._gateWeightGPU) {
94
+ this._gateWeightGPU.destroy();
95
+ }
87
96
  this.gateWeight = weights;
88
97
  this.gateBias = bias;
89
98
  // Clear cached GPU uploads when swapping router parameters (e.g., per-layer routers).
@@ -91,13 +100,27 @@ export class MoERouter {
91
100
  this._gateWeightGPU = null;
92
101
  }
93
102
 
103
+ destroy() {
104
+ if (isGpuBufferInstance(this._gateBiasGPU)) {
105
+ this._gateBiasGPU.destroy();
106
+ }
107
+ if (isGpuBufferInstance(this._gateWeightGPU)) {
108
+ this._gateWeightGPU.destroy();
109
+ }
110
+ this._gateBiasGPU = null;
111
+ this._gateWeightGPU = null;
112
+ this.gateWeight = null;
113
+ this.gateBias = null;
114
+ this._biasAddPipelines.clear();
115
+ }
116
+
94
117
 
95
118
  computeRouterLogitsCPU(hiddenStates, numTokens) {
96
119
  if (!this.gateWeight) {
97
120
  throw new Error('Router gate weights not loaded');
98
121
  }
99
122
 
100
- if (this.gateWeight instanceof GPUBuffer || isWeightBuffer(this.gateWeight)) {
123
+ if (isGpuBufferInstance(this.gateWeight) || isWeightBuffer(this.gateWeight)) {
101
124
  throw new Error('Gate weights are on GPU, use computeRouterLogitsGPU instead');
102
125
  }
103
126
 
@@ -140,13 +163,18 @@ export class MoERouter {
140
163
  if (!gateWeightBuffer) {
141
164
  throw new Error('Router gate weights not loaded');
142
165
  }
143
- if (!isWeightBuffer(gateWeightBuffer) && !(gateWeightBuffer instanceof GPUBuffer)) {
166
+ if (!isWeightBuffer(gateWeightBuffer) && !isGpuBufferInstance(gateWeightBuffer)) {
144
167
  const uploaded = device.createBuffer({
145
168
  label: 'moe_gate_weight',
146
169
  size: gateWeightBuffer.byteLength,
147
170
  usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
148
171
  });
149
- device.queue.writeBuffer(uploaded, 0, (gateWeightBuffer));
172
+ try {
173
+ device.queue.writeBuffer(uploaded, 0, gateWeightBuffer);
174
+ } catch (error) {
175
+ uploaded.destroy();
176
+ throw error;
177
+ }
150
178
  this._gateWeightGPU = uploaded;
151
179
  this.gateWeight = uploaded;
152
180
  gateWeightBuffer = uploaded;
@@ -186,7 +214,7 @@ export class MoERouter {
186
214
 
187
215
 
188
216
  async _getGateBiasBuffer(device) {
189
- if (this.gateBias instanceof GPUBuffer) return this.gateBias;
217
+ if (isGpuBufferInstance(this.gateBias)) return this.gateBias;
190
218
  if (this._gateBiasGPU) return this._gateBiasGPU;
191
219
 
192
220
  if (!(this.gateBias instanceof Float32Array)) {
@@ -198,7 +226,12 @@ export class MoERouter {
198
226
  size: this.gateBias.byteLength,
199
227
  usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
200
228
  });
201
- device.queue.writeBuffer(buf, 0, (this.gateBias));
229
+ try {
230
+ device.queue.writeBuffer(buf, 0, this.gateBias);
231
+ } catch (error) {
232
+ buf.destroy();
233
+ throw error;
234
+ }
202
235
  this._gateBiasGPU = buf;
203
236
  return buf;
204
237
  }
@@ -206,7 +239,7 @@ export class MoERouter {
206
239
 
207
240
  _inferBiasDtype(bias) {
208
241
  if (bias instanceof Float32Array) return 'f32';
209
- if (bias instanceof GPUBuffer) {
242
+ if (isGpuBufferInstance(bias)) {
210
243
  const bytesPerElement = Math.round(bias.size / this.numExperts);
211
244
  return selectRuleValue('inference', 'dtype', 'f16OrF32FromBytes', { bytesPerElement });
212
245
  }
@@ -276,65 +309,64 @@ export class MoERouter {
276
309
  size: 16,
277
310
  usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
278
311
  });
279
- device.queue.writeBuffer(uniformBuffer, 0, uniformData);
280
-
281
- const bindGroup = device.createBindGroup({
282
- layout: pipeline.getBindGroupLayout(0),
283
- entries: [
284
- { binding: 0, resource: { buffer: uniformBuffer } },
285
- { binding: 1, resource: { buffer: logits } },
286
- { binding: 2, resource: { buffer: bias } },
287
- ],
288
- });
289
-
290
- const encoder = device.createCommandEncoder({ label: 'moe_router_bias_add_encoder' });
291
- const pass = encoder.beginComputePass({ label: 'moe_router_bias_add_pass' });
292
- pass.setPipeline(pipeline);
293
- pass.setBindGroup(0, bindGroup);
294
- const total = numTokens * this.numExperts;
295
- pass.dispatchWorkgroups(Math.ceil(total / 256));
296
- pass.end();
297
- device.queue.submit([encoder.finish()]);
312
+ try {
313
+ device.queue.writeBuffer(uniformBuffer, 0, uniformData);
314
+
315
+ const bindGroup = device.createBindGroup({
316
+ layout: pipeline.getBindGroupLayout(0),
317
+ entries: [
318
+ { binding: 0, resource: { buffer: uniformBuffer } },
319
+ { binding: 1, resource: { buffer: logits } },
320
+ { binding: 2, resource: { buffer: bias } },
321
+ ],
322
+ });
298
323
 
299
- uniformBuffer.destroy();
324
+ const encoder = device.createCommandEncoder({ label: 'moe_router_bias_add_encoder' });
325
+ const pass = encoder.beginComputePass({ label: 'moe_router_bias_add_pass' });
326
+ pass.setPipeline(pipeline);
327
+ pass.setBindGroup(0, bindGroup);
328
+ const total = numTokens * this.numExperts;
329
+ pass.dispatchWorkgroups(Math.ceil(total / 256));
330
+ pass.end();
331
+ device.queue.submit([encoder.finish()]);
332
+ } finally {
333
+ uniformBuffer.destroy();
334
+ }
300
335
  }
301
336
 
302
337
 
303
338
  async routeGPU(hiddenStates, numTokens) {
304
339
  // Compute router logits on GPU
305
340
  const logitsBuffer = await this.computeRouterLogitsGPU(hiddenStates, numTokens);
306
-
307
- // Read back logits to CPU for top-k selection
308
- // (GPU top-k is complex and not always faster for small numExperts)
309
- const logitsData = await readBuffer(logitsBuffer);
310
- const logits = this.lastLogitsDtype === 'f16'
311
- ? f16ToF32Array(new Uint16Array(logitsData))
312
- : new Float32Array(logitsData);
313
-
314
-
315
- const selections = [];
316
- this.activeExperts.clear();
317
-
318
- for (let t = 0; t < numTokens; t++) {
319
- const tokenLogits = logits.subarray(
320
- t * this.numExperts,
321
- (t + 1) * this.numExperts
322
- );
323
-
324
- const selection = this.selectExpertsForToken(tokenLogits);
325
- selections.push(selection);
326
-
327
- for (const idx of selection.indices) {
328
- this.activeExperts.add(idx);
329
- this.loadBalanceStats.expertCounts[idx]++;
341
+ try {
342
+ const logitsData = await readBuffer(logitsBuffer);
343
+ const logits = this.lastLogitsDtype === 'f16'
344
+ ? f16ToF32Array(new Uint16Array(logitsData))
345
+ : new Float32Array(logitsData);
346
+
347
+ const selections = [];
348
+ this.activeExperts.clear();
349
+
350
+ for (let t = 0; t < numTokens; t++) {
351
+ const tokenLogits = logits.subarray(
352
+ t * this.numExperts,
353
+ (t + 1) * this.numExperts
354
+ );
355
+
356
+ const selection = this.selectExpertsForToken(tokenLogits);
357
+ selections.push(selection);
358
+
359
+ for (const idx of selection.indices) {
360
+ this.activeExperts.add(idx);
361
+ this.loadBalanceStats.expertCounts[idx]++;
362
+ }
363
+ this.loadBalanceStats.totalTokens++;
330
364
  }
331
- this.loadBalanceStats.totalTokens++;
332
- }
333
-
334
- // Clean up logits buffer
335
- releaseBuffer(logitsBuffer);
336
365
 
337
- return selections;
366
+ return selections;
367
+ } finally {
368
+ releaseBuffer(logitsBuffer);
369
+ }
338
370
  }
339
371
 
340
372
 
@@ -4,9 +4,10 @@ import { ExpertRouter } from './expert-router.js';
4
4
  import { MultiModelRecorder } from '../gpu/multi-model-recorder.js';
5
5
  import { applyRepetitionPenalty, sample, getTopK } from './pipelines/text/sampling.js';
6
6
  import { finalizeLogits, extractLastPositionLogits } from './pipelines/text/logits/index.js';
7
+ import { readBufferWithCleanup } from './pipelines/text/logits/utils.js';
7
8
  import { isStopToken } from './pipelines/text/init.js';
8
9
  import { mergeMultipleLogits } from '../gpu/kernels/logit-merge.js';
9
- import { releaseBuffer, readBuffer } from '../memory/buffer-pool.js';
10
+ import { releaseBuffer } from '../memory/buffer-pool.js';
10
11
 
11
12
  const MIN_AGREEMENT_WEIGHT = 1e-4;
12
13
 
@@ -478,8 +479,9 @@ export class MultiModelNetwork {
478
479
  if (canMergeOnGpu) {
479
480
  const buffers = voterResults.map((result) => result.logitsBuffer);
480
481
  const mergedBuffer = await mergeMultipleLogits(buffers, rawVocabSize, normalizedWeights, 1.0);
481
- const mergedData = await readBuffer(mergedBuffer, rawVocabSize * 4);
482
- releaseBuffer(mergedBuffer);
482
+ const mergedData = await readBufferWithCleanup(mergedBuffer, rawVocabSize * 4, () => {
483
+ releaseBuffer(mergedBuffer);
484
+ });
483
485
  const rawMerged = new Float32Array(mergedData);
484
486
  const finalized = await finalizeLogits(
485
487
  rawMerged,
@@ -35,12 +35,21 @@ export interface EvolutionConfig {
35
35
  generations?: number;
36
36
  eliteCount?: number;
37
37
  mutationRate?: number;
38
+ random: () => number;
38
39
  evaluate: (genome: NetworkGenome) => Promise<number>;
39
40
  randomGenome: () => NetworkGenome;
40
41
  }
41
42
 
42
- export declare const mutateGenome: (genome: NetworkGenome, mutationRate?: number) => NetworkGenome;
43
+ export declare const mutateGenome: (
44
+ genome: NetworkGenome,
45
+ mutationRate?: number,
46
+ random?: (() => number) | null
47
+ ) => NetworkGenome;
43
48
 
44
- export declare const crossoverGenome: (a: NetworkGenome, b: NetworkGenome) => NetworkGenome;
49
+ export declare const crossoverGenome: (
50
+ a: NetworkGenome,
51
+ b: NetworkGenome,
52
+ random?: (() => number) | null
53
+ ) => NetworkGenome;
45
54
 
46
55
  export declare function evolveNetwork(config: EvolutionConfig): Promise<NetworkGenome>;