@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -16,6 +16,21 @@ import { selectRuleValue } from '../../rules/rule-registry.js';
16
16
 
17
17
  let loggedF32UpcastNonMatmul = false;
18
18
 
19
+ function isGpuBufferInstance(value) {
20
+ return typeof GPUBuffer !== 'undefined' && value instanceof GPUBuffer;
21
+ }
22
+
23
+ function isReleasableBuffer(value) {
24
+ return typeof value === 'object' && value !== null && 'size' in value;
25
+ }
26
+
27
+ function releaseOwnedGpuBuffer(buffer, owned) {
28
+ if (!owned || !isReleasableBuffer(buffer)) {
29
+ return;
30
+ }
31
+ releaseBuffer(buffer);
32
+ }
33
+
19
34
  function logF32UpcastNonMatmul(name, numElements, bufferSize) {
20
35
  if (loggedF32UpcastNonMatmul) {
21
36
  return;
@@ -152,66 +167,80 @@ export function convertF16ToF32CPU(f16Data) {
152
167
 
153
168
  export async function loadQ4KFused(shardData, location, name) {
154
169
  const device = getDevice();
155
- const buffer = shardData instanceof GPUBuffer
170
+ const ownsBuffer = !isGpuBufferInstance(shardData);
171
+ const buffer = isGpuBufferInstance(shardData)
156
172
  ? shardData
157
173
  : acquireAlignedBuffer(location.size, `q4k_${name}`);
158
- if (!(shardData instanceof GPUBuffer)) {
159
- writeBufferAligned(device, buffer, shardData);
174
+ try {
175
+ if (ownsBuffer) {
176
+ writeBufferAligned(device, buffer, shardData);
177
+ }
178
+ return {
179
+ data: createWeightBuffer(buffer, 'q4k', 'row', location.shape, name),
180
+ allocatedBuffers: [buffer],
181
+ };
182
+ } catch (error) {
183
+ releaseOwnedGpuBuffer(buffer, ownsBuffer);
184
+ throw error;
160
185
  }
161
-
162
- return {
163
- data: createWeightBuffer(buffer, 'q4k', 'row', location.shape, name),
164
- allocatedBuffers: [buffer],
165
- };
166
186
  }
167
187
 
168
188
 
169
189
  export async function loadQ4KDequant(shardData, location, name, config) {
170
190
  const device = getDevice();
171
- const quantBuffer = shardData instanceof GPUBuffer
191
+ let ownsQuantBuffer = !isGpuBufferInstance(shardData);
192
+ const quantBuffer = isGpuBufferInstance(shardData)
172
193
  ? shardData
173
194
  : acquireAlignedBuffer(location.size, `quant_${name}`);
174
- if (!(shardData instanceof GPUBuffer)) {
175
- writeBufferAligned(device, quantBuffer, shardData);
176
- }
177
-
178
- const outputDtype = getQ4KOutputDtype(location, config);
179
-
180
- // Check if this is a 2D matrix with K (columns) not aligned to QK_K (256).
181
- // If so, we need row-wise dequant to produce proper row-major output.
182
- const is2DMatrix = Array.isArray(location.shape) && location.shape.length === 2;
183
- const K = is2DMatrix ? location.shape[1] : 0;
184
- const needsRowwise = is2DMatrix && K > 0 && K % QK_K !== 0;
195
+ let dequantized = null;
196
+ try {
197
+ if (ownsQuantBuffer) {
198
+ writeBufferAligned(device, quantBuffer, shardData);
199
+ }
185
200
 
186
- let dequantizedTensor;
187
- if (needsRowwise) {
188
- const rows = location.shape[0];
189
- debugTrace.loader(
190
- `Dequantizing ${name} (row-wise): [${rows},${K}], K not 256-aligned, ` +
191
- `outputDtype=${outputDtype}`
192
- );
193
- dequantizedTensor = await dequantizeRowwise(quantBuffer, rows, K, { outputDtype });
194
- } else {
195
- const numBlocks = Math.ceil(location.size / Q4K_BLOCK_BYTES);
196
- debugTrace.loader(
197
- `Dequantizing ${name}: size=${location.size}, numBlocks=${numBlocks}, ` +
198
- `outputDtype=${outputDtype}, expectedOutput=${numBlocks * QK_K * (outputDtype === 'f16' ? 2 : 4)}`
199
- );
200
- dequantizedTensor = await dequantize(quantBuffer, numBlocks, { outputDtype });
201
- }
202
- const dequantized = dequantizedTensor.buffer;
201
+ const outputDtype = getQ4KOutputDtype(location, config);
202
+
203
+ const is2DMatrix = Array.isArray(location.shape) && location.shape.length === 2;
204
+ const K = is2DMatrix ? location.shape[1] : 0;
205
+ const needsRowwise = is2DMatrix && K > 0 && K % QK_K !== 0;
206
+
207
+ let dequantizedTensor;
208
+ if (needsRowwise) {
209
+ const rows = location.shape[0];
210
+ debugTrace.loader(
211
+ `Dequantizing ${name} (row-wise): [${rows},${K}], K not 256-aligned, ` +
212
+ `outputDtype=${outputDtype}`
213
+ );
214
+ dequantizedTensor = await dequantizeRowwise(quantBuffer, rows, K, { outputDtype });
215
+ } else {
216
+ const numBlocks = Math.ceil(location.size / Q4K_BLOCK_BYTES);
217
+ debugTrace.loader(
218
+ `Dequantizing ${name}: size=${location.size}, numBlocks=${numBlocks}, ` +
219
+ `outputDtype=${outputDtype}, expectedOutput=${numBlocks * QK_K * (outputDtype === 'f16' ? 2 : 4)}`
220
+ );
221
+ dequantizedTensor = await dequantize(quantBuffer, numBlocks, { outputDtype });
222
+ }
223
+ dequantized = dequantizedTensor.buffer;
203
224
 
204
- debugTrace.loader(`Dequantized ${name}: resultSize=${dequantized.size}`);
205
- releaseBuffer(quantBuffer);
225
+ debugTrace.loader(`Dequantized ${name}: resultSize=${dequantized.size}`);
226
+ releaseOwnedGpuBuffer(quantBuffer, ownsQuantBuffer);
227
+ ownsQuantBuffer = false;
206
228
 
207
- const layout = getWeightLayout(location, config);
208
-
209
- const dtype = outputDtype;
229
+ const layout = getWeightLayout(location, config);
230
+ const dtype = outputDtype;
210
231
 
211
- return {
212
- data: createWeightBuffer(dequantized, dtype, layout, location.shape, name),
213
- allocatedBuffers: [dequantized],
214
- };
232
+ return {
233
+ data: createWeightBuffer(dequantized, dtype, layout, location.shape, name),
234
+ allocatedBuffers: [dequantized],
235
+ };
236
+ } catch (error) {
237
+ if (isReleasableBuffer(dequantized)) {
238
+ releaseBuffer(dequantized);
239
+ }
240
+ throw error;
241
+ } finally {
242
+ releaseOwnedGpuBuffer(quantBuffer, ownsQuantBuffer);
243
+ }
215
244
  }
216
245
 
217
246
 
@@ -219,97 +248,124 @@ export async function loadQ6K(shardData, location, name) {
219
248
  const device = getDevice();
220
249
 
221
250
  debugTrace.loader(`Loading Q6_K tensor "${name}", size=${location.size}`);
222
- const quantBuffer = shardData instanceof GPUBuffer
251
+ let ownsQuantBuffer = !isGpuBufferInstance(shardData);
252
+ const quantBuffer = isGpuBufferInstance(shardData)
223
253
  ? shardData
224
254
  : acquireAlignedBuffer(location.size, `quant_${name}`);
225
- if (!(shardData instanceof GPUBuffer)) {
226
- writeBufferAligned(device, quantBuffer, shardData);
227
- }
255
+ let dequantized = null;
256
+ try {
257
+ if (ownsQuantBuffer) {
258
+ writeBufferAligned(device, quantBuffer, shardData);
259
+ }
228
260
 
229
- const numBlocks = Math.floor(location.size / Q6K_BLOCK_BYTES);
230
- debugTrace.loader(
231
- `Dequantizing Q6_K ${name}: size=${location.size}, numBlocks=${numBlocks}, ` +
232
- `expectedOutput=${numBlocks * 256 * 2} (f16)`
233
- );
261
+ const numBlocks = Math.floor(location.size / Q6K_BLOCK_BYTES);
262
+ debugTrace.loader(
263
+ `Dequantizing Q6_K ${name}: size=${location.size}, numBlocks=${numBlocks}, ` +
264
+ `expectedOutput=${numBlocks * 256 * 2} (f16)`
265
+ );
234
266
 
235
- const dequantizedTensor = await dequantizeQ6K(quantBuffer, numBlocks, { outputDtype: 'f16' });
236
- const dequantized = dequantizedTensor.buffer;
267
+ const dequantizedTensor = await dequantizeQ6K(quantBuffer, numBlocks, { outputDtype: 'f16' });
268
+ dequantized = dequantizedTensor.buffer;
237
269
 
238
- debugTrace.loader(`Dequantized Q6_K ${name}: resultSize=${dequantized.size}`);
239
- releaseBuffer(quantBuffer);
270
+ debugTrace.loader(`Dequantized Q6_K ${name}: resultSize=${dequantized.size}`);
271
+ releaseOwnedGpuBuffer(quantBuffer, ownsQuantBuffer);
272
+ ownsQuantBuffer = false;
273
+
274
+ const isMatmulWeight = shouldDequantizeToF16(location);
275
+ if (isMatmulWeight) {
276
+ return {
277
+ data: createWeightBuffer(dequantized, 'f16', 'row', location.shape, name),
278
+ allocatedBuffers: [dequantized],
279
+ };
280
+ }
240
281
 
241
- const isMatmulWeight = shouldDequantizeToF16(location);
242
- if (isMatmulWeight) {
243
282
  return {
244
- data: createWeightBuffer(dequantized, 'f16', 'row', location.shape, name),
283
+ data: applyBufferLayout(dequantized, location, 'f16'),
245
284
  allocatedBuffers: [dequantized],
246
285
  };
286
+ } catch (error) {
287
+ if (isReleasableBuffer(dequantized)) {
288
+ releaseBuffer(dequantized);
289
+ }
290
+ throw error;
291
+ } finally {
292
+ releaseOwnedGpuBuffer(quantBuffer, ownsQuantBuffer);
247
293
  }
248
-
249
- return {
250
- data: applyBufferLayout(dequantized, location, 'f16'),
251
- allocatedBuffers: [dequantized],
252
- };
253
294
  }
254
295
 
255
296
 
256
297
  export async function loadBF16(shardData, location, name, config) {
257
298
  const device = getDevice();
258
- const srcBuffer = shardData instanceof GPUBuffer
299
+ let ownsSrcBuffer = !isGpuBufferInstance(shardData);
300
+ const srcBuffer = isGpuBufferInstance(shardData)
259
301
  ? shardData
260
302
  : acquireAlignedBuffer(location.size, `${name}_bf16`);
261
- if (!(shardData instanceof GPUBuffer)) {
262
- writeBufferAligned(device, srcBuffer, shardData);
263
- }
264
-
265
- const numElements = location.size / 2;
266
- const caps = config.gpuCapabilities || getKernelCapabilities();
267
- const isMatmulWeight = shouldDequantizeToF16(location);
303
+ let resultBuffer = null;
304
+ try {
305
+ if (ownsSrcBuffer) {
306
+ writeBufferAligned(device, srcBuffer, shardData);
307
+ }
268
308
 
269
- // For matmul weights with F16 support: BF16 -> F16 directly
270
- if (caps?.hasF16 && isMatmulWeight) {
271
- const f16Tensor = await runBF16ToF16(srcBuffer, [numElements], name);
272
- releaseBuffer(srcBuffer);
273
- debugTrace.loader(`BF16->F16 for matmul weight: ${name} (${numElements} elements)`);
309
+ const numElements = location.size / 2;
310
+ const caps = config.gpuCapabilities || getKernelCapabilities();
311
+ const isMatmulWeight = shouldDequantizeToF16(location);
312
+ const keepF32Weights = config.keepF32Weights === true;
274
313
 
275
-
276
- const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
277
- layout: location.layout ?? null,
278
- useColumnWise: false,
279
- });
280
- return {
281
- data: createWeightBuffer(f16Tensor.buffer, 'f16', layout, location.shape, name),
282
- allocatedBuffers: [f16Tensor.buffer],
283
- };
284
- }
285
-
286
- // Standard path: BF16 -> F32
287
- const dstBuffer = await convertBF16ToF32GPU(srcBuffer, numElements, name);
288
- releaseBuffer(srcBuffer);
314
+ if (caps?.hasF16 && isMatmulWeight && !keepF32Weights) {
315
+ const f16Tensor = await runBF16ToF16(srcBuffer, [numElements], name);
316
+ resultBuffer = f16Tensor.buffer;
317
+ releaseOwnedGpuBuffer(srcBuffer, ownsSrcBuffer);
318
+ ownsSrcBuffer = false;
319
+ debugTrace.loader(`BF16->F16 for matmul weight: ${name} (${numElements} elements)`);
289
320
 
290
- if (dstBuffer instanceof GPUBuffer) {
291
- if (isMatmulWeight) {
292
-
293
321
  const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
294
322
  layout: location.layout ?? null,
295
323
  useColumnWise: false,
296
324
  });
297
325
  return {
298
- data: createWeightBuffer(dstBuffer, 'f32', layout, location.shape, name),
326
+ data: createWeightBuffer(f16Tensor.buffer, 'f16', layout, location.shape, name),
327
+ allocatedBuffers: [f16Tensor.buffer],
328
+ };
329
+ }
330
+
331
+ if (isMatmulWeight && keepF32Weights) {
332
+ debugTrace.loader(`Keeping BF16 matmul weight in f32: ${name} (keepF32Weights=true)`);
333
+ }
334
+
335
+ const dstBuffer = await convertBF16ToF32GPU(srcBuffer, numElements, name);
336
+ resultBuffer = dstBuffer;
337
+ releaseOwnedGpuBuffer(srcBuffer, ownsSrcBuffer);
338
+ ownsSrcBuffer = false;
339
+
340
+ if (isGpuBufferInstance(dstBuffer)) {
341
+ if (isMatmulWeight) {
342
+ const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
343
+ layout: location.layout ?? null,
344
+ useColumnWise: false,
345
+ });
346
+ return {
347
+ data: createWeightBuffer(dstBuffer, 'f32', layout, location.shape, name),
348
+ allocatedBuffers: [dstBuffer],
349
+ };
350
+ }
351
+ return {
352
+ data: applyBufferLayout(dstBuffer, location, 'f32'),
299
353
  allocatedBuffers: [dstBuffer],
300
354
  };
301
355
  }
356
+
302
357
  return {
303
- data: applyBufferLayout(dstBuffer, location, 'f32'),
304
- allocatedBuffers: [dstBuffer],
358
+ data: dstBuffer,
359
+ allocatedBuffers: [],
305
360
  };
361
+ } catch (error) {
362
+ if (isReleasableBuffer(resultBuffer)) {
363
+ releaseBuffer(resultBuffer);
364
+ }
365
+ throw error;
366
+ } finally {
367
+ releaseOwnedGpuBuffer(srcBuffer, ownsSrcBuffer);
306
368
  }
307
-
308
- // Float32Array returned (shouldn't happen in GPU path)
309
- return {
310
- data: dstBuffer,
311
- allocatedBuffers: [],
312
- };
313
369
  }
314
370
 
315
371
 
@@ -318,55 +374,69 @@ export async function loadFloat(shardData, location, name, config) {
318
374
  throw new Error('Tensor load config is required.');
319
375
  }
320
376
  const device = getDevice();
321
- const buffer = shardData instanceof GPUBuffer
377
+ let ownsBuffer = !isGpuBufferInstance(shardData);
378
+ const buffer = isGpuBufferInstance(shardData)
322
379
  ? shardData
323
380
  : acquireAlignedBuffer(location.size, name);
324
- if (!(shardData instanceof GPUBuffer)) {
325
- writeBufferAligned(device, buffer, shardData);
326
- }
327
-
328
- const dtype = selectRuleValue('loader', 'weights', 'floatLocationDtype', {
329
- locationDtype: location.dtype,
330
- });
331
- const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
332
- layout: location.layout ?? null,
333
- useColumnWise: false,
334
- });
335
- const isMatmulWeight = shouldDequantizeToF16(location);
381
+ let resultBuffer = null;
382
+ try {
383
+ if (ownsBuffer) {
384
+ writeBufferAligned(device, buffer, shardData);
385
+ }
336
386
 
337
- // Return WeightBuffer for matmul weights
338
- if (isMatmulWeight) {
339
- return {
340
- data: createWeightBuffer(buffer, dtype, layout, location.shape, name),
341
- allocatedBuffers: [buffer],
342
- };
343
- }
387
+ const dtype = selectRuleValue('loader', 'weights', 'floatLocationDtype', {
388
+ locationDtype: location.dtype,
389
+ });
390
+ const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
391
+ layout: location.layout ?? null,
392
+ useColumnWise: false,
393
+ });
394
+ const isMatmulWeight = shouldDequantizeToF16(location);
344
395
 
345
- // Non-matmul F16 weights need upcast to F32
346
- if (dtype === 'f16') {
347
- if (config.allowF32UpcastNonMatmul === false) {
396
+ if (isMatmulWeight) {
397
+ ownsBuffer = false;
348
398
  return {
349
- data: applyBufferLayout(buffer, location, 'f16'),
399
+ data: createWeightBuffer(buffer, dtype, layout, location.shape, name),
350
400
  allocatedBuffers: [buffer],
351
401
  };
352
402
  }
353
- const numElements = location.shape.reduce((a, b) => a * b, 1);
354
- logF32UpcastNonMatmul(name, numElements, buffer.size);
355
- debugTrace.loader(`F16->F32 upcast for non-matmul: ${name} (${numElements} elements, bufSize=${buffer.size})`);
356
- const inputTensor = createTensor(buffer, 'f16', [numElements], `${name}_f16`);
357
- const f32Tensor = await castF16ToF32(inputTensor);
358
- debugTrace.loader(`F16->F32 complete: ${name} resultSize=${f32Tensor.buffer.size}`);
359
- releaseBuffer(buffer);
403
+
404
+ if (dtype === 'f16') {
405
+ if (config.allowF32UpcastNonMatmul === false) {
406
+ ownsBuffer = false;
407
+ return {
408
+ data: applyBufferLayout(buffer, location, 'f16'),
409
+ allocatedBuffers: [buffer],
410
+ };
411
+ }
412
+ const numElements = location.shape.reduce((a, b) => a * b, 1);
413
+ logF32UpcastNonMatmul(name, numElements, buffer.size);
414
+ debugTrace.loader(`F16->F32 upcast for non-matmul: ${name} (${numElements} elements, bufSize=${buffer.size})`);
415
+ const inputTensor = createTensor(buffer, 'f16', [numElements], `${name}_f16`);
416
+ const f32Tensor = await castF16ToF32(inputTensor);
417
+ resultBuffer = f32Tensor.buffer;
418
+ debugTrace.loader(`F16->F32 complete: ${name} resultSize=${f32Tensor.buffer.size}`);
419
+ releaseOwnedGpuBuffer(buffer, ownsBuffer);
420
+ ownsBuffer = false;
421
+ return {
422
+ data: applyBufferLayout(f32Tensor.buffer, location, 'f32'),
423
+ allocatedBuffers: [f32Tensor.buffer],
424
+ };
425
+ }
426
+
427
+ ownsBuffer = false;
360
428
  return {
361
- data: applyBufferLayout(f32Tensor.buffer, location, 'f32'),
362
- allocatedBuffers: [f32Tensor.buffer],
429
+ data: applyBufferLayout(buffer, location, dtype),
430
+ allocatedBuffers: [buffer],
363
431
  };
432
+ } catch (error) {
433
+ if (isReleasableBuffer(resultBuffer)) {
434
+ releaseBuffer(resultBuffer);
435
+ }
436
+ throw error;
437
+ } finally {
438
+ releaseOwnedGpuBuffer(buffer, ownsBuffer);
364
439
  }
365
-
366
- return {
367
- data: applyBufferLayout(buffer, location, dtype),
368
- allocatedBuffers: [buffer],
369
- };
370
440
  }
371
441
 
372
442
  // ============================================================================
@@ -2,30 +2,84 @@
2
2
 
3
3
  import { trace } from '../../debug/index.js';
4
4
 
5
+ function resolveSpanShardIndex(span, name, spanIndex) {
6
+ const shardIndex = typeof span?.shardIndex === 'number'
7
+ ? span.shardIndex
8
+ : span?.shard;
9
+ if (!Number.isInteger(shardIndex) || shardIndex < 0) {
10
+ throw new Error(
11
+ `[DopplerLoader] Tensor "${name}" span[${spanIndex}] has invalid shard index.`
12
+ );
13
+ }
14
+ return shardIndex;
15
+ }
16
+
17
+ function validateSpanField(value, field, name, spanIndex) {
18
+ if (!Number.isInteger(value) || value < 0) {
19
+ throw new Error(
20
+ `[DopplerLoader] Tensor "${name}" span[${spanIndex}] has invalid ${field}.`
21
+ );
22
+ }
23
+ return value;
24
+ }
25
+
26
+ function getLocationSpans(location) {
27
+ if (!Array.isArray(location?.spans) || location.spans.length === 0) {
28
+ return null;
29
+ }
30
+ return location.spans;
31
+ }
32
+
33
+ function resolveLocationShardIndex(location, name) {
34
+ const shardIndex = typeof location?.shardIndex === 'number'
35
+ ? location.shardIndex
36
+ : location?.shard;
37
+ if (!Number.isInteger(shardIndex) || shardIndex < 0) {
38
+ throw new Error(`[DopplerLoader] Tensor "${name}" has invalid shard index.`);
39
+ }
40
+ return shardIndex;
41
+ }
42
+
43
+ function validateLocationField(location, field, name) {
44
+ const value = location?.[field];
45
+ if (!Number.isInteger(value) || value < 0) {
46
+ throw new Error(`[DopplerLoader] Tensor "${name}" has invalid ${field}.`);
47
+ }
48
+ return value;
49
+ }
5
50
 
6
51
  export async function assembleShardData(location, name, loadShard, loadShardRange = null) {
7
- if (location.spans) {
8
- trace.loader(`Assembling tensor "${name}" from ${location.spans.length} spans`);
52
+ const spans = getLocationSpans(location);
53
+ if (spans) {
54
+ trace.loader(`Assembling tensor "${name}" from ${spans.length} spans`);
9
55
 
10
- const chunks = await Promise.all(location.spans.map(async (span) => {
56
+ const chunks = await Promise.all(spans.map(async (span, spanIndex) => {
57
+ const shardIndex = resolveSpanShardIndex(span, name, spanIndex);
58
+ const offset = validateSpanField(span.offset, 'offset', name, spanIndex);
59
+ const size = validateSpanField(span.size, 'size', name, spanIndex);
11
60
  if (loadShardRange) {
12
- const data = await loadShardRange(span.shardIndex, span.offset, span.size);
13
- if (span.size > data.byteLength) {
61
+ const data = await loadShardRange(shardIndex, offset, size);
62
+ if (size > data.byteLength) {
14
63
  throw new Error(
15
- `[DopplerLoader] Shard ${span.shardIndex} too small for tensor "${name}" span.`
64
+ `[DopplerLoader] Shard ${shardIndex} too small for tensor "${name}" span.`
16
65
  );
17
66
  }
18
- return new Uint8Array(data, 0, span.size);
67
+ return new Uint8Array(data, 0, size);
19
68
  }
20
- const data = await loadShard(span.shardIndex);
21
- if (span.offset + span.size > data.byteLength) {
69
+ const data = await loadShard(shardIndex);
70
+ if (offset + size > data.byteLength) {
22
71
  throw new Error(
23
- `[DopplerLoader] Shard ${span.shardIndex} too small for tensor "${name}" span.`
72
+ `[DopplerLoader] Shard ${shardIndex} too small for tensor "${name}" span.`
24
73
  );
25
74
  }
26
- return new Uint8Array(data, span.offset, span.size);
75
+ return new Uint8Array(data, offset, size);
27
76
  }));
28
77
  const totalSize = chunks.reduce((s, c) => s + c.length, 0);
78
+ if (Number.isInteger(location?.size) && totalSize !== location.size) {
79
+ throw new Error(
80
+ `[DopplerLoader] Tensor "${name}" spans total ${totalSize} bytes, expected ${location.size}.`
81
+ );
82
+ }
29
83
  const combined = new Uint8Array(totalSize);
30
84
  let offset = 0;
31
85
  for (const chunk of chunks) {
@@ -36,21 +90,24 @@ export async function assembleShardData(location, name, loadShard, loadShardRang
36
90
  }
37
91
 
38
92
  // Single shard - use view to avoid copying
93
+ const shardIndex = resolveLocationShardIndex(location, name);
94
+ const offset = validateLocationField(location, 'offset', name);
95
+ const size = validateLocationField(location, 'size', name);
39
96
  if (loadShardRange) {
40
- const slice = await loadShardRange(location.shardIndex, location.offset, location.size);
41
- if (location.size > slice.byteLength) {
97
+ const slice = await loadShardRange(shardIndex, offset, size);
98
+ if (size > slice.byteLength) {
42
99
  throw new Error(
43
- `[DopplerLoader] Shard ${location.shardIndex} too small for tensor "${name}" (offset=${location.offset}, size=${location.size}, shard=${slice.byteLength})`
100
+ `[DopplerLoader] Shard ${shardIndex} too small for tensor "${name}" (offset=${offset}, size=${size}, shard=${slice.byteLength})`
44
101
  );
45
102
  }
46
- return new Uint8Array(slice, 0, location.size);
103
+ return new Uint8Array(slice, 0, size);
47
104
  }
48
105
 
49
- const fullShard = await loadShard(location.shardIndex);
50
- if (location.offset + location.size > fullShard.byteLength) {
106
+ const fullShard = await loadShard(shardIndex);
107
+ if (offset + size > fullShard.byteLength) {
51
108
  throw new Error(
52
- `[DopplerLoader] Shard ${location.shardIndex} too small for tensor "${name}" (offset=${location.offset}, size=${location.size}, shard=${fullShard.byteLength})`
109
+ `[DopplerLoader] Shard ${shardIndex} too small for tensor "${name}" (offset=${offset}, size=${size}, shard=${fullShard.byteLength})`
53
110
  );
54
111
  }
55
- return new Uint8Array(fullShard, location.offset, location.size);
112
+ return new Uint8Array(fullShard, offset, size);
56
113
  }
@@ -47,7 +47,7 @@ export async function maybeDowncastToF16(buf, options) {
47
47
  }
48
48
 
49
49
  // Handle raw GPUBuffer
50
- if (buf instanceof GPUBuffer) {
50
+ if (typeof GPUBuffer !== 'undefined' && buf instanceof GPUBuffer) {
51
51
  return downcastGPUBuffer(buf, options);
52
52
  }
53
53
 
@@ -80,6 +80,12 @@ export declare class BufferPool {
80
80
  */
81
81
  release(buffer: GPUBuffer): void;
82
82
 
83
+ /**
84
+ * Force-dispose an active buffer instead of returning it to the pool.
85
+ * Use for error paths where the buffer contents or device state may be invalid.
86
+ */
87
+ discard(buffer: GPUBuffer): void;
88
+
83
89
  /**
84
90
  * Check if a buffer is currently tracked as active by the pool
85
91
  */
@@ -159,7 +165,8 @@ export declare class BufferPool {
159
165
  }
160
166
 
161
167
  /**
162
- * Get the global buffer pool
168
+ * Get the global buffer pool for the current device epoch.
169
+ * If the active device has changed or was lost, a fresh global pool is created.
163
170
  */
164
171
  export function getBufferPool(): BufferPool;
165
172
 
@@ -179,6 +186,7 @@ export declare const createUploadBuffer: (size: number) => GPUBuffer;
179
186
  export declare const createUniformBuffer: (size: number) => GPUBuffer;
180
187
  export declare const acquireBuffer: (size: number, usage?: GPUBufferUsageFlags, label?: string) => GPUBuffer;
181
188
  export declare const releaseBuffer: (buffer: GPUBuffer) => void;
189
+ export declare const discardBuffer: (buffer: GPUBuffer) => void;
182
190
  export declare const isBufferActive: (buffer: GPUBuffer) => boolean;
183
191
  export declare const getBufferRequestedSize: (buffer: GPUBuffer) => number;
184
192
  export declare const uploadData: (buffer: GPUBuffer, data: ArrayBuffer | ArrayBufferView, offset?: number) => void;