@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -5,6 +5,7 @@ import {
5
5
  computeHash,
6
6
  getStorageBackendType,
7
7
  } from '../storage/shard-manager.js';
8
+ import { getExpectedShardHash } from '../formats/rdrr/index.js';
8
9
  import { formatBytes } from '../storage/quota.js';
9
10
  import { log, trace as debugTrace } from '../debug/index.js';
10
11
  import { getRuntimeConfig } from '../config/runtime.js';
@@ -23,6 +24,7 @@ export class ShardCache {
23
24
  #inFlightLoads = 0;
24
25
  #highPriorityQueue = [];
25
26
  #lowPriorityQueue = [];
27
+ #epoch = 0;
26
28
 
27
29
  lastSource = null;
28
30
 
@@ -123,6 +125,7 @@ export class ShardCache {
123
125
  const shardInfo = this.#manifest?.shards?.[shardIndex];
124
126
  const sizeStr = shardInfo ? formatBytes(shardInfo.size) : '';
125
127
  const priority = options.priority === 'low' ? 'low' : 'high';
128
+ const epoch = this.#epoch;
126
129
 
127
130
  // 1. Check cache first
128
131
  if (this.#cache.has(shardIndex)) {
@@ -136,24 +139,29 @@ export class ShardCache {
136
139
  }
137
140
 
138
141
  // 2. Check if fetch is already in-flight - deduplicate concurrent requests
139
- if (this.#fetchPromises.has(shardIndex)) {
142
+ const inFlight = this.#fetchPromises.get(shardIndex);
143
+ if (inFlight && inFlight.epoch === epoch) {
140
144
  log.verbose('ShardCache', `Shard ${shardIndex}: waiting for in-flight fetch`);
141
- return this.#fetchPromises.get(shardIndex);
145
+ return inFlight.promise;
142
146
  }
143
147
 
144
148
  // 3. Start the actual fetch and store the promise for deduplication
145
149
  const fetchPromise = this.#scheduleLoad(
146
150
  priority,
147
- () => this.#doLoad(shardIndex, sizeStr)
151
+ epoch,
152
+ () => this.#doLoad(shardIndex, sizeStr, epoch)
148
153
  );
149
- this.#fetchPromises.set(shardIndex, fetchPromise);
154
+ const fetchEntry = { epoch, promise: fetchPromise };
155
+ this.#fetchPromises.set(shardIndex, fetchEntry);
150
156
 
151
157
  try {
152
158
  const result = await fetchPromise;
153
159
  return result;
154
160
  } finally {
155
161
  // Remove from in-flight map when done (success or error)
156
- this.#fetchPromises.delete(shardIndex);
162
+ if (this.#fetchPromises.get(shardIndex) === fetchEntry) {
163
+ this.#fetchPromises.delete(shardIndex);
164
+ }
157
165
  }
158
166
  }
159
167
 
@@ -195,6 +203,13 @@ export class ShardCache {
195
203
  throw new Error('Custom shard loader must return ArrayBuffer or Uint8Array.');
196
204
  }
197
205
 
206
+ #throwShortStreamRead(shardIndex, start, want, produced, path) {
207
+ throw new Error(
208
+ `Shard ${shardIndex} short stream read via ${path}: ` +
209
+ `offset=${start}, expected=${want}, got=${produced}.`
210
+ );
211
+ }
212
+
198
213
  async loadRange(shardIndex, offset = 0, length = null, options = {}) {
199
214
  const start = this.#toRangeOffset(offset);
200
215
  const want = length == null ? null : this.#toRangeOffset(length);
@@ -276,9 +291,15 @@ export class ShardCache {
276
291
  this.#setLastSource('RAM', 0, 'stream', 'cache');
277
292
  const view = new Uint8Array(cached);
278
293
  const end = want == null ? view.length : Math.min(view.length, start + want);
294
+ let produced = 0;
279
295
  for (let cursor = start; cursor < end; cursor += chunkBytes) {
280
296
  const sliceEnd = Math.min(end, cursor + chunkBytes);
281
- yield view.slice(cursor, sliceEnd);
297
+ const chunk = view.slice(cursor, sliceEnd);
298
+ produced += chunk.byteLength;
299
+ yield chunk;
300
+ }
301
+ if (want != null && produced < want) {
302
+ this.#throwShortStreamRead(shardIndex, start, want, produced, 'cache');
282
303
  }
283
304
  return;
284
305
  }
@@ -323,6 +344,15 @@ export class ShardCache {
323
344
  resumed += bytes.byteLength;
324
345
  yield bytes;
325
346
  }
347
+ if (want != null && produced + resumed < want) {
348
+ this.#throwShortStreamRead(
349
+ shardIndex,
350
+ start,
351
+ want,
352
+ produced + resumed,
353
+ 'custom-range-fallback'
354
+ );
355
+ }
326
356
  const elapsed = (performance.now() - streamStart) / 1000;
327
357
  this.#setLastSource(
328
358
  'custom',
@@ -358,6 +388,15 @@ export class ShardCache {
358
388
  resumed += bytes.byteLength;
359
389
  yield bytes;
360
390
  }
391
+ if (produced + resumed < want) {
392
+ this.#throwShortStreamRead(
393
+ shardIndex,
394
+ start,
395
+ want,
396
+ produced + resumed,
397
+ 'custom-range-fallback'
398
+ );
399
+ }
361
400
  const elapsed = (performance.now() - streamStart) / 1000;
362
401
  this.#setLastSource(
363
402
  'custom',
@@ -369,6 +408,9 @@ export class ShardCache {
369
408
  return;
370
409
  }
371
410
 
411
+ if (want != null && produced < want) {
412
+ this.#throwShortStreamRead(shardIndex, start, want, produced, 'custom-stream');
413
+ }
372
414
  const elapsed = (performance.now() - streamStart) / 1000;
373
415
  this.#setLastSource('custom', elapsed, 'stream', 'custom-stream');
374
416
  return;
@@ -403,6 +445,9 @@ export class ShardCache {
403
445
  }
404
446
  }
405
447
  }
448
+ if (want != null && produced < want) {
449
+ this.#throwShortStreamRead(shardIndex, start, want, produced, 'custom-range');
450
+ }
406
451
  this.#setLastSource(
407
452
  'custom',
408
453
  (performance.now() - rangeStart) / 1000,
@@ -414,8 +459,14 @@ export class ShardCache {
414
459
  }
415
460
 
416
461
  const streamStart = performance.now();
462
+ let produced = 0;
417
463
  for await (const chunk of streamShardRangeFromStore(shardIndex, start, want, { chunkBytes })) {
418
- yield chunk instanceof Uint8Array ? chunk : new Uint8Array(chunk);
464
+ const bytes = chunk instanceof Uint8Array ? chunk : new Uint8Array(chunk);
465
+ produced += bytes.byteLength;
466
+ yield bytes;
467
+ }
468
+ if (want != null && produced < want) {
469
+ this.#throwShortStreamRead(shardIndex, start, want, produced, 'backend-stream');
419
470
  }
420
471
  const elapsed = (performance.now() - streamStart) / 1000;
421
472
  const backend = getStorageBackendType() ?? 'storage';
@@ -426,7 +477,7 @@ export class ShardCache {
426
477
  return this.load(shardIndex, { priority: 'low' });
427
478
  }
428
479
 
429
- async #doLoad(shardIndex, sizeStr) {
480
+ async #doLoad(shardIndex, sizeStr, epoch) {
430
481
  if (this.#customLoader) {
431
482
  const startTime = performance.now();
432
483
  let data = await this.#customLoader(shardIndex);
@@ -434,11 +485,11 @@ export class ShardCache {
434
485
  // Verify hash if enabled
435
486
  if (this.#verifyHashes && this.#manifest) {
436
487
  const shardInfo = this.#manifest.shards?.[shardIndex];
437
- const expectedHash = shardInfo?.hash;
488
+ const algorithm = shardInfo?.hashAlgorithm ?? this.#manifest.hashAlgorithm;
489
+ const expectedHash = getExpectedShardHash(shardInfo, algorithm);
438
490
  if (!expectedHash) {
439
491
  throw new Error(`Shard ${shardIndex} missing hash in manifest.`);
440
492
  }
441
- const algorithm = shardInfo?.hashAlgorithm ?? this.#manifest.hashAlgorithm;
442
493
  if (!algorithm) {
443
494
  throw new Error(`Manifest missing hashAlgorithm for shard ${shardIndex}.`);
444
495
  }
@@ -453,7 +504,9 @@ export class ShardCache {
453
504
  // Normalize to ArrayBuffer for downstream slicing
454
505
  const arrayBuffer = this.#toArrayBuffer(data);
455
506
 
456
- this.#add(shardIndex, arrayBuffer);
507
+ if (epoch === this.#epoch) {
508
+ this.#add(shardIndex, arrayBuffer);
509
+ }
457
510
 
458
511
  const elapsed = (performance.now() - startTime) / 1000;
459
512
  this.#setLastSource('custom', elapsed, 'full', 'custom-loader');
@@ -463,7 +516,9 @@ export class ShardCache {
463
516
 
464
517
  const storageStart = performance.now();
465
518
  const data = await loadShardFromStore(shardIndex);
466
- this.#add(shardIndex, data);
519
+ if (epoch === this.#epoch) {
520
+ this.#add(shardIndex, data);
521
+ }
467
522
  const elapsed = (performance.now() - storageStart) / 1000;
468
523
  const backend = getStorageBackendType() ?? 'storage';
469
524
  this.#setLastSource(backend, elapsed, 'full', 'backend-full');
@@ -471,12 +526,15 @@ export class ShardCache {
471
526
  return data;
472
527
  }
473
528
 
474
- async #scheduleLoad(priority, task) {
529
+ async #scheduleLoad(priority, epoch, task) {
475
530
  const limit = this.#maxConcurrentLoads > 0
476
531
  ? this.#maxConcurrentLoads
477
532
  : Number.POSITIVE_INFINITY;
478
533
 
479
534
  if (this.#inFlightLoads < limit) {
535
+ if (epoch !== this.#epoch) {
536
+ throw new Error('Shard load invalidated by cache clear().');
537
+ }
480
538
  this.#inFlightLoads++;
481
539
  try {
482
540
  return await task();
@@ -487,7 +545,7 @@ export class ShardCache {
487
545
  }
488
546
 
489
547
  return new Promise((resolve, reject) => {
490
- const entry = { task, resolve, reject };
548
+ const entry = { task, resolve, reject, epoch };
491
549
  if (priority === 'low') {
492
550
  this.#lowPriorityQueue.push(entry);
493
551
  } else {
@@ -504,6 +562,10 @@ export class ShardCache {
504
562
  while (this.#inFlightLoads < limit) {
505
563
  const entry = this.#highPriorityQueue.shift() ?? this.#lowPriorityQueue.shift();
506
564
  if (!entry) return;
565
+ if (entry.epoch !== this.#epoch) {
566
+ entry.reject(new Error('Shard load invalidated by cache clear().'));
567
+ continue;
568
+ }
507
569
 
508
570
  this.#inFlightLoads++;
509
571
  Promise.resolve()
@@ -529,6 +591,14 @@ export class ShardCache {
529
591
  clear() {
530
592
  const count = this.#cache.size;
531
593
  const bytes = this.totalBytes;
594
+ this.#epoch++;
595
+ const queued = [...this.#highPriorityQueue, ...this.#lowPriorityQueue];
596
+ this.#highPriorityQueue = [];
597
+ this.#lowPriorityQueue = [];
598
+ this.#fetchPromises.clear();
599
+ for (const entry of queued) {
600
+ entry.reject(new Error('Shard load invalidated by cache clear().'));
601
+ }
532
602
  this.#cache.clear();
533
603
  debugTrace.loader(`Cleared shard cache: ${count} shards, ${formatBytes(bytes)} freed`);
534
604
  }
@@ -2,6 +2,28 @@ import { loadTensorsFromStore } from '../storage/shard-manager.js';
2
2
  import { parseTensorMap } from '../formats/rdrr/index.js';
3
3
  import { log, trace as debugTrace } from '../debug/index.js';
4
4
 
5
+ function normalizeLocationSpans(spans, name, sourceLabel) {
6
+ if (spans === undefined) {
7
+ return undefined;
8
+ }
9
+ if (!Array.isArray(spans)) {
10
+ throw new Error(`Tensor "${name}" has invalid spans in ${sourceLabel}`);
11
+ }
12
+ return spans.map((span, spanIndex) => {
13
+ const shardIndex = typeof span?.shardIndex === 'number'
14
+ ? span.shardIndex
15
+ : span?.shard;
16
+ if (typeof shardIndex !== 'number') {
17
+ throw new Error(`Tensor "${name}" span[${spanIndex}] missing shard index in ${sourceLabel}`);
18
+ }
19
+ return {
20
+ shardIndex,
21
+ offset: span.offset,
22
+ size: span.size,
23
+ };
24
+ });
25
+ }
26
+
5
27
  export async function buildTensorLocations(manifest, options = {}) {
6
28
  const locations = new Map();
7
29
 
@@ -37,14 +59,14 @@ export async function buildTensorLocations(manifest, options = {}) {
37
59
  throw new Error(`Tensor "${name}" missing role in tensors.json`);
38
60
  }
39
61
  locations.set(name, {
40
- shardIndex: info.shard,
62
+ shardIndex: info.shardIndex ?? info.shard,
41
63
  offset: info.offset,
42
64
  size: info.size,
43
65
  shape: info.shape,
44
66
  dtype: info.dtype,
45
67
  role: info.role,
46
68
  group: info.group,
47
- spans: info.spans,
69
+ spans: normalizeLocationSpans(info.spans, name, 'tensors.json'),
48
70
  layout: info.layout,
49
71
  originalShape: info.originalShape,
50
72
  });
@@ -73,7 +95,7 @@ export async function buildTensorLocations(manifest, options = {}) {
73
95
  dtype: tensorInfo.dtype,
74
96
  role: tensorInfo.role,
75
97
  group: tensorInfo.group,
76
- spans: tensorInfo.spans,
98
+ spans: normalizeLocationSpans(tensorInfo.spans, name, 'manifest.tensors'),
77
99
  layout: tensorInfo.layout,
78
100
  originalShape: tensorInfo.originalShape,
79
101
  });