@simulatte/doppler 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316)
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +16 -23
  3. package/package.json +14 -1
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +1 -1
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +7 -5
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +12 -2
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +10 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  45. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  46. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  47. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  48. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  49. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  50. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  52. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  54. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  55. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  56. package/src/config/runtime.js +6 -1
  57. package/src/config/schema/debug.schema.d.ts +5 -0
  58. package/src/config/schema/doppler.schema.js +16 -21
  59. package/src/config/schema/inference-defaults.schema.js +3 -3
  60. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  61. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  62. package/src/config/schema/manifest.schema.d.ts +2 -1
  63. package/src/config/schema/manifest.schema.js +16 -3
  64. package/src/config/training-defaults.js +30 -22
  65. package/src/converter/conversion-plan.js +94 -9
  66. package/src/converter/core.d.ts +7 -0
  67. package/src/converter/core.js +14 -9
  68. package/src/converter/execution-v0-manifest.js +4 -1
  69. package/src/converter/index.d.ts +1 -0
  70. package/src/converter/index.js +1 -0
  71. package/src/converter/manifest-inference.js +43 -12
  72. package/src/converter/parsers/diffusion.js +0 -3
  73. package/src/converter/quantization-info.js +35 -15
  74. package/src/converter/shard-packer.d.ts +1 -1
  75. package/src/converter/shard-packer.js +4 -1
  76. package/src/debug/config.js +123 -11
  77. package/src/debug/signals.js +7 -1
  78. package/src/debug/tensor.d.ts +2 -0
  79. package/src/debug/tensor.js +13 -2
  80. package/src/distribution/p2p-control-plane.js +52 -12
  81. package/src/distribution/p2p-observability.js +43 -7
  82. package/src/distribution/p2p-webrtc-browser.js +20 -0
  83. package/src/distribution/shard-delivery.js +77 -26
  84. package/src/formats/gguf/types.js +33 -16
  85. package/src/formats/rdrr/groups.d.ts +12 -4
  86. package/src/formats/rdrr/groups.js +3 -6
  87. package/src/formats/rdrr/parsing.js +39 -2
  88. package/src/formats/rdrr/types.d.ts +2 -1
  89. package/src/gpu/command-recorder.js +86 -61
  90. package/src/gpu/device.d.ts +1 -0
  91. package/src/gpu/device.js +73 -19
  92. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  93. package/src/gpu/kernel-tuner/cache.js +71 -4
  94. package/src/gpu/kernel-tuner/tuner.js +22 -4
  95. package/src/gpu/kernels/attention.js +15 -34
  96. package/src/gpu/kernels/backward/adam.js +62 -58
  97. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  98. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  99. package/src/gpu/kernels/cast.js +191 -149
  100. package/src/gpu/kernels/check-stop.js +33 -44
  101. package/src/gpu/kernels/conv2d.js +27 -17
  102. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  103. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  104. package/src/gpu/kernels/dequant.js +178 -126
  105. package/src/gpu/kernels/energy.d.ts +3 -21
  106. package/src/gpu/kernels/energy.js +111 -88
  107. package/src/gpu/kernels/feature-check.js +1 -1
  108. package/src/gpu/kernels/fused_ffn.js +84 -65
  109. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  110. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  111. package/src/gpu/kernels/gather.js +33 -15
  112. package/src/gpu/kernels/gelu.js +19 -11
  113. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  114. package/src/gpu/kernels/groupnorm.js +34 -23
  115. package/src/gpu/kernels/kv-quantize.js +5 -2
  116. package/src/gpu/kernels/layernorm.js +35 -19
  117. package/src/gpu/kernels/logit-merge.js +5 -3
  118. package/src/gpu/kernels/matmul.js +58 -39
  119. package/src/gpu/kernels/modulate.js +23 -15
  120. package/src/gpu/kernels/moe.js +221 -175
  121. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  122. package/src/gpu/kernels/relu.js +18 -10
  123. package/src/gpu/kernels/repeat_channels.js +25 -17
  124. package/src/gpu/kernels/residual.js +37 -27
  125. package/src/gpu/kernels/rmsnorm.js +57 -41
  126. package/src/gpu/kernels/rope.js +3 -0
  127. package/src/gpu/kernels/sample.js +27 -38
  128. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  129. package/src/gpu/kernels/scale.js +18 -11
  130. package/src/gpu/kernels/shader-cache.js +4 -2
  131. package/src/gpu/kernels/silu.js +120 -72
  132. package/src/gpu/kernels/softmax.js +44 -25
  133. package/src/gpu/kernels/split_qkv.js +23 -13
  134. package/src/gpu/kernels/transpose.js +18 -10
  135. package/src/gpu/kernels/transpose.wgsl +5 -3
  136. package/src/gpu/kernels/upsample2d.js +21 -13
  137. package/src/gpu/kernels/utils.js +20 -13
  138. package/src/gpu/partitioned-buffer-pool.js +10 -2
  139. package/src/gpu/perf-guards.js +2 -9
  140. package/src/gpu/profiler.js +27 -22
  141. package/src/gpu/readback-utils.d.ts +16 -0
  142. package/src/gpu/readback-utils.js +41 -0
  143. package/src/gpu/submit-tracker.js +13 -0
  144. package/src/gpu/uniform-cache.d.ts +1 -0
  145. package/src/gpu/uniform-cache.js +30 -9
  146. package/src/hotswap/intent-bundle.js +6 -0
  147. package/src/hotswap/manifest.d.ts +10 -1
  148. package/src/hotswap/manifest.js +12 -2
  149. package/src/hotswap/runtime.js +30 -8
  150. package/src/index-browser.d.ts +44 -0
  151. package/src/index-browser.js +14 -0
  152. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  153. package/src/inference/browser-harness-contract-helpers.js +28 -0
  154. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  155. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  156. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  157. package/src/inference/browser-harness-model-helpers.js +217 -0
  158. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  159. package/src/inference/browser-harness-report-helpers.js +42 -0
  160. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  161. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  162. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  163. package/src/inference/browser-harness-suite-helpers.js +268 -0
  164. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  165. package/src/inference/browser-harness-text-helpers.js +788 -0
  166. package/src/inference/browser-harness.d.ts +6 -0
  167. package/src/inference/browser-harness.js +130 -1996
  168. package/src/inference/kv-cache/base.js +140 -94
  169. package/src/inference/kv-cache/tiered.js +5 -3
  170. package/src/inference/moe-router.js +88 -56
  171. package/src/inference/multi-model-network.js +5 -3
  172. package/src/inference/network-evolution.d.ts +11 -2
  173. package/src/inference/network-evolution.js +20 -21
  174. package/src/inference/pipelines/context.d.ts +3 -0
  175. package/src/inference/pipelines/context.js +142 -2
  176. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  177. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  178. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  179. package/src/inference/pipelines/diffusion/vae.js +3 -7
  180. package/src/inference/pipelines/energy/pipeline.js +27 -21
  181. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  182. package/src/inference/pipelines/energy/quintel.js +11 -0
  183. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  184. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  185. package/src/inference/pipelines/text/attention/projections.js +151 -101
  186. package/src/inference/pipelines/text/attention/record.js +62 -8
  187. package/src/inference/pipelines/text/attention/run.js +62 -8
  188. package/src/inference/pipelines/text/config.js +3 -4
  189. package/src/inference/pipelines/text/embed.js +2 -8
  190. package/src/inference/pipelines/text/execution-plan.js +41 -19
  191. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  192. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  193. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  194. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  195. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  196. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  197. package/src/inference/pipelines/text/generator-steps.js +298 -207
  198. package/src/inference/pipelines/text/generator.js +6 -23
  199. package/src/inference/pipelines/text/init.js +78 -20
  200. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  201. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  202. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  203. package/src/inference/pipelines/text/layer.js +3 -9
  204. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  205. package/src/inference/pipelines/text/linear-attention.js +80 -6
  206. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  207. package/src/inference/pipelines/text/logits/index.js +10 -11
  208. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  209. package/src/inference/pipelines/text/logits/utils.js +9 -0
  210. package/src/inference/pipelines/text/lora-apply.js +50 -32
  211. package/src/inference/pipelines/text/model-load.js +279 -104
  212. package/src/inference/pipelines/text/moe-cache.js +5 -4
  213. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  214. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  215. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  216. package/src/inference/pipelines/text/ops.js +90 -90
  217. package/src/inference/pipelines/text/probes.js +9 -9
  218. package/src/inference/pipelines/text/weights.js +17 -7
  219. package/src/inference/pipelines/text.js +13 -1
  220. package/src/inference/speculative.d.ts +2 -2
  221. package/src/inference/speculative.js +4 -18
  222. package/src/inference/test-harness.d.ts +1 -1
  223. package/src/inference/test-harness.js +15 -5
  224. package/src/inference/tokenizer.d.ts +0 -5
  225. package/src/inference/tokenizer.js +4 -23
  226. package/src/inference/tokenizers/bpe.js +9 -0
  227. package/src/inference/tokenizers/bundled.js +20 -0
  228. package/src/inference/tokenizers/sentencepiece.js +12 -0
  229. package/src/loader/doppler-loader.js +38 -22
  230. package/src/loader/dtype-utils.js +3 -44
  231. package/src/loader/embedding-loader.js +7 -3
  232. package/src/loader/experts/expert-cache.js +13 -6
  233. package/src/loader/experts/expert-loader.js +10 -6
  234. package/src/loader/final-weights-loader.js +8 -4
  235. package/src/loader/layer-loader.js +2 -1
  236. package/src/loader/loader-state.js +2 -2
  237. package/src/loader/memory-monitor.js +8 -0
  238. package/src/loader/multi-model-loader.d.ts +14 -0
  239. package/src/loader/multi-model-loader.js +70 -24
  240. package/src/loader/shard-cache.js +81 -12
  241. package/src/loader/shard-resolver.js +25 -3
  242. package/src/loader/tensors/tensor-loader.js +209 -144
  243. package/src/loader/tensors/tensor-reader.js +76 -19
  244. package/src/loader/weight-downcast.js +1 -1
  245. package/src/memory/buffer-pool.d.ts +9 -1
  246. package/src/memory/buffer-pool.js +109 -44
  247. package/src/memory/unified-detect.js +1 -1
  248. package/src/rules/inference/kernel-path.rules.json +24 -8
  249. package/src/rules/rule-registry.js +25 -1
  250. package/src/storage/backends/opfs-store.js +68 -24
  251. package/src/storage/downloader.js +364 -83
  252. package/src/storage/index.d.ts +3 -0
  253. package/src/storage/index.js +3 -0
  254. package/src/storage/preflight.d.ts +2 -2
  255. package/src/storage/preflight.js +24 -2
  256. package/src/storage/quickstart-downloader.js +11 -5
  257. package/src/storage/registry.js +10 -4
  258. package/src/storage/reports.js +1 -1
  259. package/src/storage/shard-manager.d.ts +15 -1
  260. package/src/storage/shard-manager.js +51 -3
  261. package/src/storage/source-artifact-store.d.ts +52 -0
  262. package/src/storage/source-artifact-store.js +234 -0
  263. package/src/tooling/command-api-constants.d.ts +9 -0
  264. package/src/tooling/command-api-constants.js +9 -0
  265. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  266. package/src/tooling/command-api-family-normalizers.js +343 -0
  267. package/src/tooling/command-api-helpers.d.ts +25 -0
  268. package/src/tooling/command-api-helpers.js +262 -0
  269. package/src/tooling/command-api.js +16 -602
  270. package/src/tooling/command-envelope.js +4 -1
  271. package/src/tooling/command-runner-shared.js +52 -18
  272. package/src/tooling/lean-execution-contract.js +150 -3
  273. package/src/tooling/node-browser-command-runner.js +161 -271
  274. package/src/tooling/node-command-runner.js +29 -3
  275. package/src/tooling/node-converter.js +27 -1
  276. package/src/tooling/node-source-runtime.d.ts +1 -1
  277. package/src/tooling/node-source-runtime.js +84 -3
  278. package/src/tooling/node-webgpu.js +24 -21
  279. package/src/tooling/opfs-cache.js +21 -4
  280. package/src/tooling/runtime-input-composition.d.ts +38 -0
  281. package/src/tooling/runtime-input-composition.js +86 -0
  282. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  283. package/src/tooling/source-runtime-bundle.js +261 -34
  284. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  285. package/src/tooling/source-runtime-materializer.js +93 -0
  286. package/src/training/attention-backward.js +32 -17
  287. package/src/training/autograd.js +80 -52
  288. package/src/training/checkpoint-watch.d.ts +2 -1
  289. package/src/training/checkpoint-watch.js +39 -6
  290. package/src/training/checkpoint.js +40 -11
  291. package/src/training/clip.js +2 -1
  292. package/src/training/datasets/token-batch.js +20 -8
  293. package/src/training/distillation/checkpoint-watch.js +1 -0
  294. package/src/training/distillation/student-fixture.d.ts +22 -0
  295. package/src/training/distillation/student-fixture.js +846 -0
  296. package/src/training/distillation/suite-data.d.ts +45 -0
  297. package/src/training/distillation/suite-data.js +189 -0
  298. package/src/training/lora-pipeline.js +4 -7
  299. package/src/training/lora.js +26 -12
  300. package/src/training/loss.js +5 -6
  301. package/src/training/objectives/cross_entropy.js +2 -5
  302. package/src/training/objectives/distill_kd.js +4 -8
  303. package/src/training/objectives/distill_triplet.js +4 -8
  304. package/src/training/objectives/ul_stage2_base.js +4 -8
  305. package/src/training/operator-command.js +2 -0
  306. package/src/training/optimizer.js +19 -7
  307. package/src/training/runner.js +2 -1
  308. package/src/training/suite.js +18 -978
  309. package/src/training/tensor-factory.d.ts +9 -0
  310. package/src/training/tensor-factory.js +13 -0
  311. package/src/training/trainer.js +3 -5
  312. package/src/training/ul_dataset.js +3 -5
  313. package/src/training/workloads.js +70 -79
  314. package/src/version.js +1 -1
  315. package/tools/convert-safetensors-node.js +22 -16
  316. package/tools/doppler-cli.js +44 -25
@@ -23,6 +23,7 @@ export class ShardCache {
23
23
  #inFlightLoads = 0;
24
24
  #highPriorityQueue = [];
25
25
  #lowPriorityQueue = [];
26
+ #epoch = 0;
26
27
 
27
28
  lastSource = null;
28
29
 
@@ -123,6 +124,7 @@ export class ShardCache {
123
124
  const shardInfo = this.#manifest?.shards?.[shardIndex];
124
125
  const sizeStr = shardInfo ? formatBytes(shardInfo.size) : '';
125
126
  const priority = options.priority === 'low' ? 'low' : 'high';
127
+ const epoch = this.#epoch;
126
128
 
127
129
  // 1. Check cache first
128
130
  if (this.#cache.has(shardIndex)) {
@@ -136,24 +138,29 @@ export class ShardCache {
136
138
  }
137
139
 
138
140
  // 2. Check if fetch is already in-flight - deduplicate concurrent requests
139
- if (this.#fetchPromises.has(shardIndex)) {
141
+ const inFlight = this.#fetchPromises.get(shardIndex);
142
+ if (inFlight && inFlight.epoch === epoch) {
140
143
  log.verbose('ShardCache', `Shard ${shardIndex}: waiting for in-flight fetch`);
141
- return this.#fetchPromises.get(shardIndex);
144
+ return inFlight.promise;
142
145
  }
143
146
 
144
147
  // 3. Start the actual fetch and store the promise for deduplication
145
148
  const fetchPromise = this.#scheduleLoad(
146
149
  priority,
147
- () => this.#doLoad(shardIndex, sizeStr)
150
+ epoch,
151
+ () => this.#doLoad(shardIndex, sizeStr, epoch)
148
152
  );
149
- this.#fetchPromises.set(shardIndex, fetchPromise);
153
+ const fetchEntry = { epoch, promise: fetchPromise };
154
+ this.#fetchPromises.set(shardIndex, fetchEntry);
150
155
 
151
156
  try {
152
157
  const result = await fetchPromise;
153
158
  return result;
154
159
  } finally {
155
160
  // Remove from in-flight map when done (success or error)
156
- this.#fetchPromises.delete(shardIndex);
161
+ if (this.#fetchPromises.get(shardIndex) === fetchEntry) {
162
+ this.#fetchPromises.delete(shardIndex);
163
+ }
157
164
  }
158
165
  }
159
166
 
@@ -195,6 +202,13 @@ export class ShardCache {
195
202
  throw new Error('Custom shard loader must return ArrayBuffer or Uint8Array.');
196
203
  }
197
204
 
205
+ #throwShortStreamRead(shardIndex, start, want, produced, path) {
206
+ throw new Error(
207
+ `Shard ${shardIndex} short stream read via ${path}: ` +
208
+ `offset=${start}, expected=${want}, got=${produced}.`
209
+ );
210
+ }
211
+
198
212
  async loadRange(shardIndex, offset = 0, length = null, options = {}) {
199
213
  const start = this.#toRangeOffset(offset);
200
214
  const want = length == null ? null : this.#toRangeOffset(length);
@@ -276,9 +290,15 @@ export class ShardCache {
276
290
  this.#setLastSource('RAM', 0, 'stream', 'cache');
277
291
  const view = new Uint8Array(cached);
278
292
  const end = want == null ? view.length : Math.min(view.length, start + want);
293
+ let produced = 0;
279
294
  for (let cursor = start; cursor < end; cursor += chunkBytes) {
280
295
  const sliceEnd = Math.min(end, cursor + chunkBytes);
281
- yield view.slice(cursor, sliceEnd);
296
+ const chunk = view.slice(cursor, sliceEnd);
297
+ produced += chunk.byteLength;
298
+ yield chunk;
299
+ }
300
+ if (want != null && produced < want) {
301
+ this.#throwShortStreamRead(shardIndex, start, want, produced, 'cache');
282
302
  }
283
303
  return;
284
304
  }
@@ -323,6 +343,15 @@ export class ShardCache {
323
343
  resumed += bytes.byteLength;
324
344
  yield bytes;
325
345
  }
346
+ if (want != null && produced + resumed < want) {
347
+ this.#throwShortStreamRead(
348
+ shardIndex,
349
+ start,
350
+ want,
351
+ produced + resumed,
352
+ 'custom-range-fallback'
353
+ );
354
+ }
326
355
  const elapsed = (performance.now() - streamStart) / 1000;
327
356
  this.#setLastSource(
328
357
  'custom',
@@ -358,6 +387,15 @@ export class ShardCache {
358
387
  resumed += bytes.byteLength;
359
388
  yield bytes;
360
389
  }
390
+ if (produced + resumed < want) {
391
+ this.#throwShortStreamRead(
392
+ shardIndex,
393
+ start,
394
+ want,
395
+ produced + resumed,
396
+ 'custom-range-fallback'
397
+ );
398
+ }
361
399
  const elapsed = (performance.now() - streamStart) / 1000;
362
400
  this.#setLastSource(
363
401
  'custom',
@@ -369,6 +407,9 @@ export class ShardCache {
369
407
  return;
370
408
  }
371
409
 
410
+ if (want != null && produced < want) {
411
+ this.#throwShortStreamRead(shardIndex, start, want, produced, 'custom-stream');
412
+ }
372
413
  const elapsed = (performance.now() - streamStart) / 1000;
373
414
  this.#setLastSource('custom', elapsed, 'stream', 'custom-stream');
374
415
  return;
@@ -403,6 +444,9 @@ export class ShardCache {
403
444
  }
404
445
  }
405
446
  }
447
+ if (want != null && produced < want) {
448
+ this.#throwShortStreamRead(shardIndex, start, want, produced, 'custom-range');
449
+ }
406
450
  this.#setLastSource(
407
451
  'custom',
408
452
  (performance.now() - rangeStart) / 1000,
@@ -414,8 +458,14 @@ export class ShardCache {
414
458
  }
415
459
 
416
460
  const streamStart = performance.now();
461
+ let produced = 0;
417
462
  for await (const chunk of streamShardRangeFromStore(shardIndex, start, want, { chunkBytes })) {
418
- yield chunk instanceof Uint8Array ? chunk : new Uint8Array(chunk);
463
+ const bytes = chunk instanceof Uint8Array ? chunk : new Uint8Array(chunk);
464
+ produced += bytes.byteLength;
465
+ yield bytes;
466
+ }
467
+ if (want != null && produced < want) {
468
+ this.#throwShortStreamRead(shardIndex, start, want, produced, 'backend-stream');
419
469
  }
420
470
  const elapsed = (performance.now() - streamStart) / 1000;
421
471
  const backend = getStorageBackendType() ?? 'storage';
@@ -426,7 +476,7 @@ export class ShardCache {
426
476
  return this.load(shardIndex, { priority: 'low' });
427
477
  }
428
478
 
429
- async #doLoad(shardIndex, sizeStr) {
479
+ async #doLoad(shardIndex, sizeStr, epoch) {
430
480
  if (this.#customLoader) {
431
481
  const startTime = performance.now();
432
482
  let data = await this.#customLoader(shardIndex);
@@ -453,7 +503,9 @@ export class ShardCache {
453
503
  // Normalize to ArrayBuffer for downstream slicing
454
504
  const arrayBuffer = this.#toArrayBuffer(data);
455
505
 
456
- this.#add(shardIndex, arrayBuffer);
506
+ if (epoch === this.#epoch) {
507
+ this.#add(shardIndex, arrayBuffer);
508
+ }
457
509
 
458
510
  const elapsed = (performance.now() - startTime) / 1000;
459
511
  this.#setLastSource('custom', elapsed, 'full', 'custom-loader');
@@ -463,7 +515,9 @@ export class ShardCache {
463
515
 
464
516
  const storageStart = performance.now();
465
517
  const data = await loadShardFromStore(shardIndex);
466
- this.#add(shardIndex, data);
518
+ if (epoch === this.#epoch) {
519
+ this.#add(shardIndex, data);
520
+ }
467
521
  const elapsed = (performance.now() - storageStart) / 1000;
468
522
  const backend = getStorageBackendType() ?? 'storage';
469
523
  this.#setLastSource(backend, elapsed, 'full', 'backend-full');
@@ -471,12 +525,15 @@ export class ShardCache {
471
525
  return data;
472
526
  }
473
527
 
474
- async #scheduleLoad(priority, task) {
528
+ async #scheduleLoad(priority, epoch, task) {
475
529
  const limit = this.#maxConcurrentLoads > 0
476
530
  ? this.#maxConcurrentLoads
477
531
  : Number.POSITIVE_INFINITY;
478
532
 
479
533
  if (this.#inFlightLoads < limit) {
534
+ if (epoch !== this.#epoch) {
535
+ throw new Error('Shard load invalidated by cache clear().');
536
+ }
480
537
  this.#inFlightLoads++;
481
538
  try {
482
539
  return await task();
@@ -487,7 +544,7 @@ export class ShardCache {
487
544
  }
488
545
 
489
546
  return new Promise((resolve, reject) => {
490
- const entry = { task, resolve, reject };
547
+ const entry = { task, resolve, reject, epoch };
491
548
  if (priority === 'low') {
492
549
  this.#lowPriorityQueue.push(entry);
493
550
  } else {
@@ -504,6 +561,10 @@ export class ShardCache {
504
561
  while (this.#inFlightLoads < limit) {
505
562
  const entry = this.#highPriorityQueue.shift() ?? this.#lowPriorityQueue.shift();
506
563
  if (!entry) return;
564
+ if (entry.epoch !== this.#epoch) {
565
+ entry.reject(new Error('Shard load invalidated by cache clear().'));
566
+ continue;
567
+ }
507
568
 
508
569
  this.#inFlightLoads++;
509
570
  Promise.resolve()
@@ -529,6 +590,14 @@ export class ShardCache {
529
590
  clear() {
530
591
  const count = this.#cache.size;
531
592
  const bytes = this.totalBytes;
593
+ this.#epoch++;
594
+ const queued = [...this.#highPriorityQueue, ...this.#lowPriorityQueue];
595
+ this.#highPriorityQueue = [];
596
+ this.#lowPriorityQueue = [];
597
+ this.#fetchPromises.clear();
598
+ for (const entry of queued) {
599
+ entry.reject(new Error('Shard load invalidated by cache clear().'));
600
+ }
532
601
  this.#cache.clear();
533
602
  debugTrace.loader(`Cleared shard cache: ${count} shards, ${formatBytes(bytes)} freed`);
534
603
  }
@@ -2,6 +2,28 @@ import { loadTensorsFromStore } from '../storage/shard-manager.js';
2
2
  import { parseTensorMap } from '../formats/rdrr/index.js';
3
3
  import { log, trace as debugTrace } from '../debug/index.js';
4
4
 
5
/**
 * Normalize a tensor's `spans` list into plain `{ shardIndex, offset, size }`
 * records.
 *
 * A span may name its shard with either `shardIndex` or `shard`; `shardIndex`
 * wins whenever it is a number. `offset` and `size` are copied through
 * unchanged, and any other properties on the span are dropped.
 *
 * @param {Array<object>|undefined} spans - Raw span list, or `undefined` when
 *   the tensor has no spans.
 * @param {string} name - Tensor name; used only in error messages.
 * @param {string} sourceLabel - Where the spans came from; used only in error
 *   messages.
 * @returns {Array<{shardIndex: number, offset: *, size: *}>|undefined}
 *   `undefined` when `spans` is `undefined`, otherwise a new array.
 * @throws {Error} If `spans` is defined but not an array, if a span carries no
 *   numeric shard index, or if the shard index is not a non-negative integer.
 */
function normalizeLocationSpans(spans, name, sourceLabel) {
  if (spans === undefined) {
    return undefined;
  }
  if (!Array.isArray(spans)) {
    throw new Error(`Tensor "${name}" has invalid spans in ${sourceLabel}`);
  }
  return spans.map((span, spanIndex) => {
    const shardIndex = typeof span?.shardIndex === 'number'
      ? span.shardIndex
      : span?.shard;
    if (typeof shardIndex !== 'number') {
      throw new Error(`Tensor "${name}" span[${spanIndex}] missing shard index in ${sourceLabel}`);
    }
    // NaN, Infinity, fractions and negatives are all `typeof 'number'` but can
    // never address a shard; reject them here, next to the offending manifest
    // entry, rather than letting them surface later as an obscure lookup failure.
    if (!Number.isInteger(shardIndex) || shardIndex < 0) {
      throw new Error(
        `Tensor "${name}" span[${spanIndex}] has invalid shard index ${shardIndex} in ${sourceLabel}`
      );
    }
    const { offset, size } = span;
    return { shardIndex, offset, size };
  });
}
26
+
5
27
  export async function buildTensorLocations(manifest, options = {}) {
6
28
  const locations = new Map();
7
29
 
@@ -37,14 +59,14 @@ export async function buildTensorLocations(manifest, options = {}) {
37
59
  throw new Error(`Tensor "${name}" missing role in tensors.json`);
38
60
  }
39
61
  locations.set(name, {
40
- shardIndex: info.shard,
62
+ shardIndex: info.shardIndex ?? info.shard,
41
63
  offset: info.offset,
42
64
  size: info.size,
43
65
  shape: info.shape,
44
66
  dtype: info.dtype,
45
67
  role: info.role,
46
68
  group: info.group,
47
- spans: info.spans,
69
+ spans: normalizeLocationSpans(info.spans, name, 'tensors.json'),
48
70
  layout: info.layout,
49
71
  originalShape: info.originalShape,
50
72
  });
@@ -73,7 +95,7 @@ export async function buildTensorLocations(manifest, options = {}) {
73
95
  dtype: tensorInfo.dtype,
74
96
  role: tensorInfo.role,
75
97
  group: tensorInfo.group,
76
- spans: tensorInfo.spans,
98
+ spans: normalizeLocationSpans(tensorInfo.spans, name, 'manifest.tensors'),
77
99
  layout: tensorInfo.layout,
78
100
  originalShape: tensorInfo.originalShape,
79
101
  });