@simulatte/doppler 0.1.6 → 0.1.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (355):
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -1,4 +1,5 @@
1
1
  import { log } from '../debug/index.js';
2
+ import { getExpectedShardHash } from '../formats/rdrr/index.js';
2
3
  import {
3
4
  computeHash,
4
5
  createStreamingHasher,
@@ -55,22 +56,30 @@ const inFlightDeliveries = new Map();
55
56
  const p2pTransportPolicyState = new WeakMap();
56
57
 
57
58
  function normalizeDistributionSourceOrder(rawSources = []) {
58
- if (!Array.isArray(rawSources)) {
59
+ if (rawSources === undefined || rawSources === null) {
59
60
  return [...DISTRIBUTION_SOURCES];
60
61
  }
62
+ if (!Array.isArray(rawSources)) {
63
+ throw new Error('distribution.sourceOrder must be an array when provided.');
64
+ }
61
65
 
62
66
  const normalized = [];
63
67
  const seen = new Set();
64
68
 
65
69
  for (const value of rawSources) {
66
70
  const source = String(value || '').trim().toLowerCase();
67
- if (!DISTRIBUTION_SOURCES.includes(source)) continue;
71
+ if (!DISTRIBUTION_SOURCES.includes(source)) {
72
+ throw new Error(`distribution.sourceOrder contains unsupported source "${source || value}".`);
73
+ }
68
74
  if (seen.has(source)) continue;
69
75
  seen.add(source);
70
76
  normalized.push(source);
71
77
  }
72
78
 
73
- return normalized.length > 0 ? normalized : [...DISTRIBUTION_SOURCES];
79
+ if (normalized.length === 0) {
80
+ throw new Error('distribution.sourceOrder must include at least one supported source.');
81
+ }
82
+ return normalized;
74
83
  }
75
84
 
76
85
  function normalizeInteger(value, fallback, allowZero = false) {
@@ -81,6 +90,23 @@ function normalizeInteger(value, fallback, allowZero = false) {
81
90
  : fallback;
82
91
  }
83
92
 
93
+ function normalizeRequiredInteger(value, label, { allowZero = false, fallback = null } = {}) {
94
+ if (value === undefined || value === null) {
95
+ if (fallback !== null) {
96
+ return fallback;
97
+ }
98
+ throw new Error(`${label} is required.`);
99
+ }
100
+ const parsed = Number(value);
101
+ const min = allowZero ? 0 : 1;
102
+ if (!Number.isInteger(parsed) || parsed < min) {
103
+ throw new Error(
104
+ `${label} must be a ${allowZero ? 'non-negative' : 'positive'} integer when provided.`
105
+ );
106
+ }
107
+ return parsed;
108
+ }
109
+
84
110
  function normalizeContentEncodings(value) {
85
111
  if (!value) return [];
86
112
  return value
@@ -95,13 +121,17 @@ function normalizeManifestVersionSet(value) {
95
121
  return normalized || null;
96
122
  }
97
123
 
98
- function normalizeSamplingRate(value, fallback = 1) {
124
+ function normalizeSamplingRate(value, fallback = 1, label = 'distribution.sourceDecision.trace.samplingRate') {
125
+ if (value === undefined || value === null) {
126
+ return fallback;
127
+ }
99
128
  const parsed = Number(value);
100
129
  if (!Number.isFinite(parsed)) {
101
- return fallback;
130
+ throw new Error(`${label} must be a finite number between 0 and 1 when provided.`);
131
+ }
132
+ if (parsed < 0 || parsed > 1) {
133
+ throw new Error(`${label} must be between 0 and 1 when provided.`);
102
134
  }
103
- if (parsed <= 0) return 0;
104
- if (parsed >= 1) return 1;
105
135
  return parsed;
106
136
  }
107
137
 
@@ -479,19 +509,28 @@ function normalizeP2PConfig(config = {}) {
479
509
 
480
510
  return {
481
511
  enabled,
482
- timeoutMs: normalizeInteger(rawTimeoutMs, DEFAULT_P2P_TIMEOUT_MS),
483
- maxRetries: normalizeInteger(rawMaxRetries, DEFAULT_P2P_MAX_RETRIES, true),
484
- retryDelayMs: normalizeInteger(rawRetryDelayMs, DEFAULT_P2P_RETRY_DELAY_MS, true),
512
+ timeoutMs: normalizeRequiredInteger(
513
+ rawTimeoutMs,
514
+ 'distribution.p2p.timeoutMs',
515
+ { fallback: DEFAULT_P2P_TIMEOUT_MS }
516
+ ),
517
+ maxRetries: normalizeRequiredInteger(
518
+ rawMaxRetries,
519
+ 'distribution.p2p.maxRetries',
520
+ { allowZero: true, fallback: DEFAULT_P2P_MAX_RETRIES }
521
+ ),
522
+ retryDelayMs: normalizeRequiredInteger(
523
+ rawRetryDelayMs,
524
+ 'distribution.p2p.retryDelayMs',
525
+ { allowZero: true, fallback: DEFAULT_P2P_RETRY_DELAY_MS }
526
+ ),
485
527
  transport,
486
528
  contractVersion,
487
529
  controlPlane: normalizeP2PControlPlaneConfig({
488
530
  ...DEFAULT_DISTRIBUTION_CONFIG.p2p.controlPlane,
489
531
  ...rawControlPlane,
490
- tokenRefreshSkewMs: normalizeInteger(
491
- rawControlPlane.tokenRefreshSkewMs,
492
- DEFAULT_P2P_CONTROL_PLANE_TOKEN_REFRESH_SKEW_MS,
493
- true
494
- ),
532
+ tokenRefreshSkewMs: rawControlPlane.tokenRefreshSkewMs
533
+ ?? DEFAULT_P2P_CONTROL_PLANE_TOKEN_REFRESH_SKEW_MS,
495
534
  }),
496
535
  security: {
497
536
  requireSessionToken: rawSecurity.requireSessionToken === true,
@@ -499,19 +538,20 @@ function normalizeP2PConfig(config = {}) {
499
538
  tokenExpiresAtMs: normalizeOptionalTimestamp(rawSecurity.tokenExpiresAtMs),
500
539
  },
501
540
  abuse: {
502
- rateLimitPerMinute: normalizeInteger(
541
+ rateLimitPerMinute: normalizeRequiredInteger(
503
542
  rawAbuse.rateLimitPerMinute,
504
- DEFAULT_P2P_RATE_LIMIT_PER_MINUTE,
505
- true
543
+ 'distribution.p2p.abuse.rateLimitPerMinute',
544
+ { allowZero: true, fallback: DEFAULT_P2P_RATE_LIMIT_PER_MINUTE }
506
545
  ),
507
- maxConsecutiveFailures: normalizeInteger(
546
+ maxConsecutiveFailures: normalizeRequiredInteger(
508
547
  rawAbuse.maxConsecutiveFailures,
509
- DEFAULT_P2P_MAX_CONSECUTIVE_FAILURES
548
+ 'distribution.p2p.abuse.maxConsecutiveFailures',
549
+ { fallback: DEFAULT_P2P_MAX_CONSECUTIVE_FAILURES }
510
550
  ),
511
- quarantineMs: normalizeInteger(
551
+ quarantineMs: normalizeRequiredInteger(
512
552
  rawAbuse.quarantineMs,
513
- DEFAULT_P2P_QUARANTINE_MS,
514
- true
553
+ 'distribution.p2p.abuse.quarantineMs',
554
+ { allowZero: true, fallback: DEFAULT_P2P_QUARANTINE_MS }
515
555
  ),
516
556
  },
517
557
  };
@@ -1293,9 +1333,21 @@ async function downloadShardFromHttp(baseUrl, shardInfo, shardIndex, options = {
1293
1333
  const startTime = performance.now();
1294
1334
  const url = buildShardUrl(baseUrl, shardInfo);
1295
1335
  let lastError;
1296
- const maxRetries = normalizeInteger(options.maxRetries, 3, true);
1297
- const initialRetryDelayMs = normalizeInteger(options.initialRetryDelayMs, 1000);
1298
- const maxRetryDelayMs = normalizeInteger(options.maxRetryDelayMs, 30000);
1336
+ const maxRetries = normalizeRequiredInteger(
1337
+ options.maxRetries,
1338
+ 'download.maxRetries',
1339
+ { allowZero: true, fallback: 3 }
1340
+ );
1341
+ const initialRetryDelayMs = normalizeRequiredInteger(
1342
+ options.initialRetryDelayMs,
1343
+ 'download.initialRetryDelayMs',
1344
+ { allowZero: true, fallback: 1000 }
1345
+ );
1346
+ const maxRetryDelayMs = normalizeRequiredInteger(
1347
+ options.maxRetryDelayMs,
1348
+ 'download.maxRetryDelayMs',
1349
+ { allowZero: true, fallback: 30000 }
1350
+ );
1299
1351
  const progressTotalBytes = Number.isFinite(options.expectedSize)
1300
1352
  ? Math.floor(options.expectedSize)
1301
1353
  : (Number.isFinite(shardInfo?.size) ? Math.floor(shardInfo.size) : 0);
@@ -1967,7 +2019,11 @@ export async function downloadShard(
1967
2019
  onDeliveryMetrics,
1968
2020
  signal,
1969
2021
  requiredEncoding: requiredEncoding ?? activeConfig.requiredContentEncoding ?? null,
1970
- expectedHash: options.expectedHash ?? shardInfo?.hash ?? activeConfig.expectedHash ?? null,
2022
+ expectedHash:
2023
+ options.expectedHash
2024
+ ?? getExpectedShardHash(shardInfo, algorithm)
2025
+ ?? activeConfig.expectedHash
2026
+ ?? null,
1971
2027
  expectedSize: expectedSize ?? shardInfo?.size ?? null,
1972
2028
  expectedManifestVersionSet: options.expectedManifestVersionSet ?? null,
1973
2029
  writeToStore,
@@ -94,6 +94,8 @@ export const GGML_TYPE_SIZE = {
94
94
  const GGUF_MAGIC = 0x46554747;
95
95
  const GGUF_VERSION_MIN = 2;
96
96
  const GGUF_VERSION_MAX = 3;
97
+ const MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER);
98
+ const MIN_SAFE_BIGINT = BigInt(Number.MIN_SAFE_INTEGER);
97
99
 
98
100
  const {
99
101
  contextLength: DEFAULT_GGUF_CONTEXT_LENGTH,
@@ -102,6 +104,13 @@ const {
102
104
  ropeFreqBase: DEFAULT_ROPE_FREQ_BASE,
103
105
  } = DEFAULT_GGUF_PARSER_DEFAULTS;
104
106
 
107
+ function toSafeInteger(value, label) {
108
+ if (value > MAX_SAFE_BIGINT || value < MIN_SAFE_BIGINT) {
109
+ throw new Error(`GGUF ${label} exceeds JavaScript safe integer range: ${value.toString()}`);
110
+ }
111
+ return Number(value);
112
+ }
113
+
105
114
  class GGUFReader {
106
115
  constructor(buffer) {
107
116
  this.view = new DataView(buffer);
@@ -144,18 +153,26 @@ class GGUFReader {
144
153
  return value;
145
154
  }
146
155
 
147
- readUint64() {
148
- const low = this.view.getUint32(this.offset, true);
149
- const high = this.view.getUint32(this.offset + 4, true);
156
+ readUint64BigInt() {
157
+ const low = BigInt(this.view.getUint32(this.offset, true));
158
+ const high = BigInt(this.view.getUint32(this.offset + 4, true));
150
159
  this.offset += 8;
151
- return high * 0x100000000 + low;
160
+ return (high << 32n) | low;
152
161
  }
153
162
 
154
- readInt64() {
155
- const low = this.view.getUint32(this.offset, true);
156
- const high = this.view.getInt32(this.offset + 4, true);
163
+ readUint64(label = 'u64 value') {
164
+ return toSafeInteger(this.readUint64BigInt(), label);
165
+ }
166
+
167
+ readInt64BigInt() {
168
+ const low = BigInt(this.view.getUint32(this.offset, true));
169
+ const high = BigInt(this.view.getInt32(this.offset + 4, true));
157
170
  this.offset += 8;
158
- return high * 0x100000000 + low;
171
+ return (high << 32n) | low;
172
+ }
173
+
174
+ readInt64(label = 'i64 value') {
175
+ return toSafeInteger(this.readInt64BigInt(), label);
159
176
  }
160
177
 
161
178
  readFloat32() {
@@ -175,7 +192,7 @@ class GGUFReader {
175
192
  }
176
193
 
177
194
  readString() {
178
- const length = this.readUint64();
195
+ const length = this.readUint64('string length');
179
196
  const bytes = new Uint8Array(this.view.buffer, this.offset, length);
180
197
  this.offset += length;
181
198
  return new TextDecoder().decode(bytes);
@@ -196,9 +213,9 @@ class GGUFReader {
196
213
  case GGUFValueType.INT32:
197
214
  return this.readInt32();
198
215
  case GGUFValueType.UINT64:
199
- return this.readUint64();
216
+ return this.readUint64('metadata uint64');
200
217
  case GGUFValueType.INT64:
201
- return this.readInt64();
218
+ return this.readInt64('metadata int64');
202
219
  case GGUFValueType.FLOAT32:
203
220
  return this.readFloat32();
204
221
  case GGUFValueType.FLOAT64:
@@ -216,7 +233,7 @@ class GGUFReader {
216
233
 
217
234
  readArray() {
218
235
  const elementType = this.readUint32();
219
- const length = this.readUint64();
236
+ const length = this.readUint64('array length');
220
237
  if (length > 10000000) {
221
238
  throw new Error(`Array too long: ${length}`);
222
239
  }
@@ -331,8 +348,8 @@ export function parseGGUF(buffer) {
331
348
  throw new Error(`Unsupported GGUF version: ${version}`);
332
349
  }
333
350
 
334
- const tensorCount = reader.readUint64();
335
- const metadataKVCount = reader.readUint64();
351
+ const tensorCount = reader.readUint64('tensor count');
352
+ const metadataKVCount = reader.readUint64('metadata count');
336
353
 
337
354
  const metadata = {};
338
355
  for (let i = 0; i < metadataKVCount; i++) {
@@ -351,10 +368,10 @@ export function parseGGUF(buffer) {
351
368
  const nDims = reader.readUint32();
352
369
  const shape = [];
353
370
  for (let d = 0; d < nDims; d++) {
354
- shape.push(reader.readUint64());
371
+ shape.push(reader.readUint64(`tensor "${name}" shape[${d}]`));
355
372
  }
356
373
  const type = reader.readUint32();
357
- const offset = reader.readUint64();
374
+ const offset = reader.readUint64(`tensor "${name}" offset`);
358
375
 
359
376
  tensors.push({
360
377
  name,
@@ -6,7 +6,7 @@
6
6
  * @module formats/rdrr/groups
7
7
  */
8
8
 
9
- import type { ComponentGroup } from './types.js';
9
+ import type { ComponentGroup, RDRRManifest } from './types.js';
10
10
 
11
11
  export declare function getGroup(groupId: string): ComponentGroup | null;
12
12
 
@@ -16,11 +16,19 @@ export declare function getShardsForGroup(groupId: string): number[];
16
16
 
17
17
  export declare function getTensorsForGroup(groupId: string): string[];
18
18
 
19
- export declare function getShardsForExpert(layerIdx: number, expertIdx: number): number[];
19
+ export declare function getShardsForExpert(
20
+ layerIdx: number,
21
+ expertIdx: number,
22
+ manifest?: RDRRManifest | null
23
+ ): number[];
20
24
 
21
- export declare function getTensorsForExpert(layerIdx: number, expertIdx: number): string[];
25
+ export declare function getTensorsForExpert(
26
+ layerIdx: number,
27
+ expertIdx: number,
28
+ manifest?: RDRRManifest | null
29
+ ): string[];
22
30
 
23
- export declare function getExpertBytes(): number;
31
+ export declare function getExpertBytes(manifest?: RDRRManifest | null): number;
24
32
 
25
33
  export declare function getLayerGroupIds(): string[];
26
34
 
@@ -19,8 +19,7 @@ export function getTensorsForGroup(groupId) {
19
19
  return getManifest()?.groups?.[groupId]?.tensors ?? [];
20
20
  }
21
21
 
22
- export function getShardsForExpert(layerIdx, expertIdx) {
23
- const manifest = getManifest();
22
+ export function getShardsForExpert(layerIdx, expertIdx, manifest = getManifest()) {
24
23
  const groupId = `layer.${layerIdx}.expert.${expertIdx}`;
25
24
  const group = manifest?.groups?.[groupId];
26
25
  if (group) {
@@ -29,8 +28,7 @@ export function getShardsForExpert(layerIdx, expertIdx) {
29
28
  throw new Error(`Missing expert group mapping: ${groupId}`);
30
29
  }
31
30
 
32
- export function getTensorsForExpert(layerIdx, expertIdx) {
33
- const manifest = getManifest();
31
+ export function getTensorsForExpert(layerIdx, expertIdx, manifest = getManifest()) {
34
32
  const groupId = `layer.${layerIdx}.expert.${expertIdx}`;
35
33
  const group = manifest?.groups?.[groupId];
36
34
  if (group) {
@@ -39,8 +37,7 @@ export function getTensorsForExpert(layerIdx, expertIdx) {
39
37
  throw new Error(`Missing expert group mapping: ${groupId}`);
40
38
  }
41
39
 
42
- export function getExpertBytes() {
43
- const manifest = getManifest();
40
+ export function getExpertBytes(manifest = getManifest()) {
44
41
  const expertGroups = Object.entries(manifest?.groups || {})
45
42
  .filter(([id]) => id.includes('.expert.'));
46
43
 
@@ -7,6 +7,10 @@
7
7
  import type { RDRRManifest, ShardInfo, TensorMap } from './types.js';
8
8
 
9
9
  export declare function parseManifest(jsonString: string): RDRRManifest;
10
+ export declare function getExpectedShardHash(
11
+ shard: Partial<ShardInfo> | Record<string, unknown> | null | undefined,
12
+ manifestHashAlgorithm?: string | null
13
+ ): string;
10
14
 
11
15
  export declare function parseTensorMap(jsonString: string): TensorMap;
12
16
 
@@ -4,6 +4,19 @@ import { validateManifest } from './validation.js';
4
4
 
5
5
  let currentManifest = null;
6
6
 
7
+ export function getExpectedShardHash(shard, manifestHashAlgorithm = null) {
8
+ if (!shard || typeof shard !== 'object' || Array.isArray(shard)) {
9
+ return '';
10
+ }
11
+ const algorithm = typeof manifestHashAlgorithm === 'string'
12
+ ? manifestHashAlgorithm.trim().toLowerCase()
13
+ : '';
14
+ if (algorithm === 'blake3') {
15
+ return shard.blake3 || shard.hash || '';
16
+ }
17
+ return shard.hash || shard.blake3 || '';
18
+ }
19
+
7
20
  export function parseManifest(jsonString) {
8
21
  let manifest;
9
22
 
@@ -21,7 +34,7 @@ export function parseManifest(jsonString) {
21
34
  index: shard.index ?? i,
22
35
  filename: shard.filename || shard.fileName || '',
23
36
  size: shard.size,
24
- hash: shard.hash || shard.blake3 || '',
37
+ hash: getExpectedShardHash(shard, manifest.hashAlgorithm),
25
38
  blake3: shard.blake3 || shard.hash,
26
39
  offset: shard.offset ?? offset,
27
40
  hashAlgorithm: shard.hashAlgorithm,
@@ -44,9 +57,13 @@ export function parseManifest(jsonString) {
44
57
  export function parseTensorMap(jsonString) {
45
58
  try {
46
59
  const tensorMap = JSON.parse(jsonString);
60
+ const normalizedTensorMap = {};
47
61
 
48
62
  for (const [name, loc] of Object.entries(tensorMap)) {
49
- if (typeof loc.shard !== 'number') {
63
+ const shardIndex = typeof loc.shardIndex === 'number'
64
+ ? loc.shardIndex
65
+ : loc.shard;
66
+ if (typeof shardIndex !== 'number') {
50
67
  throw new Error(`Tensor '${name}' missing shard index`);
51
68
  }
52
69
  if (typeof loc.offset !== 'number') {
@@ -61,9 +78,42 @@ export function parseTensorMap(jsonString) {
61
78
  if (typeof loc.role !== 'string') {
62
79
  throw new Error(`Tensor '${name}' missing role`);
63
80
  }
81
+
82
+ let spans = undefined;
83
+ if (loc.spans !== undefined) {
84
+ if (!Array.isArray(loc.spans)) {
85
+ throw new Error(`Tensor '${name}' has invalid spans array`);
86
+ }
87
+ spans = loc.spans.map((span, spanIndex) => {
88
+ const spanShardIndex = typeof span?.shardIndex === 'number'
89
+ ? span.shardIndex
90
+ : span?.shard;
91
+ if (typeof spanShardIndex !== 'number') {
92
+ throw new Error(`Tensor '${name}' span[${spanIndex}] missing shard index`);
93
+ }
94
+ if (typeof span?.offset !== 'number') {
95
+ throw new Error(`Tensor '${name}' span[${spanIndex}] missing offset`);
96
+ }
97
+ if (typeof span?.size !== 'number') {
98
+ throw new Error(`Tensor '${name}' span[${spanIndex}] missing size`);
99
+ }
100
+ return {
101
+ shardIndex: spanShardIndex,
102
+ offset: span.offset,
103
+ size: span.size,
104
+ };
105
+ });
106
+ }
107
+
108
+ normalizedTensorMap[name] = {
109
+ ...loc,
110
+ shard: shardIndex,
111
+ shardIndex,
112
+ spans,
113
+ };
64
114
  }
65
115
 
66
- return tensorMap;
116
+ return normalizedTensorMap;
67
117
  } catch (e) {
68
118
  if (e instanceof Error && e.message.includes('Tensor')) {
69
119
  throw e;
@@ -75,13 +75,14 @@ export interface ComponentGroup extends ComponentGroupSchema {}
75
75
 
76
76
  export interface TensorLocation {
77
77
  shard: number;
78
+ shardIndex?: number;
78
79
  offset: number;
79
80
  size: number;
80
81
  shape: number[];
81
82
  dtype: string;
82
83
  role: TensorRole;
83
84
  group?: string;
84
- spans?: Array<{ shardIndex: number; offset: number; size: number }>;
85
+ spans?: Array<{ shard?: number; shardIndex?: number; offset: number; size: number }>;
85
86
  layout?: WeightLayout;
86
87
  originalShape?: number[];
87
88
  }