@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392)
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -1,5 +1,5 @@
1
1
 
2
- import { acquireBuffer } from '../../memory/buffer-pool.js';
2
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
3
3
  import { createTensor, dtypeBytes } from '../tensor.js';
4
4
  import { WORKGROUP_SIZES } from './constants.js';
5
5
  import { unifiedKernelWrapper } from './utils.js';
@@ -7,6 +7,9 @@ import { selectRuleValue } from './rule-registry.js';
7
7
 
8
8
  async function _splitQKV(target, qkvTensor, options) {
9
9
  const { numTokens, qSize, kSize, vSize, qTensor = null, kTensor = null, vTensor = null } = options;
10
+ const ownsQ = qTensor == null;
11
+ const ownsK = kTensor == null;
12
+ const ownsV = vTensor == null;
10
13
 
11
14
  const outputDtype = qkvTensor.dtype;
12
15
  const pipelineVariant = selectRuleValue('splitQkv', 'variant', { outputDtype });
@@ -18,18 +21,25 @@ async function _splitQKV(target, qkvTensor, options) {
18
21
 
19
22
  const totalElements = numTokens * (qSize + kSize + vSize);
20
23
 
21
- await unifiedKernelWrapper(
22
- 'split_qkv', target, pipelineVariant,
23
- [qkvTensor, qBuffer, kBuffer, vBuffer],
24
- { num_tokens: numTokens, q_size: qSize, k_size: kSize, v_size: vSize },
25
- Math.ceil(totalElements / WORKGROUP_SIZES.DEFAULT)
26
- );
27
-
28
- const Q = qTensor || createTensor(qBuffer, outputDtype, [numTokens, qSize], 'Q');
29
- const K = kTensor || createTensor(kBuffer, outputDtype, [numTokens, kSize], 'K');
30
- const V = vTensor || createTensor(vBuffer, outputDtype, [numTokens, vSize], 'V');
31
-
32
- return { Q, K, V };
24
+ try {
25
+ await unifiedKernelWrapper(
26
+ 'split_qkv', target, pipelineVariant,
27
+ [qkvTensor, qBuffer, kBuffer, vBuffer],
28
+ { num_tokens: numTokens, q_size: qSize, k_size: kSize, v_size: vSize },
29
+ Math.ceil(totalElements / WORKGROUP_SIZES.DEFAULT)
30
+ );
31
+
32
+ const Q = qTensor || createTensor(qBuffer, outputDtype, [numTokens, qSize], 'Q');
33
+ const K = kTensor || createTensor(kBuffer, outputDtype, [numTokens, kSize], 'K');
34
+ const V = vTensor || createTensor(vBuffer, outputDtype, [numTokens, vSize], 'V');
35
+
36
+ return { Q, K, V };
37
+ } catch (error) {
38
+ if (ownsQ) releaseBuffer(qBuffer);
39
+ if (ownsK) releaseBuffer(kBuffer);
40
+ if (ownsV) releaseBuffer(vBuffer);
41
+ throw error;
42
+ }
33
43
  }
34
44
 
35
45
  export async function runSplitQKV(qkvTensor, options) {
@@ -1,24 +1,45 @@
1
- import { acquireBuffer } from '../../memory/buffer-pool.js';
1
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
2
2
  import { createTensor, dtypeBytes } from '../tensor.js';
3
3
  import { WORKGROUP_SIZES } from './constants.js';
4
4
  import { unifiedKernelWrapper } from './utils.js';
5
5
 
6
+ function planTransposeDispatch(target, cols) {
7
+ const device = target?.device;
8
+ const maxPerDim = Number.isFinite(device?.limits?.maxComputeWorkgroupsPerDimension)
9
+ ? device.limits.maxComputeWorkgroupsPerDimension
10
+ : 65535;
11
+ const dispatchStride = Math.min(cols, maxPerDim * WORKGROUP_SIZES.DEFAULT);
12
+ return {
13
+ dispatchStride,
14
+ workgroups: [Math.ceil(dispatchStride / WORKGROUP_SIZES.DEFAULT), 1, 1],
15
+ };
16
+ }
17
+
6
18
  async function _transpose(target, input, rows, cols, options = {}) {
7
19
  const { outputBuffer = null } = options;
8
20
  const bytesPerElement = dtypeBytes(input.dtype);
9
21
  const outputSize = rows * cols * bytesPerElement;
10
22
  const outputBuf = outputBuffer || acquireBuffer(outputSize, undefined, 'transpose_output');
23
+ const ownedOutput = outputBuffer ? null : outputBuf;
24
+ const dispatchPlan = planTransposeDispatch(target, cols);
11
25
 
12
- await unifiedKernelWrapper(
13
- 'transpose',
14
- target,
15
- 'default',
16
- [input, outputBuf],
17
- { rows, cols },
18
- Math.ceil((rows * cols) / WORKGROUP_SIZES.DEFAULT)
19
- );
26
+ try {
27
+ await unifiedKernelWrapper(
28
+ 'transpose',
29
+ target,
30
+ 'default',
31
+ [input, outputBuf],
32
+ { rows, cols, _pad0: dispatchPlan.dispatchStride, _pad1: 0 },
33
+ [dispatchPlan.workgroups[0], rows, 1]
34
+ );
20
35
 
21
- return createTensor(outputBuf, input.dtype, [cols, rows], 'transpose_output');
36
+ return createTensor(outputBuf, input.dtype, [cols, rows], 'transpose_output');
37
+ } catch (error) {
38
+ if (ownedOutput) {
39
+ releaseBuffer(ownedOutput);
40
+ }
41
+ throw error;
42
+ }
22
43
  }
23
44
 
24
45
  export async function runTranspose(input, rows, cols, options = {}) {
@@ -19,14 +19,15 @@ struct Uniforms {
19
19
 
20
20
  @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
21
21
  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
22
- let idx = gid.x;
22
+ let dispatch_stride = max(u._pad0, 1u);
23
+ let linear_idx = gid.y * dispatch_stride + gid.x;
23
24
  let total = u.rows * u.cols;
24
- if (idx >= total) {
25
+ if (linear_idx >= total) {
25
26
  return;
26
27
  }
27
-
28
- let row = idx / u.cols;
29
- let col = idx % u.cols;
28
+ let row = linear_idx / u.cols;
29
+ let col = linear_idx % u.cols;
30
+ let idx = row * u.cols + col;
30
31
  let out_idx = col * u.rows + row;
31
32
  output[out_idx] = input[idx];
32
33
  }
@@ -1,4 +1,4 @@
1
- import { acquireBuffer } from '../../memory/buffer-pool.js';
1
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
2
2
  import { createTensor, dtypeBytes } from '../tensor.js';
3
3
  import { unifiedKernelWrapper } from './utils.js';
4
4
  import { selectRuleValue } from './rule-registry.js';
@@ -31,22 +31,31 @@ async function _upsample2d(target, input, options = {}) {
31
31
 
32
32
  const outHeight = resolvedHeight * scale;
33
33
  const outWidth = resolvedWidth * scale;
34
+ const outSpatial = outHeight * outWidth;
34
35
  const bytesPerElement = dtypeBytes(input.dtype);
35
36
  const outputSize = channels * outHeight * outWidth * bytesPerElement;
36
37
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'upsample2d_output');
38
+ const ownedOutput = outputBuffer ? null : output;
37
39
 
38
- await unifiedKernelWrapper(
39
- 'upsample2d', target, selectUpsample2DVariant(input.dtype === 'f16'),
40
- [input, output],
41
- {
42
- channels, in_height: resolvedHeight, in_width: resolvedWidth,
43
- out_height: outHeight, out_width: outWidth, scale,
44
- _pad0: 0, _pad1: 0,
45
- },
46
- Math.ceil((channels * outHeight * outWidth) / WORKGROUP_SIZES.DEFAULT)
47
- );
48
-
49
- return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'upsample2d_output');
40
+ try {
41
+ await unifiedKernelWrapper(
42
+ 'upsample2d', target, selectUpsample2DVariant(input.dtype === 'f16'),
43
+ [input, output],
44
+ {
45
+ channels, in_height: resolvedHeight, in_width: resolvedWidth,
46
+ out_height: outHeight, out_width: outWidth, scale,
47
+ _pad0: 0, _pad1: 0,
48
+ },
49
+ [Math.ceil(outSpatial / WORKGROUP_SIZES.DEFAULT), channels, 1]
50
+ );
51
+
52
+ return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'upsample2d_output');
53
+ } catch (error) {
54
+ if (ownedOutput) {
55
+ releaseBuffer(ownedOutput);
56
+ }
57
+ throw error;
58
+ }
50
59
  }
51
60
 
52
61
  export async function runUpsample2D(input, options = {}) {
@@ -19,19 +19,16 @@ struct Uniforms {
19
19
 
20
20
  @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
21
21
  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
22
- let idx = gid.x;
23
22
  let out_spatial = u.out_height * u.out_width;
24
- let total = u.channels * out_spatial;
25
- if (idx >= total) {
23
+ let spatial_idx = gid.x;
24
+ let channel = gid.y;
25
+ if (spatial_idx >= out_spatial || channel >= u.channels) {
26
26
  return;
27
27
  }
28
-
29
- let channel = idx / out_spatial;
30
- let rem = idx - channel * out_spatial;
31
- let out_y = rem / u.out_width;
32
- let out_x = rem - out_y * u.out_width;
28
+ let out_y = spatial_idx / u.out_width;
29
+ let out_x = spatial_idx - out_y * u.out_width;
33
30
  let in_y = out_y / u.scale;
34
31
  let in_x = out_x / u.scale;
35
32
  let in_idx = (channel * u.in_height + in_y) * u.in_width + in_x;
36
- output[idx] = input[in_idx];
33
+ output[channel * out_spatial + spatial_idx] = input[in_idx];
37
34
  }
@@ -23,19 +23,16 @@ struct Uniforms {
23
23
 
24
24
  @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
25
25
  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
26
- let idx = gid.x;
27
26
  let out_spatial = u.out_height * u.out_width;
28
- let total = u.channels * out_spatial;
29
- if (idx >= total) {
27
+ let spatial_idx = gid.x;
28
+ let channel = gid.y;
29
+ if (spatial_idx >= out_spatial || channel >= u.channels) {
30
30
  return;
31
31
  }
32
-
33
- let channel = idx / out_spatial;
34
- let rem = idx - channel * out_spatial;
35
- let out_y = rem / u.out_width;
36
- let out_x = rem - out_y * u.out_width;
32
+ let out_y = spatial_idx / u.out_width;
33
+ let out_x = spatial_idx - out_y * u.out_width;
37
34
  let in_y = out_y / u.scale;
38
35
  let in_x = out_x / u.scale;
39
36
  let in_idx = (channel * u.in_height + in_y) * u.in_width + in_x;
40
- output[idx] = input[in_idx];
37
+ output[channel * out_spatial + spatial_idx] = input[in_idx];
41
38
  }
@@ -116,27 +116,49 @@ export async function unifiedKernelWrapper(opName, target, variant, bindings, un
116
116
  index = config.variantMetadata.outputBinding;
117
117
  }
118
118
 
119
+ const buffer = binding?.buffer || binding;
120
+ const isGpuBuffer = buffer && (
121
+ typeof GPUBuffer === 'undefined'
122
+ ? true
123
+ : buffer instanceof GPUBuffer
124
+ );
125
+ if (!isGpuBuffer) {
126
+ const bindingLabel = binding?.label ?? buffer?.label ?? 'unknown';
127
+ const bufferType = buffer === null ? 'null' : buffer === undefined ? 'undefined' : buffer.constructor?.name || typeof buffer;
128
+ throw new Error(
129
+ `Kernel "${opName}/${variant}" binding "${bindingConfig.name}" (index ${index}) requires a GPUBuffer ` +
130
+ `(label=${bindingLabel}, type=${bufferType}).`
131
+ );
132
+ }
133
+
119
134
  bindGroupEntries.push({
120
135
  binding: index,
121
- resource: { buffer: binding?.buffer || binding }
136
+ resource: { buffer }
122
137
  });
123
138
  }
124
139
 
125
- const bindGroup = device.createBindGroup({
126
- label: `${opName}_bind_group`,
127
- layout: pipeline.getBindGroupLayout(0),
128
- entries: bindGroupEntries,
129
- });
140
+ try {
141
+ const bindGroup = device.createBindGroup({
142
+ label: `${opName}_bind_group`,
143
+ layout: pipeline.getBindGroupLayout(0),
144
+ entries: bindGroupEntries,
145
+ });
130
146
 
131
- if (workgroups && typeof workgroups === 'object' && workgroups.indirectBuffer) {
132
- const indirectOffset = workgroups.indirectOffset ?? 0;
133
- if (recorder) {
134
- recordDispatchIndirect(recorder, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
147
+ if (workgroups && typeof workgroups === 'object' && workgroups.indirectBuffer) {
148
+ const indirectOffset = workgroups.indirectOffset ?? 0;
149
+ if (recorder) {
150
+ recordDispatchIndirect(recorder, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
151
+ } else {
152
+ dispatchIndirect(device, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
153
+ }
135
154
  } else {
136
- dispatchIndirect(device, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
155
+ dispatchKernel(target, pipeline, bindGroup, workgroups, opName);
156
+ }
157
+ } catch (error) {
158
+ if (!recorder) {
159
+ uniformBuffer.destroy();
137
160
  }
138
- } else {
139
- dispatchKernel(target, pipeline, bindGroup, workgroups, opName);
161
+ throw error;
140
162
  }
141
163
 
142
164
  if (!recorder) {
@@ -11,10 +11,13 @@ export class PartitionedBufferPool {
11
11
 
12
12
  #expertPools;
13
13
 
14
+ #bufferOwners;
15
+
14
16
 
15
17
  constructor(partitions, schemaConfig = getRuntimeConfig().shared.bufferPool) {
16
18
  this.#sharedPool = new BufferPool(false, schemaConfig);
17
19
  this.#expertPools = new Map();
20
+ this.#bufferOwners = new WeakMap();
18
21
  for (const partition of partitions) {
19
22
  this.#expertPools.set(partition.id, new BufferPool(false, schemaConfig));
20
23
  }
@@ -28,12 +31,17 @@ export class PartitionedBufferPool {
28
31
  label
29
32
  ) {
30
33
  const pool = this.#expertPools.get(partitionId) || this.#sharedPool;
31
- return pool.acquire(size, usage, label);
34
+ const buffer = pool.acquire(size, usage, label);
35
+ this.#bufferOwners.set(buffer, pool);
36
+ return buffer;
32
37
  }
33
38
 
34
39
 
35
40
  release(partitionId, buffer) {
36
- const pool = this.#expertPools.get(partitionId) || this.#sharedPool;
41
+ const pool = this.#bufferOwners.get(buffer)
42
+ || this.#expertPools.get(partitionId)
43
+ || this.#sharedPool;
44
+ this.#bufferOwners.delete(buffer);
37
45
  pool.release(buffer);
38
46
  }
39
47
 
@@ -1,15 +1,8 @@
1
1
 
2
-
3
2
  import { log, trace } from '../debug/index.js';
3
+ import { DEFAULT_PERF_GUARDS_CONFIG } from '../config/schema/debug.schema.js';
4
4
 
5
- // Initial config uses inline defaults; caller should configure via configurePerfGuards()
6
- let config = {
7
- allowGPUReadback: true,
8
- trackSubmitCount: false,
9
- trackAllocations: false,
10
- logExpensiveOps: false,
11
- strictMode: false,
12
- };
5
+ let config = { ...DEFAULT_PERF_GUARDS_CONFIG };
13
6
 
14
7
 
15
8
  let counters = {
@@ -179,6 +179,8 @@ export class GPUProfiler {
179
179
 
180
180
  if (!this.#device || !this.#querySet || !this.#queryBuffer || !this.#readbackBuffer) {
181
181
  log.warn('GPUProfiler', 'Missing required resources for resolve');
182
+ this.#pendingResolves = [];
183
+ this.#nextQueryIndex = 0;
182
184
  return;
183
185
  }
184
186
 
@@ -199,34 +201,35 @@ export class GPUProfiler {
199
201
 
200
202
  this.#device.queue.submit([encoder.finish()]);
201
203
 
202
- if (!allowReadback('GPUProfiler.resolve')) {
203
- return;
204
- }
204
+ let mapped = false;
205
205
 
206
- // Read back timestamps
207
- await this.#readbackBuffer.mapAsync(GPUMapMode.READ);
208
- const timestamps = new BigUint64Array(this.#readbackBuffer.getMappedRange());
206
+ try {
207
+ if (!allowReadback('GPUProfiler.resolve')) {
208
+ return;
209
+ }
209
210
 
210
- // Process pending resolves
211
- for (const pending of this.#pendingResolves) {
212
- const startNs = timestamps[pending.startIndex];
213
- const endNs = timestamps[pending.endIndex];
211
+ await this.#readbackBuffer.mapAsync(GPUMapMode.READ);
212
+ mapped = true;
213
+ const timestamps = new BigUint64Array(this.#readbackBuffer.getMappedRange());
214
214
 
215
- // Convert nanoseconds to milliseconds
216
- const durationMs = Number(endNs - startNs) / 1_000_000;
215
+ for (const pending of this.#pendingResolves) {
216
+ const startNs = timestamps[pending.startIndex];
217
+ const endNs = timestamps[pending.endIndex];
218
+ const durationMs = Number(endNs - startNs) / 1_000_000;
217
219
 
218
- // Sanity check - use CPU timing if GPU timing seems wrong
219
- if (durationMs < 0 || durationMs > this.#maxDurationMs) {
220
- // Fallback to CPU timing
221
- this.#recordResult(pending.label, pending.cpuEndTime - pending.cpuStartTime);
222
- } else {
223
- this.#recordResult(pending.label, durationMs);
220
+ if (durationMs < 0 || durationMs > this.#maxDurationMs) {
221
+ this.#recordResult(pending.label, pending.cpuEndTime - pending.cpuStartTime);
222
+ } else {
223
+ this.#recordResult(pending.label, durationMs);
224
+ }
225
+ }
226
+ } finally {
227
+ if (mapped) {
228
+ this.#readbackBuffer.unmap();
224
229
  }
230
+ this.#pendingResolves = [];
231
+ this.#nextQueryIndex = 0;
225
232
  }
226
-
227
- this.#readbackBuffer.unmap();
228
- this.#pendingResolves = [];
229
- this.#nextQueryIndex = 0;
230
233
  }
231
234
 
232
235
 
@@ -350,6 +353,8 @@ export class GPUProfiler {
350
353
  }
351
354
  this.#results.clear();
352
355
  this.#activeLabels.clear();
356
+ this.#pendingResolves = [];
357
+ this.#nextQueryIndex = 0;
353
358
  }
354
359
  }
355
360
 
@@ -0,0 +1,16 @@
1
/**
 * One buffer to map for reading in a `withMappedReadBuffers` call.
 */
export interface ReadbackBufferEntry {
  /** Buffer that is mapped with `GPUMapMode.READ` and unmapped once the call finishes. */
  buffer: GPUBuffer;
  /** When `true`, the buffer is destroyed after the call, whether it succeeded or failed. */
  destroy?: boolean;
  /** Byte offset handed to `getMappedRange`; treated as 0 when only `size` is given. */
  offset?: number;
  /** Byte length handed to `getMappedRange`; defaults to the rest of the buffer after `offset`. */
  size?: number;
}

/**
 * Maps `buffer` for reading, passes its mapped range to `read`, and unmaps it afterwards.
 *
 * @param buffer - Buffer to map with `GPUMapMode.READ`.
 * @param read - Receives the mapped range; may be synchronous or asynchronous.
 * @returns The value produced by `read`.
 */
export declare function withMappedReadBuffer<T>(
  buffer: GPUBuffer,
  read: (range: ArrayBuffer) => T | Promise<T>,
): Promise<T>;

/**
 * Maps every entry's buffer for reading, passes the mapped ranges (in entry order) to
 * `read`, then unmaps the buffers and destroys those flagged with `destroy: true`.
 *
 * @param entries - Buffers to map, with optional sub-range and destroy flag.
 * @param read - Receives one mapped range per entry; may be synchronous or asynchronous.
 * @returns The value produced by `read`.
 */
export declare function withMappedReadBuffers<T>(
  entries: readonly ReadbackBufferEntry[],
  read: (ranges: ArrayBuffer[]) => T | Promise<T>,
): Promise<T>;
@@ -0,0 +1,41 @@
1
+ export async function withMappedReadBuffer(buffer, read) {
2
+ let mapped = false;
3
+ try {
4
+ await buffer.mapAsync(GPUMapMode.READ);
5
+ mapped = true;
6
+ return await read(buffer.getMappedRange());
7
+ } finally {
8
+ if (mapped) {
9
+ buffer.unmap();
10
+ }
11
+ }
12
+ }
13
+
14
+ export async function withMappedReadBuffers(entries, read) {
15
+ const mappedEntries = [];
16
+ try {
17
+ await Promise.all(entries.map(async (entry) => {
18
+ await entry.buffer.mapAsync(GPUMapMode.READ);
19
+ mappedEntries.push(entry);
20
+ }));
21
+ const ranges = entries.map((entry) => {
22
+ if (entry.offset != null || entry.size != null) {
23
+ return entry.buffer.getMappedRange(
24
+ entry.offset ?? 0,
25
+ entry.size ?? (entry.buffer.size - (entry.offset ?? 0))
26
+ );
27
+ }
28
+ return entry.buffer.getMappedRange();
29
+ });
30
+ return await read(ranges);
31
+ } finally {
32
+ for (let index = mappedEntries.length - 1; index >= 0; index -= 1) {
33
+ mappedEntries[index].buffer.unmap();
34
+ }
35
+ for (const entry of entries) {
36
+ if (entry.destroy === true) {
37
+ entry.buffer.destroy();
38
+ }
39
+ }
40
+ }
41
+ }
@@ -21,6 +21,8 @@ let submitSources = new Map();
21
21
 
22
22
  let currentPhase = 'other';
23
23
 
24
+ const WRAPPED_QUEUE_SENTINEL = Symbol.for('doppler.submitTrackerWrapped');
25
+
24
26
 
25
27
  const phaseStats = {
26
28
  prefill: { count: 0, times: [], totalMs: 0, maxMs: 0, minMs: Infinity, sources: new Map() },
@@ -194,6 +196,10 @@ function extractSourceFromStack() {
194
196
 
195
197
 
196
198
  export function wrapQueueForTracking(queue) {
199
+ if (!queue || queue[WRAPPED_QUEUE_SENTINEL] === true) {
200
+ return queue;
201
+ }
202
+
197
203
  const originalSubmit = queue.submit.bind(queue);
198
204
 
199
205
  (queue).submit = function( commandBuffers) {
@@ -210,6 +216,13 @@ export function wrapQueueForTracking(queue) {
210
216
  return result;
211
217
  };
212
218
 
219
+ Object.defineProperty(queue, WRAPPED_QUEUE_SENTINEL, {
220
+ value: true,
221
+ configurable: true,
222
+ enumerable: false,
223
+ writable: false,
224
+ });
225
+
213
226
  return queue;
214
227
  }
215
228
 
@@ -8,6 +8,7 @@
8
8
 
9
9
  interface UniformCacheEntry {
10
10
  buffer: GPUBuffer;
11
+ bytes: Uint8Array;
11
12
  lastUsed: number;
12
13
  refCount: number;
13
14
  }
@@ -17,6 +17,18 @@ function hashArrayBuffer(data) {
17
17
  return (hash >>> 0).toString(16).padStart(8, '0');
18
18
  }
19
19
 
20
/**
 * Takes an independent byte-level snapshot of a uniform payload.
 *
 * @param {ArrayBuffer | ArrayBufferView} data - Payload to copy. An `ArrayBuffer` is
 *   copied whole; a typed array or `DataView` contributes exactly the bytes it covers.
 * @returns {Uint8Array} A fresh array that does not alias `data`.
 */
function copyUniformBytes(data) {
  if (ArrayBuffer.isView(data)) {
    // Slice the underlying buffer instead of passing the view to the Uint8Array
    // constructor: the constructor converts element values (e.g. Uint32 256 -> 0)
    // rather than copying the raw bytes.
    const start = data.byteOffset;
    return new Uint8Array(data.buffer.slice(start, start + data.byteLength));
  }
  return new Uint8Array(data.slice(0));
}
23
+
24
/**
 * Reports whether two byte sequences have the same length and the same value at
 * every index.
 *
 * @param {Uint8Array} a
 * @param {Uint8Array} b
 * @returns {boolean} `true` only when the lengths match and no position differs.
 */
function equalUniformBytes(a, b) {
  if (a.length !== b.length) {
    return false;
  }
  // Advance while positions agree; reaching the end means no mismatch was found.
  let position = 0;
  while (position < a.length && a[position] === b[position]) {
    position += 1;
  }
  return position === a.length;
}
31
+
20
32
 
21
33
  export class UniformBufferCache {
22
34
 
@@ -50,14 +62,22 @@ export class UniformBufferCache {
50
62
 
51
63
 
52
64
  getOrCreate(data, label) {
53
- const hash = hashArrayBuffer(data);
54
- const existing = this.#cache.get(hash);
55
-
56
- if (existing) {
57
- existing.lastUsed = performance.now();
58
- existing.refCount++;
59
- this.#stats.hits++;
60
- return existing.buffer;
65
+ const baseKey = `${data.byteLength}:${hashArrayBuffer(data)}`;
66
+ const dataBytes = copyUniformBytes(data);
67
+ let key = baseKey;
68
+ let suffix = 0;
69
+ let existing = this.#cache.get(key);
70
+
71
+ while (existing) {
72
+ if (equalUniformBytes(existing.bytes, dataBytes)) {
73
+ existing.lastUsed = performance.now();
74
+ existing.refCount++;
75
+ this.#stats.hits++;
76
+ return existing.buffer;
77
+ }
78
+ suffix += 1;
79
+ key = `${baseKey}#${suffix}`;
80
+ existing = this.#cache.get(key);
61
81
  }
62
82
 
63
83
  // Cache miss - create new buffer
@@ -80,8 +100,9 @@ export class UniformBufferCache {
80
100
  this.#evictLRU();
81
101
  }
82
102
 
83
- this.#cache.set(hash, {
103
+ this.#cache.set(key, {
84
104
  buffer,
105
+ bytes: dataBytes,
85
106
  lastUsed: performance.now(),
86
107
  refCount: 1,
87
108
  });
@@ -42,6 +42,9 @@ export async function verifyIntentBundle(bundle, context) {
42
42
  reasons.push('Missing payload.expectedOutputHash');
43
43
  }
44
44
 
45
+ if (baseModelHash && !context?.manifest) {
46
+ reasons.push('Missing verification context manifest');
47
+ }
45
48
  if (context?.manifest && baseModelHash) {
46
49
  const manifestHash = await computeManifestHash(context.manifest);
47
50
  if (manifestHash !== baseModelHash.replace('sha256:', '')) {
@@ -49,6 +52,9 @@ export async function verifyIntentBundle(bundle, context) {
49
52
  }
50
53
  }
51
54
 
55
+ if (kernelRegistryVersion && context?.kernelRegistryVersion == null) {
56
+ reasons.push('Missing verification context kernelRegistryVersion');
57
+ }
52
58
  if (context?.kernelRegistryVersion && kernelRegistryVersion) {
53
59
  if (context.kernelRegistryVersion !== kernelRegistryVersion) {
54
60
  reasons.push('Kernel registry version mismatch');
@@ -23,11 +23,20 @@ export interface HotSwapVerificationResult {
23
23
  signerId?: string;
24
24
  }
25
25
 
26
/**
 * Optional caller-supplied facts about where a hot-swap manifest came from.
 *
 * `verifyHotSwapManifest` only accepts an unsigned manifest when the policy allows
 * unsigned local manifests AND `source` explicitly marks the manifest as local.
 */
export interface HotSwapVerificationContext {
  /** Origin of the manifest; omitted or `null` is never treated as local. */
  source?: {
    /** Origin category; the value `'local'` marks the source as local. */
    kind?: 'local' | 'remote' | string | null;
    /** `true` marks the source as local regardless of `kind`. */
    isLocal?: boolean | null;
    /** Presumably the location the manifest was loaded from — TODO confirm how callers use it. */
    url?: string | null;
  } | null;
}
33
+
26
34
  export declare function fetchHotSwapManifest(url: string): Promise<HotSwapManifest>;
27
35
 
28
36
  export declare function verifyHotSwapManifest(
29
37
  manifest: HotSwapManifest,
30
- policy: HotSwapConfigSchema
38
+ policy: HotSwapConfigSchema,
39
+ context?: HotSwapVerificationContext
31
40
  ): Promise<HotSwapVerificationResult>;
32
41
 
33
42
  export declare function serializeHotSwapManifest(manifest: HotSwapManifest): string;
@@ -14,13 +14,23 @@ export async function fetchHotSwapManifest(url) {
14
14
  return response.json();
15
15
  }
16
16
 
17
- export async function verifyHotSwapManifest(manifest, policy) {
17
+ function isExplicitLocalSource(source) {
18
+ if (!source || typeof source !== 'object') {
19
+ return false;
20
+ }
21
+ if (source.isLocal === true) {
22
+ return true;
23
+ }
24
+ return source.kind === 'local';
25
+ }
26
+
27
+ export async function verifyHotSwapManifest(manifest, policy, context = {}) {
18
28
  if (!policy.enabled) {
19
29
  return { ok: false, reason: 'Hot-swap disabled' };
20
30
  }
21
31
 
22
32
  if (!manifest.signature) {
23
- if (policy.localOnly && policy.allowUnsignedLocal) {
33
+ if (policy.localOnly && policy.allowUnsignedLocal && isExplicitLocalSource(context.source)) {
24
34
  return { ok: true, reason: 'Local-only unsigned manifest accepted' };
25
35
  }
26
36
  return { ok: false, reason: 'Signature required' };