@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -0,0 +1,937 @@
1
+ import {
2
+ buildExecutionV0KernelProfileKey,
3
+ indexExecutionV0KernelProfiles,
4
+ normalizeExecutionV0Dtype,
5
+ resolveExecutionV0KernelProfile,
6
+ resolveExecutionV0KVIO,
7
+ resolveExecutionV0Precision,
8
+ } from '../../../config/execution-v0-contract-check.js';
9
+ import {
10
+ EXECUTION_V0_SCHEMA_ID,
11
+ isExecutionV0Digest,
12
+ isExecutionV0Semver,
13
+ } from '../../../config/schema/execution-v0.schema.js';
14
+ import { KERNEL_CONFIGS } from '../../../gpu/kernels/kernel-configs.js';
15
+ import { buildKernelRefFromKernelEntry, isKernelRefBoundToKernel } from '../../../config/kernels/kernel-ref.js';
16
+
17
// NOTE(review): presumably the step fields a patch "set" operation is allowed
// to overwrite — the consumer is outside this view; confirm against usage.
const PATCH_SET_MUTABLE_FIELDS = new Set([
  'precision',
  'kvIO',
  'constants',
  'entry',
]);

// NOTE(review): presumably the keys recognised on the execution-v0 runtime
// input object — the consumer is outside this view; confirm against usage.
const EXECUTION_V0_RUNTIME_KEYS = new Set([
  'session',
  'executionPatch',
]);
19
+
20
// Lookup table built once at module load:
//   "<shaderFile>#<entryPoint>" -> Set of lower-cased output dtypes
// gathered from every variant in KERNEL_CONFIGS. Variants without a non-empty
// string shaderFile are ignored; entryPoint defaults to "main". A key whose
// variants declare no outputDtype still gets an (empty) Set.
const KERNEL_OUTPUT_CAPABILITIES = (() => {
  const capabilities = new Map();
  const variantGroups = Object.values(KERNEL_CONFIGS);
  for (const group of variantGroups) {
    for (const variant of Object.values(group)) {
      const shaderFile = variant?.shaderFile;
      const entryPoint = variant?.entryPoint ?? 'main';
      if (typeof shaderFile !== 'string' || shaderFile.length === 0) {
        continue;
      }
      const lookupKey = `${shaderFile}#${entryPoint}`;
      let dtypes = capabilities.get(lookupKey);
      if (dtypes === undefined) {
        dtypes = new Set();
        capabilities.set(lookupKey, dtypes);
      }
      const declaredDtype = variant?.outputDtype;
      if (typeof declaredDtype === 'string' && declaredDtype.length > 0) {
        dtypes.add(declaredDtype.toLowerCase());
      }
    }
  }
  return capabilities;
})();
39
+
40
// Short local aliases for the execution-v0 contract helpers imported from
// config/execution-v0-contract-check.js.
const normalizeDtype = normalizeExecutionV0Dtype;
const resolvePrecision = resolveExecutionV0Precision;
const resolveKVIO = resolveExecutionV0KVIO;
const indexKernelProfiles = indexExecutionV0KernelProfiles;
const buildKernelProfileKey = buildExecutionV0KernelProfileKey;
45
+
46
/**
 * Looks up the output dtypes registered for a step's kernel entry point.
 *
 * @param {object} step - Step descriptor; `kernel` and `entry` are read, and a
 *   missing or blank `entry` is treated as "main".
 * @returns {Set<string>|null} The registered dtype set, or null when the step
 *   names no kernel or the kernel/entry pair is not in the lookup table.
 */
function getKernelOutputCapabilities(step) {
  const kernelName = String(step?.kernel ?? '').trim();
  const trimmedEntry = String(step?.entry ?? 'main').trim();
  const entryName = trimmedEntry || 'main';
  if (kernelName.length === 0) {
    return null;
  }
  const registered = KERNEL_OUTPUT_CAPABILITIES.get(`${kernelName}#${entryName}`);
  return registered ?? null;
}
54
+
55
/**
 * Produces a deep copy of a value.
 *
 * Uses `structuredClone` when the runtime provides it and otherwise falls back
 * to a JSON serialise/parse round trip.
 *
 * @param {*} value - Value to copy.
 * @returns {*} A deep copy of `value`.
 */
export function cloneJson(value) {
  return typeof structuredClone === 'function'
    ? structuredClone(value)
    : JSON.parse(JSON.stringify(value));
}
61
+
62
/**
 * Canonicalises a stop-check mode (trimmed, lower-cased).
 *
 * @param {*} value - Raw mode; null/undefined are treated as an empty string.
 * @param {string} label - Field path used in the error message.
 * @returns {'batch'|'per-token'} The canonical mode.
 * @throws {Error} When the value is neither "batch" nor "per-token".
 */
function normalizeStopCheckMode(value, label) {
  const mode = String(value ?? '').trim().toLowerCase();
  switch (mode) {
    case 'batch':
    case 'per-token':
      return mode;
    default:
      throw new Error(`[ExecutionV0] ${label} must be "batch" or "per-token".`);
  }
}
69
+
70
/**
 * Canonicalises a KV layout name (trimmed, lower-cased).
 *
 * @param {*} value - Raw layout value.
 * @param {string} label - Unused by this function; kept for signature parity
 *   with the other normalisers.
 * @returns {string|null} The canonical layout, or null when the value is
 *   null/undefined or blank after trimming.
 */
function normalizeKVLayout(value, label) {
  const layout = value == null ? '' : String(value).trim().toLowerCase();
  return layout.length > 0 ? layout : null;
}
80
+
81
/**
 * Asserts that a value is a non-null, non-array object.
 *
 * @param {*} value - Value to check.
 * @param {string} label - Field path used in the error message.
 * @returns {object} The same value, unchanged.
 * @throws {Error} When the value is falsy, not an object, or an array.
 */
function requirePlainObject(value, label) {
  const isObject = value !== null && typeof value === 'object';
  if (isObject && !Array.isArray(value)) {
    return value;
  }
  throw new Error(`[ExecutionV0] ${label} must be an object.`);
}
87
+
88
/**
 * Reads an own property, failing when the key is not defined on the object
 * itself (inherited properties do not count).
 *
 * @param {object} root - Object to read from.
 * @param {string} key - Property name.
 * @param {string} label - Field path used in the error message.
 * @returns {*} The property value (may be null or undefined if explicitly set).
 * @throws {Error} When `root` has no own property named `key`.
 */
function requireOwnProperty(root, key, label) {
  const isPresent = Object.prototype.hasOwnProperty.call(root, key);
  if (isPresent) {
    return root[key];
  }
  throw new Error(`[ExecutionV0] ${label} is required.`);
}
94
+
95
/**
 * Reads a required own property that must be either null or a plain object.
 *
 * @param {object} root - Object to read from.
 * @param {string} key - Property name.
 * @param {string} label - Field path used in error messages.
 * @returns {object|null} The property value.
 * @throws {Error} When the property is absent, or is neither null nor a
 *   non-array object.
 */
function requireNullableObject(root, key, label) {
  const candidate = requireOwnProperty(root, key, label);
  return candidate === null ? null : requirePlainObject(candidate, label);
}
102
+
103
/**
 * Reads a required own property that must be an array.
 *
 * @param {object} root - Object to read from.
 * @param {string} key - Property name.
 * @param {string} label - Field path used in error messages.
 * @returns {Array} The property value.
 * @throws {Error} When the property is absent or is not an array.
 */
function requireArrayProperty(root, key, label) {
  const candidate = requireOwnProperty(root, key, label);
  if (Array.isArray(candidate)) {
    return candidate;
  }
  throw new Error(`[ExecutionV0] ${label} must be an array.`);
}
110
+
111
/**
 * Asserts that a value is an integer greater than or equal to 1.
 *
 * @param {*} value - Value to check.
 * @param {string} label - Field path used in the error message.
 * @returns {number} The same value, unchanged.
 * @throws {Error} When the value is not an integer or is below 1.
 */
function requirePositiveInteger(value, label) {
  const isValid = Number.isInteger(value) && value >= 1;
  if (isValid) {
    return value;
  }
  throw new Error(`[ExecutionV0] ${label} must be a positive integer.`);
}
117
+
118
/**
 * Validates an optional boolean field.
 *
 * @param {*} value - Value to check; `undefined` means "not provided".
 * @param {string} label - Field path used in the error message.
 * @returns {boolean|undefined} The same value, unchanged.
 * @throws {Error} When a value is provided and it is not a boolean
 *   (null counts as provided).
 */
function requireOptionalBoolean(value, label) {
  if (value === undefined || typeof value === 'boolean') {
    return value;
  }
  throw new Error(`[ExecutionV0] ${label} must be a boolean when provided.`);
}
127
+
128
/**
 * Reads a required, non-null own property and passes it through
 * `normalizeDtype`.
 *
 * @param {object} root - Object to read from.
 * @param {string} key - Property name.
 * @param {string} label - Field path used in error messages and forwarded to
 *   `normalizeDtype`.
 * @returns {*} Whatever `normalizeDtype` returns for the property value.
 * @throws {Error} When the property is absent, null, or undefined.
 */
function requireDtypeProperty(root, key, label) {
  const raw = requireOwnProperty(root, key, label);
  if (raw === null || raw === undefined) {
    throw new Error(`[ExecutionV0] ${label} is required.`);
  }
  return normalizeDtype(raw, label);
}
135
+
136
/**
 * Validates `sessionDefaults.decodeLoop`.
 *
 * The property must be present; null is accepted and skips all further checks.
 * Otherwise `batchSize` and `readbackInterval` must be positive integers,
 * `stopCheckMode` must be "batch" or "per-token", the ring sizes
 * (`ringTokens`, `ringStop`, `ringStaging`) must be positive integers when
 * defined, and `disableCommandBatching` must be a boolean when defined.
 *
 * @param {object} sessionDefaults - Session defaults object to validate.
 * @returns {void}
 * @throws {Error} On the first field that violates the contract.
 */
function validateDecodeLoopContract(sessionDefaults) {
  const base = 'sessionDefaults.decodeLoop';
  const loopConfig = requireNullableObject(sessionDefaults, 'decodeLoop', base);
  if (loopConfig === null) {
    return;
  }

  // Mandatory positive-integer fields, checked in declaration order.
  for (const field of ['batchSize', 'readbackInterval']) {
    requirePositiveInteger(loopConfig[field], `${base}.${field}`);
  }

  normalizeStopCheckMode(loopConfig.stopCheckMode, `${base}.stopCheckMode`);

  // Ring sizes are optional: only validated when explicitly defined.
  for (const field of ['ringTokens', 'ringStop', 'ringStaging']) {
    if (loopConfig[field] !== undefined) {
      requirePositiveInteger(loopConfig[field], `${base}.${field}`);
    }
  }

  requireOptionalBoolean(
    loopConfig.disableCommandBatching,
    `${base}.disableCommandBatching`
  );
}
176
+
177
/**
 * Validates the `sessionDefaults` section of a manifest's inference config.
 *
 * Checks, in order: `sessionDefaults` is an object; `compute` and
 * `compute.defaults` are present objects; the four dtype defaults
 * (activation, math, accum, output) are present and non-null;
 * `compute.kernelProfiles` is an array; `kvcache` is present and, when not
 * null, carries a `kvDtype`; and finally the decode-loop contract.
 *
 * @param {object} manifestInference - The manifest's inference object.
 * @returns {void}
 * @throws {Error} On the first field that violates the contract.
 */
export function validateManifestSessionDefaultsContract(manifestInference) {
  const sessionDefaults = requirePlainObject(
    manifestInference?.sessionDefaults,
    'manifest.inference.sessionDefaults'
  );

  const computeLabel = 'sessionDefaults.compute';
  const compute = requirePlainObject(
    requireOwnProperty(sessionDefaults, 'compute', computeLabel),
    computeLabel
  );

  const defaultsLabel = `${computeLabel}.defaults`;
  const computeDefaults = requirePlainObject(
    requireOwnProperty(compute, 'defaults', defaultsLabel),
    defaultsLabel
  );

  for (const dtypeKey of ['activationDtype', 'mathDtype', 'accumDtype', 'outputDtype']) {
    requireDtypeProperty(computeDefaults, dtypeKey, `${defaultsLabel}.${dtypeKey}`);
  }

  requireArrayProperty(compute, 'kernelProfiles', `${computeLabel}.kernelProfiles`);

  const kvcache = requireNullableObject(sessionDefaults, 'kvcache', 'sessionDefaults.kvcache');
  if (kvcache !== null) {
    requireDtypeProperty(kvcache, 'kvDtype', 'sessionDefaults.kvcache.kvDtype');
  }

  validateDecodeLoopContract(sessionDefaults);
}
225
+
226
/**
 * Validates a kernel reference; a falsy reference is accepted as "absent".
 *
 * @param {object|null|undefined} kernelRef - Reference with `id`, `version`
 *   and `digest` fields.
 * @param {string} label - Field path used in error messages.
 * @returns {void}
 * @throws {Error} When `id` is not a non-blank string, `version` fails
 *   `isExecutionV0Semver`, or `digest` fails `isExecutionV0Digest`.
 */
function assertKernelRef(kernelRef, label) {
  if (!kernelRef) {
    return;
  }
  const { id, version, digest } = kernelRef;
  const hasId = typeof id === 'string' && id.trim().length > 0;
  if (!hasId) {
    throw new Error(`[ExecutionV0] ${label}.id is required`);
  }
  if (!isExecutionV0Semver(version)) {
    throw new Error(`[ExecutionV0] ${label}.version must be semver; got "${version}"`);
  }
  if (!isExecutionV0Digest(digest)) {
    throw new Error(`[ExecutionV0] ${label}.digest must match sha256:<64-hex>`);
  }
}
238
+
239
/**
 * Tells whether a step phase applies to the requested phase.
 *
 * @param {string} phase - Phase declared on the step.
 * @param {string} targetPhase - Phase being executed.
 * @returns {boolean} True when `phase` is "both" or equals `targetPhase`.
 */
export function isPhaseMatch(phase, targetPhase) {
  if (phase === 'both') {
    return true;
  }
  return phase === targetPhase;
}
242
+
243
/**
 * Tells whether a step applies to a given layer index.
 *
 * @param {object} step - Step whose `layers` is either the string "all" or an
 *   array of layer indices.
 * @param {number} layerIdx - Layer index to look for.
 * @returns {boolean} True for "all" or when the array contains `layerIdx`;
 *   false for any other `layers` value.
 */
export function stepHasLayer(step, layerIdx) {
  const { layers } = step;
  if (layers === 'all') {
    return true;
  }
  return Array.isArray(layers) && layers.includes(layerIdx);
}
248
+
249
/**
 * Canonicalises a phase name (trimmed, lower-cased).
 *
 * @param {*} value - Raw phase; null/undefined are treated as an empty string.
 * @param {string} label - Field path used in the error message.
 * @returns {'prefill'|'decode'|'both'} The canonical phase.
 * @throws {Error} When the value is not one of the three accepted phases.
 */
export function normalizePhase(value, label) {
  const phase = String(value ?? '').trim().toLowerCase();
  const accepted = ['prefill', 'decode', 'both'];
  if (accepted.includes(phase)) {
    return phase;
  }
  throw new Error(`[ExecutionV0] ${label} must be prefill|decode|both; got "${value}"`);
}
256
+
257
/**
 * Validates a section name after trimming. Matching is case-sensitive.
 *
 * @param {*} value - Raw section; null/undefined are treated as an empty string.
 * @param {string} label - Field path used in the error message.
 * @returns {'preLayer'|'layer'|'postLayer'|'sampling'} The trimmed section.
 * @throws {Error} When the value is not one of the four accepted sections.
 */
export function normalizeSection(value, label) {
  const section = String(value ?? '').trim();
  switch (section) {
    case 'preLayer':
    case 'layer':
    case 'postLayer':
    case 'sampling':
      return section;
    default:
      throw new Error(`[ExecutionV0] ${label} must be preLayer|layer|postLayer|sampling; got "${value}"`);
  }
}
264
+
265
/**
 * Validates a slot name and returns it trimmed.
 *
 * @param {unknown} value - Raw slot name.
 * @param {string} label - Path used in the error message.
 * @returns {string} The trimmed slot name.
 * @throws {Error} When `value` is not a string or is blank after trimming.
 */
export function normalizeSlot(value, label) {
  const trimmed = typeof value === 'string' ? value.trim() : '';
  if (trimmed.length === 0) {
    throw new Error(`[ExecutionV0] ${label} must be a non-empty string`);
  }
  return trimmed;
}
271
+
272
/**
 * Verifies that a step's resolved output dtype is one its kernel declares.
 *
 * The check is skipped for `cast` steps and whenever
 * `policies.unsupportedPrecision` is not `'error'`. A capability set of size 0
 * returned by `getKernelOutputCapabilities` also skips the dtype check.
 *
 * @param {object} step - Execution step (`op`, `id`, `kernel`, `entry` are read).
 * @param {{outputDtype: unknown}} resolvedPrecision - Resolved precision for the step.
 * @param {{unsupportedPrecision: unknown}} policies - Active policies.
 * @throws {Error} When `getKernelOutputCapabilities` returns a falsy value, or
 *   when the normalized output dtype is not in the returned set.
 */
function assertKernelPrecisionCapability(step, resolvedPrecision, policies) {
  const enforced = step.op !== 'cast' && policies.unsupportedPrecision === 'error';
  if (!enforced) {
    return;
  }
  const kernelName = String(step.kernel ?? '').trim();
  const entryName = String(step.entry ?? 'main').trim() || 'main';
  const capabilities = getKernelOutputCapabilities(step);
  if (!capabilities) {
    throw new Error(
      `[ExecutionV0] step "${step.id}" kernel "${kernelName}#${entryName}" ` +
      'is not present in kernel registry; cannot validate precision capability.'
    );
  }
  if (capabilities.size === 0) {
    return;
  }
  const requestedDtype = normalizeDtype(
    resolvedPrecision.outputDtype,
    `${step.id}.precision.outputDtype`
  );
  if (capabilities.has(requestedDtype)) {
    return;
  }
  throw new Error(
    `[ExecutionV0] step "${step.id}" outputDtype=${requestedDtype} is unsupported by ` +
    `kernel "${kernelName}#${entryName}" (supported: ${[...capabilities].join(', ') || 'none'}).`
  );
}
299
+
300
/**
 * Creates an empty source-trace record.
 *
 * @returns {{session: object, steps: object}} A fresh object holding empty
 *   `session` and `steps` maps.
 */
export function createSourceTrace() {
  const trace = { session: {}, steps: {} };
  return trace;
}
306
+
307
/**
 * Records `{ source }` on the trace under the given path key.
 *
 * Does nothing when `trace` is falsy or `path` is not a non-empty string.
 * The entry is written directly on `trace` using `path` as the property name.
 *
 * @param {object|null|undefined} trace - Trace object to write to.
 * @param {unknown} path - Property name for the entry.
 * @param {unknown} source - Source label to store.
 * @returns {void}
 */
export function setSourceTrace(trace, path, source) {
  const hasPath = typeof path === 'string' && path.length > 0;
  if (trace && hasPath) {
    trace[path] = { source };
  }
}
311
+
312
/**
 * Records `{ source }` for one path of one step inside `trace.steps`.
 *
 * Does nothing when `trace`, `stepId`, or `path` is falsy. The per-step
 * container is created on first use.
 *
 * @param {{steps: object}|null|undefined} trace - Trace object to write to.
 * @param {string} stepId - Step identifier used as the key in `trace.steps`.
 * @param {string} path - Property name for the entry within the step container.
 * @param {unknown} source - Source label to store.
 * @returns {void}
 */
function setStepSourceTrace(trace, stepId, path, source) {
  if (!(trace && stepId && path)) {
    return;
  }
  const { steps } = trace;
  let stepEntries = steps[stepId];
  if (!stepEntries) {
    stepEntries = {};
    steps[stepId] = stepEntries;
  }
  stepEntries[path] = { source };
}
319
+
320
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to classify.
 * @returns {boolean} `true` when `typeof value` is `'object'` and the value is
 *   neither nullish nor an array.
 */
function isExecutionV0PlainObject(value) {
  if (value == null) {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
323
+
324
/**
 * Collects the key paths of every leaf reachable from `value`.
 *
 * Arrays and anything that `isExecutionV0PlainObject` rejects count as leaves.
 * A leaf at the root (empty `prefix`) is not recorded, and an object with no
 * entries contributes no path.
 *
 * @param {unknown} value - Value to walk.
 * @param {string[]} [prefix=[]] - Key path leading to `value`.
 * @param {string[][]} [out=[]] - Accumulator that receives the leaf paths.
 * @returns {string[][]} The `out` accumulator.
 */
export function collectLeafPaths(value, prefix = [], out = []) {
  const isBranch = !Array.isArray(value) && isExecutionV0PlainObject(value);
  if (!isBranch) {
    if (prefix.length > 0) {
      out.push(prefix);
    }
    return out;
  }
  Object.entries(value).forEach(([key, child]) => {
    collectLeafPaths(child, [...prefix, key], out);
  });
  return out;
}
342
+
343
/**
 * Tells whether following `pathSegments` from `root` reaches a defined value.
 *
 * Every segment must be an own property of a value accepted by
 * `isExecutionV0PlainObject`; otherwise the walk stops with `false`.
 *
 * @param {unknown} root - Value the walk starts from.
 * @param {Iterable<string>} pathSegments - Property names to follow in order.
 * @returns {boolean} `true` when the walk completes and the final value is not
 *   `undefined`.
 */
export function hasDefinedPath(root, pathSegments) {
  let cursor = root;
  for (const segment of pathSegments) {
    const canDescend = isExecutionV0PlainObject(cursor)
      && Object.prototype.hasOwnProperty.call(cursor, segment);
    if (!canDescend) {
      return false;
    }
    cursor = cursor[segment];
  }
  return cursor !== undefined;
}
353
+
354
/**
 * Looks up the kernel profile for a step.
 *
 * Delegates directly to `resolveExecutionV0KernelProfile` and returns its result.
 *
 * @param {unknown} profileIndex - Profile index passed through to the resolver.
 * @param {object} step - Execution step passed through to the resolver.
 * @returns {unknown} Whatever `resolveExecutionV0KernelProfile` returns.
 */
function resolveProfile(profileIndex, step) {
  const profile = resolveExecutionV0KernelProfile(profileIndex, step);
  return profile;
}
357
+
358
/**
 * Validates the static shape of one `execution.steps` entry.
 *
 * Checks run in this order: the step is an object; `id` and `op` are non-blank
 * strings; `phase`, `section`, `src`, `dst` normalize successfully; `layers` is
 * `"all"` or an array of non-negative integers. `cast` steps then need a valid
 * `toDtype` (and `fromDtype` when given). Every other op needs a non-blank
 * `kernel` and an object `kernelRef` that passes `assertKernelRef` and is bound
 * to `kernel#entry` according to `isKernelRefBoundToKernel`.
 *
 * @param {object} step - Step entry to validate.
 * @param {number} index - Position of the step, used in error messages.
 * @returns {void}
 * @throws {Error} On the first failed check. When building the expected kernel
 *   reference fails, the thrown error carries the original failure as `cause`.
 */
export function validateStepShape(step, index) {
  const at = `execution.steps[${index}]`;
  if (!step || typeof step !== 'object') {
    throw new Error(`[ExecutionV0] ${at} must be an object`);
  }
  if (typeof step.id !== 'string' || step.id.trim().length === 0) {
    throw new Error(`[ExecutionV0] ${at}.id is required`);
  }
  if (typeof step.op !== 'string' || step.op.trim().length === 0) {
    throw new Error(`[ExecutionV0] ${at}.op is required`);
  }
  normalizePhase(step.phase, `${at}.phase`);
  normalizeSection(step.section, `${at}.section`);
  normalizeSlot(step.src, `${at}.src`);
  normalizeSlot(step.dst, `${at}.dst`);

  if (step.layers !== 'all') {
    if (!Array.isArray(step.layers)) {
      throw new Error(`[ExecutionV0] ${at}.layers must be "all" or number[]`);
    }
    for (const layer of step.layers) {
      if (!Number.isInteger(layer) || layer < 0) {
        throw new Error(`[ExecutionV0] ${at}.layers must contain non-negative integers`);
      }
    }
  }

  if (step.op === 'cast') {
    normalizeDtype(step.toDtype, `${at}.toDtype`);
    if (step.fromDtype != null) {
      normalizeDtype(step.fromDtype, `${at}.fromDtype`);
    }
    return;
  }

  if (typeof step.kernel !== 'string' || step.kernel.trim().length === 0) {
    throw new Error(
      `[ExecutionV0] ${at} "${step.id}" requires kernel (non-cast op)`
    );
  }
  if (!step.kernelRef || typeof step.kernelRef !== 'object' || Array.isArray(step.kernelRef)) {
    throw new Error(
      `[ExecutionV0] ${at} "${step.id}" requires kernelRef {id, version, digest} (non-cast op)`
    );
  }
  assertKernelRef(step.kernelRef, `${at}.kernelRef`);

  // A blank or missing entry falls back to "main".
  const entry = String(step.entry ?? 'main').trim() || 'main';
  let expectedKernelRef;
  try {
    expectedKernelRef = buildKernelRefFromKernelEntry(step.kernel, entry);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the underlying failure reachable through `cause` so its stack is not lost.
    throw new Error(
      `[ExecutionV0] ${at} "${step.id}" kernel "${step.kernel}#${entry}" ` +
      `cannot be content-pinned: ${message}`,
      { cause: error }
    );
  }
  if (!isKernelRefBoundToKernel(step.kernelRef, step.kernel, entry)) {
    throw new Error(
      `[ExecutionV0] ${at} "${step.id}" kernelRef does not match kernel binding ` +
      `("${step.kernel}#${entry}"). Expected ${expectedKernelRef.id}@${expectedKernelRef.version} ${expectedKernelRef.digest}.`
    );
  }
}
418
+
419
/**
 * Rejects runtime inference overlays that carry unsupported keys.
 *
 * Falsy or non-object input is accepted without checks. Otherwise every own
 * enumerable key must be a member of `EXECUTION_V0_RUNTIME_KEYS`.
 *
 * @param {object|null|undefined} runtimeInference - Overlay to inspect.
 * @returns {void}
 * @throws {Error} Listing the supported keys and the unsupported ones found.
 */
export function assertExecutionRuntimeOverlay(runtimeInference) {
  if (!runtimeInference || typeof runtimeInference !== 'object') {
    return;
  }
  const rejected = [];
  for (const key of Object.keys(runtimeInference)) {
    if (!EXECUTION_V0_RUNTIME_KEYS.has(key)) {
      rejected.push(key);
    }
  }
  if (rejected.length === 0) {
    return;
  }
  throw new Error(
    `[ExecutionV0] runtime.inference overlay supports only ${[...EXECUTION_V0_RUNTIME_KEYS].join(', ')}; ` +
    `got unsupported keys: ${rejected.join(', ')}.`
  );
}
431
+
432
/**
 * Ensures no two steps share the same `id`.
 *
 * @param {Iterable<{id: unknown}>} steps - Steps to scan in order.
 * @returns {void}
 * @throws {Error} Naming the first `id` that is seen a second time.
 */
export function validateUniqueStepIds(steps) {
  const seenIds = new Set();
  for (const step of steps) {
    const stepId = step.id;
    if (seenIds.has(stepId)) {
      throw new Error(`[ExecutionV0] duplicate step id "${stepId}"`);
    }
    seenIds.add(stepId);
  }
}
441
+
442
/**
 * Tells whether a manifest inference section carries an execution step list.
 *
 * @param {object|null|undefined} manifestInference - Manifest inference section.
 * @returns {boolean} `true` when `execution` is truthy and `execution.steps`
 *   is an array.
 */
export function hasExecutionV0(manifestInference) {
  const execution = manifestInference?.execution;
  if (!execution) {
    return false;
  }
  return Array.isArray(execution.steps);
}
445
+
446
/**
 * Requires the execution-v0 schema id whenever execution steps are present.
 *
 * Manifests for which `hasExecutionV0` is false are accepted without checks.
 *
 * @param {object|null|undefined} manifestInference - Manifest inference section.
 * @returns {void}
 * @throws {Error} When `schema` (or `null` if absent) differs from
 *   `EXECUTION_V0_SCHEMA_ID`.
 */
export function assertExecutionV0Schema(manifestInference) {
  if (!hasExecutionV0(manifestInference)) {
    return;
  }
  const declaredSchema = manifestInference?.schema ?? null;
  if (declaredSchema === EXECUTION_V0_SCHEMA_ID) {
    return;
  }
  throw new Error(
    `[ExecutionV0] manifest.inference.schema must be "${EXECUTION_V0_SCHEMA_ID}" ` +
    `when execution is present; got "${declaredSchema}".`
  );
}
456
+
457
/**
 * Applies an execution patch (`set`, `remove`, `add`) to a list of steps.
 *
 * All `set` and `remove` entries are validated before anything is changed.
 * Then `set` overrides are applied, `remove` targets are filtered out, and
 * `add` entries are inserted one at a time relative to their anchor. Several
 * steps added after the same anchor keep their patch order. The result is
 * checked with `validateUniqueStepIds`.
 *
 * @param {object[]} baseSteps - Steps to patch; they are cloned with `cloneJson`
 *   and not mutated.
 * @param {{set?: object[], remove?: object[], add?: object[]}|null|undefined} patch
 *   Patch to apply. When falsy, `baseSteps` itself is returned.
 * @returns {object[]} The patched step list.
 * @throws {Error} On malformed entries, unknown targets or anchors, attempts to
 *   set a field outside `PATCH_SET_MUTABLE_FIELDS`, or an added id that already exists.
 */
export function applyExecutionPatchAtomic(baseSteps, patch) {
  if (!patch) {
    return baseSteps;
  }
  const workingSteps = cloneJson(baseSteps);
  const indexById = new Map();
  workingSteps.forEach((step, position) => {
    indexById.set(step.id, position);
  });
  const setEntries = patch.set ?? [];
  const removeEntries = patch.remove ?? [];

  // Validation pass for `set`: shape, existing target, and mutable fields only.
  for (const setEntry of setEntries) {
    const wellFormed = setEntry && typeof setEntry === 'object' && typeof setEntry.id === 'string';
    if (!wellFormed) {
      throw new Error('[ExecutionV0] executionPatch.set entries require id');
    }
    if (!indexById.has(setEntry.id)) {
      throw new Error(`[ExecutionV0] executionPatch.set target "${setEntry.id}" does not exist`);
    }
    const forbiddenKey = Object.keys(setEntry).find(
      (key) => key !== 'id' && !PATCH_SET_MUTABLE_FIELDS.has(key)
    );
    if (forbiddenKey !== undefined) {
      throw new Error(`[ExecutionV0] executionPatch.set "${setEntry.id}" cannot mutate "${forbiddenKey}"`);
    }
  }

  // Validation pass for `remove`: shape and existing target.
  for (const removeEntry of removeEntries) {
    const wellFormed = removeEntry && typeof removeEntry === 'object' && typeof removeEntry.id === 'string';
    if (!wellFormed) {
      throw new Error('[ExecutionV0] executionPatch.remove entries require id');
    }
    if (!indexById.has(removeEntry.id)) {
      throw new Error(`[ExecutionV0] executionPatch.remove target "${removeEntry.id}" does not exist`);
    }
  }

  // Apply `set` overrides onto the cloned steps.
  for (const setEntry of setEntries) {
    const target = workingSteps[indexById.get(setEntry.id)];
    for (const field of ['precision', 'kvIO', 'constants']) {
      if (setEntry[field] !== undefined) {
        target[field] = cloneJson(setEntry[field]);
      }
    }
    if (setEntry.entry !== undefined) {
      target.entry = setEntry.entry;
    }
  }

  const removedIds = new Set(removeEntries.map((removeEntry) => removeEntry.id));
  const current = workingSteps.filter((step) => !removedIds.has(step.id));

  // Ids already inserted after each anchor, so later additions land behind them.
  const insertedAfterAnchor = new Map();
  for (const addEntry of patch.add ?? []) {
    if (!addEntry?.step || typeof addEntry.step !== 'object') {
      throw new Error('[ExecutionV0] executionPatch.add requires a step payload');
    }
    const { insertBefore, insertAfter } = addEntry;
    const hasBefore = typeof insertBefore === 'string' && insertBefore.length > 0;
    const hasAfter = typeof insertAfter === 'string' && insertAfter.length > 0;
    if (hasBefore === hasAfter) {
      throw new Error('[ExecutionV0] executionPatch.add requires exactly one of insertBefore or insertAfter');
    }
    const newStepId = addEntry.step.id;
    if (current.some((step) => step.id === newStepId)) {
      throw new Error(`[ExecutionV0] executionPatch.add step id "${newStepId}" already exists`);
    }
    const anchorId = hasBefore ? insertBefore : insertAfter;
    const anchorIndex = current.findIndex((step) => step.id === anchorId);
    if (anchorIndex < 0) {
      throw new Error(`[ExecutionV0] executionPatch.add anchor "${anchorId}" not found`);
    }

    let insertIndex = anchorIndex;
    let earlierInserts = null;
    if (hasAfter) {
      insertIndex += 1;
      earlierInserts = insertedAfterAnchor.get(anchorId) ?? [];
      while (insertIndex < current.length && earlierInserts.includes(current[insertIndex].id)) {
        insertIndex += 1;
      }
    }
    current.splice(insertIndex, 0, cloneJson(addEntry.step));
    if (earlierInserts !== null) {
      earlierInserts.push(newStepId);
      insertedAfterAnchor.set(anchorId, earlierInserts);
    }
  }

  validateUniqueStepIds(current);
  return current;
}
541
+
542
/**
 * Summarizes which steps and fields a runtime patch touches.
 *
 * @param {{add?: object[], set?: object[]}|null|undefined} patch - Runtime patch.
 * @returns {{addedSteps: Set<string>, precisionFieldsByStep: Map<string, Set<string>>, kvIOFieldsByStep: Set<string>}}
 *   `addedSteps` holds the ids of steps in `patch.add`; `precisionFieldsByStep`
 *   maps a `patch.set` id to the keys of its `precision` object;
 *   `kvIOFieldsByStep` holds the `patch.set` ids that carry a `kvIO` object.
 *   All three are empty when `patch` is falsy or not an object.
 */
export function indexRuntimePatchMeta(patch) {
  const addedSteps = new Set();
  const precisionFieldsByStep = new Map();
  const kvIOFieldsByStep = new Set();
  const meta = { addedSteps, precisionFieldsByStep, kvIOFieldsByStep };
  if (!patch || typeof patch !== 'object') {
    return meta;
  }

  const isUsableId = (candidate) => typeof candidate === 'string' && candidate.length > 0;

  for (const addition of patch.add ?? []) {
    const addedId = addition?.step?.id;
    if (isUsableId(addedId)) {
      addedSteps.add(addedId);
    }
  }

  for (const override of patch.set ?? []) {
    const targetId = override?.id;
    if (!isUsableId(targetId)) {
      continue;
    }
    const { precision, kvIO } = override;
    if (precision && typeof precision === 'object') {
      precisionFieldsByStep.set(targetId, new Set(Object.keys(precision)));
    }
    if (kvIO && typeof kvIO === 'object') {
      kvIOFieldsByStep.add(targetId);
    }
  }
  return meta;
}
571
+
572
/**
 * Reads and normalizes `sessionDefaults.compute.defaults.activationDtype`.
 *
 * @param {object|null|undefined} sessionDefaults - Session defaults to read from.
 * @param {string} [label] - Path used in error messages and passed to `normalizeDtype`.
 * @returns {unknown} The result of `normalizeDtype` for the activation dtype.
 * @throws {Error} When the activation dtype is `null` or `undefined`.
 */
export function requireSessionActivationDtype(
  sessionDefaults,
  label = 'sessionDefaults.compute.defaults.activationDtype'
) {
  const declaredDtype = sessionDefaults?.compute?.defaults?.activationDtype;
  if (declaredDtype === null || declaredDtype === undefined) {
    throw new Error(`[ExecutionV0] ${label} is required.`);
  }
  return normalizeDtype(declaredDtype, label);
}
582
+
583
/**
 * Builds the starting slot-to-dtype map for step resolution.
 *
 * @param {object|null|undefined} sessionDefaults - Session defaults providing
 *   the activation dtype.
 * @returns {Map<string, unknown>} A map with a single `'state'` entry set to
 *   the required session activation dtype.
 * @throws {Error} When `requireSessionActivationDtype` rejects the defaults.
 */
export function createInitialSlotDtypes(sessionDefaults) {
  const slotDtypes = new Map();
  slotDtypes.set(
    'state',
    requireSessionActivationDtype(
      sessionDefaults,
      'sessionDefaults.compute.defaults.activationDtype'
    )
  );
  return slotDtypes;
}
590
+
591
/**
 * Ensures an attention step's KV read/write dtypes equal the session KV dtype.
 *
 * Nothing is checked for non-attention steps, when `kvIO` is falsy, or when
 * `sessionDefaults.kvcache.kvDtype` is nullish.
 *
 * @param {object} step - Execution step (`op` and `id` are read).
 * @param {{readDtype: unknown, writeDtype: unknown}|null} kvIO - Resolved KV I/O.
 * @param {object|null|undefined} sessionDefaults - Session defaults.
 * @returns {void}
 * @throws {Error} When `readDtype` or `writeDtype` differs from the normalized
 *   session KV dtype.
 */
function ensureCompatibleKV(step, kvIO, sessionDefaults) {
  const applies = step.op === 'attention' && Boolean(kvIO);
  if (!applies) {
    return;
  }
  const declaredKvDtype = sessionDefaults?.kvcache?.kvDtype;
  if (declaredKvDtype == null) {
    return;
  }
  const expectedDtype = normalizeDtype(declaredKvDtype, 'sessionDefaults.kvcache.kvDtype');
  const { readDtype, writeDtype } = kvIO;
  if (readDtype === expectedDtype && writeDtype === expectedDtype) {
    return;
  }
  throw new Error(
    `[ExecutionV0] step "${step.id}" kvIO read/write (${readDtype}/${writeDtype}) ` +
    `must match sessionDefaults.kvcache.kvDtype (${expectedDtype}).`
  );
}
607
+
608
/**
 * Resolves the steps that apply to one phase, tracking the dtype held by each slot.
 *
 * For every step whose phase matches, the function resolves its kernel profile
 * and precision, checks that the source slot has already been produced,
 * enforces the cast policy, determines the output dtype, validates kernel
 * precision capability and KV compatibility, and optionally records where each
 * precision / KV value came from in `options.sourceTrace`.
 *
 * @param {string} phase - Target phase; steps are kept when `isPhaseMatch` accepts them.
 * @param {object[]} steps - Execution steps in order.
 * @param {object} sessionDefaults - Session defaults used for precision and KV resolution.
 * @param {unknown} profileIndex - Kernel profile index passed to `resolveProfile`.
 * @param {object} policies - Policies (`unresolvedKernel`, `dtypeTransition`, and
 *   those read by `assertKernelPrecisionCapability`).
 * @param {object} [options] - Optional `initialSlotDtypes`, `sourceTrace`,
 *   `sessionDefaultSources`, and `runtimePatchMeta`.
 * @returns {{steps: object[], finalSlotDtypes: Map<string, unknown>}} The
 *   resolved steps and the slot dtype map after the last step.
 * @throws {Error} On unresolved kernel profiles (policy `'error'`), reads of
 *   unproduced slots, implicit dtype transitions (policy `'require_cast_step'`),
 *   mismatched cast `fromDtype`, or any failure raised by the helpers it calls.
 */
export function resolvePhaseSteps(phase, steps, sessionDefaults, profileIndex, policies, options = {}) {
  const slotDtypes = options.initialSlotDtypes
    ? new Map(options.initialSlotDtypes)
    : createInitialSlotDtypes(sessionDefaults);
  const sourceTrace = options.sourceTrace ?? null;
  const sessionDefaultSources = options.sessionDefaultSources ?? {};
  const runtimePatchMeta = options.runtimePatchMeta ?? {
    addedSteps: new Set(),
    precisionFieldsByStep: new Map(),
    kvIOFieldsByStep: new Set(),
  };
  const resolvedSteps = [];

  // A 'sessionDefault' source is reported as wherever that session default came from.
  const viaSessionDefault = (source, field) => (
    source === 'sessionDefault' ? (sessionDefaultSources[field] ?? 'derived') : source
  );

  for (const step of steps) {
    const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
    if (!isPhaseMatch(stepPhase, phase)) {
      continue;
    }

    const profile = resolveProfile(profileIndex, step);
    const profileMissing = Boolean(step.kernelRef) && !profile;
    if (profileMissing && policies.unresolvedKernel === 'error') {
      throw new Error(
        `[ExecutionV0] step "${step.id}" references kernel profile ` +
        `${step.kernelRef.id}@${step.kernelRef.version} (${step.kernelRef.digest}) ` +
        'but no matching sessionDefaults.compute.kernelProfiles entry was found.'
      );
    }

    const { precision, sources: precisionSources } = resolvePrecision(step, profile, sessionDefaults);
    const src = normalizeSlot(step.src, `${step.id}.src`);
    const dst = normalizeSlot(step.dst, `${step.id}.dst`);
    if (!slotDtypes.has(src)) {
      throw new Error(
        `[ExecutionV0] step "${step.id}" reads slot "${src}" before it is produced. ` +
        'Add an explicit producer step or cast/load bridge.'
      );
    }

    const derivedInput = slotDtypes.get(src);
    const inputDtype = normalizeDtype(
      precision.inputDtype ?? derivedInput,
      `${step.id}.precision.inputDtype`
    );
    const isCast = step.op === 'cast';
    const implicitTransition = !isCast && inputDtype !== derivedInput;
    if (policies.dtypeTransition === 'require_cast_step' && implicitTransition) {
      throw new Error(
        `[ExecutionV0] step "${step.id}" requires inputDtype=${inputDtype} ` +
        `but slot "${src}" currently holds ${derivedInput}. Insert explicit cast step.`
      );
    }

    let outputDtype = normalizeDtype(precision.outputDtype, `${step.id}.precision.outputDtype`);
    let outputDtypeSource = precisionSources.outputDtype;
    if (isCast) {
      // A cast always produces its declared target dtype.
      outputDtype = normalizeDtype(step.toDtype, `${step.id}.toDtype`);
      outputDtypeSource = 'manifest';
      const fromDtype = step.fromDtype
        ? normalizeDtype(step.fromDtype, `${step.id}.fromDtype`)
        : derivedInput;
      if (fromDtype !== derivedInput) {
        throw new Error(
          `[ExecutionV0] cast step "${step.id}" fromDtype=${fromDtype} does not match slot "${src}" dtype=${derivedInput}`
        );
      }
    } else if (outputDtypeSource === 'sessionDefault') {
      // When only the session default applies and the kernel declares exactly
      // one output dtype, that declared dtype is used instead.
      const declaredOutputDtypes = getKernelOutputCapabilities(step);
      if (declaredOutputDtypes?.size === 1) {
        [outputDtype] = declaredOutputDtypes;
        outputDtypeSource = 'derived';
      }
    }

    const mathDtype = normalizeDtype(precision.mathDtype, `${step.id}.precision.mathDtype`);
    const accumDtype = normalizeDtype(precision.accumDtype, `${step.id}.precision.accumDtype`);
    const resolvedPrecision = { inputDtype, mathDtype, accumDtype, outputDtype };
    assertKernelPrecisionCapability(step, resolvedPrecision, policies);
    slotDtypes.set(dst, outputDtype);

    const isAttention = step.op === 'attention';
    const kvIOResolved = isAttention ? resolveKVIO(step, profile, sessionDefaults) : null;
    const kvIO = kvIOResolved?.value ?? null;
    ensureCompatibleKV(step, kvIO, sessionDefaults);

    if (sourceTrace) {
      const patchedFields = runtimePatchMeta.precisionFieldsByStep.get(step.id) ?? new Set();
      const isAddedStep = runtimePatchMeta.addedSteps.has(step.id);
      // Steps added by the patch count as patched for every precision field they declare.
      const wasPatched = (field) => (
        isAddedStep ? step.precision?.[field] != null : patchedFields.has(field)
      );
      const record = (path, patched, fallbackSource) => {
        setStepSourceTrace(sourceTrace, step.id, path, patched ? 'runtime.patch' : fallbackSource);
      };

      record(
        'precision.inputDtype',
        wasPatched('inputDtype'),
        precision.inputDtype != null ? precisionSources.inputDtype : 'derived'
      );
      record(
        'precision.mathDtype',
        wasPatched('mathDtype'),
        viaSessionDefault(precisionSources.mathDtype, 'mathDtype')
      );
      record(
        'precision.accumDtype',
        wasPatched('accumDtype'),
        viaSessionDefault(precisionSources.accumDtype, 'accumDtype')
      );
      record(
        'precision.outputDtype',
        wasPatched('outputDtype'),
        viaSessionDefault(outputDtypeSource, 'outputDtype')
      );

      if (isAttention) {
        const kvPatched = runtimePatchMeta.kvIOFieldsByStep.has(step.id)
          || (isAddedStep && !!step.kvIO);
        const kvFallbackSource = viaSessionDefault(kvIOResolved?.source, 'kvDtype') ?? 'derived';
        record('kvIO.readDtype', kvPatched, kvFallbackSource);
        record('kvIO.writeDtype', kvPatched, kvFallbackSource);
      }
    }

    resolvedSteps.push({
      ...step,
      src,
      dst,
      phase: stepPhase,
      section: normalizeSection(step.section, `${step.id}.section`),
      precision: resolvedPrecision,
      kvIO,
    });
  }

  return {
    steps: resolvedSteps,
    finalSlotDtypes: slotDtypes,
  };
}
759
+
760
/**
 * Drops dtype defaults from `compute.defaults` that merely repeat the schema default.
 *
 * A dtype key (`activationDtype`, `mathDtype`, `accumDtype`, `outputDtype`) is
 * dropped when the manifest declares a non-nullish value for it and the value in
 * `compute.defaults` strictly equals the one in `defaultComputeDefaults`.
 * The input `compute` object is never mutated.
 *
 * @param {object|null|undefined} compute - Runtime compute section.
 * @param {object|null|undefined} manifestComputeDefaults - Manifest compute defaults.
 * @param {object|null|undefined} defaultComputeDefaults - Schema default compute defaults.
 * @returns {object|null|undefined} `compute` itself when nothing qualifies; a
 *   copy with the trimmed `defaults`; a copy without `defaults` when none
 *   remain; or `null` when that copy would have no keys at all.
 */
function stripSchemaDefaultComputeDefaults(compute, manifestComputeDefaults, defaultComputeDefaults) {
  const currentDefaults = compute?.defaults;
  if (!currentDefaults || !manifestComputeDefaults || !defaultComputeDefaults) {
    return compute;
  }

  const remainingDefaults = { ...currentDefaults };
  const droppableKeys = ['activationDtype', 'mathDtype', 'accumDtype', 'outputDtype'].filter(
    (key) => manifestComputeDefaults[key] != null
      && remainingDefaults[key] === defaultComputeDefaults[key]
  );
  if (droppableKeys.length === 0) {
    return compute;
  }
  for (const key of droppableKeys) {
    delete remainingDefaults[key];
  }

  if (Object.keys(remainingDefaults).length > 0) {
    return { ...compute, defaults: remainingDefaults };
  }
  const computeWithoutDefaults = { ...compute };
  delete computeWithoutDefaults.defaults;
  return Object.keys(computeWithoutDefaults).length === 0 ? null : computeWithoutDefaults;
}
793
+
794
/**
 * Removes runtime session values that would shadow values the manifest declares.
 *
 * Four adjustments are made, each only when the manifest provides the
 * corresponding value:
 * - dtype defaults equal to the schema default are stripped from `compute.defaults`;
 * - an empty `compute.kernelProfiles` array is dropped;
 * - an explicit `kvcache: null` is dropped;
 * - an explicit `decodeLoop: null` is dropped.
 *
 * @param {object|null|undefined} runtimeSession - Runtime session overlay.
 * @param {object|null|undefined} manifestInference - Manifest inference section.
 * @param {object|null|undefined} defaultComputeDefaults - Schema default compute defaults.
 * @returns {object|null|undefined} `runtimeSession` itself when it is not an
 *   object or nothing changed; otherwise a shallow copy with the adjustments applied.
 */
export function normalizeRuntimeSessionForExecutionV0(
  runtimeSession,
  manifestInference,
  defaultComputeDefaults
) {
  if (!runtimeSession || typeof runtimeSession !== 'object') {
    return runtimeSession;
  }

  const manifestSessionDefaults = manifestInference?.sessionDefaults ?? null;
  const manifestCompute = manifestSessionDefaults?.compute;
  const manifestProfiles = manifestCompute?.kernelProfiles;
  const manifestComputeDefaults = manifestCompute?.defaults ?? null;
  const manifestHasProfiles = Array.isArray(manifestProfiles) && manifestProfiles.length > 0;
  const manifestHasKVCache = manifestSessionDefaults?.kvcache != null;
  const manifestHasDecodeLoop = manifestSessionDefaults?.decodeLoop != null;

  const ownValue = (key) => (
    Object.prototype.hasOwnProperty.call(runtimeSession, key) ? runtimeSession[key] : undefined
  );
  let compute = runtimeSession.compute ?? null;
  let kvcache = ownValue('kvcache');
  let decodeLoop = ownValue('decodeLoop');
  let changed = false;

  if (manifestComputeDefaults) {
    const strippedCompute = stripSchemaDefaultComputeDefaults(
      compute,
      manifestComputeDefaults,
      defaultComputeDefaults
    );
    if (strippedCompute !== compute) {
      compute = strippedCompute;
      changed = true;
    }
  }

  if (compute && Object.prototype.hasOwnProperty.call(compute, 'kernelProfiles')) {
    const runtimeProfiles = compute.kernelProfiles;
    const isEmptyList = Array.isArray(runtimeProfiles) && runtimeProfiles.length === 0;
    if (isEmptyList && manifestHasProfiles) {
      const trimmedCompute = { ...compute };
      delete trimmedCompute.kernelProfiles;
      compute = Object.keys(trimmedCompute).length === 0 ? null : trimmedCompute;
      changed = true;
    }
  }

  if (kvcache === null && manifestHasKVCache) {
    kvcache = undefined;
    changed = true;
  }
  if (decodeLoop === null && manifestHasDecodeLoop) {
    decodeLoop = undefined;
    changed = true;
  }

  if (!changed) {
    return runtimeSession;
  }

  const normalized = { ...runtimeSession };
  const assignOrDrop = (key, value, drop) => {
    if (drop) {
      delete normalized[key];
    } else {
      normalized[key] = value;
    }
  };
  assignOrDrop('compute', compute, !compute);
  assignOrDrop('kvcache', kvcache, kvcache === undefined);
  assignOrDrop('decodeLoop', decodeLoop, decodeLoop === undefined);

  // `normalized` is already a fresh object, so an empty result is a fresh `{}`.
  return normalized;
}
874
+
875
/**
 * Checks that decode steps read slots carried over from prefill with the same dtype.
 *
 * A slot counts as carried when no earlier decode step has written it and
 * `prefillFinalSlotDtypes` has an entry for it. `cast` steps are exempt.
 *
 * @param {object} options - Validation inputs.
 * @param {object[]} options.steps - All execution steps; only decode-matching ones are checked.
 * @param {Map<string, unknown>} options.prefillFinalSlotDtypes - Slot dtypes left by prefill.
 * @param {Map<string, unknown>} options.decodeInitialSlotDtypes - Fallback slot
 *   dtypes, consulted only when neither the step precision nor the carried
 *   dtype provides a value.
 * @param {object} options.sessionDefaults - Session defaults for precision resolution.
 * @param {unknown} options.profileIndex - Kernel profile index.
 * @returns {void}
 * @throws {Error} When a decode step's input dtype differs from the carried dtype.
 */
export function validatePhaseBoundaryCompatibility(options) {
  const {
    steps,
    prefillFinalSlotDtypes,
    decodeInitialSlotDtypes,
    sessionDefaults,
    profileIndex,
  } = options;

  // Phases are normalized for every step up front, before any step is checked.
  const decodeSteps = steps.filter(
    (step) => isPhaseMatch(normalizePhase(step.phase, `${step.id}.phase`), 'decode')
  );

  const slotsWrittenInDecode = new Set();
  for (const step of decodeSteps) {
    const src = normalizeSlot(step.src, `${step.id}.src`);
    const dst = normalizeSlot(step.dst, `${step.id}.dst`);
    const isCarriedRead = !slotsWrittenInDecode.has(src) && prefillFinalSlotDtypes.has(src);

    if (isCarriedRead && step.op !== 'cast') {
      const profile = resolveProfile(profileIndex, step);
      const { precision } = resolvePrecision(step, profile, sessionDefaults);
      const carriedDtype = prefillFinalSlotDtypes.get(src);
      const decodeInput = normalizeDtype(
        precision.inputDtype ?? carriedDtype ?? decodeInitialSlotDtypes.get(src),
        `${step.id}.precision.inputDtype`
      );
      if (decodeInput !== carriedDtype) {
        throw new Error(
          `[ExecutionV0] decode step "${step.id}" reads carried slot "${src}" as ${decodeInput} ` +
          `but prefill left ${carriedDtype}. Add explicit cast at phase boundary.`
        );
      }
    }
    slotsWrittenInDecode.add(dst);
  }
}
913
+
914
/**
 * Rejects prefill attention steps when the session KV layout is `"bdpa"`.
 *
 * Any other normalized layout is accepted without inspecting the steps.
 *
 * @param {object[]} steps - Execution steps to inspect.
 * @param {object|null|undefined} sessionDefaults - Session defaults providing `kvcache.layout`.
 * @returns {void}
 * @throws {Error} Naming the first attention step whose phase matches prefill.
 */
export function assertKVLayoutExecutionCompatibility(steps, sessionDefaults) {
  const layout = normalizeKVLayout(sessionDefaults?.kvcache?.layout, 'sessionDefaults.kvcache.layout');
  if (layout !== 'bdpa') {
    return;
  }
  const isPrefillAttention = (step) => (
    step?.op === 'attention'
    && isPhaseMatch(normalizePhase(step.phase, `${step.id}.phase`), 'prefill')
  );
  const offendingStep = steps.find(isPrefillAttention);
  if (offendingStep) {
    throw new Error(
      `[ExecutionV0] sessionDefaults.kvcache.layout="bdpa" is decode-only, ` +
      `but step "${offendingStep.id}" declares prefill attention. ` +
      'Use a non-BDPA KV layout for prefill-capable models or remove prefill attention from the execution contract.'
    );
  }
}
932
+
933
+ export {
934
+ buildKernelProfileKey,
935
+ indexKernelProfiles,
936
+ normalizeDtype,
937
+ };