@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -1,1007 +1,37 @@
1
1
  import { mergeRuntimeValues } from '../../../config/runtime-merge.js';
2
+ import { buildExecutionV0FromKernelPath } from '../../../converter/execution-v0-manifest.js';
2
3
  import {
3
- buildExecutionV0KernelProfileKey,
4
- indexExecutionV0KernelProfiles,
5
- normalizeExecutionV0Dtype,
6
- resolveExecutionV0KernelProfile,
7
- resolveExecutionV0KVIO,
8
- resolveExecutionV0Precision,
9
- } from '../../../config/execution-v0-contract-check.js';
10
- import {
11
- EXECUTION_V0_SCHEMA_ID,
4
+ DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS,
12
5
  DEFAULT_EXECUTION_V0_POLICIES,
13
6
  DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
14
- isExecutionV0Digest,
15
- isExecutionV0Semver,
16
7
  } from '../../../config/schema/execution-v0.schema.js';
17
- import { KERNEL_CONFIGS } from '../../../gpu/kernels/kernel-configs.js';
18
- import { buildKernelRefFromKernelEntry, isKernelRefBoundToKernel } from '../../../config/kernels/kernel-ref.js';
19
-
20
- const PATCH_SET_MUTABLE_FIELDS = new Set(['precision', 'kvIO', 'constants', 'entry']);
21
- const EXECUTION_V0_RUNTIME_KEYS = new Set(['session', 'executionPatch']);
22
- const PIPELINE_COMPATIBLE_OPS = new Set([
23
- 'save',
24
- 'load',
25
- 'conv',
26
- 'attention',
27
- 'rmsnorm',
28
- 'ffn',
29
- 'residual_add',
30
- 'cast',
31
- 'noop',
32
- ]);
33
-
34
- const KERNEL_OUTPUT_CAPABILITIES = (() => {
35
- const byKernelEntry = new Map();
36
- for (const variants of Object.values(KERNEL_CONFIGS)) {
37
- for (const config of Object.values(variants)) {
38
- const kernel = config?.shaderFile;
39
- const entry = config?.entryPoint ?? 'main';
40
- if (typeof kernel !== 'string' || kernel.length === 0) continue;
41
- const key = `${kernel}#${entry}`;
42
- if (!byKernelEntry.has(key)) {
43
- byKernelEntry.set(key, new Set());
44
- }
45
- const outputDtype = config?.outputDtype;
46
- if (typeof outputDtype === 'string' && outputDtype.length > 0) {
47
- byKernelEntry.get(key).add(String(outputDtype).toLowerCase());
48
- }
49
- }
50
- }
51
- return byKernelEntry;
52
- })();
53
-
54
- function getKernelOutputCapabilities(step) {
55
- const kernel = String(step?.kernel ?? '').trim();
56
- const entry = String(step?.entry ?? 'main').trim() || 'main';
57
- if (!kernel) {
58
- return null;
59
- }
60
- return KERNEL_OUTPUT_CAPABILITIES.get(`${kernel}#${entry}`) ?? null;
61
- }
62
-
63
- function cloneJson(value) {
64
- if (typeof structuredClone === 'function') {
65
- return structuredClone(value);
66
- }
67
- return JSON.parse(JSON.stringify(value));
68
- }
69
-
70
- const normalizeDtype = normalizeExecutionV0Dtype;
71
- const resolvePrecision = resolveExecutionV0Precision;
72
- const resolveKVIO = resolveExecutionV0KVIO;
73
-
74
- function normalizePhase(value, label) {
75
- const normalized = String(value ?? '').trim().toLowerCase();
76
- if (normalized !== 'prefill' && normalized !== 'decode' && normalized !== 'both') {
77
- throw new Error(`[ExecutionV0] ${label} must be prefill|decode|both; got "${value}"`);
78
- }
79
- return normalized;
80
- }
81
-
82
- function normalizeSection(value, label) {
83
- const normalized = String(value ?? '').trim();
84
- if (!['preLayer', 'layer', 'postLayer', 'sampling'].includes(normalized)) {
85
- throw new Error(`[ExecutionV0] ${label} must be preLayer|layer|postLayer|sampling; got "${value}"`);
86
- }
87
- return normalized;
88
- }
89
-
90
- function normalizeKVLayout(value, label) {
91
- if (value == null) {
92
- return null;
93
- }
94
- const normalized = String(value).trim().toLowerCase();
95
- if (!normalized) {
96
- return null;
97
- }
98
- return normalized;
99
- }
100
-
101
- function assertKernelRef(kernelRef, label) {
102
- if (!kernelRef) return;
103
- if (typeof kernelRef.id !== 'string' || kernelRef.id.trim().length === 0) {
104
- throw new Error(`[ExecutionV0] ${label}.id is required`);
105
- }
106
- if (!isExecutionV0Semver(kernelRef.version)) {
107
- throw new Error(`[ExecutionV0] ${label}.version must be semver; got "${kernelRef.version}"`);
108
- }
109
- if (!isExecutionV0Digest(kernelRef.digest)) {
110
- throw new Error(`[ExecutionV0] ${label}.digest must match sha256:<64-hex>`);
111
- }
112
- }
113
-
114
- function isPhaseMatch(phase, targetPhase) {
115
- return phase === 'both' || phase === targetPhase;
116
- }
117
-
118
- function stepHasLayer(step, layerIdx) {
119
- if (step.layers === 'all') return true;
120
- if (!Array.isArray(step.layers)) return false;
121
- return step.layers.includes(layerIdx);
122
- }
123
-
124
- const buildKernelProfileKey = buildExecutionV0KernelProfileKey;
125
-
126
- function normalizeSlot(value, label) {
127
- if (typeof value !== 'string' || value.trim().length === 0) {
128
- throw new Error(`[ExecutionV0] ${label} must be a non-empty string`);
129
- }
130
- return value.trim();
131
- }
132
-
133
- function assertKernelPrecisionCapability(step, resolvedPrecision, policies) {
134
- if (step.op === 'cast') {
135
- return;
136
- }
137
- if (policies.unsupportedPrecision !== 'error') {
138
- return;
139
- }
140
- const kernel = String(step.kernel ?? '').trim();
141
- const entry = String(step.entry ?? 'main').trim() || 'main';
142
- const supportedOutputDtypes = getKernelOutputCapabilities(step);
143
- if (!supportedOutputDtypes) {
144
- throw new Error(
145
- `[ExecutionV0] step "${step.id}" kernel "${kernel}#${entry}" ` +
146
- 'is not present in kernel registry; cannot validate precision capability.'
147
- );
148
- }
149
- if (supportedOutputDtypes.size === 0) {
150
- // Some kernels do not declare output dtype metadata yet; treat as unknown.
151
- return;
152
- }
153
- const outputDtype = normalizeDtype(resolvedPrecision.outputDtype, `${step.id}.precision.outputDtype`);
154
- if (!supportedOutputDtypes.has(outputDtype)) {
155
- throw new Error(
156
- `[ExecutionV0] step "${step.id}" outputDtype=${outputDtype} is unsupported by ` +
157
- `kernel "${kernel}#${entry}" (supported: ${[...supportedOutputDtypes].join(', ') || 'none'}).`
158
- );
159
- }
160
- }
161
-
162
- function createSourceTrace() {
163
- return {
164
- session: {},
165
- steps: {},
166
- };
167
- }
168
-
169
- function setSourceTrace(trace, path, source) {
170
- if (!trace || typeof path !== 'string' || path.length === 0) return;
171
- trace[path] = { source };
172
- }
173
-
174
- function setStepSourceTrace(trace, stepId, path, source) {
175
- if (!trace || !stepId || !path) return;
176
- if (!trace.steps[stepId]) {
177
- trace.steps[stepId] = {};
178
- }
179
- trace.steps[stepId][path] = { source };
180
- }
181
-
182
- function isPlainObject(value) {
183
- return value != null && typeof value === 'object' && !Array.isArray(value);
184
- }
185
-
186
- function collectLeafPaths(value, prefix = [], out = []) {
187
- if (Array.isArray(value)) {
188
- if (prefix.length > 0) {
189
- out.push(prefix);
190
- }
191
- return out;
192
- }
193
- if (!isPlainObject(value)) {
194
- if (prefix.length > 0) {
195
- out.push(prefix);
196
- }
197
- return out;
198
- }
199
- for (const [key, child] of Object.entries(value)) {
200
- collectLeafPaths(child, [...prefix, key], out);
201
- }
202
- return out;
203
- }
204
-
205
- function hasDefinedPath(root, pathSegments) {
206
- let current = root;
207
- for (const segment of pathSegments) {
208
- if (!isPlainObject(current) || !Object.prototype.hasOwnProperty.call(current, segment)) {
209
- return false;
210
- }
211
- current = current[segment];
212
- }
213
- return current !== undefined;
214
- }
215
-
216
- const indexKernelProfiles = indexExecutionV0KernelProfiles;
217
-
218
- function resolveProfile(profileIndex, step) {
219
- return resolveExecutionV0KernelProfile(profileIndex, step);
220
- }
221
-
222
/**
 * Validates the structural shape of one `execution.steps` entry.
 *
 * Checks run in a fixed order: object-ness, `id`, `op`, phase/section/slots,
 * `layers`, and then either the cast dtypes or the kernel + kernelRef binding.
 *
 * @param {object} step - Step declaration to validate.
 * @param {number} index - Position of the step, used in error messages.
 * @returns {void}
 * @throws {Error} When any field is missing or malformed. If the kernel cannot
 *   be content-pinned, the thrown error carries the underlying failure as
 *   `cause`.
 */
function validateStepShape(step, index) {
  const where = `execution.steps[${index}]`;
  const isBlank = (value) => typeof value !== 'string' || value.trim().length === 0;

  if (!step || typeof step !== 'object') {
    throw new Error(`[ExecutionV0] ${where} must be an object`);
  }
  if (isBlank(step.id)) {
    throw new Error(`[ExecutionV0] ${where}.id is required`);
  }
  if (isBlank(step.op)) {
    throw new Error(`[ExecutionV0] ${where}.op is required`);
  }

  // The normalizers are called only for their validation side effect here.
  normalizePhase(step.phase, `${where}.phase`);
  normalizeSection(step.section, `${where}.section`);
  normalizeSlot(step.src, `${where}.src`);
  normalizeSlot(step.dst, `${where}.dst`);

  if (step.layers !== 'all') {
    if (!Array.isArray(step.layers)) {
      throw new Error(`[ExecutionV0] ${where}.layers must be "all" or number[]`);
    }
    for (const layer of step.layers) {
      if (!Number.isInteger(layer) || layer < 0) {
        throw new Error(`[ExecutionV0] ${where}.layers must contain non-negative integers`);
      }
    }
  }

  if (step.op === 'cast') {
    // Cast steps need a target dtype; the source dtype is optional.
    normalizeDtype(step.toDtype, `${where}.toDtype`);
    if (step.fromDtype != null) {
      normalizeDtype(step.fromDtype, `${where}.fromDtype`);
    }
    return;
  }

  if (isBlank(step.kernel)) {
    throw new Error(
      `[ExecutionV0] ${where} "${step.id}" requires kernel (non-cast op)`
    );
  }
  if (!step.kernelRef || typeof step.kernelRef !== 'object' || Array.isArray(step.kernelRef)) {
    throw new Error(
      `[ExecutionV0] ${where} "${step.id}" requires kernelRef {id, version, digest} (non-cast op)`
    );
  }
  assertKernelRef(step.kernelRef, `${where}.kernelRef`);

  const entry = String(step.entry ?? 'main').trim() || 'main';
  let expectedKernelRef;
  try {
    expectedKernelRef = buildKernelRefFromKernelEntry(step.kernel, entry);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the original failure attached so its stack is not lost.
    throw new Error(
      `[ExecutionV0] ${where} "${step.id}" kernel "${step.kernel}#${entry}" ` +
      `cannot be content-pinned: ${message}`,
      { cause: error }
    );
  }
  if (!isKernelRefBoundToKernel(step.kernelRef, step.kernel, entry)) {
    throw new Error(
      `[ExecutionV0] ${where} "${step.id}" kernelRef does not match kernel binding ` +
      `("${step.kernel}#${entry}"). Expected ${expectedKernelRef.id}@${expectedKernelRef.version} ${expectedKernelRef.digest}.`
    );
  }
}
282
-
283
/**
 * Rejects runtime inference overlays that carry keys outside
 * EXECUTION_V0_RUNTIME_KEYS.
 *
 * Falsy or non-object overlays are accepted without inspection.
 *
 * @param {object|null|undefined} runtimeInference - Overlay to check.
 * @returns {void}
 * @throws {Error} Listing every unsupported key found.
 */
function assertExecutionRuntimeOverlay(runtimeInference) {
  const isObjectLike = Boolean(runtimeInference) && typeof runtimeInference === 'object';
  if (!isObjectLike) {
    return;
  }

  const rejected = [];
  for (const key of Object.keys(runtimeInference)) {
    if (!EXECUTION_V0_RUNTIME_KEYS.has(key)) {
      rejected.push(key);
    }
  }
  if (rejected.length === 0) {
    return;
  }

  throw new Error(
    `[ExecutionV0] runtime.inference overlay supports only ${[...EXECUTION_V0_RUNTIME_KEYS].join(', ')}; ` +
    `got unsupported keys: ${rejected.join(', ')}.`
  );
}
295
-
296
/**
 * Ensures no two steps share the same `id`.
 *
 * @param {Iterable<{id: *}>} steps - Steps to scan in order.
 * @returns {void}
 * @throws {Error} On the first id that repeats an earlier one.
 */
function validateUniqueStepIds(steps) {
  const seen = new Set();
  for (const step of steps) {
    const sizeBefore = seen.size;
    seen.add(step.id);
    // An unchanged size means the id was already present.
    if (seen.size === sizeBefore) {
      throw new Error(`[ExecutionV0] duplicate step id "${step.id}"`);
    }
  }
}
305
-
306
/**
 * Requires `manifestInference.schema` to equal EXECUTION_V0_SCHEMA_ID whenever
 * hasExecutionV0 reports that an execution block is present.
 *
 * @param {object|null|undefined} manifestInference - Manifest inference section.
 * @returns {void}
 * @throws {Error} When execution is present but the schema id does not match.
 */
function assertExecutionV0Schema(manifestInference) {
  if (hasExecutionV0(manifestInference)) {
    const declaredSchema = manifestInference?.schema ?? null;
    if (declaredSchema === EXECUTION_V0_SCHEMA_ID) {
      return;
    }
    throw new Error(
      `[ExecutionV0] manifest.inference.schema must be "${EXECUTION_V0_SCHEMA_ID}" ` +
      `when execution is present; got "${declaredSchema}".`
    );
  }
}
316
-
317
/**
 * Applies an execution patch (`set`, `remove`, `add`) to a list of steps.
 *
 * All `set` and `remove` entries are validated before anything is changed.
 * The patch is applied to a clone of `baseSteps` in the order: set, remove,
 * add. With no patch, `baseSteps` itself is returned unchanged.
 *
 * @param {object[]} baseSteps - Steps to patch; not mutated.
 * @param {{set?: object[], remove?: object[], add?: object[]}|null|undefined} patch
 * @returns {object[]} The patched step list, or `baseSteps` when `patch` is falsy.
 * @throws {Error} On malformed entries, unknown targets or anchors, immutable
 *   fields, or duplicate step ids.
 */
function applyExecutionPatchAtomic(baseSteps, patch) {
  if (!patch) {
    return baseSteps;
  }

  const steps = cloneJson(baseSteps);
  const indexById = new Map(steps.map((step, position) => [step.id, position]));

  // Shared validation for entries that must point at an existing step.
  const requireExistingTarget = (entry, kind) => {
    if (!entry || typeof entry !== 'object' || typeof entry.id !== 'string') {
      throw new Error(`[ExecutionV0] executionPatch.${kind} entries require id`);
    }
    if (!indexById.has(entry.id)) {
      throw new Error(`[ExecutionV0] executionPatch.${kind} target "${entry.id}" does not exist`);
    }
  };

  for (const entry of patch.set ?? []) {
    requireExistingTarget(entry, 'set');
    for (const key of Object.keys(entry)) {
      if (key !== 'id' && !PATCH_SET_MUTABLE_FIELDS.has(key)) {
        throw new Error(`[ExecutionV0] executionPatch.set "${entry.id}" cannot mutate "${key}"`);
      }
    }
  }

  for (const entry of patch.remove ?? []) {
    requireExistingTarget(entry, 'remove');
  }

  // Validation passed: apply the field overrides onto the cloned steps.
  for (const entry of patch.set ?? []) {
    const target = steps[indexById.get(entry.id)];
    for (const field of ['precision', 'kvIO', 'constants']) {
      if (entry[field] !== undefined) {
        target[field] = cloneJson(entry[field]);
      }
    }
    if (entry.entry !== undefined) {
      target.entry = entry.entry;
    }
  }

  const removeIds = new Set((patch.remove ?? []).map((entry) => entry.id));
  const current = steps.filter((step) => !removeIds.has(step.id));

  // Tracks ids already inserted after each anchor so that repeated
  // insertAfter entries on one anchor keep their patch order.
  const insertedAfterAnchors = new Map();
  for (const entry of patch.add ?? []) {
    if (!entry?.step || typeof entry.step !== 'object') {
      throw new Error('[ExecutionV0] executionPatch.add requires a step payload');
    }
    const hasBefore = typeof entry.insertBefore === 'string' && entry.insertBefore.length > 0;
    const hasAfter = typeof entry.insertAfter === 'string' && entry.insertAfter.length > 0;
    if (hasBefore === hasAfter) {
      throw new Error('[ExecutionV0] executionPatch.add requires exactly one of insertBefore or insertAfter');
    }
    if (current.some((step) => step.id === entry.step.id)) {
      throw new Error(`[ExecutionV0] executionPatch.add step id "${entry.step.id}" already exists`);
    }
    const anchorId = hasBefore ? entry.insertBefore : entry.insertAfter;
    const anchorIndex = current.findIndex((step) => step.id === anchorId);
    if (anchorIndex < 0) {
      throw new Error(`[ExecutionV0] executionPatch.add anchor "${anchorId}" not found`);
    }

    let insertIndex = anchorIndex;
    let siblingIds = null;
    if (!hasBefore) {
      insertIndex = anchorIndex + 1;
      siblingIds = insertedAfterAnchors.get(anchorId) ?? [];
      // Skip past steps previously inserted after this same anchor.
      while (insertIndex < current.length && siblingIds.includes(current[insertIndex].id)) {
        insertIndex += 1;
      }
    }

    current.splice(insertIndex, 0, cloneJson(entry.step));

    if (siblingIds) {
      siblingIds.push(entry.step.id);
      insertedAfterAnchors.set(anchorId, siblingIds);
    }
  }

  validateUniqueStepIds(current);
  return current;
}
401
-
402
/**
 * Summarises which steps a runtime patch adds and which precision / kvIO
 * fields it sets, keyed by step id.
 *
 * @param {{add?: object[], set?: object[]}|null|undefined} patch
 * @returns {{addedSteps: Set<string>, precisionFieldsByStep: Map<string, Set<string>>, kvIOFieldsByStep: Set<string>}}
 *   Empty collections when `patch` is not an object.
 */
function indexRuntimePatchMeta(patch) {
  const addedSteps = new Set();
  const precisionFieldsByStep = new Map();
  const kvIOFieldsByStep = new Set();
  const meta = { addedSteps, precisionFieldsByStep, kvIOFieldsByStep };

  if (!patch || typeof patch !== 'object') {
    return meta;
  }

  const isUsableId = (id) => typeof id === 'string' && id.length > 0;

  for (const addition of patch.add ?? []) {
    const addedId = addition?.step?.id;
    if (isUsableId(addedId)) {
      addedSteps.add(addedId);
    }
  }

  for (const override of patch.set ?? []) {
    const targetId = override?.id;
    if (!isUsableId(targetId)) {
      continue;
    }
    const { precision, kvIO } = override;
    if (precision && typeof precision === 'object') {
      precisionFieldsByStep.set(targetId, new Set(Object.keys(precision)));
    }
    if (kvIO && typeof kvIO === 'object') {
      kvIOFieldsByStep.add(targetId);
    }
  }

  return meta;
}
431
-
432
/**
 * Builds the starting slot-to-dtype map, containing only the `state` slot.
 *
 * The dtype is `sessionDefaults.compute.defaults.activationDtype` (or 'f16'
 * when absent) after normalizeDtype.
 *
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @returns {Map<string, *>} Map with a single `state` entry.
 */
function createInitialSlotDtypes(sessionDefaults) {
  const configuredDtype = sessionDefaults?.compute?.defaults?.activationDtype;
  const stateDtype = normalizeDtype(
    configuredDtype ?? 'f16',
    'sessionDefaults.compute.defaults.activationDtype'
  );
  const slots = new Map();
  slots.set('state', stateDtype);
  return slots;
}
439
-
440
/**
 * Verifies that an attention step's kvIO read/write dtypes both equal the
 * session's `kvcache.kvDtype`.
 *
 * Nothing is checked for non-attention steps, when `kvIO` is falsy, or when
 * the session declares no kvDtype.
 *
 * @param {object} step - Step being resolved.
 * @param {{readDtype: *, writeDtype: *}|null} kvIO - Resolved KV I/O dtypes.
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @returns {void}
 * @throws {Error} When either dtype differs from the session kvDtype.
 */
function ensureCompatibleKV(step, kvIO, sessionDefaults) {
  const appliesToStep = step.op === 'attention' && Boolean(kvIO);
  if (!appliesToStep) {
    return;
  }

  const rawKvDtype = sessionDefaults?.kvcache?.kvDtype;
  if (rawKvDtype == null) {
    return;
  }

  const expectedDtype = normalizeDtype(rawKvDtype, 'sessionDefaults.kvcache.kvDtype');
  const readMatches = kvIO.readDtype === expectedDtype;
  const writeMatches = kvIO.writeDtype === expectedDtype;
  if (readMatches && writeMatches) {
    return;
  }

  throw new Error(
    `[ExecutionV0] step "${step.id}" kvIO read/write (${kvIO.readDtype}/${kvIO.writeDtype}) ` +
    `must match sessionDefaults.kvcache.kvDtype (${expectedDtype}).`
  );
}
456
-
457
/**
 * Resolves precision, slot dtypes and KV I/O for every step that belongs to
 * `phase`, walking the steps in order and tracking the dtype held by each slot.
 *
 * @param {string} phase - Phase to resolve; steps are selected via isPhaseMatch.
 * @param {object[]} steps - Execution steps, in declaration order.
 * @param {object} sessionDefaults - Resolved session defaults.
 * @param {*} profileIndex - Kernel profile index passed to resolveProfile.
 * @param {object} policies - Reads `unresolvedKernel` and `dtypeTransition`.
 * @param {object} [options]
 * @param {Iterable} [options.initialSlotDtypes] - Starting slot dtypes; copied.
 * @param {*} [options.sourceTrace] - When set, provenance is recorded per field.
 * @param {object} [options.sessionDefaultSources] - Provenance of session defaults.
 * @param {object} [options.runtimePatchMeta] - Output of indexRuntimePatchMeta.
 * @returns {{steps: object[], finalSlotDtypes: Map}} Resolved steps and the
 *   slot dtypes after the last step.
 * @throws {Error} On unresolved kernel profiles, reads of unproduced slots,
 *   implicit dtype transitions, or mismatched cast `fromDtype`.
 */
function resolvePhaseSteps(phase, steps, sessionDefaults, profileIndex, policies, options = {}) {
  const slotDtypes = options.initialSlotDtypes
    ? new Map(options.initialSlotDtypes)
    : createInitialSlotDtypes(sessionDefaults);
  const sourceTrace = options.sourceTrace ?? null;
  const sessionDefaultSources = options.sessionDefaultSources ?? {};
  const runtimePatchMeta = options.runtimePatchMeta ?? {
    addedSteps: new Set(),
    precisionFieldsByStep: new Map(),
    kvIOFieldsByStep: new Set(),
  };

  // Replaces the 'sessionDefault' marker with where that default came from.
  const viaSessionDefault = (source, field) => (
    source === 'sessionDefault'
      ? sessionDefaultSources[field] ?? 'derived'
      : source
  );

  const resolved = [];

  for (const step of steps) {
    const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
    if (!isPhaseMatch(stepPhase, phase)) {
      continue;
    }

    const profile = resolveProfile(profileIndex, step);
    if (step.kernelRef && !profile && policies.unresolvedKernel === 'error') {
      throw new Error(
        `[ExecutionV0] step "${step.id}" references kernel profile ` +
        `${step.kernelRef.id}@${step.kernelRef.version} (${step.kernelRef.digest}) ` +
        'but no matching sessionDefaults.compute.kernelProfiles entry was found.'
      );
    }

    const { precision, sources: precisionSources } = resolvePrecision(step, profile, sessionDefaults);
    const src = normalizeSlot(step.src, `${step.id}.src`);
    const dst = normalizeSlot(step.dst, `${step.id}.dst`);
    if (!slotDtypes.has(src)) {
      throw new Error(
        `[ExecutionV0] step "${step.id}" reads slot "${src}" before it is produced. ` +
        'Add an explicit producer step or cast/load bridge.'
      );
    }

    const slotDtype = slotDtypes.get(src);
    const inputDtype = normalizeDtype(precision.inputDtype ?? slotDtype, `${step.id}.precision.inputDtype`);
    const isCast = step.op === 'cast';

    if (policies.dtypeTransition === 'require_cast_step' && !isCast && inputDtype !== slotDtype) {
      throw new Error(
        `[ExecutionV0] step "${step.id}" requires inputDtype=${inputDtype} ` +
        `but slot "${src}" currently holds ${slotDtype}. Insert explicit cast step.`
      );
    }

    let outputDtype = normalizeDtype(precision.outputDtype, `${step.id}.precision.outputDtype`);
    let outputDtypeSource = precisionSources.outputDtype;
    if (isCast) {
      // A cast's output dtype is dictated by the step itself.
      outputDtype = normalizeDtype(step.toDtype, `${step.id}.toDtype`);
      outputDtypeSource = 'manifest';
      const fromDtype = step.fromDtype
        ? normalizeDtype(step.fromDtype, `${step.id}.fromDtype`)
        : slotDtype;
      if (fromDtype !== slotDtype) {
        throw new Error(
          `[ExecutionV0] cast step "${step.id}" fromDtype=${fromDtype} does not match slot "${src}" dtype=${slotDtype}`
        );
      }
    } else if (outputDtypeSource === 'sessionDefault') {
      // A kernel declaring exactly one output dtype overrides the session default.
      const declaredOutputDtypes = getKernelOutputCapabilities(step);
      if (declaredOutputDtypes && declaredOutputDtypes.size === 1) {
        outputDtype = [...declaredOutputDtypes][0];
        outputDtypeSource = 'derived';
      }
    }

    const resolvedPrecision = {
      inputDtype,
      mathDtype: normalizeDtype(precision.mathDtype, `${step.id}.precision.mathDtype`),
      accumDtype: normalizeDtype(precision.accumDtype, `${step.id}.precision.accumDtype`),
      outputDtype,
    };
    assertKernelPrecisionCapability(step, resolvedPrecision, policies);
    slotDtypes.set(dst, outputDtype);

    const isAttention = step.op === 'attention';
    const kvIOResolved = isAttention ? resolveKVIO(step, profile, sessionDefaults) : null;
    const kvIO = kvIOResolved?.value ?? null;
    ensureCompatibleKV(step, kvIO, sessionDefaults);

    if (sourceTrace) {
      const patchedFields = runtimePatchMeta.precisionFieldsByStep.get(step.id) ?? new Set();
      const isAddedStep = runtimePatchMeta.addedSteps.has(step.id);
      // Steps added by the runtime patch count any field they declare as
      // patched; existing steps only count fields named in the patch's set.
      const isPatched = (field) => (
        isAddedStep ? step.precision?.[field] != null : patchedFields.has(field)
      );
      const sourceFor = (field, fallback) => (isPatched(field) ? 'runtime.patch' : fallback);

      const inputSource = precision.inputDtype != null ? precisionSources.inputDtype : 'derived';
      const mathSource = viaSessionDefault(precisionSources.mathDtype, 'mathDtype');
      const accumSource = viaSessionDefault(precisionSources.accumDtype, 'accumDtype');
      const outputSource = viaSessionDefault(outputDtypeSource, 'outputDtype');

      setStepSourceTrace(sourceTrace, step.id, 'precision.inputDtype', sourceFor('inputDtype', inputSource));
      setStepSourceTrace(sourceTrace, step.id, 'precision.mathDtype', sourceFor('mathDtype', mathSource));
      setStepSourceTrace(sourceTrace, step.id, 'precision.accumDtype', sourceFor('accumDtype', accumSource));
      setStepSourceTrace(sourceTrace, step.id, 'precision.outputDtype', sourceFor('outputDtype', outputSource));

      if (isAttention) {
        const kvPatched = runtimePatchMeta.kvIOFieldsByStep.has(step.id)
          || (isAddedStep && !!step.kvIO);
        const kvSource = viaSessionDefault(kvIOResolved?.source ?? 'derived', 'kvDtype');
        const resolvedKvSource = kvPatched ? 'runtime.patch' : kvSource;
        setStepSourceTrace(sourceTrace, step.id, 'kvIO.readDtype', resolvedKvSource);
        setStepSourceTrace(sourceTrace, step.id, 'kvIO.writeDtype', resolvedKvSource);
      }
    }

    const section = normalizeSection(step.section, `${step.id}.section`);
    resolved.push({
      ...step,
      src,
      dst,
      phase: stepPhase,
      section,
      precision: resolvedPrecision,
      kvIO,
    });
  }

  return {
    steps: resolved,
    finalSlotDtypes: slotDtypes,
  };
}
608
-
609
/**
 * Removes from `compute.defaults` every dtype key that the manifest's compute
 * defaults also declare (non-null), so the manifest value is not shadowed.
 *
 * @param {object|null|undefined} compute - Runtime session compute section.
 * @param {object|null|undefined} manifestComputeDefaults - Manifest compute defaults.
 * @returns {object|null|undefined} `compute` itself when nothing applies; a
 *   copy with the pruned defaults otherwise; `null` when pruning leaves the
 *   compute section empty.
 */
function stripPresetComputeDefaults(compute, manifestComputeDefaults) {
  if (!compute?.defaults || !manifestComputeDefaults) {
    return compute;
  }

  const dtypeKeys = ['activationDtype', 'mathDtype', 'accumDtype', 'outputDtype'];
  const manifestOwned = dtypeKeys.filter((key) => manifestComputeDefaults[key] != null);
  if (manifestOwned.length === 0) {
    return compute;
  }

  const remainingDefaults = { ...compute.defaults };
  for (const key of manifestOwned) {
    delete remainingDefaults[key];
  }

  if (Object.keys(remainingDefaults).length > 0) {
    return { ...compute, defaults: remainingDefaults };
  }

  // No defaults left: drop the key, and the whole section if it is now empty.
  const { defaults: _droppedDefaults, ...computeWithoutDefaults } = compute;
  return Object.keys(computeWithoutDefaults).length === 0 ? null : computeWithoutDefaults;
}
633
-
634
/**
 * Strips preset placeholders from the runtime session so that values declared
 * in the manifest's `sessionDefaults` are not shadowed by them.
 *
 * Removed when the manifest supplies a replacement: preset compute dtype
 * defaults, an empty `compute.kernelProfiles` array, and `null` values for
 * `kvcache` / `decodeLoop`.
 *
 * @param {object|null|undefined} runtimeSession - Runtime session overlay.
 * @param {object|null|undefined} manifestInference - Manifest inference section.
 * @returns {*} `runtimeSession` itself when nothing changed (or it is not an
 *   object); otherwise a shallow copy with the stripped fields removed.
 */
function normalizeRuntimeSessionForExecutionV0(runtimeSession, manifestInference) {
  if (!runtimeSession || typeof runtimeSession !== 'object') {
    return runtimeSession;
  }

  const manifestDefaults = manifestInference?.sessionDefaults ?? null;
  const manifestProfiles = manifestDefaults?.compute?.kernelProfiles;
  const manifestHasProfiles = Array.isArray(manifestProfiles) && manifestProfiles.length > 0;
  const manifestComputeDefaults = manifestDefaults?.compute?.defaults ?? null;
  const manifestHasKVCache = manifestDefaults?.kvcache != null;
  const manifestHasDecodeLoop = manifestDefaults?.decodeLoop != null;

  const ownValue = (key) => (
    Object.prototype.hasOwnProperty.call(runtimeSession, key) ? runtimeSession[key] : undefined
  );

  let compute = runtimeSession.compute ?? null;
  let kvcache = ownValue('kvcache');
  let decodeLoop = ownValue('decodeLoop');
  let changed = false;

  // Strip preset compute dtype defaults when manifest provides model-specific values.
  // default.json sets session.compute.defaults.activationDtype="f16" as a preset default.
  // When the manifest declares its own compute dtypes (e.g. activationDtype="f32" for f32
  // variants), the manifest must win. Only explicit user overrides (via --runtime-config-json
  // or CLI flags) should take precedence, not baked-in preset values.
  if (manifestComputeDefaults) {
    const strippedCompute = stripPresetComputeDefaults(compute, manifestComputeDefaults);
    if (strippedCompute !== compute) {
      compute = strippedCompute;
      changed = true;
    }
  }

  // Strip empty kernelProfiles when manifest provides them.
  if (compute && Object.prototype.hasOwnProperty.call(compute, 'kernelProfiles')) {
    const ownProfiles = compute.kernelProfiles;
    const isEmptyList = Array.isArray(ownProfiles) && ownProfiles.length === 0;
    if (isEmptyList && manifestHasProfiles) {
      const { kernelProfiles: _droppedProfiles, ...computeWithoutProfiles } = compute;
      compute = Object.keys(computeWithoutProfiles).length === 0 ? null : computeWithoutProfiles;
      changed = true;
    }
  }

  // Strip preset nulls so manifest session defaults can win.
  if (kvcache === null && manifestHasKVCache) {
    kvcache = undefined;
    changed = true;
  }
  if (decodeLoop === null && manifestHasDecodeLoop) {
    decodeLoop = undefined;
    changed = true;
  }

  if (!changed) {
    return runtimeSession;
  }

  const nextRuntimeSession = { ...runtimeSession };
  const assignOrDrop = (key, value, drop) => {
    if (drop) {
      delete nextRuntimeSession[key];
    } else {
      nextRuntimeSession[key] = value;
    }
  };
  assignOrDrop('compute', compute, !compute);
  assignOrDrop('kvcache', kvcache, kvcache === undefined);
  assignOrDrop('decodeLoop', decodeLoop, decodeLoop === undefined);

  return Object.keys(nextRuntimeSession).length === 0 ? {} : nextRuntimeSession;
}
713
-
714
/**
 * Checks that decode steps reading a slot carried over from prefill consume it
 * with the dtype prefill left behind.
 *
 * A slot counts as carried until a decode step has written to it. Cast steps
 * are exempt from the check.
 *
 * @param {object} options
 * @param {object[]} options.steps - All execution steps.
 * @param {Map} options.prefillFinalSlotDtypes - Slot dtypes after prefill.
 * @param {Map} options.decodeInitialSlotDtypes - Slot dtypes at decode start.
 * @param {object} options.sessionDefaults - Resolved session defaults.
 * @param {*} options.profileIndex - Kernel profile index.
 * @returns {void}
 * @throws {Error} When a decode step's input dtype differs from the carried dtype.
 */
function validatePhaseBoundaryCompatibility(options) {
  const {
    steps,
    prefillFinalSlotDtypes,
    decodeInitialSlotDtypes,
    sessionDefaults,
    profileIndex,
  } = options;

  const decodeSteps = steps.filter((step) => {
    const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
    return isPhaseMatch(stepPhase, 'decode');
  });
  if (decodeSteps.length === 0) {
    return;
  }

  const slotsWrittenInDecode = new Set();
  for (const step of decodeSteps) {
    const src = normalizeSlot(step.src, `${step.id}.src`);
    const dst = normalizeSlot(step.dst, `${step.id}.dst`);

    const readsCarriedSlot = !slotsWrittenInDecode.has(src) && prefillFinalSlotDtypes.has(src);
    if (readsCarriedSlot && step.op !== 'cast') {
      const profile = resolveProfile(profileIndex, step);
      const { precision } = resolvePrecision(step, profile, sessionDefaults);
      const carriedDtype = prefillFinalSlotDtypes.get(src);
      const requestedDtype = precision.inputDtype
        ?? carriedDtype
        ?? decodeInitialSlotDtypes.get(src);
      const decodeInput = normalizeDtype(requestedDtype, `${step.id}.precision.inputDtype`);
      if (decodeInput !== carriedDtype) {
        throw new Error(
          `[ExecutionV0] decode step "${step.id}" reads carried slot "${src}" as ${decodeInput} ` +
          `but prefill left ${carriedDtype}. Add explicit cast at phase boundary.`
        );
      }
    }

    slotsWrittenInDecode.add(dst);
  }
}
752
-
753
/**
 * Rejects execution contracts that combine the "bdpa" KV layout with an
 * attention step matching the prefill phase.
 *
 * @param {object[]} steps - Execution steps to scan.
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @returns {void}
 * @throws {Error} Naming the first prefill attention step found under "bdpa".
 */
function assertKVLayoutExecutionCompatibility(steps, sessionDefaults) {
  const kvLayout = normalizeKVLayout(sessionDefaults?.kvcache?.layout, 'sessionDefaults.kvcache.layout');
  if (kvLayout !== 'bdpa') {
    return;
  }

  for (const step of steps) {
    if (step?.op !== 'attention') {
      continue;
    }
    const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
    if (!isPhaseMatch(stepPhase, 'prefill')) {
      continue;
    }
    throw new Error(
      `[ExecutionV0] sessionDefaults.kvcache.layout="bdpa" is decode-only, ` +
      `but step "${step.id}" declares prefill attention. ` +
      'Use a non-BDPA KV layout for prefill-capable models or remove prefill attention from the execution contract.'
    );
  }
}
771
-
772
/**
 * Converts an execution step into a kernel-path step descriptor.
 *
 * @param {object} step - Execution step.
 * @returns {{op: *, kernel: *, entry: *, weights?: *, constants?: *}|null}
 *   `null` for cast steps and for steps without a kernel.
 */
function toKernelPathStep(step) {
  const isDispatchable = step.op !== 'cast' && Boolean(step.kernel);
  if (!isDispatchable) {
    return null;
  }

  const pathStep = {
    op: step.op,
    kernel: step.kernel,
    entry: step.entry ?? 'main',
  };
  // Optional fields are appended only when truthy, in this fixed order.
  if (step.weights) {
    pathStep.weights = step.weights;
  }
  if (step.constants) {
    pathStep.constants = step.constants;
  }
  return pathStep;
}
783
-
784
/**
 * Collects the kernel-path steps for one section, optionally limited to a phase.
 *
 * @param {object[]} steps - Execution steps.
 * @param {string} section - Section name to select.
 * @param {string|null} [phase=null] - When truthy, keep only steps matching it.
 * @returns {object[]} Kernel-path steps; steps that convert to `null` are dropped.
 */
function getSectionSteps(steps, section, phase = null) {
  const inSection = steps.filter((step) => step.section === section);
  const inPhase = inSection.filter((step) => (phase ? isPhaseMatch(step.phase, phase) : true));
  const pathSteps = inPhase.map(toKernelPathStep);
  return pathSteps.filter((pathStep) => pathStep != null);
}
791
-
792
/**
 * Collects the kernel-path steps of the `layer` section that match a phase and
 * apply to a given layer index.
 *
 * @param {object[]} steps - Execution steps.
 * @param {string} phase - Phase to match via isPhaseMatch.
 * @param {number} layerIdx - Layer index tested with stepHasLayer.
 * @returns {object[]} Kernel-path steps; steps that convert to `null` are dropped.
 */
function buildLayerPhaseSteps(steps, phase, layerIdx) {
  const layerPhaseSteps = steps.filter(
    (step) => step.section === 'layer' && isPhaseMatch(step.phase, phase)
  );
  const stepsForLayer = layerPhaseSteps.filter((step) => stepHasLayer(step, layerIdx));
  const pathSteps = stepsForLayer.map(toKernelPathStep);
  return pathSteps.filter((pathStep) => pathStep != null);
}
799
-
800
/**
 * Flattens every step list of an inline kernel path into one array, in the
 * order: preLayer, decode, prefill, postLayer, sampling, layer overrides.
 *
 * @param {object|null|undefined} path - Inline kernel path.
 * @returns {object[]} All steps; empty when `path` has none.
 */
function getInlineKernelPathSteps(path) {
  const overrideSteps = path?.layerOverrides?.flatMap((override) => override.steps ?? []) ?? [];
  const stepLists = [
    path?.preLayer ?? [],
    path?.decode?.steps ?? [],
    path?.prefill?.steps ?? [],
    path?.postLayer ?? [],
    path?.sampling ?? [],
    overrideSteps,
  ];

  const allSteps = [];
  for (const list of stepLists) {
    allSteps.push(...list);
  }
  return allSteps;
}
810
-
811
/**
 * Verifies that each attention kernel named in an inline kernel path agrees
 * with the resolved activation and KV-cache dtypes.
 *
 * Required dtypes are chosen from the kernel name: a name containing `_f16kv`
 * needs f32 activations with an f16 KV cache, one containing `_f16` needs f16
 * for both, and any other attention kernel needs f32 for both.
 *
 * @param {object|null|undefined} path - Inline kernel path; falsy skips the check.
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @returns {void}
 * @throws {Error} For the first attention kernel whose requirement is not met.
 */
function assertInlineKernelPathSessionCompatibility(path, sessionDefaults) {
  if (!path) {
    return;
  }

  const activationDtype = normalizeDtype(
    path.activationDtype ?? sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
    'inlineKernelPath.activationDtype'
  );
  const kvDtype = normalizeDtype(
    path.kvDtype ?? sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
    'inlineKernelPath.kvDtype'
  );

  // '_f16kv' is tested first because those names also contain '_f16'.
  const requiredDtypesFor = (kernel) => {
    if (kernel.includes('_f16kv')) {
      return { activation: 'f32', kv: 'f16' };
    }
    if (kernel.includes('_f16')) {
      return { activation: 'f16', kv: 'f16' };
    }
    return { activation: 'f32', kv: 'f32' };
  };

  for (const step of getInlineKernelPathSteps(path)) {
    const kernel = String(step?.kernel ?? '').trim();
    if (!kernel.startsWith('attention')) {
      continue;
    }
    const required = requiredDtypesFor(kernel);
    if (activationDtype !== required.activation || kvDtype !== required.kv) {
      throw new Error(
        `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
        `activationDtype="${required.activation}" and kvcache.kvDtype="${required.kv}", but resolved ` +
        `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
      );
    }
  }
}
858
-
859
/**
 * Generates an inline kernel path from the execution steps.
 *
 * Layer 0's decode and prefill steps form the default lists, each falling back
 * to the other when empty. Layers whose steps differ from those defaults get
 * a `layerOverrides` entry. preLayer, postLayer and decode-phase sampling
 * sections are attached when non-empty.
 *
 * @param {object[]} steps - Execution steps.
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @param {string} modelId - Used to build the path id; falsy becomes 'model'.
 * @param {number} numLayers - Number of layers to inspect for overrides.
 * @returns {object|null} The kernel path, or `null` when layer 0 has no decode
 *   or prefill steps.
 * @throws {Error} Propagated from dtype normalisation or the attention-kernel
 *   compatibility check.
 */
function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers) {
  const activationDtype = normalizeDtype(
    sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
    'sessionDefaults.compute.defaults.activationDtype'
  );
  const kvDtype = normalizeDtype(
    sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
    'sessionDefaults.kvcache.kvDtype'
  );

  const baseDecodeSteps = buildLayerPhaseSteps(steps, 'decode', 0);
  const basePrefillSteps = buildLayerPhaseSteps(steps, 'prefill', 0);
  const hasDecode = baseDecodeSteps.length > 0;
  const hasPrefill = basePrefillSteps.length > 0;
  if (!hasDecode && !hasPrefill) {
    return null;
  }

  const path = {
    id: `${modelId || 'model'}-execution-v0`,
    name: 'Execution v0 inline kernel path',
    description: 'Generated from manifest.inference.execution.steps',
    activationDtype,
    kvDtype,
    decode: {
      steps: hasDecode ? baseDecodeSteps : basePrefillSteps,
    },
    prefill: {
      steps: hasPrefill ? basePrefillSteps : baseDecodeSteps,
    },
  };

  if (numLayers > 0) {
    // The default lists do not change inside the loop, so serialise them once.
    const defaultDecodeJson = JSON.stringify(path.decode.steps);
    const defaultPrefillJson = JSON.stringify(path.prefill.steps);
    const overrides = [];

    for (let layerIdx = 0; layerIdx < numLayers; layerIdx += 1) {
      const decodeLayerSteps = buildLayerPhaseSteps(steps, 'decode', layerIdx);
      const prefillLayerSteps = buildLayerPhaseSteps(steps, 'prefill', layerIdx);
      const decodeJson = JSON.stringify(decodeLayerSteps);
      const prefillJson = JSON.stringify(prefillLayerSteps);
      if (decodeJson === defaultDecodeJson && prefillJson === defaultPrefillJson) {
        continue;
      }
      // Kernel path layerOverrides are single-step lists per layer.
      const mergedLayerSteps = decodeLayerSteps.length > 0
        ? decodeLayerSteps
        : prefillLayerSteps;
      if (mergedLayerSteps.length === 0) {
        continue;
      }
      overrides.push({
        layers: [layerIdx],
        steps: mergedLayerSteps,
      });
    }

    if (overrides.length > 0) {
      path.layerOverrides = overrides;
    }
  }

  const optionalSections = [
    ['preLayer', getSectionSteps(steps, 'preLayer')],
    ['postLayer', getSectionSteps(steps, 'postLayer')],
    ['sampling', getSectionSteps(steps, 'sampling', 'decode')],
  ];
  for (const [sectionName, sectionSteps] of optionalSections) {
    if (sectionSteps.length > 0) {
      path[sectionName] = sectionSteps;
    }
  }

  assertInlineKernelPathSessionCompatibility(path, sessionDefaults);
  return path;
}
928
-
929
/**
 * Derives a layer pipeline description from the `layer` section steps.
 *
 * @param {object[]} steps - Resolved execution steps.
 * @returns {{steps: object[], overrides: Array}|null} `null` when there are no
 *   layer steps or any layer step's op is not in PIPELINE_COMPATIBLE_OPS.
 */
function buildLayerPipelineFromExecution(steps) {
  const layerSectionSteps = steps.filter((step) => step.section === 'layer');
  if (layerSectionSteps.length === 0) {
    return null;
  }
  const allCompatible = layerSectionSteps.every((step) => PIPELINE_COMPATIBLE_OPS.has(step.op));
  if (!allCompatible) {
    return null;
  }

  // Copied whenever the step defines them, even with a falsy value.
  const copyWhenDefined = ['residual', 'a', 'b', 'variant', 'skipInputNorm'];
  // Copied only when truthy.
  const copyWhenTruthy = ['fromDtype', 'toDtype', 'probeStage', 'name', 'weight'];

  const toPipelineStep = (step) => {
    const pipelineStep = {
      op: step.op,
      phase: step.phase,
      src: step.src ?? 'state',
      dst: step.dst ?? 'state',
    };
    for (const key of copyWhenDefined) {
      if (step[key] !== undefined) {
        pipelineStep[key] = step[key];
      }
    }
    if (step.precision?.inputDtype) {
      pipelineStep.inputDtype = step.precision.inputDtype;
    }
    if (step.precision?.outputDtype) {
      pipelineStep.outputDtype = step.precision.outputDtype;
    }
    for (const key of copyWhenTruthy) {
      if (step[key]) {
        pipelineStep[key] = step[key];
      }
    }
    return pipelineStep;
  };

  return {
    steps: layerSectionSteps.map((step) => toPipelineStep(step)),
    overrides: [],
  };
}
963
-
964
/**
 * Translates resolved session defaults into a runtime config patch.
 *
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @returns {{compute?: object, kvcache?: object, batching?: object}} Patch with
 *   only the sections that had something to contribute. `kvcache` is passed
 *   through by reference; `decodeLoop` settings are emitted under `batching`.
 */
function buildSessionRuntimePatch(sessionDefaults) {
  const patch = {};
  const computeDefaults = sessionDefaults?.compute?.defaults ?? null;

  const computePatch = {};
  if (computeDefaults?.activationDtype) {
    computePatch.activationDtype = computeDefaults.activationDtype;
  }
  if (computeDefaults) {
    const dtypeDefaults = {};
    for (const key of ['mathDtype', 'accumDtype', 'outputDtype']) {
      if (computeDefaults[key]) {
        dtypeDefaults[key] = computeDefaults[key];
      }
    }
    if (Object.keys(dtypeDefaults).length > 0) {
      computePatch.defaults = dtypeDefaults;
    }
  }
  if (Object.keys(computePatch).length > 0) {
    patch.compute = computePatch;
  }

  if (sessionDefaults?.kvcache) {
    patch.kvcache = sessionDefaults.kvcache;
  }

  if (sessionDefaults?.decodeLoop) {
    const {
      batchSize,
      stopCheckMode,
      readbackInterval,
      ringTokens,
      ringStop,
      ringStaging,
    } = sessionDefaults.decodeLoop;
    patch.batching = {
      batchSize,
      stopCheckMode,
      readbackInterval,
      ringTokens,
      ringStop,
      ringStaging,
    };
  }

  return patch;
}
997
-
998
/**
 * Returns a cloneJson copy of `manifestInference.model` for use as runtime
 * overrides.
 *
 * @param {object|null|undefined} manifestInference - Manifest inference section.
 * @returns {*} The cloned model section, or `null` when it is missing or not
 *   an object.
 */
function buildModelRuntimeOverrides(manifestInference) {
  const model = manifestInference?.model;
  const hasModelSection = Boolean(model) && typeof model === 'object';
  return hasModelSection ? cloneJson(model) : null;
}
8
+ import {
9
+ applyExecutionPatchAtomic,
10
+ assertExecutionRuntimeOverlay,
11
+ assertExecutionV0Schema,
12
+ assertKVLayoutExecutionCompatibility,
13
+ collectLeafPaths,
14
+ createInitialSlotDtypes,
15
+ createSourceTrace,
16
+ hasDefinedPath,
17
+ indexKernelProfiles,
18
+ indexRuntimePatchMeta,
19
+ normalizeRuntimeSessionForExecutionV0,
20
+ resolvePhaseSteps,
21
+ setSourceTrace,
22
+ validateManifestSessionDefaultsContract,
23
+ validatePhaseBoundaryCompatibility,
24
+ validateStepShape,
25
+ validateUniqueStepIds,
26
+ cloneJson,
27
+ } from './execution-v0-contract-helpers.js';
28
+ import {
29
+ buildInlineKernelPath,
30
+ buildLayerPipelineFromExecution,
31
+ buildModelRuntimeOverrides,
32
+ buildSessionRuntimePatch,
33
+ resolveFinitenessFallbackKernelPathId,
34
+ } from './execution-v0-runtime-builders.js';
1005
35
 
1006
36
  export function hasExecutionV0(manifestInference) {
1007
37
  return !!manifestInference?.execution && Array.isArray(manifestInference.execution.steps);
@@ -1013,6 +43,7 @@ export function compileExecutionV0(options = {}) {
1013
43
  return null;
1014
44
  }
1015
45
  assertExecutionV0Schema(manifestInference);
46
+ validateManifestSessionDefaultsContract(manifestInference);
1016
47
 
1017
48
  const modelId = options.modelId ?? 'model';
1018
49
  const numLayers = Number.isInteger(options.numLayers) ? options.numLayers : 0;
@@ -1024,7 +55,8 @@ export function compileExecutionV0(options = {}) {
1024
55
  };
1025
56
  const normalizedRuntimeSession = normalizeRuntimeSessionForExecutionV0(
1026
57
  runtimeInference.session ?? {},
1027
- manifestInference
58
+ manifestInference,
59
+ DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS
1028
60
  );
1029
61
  const sessionDefaults = mergeRuntimeValues(
1030
62
  DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
@@ -1107,7 +139,19 @@ export function compileExecutionV0(options = {}) {
1107
139
  ...resolvedDecodeSteps.filter((step) => step.phase === 'decode'),
1108
140
  ];
1109
141
 
1110
- const kernelPath = buildInlineKernelPath(patchedSteps, resolvedSession, modelId, numLayers);
142
+ const defaultKernelPathId = typeof manifestInference.defaultKernelPath === 'string'
143
+ && manifestInference.defaultKernelPath.trim().length > 0
144
+ ? manifestInference.defaultKernelPath.trim()
145
+ : null;
146
+ const finitenessFallbackKernelPathId = resolveFinitenessFallbackKernelPathId(defaultKernelPathId);
147
+
148
+ const kernelPath = buildInlineKernelPath(
149
+ patchedSteps,
150
+ resolvedSession,
151
+ modelId,
152
+ numLayers,
153
+ finitenessFallbackKernelPathId
154
+ );
1111
155
  const layerPipeline = buildLayerPipelineFromExecution(resolvedSteps);
1112
156
  const sessionPatch = buildSessionRuntimePatch(resolvedSession);
1113
157
  const modelOverrides = buildModelRuntimeOverrides(manifestInference);
@@ -1144,13 +188,23 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
1144
188
  }
1145
189
 
1146
190
  const runtimeInference = runtimeConfig.inference ?? {};
191
+ const kernelPathExecution = runtimeInference.kernelPath !== undefined
192
+ ? buildExecutionV0FromKernelPath(runtimeInference.kernelPath)
193
+ : null;
194
+ const manifestInference = kernelPathExecution
195
+ ? {
196
+ ...manifest.inference,
197
+ ...kernelPathExecution,
198
+ defaultKernelPath: runtimeInference.kernelPath,
199
+ }
200
+ : manifest.inference;
1147
201
  const runtimeExecutionOverlay = {
1148
202
  ...(runtimeInference.session ? { session: runtimeInference.session } : {}),
1149
203
  ...(runtimeInference.executionPatch ? { executionPatch: runtimeInference.executionPatch } : {}),
1150
204
  };
1151
205
 
1152
206
  const executionV0State = compileExecutionV0({
1153
- manifestInference: manifest.inference,
207
+ manifestInference,
1154
208
  runtimeInference: runtimeExecutionOverlay,
1155
209
  modelId: options.modelId ?? manifest.modelId ?? 'model',
1156
210
  numLayers: Number.isInteger(options.numLayers)
@@ -1161,13 +215,35 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
1161
215
  return { runtimeConfig, executionV0State: null };
1162
216
  }
1163
217
 
218
+ const compiledKernelPathSource = runtimeInference.kernelPath !== undefined
219
+ ? 'config'
220
+ : 'manifest';
1164
221
  const runtimeInferencePatch = { ...executionV0State.runtimeInferencePatch };
222
+ if (runtimeInferencePatch.kernelPathSource) {
223
+ runtimeInferencePatch.kernelPathSource = compiledKernelPathSource;
224
+ }
225
+ if (runtimeInference.kernelPath !== undefined) {
226
+ delete runtimeInferencePatch.kernelPath;
227
+ delete runtimeInferencePatch.kernelPathSource;
228
+ }
1165
229
  if (runtimeInferencePatch.modelOverrides) {
1166
230
  runtimeInferencePatch.modelOverrides = mergeRuntimeValues(
1167
231
  runtimeInferencePatch.modelOverrides,
1168
232
  runtimeInference.modelOverrides ?? {}
1169
233
  );
1170
234
  }
235
+ if (runtimeInference.kernelPath !== undefined && runtimeInference.compute) {
236
+ runtimeInferencePatch.compute = mergeRuntimeValues(
237
+ runtimeInferencePatch.compute ?? {},
238
+ runtimeInference.compute
239
+ );
240
+ }
241
+ if (runtimeInference.kernelPath !== undefined && runtimeInference.kvcache) {
242
+ runtimeInferencePatch.kvcache = mergeRuntimeValues(
243
+ runtimeInferencePatch.kvcache ?? {},
244
+ runtimeInference.kvcache
245
+ );
246
+ }
1171
247
 
1172
248
  return {
1173
249
  runtimeConfig: {