@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -1,1011 +1,37 @@
1
1
  import { mergeRuntimeValues } from '../../../config/runtime-merge.js';
2
+ import { buildExecutionV0FromKernelPath } from '../../../converter/execution-v0-manifest.js';
2
3
  import {
3
- buildExecutionV0KernelProfileKey,
4
- indexExecutionV0KernelProfiles,
5
- normalizeExecutionV0Dtype,
6
- resolveExecutionV0KernelProfile,
7
- resolveExecutionV0KVIO,
8
- resolveExecutionV0Precision,
9
- } from '../../../config/execution-v0-contract-check.js';
10
- import { selectRuleValue } from '../../../rules/rule-registry.js';
11
- import {
12
- EXECUTION_V0_SCHEMA_ID,
4
+ DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS,
13
5
  DEFAULT_EXECUTION_V0_POLICIES,
14
6
  DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
15
- isExecutionV0Digest,
16
- isExecutionV0Semver,
17
7
  } from '../../../config/schema/execution-v0.schema.js';
18
- import { KERNEL_CONFIGS } from '../../../gpu/kernels/kernel-configs.js';
19
- import { buildKernelRefFromKernelEntry, isKernelRefBoundToKernel } from '../../../config/kernels/kernel-ref.js';
20
-
21
- const PATCH_SET_MUTABLE_FIELDS = new Set(['precision', 'kvIO', 'constants', 'entry']);
22
- const EXECUTION_V0_RUNTIME_KEYS = new Set(['session', 'executionPatch']);
23
- const PIPELINE_COMPATIBLE_OPS = new Set([
24
- 'save',
25
- 'load',
26
- 'conv',
27
- 'attention',
28
- 'rmsnorm',
29
- 'ffn',
30
- 'residual_add',
31
- 'cast',
32
- 'noop',
33
- ]);
34
-
35
- const KERNEL_OUTPUT_CAPABILITIES = (() => {
36
- const byKernelEntry = new Map();
37
- for (const variants of Object.values(KERNEL_CONFIGS)) {
38
- for (const config of Object.values(variants)) {
39
- const kernel = config?.shaderFile;
40
- const entry = config?.entryPoint ?? 'main';
41
- if (typeof kernel !== 'string' || kernel.length === 0) continue;
42
- const key = `${kernel}#${entry}`;
43
- if (!byKernelEntry.has(key)) {
44
- byKernelEntry.set(key, new Set());
45
- }
46
- const outputDtype = config?.outputDtype;
47
- if (typeof outputDtype === 'string' && outputDtype.length > 0) {
48
- byKernelEntry.get(key).add(String(outputDtype).toLowerCase());
49
- }
50
- }
51
- }
52
- return byKernelEntry;
53
- })();
54
-
55
- function getKernelOutputCapabilities(step) {
56
- const kernel = String(step?.kernel ?? '').trim();
57
- const entry = String(step?.entry ?? 'main').trim() || 'main';
58
- if (!kernel) {
59
- return null;
60
- }
61
- return KERNEL_OUTPUT_CAPABILITIES.get(`${kernel}#${entry}`) ?? null;
62
- }
63
-
64
- function cloneJson(value) {
65
- if (typeof structuredClone === 'function') {
66
- return structuredClone(value);
67
- }
68
- return JSON.parse(JSON.stringify(value));
69
- }
70
-
71
- const normalizeDtype = normalizeExecutionV0Dtype;
72
- const resolvePrecision = resolveExecutionV0Precision;
73
- const resolveKVIO = resolveExecutionV0KVIO;
74
-
75
- function normalizePhase(value, label) {
76
- const normalized = String(value ?? '').trim().toLowerCase();
77
- if (normalized !== 'prefill' && normalized !== 'decode' && normalized !== 'both') {
78
- throw new Error(`[ExecutionV0] ${label} must be prefill|decode|both; got "${value}"`);
79
- }
80
- return normalized;
81
- }
82
-
83
- function normalizeSection(value, label) {
84
- const normalized = String(value ?? '').trim();
85
- if (!['preLayer', 'layer', 'postLayer', 'sampling'].includes(normalized)) {
86
- throw new Error(`[ExecutionV0] ${label} must be preLayer|layer|postLayer|sampling; got "${value}"`);
87
- }
88
- return normalized;
89
- }
90
-
91
- function normalizeKVLayout(value, label) {
92
- if (value == null) {
93
- return null;
94
- }
95
- const normalized = String(value).trim().toLowerCase();
96
- if (!normalized) {
97
- return null;
98
- }
99
- return normalized;
100
- }
101
-
102
- function assertKernelRef(kernelRef, label) {
103
- if (!kernelRef) return;
104
- if (typeof kernelRef.id !== 'string' || kernelRef.id.trim().length === 0) {
105
- throw new Error(`[ExecutionV0] ${label}.id is required`);
106
- }
107
- if (!isExecutionV0Semver(kernelRef.version)) {
108
- throw new Error(`[ExecutionV0] ${label}.version must be semver; got "${kernelRef.version}"`);
109
- }
110
- if (!isExecutionV0Digest(kernelRef.digest)) {
111
- throw new Error(`[ExecutionV0] ${label}.digest must match sha256:<64-hex>`);
112
- }
113
- }
114
-
115
- function isPhaseMatch(phase, targetPhase) {
116
- return phase === 'both' || phase === targetPhase;
117
- }
118
-
119
- function stepHasLayer(step, layerIdx) {
120
- if (step.layers === 'all') return true;
121
- if (!Array.isArray(step.layers)) return false;
122
- return step.layers.includes(layerIdx);
123
- }
124
-
125
- const buildKernelProfileKey = buildExecutionV0KernelProfileKey;
126
-
127
- function normalizeSlot(value, label) {
128
- if (typeof value !== 'string' || value.trim().length === 0) {
129
- throw new Error(`[ExecutionV0] ${label} must be a non-empty string`);
130
- }
131
- return value.trim();
132
- }
133
-
134
- function assertKernelPrecisionCapability(step, resolvedPrecision, policies) {
135
- if (step.op === 'cast') {
136
- return;
137
- }
138
- if (policies.unsupportedPrecision !== 'error') {
139
- return;
140
- }
141
- const kernel = String(step.kernel ?? '').trim();
142
- const entry = String(step.entry ?? 'main').trim() || 'main';
143
- const supportedOutputDtypes = getKernelOutputCapabilities(step);
144
- if (!supportedOutputDtypes) {
145
- throw new Error(
146
- `[ExecutionV0] step "${step.id}" kernel "${kernel}#${entry}" ` +
147
- 'is not present in kernel registry; cannot validate precision capability.'
148
- );
149
- }
150
- if (supportedOutputDtypes.size === 0) {
151
- // Some kernels do not declare output dtype metadata yet; treat as unknown.
152
- return;
153
- }
154
- const outputDtype = normalizeDtype(resolvedPrecision.outputDtype, `${step.id}.precision.outputDtype`);
155
- if (!supportedOutputDtypes.has(outputDtype)) {
156
- throw new Error(
157
- `[ExecutionV0] step "${step.id}" outputDtype=${outputDtype} is unsupported by ` +
158
- `kernel "${kernel}#${entry}" (supported: ${[...supportedOutputDtypes].join(', ') || 'none'}).`
159
- );
160
- }
161
- }
162
-
163
- function createSourceTrace() {
164
- return {
165
- session: {},
166
- steps: {},
167
- };
168
- }
169
-
170
- function setSourceTrace(trace, path, source) {
171
- if (!trace || typeof path !== 'string' || path.length === 0) return;
172
- trace[path] = { source };
173
- }
174
-
175
- function setStepSourceTrace(trace, stepId, path, source) {
176
- if (!trace || !stepId || !path) return;
177
- if (!trace.steps[stepId]) {
178
- trace.steps[stepId] = {};
179
- }
180
- trace.steps[stepId][path] = { source };
181
- }
182
-
183
- function isPlainObject(value) {
184
- return value != null && typeof value === 'object' && !Array.isArray(value);
185
- }
186
-
187
- function collectLeafPaths(value, prefix = [], out = []) {
188
- if (Array.isArray(value)) {
189
- if (prefix.length > 0) {
190
- out.push(prefix);
191
- }
192
- return out;
193
- }
194
- if (!isPlainObject(value)) {
195
- if (prefix.length > 0) {
196
- out.push(prefix);
197
- }
198
- return out;
199
- }
200
- for (const [key, child] of Object.entries(value)) {
201
- collectLeafPaths(child, [...prefix, key], out);
202
- }
203
- return out;
204
- }
205
-
206
- function hasDefinedPath(root, pathSegments) {
207
- let current = root;
208
- for (const segment of pathSegments) {
209
- if (!isPlainObject(current) || !Object.prototype.hasOwnProperty.call(current, segment)) {
210
- return false;
211
- }
212
- current = current[segment];
213
- }
214
- return current !== undefined;
215
- }
216
-
217
- const indexKernelProfiles = indexExecutionV0KernelProfiles;
218
-
219
- function resolveProfile(profileIndex, step) {
220
- return resolveExecutionV0KernelProfile(profileIndex, step);
221
- }
222
-
223
- function validateStepShape(step, index) {
224
- if (!step || typeof step !== 'object') {
225
- throw new Error(`[ExecutionV0] execution.steps[${index}] must be an object`);
226
- }
227
- if (typeof step.id !== 'string' || step.id.trim().length === 0) {
228
- throw new Error(`[ExecutionV0] execution.steps[${index}].id is required`);
229
- }
230
- if (typeof step.op !== 'string' || step.op.trim().length === 0) {
231
- throw new Error(`[ExecutionV0] execution.steps[${index}].op is required`);
232
- }
233
- normalizePhase(step.phase, `execution.steps[${index}].phase`);
234
- normalizeSection(step.section, `execution.steps[${index}].section`);
235
- normalizeSlot(step.src, `execution.steps[${index}].src`);
236
- normalizeSlot(step.dst, `execution.steps[${index}].dst`);
237
- if (step.layers !== 'all' && !Array.isArray(step.layers)) {
238
- throw new Error(`[ExecutionV0] execution.steps[${index}].layers must be "all" or number[]`);
239
- }
240
- if (step.layers !== 'all') {
241
- for (const layer of step.layers) {
242
- if (!Number.isInteger(layer) || layer < 0) {
243
- throw new Error(`[ExecutionV0] execution.steps[${index}].layers must contain non-negative integers`);
244
- }
245
- }
246
- }
247
- if (step.op === 'cast') {
248
- normalizeDtype(step.toDtype, `execution.steps[${index}].toDtype`);
249
- if (step.fromDtype != null) {
250
- normalizeDtype(step.fromDtype, `execution.steps[${index}].fromDtype`);
251
- }
252
- } else {
253
- if (typeof step.kernel !== 'string' || step.kernel.trim().length === 0) {
254
- throw new Error(
255
- `[ExecutionV0] execution.steps[${index}] "${step.id}" requires kernel (non-cast op)`
256
- );
257
- }
258
- if (!step.kernelRef || typeof step.kernelRef !== 'object' || Array.isArray(step.kernelRef)) {
259
- throw new Error(
260
- `[ExecutionV0] execution.steps[${index}] "${step.id}" requires kernelRef {id, version, digest} (non-cast op)`
261
- );
262
- }
263
- assertKernelRef(step.kernelRef, `execution.steps[${index}].kernelRef`);
264
- const entry = String(step.entry ?? 'main').trim() || 'main';
265
- let expectedKernelRef;
266
- try {
267
- expectedKernelRef = buildKernelRefFromKernelEntry(step.kernel, entry);
268
- } catch (error) {
269
- const message = error instanceof Error ? error.message : String(error);
270
- throw new Error(
271
- `[ExecutionV0] execution.steps[${index}] "${step.id}" kernel "${step.kernel}#${entry}" ` +
272
- `cannot be content-pinned: ${message}`
273
- );
274
- }
275
- if (!isKernelRefBoundToKernel(step.kernelRef, step.kernel, entry)) {
276
- throw new Error(
277
- `[ExecutionV0] execution.steps[${index}] "${step.id}" kernelRef does not match kernel binding ` +
278
- `("${step.kernel}#${entry}"). Expected ${expectedKernelRef.id}@${expectedKernelRef.version} ${expectedKernelRef.digest}.`
279
- );
280
- }
281
- }
282
- }
283
-
284
- function assertExecutionRuntimeOverlay(runtimeInference) {
285
- if (!runtimeInference || typeof runtimeInference !== 'object') {
286
- return;
287
- }
288
- const unknownKeys = Object.keys(runtimeInference).filter((key) => !EXECUTION_V0_RUNTIME_KEYS.has(key));
289
- if (unknownKeys.length > 0) {
290
- throw new Error(
291
- `[ExecutionV0] runtime.inference overlay supports only ${[...EXECUTION_V0_RUNTIME_KEYS].join(', ')}; ` +
292
- `got unsupported keys: ${unknownKeys.join(', ')}.`
293
- );
294
- }
295
- }
296
-
297
- function validateUniqueStepIds(steps) {
298
- const ids = new Set();
299
- for (const step of steps) {
300
- if (ids.has(step.id)) {
301
- throw new Error(`[ExecutionV0] duplicate step id "${step.id}"`);
302
- }
303
- ids.add(step.id);
304
- }
305
- }
306
-
307
- function assertExecutionV0Schema(manifestInference) {
308
- if (!hasExecutionV0(manifestInference)) return;
309
- const discriminator = manifestInference?.schema ?? null;
310
- if (discriminator !== EXECUTION_V0_SCHEMA_ID) {
311
- throw new Error(
312
- `[ExecutionV0] manifest.inference.schema must be "${EXECUTION_V0_SCHEMA_ID}" ` +
313
- `when execution is present; got "${discriminator}".`
314
- );
315
- }
316
- }
317
-
318
- function applyExecutionPatchAtomic(baseSteps, patch) {
319
- if (!patch) {
320
- return baseSteps;
321
- }
322
- const steps = cloneJson(baseSteps);
323
- const byId = new Map(steps.map((step, index) => [step.id, index]));
324
-
325
- for (const entry of patch.set ?? []) {
326
- if (!entry || typeof entry !== 'object' || typeof entry.id !== 'string') {
327
- throw new Error('[ExecutionV0] executionPatch.set entries require id');
328
- }
329
- if (!byId.has(entry.id)) {
330
- throw new Error(`[ExecutionV0] executionPatch.set target "${entry.id}" does not exist`);
331
- }
332
- for (const key of Object.keys(entry)) {
333
- if (key === 'id') continue;
334
- if (!PATCH_SET_MUTABLE_FIELDS.has(key)) {
335
- throw new Error(`[ExecutionV0] executionPatch.set "${entry.id}" cannot mutate "${key}"`);
336
- }
337
- }
338
- }
339
-
340
- for (const entry of patch.remove ?? []) {
341
- if (!entry || typeof entry !== 'object' || typeof entry.id !== 'string') {
342
- throw new Error('[ExecutionV0] executionPatch.remove entries require id');
343
- }
344
- if (!byId.has(entry.id)) {
345
- throw new Error(`[ExecutionV0] executionPatch.remove target "${entry.id}" does not exist`);
346
- }
347
- }
348
-
349
- for (const entry of patch.set ?? []) {
350
- const index = byId.get(entry.id);
351
- const target = steps[index];
352
- if (entry.precision !== undefined) target.precision = cloneJson(entry.precision);
353
- if (entry.kvIO !== undefined) target.kvIO = cloneJson(entry.kvIO);
354
- if (entry.constants !== undefined) target.constants = cloneJson(entry.constants);
355
- if (entry.entry !== undefined) target.entry = entry.entry;
356
- }
357
-
358
- const removeIds = new Set((patch.remove ?? []).map((entry) => entry.id));
359
- const removedSteps = steps.filter((step) => !removeIds.has(step.id));
360
-
361
- let current = removedSteps;
362
- const insertedAfterAnchors = new Map();
363
- for (const entry of patch.add ?? []) {
364
- if (!entry?.step || typeof entry.step !== 'object') {
365
- throw new Error('[ExecutionV0] executionPatch.add requires a step payload');
366
- }
367
- const hasBefore = typeof entry.insertBefore === 'string' && entry.insertBefore.length > 0;
368
- const hasAfter = typeof entry.insertAfter === 'string' && entry.insertAfter.length > 0;
369
- if (hasBefore === hasAfter) {
370
- throw new Error('[ExecutionV0] executionPatch.add requires exactly one of insertBefore or insertAfter');
371
- }
372
- if (current.some((step) => step.id === entry.step.id)) {
373
- throw new Error(`[ExecutionV0] executionPatch.add step id "${entry.step.id}" already exists`);
374
- }
375
- const anchorId = hasBefore ? entry.insertBefore : entry.insertAfter;
376
- const anchorIndex = current.findIndex((step) => step.id === anchorId);
377
- if (anchorIndex < 0) {
378
- throw new Error(`[ExecutionV0] executionPatch.add anchor "${anchorId}" not found`);
379
- }
380
- let insertIndex = hasBefore ? anchorIndex : anchorIndex + 1;
381
- if (!hasBefore) {
382
- const insertedIds = insertedAfterAnchors.get(anchorId) ?? [];
383
- while (insertIndex < current.length && insertedIds.includes(current[insertIndex].id)) {
384
- insertIndex += 1;
385
- }
386
- }
387
- current = [
388
- ...current.slice(0, insertIndex),
389
- cloneJson(entry.step),
390
- ...current.slice(insertIndex),
391
- ];
392
- if (!hasBefore) {
393
- const insertedIds = insertedAfterAnchors.get(anchorId) ?? [];
394
- insertedIds.push(entry.step.id);
395
- insertedAfterAnchors.set(anchorId, insertedIds);
396
- }
397
- }
398
-
399
- validateUniqueStepIds(current);
400
- return current;
401
- }
402
-
403
- function indexRuntimePatchMeta(patch) {
404
- const meta = {
405
- addedSteps: new Set(),
406
- precisionFieldsByStep: new Map(),
407
- kvIOFieldsByStep: new Set(),
408
- };
409
- if (!patch || typeof patch !== 'object') {
410
- return meta;
411
- }
412
-
413
- for (const add of patch.add ?? []) {
414
- const stepId = add?.step?.id;
415
- if (typeof stepId === 'string' && stepId.length > 0) {
416
- meta.addedSteps.add(stepId);
417
- }
418
- }
419
-
420
- for (const set of patch.set ?? []) {
421
- const stepId = set?.id;
422
- if (typeof stepId !== 'string' || stepId.length === 0) continue;
423
- if (set.precision && typeof set.precision === 'object') {
424
- meta.precisionFieldsByStep.set(stepId, new Set(Object.keys(set.precision)));
425
- }
426
- if (set.kvIO && typeof set.kvIO === 'object') {
427
- meta.kvIOFieldsByStep.add(stepId);
428
- }
429
- }
430
- return meta;
431
- }
432
-
433
- function createInitialSlotDtypes(sessionDefaults) {
434
- const activationDefault = normalizeDtype(
435
- sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
436
- 'sessionDefaults.compute.defaults.activationDtype'
437
- );
438
- return new Map([['state', activationDefault]]);
439
- }
440
-
441
- function ensureCompatibleKV(step, kvIO, sessionDefaults) {
442
- if (step.op !== 'attention' || !kvIO) {
443
- return;
444
- }
445
- const runtimeKvDtypeRaw = sessionDefaults?.kvcache?.kvDtype;
446
- if (runtimeKvDtypeRaw == null) {
447
- return;
448
- }
449
- const runtimeKvDtype = normalizeDtype(runtimeKvDtypeRaw, 'sessionDefaults.kvcache.kvDtype');
450
- if (kvIO.readDtype !== runtimeKvDtype || kvIO.writeDtype !== runtimeKvDtype) {
451
- throw new Error(
452
- `[ExecutionV0] step "${step.id}" kvIO read/write (${kvIO.readDtype}/${kvIO.writeDtype}) ` +
453
- `must match sessionDefaults.kvcache.kvDtype (${runtimeKvDtype}).`
454
- );
455
- }
456
- }
457
-
458
- function resolvePhaseSteps(phase, steps, sessionDefaults, profileIndex, policies, options = {}) {
459
- const slotDtypes = options.initialSlotDtypes
460
- ? new Map(options.initialSlotDtypes)
461
- : createInitialSlotDtypes(sessionDefaults);
462
- const resolved = [];
463
- const sourceTrace = options.sourceTrace ?? null;
464
- const sessionDefaultSources = options.sessionDefaultSources ?? {};
465
- const runtimePatchMeta = options.runtimePatchMeta ?? {
466
- addedSteps: new Set(),
467
- precisionFieldsByStep: new Map(),
468
- kvIOFieldsByStep: new Set(),
469
- };
470
-
471
- for (const step of steps) {
472
- const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
473
- if (!isPhaseMatch(stepPhase, phase)) continue;
474
- const profile = resolveProfile(profileIndex, step);
475
- if (
476
- step.kernelRef
477
- && !profile
478
- && policies.unresolvedKernel === 'error'
479
- ) {
480
- throw new Error(
481
- `[ExecutionV0] step "${step.id}" references kernel profile ` +
482
- `${step.kernelRef.id}@${step.kernelRef.version} (${step.kernelRef.digest}) ` +
483
- 'but no matching sessionDefaults.compute.kernelProfiles entry was found.'
484
- );
485
- }
486
- const { precision, sources: precisionSources } = resolvePrecision(step, profile, sessionDefaults);
487
- const src = normalizeSlot(step.src, `${step.id}.src`);
488
- const dst = normalizeSlot(step.dst, `${step.id}.dst`);
489
- if (!slotDtypes.has(src)) {
490
- throw new Error(
491
- `[ExecutionV0] step "${step.id}" reads slot "${src}" before it is produced. ` +
492
- 'Add an explicit producer step or cast/load bridge.'
493
- );
494
- }
495
- const derivedInput = slotDtypes.get(src);
496
- const inputDtype = normalizeDtype(precision.inputDtype ?? derivedInput, `${step.id}.precision.inputDtype`);
497
-
498
- if (
499
- policies.dtypeTransition === 'require_cast_step'
500
- && step.op !== 'cast'
501
- && inputDtype !== derivedInput
502
- ) {
503
- throw new Error(
504
- `[ExecutionV0] step "${step.id}" requires inputDtype=${inputDtype} ` +
505
- `but slot "${src}" currently holds ${derivedInput}. Insert explicit cast step.`
506
- );
507
- }
508
-
509
- let outputDtype = normalizeDtype(precision.outputDtype, `${step.id}.precision.outputDtype`);
510
- let outputDtypeSource = precisionSources.outputDtype;
511
- if (step.op !== 'cast' && outputDtypeSource === 'sessionDefault') {
512
- const declaredOutputDtypes = getKernelOutputCapabilities(step);
513
- if (declaredOutputDtypes && declaredOutputDtypes.size === 1) {
514
- outputDtype = [...declaredOutputDtypes][0];
515
- outputDtypeSource = 'derived';
516
- }
517
- }
518
- if (step.op === 'cast') {
519
- outputDtype = normalizeDtype(step.toDtype, `${step.id}.toDtype`);
520
- outputDtypeSource = 'manifest';
521
- const fromDtype = step.fromDtype
522
- ? normalizeDtype(step.fromDtype, `${step.id}.fromDtype`)
523
- : derivedInput;
524
- if (fromDtype !== derivedInput) {
525
- throw new Error(
526
- `[ExecutionV0] cast step "${step.id}" fromDtype=${fromDtype} does not match slot "${src}" dtype=${derivedInput}`
527
- );
528
- }
529
- }
530
-
531
- const resolvedPrecision = {
532
- inputDtype,
533
- mathDtype: normalizeDtype(precision.mathDtype, `${step.id}.precision.mathDtype`),
534
- accumDtype: normalizeDtype(precision.accumDtype, `${step.id}.precision.accumDtype`),
535
- outputDtype,
536
- };
537
- assertKernelPrecisionCapability(step, resolvedPrecision, policies);
538
- slotDtypes.set(dst, outputDtype);
539
-
540
- const kvIOResolved = step.op === 'attention'
541
- ? resolveKVIO(step, profile, sessionDefaults)
542
- : null;
543
- const kvIO = kvIOResolved?.value ?? null;
544
- ensureCompatibleKV(step, kvIO, sessionDefaults);
545
-
546
- if (sourceTrace) {
547
- const precisionFieldsPatched = runtimePatchMeta.precisionFieldsByStep.get(step.id) ?? new Set();
548
- const isAddedStep = runtimePatchMeta.addedSteps.has(step.id);
549
- const inputPatched = isAddedStep
550
- ? step.precision?.inputDtype != null
551
- : precisionFieldsPatched.has('inputDtype');
552
- const mathPatched = isAddedStep
553
- ? step.precision?.mathDtype != null
554
- : precisionFieldsPatched.has('mathDtype');
555
- const accumPatched = isAddedStep
556
- ? step.precision?.accumDtype != null
557
- : precisionFieldsPatched.has('accumDtype');
558
- const outputPatched = isAddedStep
559
- ? step.precision?.outputDtype != null
560
- : precisionFieldsPatched.has('outputDtype');
561
- const mathSource = precisionSources.mathDtype === 'sessionDefault'
562
- ? sessionDefaultSources.mathDtype ?? 'derived'
563
- : precisionSources.mathDtype;
564
- const accumSource = precisionSources.accumDtype === 'sessionDefault'
565
- ? sessionDefaultSources.accumDtype ?? 'derived'
566
- : precisionSources.accumDtype;
567
- const outputSource = precisionSources.outputDtype === 'sessionDefault'
568
- ? outputDtypeSource === 'sessionDefault'
569
- ? (sessionDefaultSources.outputDtype ?? 'derived')
570
- : outputDtypeSource
571
- : outputDtypeSource;
572
- setStepSourceTrace(sourceTrace, step.id, 'precision.inputDtype',
573
- inputPatched
574
- ? 'runtime.patch'
575
- : precision.inputDtype != null
576
- ? precisionSources.inputDtype
577
- : 'derived');
578
- setStepSourceTrace(sourceTrace, step.id, 'precision.mathDtype', mathPatched ? 'runtime.patch' : mathSource);
579
- setStepSourceTrace(sourceTrace, step.id, 'precision.accumDtype', accumPatched ? 'runtime.patch' : accumSource);
580
- setStepSourceTrace(sourceTrace, step.id, 'precision.outputDtype', outputPatched ? 'runtime.patch' : outputSource);
581
- if (step.op === 'attention') {
582
- const kvPatched = runtimePatchMeta.kvIOFieldsByStep.has(step.id)
583
- || (isAddedStep && !!step.kvIO);
584
- const kvSource = kvIOResolved?.source === 'sessionDefault'
585
- ? sessionDefaultSources.kvDtype ?? 'derived'
586
- : kvIOResolved?.source ?? 'derived';
587
- const resolvedKvSource = kvPatched ? 'runtime.patch' : kvSource;
588
- setStepSourceTrace(sourceTrace, step.id, 'kvIO.readDtype', resolvedKvSource);
589
- setStepSourceTrace(sourceTrace, step.id, 'kvIO.writeDtype', resolvedKvSource);
590
- }
591
- }
592
-
593
- resolved.push({
594
- ...step,
595
- src,
596
- dst,
597
- phase: stepPhase,
598
- section: normalizeSection(step.section, `${step.id}.section`),
599
- precision: resolvedPrecision,
600
- kvIO,
601
- });
602
- }
603
-
604
- return {
605
- steps: resolved,
606
- finalSlotDtypes: slotDtypes,
607
- };
608
- }
609
-
610
- function stripPresetComputeDefaults(compute, manifestComputeDefaults) {
611
- if (!compute?.defaults || !manifestComputeDefaults) {
612
- return compute;
613
- }
614
- const dtypeKeys = ['activationDtype', 'mathDtype', 'accumDtype', 'outputDtype'];
615
- const hasManifestDtype = dtypeKeys.some(
616
- (key) => manifestComputeDefaults[key] !== undefined && manifestComputeDefaults[key] !== null
617
- );
618
- if (!hasManifestDtype) {
619
- return compute;
620
- }
621
- const nextDefaults = { ...compute.defaults };
622
- for (const key of dtypeKeys) {
623
- if (manifestComputeDefaults[key] !== undefined && manifestComputeDefaults[key] !== null) {
624
- delete nextDefaults[key];
625
- }
626
- }
627
- if (Object.keys(nextDefaults).length === 0) {
628
- const nextCompute = { ...compute };
629
- delete nextCompute.defaults;
630
- return Object.keys(nextCompute).length === 0 ? null : nextCompute;
631
- }
632
- return { ...compute, defaults: nextDefaults };
633
- }
634
-
635
- function normalizeRuntimeSessionForExecutionV0(runtimeSession, manifestInference) {
636
- const manifestSessionDefaults = manifestInference?.sessionDefaults ?? null;
637
- const manifestProfiles = manifestSessionDefaults?.compute?.kernelProfiles;
638
- const hasManifestProfiles = Array.isArray(manifestProfiles) && manifestProfiles.length > 0;
639
- const manifestComputeDefaults = manifestSessionDefaults?.compute?.defaults ?? null;
640
- const hasManifestKVCache = manifestSessionDefaults?.kvcache != null;
641
- const hasManifestDecodeLoop = manifestSessionDefaults?.decodeLoop != null;
642
-
643
- if (!runtimeSession || typeof runtimeSession !== 'object') {
644
- return runtimeSession;
645
- }
646
-
647
- let compute = runtimeSession.compute ?? null;
648
- let kvcache = Object.prototype.hasOwnProperty.call(runtimeSession, 'kvcache')
649
- ? runtimeSession.kvcache
650
- : undefined;
651
- let decodeLoop = Object.prototype.hasOwnProperty.call(runtimeSession, 'decodeLoop')
652
- ? runtimeSession.decodeLoop
653
- : undefined;
654
- let changed = false;
655
-
656
- // Strip preset compute dtype defaults when manifest provides model-specific values.
657
- // default.json sets session.compute.defaults.activationDtype="f16" as a preset default.
658
- // When the manifest declares its own compute dtypes (e.g. activationDtype="f32" for f32
659
- // variants), the manifest must win. Only explicit user overrides (via --runtime-config-json
660
- // or CLI flags) should take precedence, not baked-in preset values.
661
- if (manifestComputeDefaults) {
662
- const stripped = stripPresetComputeDefaults(compute, manifestComputeDefaults);
663
- if (stripped !== compute) {
664
- compute = stripped;
665
- changed = true;
666
- }
667
- }
668
-
669
- // Strip empty kernelProfiles when manifest provides them.
670
- if (compute && Object.prototype.hasOwnProperty.call(compute, 'kernelProfiles')) {
671
- const kernelProfiles = compute.kernelProfiles;
672
- if (Array.isArray(kernelProfiles) && kernelProfiles.length === 0 && hasManifestProfiles) {
673
- const nextCompute = { ...compute };
674
- delete nextCompute.kernelProfiles;
675
- compute = Object.keys(nextCompute).length === 0 ? null : nextCompute;
676
- changed = true;
677
- }
678
- }
679
-
680
- // Strip preset nulls so manifest session defaults can win.
681
- if (kvcache === null && hasManifestKVCache) {
682
- kvcache = undefined;
683
- changed = true;
684
- }
685
-
686
- if (decodeLoop === null && hasManifestDecodeLoop) {
687
- decodeLoop = undefined;
688
- changed = true;
689
- }
690
-
691
- if (!changed) {
692
- return runtimeSession;
693
- }
694
-
695
- const nextRuntimeSession = { ...runtimeSession };
696
- if (!compute) {
697
- delete nextRuntimeSession.compute;
698
- } else {
699
- nextRuntimeSession.compute = compute;
700
- }
701
- if (kvcache === undefined) {
702
- delete nextRuntimeSession.kvcache;
703
- } else {
704
- nextRuntimeSession.kvcache = kvcache;
705
- }
706
- if (decodeLoop === undefined) {
707
- delete nextRuntimeSession.decodeLoop;
708
- } else {
709
- nextRuntimeSession.decodeLoop = decodeLoop;
710
- }
711
-
712
- return Object.keys(nextRuntimeSession).length === 0 ? {} : nextRuntimeSession;
713
- }
714
-
715
- function validatePhaseBoundaryCompatibility(options) {
716
- const {
717
- steps,
718
- prefillFinalSlotDtypes,
719
- decodeInitialSlotDtypes,
720
- sessionDefaults,
721
- profileIndex,
722
- policies,
723
- } = options;
724
- const decodeSteps = steps.filter((step) => isPhaseMatch(normalizePhase(step.phase, `${step.id}.phase`), 'decode'));
725
- if (decodeSteps.length === 0) {
726
- return;
727
- }
728
- const writtenSlots = new Set();
729
- for (const step of decodeSteps) {
730
- const src = normalizeSlot(step.src, `${step.id}.src`);
731
- const dst = normalizeSlot(step.dst, `${step.id}.dst`);
732
- const readsCarriedSlot = !writtenSlots.has(src) && prefillFinalSlotDtypes.has(src);
733
- if (readsCarriedSlot && step.op !== 'cast') {
734
- const profile = resolveProfile(profileIndex, step);
735
- const { precision } = resolvePrecision(step, profile, sessionDefaults);
736
- const carriedDtype = prefillFinalSlotDtypes.get(src);
737
- const decodeInput = normalizeDtype(
738
- precision.inputDtype
739
- ?? carriedDtype
740
- ?? decodeInitialSlotDtypes.get(src),
741
- `${step.id}.precision.inputDtype`
742
- );
743
- if (decodeInput !== carriedDtype) {
744
- throw new Error(
745
- `[ExecutionV0] decode step "${step.id}" reads carried slot "${src}" as ${decodeInput} ` +
746
- `but prefill left ${carriedDtype}. Add explicit cast at phase boundary.`
747
- );
748
- }
749
- }
750
- writtenSlots.add(dst);
751
- }
752
- }
753
-
754
- function assertKVLayoutExecutionCompatibility(steps, sessionDefaults) {
755
- const kvLayout = normalizeKVLayout(sessionDefaults?.kvcache?.layout, 'sessionDefaults.kvcache.layout');
756
- if (kvLayout !== 'bdpa') {
757
- return;
758
- }
759
- const incompatibleStep = steps.find((step) => (
760
- step?.op === 'attention'
761
- && isPhaseMatch(normalizePhase(step.phase, `${step.id}.phase`), 'prefill')
762
- ));
763
- if (!incompatibleStep) {
764
- return;
765
- }
766
- throw new Error(
767
- `[ExecutionV0] sessionDefaults.kvcache.layout="bdpa" is decode-only, ` +
768
- `but step "${incompatibleStep.id}" declares prefill attention. ` +
769
- 'Use a non-BDPA KV layout for prefill-capable models or remove prefill attention from the execution contract.'
770
- );
771
- }
772
-
773
- function toKernelPathStep(step) {
774
- if (step.op === 'cast') return null;
775
- if (!step.kernel) return null;
776
- return {
777
- op: step.op,
778
- kernel: step.kernel,
779
- entry: step.entry ?? 'main',
780
- ...(step.weights ? { weights: step.weights } : {}),
781
- ...(step.constants ? { constants: step.constants } : {}),
782
- };
783
- }
784
-
785
- function getSectionSteps(steps, section, phase = null) {
786
- return steps
787
- .filter((step) => step.section === section)
788
- .filter((step) => (phase ? isPhaseMatch(step.phase, phase) : true))
789
- .map(toKernelPathStep)
790
- .filter((step) => step != null);
791
- }
792
-
793
- function buildLayerPhaseSteps(steps, phase, layerIdx) {
794
- return steps
795
- .filter((step) => step.section === 'layer' && isPhaseMatch(step.phase, phase))
796
- .filter((step) => stepHasLayer(step, layerIdx))
797
- .map(toKernelPathStep)
798
- .filter((step) => step != null);
799
- }
800
-
801
- function getInlineKernelPathSteps(path) {
802
- return [
803
- ...(path?.preLayer ?? []),
804
- ...(path?.decode?.steps ?? []),
805
- ...(path?.prefill?.steps ?? []),
806
- ...(path?.postLayer ?? []),
807
- ...(path?.sampling ?? []),
808
- ...(path?.layerOverrides?.flatMap((override) => override.steps ?? []) ?? []),
809
- ];
810
- }
811
-
812
- function assertInlineKernelPathSessionCompatibility(path, sessionDefaults) {
813
- if (!path) {
814
- return;
815
- }
816
- const activationDtype = normalizeDtype(
817
- path.activationDtype ?? sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
818
- 'inlineKernelPath.activationDtype'
819
- );
820
- const kvDtype = normalizeDtype(
821
- path.kvDtype ?? sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
822
- 'inlineKernelPath.kvDtype'
823
- );
824
-
825
- for (const step of getInlineKernelPathSteps(path)) {
826
- const kernel = String(step?.kernel ?? '').trim();
827
- if (!kernel.startsWith('attention')) {
828
- continue;
829
- }
830
- if (kernel.includes('_f16kv')) {
831
- if (activationDtype !== 'f32' || kvDtype !== 'f16') {
832
- throw new Error(
833
- `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
834
- `activationDtype="f32" and kvcache.kvDtype="f16", but resolved ` +
835
- `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
836
- );
837
- }
838
- continue;
839
- }
840
- if (kernel.includes('_f16')) {
841
- if (activationDtype !== 'f16' || kvDtype !== 'f16') {
842
- throw new Error(
843
- `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
844
- `activationDtype="f16" and kvcache.kvDtype="f16", but resolved ` +
845
- `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
846
- );
847
- }
848
- continue;
849
- }
850
- if (activationDtype !== 'f32' || kvDtype !== 'f32') {
851
- throw new Error(
852
- `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
853
- `activationDtype="f32" and kvcache.kvDtype="f32", but resolved ` +
854
- `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
855
- );
856
- }
857
- }
858
- }
859
-
860
- function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers, finitenessFallbackKernelPathId = null) {
861
- const activationDtype = normalizeDtype(
862
- sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
863
- 'sessionDefaults.compute.defaults.activationDtype'
864
- );
865
- const kvDtype = normalizeDtype(
866
- sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
867
- 'sessionDefaults.kvcache.kvDtype'
868
- );
869
- const decodeSteps = buildLayerPhaseSteps(steps, 'decode', 0);
870
- const prefillSteps = buildLayerPhaseSteps(steps, 'prefill', 0);
871
- if (decodeSteps.length === 0 && prefillSteps.length === 0) {
872
- return null;
873
- }
874
-
875
- const path = {
876
- id: `${modelId || 'model'}-execution-v0`,
877
- name: 'Execution v0 inline kernel path',
878
- description: 'Generated from manifest.inference.execution.steps',
879
- activationDtype,
880
- kvDtype,
881
- ...(typeof finitenessFallbackKernelPathId === 'string' && finitenessFallbackKernelPathId.length > 0
882
- ? { finitenessFallbackKernelPathId }
883
- : {}),
884
- decode: {
885
- steps: decodeSteps.length > 0 ? decodeSteps : prefillSteps,
886
- },
887
- prefill: {
888
- steps: prefillSteps.length > 0 ? prefillSteps : decodeSteps,
889
- },
890
- };
891
-
892
- if (numLayers > 0) {
893
- const overrides = [];
894
- for (let layerIdx = 0; layerIdx < numLayers; layerIdx++) {
895
- const decodeLayerSteps = buildLayerPhaseSteps(steps, 'decode', layerIdx);
896
- const prefillLayerSteps = buildLayerPhaseSteps(steps, 'prefill', layerIdx);
897
- const hasCustomDecode = JSON.stringify(decodeLayerSteps) !== JSON.stringify(path.decode.steps);
898
- const hasCustomPrefill = JSON.stringify(prefillLayerSteps) !== JSON.stringify(path.prefill.steps);
899
- if (!hasCustomDecode && !hasCustomPrefill) continue;
900
- // Kernel path layerOverrides are single-step lists per layer.
901
- const mergedLayerSteps = decodeLayerSteps.length > 0
902
- ? decodeLayerSteps
903
- : prefillLayerSteps;
904
- if (mergedLayerSteps.length > 0) {
905
- overrides.push({
906
- layers: [layerIdx],
907
- steps: mergedLayerSteps,
908
- });
909
- }
910
- }
911
- if (overrides.length > 0) {
912
- path.layerOverrides = overrides;
913
- }
914
- }
915
-
916
- const preLayer = getSectionSteps(steps, 'preLayer');
917
- if (preLayer.length > 0) {
918
- path.preLayer = preLayer;
919
- }
920
- const postLayer = getSectionSteps(steps, 'postLayer');
921
- if (postLayer.length > 0) {
922
- path.postLayer = postLayer;
923
- }
924
- const sampling = getSectionSteps(steps, 'sampling', 'decode');
925
- if (sampling.length > 0) {
926
- path.sampling = sampling;
927
- }
928
-
929
- assertInlineKernelPathSessionCompatibility(path, sessionDefaults);
930
- return path;
931
- }
932
-
933
- function buildLayerPipelineFromExecution(steps) {
934
- const layerSectionSteps = steps.filter((step) => step.section === 'layer');
935
- if (layerSectionSteps.length === 0) {
936
- return null;
937
- }
938
- if (layerSectionSteps.some((step) => !PIPELINE_COMPATIBLE_OPS.has(step.op))) {
939
- return null;
940
- }
941
-
942
- const layerSteps = layerSectionSteps
943
- .map((step) => ({
944
- op: step.op,
945
- phase: step.phase,
946
- src: step.src ?? 'state',
947
- dst: step.dst ?? 'state',
948
- ...(step.residual !== undefined ? { residual: step.residual } : {}),
949
- ...(step.a !== undefined ? { a: step.a } : {}),
950
- ...(step.b !== undefined ? { b: step.b } : {}),
951
- ...(step.variant !== undefined ? { variant: step.variant } : {}),
952
- ...(step.skipInputNorm !== undefined ? { skipInputNorm: step.skipInputNorm } : {}),
953
- ...(step.precision?.inputDtype ? { inputDtype: step.precision.inputDtype } : {}),
954
- ...(step.precision?.outputDtype ? { outputDtype: step.precision.outputDtype } : {}),
955
- ...(step.fromDtype ? { fromDtype: step.fromDtype } : {}),
956
- ...(step.toDtype ? { toDtype: step.toDtype } : {}),
957
- ...(step.probeStage ? { probeStage: step.probeStage } : {}),
958
- ...(step.name ? { name: step.name } : {}),
959
- ...(step.weight ? { weight: step.weight } : {}),
960
- }));
961
-
962
- return {
963
- steps: layerSteps,
964
- overrides: [],
965
- };
966
- }
967
-
968
- function buildSessionRuntimePatch(sessionDefaults) {
969
- const patch = {};
970
- const computeDefaults = sessionDefaults?.compute?.defaults ?? null;
971
- const computePatch = {};
972
- const activationDtype = computeDefaults?.activationDtype;
973
- if (activationDtype) {
974
- computePatch.activationDtype = activationDtype;
975
- }
976
- if (computeDefaults && (computeDefaults.mathDtype || computeDefaults.accumDtype || computeDefaults.outputDtype)) {
977
- computePatch.defaults = {
978
- ...(computeDefaults.mathDtype ? { mathDtype: computeDefaults.mathDtype } : {}),
979
- ...(computeDefaults.accumDtype ? { accumDtype: computeDefaults.accumDtype } : {}),
980
- ...(computeDefaults.outputDtype ? { outputDtype: computeDefaults.outputDtype } : {}),
981
- };
982
- }
983
- if (Object.keys(computePatch).length > 0) {
984
- patch.compute = computePatch;
985
- }
986
- if (sessionDefaults?.kvcache) {
987
- patch.kvcache = sessionDefaults.kvcache;
988
- }
989
- if (sessionDefaults?.decodeLoop) {
990
- patch.batching = {
991
- batchSize: sessionDefaults.decodeLoop.batchSize,
992
- stopCheckMode: sessionDefaults.decodeLoop.stopCheckMode,
993
- readbackInterval: sessionDefaults.decodeLoop.readbackInterval,
994
- ringTokens: sessionDefaults.decodeLoop.ringTokens,
995
- ringStop: sessionDefaults.decodeLoop.ringStop,
996
- ringStaging: sessionDefaults.decodeLoop.ringStaging,
997
- };
998
- }
999
- return patch;
1000
- }
1001
-
1002
- function buildModelRuntimeOverrides(manifestInference) {
1003
- const model = manifestInference?.model;
1004
- if (!model || typeof model !== 'object') {
1005
- return null;
1006
- }
1007
- return cloneJson(model);
1008
- }
8
+ import {
9
+ applyExecutionPatchAtomic,
10
+ assertExecutionRuntimeOverlay,
11
+ assertExecutionV0Schema,
12
+ assertKVLayoutExecutionCompatibility,
13
+ collectLeafPaths,
14
+ createInitialSlotDtypes,
15
+ createSourceTrace,
16
+ hasDefinedPath,
17
+ indexKernelProfiles,
18
+ indexRuntimePatchMeta,
19
+ normalizeRuntimeSessionForExecutionV0,
20
+ resolvePhaseSteps,
21
+ setSourceTrace,
22
+ validateManifestSessionDefaultsContract,
23
+ validatePhaseBoundaryCompatibility,
24
+ validateStepShape,
25
+ validateUniqueStepIds,
26
+ cloneJson,
27
+ } from './execution-v0-contract-helpers.js';
28
+ import {
29
+ buildInlineKernelPath,
30
+ buildLayerPipelineFromExecution,
31
+ buildModelRuntimeOverrides,
32
+ buildSessionRuntimePatch,
33
+ resolveFinitenessFallbackKernelPathId,
34
+ } from './execution-v0-runtime-builders.js';
1009
35
 
1010
36
  export function hasExecutionV0(manifestInference) {
1011
37
  return !!manifestInference?.execution && Array.isArray(manifestInference.execution.steps);
@@ -1017,6 +43,7 @@ export function compileExecutionV0(options = {}) {
1017
43
  return null;
1018
44
  }
1019
45
  assertExecutionV0Schema(manifestInference);
46
+ validateManifestSessionDefaultsContract(manifestInference);
1020
47
 
1021
48
  const modelId = options.modelId ?? 'model';
1022
49
  const numLayers = Number.isInteger(options.numLayers) ? options.numLayers : 0;
@@ -1028,7 +55,8 @@ export function compileExecutionV0(options = {}) {
1028
55
  };
1029
56
  const normalizedRuntimeSession = normalizeRuntimeSessionForExecutionV0(
1030
57
  runtimeInference.session ?? {},
1031
- manifestInference
58
+ manifestInference,
59
+ DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS
1032
60
  );
1033
61
  const sessionDefaults = mergeRuntimeValues(
1034
62
  DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
@@ -1115,14 +143,7 @@ export function compileExecutionV0(options = {}) {
1115
143
  && manifestInference.defaultKernelPath.trim().length > 0
1116
144
  ? manifestInference.defaultKernelPath.trim()
1117
145
  : null;
1118
- const finitenessFallbackKernelPathId = defaultKernelPathId
1119
- ? selectRuleValue(
1120
- 'inference',
1121
- 'kernelPath',
1122
- 'finitenessFallback',
1123
- { kernelPathId: defaultKernelPathId }
1124
- )
1125
- : null;
146
+ const finitenessFallbackKernelPathId = resolveFinitenessFallbackKernelPathId(defaultKernelPathId);
1126
147
 
1127
148
  const kernelPath = buildInlineKernelPath(
1128
149
  patchedSteps,
@@ -1167,13 +188,23 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
1167
188
  }
1168
189
 
1169
190
  const runtimeInference = runtimeConfig.inference ?? {};
191
+ const kernelPathExecution = runtimeInference.kernelPath !== undefined
192
+ ? buildExecutionV0FromKernelPath(runtimeInference.kernelPath)
193
+ : null;
194
+ const manifestInference = kernelPathExecution
195
+ ? {
196
+ ...manifest.inference,
197
+ ...kernelPathExecution,
198
+ defaultKernelPath: runtimeInference.kernelPath,
199
+ }
200
+ : manifest.inference;
1170
201
  const runtimeExecutionOverlay = {
1171
202
  ...(runtimeInference.session ? { session: runtimeInference.session } : {}),
1172
203
  ...(runtimeInference.executionPatch ? { executionPatch: runtimeInference.executionPatch } : {}),
1173
204
  };
1174
205
 
1175
206
  const executionV0State = compileExecutionV0({
1176
- manifestInference: manifest.inference,
207
+ manifestInference,
1177
208
  runtimeInference: runtimeExecutionOverlay,
1178
209
  modelId: options.modelId ?? manifest.modelId ?? 'model',
1179
210
  numLayers: Number.isInteger(options.numLayers)
@@ -1184,7 +215,13 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
1184
215
  return { runtimeConfig, executionV0State: null };
1185
216
  }
1186
217
 
218
+ const compiledKernelPathSource = runtimeInference.kernelPath !== undefined
219
+ ? 'config'
220
+ : 'manifest';
1187
221
  const runtimeInferencePatch = { ...executionV0State.runtimeInferencePatch };
222
+ if (runtimeInferencePatch.kernelPathSource) {
223
+ runtimeInferencePatch.kernelPathSource = compiledKernelPathSource;
224
+ }
1188
225
  if (runtimeInference.kernelPath !== undefined) {
1189
226
  delete runtimeInferencePatch.kernelPath;
1190
227
  delete runtimeInferencePatch.kernelPathSource;
@@ -1195,6 +232,18 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
1195
232
  runtimeInference.modelOverrides ?? {}
1196
233
  );
1197
234
  }
235
+ if (runtimeInference.kernelPath !== undefined && runtimeInference.compute) {
236
+ runtimeInferencePatch.compute = mergeRuntimeValues(
237
+ runtimeInferencePatch.compute ?? {},
238
+ runtimeInference.compute
239
+ );
240
+ }
241
+ if (runtimeInference.kernelPath !== undefined && runtimeInference.kvcache) {
242
+ runtimeInferencePatch.kvcache = mergeRuntimeValues(
243
+ runtimeInferencePatch.kvcache ?? {},
244
+ runtimeInference.kvcache
245
+ );
246
+ }
1198
247
 
1199
248
  return {
1200
249
  runtimeConfig: {