@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -3,6 +3,7 @@ import { KERNEL_CONFIGS } from '../gpu/kernels/utils.js';
3
3
  import { selectByRules } from '../gpu/kernels/rule-matcher.js';
4
4
  import { loadJson } from '../utils/load-json.js';
5
5
  import { buildKernelPathContractArtifact } from './kernel-path-contract-check.js';
6
+ import { mergeKernelPathPolicy } from './merge-helpers.js';
6
7
 
7
8
  // =============================================================================
8
9
  // Built-in Kernel Paths (imported at build time)
@@ -454,49 +455,17 @@ export function getKernelPathAttentionVariant(
454
455
 
455
456
  let activeKernelPath = null;
456
457
  let activeKernelPathSource = 'none';
457
- let activeKernelPathPolicy = {
458
+ const DEFAULT_ACTIVE_KERNEL_PATH_POLICY = {
458
459
  mode: 'locked',
459
460
  sourceScope: ['model', 'manifest'],
460
461
  onIncompatible: 'error',
461
462
  };
463
+ let activeKernelPathPolicy = DEFAULT_ACTIVE_KERNEL_PATH_POLICY;
462
464
 
463
- function normalizeKernelPathSource(source) {
464
- const normalized = String(source ?? '').trim().toLowerCase();
465
- if (normalized === 'runtime') return 'config';
466
- if (normalized === 'execution_v0') return 'execution-v0';
467
- return normalized;
468
- }
469
-
470
- function normalizeKernelPathPolicy(policy) {
471
- if (!policy || typeof policy !== 'object' || Array.isArray(policy)) {
472
- return {
473
- mode: 'locked',
474
- sourceScope: ['model', 'manifest'],
475
- onIncompatible: 'error',
476
- };
477
- }
478
- const mode = String(policy.mode ?? '').trim().toLowerCase() === 'capability-aware'
479
- ? 'capability-aware'
480
- : 'locked';
481
- const sourceScope = Array.isArray(policy.sourceScope ?? policy.allowSources)
482
- ? (policy.sourceScope ?? policy.allowSources)
483
- .map((source) => normalizeKernelPathSource(source))
484
- .filter((source) => source.length > 0)
485
- : ['model', 'manifest'];
486
- const onIncompatible = String(policy.onIncompatible ?? '').trim().toLowerCase() === 'remap'
487
- ? 'remap'
488
- : 'error';
489
- return {
490
- mode,
491
- sourceScope: sourceScope.length > 0 ? [...new Set(sourceScope)] : ['model', 'manifest'],
492
- onIncompatible,
493
- };
494
- }
495
-
496
- export function setActiveKernelPath(path, source = 'none', policy = null) {
465
+ export function setActiveKernelPath(path, source = 'none', policy = undefined) {
497
466
  activeKernelPath = path;
498
467
  activeKernelPathSource = path ? source : 'none';
499
- activeKernelPathPolicy = normalizeKernelPathPolicy(policy);
468
+ activeKernelPathPolicy = mergeKernelPathPolicy(DEFAULT_ACTIVE_KERNEL_PATH_POLICY, policy);
500
469
  }
501
470
 
502
471
  export function getActiveKernelPath() {
@@ -534,6 +503,19 @@ export function isKernelPathFusedQ4K(path = undefined) {
534
503
  return kernelSteps.some((step) => step.kernel.includes('fused_matmul_q4'));
535
504
  }
536
505
 
506
+ export function kernelPathRequiresF32MatmulWeights(path = undefined) {
507
+ const lookupPath = path === undefined ? activeKernelPath : path;
508
+ if (!lookupPath) return false;
509
+ const kernelSteps = [
510
+ ...(lookupPath.decode?.steps ?? []),
511
+ ...(lookupPath.prefill?.steps ?? []),
512
+ ...(lookupPath.preLayer ?? []),
513
+ ...(lookupPath.postLayer ?? []),
514
+ ...(lookupPath.layerOverrides?.flatMap((override) => override.steps) ?? []),
515
+ ];
516
+ return kernelSteps.some((step) => normalizeKernelFile(step.kernel) === 'matmul_f32.wgsl');
517
+ }
518
+
537
519
  export function isActiveKernelPathFusedQ4K() {
538
520
  return isKernelPathFusedQ4K(activeKernelPath);
539
521
  }
@@ -220,7 +220,7 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
220
220
  "topk.wgsl#main": "a18763303cd18e8a020e647f8a52f65403526849faf835d9f9394f634c3c97eb",
221
221
  "topk.wgsl#softmax_topk": "95ff3517da909e4bd4d0ff8d85b619bd250522943aeb9276375edc59f67e9604",
222
222
  "topk.wgsl#topk_2_small": "289eaa5c4f005e0aaf37dfe5343aeda30d9ab3929979dbf0cc3553f23e136807",
223
- "transpose.wgsl#main": "002bce09c48b63ab5017d83f42233340011ac6fc20dae9cd08e3095ae5bf72b2",
223
+ "transpose.wgsl#main": "8caf8664dfc579b4e92edce50783263c535764006290cc7902108f26586113a2",
224
224
  "upsample2d_f16.wgsl#main": "43cee5f2503cb4b6caea45e9842f8961ce313b02eb8ed23a97d6967113ce521c",
225
225
  "upsample2d.wgsl#main": "6de9172ad3d6940dd3c94470a105755a33760e66a84d6e9e96ec4d6a07dc4a25"
226
226
  });
@@ -4,6 +4,17 @@ let cachedRegistry = null;
4
4
 
5
5
  let registryUrl = null;
6
6
 
7
+ function deepFreeze(value, seen = new WeakSet()) {
8
+ if (!value || typeof value !== 'object' || seen.has(value)) {
9
+ return value;
10
+ }
11
+ seen.add(value);
12
+ for (const entry of Object.values(value)) {
13
+ deepFreeze(entry, seen);
14
+ }
15
+ return Object.freeze(value);
16
+ }
17
+
7
18
  export function setRegistryUrl(url) {
8
19
  registryUrl = url;
9
20
  cachedRegistry = null;
@@ -15,7 +26,9 @@ export async function getRegistry() {
15
26
  }
16
27
 
17
28
  const source = registryUrl || './registry.json';
18
- cachedRegistry = await loadJson(source, import.meta.url, 'Failed to load kernel registry');
29
+ cachedRegistry = deepFreeze(
30
+ await loadJson(source, import.meta.url, 'Failed to load kernel registry')
31
+ );
19
32
  return cachedRegistry;
20
33
  }
21
34
 
@@ -2174,7 +2174,7 @@
2174
2174
  }
2175
2175
  ],
2176
2176
  "baseUniforms": {
2177
- "size": 16,
2177
+ "size": 32,
2178
2178
  "fields": [
2179
2179
  {
2180
2180
  "name": "M",
@@ -2202,7 +2202,9 @@
2202
2202
  1,
2203
2203
  1
2204
2204
  ],
2205
- "requires": []
2205
+ "requires": [
2206
+ "shader-f16"
2207
+ ]
2206
2208
  },
2207
2209
  "f16": {
2208
2210
  "wgsl": "matmul_gemv_residual_f16.wgsl",
@@ -2377,12 +2379,12 @@
2377
2379
  "offset": 20
2378
2380
  },
2379
2381
  {
2380
- "name": "_pad0",
2382
+ "name": "rotary_dim",
2381
2383
  "type": "u32",
2382
2384
  "offset": 24
2383
2385
  },
2384
2386
  {
2385
- "name": "_pad1",
2387
+ "name": "interleaved",
2386
2388
  "type": "u32",
2387
2389
  "offset": 28
2388
2390
  }
@@ -4116,7 +4118,7 @@
4116
4118
  }
4117
4119
  ],
4118
4120
  "baseUniforms": {
4119
- "size": 16,
4121
+ "size": 32,
4120
4122
  "fields": [
4121
4123
  {
4122
4124
  "name": "num_tokens",
@@ -4320,6 +4322,80 @@
4320
4322
  }
4321
4323
  }
4322
4324
  },
4325
+ "split_qg": {
4326
+ "description": "De-interleave Q and Gate projections from q_proj output for attentionOutputGate models",
4327
+ "baseBindings": [
4328
+ {
4329
+ "index": 0,
4330
+ "name": "uniforms",
4331
+ "type": "uniform"
4332
+ },
4333
+ {
4334
+ "index": 1,
4335
+ "name": "qg_interleaved",
4336
+ "type": "read-only-storage"
4337
+ },
4338
+ {
4339
+ "index": 2,
4340
+ "name": "Q",
4341
+ "type": "storage"
4342
+ },
4343
+ {
4344
+ "index": 3,
4345
+ "name": "G",
4346
+ "type": "storage"
4347
+ }
4348
+ ],
4349
+ "baseUniforms": {
4350
+ "size": 16,
4351
+ "fields": [
4352
+ {
4353
+ "name": "num_tokens",
4354
+ "type": "u32",
4355
+ "offset": 0
4356
+ },
4357
+ {
4358
+ "name": "num_heads",
4359
+ "type": "u32",
4360
+ "offset": 4
4361
+ },
4362
+ {
4363
+ "name": "head_dim",
4364
+ "type": "u32",
4365
+ "offset": 8
4366
+ },
4367
+ {
4368
+ "name": "_pad",
4369
+ "type": "u32",
4370
+ "offset": 12
4371
+ }
4372
+ ]
4373
+ },
4374
+ "variants": {
4375
+ "default": {
4376
+ "wgsl": "split_qg.wgsl",
4377
+ "entryPoint": "main",
4378
+ "workgroup": [
4379
+ 256,
4380
+ 1,
4381
+ 1
4382
+ ],
4383
+ "requires": []
4384
+ },
4385
+ "f16": {
4386
+ "wgsl": "split_qg_f16.wgsl",
4387
+ "entryPoint": "main",
4388
+ "workgroup": [
4389
+ 256,
4390
+ 1,
4391
+ 1
4392
+ ],
4393
+ "requires": [
4394
+ "shader-f16"
4395
+ ]
4396
+ }
4397
+ }
4398
+ },
4323
4399
  "sample": {
4324
4400
  "description": "GPU-side sampling kernels",
4325
4401
  "baseBindings": [
@@ -43,7 +43,7 @@ export function resolvePreset(id: string): PresetSchema;
43
43
  export function detectPreset(
44
44
  config: RawModelConfigSchema,
45
45
  architecture?: string
46
- ): string;
46
+ ): string | null;
47
47
 
48
48
  /**
49
49
  * Build a fully resolved config by merging:
@@ -23,6 +23,7 @@ const mambaPreset = await loadJson('./presets/models/mamba.json', import.meta.ur
23
23
  const modernbertPreset = await loadJson('./presets/models/modernbert.json', import.meta.url, 'Failed to load preset');
24
24
  const lfm2Preset = await loadJson('./presets/models/lfm2.json', import.meta.url, 'Failed to load preset');
25
25
  const qwen3Preset = await loadJson('./presets/models/qwen3.json', import.meta.url, 'Failed to load preset');
26
+ const qwen35Preset = await loadJson('./presets/models/qwen3_5.json', import.meta.url, 'Failed to load preset');
26
27
  const kimiK2Preset = await loadJson('./presets/models/kimi-k2.json', import.meta.url, 'Failed to load preset');
27
28
  const gptOssPreset = await loadJson('./presets/models/gpt-oss.json', import.meta.url, 'Failed to load preset');
28
29
 
@@ -46,6 +47,7 @@ export const PRESET_REGISTRY = {
46
47
  modernbert: modernbertPreset,
47
48
  lfm2: lfm2Preset,
48
49
  qwen3: qwen3Preset,
50
+ qwen3_5: qwen35Preset,
49
51
  kimi_k2: kimiK2Preset,
50
52
  gpt_oss: gptOssPreset,
51
53
  };
@@ -97,6 +99,7 @@ export const PRESET_DETECTION_ORDER = [
97
99
  'gemma3',
98
100
  'llama3',
99
101
  'lfm2',
102
+ 'qwen3_5',
100
103
  'qwen3',
101
104
  'kimi_k2',
102
105
  'gpt_oss',
@@ -161,8 +164,7 @@ export function detectPreset(
161
164
  }
162
165
  }
163
166
 
164
- // Default to transformer
165
- return 'transformer';
167
+ return null;
166
168
  }
167
169
 
168
170
  // =============================================================================
@@ -178,6 +180,17 @@ export function resolveConfig(
178
180
  (manifest.config || {}),
179
181
  manifest.modelType
180
182
  );
183
+ if (!id) {
184
+ const modelId = String(manifest?.modelId ?? 'unknown').trim() || 'unknown';
185
+ const modelType = String(manifest?.config?.model_type ?? 'unknown').trim() || 'unknown';
186
+ const architecture = String(manifest?.modelType ?? 'unknown').trim() || 'unknown';
187
+ throw createDopplerError(
188
+ ERROR_CODES.CONFIG_PRESET_UNKNOWN,
189
+ `Could not detect a preset for manifest "${modelId}" ` +
190
+ `(architecture="${architecture}", model_type="${modelType}"). ` +
191
+ 'Provide an explicit presetId instead of relying on the generic transformer fallback.'
192
+ );
193
+ }
181
194
 
182
195
  // Get resolved preset
183
196
  const preset = resolvePreset(id);
@@ -77,6 +77,7 @@ function buildWitnessMergeManifest() {
77
77
  embeddingTranspose: false,
78
78
  embeddingVocabSize: 1024,
79
79
  },
80
+ pipeline: 'decode-only',
80
81
  layerPattern: null,
81
82
  chatTemplate: {
82
83
  type: 'gemma',
@@ -114,6 +115,13 @@ export function buildMergeContractArtifact() {
114
115
  && mergedUndefined._sources.get('inference.defaultKernelPath') === 'manifest',
115
116
  `value=${mergedUndefined.inference.defaultKernelPath}, source=${mergedUndefined._sources.get('inference.defaultKernelPath')}`
116
117
  );
118
+ recordCheck(
119
+ checks,
120
+ 'runtime.mergeConfig.pipeline_preserves_manifest_value',
121
+ mergedUndefined.inference.pipeline === 'decode-only'
122
+ && mergedUndefined._sources.get('inference.pipeline') === 'manifest',
123
+ `value=${String(mergedUndefined.inference.pipeline)}, source=${mergedUndefined._sources.get('inference.pipeline')}`
124
+ );
117
125
 
118
126
  const mergedNull = mergeConfig(buildWitnessMergeManifest(), {
119
127
  defaultKernelPath: null,
@@ -152,6 +160,42 @@ export function buildMergeContractArtifact() {
152
160
  `value=${String(runtimeConfig.runtime.inference.chatTemplate.enabled)}`
153
161
  );
154
162
 
163
+ const isolatedConfigA = createDopplerConfig();
164
+ isolatedConfigA.runtime.inference.compute.activationDtype = 'f32';
165
+ const isolatedConfigB = createDopplerConfig();
166
+ recordCheck(
167
+ checks,
168
+ 'runtime.schema.defaults_are_isolated_per_instance',
169
+ isolatedConfigB.runtime.inference.compute.activationDtype !== 'f32'
170
+ && isolatedConfigA.runtime.inference.compute !== isolatedConfigB.runtime.inference.compute,
171
+ `configA=${isolatedConfigA.runtime.inference.compute.activationDtype}, configB=${isolatedConfigB.runtime.inference.compute.activationDtype}`,
172
+ 'actual'
173
+ );
174
+ recordCheck(
175
+ checks,
176
+ 'runtime.schema.storage.opfs_sync_access_handle_defaults_off',
177
+ isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle === false,
178
+ `value=${String(isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle)}`,
179
+ 'actual'
180
+ );
181
+
182
+ const calibrateConfig = createDopplerConfig({
183
+ runtime: {
184
+ shared: {
185
+ tooling: {
186
+ intent: 'calibrate',
187
+ },
188
+ },
189
+ },
190
+ });
191
+ recordCheck(
192
+ checks,
193
+ 'runtime.schema.calibrate_does_not_mutate_kernel_warmup_defaults',
194
+ calibrateConfig.runtime.shared.kernelWarmup.prewarm === false,
195
+ `prewarm=${String(calibrateConfig.runtime.shared.kernelWarmup.prewarm)}`,
196
+ 'actual'
197
+ );
198
+
155
199
  const overlaySources = new Map();
156
200
  const chosenRuntimeValue = chooseDefinedWithSource(
157
201
  'inference.defaultKernelPath',
@@ -252,6 +296,24 @@ export function buildMergeContractArtifact() {
252
296
  'actual'
253
297
  );
254
298
 
299
+ let invalidShallowOverrideError = null;
300
+ try {
301
+ mergeShallowObject(
302
+ { type: 'base', enabled: true },
303
+ null
304
+ );
305
+ } catch (error) {
306
+ invalidShallowOverrideError = error;
307
+ }
308
+ recordCheck(
309
+ checks,
310
+ 'runtime.mergeShallowObject.invalid_explicit_override_fails_closed',
311
+ invalidShallowOverrideError instanceof Error
312
+ && /shallow object overrides must be plain objects/.test(invalidShallowOverrideError.message),
313
+ `error=${invalidShallowOverrideError?.message ?? 'none'}`,
314
+ 'actual'
315
+ );
316
+
255
317
  const layeredAttention = mergeLayeredShallowObjects(
256
318
  { slidingWindow: 4096, attentionBias: false },
257
319
  { slidingWindow: 2048 },
@@ -273,7 +335,7 @@ export function buildMergeContractArtifact() {
273
335
  onIncompatible: 'error',
274
336
  },
275
337
  {
276
- allowSources: ['runtime', 'execution-v0'],
338
+ allowSources: ['config', 'execution-v0'],
277
339
  onIncompatible: 'remap',
278
340
  }
279
341
  );
@@ -283,7 +345,7 @@ export function buildMergeContractArtifact() {
283
345
  Array.isArray(mergedKernelPathPolicy.sourceScope)
284
346
  && Array.isArray(mergedKernelPathPolicy.allowSources)
285
347
  && mergedKernelPathPolicy.sourceScope.length === 2
286
- && mergedKernelPathPolicy.sourceScope[0] === 'runtime'
348
+ && mergedKernelPathPolicy.sourceScope[0] === 'config'
287
349
  && mergedKernelPathPolicy.allowSources[1] === 'execution-v0'
288
350
  && mergedKernelPathPolicy.onIncompatible === 'remap',
289
351
  `sourceScope=${JSON.stringify(mergedKernelPathPolicy.sourceScope)}, allowSources=${JSON.stringify(mergedKernelPathPolicy.allowSources)}`,
@@ -294,7 +356,7 @@ export function buildMergeContractArtifact() {
294
356
  runtime: {
295
357
  inference: {
296
358
  kernelPathPolicy: {
297
- allowSources: ['runtime', 'execution-v0'],
359
+ allowSources: ['config', 'execution-v0'],
298
360
  },
299
361
  },
300
362
  },
@@ -303,7 +365,7 @@ export function buildMergeContractArtifact() {
303
365
  checks,
304
366
  'runtime.schema.kernelPathPolicy.helper_is_used',
305
367
  Array.isArray(runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope)
306
- && runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope[0] === 'runtime'
368
+ && runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope[0] === 'config'
307
369
  && runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.allowSources[1] === 'execution-v0',
308
370
  `policy=${JSON.stringify(runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy)}`,
309
371
  'actual'
@@ -15,9 +15,14 @@ export function chooseDefinedWithSource(path, overrideValue, fallbackValue, sour
15
15
  }
16
16
 
17
17
  export function mergeShallowObject(base, override) {
18
- if (!override || typeof override !== 'object' || Array.isArray(override)) {
18
+ if (override === undefined) {
19
19
  return base;
20
20
  }
21
+ if (override === null || typeof override !== 'object' || Array.isArray(override)) {
22
+ throw new Error(
23
+ 'DopplerConfigError: shallow object overrides must be plain objects when provided explicitly.'
24
+ );
25
+ }
21
26
  return { ...base, ...override };
22
27
  }
23
28
 
@@ -29,17 +34,133 @@ export function replaceSubtree(overrideValue, fallbackValue) {
29
34
  return chooseNullish(overrideValue, fallbackValue);
30
35
  }
31
36
 
37
/**
 * Values used for kernel-path policy fields that are left `undefined`.
 * Both the object and its `sourceScope` array are frozen, so consumers must
 * copy the array before handing it out.
 */
const DEFAULT_KERNEL_PATH_POLICY = Object.freeze({
  mode: 'locked',
  sourceScope: Object.freeze(['model', 'manifest']),
  onIncompatible: 'error',
});
42
+
43
/** Source identifiers accepted in a kernel-path policy source list. */
const VALID_KERNEL_PATH_POLICY_SOURCES = new Set([
  'model',
  'manifest',
  'config',
  'execution-v0',
]);
49
+
50
/**
 * Normalizes a kernel-path policy `mode`.
 *
 * @param value Raw mode; `undefined` selects `DEFAULT_KERNEL_PATH_POLICY.mode`.
 * @returns The trimmed, lower-cased mode: "locked" or "capability-aware".
 * @throws {Error} For any other value. `null` is not treated as "unset": it is
 *   stringified and rejected like any other unknown mode.
 */
function normalizeKernelPathPolicyMode(value) {
  if (value === undefined) {
    return DEFAULT_KERNEL_PATH_POLICY.mode;
  }
  const normalized = String(value).trim().toLowerCase();
  if (normalized === 'locked' || normalized === 'capability-aware') {
    return normalized;
  }
  throw new Error(
    `DopplerConfigError: runtime.inference.kernelPathPolicy.mode must be "locked" or "capability-aware"; got ${JSON.stringify(value)}.`
  );
}
62
+
63
/**
 * Normalizes one kernel-path policy source entry.
 *
 * The entry is stringified (null/undefined become the empty string), trimmed
 * and lower-cased, then checked in this order: empty, legacy "runtime",
 * legacy "execution_v0", membership in VALID_KERNEL_PATH_POLICY_SOURCES.
 * The legacy spellings get dedicated messages naming their replacement.
 *
 * @param source Raw source entry.
 * @returns The normalized source identifier.
 * @throws {Error} When the entry is empty, a legacy alias, or unknown.
 */
function normalizeKernelPathPolicySource(source) {
  const normalized = String(source ?? '').trim().toLowerCase();
  if (!normalized) {
    throw new Error(
      'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be non-empty strings.'
    );
  }
  if (normalized === 'runtime') {
    throw new Error(
      'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "runtime". Use "config".'
    );
  }
  if (normalized === 'execution_v0') {
    throw new Error(
      'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "execution_v0". Use "execution-v0".'
    );
  }
  if (!VALID_KERNEL_PATH_POLICY_SOURCES.has(normalized)) {
    throw new Error(
      `DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be model|manifest|config|execution-v0; got ${JSON.stringify(source)}.`
    );
  }
  return normalized;
}
87
+
88
/**
 * Normalizes a kernel-path policy source list.
 *
 * @param value Raw list; `undefined` yields a fresh mutable copy of the
 *   default source scope.
 * @returns A new array of normalized sources with duplicates removed,
 *   keeping first-occurrence order.
 * @throws {Error} When `value` is not an array, is empty, or contains an
 *   entry rejected by normalizeKernelPathPolicySource.
 */
function normalizeKernelPathPolicySourceScope(value) {
  if (value === undefined) {
    return [...DEFAULT_KERNEL_PATH_POLICY.sourceScope];
  }
  if (!Array.isArray(value) || value.length === 0) {
    throw new Error(
      'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope must be a non-empty array.'
    );
  }
  // A Set iterates in insertion order, so de-duplicating keeps the first occurrence.
  return [...new Set(value.map((source) => normalizeKernelPathPolicySource(source)))];
}
99
+
100
/**
 * Normalizes a kernel-path policy `onIncompatible` setting.
 *
 * @param value Raw setting; `undefined` selects
 *   `DEFAULT_KERNEL_PATH_POLICY.onIncompatible`.
 * @returns The trimmed, lower-cased setting: "error" or "remap".
 * @throws {Error} For any other value, including `null`.
 */
function normalizeKernelPathPolicyOnIncompatible(value) {
  if (value === undefined) {
    return DEFAULT_KERNEL_PATH_POLICY.onIncompatible;
  }
  const normalized = String(value).trim().toLowerCase();
  if (normalized === 'error' || normalized === 'remap') {
    return normalized;
  }
  throw new Error(
    `DopplerConfigError: runtime.inference.kernelPathPolicy.onIncompatible must be "error" or "remap"; got ${JSON.stringify(value)}.`
  );
}
112
+
113
/**
 * Asserts that an optional policy value is an object.
 *
 * `undefined` is accepted as "not provided". `null` gets its own message;
 * primitives and arrays are reported as "must be an object".
 *
 * @param value Candidate policy value.
 * @param label Config path used as the subject of the error message.
 * @throws {Error} When `value` is null, a primitive, or an array.
 */
function assertKernelPathPolicyObject(value, label) {
  if (value === undefined) {
    return;
  }
  if (value === null) {
    throw new Error(`DopplerConfigError: ${label} must not be null.`);
  }
  if (typeof value !== 'object' || Array.isArray(value)) {
    throw new Error(
      `DopplerConfigError: ${label} must be an object.`
    );
  }
}
126
+
127
/**
 * Asserts that `sourceScope` and `allowSources` agree when a policy sets both.
 *
 * The check is skipped when the policy is falsy or either property is
 * `undefined`. Otherwise both lists are normalized (validated, lower-cased,
 * de-duplicated) and must then be equal element by element, so order matters.
 *
 * @param policy Candidate policy object.
 * @param label Config path used as the subject of the error message.
 * @throws {Error} When either list fails normalization or the lists differ.
 */
function assertKernelPathPolicySourceAliasesCompatible(policy, label) {
  if (!policy || policy.sourceScope === undefined || policy.allowSources === undefined) {
    return;
  }

  const sourceScope = normalizeKernelPathPolicySourceScope(policy.sourceScope);
  const allowSources = normalizeKernelPathPolicySourceScope(policy.allowSources);
  const aliasesMatch = sourceScope.length === allowSources.length
    && sourceScope.every((value, index) => value === allowSources[index]);

  if (!aliasesMatch) {
    throw new Error(
      `DopplerConfigError: ${label}.sourceScope and ${label}.allowSources must match exactly when both are provided.`
    );
  }
}
143
+
32
144
  export function mergeKernelPathPolicy(basePolicy, overridePolicy) {
145
+ assertKernelPathPolicyObject(basePolicy, 'runtime.inference.kernelPathPolicy');
146
+ assertKernelPathPolicyObject(overridePolicy, 'runtime.inference.kernelPathPolicy');
147
+ assertKernelPathPolicySourceAliasesCompatible(basePolicy, 'runtime.inference.kernelPathPolicy');
148
+ assertKernelPathPolicySourceAliasesCompatible(overridePolicy, 'runtime.inference.kernelPathPolicy');
33
149
  const base = basePolicy ?? {};
34
150
  const override = overridePolicy ?? {};
35
- const baseSourceScope = base.sourceScope ?? base.allowSources;
36
- const overrideSourceScope = override.sourceScope ?? override.allowSources;
37
- const sourceScope = overrideSourceScope ?? baseSourceScope;
151
+ const sourceScope = normalizeKernelPathPolicySourceScope(
152
+ override.sourceScope
153
+ ?? override.allowSources
154
+ ?? base.sourceScope
155
+ ?? base.allowSources
156
+ );
38
157
  return {
39
- mode: override.mode ?? base.mode,
158
+ mode: normalizeKernelPathPolicyMode(override.mode ?? base.mode),
40
159
  sourceScope,
41
- allowSources: sourceScope,
42
- onIncompatible: override.onIncompatible ?? base.onIncompatible,
160
+ allowSources: [...sourceScope],
161
+ onIncompatible: normalizeKernelPathPolicyOnIncompatible(
162
+ override.onIncompatible ?? base.onIncompatible
163
+ ),
43
164
  };
44
165
  }
45
166
 
@@ -54,6 +54,7 @@ export interface MergedInferenceConfig {
54
54
  ffn: ManifestFFNSchema;
55
55
  rope: ManifestRoPESchema;
56
56
  output: ManifestOutputSchema;
57
+ pipeline: ManifestInferenceSchema['pipeline'];
57
58
  layerPattern: ManifestLayerPatternSchema | null;
58
59
  chatTemplate: ManifestChatTemplateSchema;
59
60
  defaultKernelPath: string | null;
@@ -333,12 +333,22 @@ export function mergeConfig(
333
333
  sources
334
334
  );
335
335
 
336
+ let pipeline = manifestInf.pipeline;
337
+ const runtimePipeline = runtimeOverrides?.pipeline;
338
+ if (runtimePipeline !== undefined) {
339
+ pipeline = runtimePipeline;
340
+ sources.set('inference.pipeline', 'runtime');
341
+ } else {
342
+ sources.set('inference.pipeline', 'manifest');
343
+ }
344
+
336
345
  const inference = {
337
346
  attention: mergeAttention(manifestInf.attention, runtimeOverrides?.attention, sources),
338
347
  normalization: mergeNormalization(manifestInf.normalization, runtimeOverrides?.normalization, sources),
339
348
  ffn: mergeFFN(manifestInf.ffn, runtimeOverrides?.ffn, sources),
340
349
  rope: mergeRoPE(manifestInf.rope, runtimeOverrides?.rope, sources),
341
350
  output: mergeOutput(manifestInf.output, runtimeOverrides?.output, sources),
351
+ pipeline,
342
352
  layerPattern,
343
353
  chatTemplate,
344
354
  defaultKernelPath,