@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355):
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -15,10 +15,14 @@ import { KERNEL_CONFIGS } from '../../../gpu/kernels/kernel-configs.js';
15
15
  import { resolveCapabilityKernelPathRef, resolveKernelPathPolicy } from './kernel-path-auto-select.js';
16
16
  import { initTokenizer } from './init.js';
17
17
  import { selectRuleValue } from '../../../rules/rule-registry.js';
18
+ import { mergeRuntimeValues } from '../../../config/runtime-merge.js';
18
19
  import {
19
20
  DEFAULT_BATCHING_DEFAULTS,
21
+ DEFAULT_COMPUTE_DEFAULTS,
20
22
  DEFAULT_GENERATION_CONFIG,
21
23
  } from '../../../config/schema/inference-defaults.schema.js';
24
+ import { DEFAULT_KVCACHE_CONFIG } from '../../../config/schema/kvcache.schema.js';
25
+ import { DEFAULT_EXECUTION_V0_SESSION_DEFAULTS } from '../../../config/schema/execution-v0.schema.js';
22
26
 
23
27
  function validateKernelWarmupMode(mode) {
24
28
  if (mode !== 'parallel' && mode !== 'sequential') {
@@ -48,23 +52,97 @@ function normalizeBoolean(value) {
48
52
  return typeof value === 'boolean' ? value : null;
49
53
  }
50
54
 
55
+ function parseManifestDecodeLoopOptionalPositiveInt(value, label, modelId) {
56
+ if (value === undefined) {
57
+ return undefined;
58
+ }
59
+ if (value === null) {
60
+ return null;
61
+ }
62
+ const normalized = normalizePositiveInt(value);
63
+ if (normalized == null) {
64
+ throw new Error(
65
+ `Manifest "${modelId}" inference.sessionDefaults.decodeLoop.${label} must be a positive integer or null.`
66
+ );
67
+ }
68
+ return normalized;
69
+ }
70
+
71
+ function parseManifestDecodeLoopOptionalBoolean(value, label, modelId) {
72
+ if (value === undefined) {
73
+ return undefined;
74
+ }
75
+ if (typeof value !== 'boolean') {
76
+ throw new Error(
77
+ `Manifest "${modelId}" inference.sessionDefaults.decodeLoop.${label} must be a boolean when provided.`
78
+ );
79
+ }
80
+ return value;
81
+ }
82
+
83
+ function requireGlobalBatchingDefault(value, label) {
84
+ const normalized = normalizePositiveInt(value);
85
+ if (normalized == null) {
86
+ throw new Error(`${label} must be a positive integer.`);
87
+ }
88
+ return normalized;
89
+ }
90
+
91
+ function requireGlobalStopCheckMode(value, label) {
92
+ const normalized = normalizeStopCheckMode(value);
93
+ if (normalized == null) {
94
+ throw new Error(`${label} must be "batch" or "per-token".`);
95
+ }
96
+ return normalized;
97
+ }
98
+
51
99
  const GLOBAL_DEFAULT_BATCHING = Object.freeze({
52
- batchSize: normalizePositiveInt(DEFAULT_BATCHING_DEFAULTS.batchSize) ?? 4,
53
- stopCheckMode: normalizeStopCheckMode(DEFAULT_BATCHING_DEFAULTS.stopCheckMode) ?? 'batch',
54
- readbackInterval: normalizeReadbackInterval(DEFAULT_BATCHING_DEFAULTS.readbackInterval) ?? 1,
100
+ batchSize: requireGlobalBatchingDefault(
101
+ DEFAULT_BATCHING_DEFAULTS.batchSize,
102
+ 'DEFAULT_BATCHING_DEFAULTS.batchSize'
103
+ ),
104
+ stopCheckMode: requireGlobalStopCheckMode(
105
+ DEFAULT_BATCHING_DEFAULTS.stopCheckMode,
106
+ 'DEFAULT_BATCHING_DEFAULTS.stopCheckMode'
107
+ ),
108
+ readbackInterval: requireGlobalBatchingDefault(
109
+ DEFAULT_BATCHING_DEFAULTS.readbackInterval,
110
+ 'DEFAULT_BATCHING_DEFAULTS.readbackInterval'
111
+ ),
112
+ ringTokens: requireGlobalBatchingDefault(
113
+ DEFAULT_BATCHING_DEFAULTS.ringTokens,
114
+ 'DEFAULT_BATCHING_DEFAULTS.ringTokens'
115
+ ),
116
+ ringStop: requireGlobalBatchingDefault(
117
+ DEFAULT_BATCHING_DEFAULTS.ringStop,
118
+ 'DEFAULT_BATCHING_DEFAULTS.ringStop'
119
+ ),
120
+ ringStaging: requireGlobalBatchingDefault(
121
+ DEFAULT_BATCHING_DEFAULTS.ringStaging,
122
+ 'DEFAULT_BATCHING_DEFAULTS.ringStaging'
123
+ ),
55
124
  });
56
125
 
57
126
  const GLOBAL_DEFAULT_GENERATION = Object.freeze({
58
127
  disableCommandBatching: DEFAULT_GENERATION_CONFIG.disableCommandBatching === true,
59
128
  });
60
129
 
130
+ const GLOBAL_DEFAULT_KERNEL_PATH_DTYPES = Object.freeze({
131
+ activationDtype: DEFAULT_COMPUTE_DEFAULTS.activationDtype,
132
+ kvDtype: DEFAULT_KVCACHE_CONFIG.kvDtype,
133
+ outputDtype: DEFAULT_EXECUTION_V0_SESSION_DEFAULTS.compute.defaults.outputDtype,
134
+ });
135
+
61
136
  function isRuntimeBatchingAtGlobalDefaults(batching) {
62
137
  if (!batching || typeof batching !== 'object') {
63
138
  return false;
64
139
  }
65
140
  return normalizePositiveInt(batching.batchSize) === GLOBAL_DEFAULT_BATCHING.batchSize
66
141
  && normalizeStopCheckMode(batching.stopCheckMode) === GLOBAL_DEFAULT_BATCHING.stopCheckMode
67
- && normalizeReadbackInterval(batching.readbackInterval) === GLOBAL_DEFAULT_BATCHING.readbackInterval;
142
+ && normalizeReadbackInterval(batching.readbackInterval) === GLOBAL_DEFAULT_BATCHING.readbackInterval
143
+ && normalizeReadbackInterval(batching.ringTokens) === GLOBAL_DEFAULT_BATCHING.ringTokens
144
+ && normalizeReadbackInterval(batching.ringStop) === GLOBAL_DEFAULT_BATCHING.ringStop
145
+ && normalizeReadbackInterval(batching.ringStaging) === GLOBAL_DEFAULT_BATCHING.ringStaging;
68
146
  }
69
147
 
70
148
  function isRuntimeGenerationAtGlobalDefaults(generation) {
@@ -74,98 +152,130 @@ function isRuntimeGenerationAtGlobalDefaults(generation) {
74
152
  return (generation.disableCommandBatching === true) === GLOBAL_DEFAULT_GENERATION.disableCommandBatching;
75
153
  }
76
154
 
77
- function resolveModelBatchingDefaults(manifest, modelConfig) {
78
- const presetId = String(manifest?.inference?.presetId ?? '').trim().toLowerCase();
79
- const modelType = String(manifest?.modelType ?? '').trim().toLowerCase();
80
- return selectRuleValue('inference', 'execution', 'modelBatchingDefaults', {
81
- modelId: manifest?.modelId ?? null,
82
- presetId: presetId || null,
83
- modelType: modelType || null,
84
- numLayers: Number(modelConfig?.numLayers ?? 0),
85
- hiddenSize: Number(modelConfig?.hiddenSize ?? 0),
86
- });
155
+ function requireManifestDecodeLoopPositiveInt(value, label, modelId) {
156
+ const normalized = normalizePositiveInt(value);
157
+ if (normalized == null) {
158
+ throw new Error(`Manifest "${modelId}" inference.sessionDefaults.decodeLoop.${label} must be a positive integer.`);
159
+ }
160
+ return normalized;
87
161
  }
88
162
 
89
- function resolveManifestDecodeLoopDefaults(manifest) {
163
+ function requireManifestDecodeLoopStopCheckMode(value, modelId) {
164
+ const normalized = normalizeStopCheckMode(value);
165
+ if (normalized == null) {
166
+ throw new Error(
167
+ `Manifest "${modelId}" inference.sessionDefaults.decodeLoop.stopCheckMode must be "batch" or "per-token".`
168
+ );
169
+ }
170
+ return normalized;
171
+ }
172
+
173
+ function buildManifestDecodeLoopRuntimePatch(manifest) {
90
174
  const decodeLoop = manifest?.inference?.sessionDefaults?.decodeLoop;
91
- if (!decodeLoop || typeof decodeLoop !== 'object') {
175
+ if (decodeLoop == null) {
92
176
  return null;
93
177
  }
94
- const batchSize = normalizePositiveInt(decodeLoop.batchSize);
95
- const stopCheckMode = normalizeStopCheckMode(decodeLoop.stopCheckMode);
96
- const readbackInterval = normalizeReadbackInterval(decodeLoop.readbackInterval);
97
- const disableCommandBatching = normalizeBoolean(decodeLoop.disableCommandBatching);
98
- if (batchSize == null || stopCheckMode == null || readbackInterval == null) {
99
- return null;
178
+ const modelId = String(manifest?.modelId ?? 'unknown').trim() || 'unknown';
179
+ if (typeof decodeLoop !== 'object') {
180
+ throw new Error(
181
+ `Manifest "${modelId}" inference.sessionDefaults.decodeLoop must be an object when provided.`
182
+ );
100
183
  }
101
- return {
184
+ const batchSize = requireManifestDecodeLoopPositiveInt(decodeLoop.batchSize, 'batchSize', modelId);
185
+ const stopCheckMode = requireManifestDecodeLoopStopCheckMode(decodeLoop.stopCheckMode, modelId);
186
+ const readbackInterval = requireManifestDecodeLoopPositiveInt(
187
+ decodeLoop.readbackInterval,
188
+ 'readbackInterval',
189
+ modelId
190
+ );
191
+ const disableCommandBatching = parseManifestDecodeLoopOptionalBoolean(
192
+ decodeLoop.disableCommandBatching,
193
+ 'disableCommandBatching',
194
+ modelId
195
+ );
196
+
197
+ const batchingPatch = {
102
198
  batchSize,
103
199
  stopCheckMode,
104
200
  readbackInterval,
105
- ...(disableCommandBatching == null ? {} : { disableCommandBatching }),
201
+ };
202
+ const ringTokens = parseManifestDecodeLoopOptionalPositiveInt(
203
+ decodeLoop.ringTokens,
204
+ 'ringTokens',
205
+ modelId
206
+ );
207
+ if (ringTokens !== undefined) {
208
+ batchingPatch.ringTokens = ringTokens;
209
+ }
210
+ const ringStop = parseManifestDecodeLoopOptionalPositiveInt(
211
+ decodeLoop.ringStop,
212
+ 'ringStop',
213
+ modelId
214
+ );
215
+ if (ringStop !== undefined) {
216
+ batchingPatch.ringStop = ringStop;
217
+ }
218
+ const ringStaging = parseManifestDecodeLoopOptionalPositiveInt(
219
+ decodeLoop.ringStaging,
220
+ 'ringStaging',
221
+ modelId
222
+ );
223
+ if (ringStaging !== undefined) {
224
+ batchingPatch.ringStaging = ringStaging;
225
+ }
226
+
227
+ return {
228
+ batching: batchingPatch,
229
+ generation: disableCommandBatching == null
230
+ ? null
231
+ : { disableCommandBatching: disableCommandBatching === true },
106
232
  };
107
233
  }
108
234
 
109
235
  export function applyModelBatchingRuntimeDefaults(runtimeConfig, manifest, modelConfig) {
236
+ void modelConfig;
237
+ if (manifest?.inference?.schema === 'doppler.execution/v0') {
238
+ return runtimeConfig;
239
+ }
110
240
  const batching = runtimeConfig?.inference?.batching;
111
241
  const generation = runtimeConfig?.inference?.generation;
112
242
  const runtimeBatchingAtDefaults = isRuntimeBatchingAtGlobalDefaults(batching);
113
243
  const runtimeGenerationAtDefaults = isRuntimeGenerationAtGlobalDefaults(generation);
114
244
 
115
- const defaults = resolveManifestDecodeLoopDefaults(manifest)
116
- ?? resolveModelBatchingDefaults(manifest, modelConfig);
117
- if (!defaults || typeof defaults !== 'object') {
245
+ const patch = buildManifestDecodeLoopRuntimePatch(manifest);
246
+ if (!patch) {
118
247
  return runtimeConfig;
119
248
  }
120
249
 
121
- let nextBatching = batching;
122
- let appliedBatching = false;
123
- if (runtimeBatchingAtDefaults) {
124
- const nextBatchSize = normalizePositiveInt(defaults.batchSize);
125
- const nextStopCheckMode = normalizeStopCheckMode(defaults.stopCheckMode);
126
- const nextReadbackInterval = normalizeReadbackInterval(defaults.readbackInterval);
127
- if (nextBatchSize != null && nextStopCheckMode != null && nextReadbackInterval != null) {
128
- nextBatching = {
129
- ...batching,
130
- batchSize: nextBatchSize,
131
- stopCheckMode: nextStopCheckMode,
132
- readbackInterval: nextReadbackInterval,
133
- };
134
- appliedBatching = true;
135
- }
136
- }
137
-
138
- const shouldApplyDisableCommandBatching = runtimeGenerationAtDefaults
139
- && normalizeBoolean(defaults.disableCommandBatching) != null;
140
- const nextGeneration = shouldApplyDisableCommandBatching
141
- ? {
142
- ...generation,
143
- disableCommandBatching: defaults.disableCommandBatching === true,
144
- }
145
- : generation;
146
-
147
- if (!appliedBatching && !shouldApplyDisableCommandBatching) {
148
- return runtimeConfig;
250
+ const runtimeDisableCommandBatching = generation?.disableCommandBatching === true;
251
+ const manifestDisableCommandBatching = patch.generation?.disableCommandBatching === true;
252
+ if (!runtimeBatchingAtDefaults) {
253
+ throw new Error(
254
+ 'Manifest decodeLoop defaults cannot be merged after runtime batching overrides were already resolved. ' +
255
+ 'Set runtime.inference.batching explicitly to the desired final values, or remove manifest.inference.sessionDefaults.decodeLoop.'
256
+ );
149
257
  }
150
-
151
- if (appliedBatching || shouldApplyDisableCommandBatching) {
152
- log.info(
153
- 'Pipeline',
154
- `Model defaults applied (${manifest?.inference?.presetId ?? 'unknown'}): ` +
155
- `batchSize=${nextBatching.batchSize}, stopCheckMode=${nextBatching.stopCheckMode}, ` +
156
- `readbackInterval=${nextBatching.readbackInterval}, ` +
157
- `disableCommandBatching=${nextGeneration.disableCommandBatching === true}`
258
+ if (patch.generation && !runtimeGenerationAtDefaults && runtimeDisableCommandBatching !== manifestDisableCommandBatching) {
259
+ throw new Error(
260
+ 'Manifest decodeLoop.disableCommandBatching conflicts with runtime.inference.generation.disableCommandBatching. ' +
261
+ 'Choose one explicit source of truth.'
158
262
  );
159
263
  }
160
264
 
161
- return {
162
- ...runtimeConfig,
265
+ const nextRuntimeConfig = mergeRuntimeValues(runtimeConfig, {
163
266
  inference: {
164
- ...runtimeConfig.inference,
165
- ...(appliedBatching ? { batching: nextBatching } : {}),
166
- ...(shouldApplyDisableCommandBatching ? { generation: nextGeneration } : {}),
267
+ batching: patch.batching,
268
+ ...(patch.generation ? { generation: patch.generation } : {}),
167
269
  },
168
- };
270
+ });
271
+ log.info(
272
+ 'Pipeline',
273
+ `Manifest decodeLoop applied (${manifest?.modelId ?? 'unknown'}): ` +
274
+ `batchSize=${patch.batching.batchSize}, stopCheckMode=${patch.batching.stopCheckMode}, ` +
275
+ `readbackInterval=${patch.batching.readbackInterval}, ` +
276
+ `disableCommandBatching=${patch.generation?.disableCommandBatching === true}`
277
+ );
278
+ return nextRuntimeConfig;
169
279
  }
170
280
 
171
281
  export async function runKernelWarmup(options) {
@@ -206,7 +316,7 @@ function normalizeKernelPathSourceHint(value) {
206
316
  function resolveKernelPathSource(runtimeConfigKernelPath, runtimeKernelPathSourceHint, modelKernelPath) {
207
317
  if (runtimeConfigKernelPath) {
208
318
  const sourceHint = normalizeKernelPathSourceHint(runtimeKernelPathSourceHint);
209
- if (sourceHint === 'execution-v0') return 'execution-v0';
319
+ if (sourceHint !== 'none') return sourceHint;
210
320
  return 'config';
211
321
  }
212
322
  if (modelKernelPath) return 'model';
@@ -334,7 +444,7 @@ function assertKernelPathFeatureCompatibility(
334
444
 
335
445
  if (kernelPathSource === 'execution-v0' && typeof effectiveKernelPathRef !== 'string') {
336
446
  const remediation = policyAllowsSource
337
- ? 'Execution-v0 inline kernel paths are not auto-remapped yet. Use subgroup/f16-compatible execution steps, or set runtime.inference.kernelPath to a compatible string preset (for example "gemma2-q4k-dequant-f32a").'
447
+ ? 'Execution-v0 inline kernel paths are not auto-remapped yet. Use subgroup/f16-compatible execution steps, or set runtime.inference.kernelPath to a compatible string preset (for example "gemma2-q4k-dequant-f32a-nosubgroups").'
338
448
  : 'Enable runtime.inference.kernelPathPolicy.sourceScope to include "execution-v0", then use compatible execution steps or a compatible preset id.';
339
449
  throw new Error(
340
450
  `[ExecutionV0] Inline kernelPath requires unsupported GPU features. ` +
@@ -366,6 +476,55 @@ function normalizeKernelDtype(value) {
366
476
  });
367
477
  }
368
478
 
479
+ function buildKernelPathDtypeContract(resolvedKernelPath) {
480
+ if (!resolvedKernelPath) {
481
+ return null;
482
+ }
483
+ const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(resolvedKernelPath));
484
+ const outputDtype = normalizeKernelDtype(
485
+ getKernelPathOutputDtype(resolvedKernelPath) ?? activationDtype
486
+ );
487
+ const kvDtype = normalizeKernelDtype(getKernelPathKVDtype(resolvedKernelPath) ?? activationDtype);
488
+ if (!activationDtype && !outputDtype && !kvDtype) {
489
+ return null;
490
+ }
491
+ return {
492
+ activationDtype,
493
+ outputDtype,
494
+ kvDtype,
495
+ };
496
+ }
497
+
498
+ function isGlobalKernelPathDtypeDefault(currentValue, key) {
499
+ if (currentValue == null) {
500
+ return true;
501
+ }
502
+ return currentValue === GLOBAL_DEFAULT_KERNEL_PATH_DTYPES[key];
503
+ }
504
+
505
+ function describeKernelPathDtypeMismatch(contract, current) {
506
+ const mismatches = [];
507
+ if (contract.activationDtype && current.activationDtype !== contract.activationDtype) {
508
+ mismatches.push(
509
+ `runtime.inference.compute.activationDtype=${current.activationDtype ?? 'unset'} ` +
510
+ `(expected ${contract.activationDtype})`
511
+ );
512
+ }
513
+ if (contract.kvDtype && current.kvDtype !== contract.kvDtype) {
514
+ mismatches.push(
515
+ `runtime.inference.kvcache.kvDtype=${current.kvDtype ?? 'unset'} ` +
516
+ `(expected ${contract.kvDtype})`
517
+ );
518
+ }
519
+ if (contract.outputDtype && current.outputDtype !== contract.outputDtype) {
520
+ mismatches.push(
521
+ `runtime.inference.session.compute.defaults.outputDtype=${current.outputDtype ?? 'unset'} ` +
522
+ `(expected ${contract.outputDtype})`
523
+ );
524
+ }
525
+ return mismatches;
526
+ }
527
+
369
528
  function assertManifestKernelPathDtypeCompatibility(manifest, resolvedKernelPath, kernelPathSource) {
370
529
  if (!resolvedKernelPath) return;
371
530
  if (kernelPathSource === 'config') return;
@@ -376,16 +535,6 @@ function assertManifestKernelPathDtypeCompatibility(manifest, resolvedKernelPath
376
535
  if (!manifestCompute || !kernelActivation) return;
377
536
  if (manifestCompute === kernelActivation) return;
378
537
 
379
- const presetId = String(manifest?.inference?.presetId ?? '').trim().toLowerCase();
380
- if (presetId === 'lfm2' && manifestCompute === 'f32' && kernelActivation === 'f16') {
381
- log.warn(
382
- 'Pipeline',
383
- `Manifest "${manifest?.modelId ?? 'unknown'}" uses quantizationInfo.compute=f32 ` +
384
- `with kernelPath activationDtype=f16 (${resolvedKernelPath.id}); continuing for LFM2 mixed-precision compatibility.`
385
- );
386
- return;
387
- }
388
-
389
538
  throw new Error(
390
539
  `Manifest kernel path dtype mismatch for "${manifest?.modelId ?? 'unknown'}": ` +
391
540
  `quantizationInfo.compute=${manifestCompute} but ` +
@@ -402,17 +551,45 @@ function getKernelCapabilitiesSafe() {
402
551
  }
403
552
  }
404
553
 
405
- function applyKernelPathRuntimeDtypeOverrides(resolvedKernelPath, runtimeConfig) {
406
- const kernelPathActivationDtype = getKernelPathActivationDtype(resolvedKernelPath);
407
- const kernelPathOutputDtype = getKernelPathOutputDtype(resolvedKernelPath) ?? kernelPathActivationDtype;
408
- const kernelPathKVDtype = getKernelPathKVDtype(resolvedKernelPath);
409
- if (!kernelPathActivationDtype && !kernelPathOutputDtype && !kernelPathKVDtype) {
554
+ function applyKernelPathRuntimeDtypeContract(resolvedKernelPath, runtimeConfig, kernelPathSource, modelId) {
555
+ const contract = buildKernelPathDtypeContract(resolvedKernelPath);
556
+ if (!contract) {
410
557
  return runtimeConfig;
411
558
  }
412
559
 
413
- const currentActivation = runtimeConfig.inference.compute.activationDtype;
414
- const currentKV = runtimeConfig.inference.kvcache.kvDtype;
415
- const currentOutput = runtimeConfig.inference?.session?.compute?.defaults?.outputDtype;
560
+ const current = {
561
+ activationDtype: normalizeKernelDtype(runtimeConfig.inference?.compute?.activationDtype),
562
+ kvDtype: normalizeKernelDtype(runtimeConfig.inference?.kvcache?.kvDtype),
563
+ outputDtype: normalizeKernelDtype(runtimeConfig.inference?.session?.compute?.defaults?.outputDtype),
564
+ };
565
+ const mismatches = describeKernelPathDtypeMismatch(contract, current);
566
+ if (mismatches.length === 0) {
567
+ return runtimeConfig;
568
+ }
569
+
570
+ if (kernelPathSource === 'config' || kernelPathSource === 'execution-v0') {
571
+ throw new Error(
572
+ `KernelPath "${resolvedKernelPath?.id ?? 'unknown'}" selected from ${kernelPathSource} ` +
573
+ `requires explicit matching runtime dtypes for "${modelId}". ` +
574
+ `Mismatches: ${mismatches.join('; ')}. ` +
575
+ 'Set runtime.inference.compute.activationDtype, runtime.inference.kvcache.kvDtype, ' +
576
+ 'and runtime.inference.session.compute.defaults.outputDtype to match the kernel path.'
577
+ );
578
+ }
579
+
580
+ const canApplyManifestDefaults = (
581
+ (contract.activationDtype == null || isGlobalKernelPathDtypeDefault(current.activationDtype, 'activationDtype'))
582
+ && (contract.kvDtype == null || isGlobalKernelPathDtypeDefault(current.kvDtype, 'kvDtype'))
583
+ && (contract.outputDtype == null || isGlobalKernelPathDtypeDefault(current.outputDtype, 'outputDtype'))
584
+ );
585
+ if (!canApplyManifestDefaults) {
586
+ throw new Error(
587
+ `Manifest/model kernelPath "${resolvedKernelPath?.id ?? 'unknown'}" for "${modelId}" ` +
588
+ `conflicts with runtime dtype overrides. Mismatches: ${mismatches.join('; ')}. ` +
589
+ 'Either remove the runtime dtype override or set it to match the kernel path.'
590
+ );
591
+ }
592
+
416
593
  const nextInference = {
417
594
  ...runtimeConfig.inference,
418
595
  compute: { ...runtimeConfig.inference.compute },
@@ -420,37 +597,33 @@ function applyKernelPathRuntimeDtypeOverrides(resolvedKernelPath, runtimeConfig)
420
597
  };
421
598
  const dtypeChanges = [];
422
599
 
423
- if (kernelPathActivationDtype && currentActivation !== kernelPathActivationDtype) {
424
- nextInference.compute.activationDtype = kernelPathActivationDtype;
425
- dtypeChanges.push(`activation=${currentActivation}->${kernelPathActivationDtype}`);
600
+ if (contract.activationDtype && current.activationDtype !== contract.activationDtype) {
601
+ nextInference.compute.activationDtype = contract.activationDtype;
602
+ dtypeChanges.push(`activation=${current.activationDtype ?? 'unset'}->${contract.activationDtype}`);
426
603
  }
427
604
 
428
- if (kernelPathKVDtype && currentKV !== kernelPathKVDtype) {
429
- nextInference.kvcache.kvDtype = kernelPathKVDtype;
430
- dtypeChanges.push(`kv=${currentKV}->${kernelPathKVDtype}`);
605
+ if (contract.kvDtype && current.kvDtype !== contract.kvDtype) {
606
+ nextInference.kvcache.kvDtype = contract.kvDtype;
607
+ dtypeChanges.push(`kv=${current.kvDtype ?? 'unset'}->${contract.kvDtype}`);
431
608
  }
432
609
 
433
- if (kernelPathOutputDtype && currentOutput !== kernelPathOutputDtype) {
610
+ if (contract.outputDtype && current.outputDtype !== contract.outputDtype) {
434
611
  nextInference.session = {
435
612
  ...(nextInference.session ?? {}),
436
613
  compute: {
437
614
  ...(nextInference.session?.compute ?? {}),
438
615
  defaults: {
439
616
  ...(nextInference.session?.compute?.defaults ?? {}),
440
- outputDtype: kernelPathOutputDtype,
617
+ outputDtype: contract.outputDtype,
441
618
  },
442
619
  },
443
620
  };
444
- dtypeChanges.push(`session.outputDtype=${currentOutput ?? 'undefined'}->${kernelPathOutputDtype}`);
445
- }
446
-
447
- if (dtypeChanges.length === 0) {
448
- return runtimeConfig;
621
+ dtypeChanges.push(`session.outputDtype=${current.outputDtype ?? 'unset'}->${contract.outputDtype}`);
449
622
  }
450
623
 
451
624
  log.info(
452
625
  'Pipeline',
453
- `KernelPath ${resolvedKernelPath?.id ?? 'unknown'} runtime dtype overrides: ${dtypeChanges.join(', ')}`
626
+ `KernelPath ${resolvedKernelPath?.id ?? 'unknown'} applied manifest/model runtime dtype defaults: ${dtypeChanges.join(', ')}`
454
627
  );
455
628
  return { ...runtimeConfig, inference: nextInference };
456
629
  }
@@ -521,7 +694,12 @@ export function resolveKernelPathState(options) {
521
694
  log.info('Pipeline', 'KernelPath: none (no kernel path configured)');
522
695
  }
523
696
 
524
- const nextRuntimeConfig = applyKernelPathRuntimeDtypeOverrides(resolvedKernelPath, runtimeConfig);
697
+ const nextRuntimeConfig = applyKernelPathRuntimeDtypeContract(
698
+ resolvedKernelPath,
699
+ runtimeConfig,
700
+ kernelPathSource,
701
+ String(manifest?.modelId ?? 'unknown').trim() || 'unknown'
702
+ );
525
703
  return {
526
704
  resolvedKernelPath,
527
705
  kernelPathSource,
@@ -1,5 +1,6 @@
1
1
  import { getRuntimeConfig } from '../../../config/runtime.js';
2
2
  import { QK_K } from '../../../config/schema/index.js';
3
+ import { releaseBuffer } from '../../../memory/buffer-pool.js';
3
4
 
4
5
  const dequantCache = new Map();
5
6
  let dequantCacheMaxEntriesOverride = null;
@@ -73,8 +74,8 @@ export function setCachedDequant(layerIdx, expertIdx, outputDtype, gateUp, down)
73
74
  if (oldestKey) {
74
75
  const evicted = dequantCache.get(oldestKey);
75
76
  if (evicted) {
76
- evicted.gateUp.destroy();
77
- evicted.down.destroy();
77
+ releaseBuffer(evicted.gateUp);
78
+ releaseBuffer(evicted.down);
78
79
  }
79
80
  dequantCache.delete(oldestKey);
80
81
  }
@@ -85,8 +86,8 @@ export function setCachedDequant(layerIdx, expertIdx, outputDtype, gateUp, down)
85
86
 
86
87
  export function clearDequantCache() {
87
88
  for (const cached of dequantCache.values()) {
88
- cached.gateUp.destroy();
89
- cached.down.destroy();
89
+ releaseBuffer(cached.gateUp);
90
+ releaseBuffer(cached.down);
90
91
  }
91
92
  dequantCache.clear();
92
93
  dequantCacheHits = 0;