@simulatte/doppler 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316)
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +16 -23
  3. package/package.json +14 -1
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +1 -1
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +7 -5
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +12 -2
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +10 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  45. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  46. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  47. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  48. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  49. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  50. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  52. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  54. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  55. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  56. package/src/config/runtime.js +6 -1
  57. package/src/config/schema/debug.schema.d.ts +5 -0
  58. package/src/config/schema/doppler.schema.js +16 -21
  59. package/src/config/schema/inference-defaults.schema.js +3 -3
  60. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  61. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  62. package/src/config/schema/manifest.schema.d.ts +2 -1
  63. package/src/config/schema/manifest.schema.js +16 -3
  64. package/src/config/training-defaults.js +30 -22
  65. package/src/converter/conversion-plan.js +94 -9
  66. package/src/converter/core.d.ts +7 -0
  67. package/src/converter/core.js +14 -9
  68. package/src/converter/execution-v0-manifest.js +4 -1
  69. package/src/converter/index.d.ts +1 -0
  70. package/src/converter/index.js +1 -0
  71. package/src/converter/manifest-inference.js +43 -12
  72. package/src/converter/parsers/diffusion.js +0 -3
  73. package/src/converter/quantization-info.js +35 -15
  74. package/src/converter/shard-packer.d.ts +1 -1
  75. package/src/converter/shard-packer.js +4 -1
  76. package/src/debug/config.js +123 -11
  77. package/src/debug/signals.js +7 -1
  78. package/src/debug/tensor.d.ts +2 -0
  79. package/src/debug/tensor.js +13 -2
  80. package/src/distribution/p2p-control-plane.js +52 -12
  81. package/src/distribution/p2p-observability.js +43 -7
  82. package/src/distribution/p2p-webrtc-browser.js +20 -0
  83. package/src/distribution/shard-delivery.js +77 -26
  84. package/src/formats/gguf/types.js +33 -16
  85. package/src/formats/rdrr/groups.d.ts +12 -4
  86. package/src/formats/rdrr/groups.js +3 -6
  87. package/src/formats/rdrr/parsing.js +39 -2
  88. package/src/formats/rdrr/types.d.ts +2 -1
  89. package/src/gpu/command-recorder.js +86 -61
  90. package/src/gpu/device.d.ts +1 -0
  91. package/src/gpu/device.js +73 -19
  92. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  93. package/src/gpu/kernel-tuner/cache.js +71 -4
  94. package/src/gpu/kernel-tuner/tuner.js +22 -4
  95. package/src/gpu/kernels/attention.js +15 -34
  96. package/src/gpu/kernels/backward/adam.js +62 -58
  97. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  98. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  99. package/src/gpu/kernels/cast.js +191 -149
  100. package/src/gpu/kernels/check-stop.js +33 -44
  101. package/src/gpu/kernels/conv2d.js +27 -17
  102. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  103. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  104. package/src/gpu/kernels/dequant.js +178 -126
  105. package/src/gpu/kernels/energy.d.ts +3 -21
  106. package/src/gpu/kernels/energy.js +111 -88
  107. package/src/gpu/kernels/feature-check.js +1 -1
  108. package/src/gpu/kernels/fused_ffn.js +84 -65
  109. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  110. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  111. package/src/gpu/kernels/gather.js +33 -15
  112. package/src/gpu/kernels/gelu.js +19 -11
  113. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  114. package/src/gpu/kernels/groupnorm.js +34 -23
  115. package/src/gpu/kernels/kv-quantize.js +5 -2
  116. package/src/gpu/kernels/layernorm.js +35 -19
  117. package/src/gpu/kernels/logit-merge.js +5 -3
  118. package/src/gpu/kernels/matmul.js +58 -39
  119. package/src/gpu/kernels/modulate.js +23 -15
  120. package/src/gpu/kernels/moe.js +221 -175
  121. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  122. package/src/gpu/kernels/relu.js +18 -10
  123. package/src/gpu/kernels/repeat_channels.js +25 -17
  124. package/src/gpu/kernels/residual.js +37 -27
  125. package/src/gpu/kernels/rmsnorm.js +57 -41
  126. package/src/gpu/kernels/rope.js +3 -0
  127. package/src/gpu/kernels/sample.js +27 -38
  128. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  129. package/src/gpu/kernels/scale.js +18 -11
  130. package/src/gpu/kernels/shader-cache.js +4 -2
  131. package/src/gpu/kernels/silu.js +120 -72
  132. package/src/gpu/kernels/softmax.js +44 -25
  133. package/src/gpu/kernels/split_qkv.js +23 -13
  134. package/src/gpu/kernels/transpose.js +18 -10
  135. package/src/gpu/kernels/transpose.wgsl +5 -3
  136. package/src/gpu/kernels/upsample2d.js +21 -13
  137. package/src/gpu/kernels/utils.js +20 -13
  138. package/src/gpu/partitioned-buffer-pool.js +10 -2
  139. package/src/gpu/perf-guards.js +2 -9
  140. package/src/gpu/profiler.js +27 -22
  141. package/src/gpu/readback-utils.d.ts +16 -0
  142. package/src/gpu/readback-utils.js +41 -0
  143. package/src/gpu/submit-tracker.js +13 -0
  144. package/src/gpu/uniform-cache.d.ts +1 -0
  145. package/src/gpu/uniform-cache.js +30 -9
  146. package/src/hotswap/intent-bundle.js +6 -0
  147. package/src/hotswap/manifest.d.ts +10 -1
  148. package/src/hotswap/manifest.js +12 -2
  149. package/src/hotswap/runtime.js +30 -8
  150. package/src/index-browser.d.ts +44 -0
  151. package/src/index-browser.js +14 -0
  152. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  153. package/src/inference/browser-harness-contract-helpers.js +28 -0
  154. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  155. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  156. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  157. package/src/inference/browser-harness-model-helpers.js +217 -0
  158. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  159. package/src/inference/browser-harness-report-helpers.js +42 -0
  160. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  161. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  162. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  163. package/src/inference/browser-harness-suite-helpers.js +268 -0
  164. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  165. package/src/inference/browser-harness-text-helpers.js +788 -0
  166. package/src/inference/browser-harness.d.ts +6 -0
  167. package/src/inference/browser-harness.js +130 -1996
  168. package/src/inference/kv-cache/base.js +140 -94
  169. package/src/inference/kv-cache/tiered.js +5 -3
  170. package/src/inference/moe-router.js +88 -56
  171. package/src/inference/multi-model-network.js +5 -3
  172. package/src/inference/network-evolution.d.ts +11 -2
  173. package/src/inference/network-evolution.js +20 -21
  174. package/src/inference/pipelines/context.d.ts +3 -0
  175. package/src/inference/pipelines/context.js +142 -2
  176. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  177. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  178. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  179. package/src/inference/pipelines/diffusion/vae.js +3 -7
  180. package/src/inference/pipelines/energy/pipeline.js +27 -21
  181. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  182. package/src/inference/pipelines/energy/quintel.js +11 -0
  183. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  184. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  185. package/src/inference/pipelines/text/attention/projections.js +151 -101
  186. package/src/inference/pipelines/text/attention/record.js +62 -8
  187. package/src/inference/pipelines/text/attention/run.js +62 -8
  188. package/src/inference/pipelines/text/config.js +3 -4
  189. package/src/inference/pipelines/text/embed.js +2 -8
  190. package/src/inference/pipelines/text/execution-plan.js +41 -19
  191. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  192. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  193. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  194. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  195. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  196. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  197. package/src/inference/pipelines/text/generator-steps.js +298 -207
  198. package/src/inference/pipelines/text/generator.js +6 -23
  199. package/src/inference/pipelines/text/init.js +78 -20
  200. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  201. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  202. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  203. package/src/inference/pipelines/text/layer.js +3 -9
  204. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  205. package/src/inference/pipelines/text/linear-attention.js +80 -6
  206. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  207. package/src/inference/pipelines/text/logits/index.js +10 -11
  208. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  209. package/src/inference/pipelines/text/logits/utils.js +9 -0
  210. package/src/inference/pipelines/text/lora-apply.js +50 -32
  211. package/src/inference/pipelines/text/model-load.js +279 -104
  212. package/src/inference/pipelines/text/moe-cache.js +5 -4
  213. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  214. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  215. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  216. package/src/inference/pipelines/text/ops.js +90 -90
  217. package/src/inference/pipelines/text/probes.js +9 -9
  218. package/src/inference/pipelines/text/weights.js +17 -7
  219. package/src/inference/pipelines/text.js +13 -1
  220. package/src/inference/speculative.d.ts +2 -2
  221. package/src/inference/speculative.js +4 -18
  222. package/src/inference/test-harness.d.ts +1 -1
  223. package/src/inference/test-harness.js +15 -5
  224. package/src/inference/tokenizer.d.ts +0 -5
  225. package/src/inference/tokenizer.js +4 -23
  226. package/src/inference/tokenizers/bpe.js +9 -0
  227. package/src/inference/tokenizers/bundled.js +20 -0
  228. package/src/inference/tokenizers/sentencepiece.js +12 -0
  229. package/src/loader/doppler-loader.js +38 -22
  230. package/src/loader/dtype-utils.js +3 -44
  231. package/src/loader/embedding-loader.js +7 -3
  232. package/src/loader/experts/expert-cache.js +13 -6
  233. package/src/loader/experts/expert-loader.js +10 -6
  234. package/src/loader/final-weights-loader.js +8 -4
  235. package/src/loader/layer-loader.js +2 -1
  236. package/src/loader/loader-state.js +2 -2
  237. package/src/loader/memory-monitor.js +8 -0
  238. package/src/loader/multi-model-loader.d.ts +14 -0
  239. package/src/loader/multi-model-loader.js +70 -24
  240. package/src/loader/shard-cache.js +81 -12
  241. package/src/loader/shard-resolver.js +25 -3
  242. package/src/loader/tensors/tensor-loader.js +209 -144
  243. package/src/loader/tensors/tensor-reader.js +76 -19
  244. package/src/loader/weight-downcast.js +1 -1
  245. package/src/memory/buffer-pool.d.ts +9 -1
  246. package/src/memory/buffer-pool.js +109 -44
  247. package/src/memory/unified-detect.js +1 -1
  248. package/src/rules/inference/kernel-path.rules.json +24 -8
  249. package/src/rules/rule-registry.js +25 -1
  250. package/src/storage/backends/opfs-store.js +68 -24
  251. package/src/storage/downloader.js +364 -83
  252. package/src/storage/index.d.ts +3 -0
  253. package/src/storage/index.js +3 -0
  254. package/src/storage/preflight.d.ts +2 -2
  255. package/src/storage/preflight.js +24 -2
  256. package/src/storage/quickstart-downloader.js +11 -5
  257. package/src/storage/registry.js +10 -4
  258. package/src/storage/reports.js +1 -1
  259. package/src/storage/shard-manager.d.ts +15 -1
  260. package/src/storage/shard-manager.js +51 -3
  261. package/src/storage/source-artifact-store.d.ts +52 -0
  262. package/src/storage/source-artifact-store.js +234 -0
  263. package/src/tooling/command-api-constants.d.ts +9 -0
  264. package/src/tooling/command-api-constants.js +9 -0
  265. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  266. package/src/tooling/command-api-family-normalizers.js +343 -0
  267. package/src/tooling/command-api-helpers.d.ts +25 -0
  268. package/src/tooling/command-api-helpers.js +262 -0
  269. package/src/tooling/command-api.js +16 -602
  270. package/src/tooling/command-envelope.js +4 -1
  271. package/src/tooling/command-runner-shared.js +52 -18
  272. package/src/tooling/lean-execution-contract.js +150 -3
  273. package/src/tooling/node-browser-command-runner.js +161 -271
  274. package/src/tooling/node-command-runner.js +29 -3
  275. package/src/tooling/node-converter.js +27 -1
  276. package/src/tooling/node-source-runtime.d.ts +1 -1
  277. package/src/tooling/node-source-runtime.js +84 -3
  278. package/src/tooling/node-webgpu.js +24 -21
  279. package/src/tooling/opfs-cache.js +21 -4
  280. package/src/tooling/runtime-input-composition.d.ts +38 -0
  281. package/src/tooling/runtime-input-composition.js +86 -0
  282. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  283. package/src/tooling/source-runtime-bundle.js +261 -34
  284. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  285. package/src/tooling/source-runtime-materializer.js +93 -0
  286. package/src/training/attention-backward.js +32 -17
  287. package/src/training/autograd.js +80 -52
  288. package/src/training/checkpoint-watch.d.ts +2 -1
  289. package/src/training/checkpoint-watch.js +39 -6
  290. package/src/training/checkpoint.js +40 -11
  291. package/src/training/clip.js +2 -1
  292. package/src/training/datasets/token-batch.js +20 -8
  293. package/src/training/distillation/checkpoint-watch.js +1 -0
  294. package/src/training/distillation/student-fixture.d.ts +22 -0
  295. package/src/training/distillation/student-fixture.js +846 -0
  296. package/src/training/distillation/suite-data.d.ts +45 -0
  297. package/src/training/distillation/suite-data.js +189 -0
  298. package/src/training/lora-pipeline.js +4 -7
  299. package/src/training/lora.js +26 -12
  300. package/src/training/loss.js +5 -6
  301. package/src/training/objectives/cross_entropy.js +2 -5
  302. package/src/training/objectives/distill_kd.js +4 -8
  303. package/src/training/objectives/distill_triplet.js +4 -8
  304. package/src/training/objectives/ul_stage2_base.js +4 -8
  305. package/src/training/operator-command.js +2 -0
  306. package/src/training/optimizer.js +19 -7
  307. package/src/training/runner.js +2 -1
  308. package/src/training/suite.js +18 -978
  309. package/src/training/tensor-factory.d.ts +9 -0
  310. package/src/training/tensor-factory.js +13 -0
  311. package/src/training/trainer.js +3 -5
  312. package/src/training/ul_dataset.js +3 -5
  313. package/src/training/workloads.js +70 -79
  314. package/src/version.js +1 -1
  315. package/tools/convert-safetensors-node.js +22 -16
  316. package/tools/doppler-cli.js +44 -25
@@ -1,20 +1,13 @@
1
1
  {
2
2
  "id": "kernels/dequant-f32-q4k",
3
3
  "name": "dequant-f32-q4k",
4
- "description": "Runtime alias preset for Gemma 2 Q4K dequant-to-F32 kernel path.",
4
+ "description": "Deprecated alias for kernels/gemma2-q4k-dequant-f32a-nosubgroups.",
5
5
  "intent": "investigate",
6
- "stability": "canonical",
6
+ "stability": "deprecated",
7
7
  "owner": "doppler-core",
8
8
  "createdAtUtc": "2026-02-25T00:00:00Z",
9
- "extends": "default",
10
- "runtime": {
11
- "inference": {
12
- "kernelPath": "gemma2-q4k-dequant-f32a",
13
- "kernelPathPolicy": {
14
- "mode": "capability-aware",
15
- "sourceScope": ["config", "model", "manifest", "execution-v0"],
16
- "onIncompatible": "remap"
17
- }
18
- }
19
- }
9
+ "deprecatedAtUtc": "2026-03-08T00:00:00Z",
10
+ "replacementId": "kernels/gemma2-q4k-dequant-f32a-nosubgroups",
11
+ "extends": "kernels/gemma2-q4k-dequant-f32a-nosubgroups",
12
+ "runtime": {}
20
13
  }
@@ -0,0 +1,37 @@
1
+ {
2
+ "id": "kernels/embeddinggemma-q4k-dequant-f32a",
3
+ "name": "embeddinggemma-q4k-dequant-f32a",
4
+ "description": "Explicit runtime override preset for the EmbeddingGemma Q4K dequant-to-F32 kernel path.",
5
+ "intent": "investigate",
6
+ "stability": "canonical",
7
+ "owner": "doppler-core",
8
+ "createdAtUtc": "2026-03-09T00:00:00Z",
9
+ "extends": "default",
10
+ "runtime": {
11
+ "inference": {
12
+ "compute": {
13
+ "activationDtype": "f32"
14
+ },
15
+ "kvcache": {
16
+ "kvDtype": "f32"
17
+ },
18
+ "session": {
19
+ "compute": {
20
+ "defaults": {
21
+ "activationDtype": "f32",
22
+ "outputDtype": "f32"
23
+ }
24
+ },
25
+ "kvcache": {
26
+ "kvDtype": "f32"
27
+ }
28
+ },
29
+ "kernelPath": "embeddinggemma-q4k-dequant-f32a",
30
+ "kernelPathPolicy": {
31
+ "mode": "capability-aware",
32
+ "sourceScope": ["config", "model", "manifest", "execution-v0"],
33
+ "onIncompatible": "remap"
34
+ }
35
+ }
36
+ }
37
+ }
@@ -1,20 +1,13 @@
1
1
  {
2
2
  "id": "kernels/fused-q4k",
3
3
  "name": "fused-q4k",
4
- "description": "Runtime alias preset for Gemma 2 fused Q4K kernel path.",
4
+ "description": "Deprecated alias for kernels/gemma2-q4k-fused-f32a.",
5
5
  "intent": "investigate",
6
- "stability": "canonical",
6
+ "stability": "deprecated",
7
7
  "owner": "doppler-core",
8
8
  "createdAtUtc": "2026-02-25T00:00:00Z",
9
- "extends": "default",
10
- "runtime": {
11
- "inference": {
12
- "kernelPath": "gemma2-q4k-fused-f32a",
13
- "kernelPathPolicy": {
14
- "mode": "capability-aware",
15
- "sourceScope": ["config", "model", "manifest", "execution-v0"],
16
- "onIncompatible": "remap"
17
- }
18
- }
19
- }
9
+ "deprecatedAtUtc": "2026-03-08T00:00:00Z",
10
+ "replacementId": "kernels/gemma2-q4k-fused-f32a",
11
+ "extends": "kernels/gemma2-q4k-fused-f32a",
12
+ "runtime": {}
20
13
  }
@@ -0,0 +1,33 @@
1
+ {
2
+ "id": "kernels/gemma2-q4k-dequant-f16a",
3
+ "name": "gemma2-q4k-dequant-f16a",
4
+ "description": "Explicit runtime override preset for the Gemma 2 Q4K dequant-to-F16 kernel path.",
5
+ "intent": "investigate",
6
+ "stability": "canonical",
7
+ "owner": "doppler-core",
8
+ "createdAtUtc": "2026-03-08T00:00:00Z",
9
+ "extends": "default",
10
+ "runtime": {
11
+ "inference": {
12
+ "compute": {
13
+ "activationDtype": "f16"
14
+ },
15
+ "kvcache": {
16
+ "kvDtype": "f16"
17
+ },
18
+ "session": {
19
+ "compute": {
20
+ "defaults": {
21
+ "outputDtype": "f16"
22
+ }
23
+ }
24
+ },
25
+ "kernelPath": "gemma2-q4k-dequant-f16a",
26
+ "kernelPathPolicy": {
27
+ "mode": "capability-aware",
28
+ "sourceScope": ["config", "model", "manifest", "execution-v0"],
29
+ "onIncompatible": "remap"
30
+ }
31
+ }
32
+ }
33
+ }
@@ -0,0 +1,33 @@
1
+ {
2
+ "id": "kernels/gemma2-q4k-dequant-f32a-nosubgroups",
3
+ "name": "gemma2-q4k-dequant-f32a-nosubgroups",
4
+ "description": "Explicit runtime override preset for the Gemma 2 Q4K dequant-to-F32 no-subgroups kernel path.",
5
+ "intent": "investigate",
6
+ "stability": "canonical",
7
+ "owner": "doppler-core",
8
+ "createdAtUtc": "2026-03-08T00:00:00Z",
9
+ "extends": "default",
10
+ "runtime": {
11
+ "inference": {
12
+ "compute": {
13
+ "activationDtype": "f32"
14
+ },
15
+ "kvcache": {
16
+ "kvDtype": "f32"
17
+ },
18
+ "session": {
19
+ "compute": {
20
+ "defaults": {
21
+ "outputDtype": "f32"
22
+ }
23
+ }
24
+ },
25
+ "kernelPath": "gemma2-q4k-dequant-f32a-nosubgroups",
26
+ "kernelPathPolicy": {
27
+ "mode": "capability-aware",
28
+ "sourceScope": ["config", "model", "manifest", "execution-v0"],
29
+ "onIncompatible": "remap"
30
+ }
31
+ }
32
+ }
33
+ }
@@ -0,0 +1,33 @@
1
+ {
2
+ "id": "kernels/gemma2-q4k-fused-f32a",
3
+ "name": "gemma2-q4k-fused-f32a",
4
+ "description": "Explicit runtime override preset for the Gemma 2 fused Q4K F32 kernel path.",
5
+ "intent": "investigate",
6
+ "stability": "canonical",
7
+ "owner": "doppler-core",
8
+ "createdAtUtc": "2026-03-08T00:00:00Z",
9
+ "extends": "default",
10
+ "runtime": {
11
+ "inference": {
12
+ "compute": {
13
+ "activationDtype": "f32"
14
+ },
15
+ "kvcache": {
16
+ "kvDtype": "f32"
17
+ },
18
+ "session": {
19
+ "compute": {
20
+ "defaults": {
21
+ "outputDtype": "f32"
22
+ }
23
+ }
24
+ },
25
+ "kernelPath": "gemma2-q4k-fused-f32a",
26
+ "kernelPathPolicy": {
27
+ "mode": "capability-aware",
28
+ "sourceScope": ["config", "model", "manifest", "execution-v0"],
29
+ "onIncompatible": "remap"
30
+ }
31
+ }
32
+ }
33
+ }
@@ -1,20 +1,13 @@
1
1
  {
2
2
  "id": "kernels/safe-q4k",
3
3
  "name": "safe-q4k",
4
- "description": "Runtime alias preset for Gemma 2 Q4K dequant-to-F32 compatibility kernel path.",
4
+ "description": "Deprecated alias for kernels/gemma2-q4k-dequant-f32a-nosubgroups.",
5
5
  "intent": "investigate",
6
- "stability": "canonical",
6
+ "stability": "deprecated",
7
7
  "owner": "doppler-core",
8
8
  "createdAtUtc": "2026-02-25T00:00:00Z",
9
- "extends": "default",
10
- "runtime": {
11
- "inference": {
12
- "kernelPath": "gemma2-q4k-dequant-f32a",
13
- "kernelPathPolicy": {
14
- "mode": "capability-aware",
15
- "sourceScope": ["config", "model", "manifest", "execution-v0"],
16
- "onIncompatible": "remap"
17
- }
18
- }
19
- }
9
+ "deprecatedAtUtc": "2026-03-08T00:00:00Z",
10
+ "replacementId": "kernels/gemma2-q4k-dequant-f32a-nosubgroups",
11
+ "extends": "kernels/gemma2-q4k-dequant-f32a-nosubgroups",
12
+ "runtime": {}
20
13
  }
@@ -6,6 +6,6 @@
6
6
  "stability": "canonical",
7
7
  "owner": "doppler-core",
8
8
  "createdAtUtc": "2026-02-25T00:00:00Z",
9
- "extends": "kernels/dequant-f16-q4k",
9
+ "extends": "kernels/gemma2-q4k-dequant-f16a",
10
10
  "runtime": {}
11
11
  }
@@ -1,5 +1,6 @@
1
1
  import { createDopplerConfig, setKernelThresholds } from './schema/index.js';
2
2
  import { validateRuntimeConfig, validateRuntimeOverrides } from './param-validator.js';
3
+ import { isPlainObject } from '../utils/plain-object.js';
3
4
 
4
5
  let runtimeConfig = createDopplerConfig().runtime;
5
6
  setKernelThresholds(runtimeConfig.shared.kernelThresholds);
@@ -9,12 +10,16 @@ export function getRuntimeConfig() {
9
10
  }
10
11
 
11
12
  export function setRuntimeConfig(overrides) {
12
- if (!overrides) {
13
+ if (overrides === undefined || overrides === null) {
13
14
  runtimeConfig = createDopplerConfig().runtime;
14
15
  setKernelThresholds(runtimeConfig.shared.kernelThresholds);
15
16
  return runtimeConfig;
16
17
  }
17
18
 
19
+ if (!isPlainObject(overrides)) {
20
+ throw new Error('DopplerConfigError: runtime overrides must be an object when provided.');
21
+ }
22
+
18
23
  assertNoDeprecatedRuntimeKeys(overrides);
19
24
  validateRuntimeOverrides(overrides);
20
25
 
@@ -181,6 +181,11 @@ export type ProbeStage =
181
181
  // Attention stages (per-layer)
182
182
  | 'attn_input' // Input to attention (after residual from previous layer)
183
183
  | 'attn_normed' // After input RMSNorm
184
+ | 'linear_qkv_proj' // Linear-attention fused QKV projection output
185
+ | 'linear_z_proj' // Linear-attention z projection output
186
+ | 'linear_a_proj' // Linear-attention a projection output
187
+ | 'linear_b_proj' // Linear-attention b projection output
188
+ | 'linear_core_out' // Linear-attention recurrent core output (before o_proj)
184
189
  | 'q_proj' // Q projection output
185
190
  | 'k_proj' // K projection output
186
191
  | 'v_proj' // V projection output
@@ -5,6 +5,7 @@ import { DEFAULT_EMULATION_CONFIG, createEmulationConfig } from './emulation.sch
5
5
  import { mergeEcosystemConfig } from './ecosystem.schema.js';
6
6
  import {
7
7
  chooseNullish,
8
+ chooseDefined,
8
9
  mergeExecutionPatchLists,
9
10
  mergeKernelPathPolicy,
10
11
  mergeShallowObject,
@@ -31,6 +32,13 @@ export const DEFAULT_DOPPLER_CONFIG = {
31
32
  runtime: DEFAULT_RUNTIME_CONFIG,
32
33
  };
33
34
 
35
+ function cloneConfigTree(value) {
36
+ if (typeof structuredClone === 'function') {
37
+ return structuredClone(value);
38
+ }
39
+ return JSON.parse(JSON.stringify(value));
40
+ }
41
+
34
42
  // =============================================================================
35
43
  // Factory Function
36
44
  // =============================================================================
@@ -39,19 +47,21 @@ export function createDopplerConfig(
39
47
  overrides
40
48
  ) {
41
49
  if (!overrides) {
42
- return { ...DEFAULT_DOPPLER_CONFIG };
50
+ return {
51
+ model: DEFAULT_DOPPLER_CONFIG.model,
52
+ runtime: cloneConfigTree(DEFAULT_RUNTIME_CONFIG),
53
+ };
43
54
  }
44
55
 
45
56
  const runtimeOverrides = overrides.runtime ?? {};
57
+ const runtimeBase = cloneConfigTree(DEFAULT_RUNTIME_CONFIG);
46
58
  const runtime = overrides.runtime
47
- ? mergeRuntimeConfig(DEFAULT_RUNTIME_CONFIG, runtimeOverrides)
48
- : { ...DEFAULT_RUNTIME_CONFIG };
59
+ ? mergeRuntimeConfig(runtimeBase, runtimeOverrides)
60
+ : runtimeBase;
49
61
  const config = {
50
62
  model: overrides.model ?? DEFAULT_DOPPLER_CONFIG.model,
51
63
  runtime,
52
64
  };
53
-
54
- applyCalibrateDefaults(config.runtime, runtimeOverrides);
55
65
  return config;
56
66
  }
57
67
 
@@ -239,7 +249,7 @@ function mergeInferenceConfig(
239
249
  speculative: { ...base.speculative, ...overrides.speculative },
240
250
  generation: { ...base.generation, ...overrides.generation },
241
251
  pipeline: overrides.pipeline ?? base.pipeline,
242
- kernelPath: overrides.kernelPath ?? base.kernelPath,
252
+ kernelPath: chooseDefined(overrides.kernelPath, base.kernelPath),
243
253
  kernelPathSource: overrides.kernelPathSource ?? base.kernelPathSource,
244
254
  kernelPathPolicy: mergeKernelPathPolicy(baseKernelPathPolicy, overrideKernelPathPolicy),
245
255
  chatTemplate: mergeShallowObject(base.chatTemplate, overrides.chatTemplate),
@@ -302,21 +312,6 @@ function mergeDebugConfig(
302
312
  };
303
313
  }
304
314
 
305
- function applyCalibrateDefaults(runtime, runtimeOverrides) {
306
- const intent = runtime?.shared?.tooling?.intent;
307
- if (intent !== 'calibrate') return;
308
-
309
- const warmupOverrides = runtimeOverrides?.shared?.kernelWarmup;
310
- const hasPrewarmOverride = warmupOverrides
311
- && Object.prototype.hasOwnProperty.call(warmupOverrides, 'prewarm');
312
- if (!hasPrewarmOverride) {
313
- runtime.shared.kernelWarmup = {
314
- ...runtime.shared.kernelWarmup,
315
- prewarm: true,
316
- };
317
- }
318
- }
319
-
320
315
  function mergeBenchmarkConfig(
321
316
  base,
322
317
  overrides
@@ -172,9 +172,9 @@ export const DEFAULT_PRESET_INFERENCE_CONFIG = {
172
172
  ropeScalingFactor: 1.0,
173
173
  ropeLocalScalingType: null,
174
174
  ropeLocalScalingFactor: 1.0,
175
- yarnBetaFast: 32,
176
- yarnBetaSlow: 1,
177
- yarnOriginalMaxPos: 4096,
175
+ yarnBetaFast: null,
176
+ yarnBetaSlow: null,
177
+ yarnOriginalMaxPos: null,
178
178
  ropeLocalYarnBetaFast: null,
179
179
  ropeLocalYarnBetaSlow: null,
180
180
  ropeLocalYarnOriginalMaxPos: null,
@@ -151,13 +151,17 @@ export interface KernelPathSchema {
151
151
  export type BuiltinKernelPathId =
152
152
  | 'gemma2-q4k-fused-f32a' // Gemma 2 Q4K weights, fused matmul, F32 activations
153
153
  | 'gemma2-q4k-dequant-f16a' // Gemma 2 Q4K -> F16 dequant, F16 activations
154
+ | 'gemma2-q4k-dequant-f32a-nosubgroups' // Gemma 2 Q4K -> F32 dequant path with no subgroup requirement
155
+ | 'gemma2-q4k-dequant-f32a' // Legacy alias for gemma2-q4k-dequant-f32a-nosubgroups
154
156
  | 'gemma2-f16-f16a' // Gemma 2 F16 weights, F16 activations
155
157
  | 'gemma2-f16-f32a' // Gemma 2 F16 weights, F32 activations
156
158
  | 'gemma3-f16-fused-f16a-online' // Gemma 3 F16 fused FFN online path
157
159
  | 'gemma3-f16-fused-f32a-online' // Gemma 3 F16 fused FFN online path with F32 activations
160
+ | 'gemma3-f16-fused-f32a-online-streamingprefill' // Gemma 3 F16 fused FFN online path with streaming prefill attention
158
161
  | 'gemma3-q4k-dequant-f16a-online' // Gemma 3 Q4K dequant online path (F16 activations)
159
162
  | 'gemma3-q4k-dequant-f32a-online' // Gemma 3 Q4K dequant online path with F32 activations
160
- | 'gemma3-q4k-dequant-f32a' // Gemma 3 Q4K dequant path with F32 activations
163
+ | 'gemma3-q4k-dequant-f32a-nosubgroups' // Gemma 3 Q4K dequant path with no subgroup requirement
164
+ | 'gemma3-q4k-dequant-f32a' // Legacy alias for gemma3-q4k-dequant-f32a-nosubgroups
161
165
  | 'lfm2-q4k-dequant-f32a-online' // LFM2 Q4K path with F32 activations and fast prefill
162
166
  | 'embeddinggemma-f16-f32a' // EmbeddingGemma F16 weights, F32 activations
163
167
  | 'embeddinggemma-f32-f32a' // EmbeddingGemma F32 weights, F32 activations
@@ -155,18 +155,25 @@ export const DEFAULT_KERNEL_THRESHOLDS = {
155
155
  tuner: DEFAULT_TUNER_LIMITS,
156
156
  };
157
157
 
158
+ function cloneThresholdTree(value) {
159
+ if (typeof structuredClone === 'function') {
160
+ return structuredClone(value);
161
+ }
162
+ return JSON.parse(JSON.stringify(value));
163
+ }
164
+
158
165
  // =============================================================================
159
166
  // Runtime Access
160
167
  // =============================================================================
161
168
 
162
- let currentThresholds = { ...DEFAULT_KERNEL_THRESHOLDS };
169
+ let currentThresholds = cloneThresholdTree(DEFAULT_KERNEL_THRESHOLDS);
163
170
 
164
171
  export function getKernelThresholds() {
165
- return currentThresholds;
172
+ return cloneThresholdTree(currentThresholds);
166
173
  }
167
174
 
168
175
  export function setKernelThresholds(overrides) {
169
- currentThresholds = {
176
+ const nextThresholds = {
170
177
  ...currentThresholds,
171
178
  ...overrides,
172
179
  matmul: { ...currentThresholds.matmul, ...overrides.matmul },
@@ -180,8 +187,9 @@ export function setKernelThresholds(overrides) {
180
187
  cast: { ...currentThresholds.cast, ...overrides.cast },
181
188
  tuner: { ...currentThresholds.tuner, ...overrides.tuner },
182
189
  };
190
+ currentThresholds = cloneThresholdTree(nextThresholds);
183
191
  }
184
192
 
185
193
  export function resetKernelThresholds() {
186
- currentThresholds = { ...DEFAULT_KERNEL_THRESHOLDS };
194
+ currentThresholds = cloneThresholdTree(DEFAULT_KERNEL_THRESHOLDS);
187
195
  }
@@ -88,7 +88,8 @@ export interface QuantizationInfoSchema {
88
88
  tts?: QuantizationValue; // TTS decoder
89
89
  projector?: QuantizationValue; // Cross-modal projection layers
90
90
 
91
- // Runtime hints (NOT included in variantTag - these are runtime, not storage)
91
+ // Runtime hints. `compute` may be included in variantTag when artifact naming
92
+ // treats activation dtype as part of the published variant identity.
92
93
  kvCache?: QuantizationValue;
93
94
  compute?: QuantizationValue;
94
95
 
@@ -1,4 +1,5 @@
1
1
  import { MB } from './units.schema.js';
2
+ import { validateRequiredInferenceFields } from '../../inference/pipelines/text/config.js';
2
3
 
3
4
  // =============================================================================
4
5
  // Hash & Versioning
@@ -70,9 +71,9 @@ export const DEFAULT_MANIFEST_INFERENCE = {
70
71
  ropeLocalScalingType: null, // Local scaling policy (null = no scaling)
71
72
  ropeLocalScalingFactor: 1.0,
72
73
  // YARN parameters - only relevant when ropeScalingType='yarn'
73
- yarnBetaFast: 32,
74
- yarnBetaSlow: 1,
75
- yarnOriginalMaxPos: 4096,
74
+ yarnBetaFast: null,
75
+ yarnBetaSlow: null,
76
+ yarnOriginalMaxPos: null,
76
77
  // Local YARN parameters - only relevant when ropeLocalScalingType='yarn'
77
78
  ropeLocalYarnBetaFast: null,
78
79
  ropeLocalYarnBetaSlow: null,
@@ -124,6 +125,18 @@ export function validateManifestInference(
124
125
  `Please re-convert the model using the latest converter.`
125
126
  );
126
127
  }
128
+
129
+ if (manifest.modelType === 'diffusion' || manifest.modelType === 'energy') {
130
+ return;
131
+ }
132
+
133
+ const inference = typeof structuredClone === 'function'
134
+ ? structuredClone(manifest.inference)
135
+ : JSON.parse(JSON.stringify(manifest.inference));
136
+ validateRequiredInferenceFields(
137
+ inference,
138
+ manifest.modelId ?? 'unknown'
139
+ );
127
140
  }
128
141
 
129
142
  export function hasInferenceConfig(
@@ -2,9 +2,17 @@ import { createDopplerConfig, DEFAULT_TRAINING_SETTINGS } from './schema/index.j
2
2
  import { validateDistillTrainingConfig } from './schema/distill-training.schema.js';
3
3
  import { validateUlTrainingConfig } from './schema/ul-training.schema.js';
4
4
 
5
+ function cloneConfigTree(value) {
6
+ if (typeof structuredClone === 'function') {
7
+ return structuredClone(value);
8
+ }
9
+ return JSON.parse(JSON.stringify(value));
10
+ }
11
+
5
12
  function mergeTrainingSettings(base, overrides) {
13
+ const baseConfig = cloneConfigTree(base);
6
14
  if (!overrides) {
7
- const merged = { ...base };
15
+ const merged = baseConfig;
8
16
  validateDistillTrainingConfig(merged.distill);
9
17
  validateUlTrainingConfig(merged.ul);
10
18
  if (merged.distill.enabled === true && merged.ul.enabled === true) {
@@ -14,42 +22,42 @@ function mergeTrainingSettings(base, overrides) {
14
22
  }
15
23
 
16
24
  const merged = {
17
- enabled: overrides.enabled ?? base.enabled,
18
- lora: { ...base.lora, ...overrides.lora },
25
+ enabled: overrides.enabled ?? baseConfig.enabled,
26
+ lora: { ...baseConfig.lora, ...overrides.lora },
19
27
  optimizer: {
20
- ...base.optimizer,
28
+ ...baseConfig.optimizer,
21
29
  ...overrides.optimizer,
22
- scheduler: { ...base.optimizer.scheduler, ...overrides.optimizer?.scheduler },
30
+ scheduler: { ...baseConfig.optimizer.scheduler, ...overrides.optimizer?.scheduler },
23
31
  },
24
- gradient: { ...base.gradient, ...overrides.gradient },
25
- precision: { ...base.precision, ...overrides.precision },
26
- attention: { ...base.attention, ...overrides.attention },
32
+ gradient: { ...baseConfig.gradient, ...overrides.gradient },
33
+ precision: { ...baseConfig.precision, ...overrides.precision },
34
+ attention: { ...baseConfig.attention, ...overrides.attention },
27
35
  telemetry: {
28
- ...base.telemetry,
36
+ ...baseConfig.telemetry,
29
37
  ...overrides.telemetry,
30
38
  alerts: {
31
- ...base.telemetry.alerts,
39
+ ...baseConfig.telemetry.alerts,
32
40
  ...overrides.telemetry?.alerts,
33
41
  thresholds: {
34
- ...base.telemetry.alerts.thresholds,
42
+ ...baseConfig.telemetry.alerts.thresholds,
35
43
  ...overrides.telemetry?.alerts?.thresholds,
36
44
  },
37
45
  },
38
46
  },
39
- lossScaling: { ...base.lossScaling, ...overrides.lossScaling },
47
+ lossScaling: { ...baseConfig.lossScaling, ...overrides.lossScaling },
40
48
  distill: {
41
- ...base.distill,
49
+ ...baseConfig.distill,
42
50
  ...overrides.distill,
43
- freeze: { ...base.distill.freeze, ...overrides.distill?.freeze },
51
+ freeze: { ...baseConfig.distill.freeze, ...overrides.distill?.freeze },
44
52
  },
45
53
  ul: {
46
- ...base.ul,
54
+ ...baseConfig.ul,
47
55
  ...overrides.ul,
48
- noiseSchedule: { ...base.ul.noiseSchedule, ...overrides.ul?.noiseSchedule },
49
- priorAlignment: { ...base.ul.priorAlignment, ...overrides.ul?.priorAlignment },
50
- decoderSigmoidWeight: { ...base.ul.decoderSigmoidWeight, ...overrides.ul?.decoderSigmoidWeight },
51
- lossWeights: { ...base.ul.lossWeights, ...overrides.ul?.lossWeights },
52
- freeze: { ...base.ul.freeze, ...overrides.ul?.freeze },
56
+ noiseSchedule: { ...baseConfig.ul.noiseSchedule, ...overrides.ul?.noiseSchedule },
57
+ priorAlignment: { ...baseConfig.ul.priorAlignment, ...overrides.ul?.priorAlignment },
58
+ decoderSigmoidWeight: { ...baseConfig.ul.decoderSigmoidWeight, ...overrides.ul?.decoderSigmoidWeight },
59
+ lossWeights: { ...baseConfig.ul.lossWeights, ...overrides.ul?.lossWeights },
60
+ freeze: { ...baseConfig.ul.freeze, ...overrides.ul?.freeze },
53
61
  },
54
62
  };
55
63
  validateDistillTrainingConfig(merged.distill);
@@ -74,7 +82,7 @@ export function createTrainingConfig(overrides = {}) {
74
82
 
75
83
  export const DEFAULT_TRAINING_CONFIG = createTrainingConfig();
76
84
 
77
- let trainingConfig = DEFAULT_TRAINING_CONFIG;
85
+ let trainingConfig = createTrainingConfig();
78
86
 
79
87
  export function getTrainingConfig() {
80
88
  return trainingConfig;
@@ -86,6 +94,6 @@ export function setTrainingConfig(overrides) {
86
94
  }
87
95
 
88
96
  export function resetTrainingConfig() {
89
- trainingConfig = DEFAULT_TRAINING_CONFIG;
97
+ trainingConfig = createTrainingConfig();
90
98
  return trainingConfig;
91
99
  }