@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -49,16 +49,16 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
49
49
  "backward/upsample2d_backward.wgsl#main": "3f3a27fc880d3c4cba49889cafb4be66d30132cfeec9c2105751cc12a5a44ae3",
50
50
  "bf16_to_f16.wgsl#main": "91c009d291a205fc42e7b45942e58630442eb67f8cb4bacc2e54160fa5f25c35",
51
51
  "bf16_to_f32.wgsl#main": "2c5c08476d40aeb64287b8b31d1ef088c63e8bb4568bf7b7c5faf4a4ed493576",
52
- "bias_add_f16.wgsl#main": "8ec47faa1a52190616b431ca8bbd7b1e657ef13720e35e787e4df16b65817b03",
53
- "bias_add.wgsl#main": "d15e3f38379e5a7b393ab88c8a1a6eae13bbc8fec4348267d891ce5b7e8bb963",
52
+ "bias_add_f16.wgsl#main": "f4c5b6e6495bcaae4582c3eb11c7fe39466a9625bbe57eede116c24db71682f6",
53
+ "bias_add.wgsl#main": "e7d3b4a5f9efc8b0569c56bcdeb63e921fa6d1d006901deabc734cab40346715",
54
54
  "cast_f16_to_f32.wgsl#main": "98a0b31fcba2bad945e9f8522d643ae75135bfdf6b39a824565f57d5d4fd965d",
55
55
  "cast_f32_to_f16.wgsl#main": "0b669e7812c09a2f44e219548e11ca0dfd8af921d30017e2a5c79d54f792542f",
56
56
  "clamp.wgsl#main": "9d8039a590d102133636d67de8ae76ca8e9129bf220c48f7a08e9d82fcc48bad",
57
- "conv2d_f16.wgsl#main": "aa139e9f0270873acbc1c4b3cbacff4d224cae7247b520ec129a4f068eb6ed59",
58
- "conv2d.wgsl#main": "484a676692d2b8097daeefe42e2296a1f8b3ef11abfd7b41df6cdcdf16b7a8fd",
57
+ "conv2d_f16.wgsl#main": "1e26a7e45e434fc45bdcd1913b0b97d2ed7e80b3c0bb1beae3d762ee457841c7",
58
+ "conv2d.wgsl#main": "140ed68bf1554ebfadbad7bcf1dfdff7f95aff18588459205f14b05cb432eee2",
59
59
  "cross_entropy_loss.wgsl#main": "5a48087bdec94184432c90ce5b345e1eadbdfcb13b9793ecee8052bc7392239c",
60
- "depthwise_conv2d_f16.wgsl#main": "d5d8d195b1449e39715340af4a0759da4b44b54f6a3cfbdfa6abe743b0f1d002",
61
- "depthwise_conv2d.wgsl#main": "e5da160f505e18508619b78ba30f9bde0c84689a166df06cb59ef0e6591c6faf",
60
+ "depthwise_conv2d_f16.wgsl#main": "f7f093a7e6623ed17a675bac729149e94718aece916416966eaf03c1d6939f2a",
61
+ "depthwise_conv2d.wgsl#main": "cf14cb40d282ad4d4fab160109b97eaeaf12aab62579b73324ac485ac75155b0",
62
62
  "dequant_f16_out_vec4.wgsl#main_vec4": "61c20e6c71c1c8421b4ec202dbd26292a6300587bd44c314f2a6c6d9d9442c3a",
63
63
  "dequant_f16_out.wgsl#main": "94d61843d56f9a3bbc6b7c2b95dc6ecbba3f6a262b2c4086a076f69a8c38ccae",
64
64
  "dequant_f16_rowwise.wgsl#main": "f5bf7cef950b52d65cee6121dbaa176244d3221045b3b6386b3be47f23ce17dc",
@@ -118,8 +118,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
118
118
  "gelu.wgsl#main": "a9007ea08aaff98f9be08f1e0490a6bcf252883eac5513de876ab9ce918865e6",
119
119
  "gptoss_mxfp4_expert_fused.wgsl#main_expert": "3159e8cd81da13f909cf905e6d35307fefe1dcbbdf1b2b8e8ff0ce923bd71180",
120
120
  "gptoss_router_topk.wgsl#softmax_topk": "86e4ea709c0c0084d09c6a4cd07710dc14f380e03f91b8ed9ec871b310be49f1",
121
- "grouped_pointwise_conv2d_f16.wgsl#main": "11bcaefc5929b2e3c1ba338ebea6a28d2cac26553be8b00f51bfddbabf513be7",
122
- "grouped_pointwise_conv2d.wgsl#main": "c0d5cdec0743b4ee337a8df95bda442e617c1678e3d1b6e20ec692d500ede50d",
121
+ "grouped_pointwise_conv2d_f16.wgsl#main": "578a0fcb3362864feb0de0e800b2df49e66e42db4360a08189e5f815ead944c4",
122
+ "grouped_pointwise_conv2d.wgsl#main": "9cf77937b10dd96f3aedc1793183ef2fff05d075fac3884fad5cd5869d0d9181",
123
123
  "groupnorm_apply_f16.wgsl#main": "cfd850b87944ac1c03ba7bd98136db556dadd8a70611e351d82d297299a7cd02",
124
124
  "groupnorm_apply.wgsl#main": "b09b8f2f57dcdfa1a0366daa30d3910feb134204652c711d2ba564e566b5a334",
125
125
  "groupnorm_stats_f16.wgsl#main": "fb76f78ce668ea8459110335698fe4b09a2425fc71deed3bab67efd7641c3199",
@@ -155,33 +155,33 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
155
155
  "moe_gather.wgsl#gather_single_pass": "8dbd0c38a323ba6c73af3ecaad297d79d861b817152e6e2c12fcd9db6f767f1e",
156
156
  "moe_gather.wgsl#gather_tokens": "58e40a63a39c2f99c47c75dee71767f98482217afd74607cee1b7efc2c85738b",
157
157
  "moe_offsets.wgsl#build_offsets": "3ea004145fa234659408cdeb0d4d802adff1037c9c5c03af146b3734cc69dd27",
158
- "pixel_shuffle_f16.wgsl#main": "57903a9c19cecc56371b2198402745127115680d266c3ce609201be9119aa359",
159
- "pixel_shuffle.wgsl#main": "845b88700b1b46d18cde6f2ec11bb89512c90d7e148763e74ce2a4173fd99b21",
160
- "relu_f16.wgsl#main": "fc6134aabe43081b42ce8507d8f374092d0f2e03316aa42c25dd50229dc0ee40",
161
- "relu.wgsl#main": "ca2c9bfa0acb9ece3b7e67de5209e00e553602b3917d23aca10338c1e6f01e27",
162
- "repeat_channels_f16.wgsl#main": "e7e4d9164752e782d482db40256d0d86d96f784aa7debdb72faf3261b9bdd737",
163
- "repeat_channels.wgsl#main": "ad0e34925c8c1173b9f0d92fa6e3808d039f82b3d9ad943b0a75b213ee1776e5",
164
- "residual_f16_vec4.wgsl#add_vec4": "30e9226fb6636e2f01e65b1dc8e93c8e849a87acec6215342fc114996da1ed41",
165
- "residual_f16.wgsl#main": "d392433f3065d1caf68b033219f4ffacf022dc1f90fc3cf3fd620e4ba49f3219",
166
- "residual_vec4.wgsl#add_vec4": "ef011d1683e62887db712da563e783d12fdc80c152955661137d2dca612d7d6a",
167
- "residual.wgsl#main": "1fc456b14e2fb2bc9627107b4e51e7a2098f723b5ba6ab5542cd9455af99f423",
168
- "rmsnorm_f16.wgsl#main": "21d6d702cfcb8d653d8e105bbbc522e85704fa5628b865a7d36cde338ca5c779",
169
- "rmsnorm_f16.wgsl#rmsnorm_small_f16": "10803373bce7d5d5eb0939821e629e0f3fc4e8d27cdddab89f14a9db02e83603",
170
- "rmsnorm.wgsl#main": "c529986befb29a04b94d89744585923a7cef82baf4b2b0a243aa2431618622cc",
171
- "rmsnorm.wgsl#main_cached": "828d30dd8b6137457375b9f7b446a6bbdc1df826ce4d9b1818f6d1abe7adc9c5",
172
- "rmsnorm.wgsl#main_small": "2baf3a1a1a1e982cf9ffcddd3646655503536148c4a3ce9938646bc05cf701b5",
173
- "rmsnorm.wgsl#main_small_subgroup": "909b52d4dfcdd780f531afeb6964885adc90552487f36f131aa349bcc54fb495",
174
- "rmsnorm.wgsl#main_subgroup": "545ebdd1e6e3ab9790a17a4dd5501b12dbb7bc0073787f9a4f5821752a75991c",
158
+ "pixel_shuffle_f16.wgsl#main": "1d1202cdaa8f7a94e015c5e2212bc98881d00d548a12fe7a8e91c4e17f2ba723",
159
+ "pixel_shuffle.wgsl#main": "91c017f0642132229768a2be6c8d47ad26d486f5d40e7dbf9e2349c847e527b8",
160
+ "relu_f16.wgsl#main": "baac3a33693e5a72e60c7ec9b4a4dbcec10a83ca1fac2972988b07a75780f14a",
161
+ "relu.wgsl#main": "52ffa66f9898a4f291758ae93055000cb1eb488c22a993e6b0568a29d9d3ad28",
162
+ "repeat_channels_f16.wgsl#main": "6eee5f9f74d8c17a71f6f422b503bbfb872350a9486ec0065fd1f67718311883",
163
+ "repeat_channels.wgsl#main": "bce0cb33beed50ce388b2d069961ae0fbe361dd4930b9344cb066b3390475a51",
164
+ "residual_f16_vec4.wgsl#add_vec4": "bf8cd304a1d4c5500143b0bc52d0236a7e8bbc4cc2d51d54ef0d4fce209f503b",
165
+ "residual_f16.wgsl#main": "3ca5c562fb5baf495e31e662f85fb26863f8f6d4ad29bb119c484e2ffdee7791",
166
+ "residual_vec4.wgsl#add_vec4": "f2f30d2dec9d90062bf5fce1f141951e8e6b54f56698b87ffb9bf6662c8acce2",
167
+ "residual.wgsl#main": "f1abd88c959c5d8dd27b9353d487e37b2a96850ed9d90c365212e260399cc2a7",
168
+ "rmsnorm_f16.wgsl#main": "7ae20c01b0453306504f777c4a8de37364a8b45bef3c569b0572c7863740a6bd",
169
+ "rmsnorm_f16.wgsl#rmsnorm_small_f16": "e9ce0a7427831e4d1280691eb9ca0daab55d917d4f0d9975d4bcd7e8fe960941",
170
+ "rmsnorm.wgsl#main": "f516b3e4bde2015f2a207c3ca5b8c9820c7809fa8f8d0786f90c568e0f1ac077",
171
+ "rmsnorm.wgsl#main_cached": "bcae48e93d63e11701386850559fec5d4924128ad9d6ac1de27d1b8c34fc3dff",
172
+ "rmsnorm.wgsl#main_small": "95c65dcb443717c821c44de87dc89cdd4f6da97e08cccf8a9526f5cdd19dd33d",
173
+ "rmsnorm.wgsl#main_small_subgroup": "8ff72e445b662b1820be25a594fb0558007bfca7e50d2d1bc915df5774a76f6a",
174
+ "rmsnorm.wgsl#main_subgroup": "0aac7dd6455bf8f3d11917ec87fa71bb5fa9ef4f8e5bba02dfbfd11b92ccd01f",
175
175
  "rope_f16.wgsl#main": "a87f2964b77e851a2fbcc88305adeecaf8eba372291d83a71b817c8ef3da5c58",
176
176
  "rope_f16.wgsl#rope_compute_freqs": "c7aa2cb50420ab2709b20e0a33e93ab6aa4f50d2fa8d9f79b0bfcfefb2f7abab",
177
177
  "rope_f16.wgsl#rope_ntk_scaled": "46d2574b46539b289371c0c37a0b4e2fb21279134126b36a1fc30b98523905dd",
178
178
  "rope_f16.wgsl#rope_qk": "3bc01e167dc3cc5397bd7751e493311b4d3f5c6c0f6fad30234a740bb4c4507b",
179
179
  "rope_f16.wgsl#rope_yarn": "9b788dd05a1598aadcba5b0218d1666ce75faadbe32b71ee1def65ec23fb7dfe",
180
- "rope.wgsl#main": "b639fe8a54508115c82c13c923bfea89f59c6e15a5bef66bfc34e12f0ab4e32f",
181
- "rope.wgsl#rope_compute_freqs": "3d3a25df6036c9e87f6270bd6e1f67ffcd5cf3a1aac7cea19b1935fdb6ea6046",
182
- "rope.wgsl#rope_ntk_scaled": "14b044c70e8f7bc775606a88298fefbf1181024f45775497cf2350682879fcd4",
183
- "rope.wgsl#rope_qk": "d84c973564235347fa16b13552b5a6d7ab524f4ac9b65f0504fec07568b2f032",
184
- "rope.wgsl#rope_yarn": "00b473ce1d86f16444d948739da39e834e21d4db30ab09b0df7502d2e4c511b2",
180
+ "rope.wgsl#main": "4c803ad5e0dd065d5572c7aecc1def277c43884dcc02f22a9676914c10111400",
181
+ "rope.wgsl#rope_compute_freqs": "c9338316a31c8d467acbf8d512cb9616ee902d2619fa9187639f8ff5d78414ac",
182
+ "rope.wgsl#rope_ntk_scaled": "818f89865a3d1d6f2d49f671ac882d0fde9709702160a1ae8d9a8ef113afb511",
183
+ "rope.wgsl#rope_qk": "3d773c8b8c400142edc8a4111afb04a2bf75bdb109b2d41cbe5afdb72a959772",
184
+ "rope.wgsl#rope_yarn": "cb00e1cf87fac198dcf0fb0d4e2d5f6f99d2fed6dff0a089a96bb459917851d2",
185
185
  "sample_f16.wgsl#argmax": "30b9f199b49352e5aff91b7aa8016edb423ce33f77481c3a7bc184251856fb27",
186
186
  "sample_f16.wgsl#argmax_reduce": "a3ca27fc50b10c36c1676bdd5dbfe5edc67850cdd5c1af7a1d3ad70f830dd8a7",
187
187
  "sample_f16.wgsl#find_topk_phase1": "24e47e5ced28af802959e350ff0a6eec6b9a26f89fb38e222990eeaffb16bd36",
@@ -194,10 +194,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
194
194
  "sample.wgsl#find_topk_phase2": "940b216e605d22096da5aca65950a8030866fc5a39e7fdf484d69a832de1b63a",
195
195
  "sample.wgsl#sample_single_pass": "4412357e84113ee2f1bc0dc8bf89e314c2ab482c89c14ca016ea9949d16a9d0c",
196
196
  "sample.wgsl#softmax_and_sample": "7172c60e76430fbe130e530e3564b569b45eccf193987b32d6f52bd6bbcc9f08",
197
- "sana_linear_attention_apply_f16.wgsl#main": "e47366b94d40c4388e631b5bf93f8d61ef4e52cc65ffcd3b08d9d170616bb138",
198
- "sana_linear_attention_apply.wgsl#main": "59cad7974c644fd910af776ad85a9a2c43c00492d4d1152fdc8373ecbb8bba18",
199
- "sana_linear_attention_summary_f16.wgsl#main": "e3c040bb6469d37fc78eb22c1cc3e0456301607e461bbcdf5365a583c5d260d2",
200
- "sana_linear_attention_summary.wgsl#main": "20c7ecdbcd1c73c0f9937c3cdac07b4b6edfe8618bf6f66281806343fd41b122",
197
+ "sana_linear_attention_apply_f16.wgsl#main": "4a7426ce67eccfb70956feeae84275f4d3cc586c50e8442c07eb69993b378ab5",
198
+ "sana_linear_attention_apply.wgsl#main": "5f69e0bc1d9e2df5a61e13bd819313c8f7ff5dfc4b7d78e71d5152dc23b6a86c",
199
+ "sana_linear_attention_summary_f16.wgsl#main": "3abb736ead999485b5dac9c6b534143b464cfd0b5300c5e03c56cec03c8fa48e",
200
+ "sana_linear_attention_summary.wgsl#main": "be9c1fe861dcb5ea46927749764267656a69160bc8b732c6eb1a1bcb0c075589",
201
201
  "scale.wgsl#main": "44ec481452b586307957163e3d65c9d02561d3f2f3db633f906f5488b1ea1ca4",
202
202
  "scale.wgsl#main_inplace": "020824c7118a59c461ce81f1c2cd01b7c2a3f1aab326392b7d48d4448a0c2ed1",
203
203
  "scatter_add_dynamic_f16_weights.wgsl#scatter_add_dynamic": "42799e745bc445b199b1cbc384bc12bb9372ed1599af3260a803cefc8dd35497",
@@ -206,8 +206,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
206
206
  "scatter_add_vec4.wgsl#scatter_add_vec4": "247c4f23129cdfbb19593b17c5833d85048da117d77141c74bc4e16e691d94e1",
207
207
  "scatter_add.wgsl#main": "dea947b8014e9b674e4fec8f15fac6c926e8a3a4d8eff104b953d77f35a1ac35",
208
208
  "scatter_add.wgsl#scatter_add_accumulate": "561800af22dedae63f1abe69b757b0ef6c7832a2bff228c2262e0b7111d89247",
209
- "silu_f16.wgsl#main": "66a87710c1a680f9bf8d01f804709ecd2ce6a99bf2ee82a5f7e199e48eca6e29",
210
- "silu.wgsl#main": "e833f98b090845ada7b9ae8edc68cd6cfe823bd1d5ae7a6401f32b6e3bb6ee75",
209
+ "silu_f16.wgsl#main": "867634b20dcb75969e001966836892a2b7e01782b0028d94779c6ec21c254ae0",
210
+ "silu.wgsl#main": "7b52d30fb741beef2dbf728e0c4ecffe5b08d9661d63c306caecb4cb3ced85e5",
211
211
  "softmax_subgroup.wgsl#main_subgroup": "88472c0dab5f81c5f045f0ee79c4c3bb484791a4a2b84af398c019851438f091",
212
212
  "softmax_subgroup.wgsl#softmax_small_subgroup": "5d7bd1b698910a437197bf6c8b7f8b259036dd006ad5470f767b539dba8538f8",
213
213
  "softmax.wgsl#main": "45c5876806b442222d7e190e595f55a0079bae82e07d37586996c1a63790bb7a",
@@ -220,7 +220,7 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
220
220
  "topk.wgsl#main": "a18763303cd18e8a020e647f8a52f65403526849faf835d9f9394f634c3c97eb",
221
221
  "topk.wgsl#softmax_topk": "95ff3517da909e4bd4d0ff8d85b619bd250522943aeb9276375edc59f67e9604",
222
222
  "topk.wgsl#topk_2_small": "289eaa5c4f005e0aaf37dfe5343aeda30d9ab3929979dbf0cc3553f23e136807",
223
- "transpose.wgsl#main": "3f80f9db1b150f453a9abb62300bfef35e744ec6a8c774c7b64f2ad6088204e5",
224
- "upsample2d_f16.wgsl#main": "961f876852d9f4f1d3b224f39cdae3fa434378db12f7903d23a8dab6824644d4",
225
- "upsample2d.wgsl#main": "9ff2d864c58a6776b7e03221bc6a7409e9e646fbabe7d9a33f4e06ed62120e43",
223
+ "transpose.wgsl#main": "8caf8664dfc579b4e92edce50783263c535764006290cc7902108f26586113a2",
224
+ "upsample2d_f16.wgsl#main": "43cee5f2503cb4b6caea45e9842f8961ce313b02eb8ed23a97d6967113ce521c",
225
+ "upsample2d.wgsl#main": "6de9172ad3d6940dd3c94470a105755a33760e66a84d6e9e96ec4d6a07dc4a25"
226
226
  });
@@ -4,6 +4,17 @@ let cachedRegistry = null;
4
4
 
5
5
  let registryUrl = null;
6
6
 
7
+ function deepFreeze(value, seen = new WeakSet()) {
8
+ if (!value || typeof value !== 'object' || seen.has(value)) {
9
+ return value;
10
+ }
11
+ seen.add(value);
12
+ for (const entry of Object.values(value)) {
13
+ deepFreeze(entry, seen);
14
+ }
15
+ return Object.freeze(value);
16
+ }
17
+
7
18
  export function setRegistryUrl(url) {
8
19
  registryUrl = url;
9
20
  cachedRegistry = null;
@@ -15,7 +26,9 @@ export async function getRegistry() {
15
26
  }
16
27
 
17
28
  const source = registryUrl || './registry.json';
18
- cachedRegistry = await loadJson(source, import.meta.url, 'Failed to load kernel registry');
29
+ cachedRegistry = deepFreeze(
30
+ await loadJson(source, import.meta.url, 'Failed to load kernel registry')
31
+ );
19
32
  return cachedRegistry;
20
33
  }
21
34
 
@@ -1826,7 +1826,7 @@
1826
1826
  }
1827
1827
  ],
1828
1828
  "baseUniforms": {
1829
- "size": 16,
1829
+ "size": 32,
1830
1830
  "fields": [
1831
1831
  {
1832
1832
  "name": "hidden_size",
@@ -1847,6 +1847,26 @@
1847
1847
  "name": "has_residual",
1848
1848
  "type": "u32",
1849
1849
  "offset": 12
1850
+ },
1851
+ {
1852
+ "name": "token_stride",
1853
+ "type": "u32",
1854
+ "offset": 16
1855
+ },
1856
+ {
1857
+ "name": "_pad0",
1858
+ "type": "u32",
1859
+ "offset": 20
1860
+ },
1861
+ {
1862
+ "name": "_pad1",
1863
+ "type": "u32",
1864
+ "offset": 24
1865
+ },
1866
+ {
1867
+ "name": "_pad2",
1868
+ "type": "u32",
1869
+ "offset": 28
1850
1870
  }
1851
1871
  ]
1852
1872
  },
@@ -2154,7 +2174,7 @@
2154
2174
  }
2155
2175
  ],
2156
2176
  "baseUniforms": {
2157
- "size": 16,
2177
+ "size": 32,
2158
2178
  "fields": [
2159
2179
  {
2160
2180
  "name": "M",
@@ -2182,7 +2202,9 @@
2182
2202
  1,
2183
2203
  1
2184
2204
  ],
2185
- "requires": []
2205
+ "requires": [
2206
+ "shader-f16"
2207
+ ]
2186
2208
  },
2187
2209
  "f16": {
2188
2210
  "wgsl": "matmul_gemv_residual_f16.wgsl",
@@ -2357,12 +2379,12 @@
2357
2379
  "offset": 20
2358
2380
  },
2359
2381
  {
2360
- "name": "_pad0",
2382
+ "name": "rotary_dim",
2361
2383
  "type": "u32",
2362
2384
  "offset": 24
2363
2385
  },
2364
2386
  {
2365
- "name": "_pad1",
2387
+ "name": "interleaved",
2366
2388
  "type": "u32",
2367
2389
  "offset": 28
2368
2390
  }
@@ -3637,7 +3659,7 @@
3637
3659
  }
3638
3660
  ],
3639
3661
  "baseUniforms": {
3640
- "size": 16,
3662
+ "size": 32,
3641
3663
  "fields": [
3642
3664
  {
3643
3665
  "name": "num_tokens",
@@ -4096,7 +4118,7 @@
4096
4118
  }
4097
4119
  ],
4098
4120
  "baseUniforms": {
4099
- "size": 16,
4121
+ "size": 32,
4100
4122
  "fields": [
4101
4123
  {
4102
4124
  "name": "num_tokens",
@@ -4117,6 +4139,26 @@
4117
4139
  "name": "bias_offset",
4118
4140
  "type": "u32",
4119
4141
  "offset": 12
4142
+ },
4143
+ {
4144
+ "name": "token_stride",
4145
+ "type": "u32",
4146
+ "offset": 16
4147
+ },
4148
+ {
4149
+ "name": "_pad0",
4150
+ "type": "u32",
4151
+ "offset": 20
4152
+ },
4153
+ {
4154
+ "name": "_pad1",
4155
+ "type": "u32",
4156
+ "offset": 24
4157
+ },
4158
+ {
4159
+ "name": "_pad2",
4160
+ "type": "u32",
4161
+ "offset": 28
4120
4162
  }
4121
4163
  ]
4122
4164
  },
@@ -43,7 +43,7 @@ export function resolvePreset(id: string): PresetSchema;
43
43
  export function detectPreset(
44
44
  config: RawModelConfigSchema,
45
45
  architecture?: string
46
- ): string;
46
+ ): string | null;
47
47
 
48
48
  /**
49
49
  * Build a fully resolved config by merging:
@@ -161,8 +161,7 @@ export function detectPreset(
161
161
  }
162
162
  }
163
163
 
164
- // Default to transformer
165
- return 'transformer';
164
+ return null;
166
165
  }
167
166
 
168
167
  // =============================================================================
@@ -178,6 +177,17 @@ export function resolveConfig(
178
177
  (manifest.config || {}),
179
178
  manifest.modelType
180
179
  );
180
+ if (!id) {
181
+ const modelId = String(manifest?.modelId ?? 'unknown').trim() || 'unknown';
182
+ const modelType = String(manifest?.config?.model_type ?? 'unknown').trim() || 'unknown';
183
+ const architecture = String(manifest?.modelType ?? 'unknown').trim() || 'unknown';
184
+ throw createDopplerError(
185
+ ERROR_CODES.CONFIG_PRESET_UNKNOWN,
186
+ `Could not detect a preset for manifest "${modelId}" ` +
187
+ `(architecture="${architecture}", model_type="${modelType}"). ` +
188
+ 'Provide an explicit presetId instead of relying on the generic transformer fallback.'
189
+ );
190
+ }
181
191
 
182
192
  // Get resolved preset
183
193
  const preset = resolvePreset(id);
@@ -326,6 +336,8 @@ function assertArchitecture(manifest, architecture) {
326
336
 
327
337
  function extractArchitectureFromConfig(config) {
328
338
  const nestedTextConfig = getNestedTextConfig(config);
339
+ const topLevelRoPEParameters = getFlatRoPEParameters(config);
340
+ const nestedRoPEParameters = getFlatRoPEParameters(nestedTextConfig);
329
341
  return {
330
342
  numLayers: config.num_hidden_layers ?? nestedTextConfig?.num_hidden_layers ?? config.n_layer ?? config.blockCount,
331
343
  hiddenSize: config.hidden_size ?? nestedTextConfig?.hidden_size ?? config.n_embd ?? config.embeddingLength,
@@ -335,13 +347,20 @@ function extractArchitectureFromConfig(config) {
335
347
  headDim: config.head_dim ?? nestedTextConfig?.head_dim,
336
348
  vocabSize: config.vocab_size ?? nestedTextConfig?.vocab_size ?? config.vocabSize,
337
349
  maxSeqLen: config.max_position_embeddings ?? nestedTextConfig?.max_position_embeddings ?? config.n_positions ?? config.contextLength,
338
- ropeTheta: config.rope_theta ?? nestedTextConfig?.rope_theta ?? config.ropeFreqBase,
350
+ ropeTheta: topLevelRoPEParameters?.rope_theta
351
+ ?? nestedRoPEParameters?.rope_theta
352
+ ?? config.rope_theta
353
+ ?? nestedTextConfig?.rope_theta
354
+ ?? config.ropeFreqBase,
339
355
  rmsNormEps: config.rms_norm_eps ?? nestedTextConfig?.rms_norm_eps ?? config.attentionLayerNormRMSEpsilon,
340
356
  };
341
357
  }
342
358
 
343
359
  function extractInferenceFromConfig(config) {
344
360
  const nestedTextConfig = getNestedTextConfig(config);
361
+ const topLevelRoPEParameters = getFlatRoPEParameters(config);
362
+ const nestedRoPEParameters = getFlatRoPEParameters(nestedTextConfig);
363
+ const ropeParameters = nestedRoPEParameters ?? topLevelRoPEParameters;
345
364
  return {
346
365
  attention: {
347
366
  slidingWindow: config.sliding_window ?? nestedTextConfig?.sliding_window,
@@ -355,7 +374,13 @@ function extractInferenceFromConfig(config) {
355
374
  },
356
375
  pipeline: config.pipeline ?? nestedTextConfig?.pipeline,
357
376
  rope: {
358
- ropeTheta: config.rope_theta ?? nestedTextConfig?.rope_theta ?? config.ropeFreqBase,
377
+ ropeTheta: ropeParameters?.rope_theta
378
+ ?? config.rope_theta
379
+ ?? nestedTextConfig?.rope_theta
380
+ ?? config.ropeFreqBase,
381
+ mropeInterleaved: ropeParameters?.mrope_interleaved,
382
+ mropeSection: Array.isArray(ropeParameters?.mrope_section) ? ropeParameters.mrope_section : undefined,
383
+ partialRotaryFactor: ropeParameters?.partial_rotary_factor,
359
384
  ropeScalingType: config.rope_scaling_type ?? nestedTextConfig?.rope_scaling_type,
360
385
  ropeScalingFactor: config.rope_scaling_factor ?? nestedTextConfig?.rope_scaling_factor,
361
386
  },
@@ -375,6 +400,20 @@ function getNestedTextConfig(config) {
375
400
  return null;
376
401
  }
377
402
 
403
+ function getFlatRoPEParameters(config) {
404
+ if (!config || typeof config !== 'object' || Array.isArray(config)) {
405
+ return null;
406
+ }
407
+ const ropeParameters = config.rope_parameters;
408
+ if (!ropeParameters || typeof ropeParameters !== 'object' || Array.isArray(ropeParameters)) {
409
+ return null;
410
+ }
411
+ if (ropeParameters.full_attention || ropeParameters.sliding_attention) {
412
+ return null;
413
+ }
414
+ return ropeParameters;
415
+ }
416
+
378
417
  function extractTokenizerFromManifest(manifest) {
379
418
  if (!manifest.tokenizer) return {};
380
419
 
@@ -77,6 +77,7 @@ function buildWitnessMergeManifest() {
77
77
  embeddingTranspose: false,
78
78
  embeddingVocabSize: 1024,
79
79
  },
80
+ pipeline: 'decode-only',
80
81
  layerPattern: null,
81
82
  chatTemplate: {
82
83
  type: 'gemma',
@@ -114,6 +115,13 @@ export function buildMergeContractArtifact() {
114
115
  && mergedUndefined._sources.get('inference.defaultKernelPath') === 'manifest',
115
116
  `value=${mergedUndefined.inference.defaultKernelPath}, source=${mergedUndefined._sources.get('inference.defaultKernelPath')}`
116
117
  );
118
+ recordCheck(
119
+ checks,
120
+ 'runtime.mergeConfig.pipeline_preserves_manifest_value',
121
+ mergedUndefined.inference.pipeline === 'decode-only'
122
+ && mergedUndefined._sources.get('inference.pipeline') === 'manifest',
123
+ `value=${String(mergedUndefined.inference.pipeline)}, source=${mergedUndefined._sources.get('inference.pipeline')}`
124
+ );
117
125
 
118
126
  const mergedNull = mergeConfig(buildWitnessMergeManifest(), {
119
127
  defaultKernelPath: null,
@@ -152,6 +160,35 @@ export function buildMergeContractArtifact() {
152
160
  `value=${String(runtimeConfig.runtime.inference.chatTemplate.enabled)}`
153
161
  );
154
162
 
163
+ const isolatedConfigA = createDopplerConfig();
164
+ isolatedConfigA.runtime.inference.compute.activationDtype = 'f32';
165
+ const isolatedConfigB = createDopplerConfig();
166
+ recordCheck(
167
+ checks,
168
+ 'runtime.schema.defaults_are_isolated_per_instance',
169
+ isolatedConfigB.runtime.inference.compute.activationDtype !== 'f32'
170
+ && isolatedConfigA.runtime.inference.compute !== isolatedConfigB.runtime.inference.compute,
171
+ `configA=${isolatedConfigA.runtime.inference.compute.activationDtype}, configB=${isolatedConfigB.runtime.inference.compute.activationDtype}`,
172
+ 'actual'
173
+ );
174
+
175
+ const calibrateConfig = createDopplerConfig({
176
+ runtime: {
177
+ shared: {
178
+ tooling: {
179
+ intent: 'calibrate',
180
+ },
181
+ },
182
+ },
183
+ });
184
+ recordCheck(
185
+ checks,
186
+ 'runtime.schema.calibrate_does_not_mutate_kernel_warmup_defaults',
187
+ calibrateConfig.runtime.shared.kernelWarmup.prewarm === false,
188
+ `prewarm=${String(calibrateConfig.runtime.shared.kernelWarmup.prewarm)}`,
189
+ 'actual'
190
+ );
191
+
155
192
  const overlaySources = new Map();
156
193
  const chosenRuntimeValue = chooseDefinedWithSource(
157
194
  'inference.defaultKernelPath',
@@ -252,6 +289,24 @@ export function buildMergeContractArtifact() {
252
289
  'actual'
253
290
  );
254
291
 
292
+ let invalidShallowOverrideError = null;
293
+ try {
294
+ mergeShallowObject(
295
+ { type: 'base', enabled: true },
296
+ null
297
+ );
298
+ } catch (error) {
299
+ invalidShallowOverrideError = error;
300
+ }
301
+ recordCheck(
302
+ checks,
303
+ 'runtime.mergeShallowObject.invalid_explicit_override_fails_closed',
304
+ invalidShallowOverrideError instanceof Error
305
+ && /shallow object overrides must be plain objects/.test(invalidShallowOverrideError.message),
306
+ `error=${invalidShallowOverrideError?.message ?? 'none'}`,
307
+ 'actual'
308
+ );
309
+
255
310
  const layeredAttention = mergeLayeredShallowObjects(
256
311
  { slidingWindow: 4096, attentionBias: false },
257
312
  { slidingWindow: 2048 },
@@ -273,7 +328,7 @@ export function buildMergeContractArtifact() {
273
328
  onIncompatible: 'error',
274
329
  },
275
330
  {
276
- allowSources: ['runtime', 'execution-v0'],
331
+ allowSources: ['config', 'execution-v0'],
277
332
  onIncompatible: 'remap',
278
333
  }
279
334
  );
@@ -283,7 +338,7 @@ export function buildMergeContractArtifact() {
283
338
  Array.isArray(mergedKernelPathPolicy.sourceScope)
284
339
  && Array.isArray(mergedKernelPathPolicy.allowSources)
285
340
  && mergedKernelPathPolicy.sourceScope.length === 2
286
- && mergedKernelPathPolicy.sourceScope[0] === 'runtime'
341
+ && mergedKernelPathPolicy.sourceScope[0] === 'config'
287
342
  && mergedKernelPathPolicy.allowSources[1] === 'execution-v0'
288
343
  && mergedKernelPathPolicy.onIncompatible === 'remap',
289
344
  `sourceScope=${JSON.stringify(mergedKernelPathPolicy.sourceScope)}, allowSources=${JSON.stringify(mergedKernelPathPolicy.allowSources)}`,
@@ -294,7 +349,7 @@ export function buildMergeContractArtifact() {
294
349
  runtime: {
295
350
  inference: {
296
351
  kernelPathPolicy: {
297
- allowSources: ['runtime', 'execution-v0'],
352
+ allowSources: ['config', 'execution-v0'],
298
353
  },
299
354
  },
300
355
  },
@@ -303,7 +358,7 @@ export function buildMergeContractArtifact() {
303
358
  checks,
304
359
  'runtime.schema.kernelPathPolicy.helper_is_used',
305
360
  Array.isArray(runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope)
306
- && runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope[0] === 'runtime'
361
+ && runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope[0] === 'config'
307
362
  && runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.allowSources[1] === 'execution-v0',
308
363
  `policy=${JSON.stringify(runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy)}`,
309
364
  'actual'
@@ -15,9 +15,14 @@ export function chooseDefinedWithSource(path, overrideValue, fallbackValue, sour
15
15
  }
16
16
 
17
17
  export function mergeShallowObject(base, override) {
18
- if (!override || typeof override !== 'object' || Array.isArray(override)) {
18
+ if (override === undefined) {
19
19
  return base;
20
20
  }
21
+ if (override === null || typeof override !== 'object' || Array.isArray(override)) {
22
+ throw new Error(
23
+ 'DopplerConfigError: shallow object overrides must be plain objects when provided explicitly.'
24
+ );
25
+ }
21
26
  return { ...base, ...override };
22
27
  }
23
28
 
@@ -29,17 +34,133 @@ export function replaceSubtree(overrideValue, fallbackValue) {
29
34
  return chooseNullish(overrideValue, fallbackValue);
30
35
  }
31
36
 
37
+ const DEFAULT_KERNEL_PATH_POLICY = Object.freeze({
38
+ mode: 'locked',
39
+ sourceScope: Object.freeze(['model', 'manifest']),
40
+ onIncompatible: 'error',
41
+ });
42
+
43
+ const VALID_KERNEL_PATH_POLICY_SOURCES = new Set([
44
+ 'model',
45
+ 'manifest',
46
+ 'config',
47
+ 'execution-v0',
48
+ ]);
49
+
50
+ function normalizeKernelPathPolicyMode(value) {
51
+ if (value === undefined) {
52
+ return DEFAULT_KERNEL_PATH_POLICY.mode;
53
+ }
54
+ const normalized = String(value).trim().toLowerCase();
55
+ if (normalized === 'locked' || normalized === 'capability-aware') {
56
+ return normalized;
57
+ }
58
+ throw new Error(
59
+ `DopplerConfigError: runtime.inference.kernelPathPolicy.mode must be "locked" or "capability-aware"; got ${JSON.stringify(value)}.`
60
+ );
61
+ }
62
+
63
+ function normalizeKernelPathPolicySource(source) {
64
+ const normalized = String(source ?? '').trim().toLowerCase();
65
+ if (!normalized) {
66
+ throw new Error(
67
+ 'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be non-empty strings.'
68
+ );
69
+ }
70
+ if (normalized === 'runtime') {
71
+ throw new Error(
72
+ 'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "runtime". Use "config".'
73
+ );
74
+ }
75
+ if (normalized === 'execution_v0') {
76
+ throw new Error(
77
+ 'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "execution_v0". Use "execution-v0".'
78
+ );
79
+ }
80
+ if (!VALID_KERNEL_PATH_POLICY_SOURCES.has(normalized)) {
81
+ throw new Error(
82
+ `DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be model|manifest|config|execution-v0; got ${JSON.stringify(source)}.`
83
+ );
84
+ }
85
+ return normalized;
86
+ }
87
+
88
+ function normalizeKernelPathPolicySourceScope(value) {
89
+ if (value === undefined) {
90
+ return [...DEFAULT_KERNEL_PATH_POLICY.sourceScope];
91
+ }
92
+ if (!Array.isArray(value) || value.length === 0) {
93
+ throw new Error(
94
+ 'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope must be a non-empty array.'
95
+ );
96
+ }
97
+ return [...new Set(value.map((source) => normalizeKernelPathPolicySource(source)))];
98
+ }
99
+
100
+ function normalizeKernelPathPolicyOnIncompatible(value) {
101
+ if (value === undefined) {
102
+ return DEFAULT_KERNEL_PATH_POLICY.onIncompatible;
103
+ }
104
+ const normalized = String(value).trim().toLowerCase();
105
+ if (normalized === 'error' || normalized === 'remap') {
106
+ return normalized;
107
+ }
108
+ throw new Error(
109
+ `DopplerConfigError: runtime.inference.kernelPathPolicy.onIncompatible must be "error" or "remap"; got ${JSON.stringify(value)}.`
110
+ );
111
+ }
112
+
113
+ function assertKernelPathPolicyObject(value, label) {
114
+ if (value === undefined) {
115
+ return;
116
+ }
117
+ if (value === null) {
118
+ throw new Error(`DopplerConfigError: ${label} must not be null.`);
119
+ }
120
+ if (typeof value !== 'object' || Array.isArray(value)) {
121
+ throw new Error(
122
+ `DopplerConfigError: ${label} must be an object.`
123
+ );
124
+ }
125
+ }
126
+
127
+ function assertKernelPathPolicySourceAliasesCompatible(policy, label) {
128
+ if (!policy || policy.sourceScope === undefined || policy.allowSources === undefined) {
129
+ return;
130
+ }
131
+
132
+ const sourceScope = normalizeKernelPathPolicySourceScope(policy.sourceScope);
133
+ const allowSources = normalizeKernelPathPolicySourceScope(policy.allowSources);
134
+ const aliasesMatch = sourceScope.length === allowSources.length
135
+ && sourceScope.every((value, index) => value === allowSources[index]);
136
+
137
+ if (!aliasesMatch) {
138
+ throw new Error(
139
+ `DopplerConfigError: ${label}.sourceScope and ${label}.allowSources must match exactly when both are provided.`
140
+ );
141
+ }
142
+ }
143
+
32
144
  export function mergeKernelPathPolicy(basePolicy, overridePolicy) {
145
+ assertKernelPathPolicyObject(basePolicy, 'runtime.inference.kernelPathPolicy');
146
+ assertKernelPathPolicyObject(overridePolicy, 'runtime.inference.kernelPathPolicy');
147
+ assertKernelPathPolicySourceAliasesCompatible(basePolicy, 'runtime.inference.kernelPathPolicy');
148
+ assertKernelPathPolicySourceAliasesCompatible(overridePolicy, 'runtime.inference.kernelPathPolicy');
33
149
  const base = basePolicy ?? {};
34
150
  const override = overridePolicy ?? {};
35
- const baseSourceScope = base.sourceScope ?? base.allowSources;
36
- const overrideSourceScope = override.sourceScope ?? override.allowSources;
37
- const sourceScope = overrideSourceScope ?? baseSourceScope;
151
+ const sourceScope = normalizeKernelPathPolicySourceScope(
152
+ override.sourceScope
153
+ ?? override.allowSources
154
+ ?? base.sourceScope
155
+ ?? base.allowSources
156
+ );
38
157
  return {
39
- mode: override.mode ?? base.mode,
158
+ mode: normalizeKernelPathPolicyMode(override.mode ?? base.mode),
40
159
  sourceScope,
41
- allowSources: sourceScope,
42
- onIncompatible: override.onIncompatible ?? base.onIncompatible,
160
+ allowSources: [...sourceScope],
161
+ onIncompatible: normalizeKernelPathPolicyOnIncompatible(
162
+ override.onIncompatible ?? base.onIncompatible
163
+ ),
43
164
  };
44
165
  }
45
166
 
@@ -54,6 +54,7 @@ export interface MergedInferenceConfig {
54
54
  ffn: ManifestFFNSchema;
55
55
  rope: ManifestRoPESchema;
56
56
  output: ManifestOutputSchema;
57
+ pipeline: ManifestInferenceSchema['pipeline'];
57
58
  layerPattern: ManifestLayerPatternSchema | null;
58
59
  chatTemplate: ManifestChatTemplateSchema;
59
60
  defaultKernelPath: string | null;