@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355)
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -1,4 +1,5 @@
1
1
  import { MB } from './units.schema.js';
2
+ import { validateRequiredInferenceFields } from '../../inference/pipelines/text/config.js';
2
3
 
3
4
  // =============================================================================
4
5
  // Hash & Versioning
@@ -39,7 +40,7 @@ export const DEFAULT_MANIFEST_INFERENCE = {
39
40
  schema: null,
40
41
  presetId: null,
41
42
  attention: {
42
- queryPreAttnScalar: 8, // sqrt(64) for standard 64-dim heads
43
+ queryPreAttnScalar: 64, // headDim for standard 64-dim heads; attnScale = 1/sqrt(scalar)
43
44
  attnLogitSoftcapping: null, // No softcapping (null = disabled)
44
45
  slidingWindow: null, // Full attention (null = no sliding window)
45
46
  queryKeyNorm: false,
@@ -70,9 +71,9 @@ export const DEFAULT_MANIFEST_INFERENCE = {
70
71
  ropeLocalScalingType: null, // Local scaling policy (null = no scaling)
71
72
  ropeLocalScalingFactor: 1.0,
72
73
  // YARN parameters - only relevant when ropeScalingType='yarn'
73
- yarnBetaFast: 32,
74
- yarnBetaSlow: 1,
75
- yarnOriginalMaxPos: 4096,
74
+ yarnBetaFast: null,
75
+ yarnBetaSlow: null,
76
+ yarnOriginalMaxPos: null,
76
77
  // Local YARN parameters - only relevant when ropeLocalScalingType='yarn'
77
78
  ropeLocalYarnBetaFast: null,
78
79
  ropeLocalYarnBetaSlow: null,
@@ -124,6 +125,18 @@ export function validateManifestInference(
124
125
  `Please re-convert the model using the latest converter.`
125
126
  );
126
127
  }
128
+
129
+ if (manifest.modelType === 'diffusion' || manifest.modelType === 'energy') {
130
+ return;
131
+ }
132
+
133
+ const inference = typeof structuredClone === 'function'
134
+ ? structuredClone(manifest.inference)
135
+ : JSON.parse(JSON.stringify(manifest.inference));
136
+ validateRequiredInferenceFields(
137
+ inference,
138
+ manifest.modelId ?? 'unknown'
139
+ );
127
140
  }
128
141
 
129
142
  export function hasInferenceConfig(
@@ -35,7 +35,7 @@ export const DEFAULT_STORAGE_ALIGNMENT_CONFIG = {
35
35
  export const DEFAULT_STORAGE_BACKEND_CONFIG = {
36
36
  backend: 'auto', // auto | opfs | indexeddb | memory
37
37
  opfs: {
38
- useSyncAccessHandle: true,
38
+ useSyncAccessHandle: false,
39
39
  maxConcurrentHandles: 2,
40
40
  },
41
41
  indexeddb: {
@@ -2,9 +2,17 @@ import { createDopplerConfig, DEFAULT_TRAINING_SETTINGS } from './schema/index.j
2
2
  import { validateDistillTrainingConfig } from './schema/distill-training.schema.js';
3
3
  import { validateUlTrainingConfig } from './schema/ul-training.schema.js';
4
4
 
5
+ function cloneConfigTree(value) {
6
+ if (typeof structuredClone === 'function') {
7
+ return structuredClone(value);
8
+ }
9
+ return JSON.parse(JSON.stringify(value));
10
+ }
11
+
5
12
  function mergeTrainingSettings(base, overrides) {
13
+ const baseConfig = cloneConfigTree(base);
6
14
  if (!overrides) {
7
- const merged = { ...base };
15
+ const merged = baseConfig;
8
16
  validateDistillTrainingConfig(merged.distill);
9
17
  validateUlTrainingConfig(merged.ul);
10
18
  if (merged.distill.enabled === true && merged.ul.enabled === true) {
@@ -14,42 +22,42 @@ function mergeTrainingSettings(base, overrides) {
14
22
  }
15
23
 
16
24
  const merged = {
17
- enabled: overrides.enabled ?? base.enabled,
18
- lora: { ...base.lora, ...overrides.lora },
25
+ enabled: overrides.enabled ?? baseConfig.enabled,
26
+ lora: { ...baseConfig.lora, ...overrides.lora },
19
27
  optimizer: {
20
- ...base.optimizer,
28
+ ...baseConfig.optimizer,
21
29
  ...overrides.optimizer,
22
- scheduler: { ...base.optimizer.scheduler, ...overrides.optimizer?.scheduler },
30
+ scheduler: { ...baseConfig.optimizer.scheduler, ...overrides.optimizer?.scheduler },
23
31
  },
24
- gradient: { ...base.gradient, ...overrides.gradient },
25
- precision: { ...base.precision, ...overrides.precision },
26
- attention: { ...base.attention, ...overrides.attention },
32
+ gradient: { ...baseConfig.gradient, ...overrides.gradient },
33
+ precision: { ...baseConfig.precision, ...overrides.precision },
34
+ attention: { ...baseConfig.attention, ...overrides.attention },
27
35
  telemetry: {
28
- ...base.telemetry,
36
+ ...baseConfig.telemetry,
29
37
  ...overrides.telemetry,
30
38
  alerts: {
31
- ...base.telemetry.alerts,
39
+ ...baseConfig.telemetry.alerts,
32
40
  ...overrides.telemetry?.alerts,
33
41
  thresholds: {
34
- ...base.telemetry.alerts.thresholds,
42
+ ...baseConfig.telemetry.alerts.thresholds,
35
43
  ...overrides.telemetry?.alerts?.thresholds,
36
44
  },
37
45
  },
38
46
  },
39
- lossScaling: { ...base.lossScaling, ...overrides.lossScaling },
47
+ lossScaling: { ...baseConfig.lossScaling, ...overrides.lossScaling },
40
48
  distill: {
41
- ...base.distill,
49
+ ...baseConfig.distill,
42
50
  ...overrides.distill,
43
- freeze: { ...base.distill.freeze, ...overrides.distill?.freeze },
51
+ freeze: { ...baseConfig.distill.freeze, ...overrides.distill?.freeze },
44
52
  },
45
53
  ul: {
46
- ...base.ul,
54
+ ...baseConfig.ul,
47
55
  ...overrides.ul,
48
- noiseSchedule: { ...base.ul.noiseSchedule, ...overrides.ul?.noiseSchedule },
49
- priorAlignment: { ...base.ul.priorAlignment, ...overrides.ul?.priorAlignment },
50
- decoderSigmoidWeight: { ...base.ul.decoderSigmoidWeight, ...overrides.ul?.decoderSigmoidWeight },
51
- lossWeights: { ...base.ul.lossWeights, ...overrides.ul?.lossWeights },
52
- freeze: { ...base.ul.freeze, ...overrides.ul?.freeze },
56
+ noiseSchedule: { ...baseConfig.ul.noiseSchedule, ...overrides.ul?.noiseSchedule },
57
+ priorAlignment: { ...baseConfig.ul.priorAlignment, ...overrides.ul?.priorAlignment },
58
+ decoderSigmoidWeight: { ...baseConfig.ul.decoderSigmoidWeight, ...overrides.ul?.decoderSigmoidWeight },
59
+ lossWeights: { ...baseConfig.ul.lossWeights, ...overrides.ul?.lossWeights },
60
+ freeze: { ...baseConfig.ul.freeze, ...overrides.ul?.freeze },
53
61
  },
54
62
  };
55
63
  validateDistillTrainingConfig(merged.distill);
@@ -74,7 +82,7 @@ export function createTrainingConfig(overrides = {}) {
74
82
 
75
83
  export const DEFAULT_TRAINING_CONFIG = createTrainingConfig();
76
84
 
77
- let trainingConfig = DEFAULT_TRAINING_CONFIG;
85
+ let trainingConfig = createTrainingConfig();
78
86
 
79
87
  export function getTrainingConfig() {
80
88
  return trainingConfig;
@@ -86,6 +94,6 @@ export function setTrainingConfig(overrides) {
86
94
  }
87
95
 
88
96
  export function resetTrainingConfig() {
89
- trainingConfig = DEFAULT_TRAINING_CONFIG;
97
+ trainingConfig = createTrainingConfig();
90
98
  return trainingConfig;
91
99
  }
@@ -17,6 +17,8 @@ import { sanitizeModelId } from './core.js';
17
17
  import { classifyTensorRole } from '../formats/rdrr/index.js';
18
18
  import { selectRuleValue } from '../rules/rule-registry.js';
19
19
  import { buildKernelRefFromKernelEntry, isKernelRefBoundToKernel } from '../config/kernels/kernel-ref.js';
20
+ import { mergeLayeredShallowObjects } from '../config/merge-helpers.js';
21
+ import { buildExecutionV0ContractArtifact } from '../config/execution-v0-contract-check.js';
20
22
 
21
23
  const KNOWN_MODEL_PRESETS = new Set(listPresets());
22
24
  const CONVERSION_SUPPORTED_PRESETS = [...KNOWN_MODEL_PRESETS]
@@ -115,7 +117,10 @@ function isLikelyEmbeddingGemma(rawConfig, architectureHint) {
115
117
 
116
118
  export function inferSourceWeightQuantization(tensors) {
117
119
  if (!Array.isArray(tensors) || tensors.length === 0) {
118
- return 'f16';
120
+ throw new Error(
121
+ 'Cannot infer source weight quantization: no tensors provided. ' +
122
+ 'Set converterConfig.quantization.weights explicitly.'
123
+ );
119
124
  }
120
125
  const weightTensors = [];
121
126
  for (const tensor of tensors) {
@@ -126,7 +131,12 @@ export function inferSourceWeightQuantization(tensors) {
126
131
  weightTensors.push({ name, dtype });
127
132
  }
128
133
  const dtypes = new Set(weightTensors.map((tensor) => tensor.dtype));
129
- if (dtypes.size === 0) return 'f16';
134
+ if (dtypes.size === 0) {
135
+ throw new Error(
136
+ 'Cannot infer source weight quantization: no recognizable weight dtypes found. ' +
137
+ 'Set converterConfig.quantization.weights explicitly.'
138
+ );
139
+ }
130
140
  if (dtypes.size > 1) {
131
141
  const detail = Array.from(dtypes)
132
142
  .sort()
@@ -179,9 +189,6 @@ export function validateDefaultKernelPath(inference, context = {}) {
179
189
  && expectedComputeDtype !== kernelActivationDtype
180
190
  ) {
181
191
  const presetId = context?.presetId ?? 'unknown';
182
- if (presetId === 'lfm2' && expectedComputeDtype === 'f32' && kernelActivationDtype === 'f16') {
183
- return;
184
- }
185
192
  throw new Error(
186
193
  `Invalid defaultKernelPath "${inference.defaultKernelPath}" for preset "${presetId}" ` +
187
194
  `(weights=${quantizationInfo?.weights ?? 'unknown'}, compute=${expectedComputeDtype}, ` +
@@ -208,6 +215,61 @@ function cloneJson(value) {
208
215
  return JSON.parse(JSON.stringify(value));
209
216
  }
210
217
 
218
+ function mergeExecutionV0SessionDefaults(baseSessionDefaults, overrideSessionDefaults) {
219
+ if (!overrideSessionDefaults) {
220
+ return cloneJson(baseSessionDefaults);
221
+ }
222
+ const base = cloneJson(baseSessionDefaults ?? {});
223
+ const override = cloneJson(overrideSessionDefaults);
224
+ const baseCompute = base.compute ?? {};
225
+ const overrideCompute = override.compute ?? {};
226
+
227
+ return {
228
+ ...base,
229
+ ...override,
230
+ compute: {
231
+ ...baseCompute,
232
+ ...overrideCompute,
233
+ defaults: mergeLayeredShallowObjects(
234
+ baseCompute.defaults ?? {},
235
+ overrideCompute.defaults ?? {}
236
+ ),
237
+ kernelProfiles: Object.prototype.hasOwnProperty.call(overrideCompute, 'kernelProfiles')
238
+ ? overrideCompute.kernelProfiles
239
+ : baseCompute.kernelProfiles,
240
+ },
241
+ kvcache: Object.prototype.hasOwnProperty.call(override, 'kvcache')
242
+ ? (
243
+ override.kvcache === null
244
+ ? null
245
+ : mergeLayeredShallowObjects(base.kvcache ?? {}, override.kvcache ?? {})
246
+ )
247
+ : base.kvcache,
248
+ decodeLoop: Object.prototype.hasOwnProperty.call(override, 'decodeLoop')
249
+ ? (
250
+ override.decodeLoop === null
251
+ ? null
252
+ : mergeLayeredShallowObjects(base.decodeLoop ?? {}, override.decodeLoop ?? {})
253
+ )
254
+ : base.decodeLoop,
255
+ };
256
+ }
257
+
258
+ function assertExecutionV0ConversionContract(manifestInference, modelId) {
259
+ if (!manifestInference?.execution) {
260
+ return;
261
+ }
262
+ const artifact = buildExecutionV0ContractArtifact(manifestInference, {
263
+ modelId: modelId ?? 'converted-model',
264
+ });
265
+ if (!artifact?.ok) {
266
+ const detail = artifact?.errors?.join(' ') ?? 'unknown execution-v0 contract error';
267
+ throw new Error(
268
+ `converterConfig.inference produced an invalid execution-v0 contract: ${detail}`
269
+ );
270
+ }
271
+ }
272
+
211
273
  function readConverterSessionDefaultsOverride(converterConfig) {
212
274
  const raw = converterConfig?.inference?.sessionDefaults;
213
275
  if (raw == null) return null;
@@ -219,6 +281,26 @@ function readConverterSessionDefaultsOverride(converterConfig) {
219
281
  return cloneJson(raw);
220
282
  }
221
283
 
284
/**
 * Guard the session defaults of a manifest that carries no execution block.
 *
 * Absent (null/undefined) session defaults are accepted. Otherwise they must be
 * a plain (non-array) object whose only key, if any, is `decodeLoop`.
 *
 * @param {object|null|undefined} manifestInference Manifest inference config.
 * @returns {void}
 * @throws {Error} When `sessionDefaults` is not an object, or sets any key
 *   other than `decodeLoop`; the offending keys are listed in the message.
 */
function assertNonExecutionSessionDefaults(manifestInference) {
  const sessionDefaults = manifestInference?.sessionDefaults;
  if (sessionDefaults == null) {
    return;
  }
  if (typeof sessionDefaults !== 'object' || Array.isArray(sessionDefaults)) {
    throw new Error(
      'converterConfig.inference.sessionDefaults must resolve to an object for non-execution manifests.'
    );
  }

  const invalidKeys = Object.keys(sessionDefaults).filter((key) => key !== 'decodeLoop');
  if (invalidKeys.length > 0) {
    // Name the rejected keys so the config author can find them without
    // diffing the resolved manifest by hand.
    throw new Error(
      'converterConfig.inference.sessionDefaults may only set decodeLoop unless ' +
      'converterConfig.inference.execution is present. ' +
      `Unsupported keys: ${invalidKeys.join(', ')}.`
    );
  }
}
303
+
222
304
  function readConverterExecutionOverride(converterConfig) {
223
305
  const raw = converterConfig?.inference?.execution;
224
306
  if (raw == null) return null;
@@ -331,10 +413,10 @@ function applyConverterInferenceOverrides(manifestInference, converterConfig, co
331
413
  manifestInference.defaultKernelPath = overrideKernelPath;
332
414
  }
333
415
  const sessionDefaults = readConverterSessionDefaultsOverride(converterConfig);
416
+ const execution = readConverterExecutionOverride(converterConfig);
334
417
  if (sessionDefaults) {
335
418
  manifestInference.sessionDefaults = sessionDefaults;
336
419
  }
337
- const execution = readConverterExecutionOverride(converterConfig);
338
420
  if (execution) {
339
421
  manifestInference.execution = execution;
340
422
  }
@@ -351,17 +433,28 @@ function applyConverterInferenceOverrides(manifestInference, converterConfig, co
351
433
  const generatedExecution = buildExecutionV0FromKernelPath(manifestInference.defaultKernelPath);
352
434
  if (generatedExecution) {
353
435
  manifestInference.execution = generatedExecution.execution;
354
- if (!manifestInference.sessionDefaults) {
355
- manifestInference.sessionDefaults = generatedExecution.sessionDefaults;
356
- }
436
+ manifestInference.sessionDefaults = mergeExecutionV0SessionDefaults(
437
+ generatedExecution.sessionDefaults,
438
+ manifestInference.sessionDefaults
439
+ );
357
440
  manifestInference.schema = generatedExecution.schema;
358
441
  }
359
442
  }
360
443
 
361
- if (manifestInference.execution || sessionDefaults || execution) {
444
+ if (execution && !manifestInference.sessionDefaults) {
445
+ throw new Error(
446
+ 'converterConfig.inference.execution requires converterConfig.inference.sessionDefaults.'
447
+ );
448
+ }
449
+
450
+ if (manifestInference.execution) {
362
451
  manifestInference.schema = EXECUTION_V0_SCHEMA_ID;
452
+ } else {
453
+ assertNonExecutionSessionDefaults(manifestInference);
454
+ manifestInference.schema = null;
363
455
  }
364
456
  validateDefaultKernelPath(manifestInference, context);
457
+ assertExecutionV0ConversionContract(manifestInference, context?.modelId ?? context?.presetId);
365
458
  }
366
459
 
367
460
  export function resolveConversionPlan(options) {
@@ -418,7 +511,7 @@ export function resolveConversionPlan(options) {
418
511
  if (!presetOverride && isLikelyEmbeddingGemma(rawConfig, architectureHint)) {
419
512
  presetId = 'embeddinggemma';
420
513
  }
421
- if (presetId === 'transformer') {
514
+ if (!presetId) {
422
515
  throw buildUnknownFamilyError(architectureHint, rawConfig, options?.includePresetOverrideHint === true);
423
516
  }
424
517
  const preset = resolvePreset(presetId);
@@ -168,6 +168,13 @@ export declare const RDRR_VERSION: number;
168
168
  */
169
169
  export declare function sanitizeModelId(name: string): string | null;
170
170
 
171
+ /**
172
+ * Resolve the bundled tokenizer vocab size (the entry count of `model.vocab`) from a Hugging Face tokenizer.json payload; yields 0 when no vocab is present.
173
+ */
174
+ export declare function resolveBundledTokenizerVocabSize(
175
+ tokenizerJson: Record<string, unknown> | null | undefined
176
+ ): number;
177
+
171
178
  /**
172
179
  * Format bytes for human-readable display
173
180
  */
@@ -26,6 +26,7 @@ import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required
26
26
  import { buildManifestInference, inferEmbeddingOutputConfig } from './manifest-inference.js';
27
27
  import { resolveEosTokenId } from './tokenizer-utils.js';
28
28
  import {
29
+ normalizeQ4KLayout,
29
30
  resolveManifestQuantization,
30
31
  resolveEffectiveQuantizationInfo,
31
32
  } from './quantization-info.js';
@@ -122,11 +123,6 @@ function bf16ToFloat32(value) {
122
123
  return view.getFloat32(0, true);
123
124
  }
124
125
 
125
- function normalizeQ4KLayout(value) {
126
- const normalized = String(value || '').trim().toLowerCase();
127
- return normalized === 'col' ? 'col' : 'row';
128
- }
129
-
130
126
  function normalizeTensorName(tensor) {
131
127
  const name = tensor?.name;
132
128
  return typeof name === 'string' ? name : '';
@@ -495,6 +491,17 @@ function buildSentencepieceTokenizer(tokenizerConfig, rawConfig, architecture, m
495
491
  return tokenizer;
496
492
  }
497
493
 
494
/**
 * Count the vocabulary entries in a parsed tokenizer.json payload.
 *
 * Reads `tokenizerJson.model.vocab`. An array contributes its length, any
 * other object contributes its number of own enumerable keys, and every other
 * value (including a missing payload) counts as 0.
 *
 * @param {object|null|undefined} tokenizerJson Parsed tokenizer.json content.
 * @returns {number} Number of vocab entries, or 0 when none can be read.
 */
export function resolveBundledTokenizerVocabSize(tokenizerJson) {
  const vocabulary = tokenizerJson?.model?.vocab;
  if (!vocabulary || typeof vocabulary !== 'object') {
    return 0;
  }
  const entries = Array.isArray(vocabulary) ? vocabulary : Object.keys(vocabulary);
  return entries.length;
}
504
+
498
505
 
499
506
  export function sanitizeModelId(name) {
500
507
  const sanitized = name
@@ -976,6 +983,7 @@ export function createManifest(
976
983
  isDiffusion ? 'diffusion' : extractArchitecture(model.config, model.ggufConfig)
977
984
  );
978
985
  const rawConfig = model.config || {};
986
+ const generationConfig = model.generationConfig ?? null;
979
987
  const resolvedArchitecture = isDiffusion
980
988
  ? architecture
981
989
  : resolveIntermediateSizeFromTensors(architecture, model, tensorLocations, rawConfig, modelId);
@@ -988,7 +996,7 @@ export function createManifest(
988
996
  inference = { ...DEFAULT_MANIFEST_INFERENCE, presetId: 'diffusion' };
989
997
  } else {
990
998
  const presetId = detectPreset(rawConfig, model.architecture);
991
- if (presetId === 'transformer') {
999
+ if (!presetId) {
992
1000
  const modelType = rawConfig.model_type ?? 'unknown';
993
1001
  throw new Error(
994
1002
  `Unknown model family: architecture="${model.architecture || 'unknown'}", model_type="${modelType}"\n\n` +
@@ -1030,6 +1038,7 @@ export function createManifest(
1030
1038
  ? null
1031
1039
  : resolveEosTokenId({
1032
1040
  config: rawConfig,
1041
+ generationConfig,
1033
1042
  tokenizer: model.tokenizer ?? model.tokenizerConfig ?? null,
1034
1043
  tokenizerJson: model.tokenizerJson ?? null,
1035
1044
  });
@@ -1070,9 +1079,7 @@ export function createManifest(
1070
1079
  // Include tokenizer if available
1071
1080
  if (model.tokenizerJson) {
1072
1081
  const tokenizer = model.tokenizerJson;
1073
- const vocabSize =
1074
- tokenizer.model?.vocab?.length ||
1075
- Object.keys(tokenizer.model?.vocab || {}).length;
1082
+ const vocabSize = resolveBundledTokenizerVocabSize(tokenizer);
1076
1083
  if (!vocabSize) {
1077
1084
  throw new Error('Tokenizer vocab is missing or empty');
1078
1085
  }
@@ -104,7 +104,10 @@ function buildKernelProfiles(steps) {
104
104
  }
105
105
 
106
106
  function buildSessionDefaults(kernelPath) {
107
- const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(kernelPath)) ?? 'f16';
107
+ const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(kernelPath));
108
+ if (!activationDtype) {
109
+ throw new Error('execution-v0 manifest: kernel path is missing activationDtype.');
110
+ }
108
111
  const outputDtype = normalizeKernelDtype(getKernelPathOutputDtype(kernelPath)) ?? activationDtype;
109
112
  const kvDtype = normalizeKernelDtype(getKernelPathKVDtype(kernelPath)) ?? activationDtype;
110
113
  return {
@@ -28,6 +28,7 @@ export {
28
28
  RDRR_VERSION,
29
29
  ConvertStage,
30
30
  sanitizeModelId,
31
+ resolveBundledTokenizerVocabSize,
31
32
  formatBytes,
32
33
  shouldQuantize,
33
34
  normalizeStorageQuant,
@@ -6,6 +6,7 @@ export {
6
6
  RDRR_VERSION,
7
7
  ConvertStage,
8
8
  sanitizeModelId,
9
+ resolveBundledTokenizerVocabSize,
9
10
  formatBytes,
10
11
  shouldQuantize,
11
12
  normalizeStorageQuant,
@@ -240,16 +240,6 @@ function detectAttentionOutputGate(presetInference, modelConfig, defaults) {
240
240
  return modelConfig.attn_output_gate;
241
241
  }
242
242
 
243
- const modelType = normalizeLayerTypeName(modelConfig?.model_type);
244
- const hasLinearAttentionLayers = Array.isArray(modelConfig?.layer_types)
245
- && modelConfig.layer_types.some((entry) => normalizeCustomLayerType(entry) === 'linear_attention');
246
- if (
247
- hasLinearAttentionLayers
248
- && (modelType === 'qwen2' || modelType === 'qwen3_5' || modelType === 'qwen3_5_text')
249
- ) {
250
- return true;
251
- }
252
-
253
243
  return defaults.attention.attentionOutputGate;
254
244
  }
255
245
 
@@ -259,13 +249,23 @@ function resolveQueryPreAttnScalar(preset, modelConfig, headDim) {
259
249
  return explicit;
260
250
  }
261
251
 
262
- const modelType = normalizeLayerTypeName(modelConfig?.model_type);
263
- const presetId = normalizeLayerTypeName(preset?.id);
264
- if (modelType.startsWith('qwen') || presetId === 'qwen3') {
265
- return headDim;
252
+ // Standard attention scaling: attnScale = 1/sqrt(queryPreAttnScalar).
253
+ // For standard transformers queryPreAttnScalar = headDim, giving 1/sqrt(headDim).
254
+ // Preset may override for non-standard models.
255
+ const presetScalar = Number(preset?.inference?.attention?.queryPreAttnScalar);
256
+ if (Number.isFinite(presetScalar) && presetScalar > 0) {
257
+ return presetScalar;
258
+ }
259
+
260
+ return headDim;
261
+ }
262
+
263
/**
 * Pick the `rmsNormWeightOffset` flag for the manifest normalization config.
 *
 * The preset's `normalization.rmsNormWeightOffset` is used only when it is a
 * real boolean; any other value falls through to
 * `defaults.normalization.rmsNormWeightOffset`.
 *
 * @param {object|null|undefined} presetInference Preset inference config.
 * @param {object} modelConfig Model config (not consulted by this function).
 * @param {object} defaults Default inference config; must carry `normalization`.
 * @returns {*} The preset boolean, or whatever the defaults hold.
 */
function detectRmsNormWeightOffset(presetInference, modelConfig, defaults) {
  const presetOffset = presetInference?.normalization?.rmsNormWeightOffset;
  return typeof presetOffset === 'boolean'
    ? presetOffset
    : defaults.normalization.rmsNormWeightOffset;
}
270
270
 
271
271
  // Build normalization config with auto-detection from tensor names.
@@ -278,7 +278,7 @@ function buildNormalizationConfig(presetInference, modelConfig, defaults, tensor
278
278
  modelConfig.rms_norm_eps ??
279
279
  modelConfig.attentionLayerNormRMSEpsilon ??
280
280
  defaults.normalization.rmsNormEps,
281
- rmsNormWeightOffset: presetInference.normalization?.rmsNormWeightOffset ?? defaults.normalization.rmsNormWeightOffset,
281
+ rmsNormWeightOffset: detectRmsNormWeightOffset(presetInference, modelConfig, defaults),
282
282
  // For norm flags: auto-detected > preset > default
283
283
  postAttentionNorm: detected.postAttentionNorm ?? presetInference.normalization?.postAttentionNorm ?? defaults.normalization.postAttentionNorm,
284
284
  preFeedforwardNorm: detected.preFeedforwardNorm ?? presetInference.normalization?.preFeedforwardNorm ?? defaults.normalization.preFeedforwardNorm,
@@ -303,26 +303,44 @@ function resolveKernelPathFromPreset(presetInference, quantizationInfo, q4kLayou
303
303
  }
304
304
 
305
305
  const weightKey = normalizeKernelDtype(quantizationInfo?.weights);
306
- const computeKey = normalizeKernelDtype(quantizationInfo?.compute) ?? (quantizationInfo ? 'f16' : null);
307
-
308
- const entry = (weightKey && kernelPaths[weightKey]) || kernelPaths.default;
306
+ const computeKey = normalizeKernelDtype(quantizationInfo?.compute);
307
+ const hasWeightEntry = weightKey != null && Object.prototype.hasOwnProperty.call(kernelPaths, weightKey);
308
+ const entry = hasWeightEntry ? kernelPaths[weightKey] : kernelPaths.default;
309
+ const weightLabel = weightKey ? `.${weightKey}` : '';
309
310
  let resolved = null;
311
+ if (entry == null) {
312
+ return presetInference?.kernelPath ?? null;
313
+ }
314
+
310
315
  if (typeof entry === 'string') {
311
316
  resolved = entry;
312
- } else if (entry && computeKey && entry[computeKey]) {
317
+ } else if (entry && computeKey && Object.prototype.hasOwnProperty.call(entry, computeKey)) {
313
318
  resolved = entry[computeKey];
314
- } else if (entry && entry.default) {
319
+ } else if (entry && typeof entry === 'object' && !Array.isArray(entry) && Object.prototype.hasOwnProperty.call(entry, 'default')) {
315
320
  resolved = entry.default;
321
+ } else if (entry && typeof entry === 'object' && !Array.isArray(entry) && !computeKey) {
322
+ throw new Error(
323
+ `Preset kernelPaths${weightLabel} requires quantizationInfo.compute ` +
324
+ 'to resolve a compute-specific defaultKernelPath.'
325
+ );
326
+ } else if (entry && typeof entry === 'object' && !Array.isArray(entry)) {
327
+ throw new Error(
328
+ `Preset kernelPaths${weightLabel} is missing compute "${computeKey}". ` +
329
+ 'Add an explicit compute-specific mapping or default instead of relying on JS fallbacks.'
330
+ );
316
331
  } else {
317
- resolved = presetInference?.kernelPath ?? null;
332
+ throw new Error(
333
+ `Preset kernelPaths${weightLabel} must resolve to a string or object.`
334
+ );
318
335
  }
319
336
 
320
- // When q4kLayout is 'col' (column-wise), fused Q4K kernels cannot be used.
321
- // Try to find a corresponding dequant kernel path.
337
+ // Column-wise Q4K must be mapped explicitly in preset JSON; JS must not
338
+ // rewrite kernel-path ids to infer policy.
322
339
  if (resolved && q4kLayout === 'col' && resolved.includes('-fused-')) {
323
- const dequantPath = resolved.replace('-fused-', '-dequant-');
324
- // Return dequant variant (caller should verify it exists)
325
- return dequantPath;
340
+ throw new Error(
341
+ `Preset kernelPaths${weightKey ? `.${weightKey}` : ''} resolved fused kernel path "${resolved}" ` +
342
+ 'for q4k layout "col". Add an explicit dequant kernel path mapping to the preset instead of relying on JS rewrites.'
343
+ );
326
344
  }
327
345
 
328
346
  return resolved;
@@ -354,8 +372,8 @@ export function buildManifestInference(preset, config, headDim = 64, quantizatio
354
372
  queryPreAttnScalar: resolveQueryPreAttnScalar(preset, modelConfig, headDim),
355
373
  attnLogitSoftcapping: presetInference.attention?.attnLogitSoftcapping ??
356
374
  modelConfig.attn_logit_softcapping ?? defaults.attention.attnLogitSoftcapping,
357
- slidingWindow: presetInference.attention?.slidingWindow ??
358
- modelConfig.sliding_window ?? defaults.attention.slidingWindow,
375
+ slidingWindow: modelConfig.sliding_window ??
376
+ presetInference.attention?.slidingWindow ?? defaults.attention.slidingWindow,
359
377
  queryKeyNorm: presetInference.attention?.queryKeyNorm ?? defaults.attention.queryKeyNorm,
360
378
  attentionOutputGate: detectAttentionOutputGate(presetInference, modelConfig, defaults),
361
379
  causal: detectedCausalAttention ?? presetInference.attention?.causal ?? defaults.attention.causal,
@@ -428,6 +446,9 @@ export function buildManifestInference(preset, config, headDim = 64, quantizatio
428
446
  );
429
447
  }
430
448
  globalPattern = null;
449
+ // Default offset 0 means first global layer at index 0 (most common pattern).
450
+ // This is the every_n pattern default, distinct from layerPattern.offset=null
451
+ // which means "not applicable" in the schema.
431
452
  offset = (
432
453
  detectEveryNOffsetFromLayerTypes(modelConfig.layer_types, period)
433
454
  ?? normalizeEveryNOffset(presetPattern.offset, period)
@@ -261,9 +261,6 @@ export async function parseDiffusionModel(adapter) {
261
261
  }
262
262
  const configSuffix = defaultConfigPath(componentId);
263
263
  const config = await readJson(configSuffix, `${componentId} config`);
264
- if (componentId === 'transformer' && config && !config.weight_format) {
265
- config.weight_format = 'diffusers';
266
- }
267
264
  diffusionConfig.components[componentId] = {
268
265
  ...(diffusionConfig.components[componentId] || {}),
269
266
  config,
@@ -7,6 +7,9 @@ export async function parseTransformerModel(adapter) {
7
7
  } = adapter;
8
8
 
9
9
  const config = await readJson('config.json', 'config.json');
10
+ const generationConfig = await fileExists('generation_config.json')
11
+ ? await readJson('generation_config.json', 'generation_config.json')
12
+ : null;
10
13
  const architectureHint = config.architectures?.[0] ?? config.model_type ?? '';
11
14
 
12
15
  let tensors = null;
@@ -19,6 +22,7 @@ export async function parseTransformerModel(adapter) {
19
22
 
20
23
  return {
21
24
  config,
25
+ generationConfig,
22
26
  tensors,
23
27
  architectureHint,
24
28
  };