@simulatte/doppler 0.1.5 → 0.1.7

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (392)
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -17,6 +17,8 @@ import { sanitizeModelId } from './core.js';
17
17
  import { classifyTensorRole } from '../formats/rdrr/index.js';
18
18
  import { selectRuleValue } from '../rules/rule-registry.js';
19
19
  import { buildKernelRefFromKernelEntry, isKernelRefBoundToKernel } from '../config/kernels/kernel-ref.js';
20
+ import { mergeLayeredShallowObjects } from '../config/merge-helpers.js';
21
+ import { buildExecutionV0ContractArtifact } from '../config/execution-v0-contract-check.js';
20
22
 
21
23
  const KNOWN_MODEL_PRESETS = new Set(listPresets());
22
24
  const CONVERSION_SUPPORTED_PRESETS = [...KNOWN_MODEL_PRESETS]
@@ -179,9 +181,6 @@ export function validateDefaultKernelPath(inference, context = {}) {
179
181
  && expectedComputeDtype !== kernelActivationDtype
180
182
  ) {
181
183
  const presetId = context?.presetId ?? 'unknown';
182
- if (presetId === 'lfm2' && expectedComputeDtype === 'f32' && kernelActivationDtype === 'f16') {
183
- return;
184
- }
185
184
  throw new Error(
186
185
  `Invalid defaultKernelPath "${inference.defaultKernelPath}" for preset "${presetId}" ` +
187
186
  `(weights=${quantizationInfo?.weights ?? 'unknown'}, compute=${expectedComputeDtype}, ` +
@@ -208,6 +207,61 @@ function cloneJson(value) {
208
207
  return JSON.parse(JSON.stringify(value));
209
208
  }
210
209
 
210
+ function mergeExecutionV0SessionDefaults(baseSessionDefaults, overrideSessionDefaults) {
211
+ if (!overrideSessionDefaults) {
212
+ return cloneJson(baseSessionDefaults);
213
+ }
214
+ const base = cloneJson(baseSessionDefaults ?? {});
215
+ const override = cloneJson(overrideSessionDefaults);
216
+ const baseCompute = base.compute ?? {};
217
+ const overrideCompute = override.compute ?? {};
218
+
219
+ return {
220
+ ...base,
221
+ ...override,
222
+ compute: {
223
+ ...baseCompute,
224
+ ...overrideCompute,
225
+ defaults: mergeLayeredShallowObjects(
226
+ baseCompute.defaults ?? {},
227
+ overrideCompute.defaults ?? {}
228
+ ),
229
+ kernelProfiles: Object.prototype.hasOwnProperty.call(overrideCompute, 'kernelProfiles')
230
+ ? overrideCompute.kernelProfiles
231
+ : baseCompute.kernelProfiles,
232
+ },
233
+ kvcache: Object.prototype.hasOwnProperty.call(override, 'kvcache')
234
+ ? (
235
+ override.kvcache === null
236
+ ? null
237
+ : mergeLayeredShallowObjects(base.kvcache ?? {}, override.kvcache ?? {})
238
+ )
239
+ : base.kvcache,
240
+ decodeLoop: Object.prototype.hasOwnProperty.call(override, 'decodeLoop')
241
+ ? (
242
+ override.decodeLoop === null
243
+ ? null
244
+ : mergeLayeredShallowObjects(base.decodeLoop ?? {}, override.decodeLoop ?? {})
245
+ )
246
+ : base.decodeLoop,
247
+ };
248
+ }
249
+
250
+ function assertExecutionV0ConversionContract(manifestInference, modelId) {
251
+ if (!manifestInference?.execution) {
252
+ return;
253
+ }
254
+ const artifact = buildExecutionV0ContractArtifact(manifestInference, {
255
+ modelId: modelId ?? 'converted-model',
256
+ });
257
+ if (!artifact?.ok) {
258
+ const detail = artifact?.errors?.join(' ') ?? 'unknown execution-v0 contract error';
259
+ throw new Error(
260
+ `converterConfig.inference produced an invalid execution-v0 contract: ${detail}`
261
+ );
262
+ }
263
+ }
264
+
211
265
  function readConverterSessionDefaultsOverride(converterConfig) {
212
266
  const raw = converterConfig?.inference?.sessionDefaults;
213
267
  if (raw == null) return null;
@@ -219,6 +273,26 @@ function readConverterSessionDefaultsOverride(converterConfig) {
219
273
  return cloneJson(raw);
220
274
  }
221
275
 
276
+ function assertNonExecutionSessionDefaults(manifestInference) {
277
+ const sessionDefaults = manifestInference?.sessionDefaults;
278
+ if (sessionDefaults == null) {
279
+ return;
280
+ }
281
+ if (typeof sessionDefaults !== 'object' || Array.isArray(sessionDefaults)) {
282
+ throw new Error(
283
+ 'converterConfig.inference.sessionDefaults must resolve to an object for non-execution manifests.'
284
+ );
285
+ }
286
+ const keys = Object.keys(sessionDefaults);
287
+ const invalidKeys = keys.filter((key) => key !== 'decodeLoop');
288
+ if (invalidKeys.length > 0) {
289
+ throw new Error(
290
+ 'converterConfig.inference.sessionDefaults may only set decodeLoop unless ' +
291
+ 'converterConfig.inference.execution is present.'
292
+ );
293
+ }
294
+ }
295
+
222
296
  function readConverterExecutionOverride(converterConfig) {
223
297
  const raw = converterConfig?.inference?.execution;
224
298
  if (raw == null) return null;
@@ -331,10 +405,10 @@ function applyConverterInferenceOverrides(manifestInference, converterConfig, co
331
405
  manifestInference.defaultKernelPath = overrideKernelPath;
332
406
  }
333
407
  const sessionDefaults = readConverterSessionDefaultsOverride(converterConfig);
408
+ const execution = readConverterExecutionOverride(converterConfig);
334
409
  if (sessionDefaults) {
335
410
  manifestInference.sessionDefaults = sessionDefaults;
336
411
  }
337
- const execution = readConverterExecutionOverride(converterConfig);
338
412
  if (execution) {
339
413
  manifestInference.execution = execution;
340
414
  }
@@ -351,17 +425,28 @@ function applyConverterInferenceOverrides(manifestInference, converterConfig, co
351
425
  const generatedExecution = buildExecutionV0FromKernelPath(manifestInference.defaultKernelPath);
352
426
  if (generatedExecution) {
353
427
  manifestInference.execution = generatedExecution.execution;
354
- if (!manifestInference.sessionDefaults) {
355
- manifestInference.sessionDefaults = generatedExecution.sessionDefaults;
356
- }
428
+ manifestInference.sessionDefaults = mergeExecutionV0SessionDefaults(
429
+ generatedExecution.sessionDefaults,
430
+ manifestInference.sessionDefaults
431
+ );
357
432
  manifestInference.schema = generatedExecution.schema;
358
433
  }
359
434
  }
360
435
 
361
- if (manifestInference.execution || sessionDefaults || execution) {
436
+ if (execution && !manifestInference.sessionDefaults) {
437
+ throw new Error(
438
+ 'converterConfig.inference.execution requires converterConfig.inference.sessionDefaults.'
439
+ );
440
+ }
441
+
442
+ if (manifestInference.execution) {
362
443
  manifestInference.schema = EXECUTION_V0_SCHEMA_ID;
444
+ } else {
445
+ assertNonExecutionSessionDefaults(manifestInference);
446
+ manifestInference.schema = null;
363
447
  }
364
448
  validateDefaultKernelPath(manifestInference, context);
449
+ assertExecutionV0ConversionContract(manifestInference, context?.modelId ?? context?.presetId);
365
450
  }
366
451
 
367
452
  export function resolveConversionPlan(options) {
@@ -418,7 +503,7 @@ export function resolveConversionPlan(options) {
418
503
  if (!presetOverride && isLikelyEmbeddingGemma(rawConfig, architectureHint)) {
419
504
  presetId = 'embeddinggemma';
420
505
  }
421
- if (presetId === 'transformer') {
506
+ if (!presetId) {
422
507
  throw buildUnknownFamilyError(architectureHint, rawConfig, options?.includePresetOverrideHint === true);
423
508
  }
424
509
  const preset = resolvePreset(presetId);
@@ -168,6 +168,13 @@ export declare const RDRR_VERSION: number;
168
168
  */
169
169
  export declare function sanitizeModelId(name: string): string | null;
170
170
 
171
+ /**
172
+ * Resolve bundled tokenizer vocab size from Hugging Face tokenizer.json payloads.
173
+ */
174
+ export declare function resolveBundledTokenizerVocabSize(
175
+ tokenizerJson: Record<string, unknown> | null | undefined
176
+ ): number;
177
+
171
178
  /**
172
179
  * Format bytes for human-readable display
173
180
  */
@@ -26,6 +26,7 @@ import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required
26
26
  import { buildManifestInference, inferEmbeddingOutputConfig } from './manifest-inference.js';
27
27
  import { resolveEosTokenId } from './tokenizer-utils.js';
28
28
  import {
29
+ normalizeQ4KLayout,
29
30
  resolveManifestQuantization,
30
31
  resolveEffectiveQuantizationInfo,
31
32
  } from './quantization-info.js';
@@ -122,11 +123,6 @@ function bf16ToFloat32(value) {
122
123
  return view.getFloat32(0, true);
123
124
  }
124
125
 
125
- function normalizeQ4KLayout(value) {
126
- const normalized = String(value || '').trim().toLowerCase();
127
- return normalized === 'col' ? 'col' : 'row';
128
- }
129
-
130
126
  function normalizeTensorName(tensor) {
131
127
  const name = tensor?.name;
132
128
  return typeof name === 'string' ? name : '';
@@ -495,6 +491,17 @@ function buildSentencepieceTokenizer(tokenizerConfig, rawConfig, architecture, m
495
491
  return tokenizer;
496
492
  }
497
493
 
494
+ export function resolveBundledTokenizerVocabSize(tokenizerJson) {
495
+ const vocab = tokenizerJson?.model?.vocab;
496
+ if (Array.isArray(vocab)) {
497
+ return vocab.length;
498
+ }
499
+ if (vocab && typeof vocab === 'object') {
500
+ return Object.keys(vocab).length;
501
+ }
502
+ return 0;
503
+ }
504
+
498
505
 
499
506
  export function sanitizeModelId(name) {
500
507
  const sanitized = name
@@ -988,7 +995,7 @@ export function createManifest(
988
995
  inference = { ...DEFAULT_MANIFEST_INFERENCE, presetId: 'diffusion' };
989
996
  } else {
990
997
  const presetId = detectPreset(rawConfig, model.architecture);
991
- if (presetId === 'transformer') {
998
+ if (!presetId) {
992
999
  const modelType = rawConfig.model_type ?? 'unknown';
993
1000
  throw new Error(
994
1001
  `Unknown model family: architecture="${model.architecture || 'unknown'}", model_type="${modelType}"\n\n` +
@@ -1070,9 +1077,7 @@ export function createManifest(
1070
1077
  // Include tokenizer if available
1071
1078
  if (model.tokenizerJson) {
1072
1079
  const tokenizer = model.tokenizerJson;
1073
- const vocabSize =
1074
- tokenizer.model?.vocab?.length ||
1075
- Object.keys(tokenizer.model?.vocab || {}).length;
1080
+ const vocabSize = resolveBundledTokenizerVocabSize(tokenizer);
1076
1081
  if (!vocabSize) {
1077
1082
  throw new Error('Tokenizer vocab is missing or empty');
1078
1083
  }
@@ -104,7 +104,10 @@ function buildKernelProfiles(steps) {
104
104
  }
105
105
 
106
106
  function buildSessionDefaults(kernelPath) {
107
- const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(kernelPath)) ?? 'f16';
107
+ const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(kernelPath));
108
+ if (!activationDtype) {
109
+ throw new Error('execution-v0 manifest: kernel path is missing activationDtype.');
110
+ }
108
111
  const outputDtype = normalizeKernelDtype(getKernelPathOutputDtype(kernelPath)) ?? activationDtype;
109
112
  const kvDtype = normalizeKernelDtype(getKernelPathKVDtype(kernelPath)) ?? activationDtype;
110
113
  return {
@@ -28,6 +28,7 @@ export {
28
28
  RDRR_VERSION,
29
29
  ConvertStage,
30
30
  sanitizeModelId,
31
+ resolveBundledTokenizerVocabSize,
31
32
  formatBytes,
32
33
  shouldQuantize,
33
34
  normalizeStorageQuant,
@@ -6,6 +6,7 @@ export {
6
6
  RDRR_VERSION,
7
7
  ConvertStage,
8
8
  sanitizeModelId,
9
+ resolveBundledTokenizerVocabSize,
9
10
  formatBytes,
10
11
  shouldQuantize,
11
12
  normalizeStorageQuant,
@@ -268,6 +268,19 @@ function resolveQueryPreAttnScalar(preset, modelConfig, headDim) {
268
268
  return Math.sqrt(headDim);
269
269
  }
270
270
 
271
+ function detectRmsNormWeightOffset(presetInference, modelConfig, defaults) {
272
+ const modelType = normalizeLayerTypeName(modelConfig?.model_type);
273
+ if (modelType === 'qwen3_5' || modelType === 'qwen3_5_text') {
274
+ return true;
275
+ }
276
+
277
+ if (typeof presetInference?.normalization?.rmsNormWeightOffset === 'boolean') {
278
+ return presetInference.normalization.rmsNormWeightOffset;
279
+ }
280
+
281
+ return defaults.normalization.rmsNormWeightOffset;
282
+ }
283
+
271
284
  // Build normalization config with auto-detection from tensor names.
272
285
  // Priority: auto-detected > preset > default
273
286
  function buildNormalizationConfig(presetInference, modelConfig, defaults, tensorNames) {
@@ -278,7 +291,7 @@ function buildNormalizationConfig(presetInference, modelConfig, defaults, tensor
278
291
  modelConfig.rms_norm_eps ??
279
292
  modelConfig.attentionLayerNormRMSEpsilon ??
280
293
  defaults.normalization.rmsNormEps,
281
- rmsNormWeightOffset: presetInference.normalization?.rmsNormWeightOffset ?? defaults.normalization.rmsNormWeightOffset,
294
+ rmsNormWeightOffset: detectRmsNormWeightOffset(presetInference, modelConfig, defaults),
282
295
  // For norm flags: auto-detected > preset > default
283
296
  postAttentionNorm: detected.postAttentionNorm ?? presetInference.normalization?.postAttentionNorm ?? defaults.normalization.postAttentionNorm,
284
297
  preFeedforwardNorm: detected.preFeedforwardNorm ?? presetInference.normalization?.preFeedforwardNorm ?? defaults.normalization.preFeedforwardNorm,
@@ -303,26 +316,44 @@ function resolveKernelPathFromPreset(presetInference, quantizationInfo, q4kLayou
303
316
  }
304
317
 
305
318
  const weightKey = normalizeKernelDtype(quantizationInfo?.weights);
306
- const computeKey = normalizeKernelDtype(quantizationInfo?.compute) ?? (quantizationInfo ? 'f16' : null);
307
-
308
- const entry = (weightKey && kernelPaths[weightKey]) || kernelPaths.default;
319
+ const computeKey = normalizeKernelDtype(quantizationInfo?.compute);
320
+ const hasWeightEntry = weightKey != null && Object.prototype.hasOwnProperty.call(kernelPaths, weightKey);
321
+ const entry = hasWeightEntry ? kernelPaths[weightKey] : kernelPaths.default;
322
+ const weightLabel = weightKey ? `.${weightKey}` : '';
309
323
  let resolved = null;
324
+ if (entry == null) {
325
+ return presetInference?.kernelPath ?? null;
326
+ }
327
+
310
328
  if (typeof entry === 'string') {
311
329
  resolved = entry;
312
- } else if (entry && computeKey && entry[computeKey]) {
330
+ } else if (entry && computeKey && Object.prototype.hasOwnProperty.call(entry, computeKey)) {
313
331
  resolved = entry[computeKey];
314
- } else if (entry && entry.default) {
332
+ } else if (entry && typeof entry === 'object' && !Array.isArray(entry) && Object.prototype.hasOwnProperty.call(entry, 'default')) {
315
333
  resolved = entry.default;
334
+ } else if (entry && typeof entry === 'object' && !Array.isArray(entry) && !computeKey) {
335
+ throw new Error(
336
+ `Preset kernelPaths${weightLabel} requires quantizationInfo.compute ` +
337
+ 'to resolve a compute-specific defaultKernelPath.'
338
+ );
339
+ } else if (entry && typeof entry === 'object' && !Array.isArray(entry)) {
340
+ throw new Error(
341
+ `Preset kernelPaths${weightLabel} is missing compute "${computeKey}". ` +
342
+ 'Add an explicit compute-specific mapping or default instead of relying on JS fallbacks.'
343
+ );
316
344
  } else {
317
- resolved = presetInference?.kernelPath ?? null;
345
+ throw new Error(
346
+ `Preset kernelPaths${weightLabel} must resolve to a string or object.`
347
+ );
318
348
  }
319
349
 
320
- // When q4kLayout is 'col' (column-wise), fused Q4K kernels cannot be used.
321
- // Try to find a corresponding dequant kernel path.
350
+ // Column-wise Q4K must be mapped explicitly in preset JSON; JS must not
351
+ // rewrite kernel-path ids to infer policy.
322
352
  if (resolved && q4kLayout === 'col' && resolved.includes('-fused-')) {
323
- const dequantPath = resolved.replace('-fused-', '-dequant-');
324
- // Return dequant variant (caller should verify it exists)
325
- return dequantPath;
353
+ throw new Error(
354
+ `Preset kernelPaths${weightKey ? `.${weightKey}` : ''} resolved fused kernel path "${resolved}" ` +
355
+ 'for q4k layout "col". Add an explicit dequant kernel path mapping to the preset instead of relying on JS rewrites.'
356
+ );
326
357
  }
327
358
 
328
359
  return resolved;
@@ -261,9 +261,6 @@ export async function parseDiffusionModel(adapter) {
261
261
  }
262
262
  const configSuffix = defaultConfigPath(componentId);
263
263
  const config = await readJson(configSuffix, `${componentId} config`);
264
- if (componentId === 'transformer' && config && !config.weight_format) {
265
- config.weight_format = 'diffusers';
266
- }
267
264
  diffusionConfig.components[componentId] = {
268
265
  ...(diffusionConfig.components[componentId] || {}),
269
266
  config,
@@ -91,6 +91,7 @@ export function buildVariantTag(info) {
91
91
  const weights = info.weights;
92
92
  const embeddings = info.embeddings ?? weights;
93
93
  const lmHead = info.lmHead ?? embeddings;
94
+ const compute = info.compute ? normalizeQuantTag(info.compute) : null;
94
95
  const experts = info.experts ?? null;
95
96
  const layout = info.layout ?? null;
96
97
 
@@ -100,30 +101,42 @@ export function buildVariantTag(info) {
100
101
  ? `${weights}${layout === 'row' ? '' : '-col'}`
101
102
  : weights;
102
103
 
103
- const parts = [`w${weightTag}`];
104
+ const parts = [weightTag];
105
+ const groupedRolesByDtype = new Map();
106
+ const GROUPED_ROLE_ORDER = ['e', 'h', 'a'];
104
107
 
105
- if (embeddings !== weights) {
106
- parts.push(`e${embeddings}`);
107
- }
108
+ const addGroupedRole = (role, dtype) => {
109
+ if (!dtype || dtype === weights) return;
110
+ const existing = groupedRolesByDtype.get(dtype) ?? [];
111
+ if (!existing.includes(role)) {
112
+ existing.push(role);
113
+ groupedRolesByDtype.set(dtype, existing);
114
+ }
115
+ };
108
116
 
109
- if (lmHead !== embeddings) {
110
- parts.push(`h${lmHead}`);
117
+ addGroupedRole('e', embeddings);
118
+ addGroupedRole('h', lmHead);
119
+ addGroupedRole('a', compute);
120
+
121
+ for (const [dtype, roles] of groupedRolesByDtype.entries()) {
122
+ const orderedRoles = GROUPED_ROLE_ORDER.filter((role) => roles.includes(role));
123
+ parts.push(`${orderedRoles.join('')}${dtype}`);
111
124
  }
112
125
 
113
126
  if (experts && experts !== weights) {
114
127
  parts.push(`x${experts}`);
115
128
  }
116
129
 
117
- if (info.vision) {
130
+ if (info.vision && info.vision !== weights) {
118
131
  parts.push(`v${info.vision}`);
119
132
  }
120
- if (info.audio) {
121
- parts.push(`a${info.audio}`);
133
+ if (info.audio && info.audio !== weights) {
134
+ parts.push(`audio${info.audio}`);
122
135
  }
123
- if (info.tts) {
124
- parts.push(`t${info.tts}`);
136
+ if (info.tts && info.tts !== weights) {
137
+ parts.push(`tts${info.tts}`);
125
138
  }
126
- if (info.projector) {
139
+ if (info.projector && info.projector !== weights) {
127
140
  parts.push(`p${info.projector}`);
128
141
  }
129
142
 
@@ -167,9 +180,16 @@ const Q4K_LAYOUT_ALIASES = {
167
180
  };
168
181
 
169
182
  export function normalizeQ4KLayout(value) {
170
- if (!value) return null;
171
- const lower = String(value).toLowerCase().replace(/_/g, '');
172
- return Q4K_LAYOUT_ALIASES[lower] ?? null;
183
+ if (value == null) return null;
184
+ const lower = String(value).trim().toLowerCase().replace(/_/g, '');
185
+ if (!lower) return null;
186
+ const normalized = Q4K_LAYOUT_ALIASES[lower];
187
+ if (!normalized) {
188
+ throw new Error(
189
+ `converter.quantization.q4kLayout must be "row" or "col"; got ${JSON.stringify(value)}.`
190
+ );
191
+ }
192
+ return normalized;
173
193
  }
174
194
 
175
195
  export function buildQuantizationInfo(
@@ -6,10 +6,26 @@ function asObject(value) {
6
6
  }
7
7
 
8
8
  function asFiniteNumber(value) {
9
+ if (value == null || value === '') {
10
+ return null;
11
+ }
9
12
  const parsed = Number(value);
10
13
  return Number.isFinite(parsed) ? parsed : null;
11
14
  }
12
15
 
16
+ function asBoolean(value) {
17
+ return typeof value === 'boolean' ? value : null;
18
+ }
19
+
20
+ function asNumberArray(value) {
21
+ if (!Array.isArray(value)) return null;
22
+ const normalized = value.map((entry) => asFiniteNumber(entry));
23
+ if (normalized.some((entry) => entry == null || entry <= 0)) {
24
+ return null;
25
+ }
26
+ return normalized.map((entry) => Math.trunc(entry));
27
+ }
28
+
13
29
  function normalizeRoPEType(value) {
14
30
  if (typeof value !== 'string') return null;
15
31
  const normalized = value.trim().toLowerCase();
@@ -125,6 +141,13 @@ function failOnConflictingScaling(sourceLabel, canonicalScaling, candidateScalin
125
141
  export function buildRoPEConfig(presetInference, config) {
126
142
  const ropeScaling = asObject(config.rope_scaling);
127
143
  const ropeParameters = asObject(config.rope_parameters);
144
+ const flatRoPEParameters = (
145
+ ropeParameters
146
+ && !asObject(ropeParameters.full_attention)
147
+ && !asObject(ropeParameters.sliding_attention)
148
+ )
149
+ ? ropeParameters
150
+ : null;
128
151
  const fullAttentionRoPE = asObject(ropeParameters?.full_attention);
129
152
  const slidingAttentionRoPE = asObject(ropeParameters?.sliding_attention);
130
153
  const presetRoPE = presetInference.rope ?? {};
@@ -164,6 +187,11 @@ export function buildRoPEConfig(presetInference, config) {
164
187
  strictMissingTypeAndFactor: false,
165
188
  sourceLabel: 'HF config rope_parameters.full_attention',
166
189
  });
190
+ } else if (flatRoPEParameters) {
191
+ globalScaling = resolveScalingConfig(flatRoPEParameters, {
192
+ strictMissingTypeAndFactor: false,
193
+ sourceLabel: 'HF config rope_parameters',
194
+ });
167
195
  }
168
196
 
169
197
  const hasPresetLocalScaling = presetRoPE.ropeLocalScalingType !== undefined
@@ -192,6 +220,7 @@ export function buildRoPEConfig(presetInference, config) {
192
220
  // HF config is source of truth for ropeTheta when provided:
193
221
  // prefer rope_parameters.full_attention.rope_theta, then rope_theta.
194
222
  const ropeTheta = asFiniteNumber(fullAttentionRoPE?.rope_theta)
223
+ ?? asFiniteNumber(flatRoPEParameters?.rope_theta)
195
224
  ?? asFiniteNumber(config.rope_theta)
196
225
  ?? presetInference.rope?.ropeTheta
197
226
  ?? 10000;
@@ -201,9 +230,22 @@ export function buildRoPEConfig(presetInference, config) {
201
230
  ?? presetInference.rope?.ropeLocalTheta
202
231
  ?? null;
203
232
 
233
+ const mropeInterleaved = asBoolean(flatRoPEParameters?.mrope_interleaved)
234
+ ?? presetInference.rope?.mropeInterleaved
235
+ ?? false;
236
+ const mropeSection = asNumberArray(flatRoPEParameters?.mrope_section)
237
+ ?? presetInference.rope?.mropeSection
238
+ ?? null;
239
+ const partialRotaryFactor = asFiniteNumber(flatRoPEParameters?.partial_rotary_factor)
240
+ ?? asFiniteNumber(presetInference.rope?.partialRotaryFactor)
241
+ ?? null;
242
+
204
243
  return {
205
244
  ropeTheta,
206
245
  ropeLocalTheta,
246
+ mropeInterleaved,
247
+ mropeSection,
248
+ partialRotaryFactor,
207
249
  ropeScalingType: globalScaling.ropeScalingType,
208
250
  ropeScalingFactor: globalScaling.ropeScalingFactor,
209
251
  yarnBetaFast: globalScaling.yarnBetaFast,
@@ -126,7 +126,7 @@ export declare class ShardPacker {
126
126
  */
127
127
  export declare function sortTensorsByGroup(
128
128
  tensors: TensorInfoSchema[],
129
- modelType?: ModelType
129
+ modelType: ModelType
130
130
  ): TensorInfoSchema[];
131
131
 
132
132
  /**
@@ -399,7 +399,10 @@ function bytesToHex(bytes) {
399
399
  }
400
400
 
401
401
 
402
- export function sortTensorsByGroup(tensors, modelType = 'transformer') {
402
+ export function sortTensorsByGroup(tensors, modelType) {
403
+ if (typeof modelType !== 'string' || modelType.trim().length === 0) {
404
+ throw new Error('sortTensorsByGroup requires an explicit modelType.');
405
+ }
403
406
  return [...tensors].sort((a, b) => {
404
407
  const groupA = classifyTensor(a.name, modelType);
405
408
  const groupB = classifyTensor(b.name, modelType);