@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -224,6 +224,29 @@ function formatChatML(messages) {
224
224
  return parts.join('');
225
225
  }
226
226
 
227
+ function formatQwen(messages) {
228
+ // Qwen 3.5 chat format is ChatML-like, but the generation prelude includes
229
+ // an explicit empty thinking block before assistant output.
230
+ const parts = [];
231
+ for (const [index, message] of messages.entries()) {
232
+ const role = normalizeChatRole(message?.role);
233
+ assertSupportedChatRole(role, 'Qwen', index);
234
+ if (role === 'system' && index !== 0) {
235
+ throw new Error('Qwen template requires any system message to appear first.');
236
+ }
237
+ const content = normalizeChatMessageContent(message?.content);
238
+ if (role === 'system') {
239
+ parts.push(`<|im_start|>system\n${content}<|im_end|>\n`);
240
+ } else if (role === 'user') {
241
+ parts.push(`<|im_start|>user\n${content}<|im_end|>\n`);
242
+ } else if (role === 'assistant') {
243
+ parts.push(`<|im_start|>assistant\n${content}<|im_end|>\n`);
244
+ }
245
+ }
246
+ parts.push('<|im_start|>assistant\n<think>\n\n</think>\n\n');
247
+ return parts.join('');
248
+ }
249
+
227
250
  function formatTranslateGemmaUserPrompt(content) {
228
251
  if (!Array.isArray(content) || content.length !== 1) {
229
252
  throw new Error(
@@ -345,7 +368,7 @@ const CHAT_FORMATTERS = {
345
368
  'llama3': formatHeaderBased,
346
369
  'gpt-oss': formatChannelBased,
347
370
  'chatml': formatChatML,
348
- 'qwen': formatChatML,
371
+ 'qwen': formatQwen,
349
372
  'translategemma': formatTranslateGemma,
350
373
  };
351
374
 
@@ -363,4 +386,5 @@ export function formatChatMessages(messages, templateType) {
363
386
  export const formatGemmaChat = formatTurnBased;
364
387
  export const formatLlama3Chat = formatHeaderBased;
365
388
  export const formatGptOssChat = formatChannelBased;
389
+ export const formatQwenChat = formatQwen;
366
390
  export const formatTranslateGemmaChat = formatTranslateGemma;
@@ -148,6 +148,10 @@ export interface ParsedModelConfig {
148
148
  slidingWindow: number | null;
149
149
  ropeTheta: number;
150
150
  ropeLocalTheta: number | null;
151
+ ropeRotaryDim: number;
152
+ ropeInterleaved: boolean;
153
+ mropeSection: number[] | null;
154
+ partialRotaryFactor: number | null;
151
155
  ropeScale: number;
152
156
  ropeLocalScale: number;
153
157
  ropeScalingType: string | null;
@@ -21,6 +21,28 @@ function assertSupportedRuntimeModelType(manifest) {
21
21
  );
22
22
  }
23
23
 
24
+ function resolveRotaryDim(headDim, partialRotaryFactor, modelId) {
25
+ if (partialRotaryFactor == null) {
26
+ return headDim;
27
+ }
28
+ if (typeof partialRotaryFactor !== 'number' || Number.isNaN(partialRotaryFactor)) {
29
+ throw new Error(`Manifest "${modelId}" has invalid rope.partialRotaryFactor.`);
30
+ }
31
+ if (partialRotaryFactor <= 0 || partialRotaryFactor > 1) {
32
+ throw new Error(
33
+ `Manifest "${modelId}" requires 0 < rope.partialRotaryFactor <= 1; got ${partialRotaryFactor}.`
34
+ );
35
+ }
36
+ const rotaryDim = Math.trunc(headDim * partialRotaryFactor);
37
+ if (rotaryDim <= 0 || (rotaryDim % 2) !== 0) {
38
+ throw new Error(
39
+ `Manifest "${modelId}" resolves rope rotary dim ${rotaryDim} from headDim=${headDim} ` +
40
+ `and partialRotaryFactor=${partialRotaryFactor}, but rotary dim must be a positive even integer.`
41
+ );
42
+ }
43
+ return rotaryDim;
44
+ }
45
+
24
46
  export function getStopTokenIds(manifest) {
25
47
  const eosTokenId = manifest?.eos_token_id;
26
48
  if (Array.isArray(eosTokenId)) return eosTokenId;
@@ -112,11 +134,10 @@ function resolveIntermediateSizeForRuntime(manifest, inf, arch, modelId) {
112
134
  if (inferred == null || inferred === fromArch) {
113
135
  return fromArch;
114
136
  }
115
- log.warn(
116
- 'Config',
117
- `Manifest "${modelId}" has intermediateSize=${fromArch}, inferred ${inferred} from FFN tensor shapes; using inferred value.`
137
+ throw new Error(
138
+ `Manifest "${modelId}" has intermediateSize=${fromArch}, but FFN tensors imply ${inferred}. ` +
139
+ 'Re-convert the model so manifest architecture matches the weights.'
118
140
  );
119
- return inferred;
120
141
  }
121
142
 
122
143
  // =============================================================================
@@ -130,7 +151,14 @@ export function hasManifestInference(manifest) {
130
151
 
131
152
 
132
153
  export function validateRequiredInferenceFields(inf, modelId) {
133
-
154
+ inf = inf ?? {};
155
+ inf.attention = inf.attention ?? {};
156
+ inf.normalization = inf.normalization ?? {};
157
+ inf.ffn = inf.ffn ?? {};
158
+ inf.rope = inf.rope ?? {};
159
+ inf.output = inf.output ?? {};
160
+ inf.layerPattern = inf.layerPattern ?? {};
161
+ inf.chatTemplate = inf.chatTemplate ?? {};
134
162
  const errors = [];
135
163
 
136
164
  // Attention fields - non-nullable required
@@ -201,6 +229,20 @@ export function validateRequiredInferenceFields(inf, modelId) {
201
229
  if (inf.rope.ropeLocalTheta === undefined) {
202
230
  errors.push('rope.ropeLocalTheta must be explicitly set (null for no local theta, or number)');
203
231
  }
232
+ if (inf.rope.mropeInterleaved == null) {
233
+ errors.push('rope.mropeInterleaved is required');
234
+ }
235
+ if (inf.rope.mropeSection === undefined) {
236
+ errors.push('rope.mropeSection must be explicitly set (null when unused, or an array of positive integers)');
237
+ }
238
+ if (inf.rope.partialRotaryFactor === undefined) {
239
+ errors.push('rope.partialRotaryFactor must be explicitly set (null when unused, or a number in (0, 1])');
240
+ } else {
241
+ const factor = inf.rope.partialRotaryFactor;
242
+ if (factor !== null && (typeof factor !== 'number' || Number.isNaN(factor) || factor <= 0 || factor > 1)) {
243
+ errors.push('rope.partialRotaryFactor must be a number in (0, 1] or null');
244
+ }
245
+ }
204
246
 
205
247
  // Output fields - non-nullable required
206
248
  if (inf.output.tieWordEmbeddings == null) {
@@ -458,6 +500,26 @@ export function toParsedConfigFromMerged(merged, manifest) {
458
500
  const ropeScalingType = inf.rope.ropeScalingType;
459
501
  const ropeLocalScale = inf.rope.ropeLocalScalingFactor ?? ropeScale;
460
502
  const ropeLocalScalingType = inf.rope.ropeLocalScalingType ?? ropeScalingType;
503
+ const partialRotaryFactor = inf.rope.partialRotaryFactor;
504
+ const ropeInterleaved = inf.rope.mropeInterleaved === true;
505
+ const mropeSection = Array.isArray(inf.rope.mropeSection)
506
+ ? inf.rope.mropeSection.map((entry) => Math.trunc(Number(entry)))
507
+ : null;
508
+ const ropeRotaryDim = resolveRotaryDim(arch.headDim, partialRotaryFactor, merged.modelId);
509
+ if (mropeSection && mropeSection.some((entry) => !Number.isFinite(entry) || entry <= 0)) {
510
+ throw new Error(
511
+ `Manifest "${merged.modelId}" has invalid rope.mropeSection; expected positive integers.`
512
+ );
513
+ }
514
+ if (ropeInterleaved && mropeSection) {
515
+ const doubledMropeDim = mropeSection.reduce((sum, entry) => sum + entry, 0) * 2;
516
+ if (doubledMropeDim !== ropeRotaryDim) {
517
+ throw new Error(
518
+ `Manifest "${merged.modelId}" declares rope.mropeSection=${JSON.stringify(mropeSection)}, ` +
519
+ `which expands to rotary dim ${doubledMropeDim}, but the resolved rotary dim is ${ropeRotaryDim}.`
520
+ );
521
+ }
522
+ }
461
523
 
462
524
  // Build ropeScaling object from manifest values if scaling is enabled
463
525
  // Include YARN params when present
@@ -532,6 +594,10 @@ export function toParsedConfigFromMerged(merged, manifest) {
532
594
  slidingWindow: inf.attention.slidingWindow,
533
595
  ropeTheta: inf.rope.ropeTheta,
534
596
  ropeLocalTheta: inf.rope.ropeLocalTheta,
597
+ ropeRotaryDim,
598
+ ropeInterleaved,
599
+ mropeSection,
600
+ partialRotaryFactor,
535
601
  ropeScale,
536
602
  ropeLocalScale,
537
603
  ropeScalingType,
@@ -319,14 +319,8 @@ export async function embed(tokenIds, embedBuffer, config) {
319
319
  const firstTokenId = tokenIdArray[0];
320
320
  const bytesPerElement = useF16 ? 2 : 4;
321
321
  const sampleSize = Math.min(32 * bytesPerElement, hiddenSize * bytesPerElement);
322
- const staging = device.createBuffer({ size: sampleSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ });
323
- const enc = device.createCommandEncoder();
324
- enc.copyBufferToBuffer(gatherOutput.buffer, 0, staging, 0, sampleSize);
325
- device.queue.submit([enc.finish()]);
326
- await staging.mapAsync(GPUMapMode.READ);
327
- const data = decodeReadback(staging.getMappedRange().slice(0), gatherOptions.outputDtype);
328
- staging.unmap();
329
- staging.destroy();
322
+ const readback = await readBuffer(gatherOutput.buffer, sampleSize);
323
+ const data = decodeReadback(readback, gatherOptions.outputDtype);
330
324
 
331
325
  // Compute statistics
332
326
  let sum = 0, sumSq = 0;
@@ -1,4 +1,3 @@
1
- import { log } from '../../../debug/index.js';
2
1
  import { resolveKernelPath } from '../../../config/kernel-path-loader.js';
3
2
  import { selectRuleValue } from '../../../rules/rule-registry.js';
4
3
  import {
@@ -9,19 +8,36 @@ import {
9
8
  export const PRIMARY_EXECUTION_PLAN_ID = 'primary';
10
9
  export const FINITENESS_FALLBACK_EXECUTION_PLAN_ID = 'finiteness_fallback';
11
10
 
12
- function normalizePositiveInt(value, fallback, label) {
13
- if (!Number.isFinite(value)) return fallback;
14
- const normalized = Math.floor(value);
15
- if (normalized >= 1) return normalized;
16
- log.warn('Pipeline', `[ExecutionPlan] ${label}=${value} is invalid; using ${fallback}.`);
17
- return fallback;
11
+ function assertOptionalBoolean(value, label) {
12
+ if (value === undefined) {
13
+ return undefined;
14
+ }
15
+ if (typeof value !== 'boolean') {
16
+ throw new Error(`[ExecutionPlan] ${label} must be boolean when provided; got ${JSON.stringify(value)}.`);
17
+ }
18
+ return value;
19
+ }
20
+
21
+ function assertOptionalPositiveInt(value, label) {
22
+ if (value === undefined) {
23
+ return undefined;
24
+ }
25
+ if (!Number.isInteger(value) || value < 1) {
26
+ throw new Error(`[ExecutionPlan] ${label} must be a positive integer when provided; got ${JSON.stringify(value)}.`);
27
+ }
28
+ return value;
18
29
  }
19
30
 
20
- function normalizeStopCheckMode(value, fallback) {
21
- if (value === 'batch' || value === 'per-token') {
22
- return value;
31
+ function assertOptionalStopCheckMode(value) {
32
+ if (value === undefined) {
33
+ return undefined;
23
34
  }
24
- return fallback;
35
+ if (value !== 'batch' && value !== 'per-token') {
36
+ throw new Error(
37
+ `[ExecutionPlan] stopCheckMode must be "batch" or "per-token" when provided; got ${JSON.stringify(value)}.`
38
+ );
39
+ }
40
+ return value;
25
41
  }
26
42
 
27
43
  function resolveFallbackActivationDtype(primaryActivationDtype) {
@@ -42,56 +58,48 @@ function resolveFallbackActivationDtype(primaryActivationDtype) {
42
58
  function resolveFallbackKernelPath(primaryKernelPath) {
43
59
  const primaryKernelPathId = primaryKernelPath?.id ?? null;
44
60
  if (!primaryKernelPathId) {
45
- return {
46
- kernelPath: null,
47
- kernelPathId: null,
48
- kernelPathSource: 'none',
49
- };
61
+ throw new Error(
62
+ '[ExecutionPlan] F16 finiteness fallback requires a primary kernel path with a stable id. ' +
63
+ 'Add a registered kernelPath id and a finiteness fallback rule.'
64
+ );
50
65
  }
51
66
 
52
- const primaryKernelPathIsObject = typeof primaryKernelPath === 'object' && primaryKernelPath !== null;
67
+ const explicitFallbackKernelPathId = typeof primaryKernelPath?.finitenessFallbackKernelPathId === 'string'
68
+ && primaryKernelPath.finitenessFallbackKernelPathId.length > 0
69
+ ? primaryKernelPath.finitenessFallbackKernelPathId
70
+ : null;
53
71
 
54
- const fallbackKernelPathId = selectRuleValue(
72
+ const fallbackKernelPathId = explicitFallbackKernelPathId ?? selectRuleValue(
55
73
  'inference',
56
74
  'kernelPath',
57
75
  'finitenessFallback',
58
76
  { kernelPathId: primaryKernelPathId }
59
77
  );
60
78
 
61
- const resolvedKernelPathId = typeof fallbackKernelPathId === 'string' && fallbackKernelPathId.length > 0
62
- ? fallbackKernelPathId
63
- : primaryKernelPathId;
64
- const kernelPathSource = resolvedKernelPathId === primaryKernelPathId ? 'self' : 'rule';
79
+ if (typeof fallbackKernelPathId !== 'string' || fallbackKernelPathId.length === 0) {
80
+ throw new Error(
81
+ `[ExecutionPlan] Missing finiteness fallback kernel path mapping for "${primaryKernelPathId}". ` +
82
+ 'Add an explicit rule in src/rules/inference/kernel-path.rules.json.'
83
+ );
84
+ }
65
85
 
66
- if (kernelPathSource === 'self') {
67
- log.warn(
68
- 'Pipeline',
69
- `[ExecutionPlan] No finiteness fallback kernel path mapping for "${primaryKernelPathId}"; using primary kernel path.`
86
+ if (fallbackKernelPathId === primaryKernelPathId) {
87
+ throw new Error(
88
+ `[ExecutionPlan] Invalid finiteness fallback mapping for "${primaryKernelPathId}": ` +
89
+ `fallback kernel path resolves to itself. Add an explicit widening path.`
70
90
  );
71
91
  }
72
92
 
73
93
  try {
74
- const kernelPath = resolveKernelPath(resolvedKernelPathId);
94
+ const kernelPath = resolveKernelPath(fallbackKernelPathId);
75
95
  return {
76
96
  kernelPath,
77
- kernelPathId: resolvedKernelPathId,
78
- kernelPathSource,
97
+ kernelPathId: fallbackKernelPathId,
98
+ kernelPathSource: 'rule',
79
99
  };
80
100
  } catch (error) {
81
- if (primaryKernelPathIsObject) {
82
- log.warn(
83
- 'Pipeline',
84
- `[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${resolvedKernelPathId}" ` +
85
- `for "${primaryKernelPathId}", using inline kernel path as fallback. ${error?.message || error}`
86
- );
87
- return {
88
- kernelPath: primaryKernelPath,
89
- kernelPathId: primaryKernelPathId,
90
- kernelPathSource,
91
- };
92
- }
93
101
  throw new Error(
94
- `[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${resolvedKernelPathId}" ` +
102
+ `[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${fallbackKernelPathId}" ` +
95
103
  `(from "${primaryKernelPathId}"): ${error?.message || error}`
96
104
  );
97
105
  }
@@ -252,11 +260,17 @@ export function activateFallbackExecutionPlan(container) {
252
260
 
253
261
  function resolveExecutionOverrides(options = {}) {
254
262
  return {
255
- disableCommandBatching: options.disableCommandBatching,
256
- disableMultiTokenDecode: options.disableMultiTokenDecode,
257
- batchSize: options.batchSize,
258
- stopCheckMode: options.stopCheckMode,
259
- maxTokens: options.maxTokens,
263
+ disableCommandBatching: assertOptionalBoolean(
264
+ options.disableCommandBatching,
265
+ 'disableCommandBatching'
266
+ ),
267
+ disableMultiTokenDecode: assertOptionalBoolean(
268
+ options.disableMultiTokenDecode,
269
+ 'disableMultiTokenDecode'
270
+ ),
271
+ batchSize: assertOptionalPositiveInt(options.batchSize, 'batchSize'),
272
+ stopCheckMode: assertOptionalStopCheckMode(options.stopCheckMode),
273
+ maxTokens: assertOptionalPositiveInt(options.maxTokens, 'maxTokens'),
260
274
  };
261
275
  }
262
276
 
@@ -276,9 +290,9 @@ export function resolveExecutionSessionPlan(container, options = {}) {
276
290
  deferredRoundingWindowTokens: activePlan.deferredRoundingWindowTokens,
277
291
  disableCommandBatching: overrides.disableCommandBatching ?? activePlan.defaultDisableCommandBatching,
278
292
  disableMultiTokenDecode: overrides.disableMultiTokenDecode ?? activePlan.defaultDisableMultiTokenDecode,
279
- batchSize: normalizePositiveInt(overrides.batchSize, activePlan.defaultBatchSize, 'batchSize'),
280
- stopCheckMode: normalizeStopCheckMode(overrides.stopCheckMode, activePlan.defaultStopCheckMode),
281
- maxTokens: normalizePositiveInt(overrides.maxTokens, activePlan.defaultMaxTokens, 'maxTokens'),
293
+ batchSize: overrides.batchSize ?? activePlan.defaultBatchSize,
294
+ stopCheckMode: overrides.stopCheckMode ?? activePlan.defaultStopCheckMode,
295
+ maxTokens: overrides.maxTokens ?? activePlan.defaultMaxTokens,
282
296
  readbackInterval: activePlan.readbackInterval,
283
297
  ringTokens: activePlan.ringTokens,
284
298
  ringStop: activePlan.ringStop,
@@ -0,0 +1,59 @@
1
+ export declare function cloneJson<T>(value: T): T;
2
+ export declare function validateManifestSessionDefaultsContract(manifestInference: Record<string, unknown> | null): void;
3
+ export declare function isPhaseMatch(phase: string, targetPhase: string): boolean;
4
+ export declare function stepHasLayer(step: Record<string, unknown>, layerIdx: number): boolean;
5
+ export declare function normalizePhase(value: unknown, label: string): string;
6
+ export declare function normalizeSection(value: unknown, label: string): string;
7
+ export declare function normalizeSlot(value: unknown, label: string): string;
8
+ export declare function createSourceTrace(): { session: Record<string, unknown>; steps: Record<string, unknown> };
9
+ export declare function setSourceTrace(trace: Record<string, unknown>, path: string, source: string): void;
10
+ export declare function collectLeafPaths(value: unknown, prefix?: string[], out?: string[][]): string[][];
11
+ export declare function hasDefinedPath(root: unknown, pathSegments: string[]): boolean;
12
+ export declare function validateStepShape(step: Record<string, unknown>, index: number): void;
13
+ export declare function assertExecutionRuntimeOverlay(runtimeInference: Record<string, unknown> | null | undefined): void;
14
+ export declare function validateUniqueStepIds(steps: Array<Record<string, unknown>>): void;
15
+ export declare function hasExecutionV0(manifestInference: Record<string, unknown> | null | undefined): boolean;
16
+ export declare function assertExecutionV0Schema(manifestInference: Record<string, unknown> | null | undefined): void;
17
+ export declare function applyExecutionPatchAtomic(
18
+ baseSteps: Array<Record<string, unknown>>,
19
+ patch: Record<string, unknown> | null | undefined
20
+ ): Array<Record<string, unknown>>;
21
+ export declare function indexRuntimePatchMeta(
22
+ patch: Record<string, unknown> | null | undefined
23
+ ): {
24
+ addedSteps: Set<string>;
25
+ precisionFieldsByStep: Map<string, Set<string>>;
26
+ kvIOFieldsByStep: Set<string>;
27
+ };
28
+ export declare function requireSessionActivationDtype(
29
+ sessionDefaults: Record<string, unknown> | null | undefined,
30
+ label?: string
31
+ ): string;
32
+ export declare function createInitialSlotDtypes(sessionDefaults: Record<string, unknown>): Map<string, string>;
33
+ export declare function resolvePhaseSteps(
34
+ phase: string,
35
+ steps: Array<Record<string, unknown>>,
36
+ sessionDefaults: Record<string, unknown>,
37
+ profileIndex: Map<string, unknown>,
38
+ policies: Record<string, unknown>,
39
+ options?: Record<string, unknown>
40
+ ): {
41
+ steps: Array<Record<string, unknown>>;
42
+ finalSlotDtypes: Map<string, string>;
43
+ };
44
+ export declare function normalizeRuntimeSessionForExecutionV0(
45
+ runtimeSession: Record<string, unknown> | null | undefined,
46
+ manifestInference: Record<string, unknown> | null | undefined,
47
+ defaultComputeDefaults: Record<string, unknown>
48
+ ): Record<string, unknown> | null | undefined;
49
+ export declare function validatePhaseBoundaryCompatibility(options: Record<string, unknown>): void;
50
+ export declare function assertKVLayoutExecutionCompatibility(
51
+ steps: Array<Record<string, unknown>>,
52
+ sessionDefaults: Record<string, unknown>
53
+ ): void;
54
+ export declare const buildKernelProfileKey: (
55
+ kernelRef: Record<string, unknown> | null | undefined,
56
+ step?: Record<string, unknown> | null | undefined
57
+ ) => string;
58
+ export declare const indexKernelProfiles: (sessionDefaults: Record<string, unknown>) => Map<string, unknown>;
59
+ export declare const normalizeDtype: (value: unknown, label: string) => string;