@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -0,0 +1,9 @@
1
+ import type { Tensor, TensorDType } from '../gpu/tensor.js';
2
+
3
+ export declare function createUploadedTensor(
4
+ data: ArrayBufferView,
5
+ dtype: TensorDType,
6
+ shape: number[],
7
+ label: string,
8
+ usage?: number | undefined
9
+ ): Tensor;
@@ -0,0 +1,13 @@
1
+ import { acquireBuffer, releaseBuffer, uploadData } from '../memory/buffer-pool.js';
2
+ import { createTensor } from '../gpu/tensor.js';
3
+
4
+ export function createUploadedTensor(data, dtype, shape, label, usage = undefined) {
5
+ const buffer = acquireBuffer(data.byteLength, usage, label);
6
+ try {
7
+ uploadData(buffer, data);
8
+ return createTensor(buffer, dtype, [...shape], label);
9
+ } catch (error) {
10
+ releaseBuffer(buffer);
11
+ throw error;
12
+ }
13
+ }
@@ -1,17 +1,15 @@
1
1
  import { AutogradTape } from './autograd.js';
2
2
  import { loadBackwardRegistry } from '../config/backward-registry-loader.js';
3
3
  import { runScale } from '../gpu/kernels/index.js';
4
- import { acquireBuffer, uploadData, releaseBuffer } from '../memory/buffer-pool.js';
5
- import { createTensor } from '../gpu/tensor.js';
4
+ import { releaseBuffer } from '../memory/buffer-pool.js';
6
5
  import { createCrossEntropyObjective } from './objectives/cross_entropy.js';
6
+ import { createUploadedTensor } from './tensor-factory.js';
7
7
 
8
8
  function createLossGradient(loss, lossScale) {
9
9
  const lossElements = loss.shape.reduce((acc, value) => acc * value, 1);
10
10
  const gradData = new Float32Array(lossElements);
11
11
  gradData.fill(lossScale);
12
- const gradBuf = acquireBuffer(gradData.byteLength, undefined, 'loss_grad_output');
13
- uploadData(gradBuf, gradData);
14
- return createTensor(gradBuf, 'f32', [...loss.shape], 'loss_grad_output');
12
+ return createUploadedTensor(gradData, 'f32', loss.shape, 'loss_grad_output');
15
13
  }
16
14
 
17
15
  function normalizeLossResult(value) {
@@ -1,6 +1,6 @@
1
- import { acquireBuffer, uploadData, readBuffer, releaseBuffer } from '../memory/buffer-pool.js';
2
- import { createTensor } from '../gpu/tensor.js';
1
+ import { readBuffer, releaseBuffer } from '../memory/buffer-pool.js';
3
2
  import { resolveUlScheduledLambda } from './ul_schedule.js';
3
+ import { createUploadedTensor } from './tensor-factory.js';
4
4
 
5
5
  function xorshift32(value) {
6
6
  let x = value >>> 0;
@@ -79,9 +79,7 @@ export async function buildNoisyLatentsFromInputTensor(inputTensor, ulConfig, op
79
79
  noisy[i] = alpha * inputData[i] + sigma * n;
80
80
  }
81
81
 
82
- const noisyBuffer = acquireBuffer(noisy.byteLength, undefined, 'ul_noisy_latents');
83
- uploadData(noisyBuffer, noisy);
84
- const noisyTensor = createTensor(noisyBuffer, 'f32', [...inputTensor.shape], 'ul_noisy_latents');
82
+ const noisyTensor = createUploadedTensor(noisy, 'f32', inputTensor.shape, 'ul_noisy_latents');
85
83
  const cleanStats = summarizeArray(inputData);
86
84
  const noiseStats = summarizeArray(noise);
87
85
  const noisyStats = summarizeArray(noisy);
@@ -4,6 +4,11 @@ import { resolve } from 'node:path';
4
4
  import { isPlainObject } from '../utils/plain-object.js';
5
5
  import { sha256Hex } from '../utils/sha256.js';
6
6
  import { VALID_LORA_TARGET_MODULES } from '../config/schema/adapter.schema.js';
7
+ import {
8
+ DEFAULT_TRAINING_GRADIENT_CONFIG,
9
+ DEFAULT_TRAINING_OPTIMIZER_CONFIG,
10
+ DEFAULT_TRAINING_PRECISION_CONFIG,
11
+ } from '../config/schema/training.schema.js';
7
12
 
8
13
  export const TRAINING_WORKLOAD_SCHEMA_VERSION = 1;
9
14
  export const TRAINING_WORKLOAD_KINDS = Object.freeze(['lora', 'distill', 'ul']);
@@ -140,64 +145,57 @@ function inferLegacyKind(payload, contextLabel) {
140
145
  if (explicitKind) return explicitKind;
141
146
  const workloadKind = typeof payload?.workloadKind === 'string' ? payload.workloadKind.trim() : '';
142
147
  if (workloadKind) return workloadKind;
143
- const id = String(payload?.id || '').trim();
144
- if (id.startsWith('distill-')) return 'distill';
145
- if (id.startsWith('lora-')) return 'lora';
146
- if (id.startsWith('ul-') || id.startsWith('ul_training') || id.startsWith('ul-training')) {
148
+ const trainingTests = Array.isArray(payload?.trainingTests)
149
+ ? payload.trainingTests.map((entry) => String(entry))
150
+ : null;
151
+ const hasLegacyUlShape = trainingTests
152
+ && trainingTests.length > 0
153
+ && trainingTests.every((entry) => entry === 'ul-stage1' || entry === 'ul-stage2')
154
+ && Number.isInteger(Number(payload?.trainingBenchSteps));
155
+ if (hasLegacyUlShape) {
147
156
  return 'ul';
148
157
  }
149
- if (Array.isArray(payload?.trainingTests) && payload.trainingTests.every((entry) => LEGACY_DISTILL_TEST_IDS.includes(String(entry)))) {
150
- return 'distill';
151
- }
152
158
  throw new Error(`${contextLabel}.kind is required.`);
153
159
  }
154
160
 
155
161
  function normalizeScheduler(value, label) {
156
- const scheduler = asObject(value, label, { optional: true }) || {};
162
+ const scheduler = asObject(value, label);
157
163
  return {
158
- enabled: scheduler.enabled === true,
159
- type: asNonEmptyString(scheduler.type ?? 'constant', `${label}.type`),
160
- warmupSteps: asNonNegativeInteger(
161
- scheduler.warmupSteps ?? 0,
162
- `${label}.warmupSteps`,
163
- { optional: true }
164
- ) ?? 0,
165
- stepSize: asPositiveInteger(scheduler.stepSize ?? 1, `${label}.stepSize`, { optional: true }) ?? 1,
166
- gamma: asFiniteNumber(scheduler.gamma ?? 1, `${label}.gamma`, { optional: true }) ?? 1,
167
- totalSteps: asPositiveInteger(scheduler.totalSteps ?? 1, `${label}.totalSteps`, { optional: true }) ?? 1,
168
- minLr: asFiniteNumber(scheduler.minLr ?? 0, `${label}.minLr`, { optional: true }) ?? 0,
164
+ enabled: asBoolean(scheduler.enabled, `${label}.enabled`),
165
+ type: asNonEmptyString(scheduler.type, `${label}.type`),
166
+ warmupSteps: asNonNegativeInteger(scheduler.warmupSteps, `${label}.warmupSteps`),
167
+ stepSize: asPositiveInteger(scheduler.stepSize, `${label}.stepSize`),
168
+ gamma: asFiniteNumber(scheduler.gamma, `${label}.gamma`),
169
+ totalSteps: asPositiveInteger(scheduler.totalSteps, `${label}.totalSteps`),
170
+ minLr: asFiniteNumber(scheduler.minLr, `${label}.minLr`),
169
171
  };
170
172
  }
171
173
 
172
174
  function normalizeTrainingConfig(value, label) {
173
175
  const training = asObject(value, label);
174
176
  const optimizer = asObject(training.optimizer, `${label}.optimizer`);
175
- const precision = isPlainObject(training.precision)
176
- ? training.precision
177
- : { activations: training.precision ?? 'f16' };
178
- const gradientClipping = isPlainObject(training.gradientClipping)
179
- ? training.gradientClipping
180
- : { maxNorm: training.gradientClipping ?? 1 };
177
+ const precision = asObject(training.precision, `${label}.precision`);
178
+ const gradientClipping = asObject(training.gradientClipping, `${label}.gradientClipping`);
181
179
  return {
182
180
  optimizer: {
183
- type: asNonEmptyString(optimizer.type ?? 'adam', `${label}.optimizer.type`),
181
+ type: asNonEmptyString(optimizer.type, `${label}.optimizer.type`),
184
182
  lr: asFiniteNumber(optimizer.lr, `${label}.optimizer.lr`),
185
- beta1: asFiniteNumber(optimizer.beta1 ?? 0.9, `${label}.optimizer.beta1`, { optional: true }) ?? 0.9,
186
- beta2: asFiniteNumber(optimizer.beta2 ?? 0.999, `${label}.optimizer.beta2`, { optional: true }) ?? 0.999,
187
- eps: asFiniteNumber(optimizer.eps ?? 1e-8, `${label}.optimizer.eps`, { optional: true }) ?? 1e-8,
188
- weightDecay: asFiniteNumber(optimizer.weightDecay ?? 0, `${label}.optimizer.weightDecay`, { optional: true }) ?? 0,
183
+ beta1: asFiniteNumber(optimizer.beta1, `${label}.optimizer.beta1`),
184
+ beta2: asFiniteNumber(optimizer.beta2, `${label}.optimizer.beta2`),
185
+ eps: asFiniteNumber(optimizer.eps, `${label}.optimizer.eps`),
186
+ weightDecay: asFiniteNumber(optimizer.weightDecay, `${label}.optimizer.weightDecay`),
189
187
  scheduler: normalizeScheduler(optimizer.scheduler, `${label}.optimizer.scheduler`),
190
188
  },
191
189
  batchSize: asPositiveInteger(training.batchSize, `${label}.batchSize`),
192
- accumSteps: asPositiveInteger(training.accumSteps ?? 1, `${label}.accumSteps`, { optional: true }) ?? 1,
190
+ accumSteps: asPositiveInteger(training.accumSteps, `${label}.accumSteps`),
193
191
  steps: asPositiveInteger(training.steps, `${label}.steps`),
194
192
  precision: {
195
- activations: asNonEmptyString(precision.activations ?? 'f16', `${label}.precision.activations`),
196
- gradients: asNonEmptyString(precision.gradients ?? 'f32', `${label}.precision.gradients`),
197
- loraParams: asNonEmptyString(precision.loraParams ?? 'f32', `${label}.precision.loraParams`),
193
+ activations: asNonEmptyString(precision.activations, `${label}.precision.activations`),
194
+ gradients: asNonEmptyString(precision.gradients, `${label}.precision.gradients`),
195
+ loraParams: asNonEmptyString(precision.loraParams, `${label}.precision.loraParams`),
198
196
  },
199
197
  gradientClipping: {
200
- maxNorm: asFiniteNumber(gradientClipping.maxNorm ?? 1, `${label}.gradientClipping.maxNorm`, { optional: true }) ?? 1,
198
+ maxNorm: asFiniteNumber(gradientClipping.maxNorm, `${label}.gradientClipping.maxNorm`),
201
199
  },
202
200
  };
203
201
  }
@@ -215,7 +213,7 @@ function normalizeEvalDatasets(value, label) {
215
213
  id: asNonEmptyString(dataset.id, `${label}[${index}].id`),
216
214
  datasetPath: asNonEmptyString(dataset.datasetPath ?? dataset.path, `${label}[${index}].datasetPath`),
217
215
  evalKind: asEnum(
218
- dataset.evalKind ?? dataset.kind ?? 'text_generation',
216
+ dataset.evalKind ?? dataset.kind,
219
217
  `${label}[${index}].evalKind`,
220
218
  TRAINING_EVAL_KINDS
221
219
  ),
@@ -230,11 +228,7 @@ function normalizeEvalDatasets(value, label) {
230
228
  `${label}[${index}].decodePolicy.maxTokens`,
231
229
  { optional: true }
232
230
  ),
233
- stopOnEos: asBoolean(
234
- decodePolicy.stopOnEos ?? true,
235
- `${label}[${index}].decodePolicy.stopOnEos`,
236
- { optional: true }
237
- ) ?? true,
231
+ stopOnEos: asBoolean(decodePolicy.stopOnEos, `${label}[${index}].decodePolicy.stopOnEos`),
238
232
  }
239
233
  : null,
240
234
  scoreboardColumns: asStringArray(
@@ -267,11 +261,11 @@ function normalizeStagePlan(value, label) {
267
261
  return value.map((entry, index) => {
268
262
  const stage = asObject(entry, `${label}[${index}]`);
269
263
  const selectionMetric = asNonEmptyString(
270
- stage.selectionMetric ?? stage.metric ?? 'bleu',
264
+ stage.selectionMetric ?? stage.metric,
271
265
  `${label}[${index}].selectionMetric`
272
266
  );
273
267
  const selectionGoal = asEnum(
274
- stage.selectionGoal ?? stage.goal ?? 'max',
268
+ stage.selectionGoal ?? stage.goal,
275
269
  `${label}[${index}].selectionGoal`,
276
270
  TRAINING_SELECTION_GOALS
277
271
  );
@@ -281,15 +275,12 @@ function normalizeStagePlan(value, label) {
281
275
  objective: asNonEmptyString(stage.objective, `${label}[${index}].objective`),
282
276
  steps: asPositiveInteger(stage.steps, `${label}[${index}].steps`),
283
277
  checkpointEvery: asPositiveInteger(
284
- stage.checkpointEvery ?? stage.steps,
278
+ stage.checkpointEvery,
285
279
  `${label}[${index}].checkpointEvery`
286
280
  ),
287
281
  selectionMetric,
288
282
  selectionGoal,
289
- evalSchedule: asNonEmptyString(
290
- stage.evalSchedule ?? 'on_checkpoint',
291
- `${label}[${index}].evalSchedule`
292
- ),
283
+ evalSchedule: asNonEmptyString(stage.evalSchedule, `${label}[${index}].evalSchedule`),
293
284
  };
294
285
  });
295
286
  }
@@ -306,29 +297,29 @@ function normalizeLoraConfig(value, label) {
306
297
  }
307
298
  }
308
299
  return {
309
- datasetFormat: asNonEmptyString(lora.datasetFormat ?? 'prompt_completion_jsonl', `${label}.datasetFormat`),
310
- taskType: asNonEmptyString(lora.taskType ?? 'text_generation', `${label}.taskType`),
300
+ datasetFormat: asNonEmptyString(lora.datasetFormat, `${label}.datasetFormat`),
301
+ taskType: asNonEmptyString(lora.taskType, `${label}.taskType`),
311
302
  adapter: {
312
303
  rank: asPositiveInteger(adapter.rank, `${label}.adapter.rank`),
313
304
  alpha: asFiniteNumber(adapter.alpha, `${label}.adapter.alpha`),
314
- dropout: asFiniteNumber(adapter.dropout ?? 0, `${label}.adapter.dropout`, { optional: true }) ?? 0,
305
+ dropout: asFiniteNumber(adapter.dropout, `${label}.adapter.dropout`),
315
306
  targetModules,
316
307
  },
317
308
  freeze: normalizeFreezeConfig(lora.freeze, `${label}.freeze`),
318
309
  export: exportConfig
319
310
  ? {
320
- enabled: exportConfig.enabled !== false,
321
- atCheckpoints: exportConfig.atCheckpoints === true,
322
- select: asNonEmptyString(exportConfig.select ?? 'best', `${label}.export.select`),
311
+ enabled: asBoolean(exportConfig.enabled, `${label}.export.enabled`),
312
+ atCheckpoints: asBoolean(exportConfig.atCheckpoints, `${label}.export.atCheckpoints`),
313
+ select: asNonEmptyString(exportConfig.select, `${label}.export.select`),
323
314
  id: asNonEmptyString(exportConfig.id, `${label}.export.id`, { optional: true }),
324
315
  name: asNonEmptyString(exportConfig.name, `${label}.export.name`, { optional: true }),
325
- format: asNonEmptyString(exportConfig.format ?? 'manifest_json', `${label}.export.format`),
316
+ format: asNonEmptyString(exportConfig.format, `${label}.export.format`),
326
317
  }
327
318
  : null,
328
319
  activation: activation
329
320
  ? {
330
- enabled: activation.enabled === true,
331
- autoActivate: activation.autoActivate === true,
321
+ enabled: asBoolean(activation.enabled, `${label}.activation.enabled`),
322
+ autoActivate: asBoolean(activation.autoActivate, `${label}.activation.autoActivate`),
332
323
  smokePrompt: asNonEmptyString(activation.smokePrompt, `${label}.activation.smokePrompt`, { optional: true }),
333
324
  }
334
325
  : null,
@@ -339,27 +330,21 @@ function normalizeDistillConfig(value, label) {
339
330
  const distill = asObject(value, label);
340
331
  return {
341
332
  stagePlan: normalizeStagePlan(distill.stagePlan, `${label}.stagePlan`),
342
- studentGraphMode: asNonEmptyString(
343
- distill.studentGraphMode ?? 'transformer_full',
344
- `${label}.studentGraphMode`
345
- ),
346
- temperature: asFiniteNumber(distill.temperature ?? 1, `${label}.temperature`, { optional: true }) ?? 1,
347
- alphaKd: asFiniteNumber(distill.alphaKd ?? 1, `${label}.alphaKd`, { optional: true }) ?? 1,
348
- alphaCe: asFiniteNumber(distill.alphaCe ?? 0, `${label}.alphaCe`, { optional: true }) ?? 0,
349
- tripletMargin: asFiniteNumber(distill.tripletMargin ?? 0.2, `${label}.tripletMargin`, { optional: true }) ?? 0.2,
333
+ studentGraphMode: asNonEmptyString(distill.studentGraphMode, `${label}.studentGraphMode`),
334
+ temperature: asFiniteNumber(distill.temperature, `${label}.temperature`),
335
+ alphaKd: asFiniteNumber(distill.alphaKd, `${label}.alphaKd`),
336
+ alphaCe: asFiniteNumber(distill.alphaCe, `${label}.alphaCe`),
337
+ tripletMargin: asFiniteNumber(distill.tripletMargin, `${label}.tripletMargin`),
350
338
  sourceLangs: asStringArray(distill.sourceLangs, `${label}.sourceLangs`, { optional: true, allowEmpty: true }),
351
339
  targetLangs: asStringArray(distill.targetLangs, `${label}.targetLangs`, { optional: true, allowEmpty: true }),
352
340
  pairAllowlist: asStringArray(distill.pairAllowlist, `${label}.pairAllowlist`, { optional: true, allowEmpty: true }),
353
- strictPairContract: asBoolean(
354
- distill.strictPairContract ?? false,
355
- `${label}.strictPairContract`,
356
- { optional: true }
357
- ) ?? false,
341
+ strictPairContract: asBoolean(distill.strictPairContract, `${label}.strictPairContract`),
358
342
  subsetSpec: asObject(distill.subsetSpec, `${label}.subsetSpec`, { optional: true }),
359
343
  };
360
344
  }
361
345
 
362
346
  function normalizeLegacyUlPayload(payload, contextLabel) {
347
+ const optimizerOverrides = isPlainObject(payload.training?.optimizer) ? payload.training.optimizer : {};
363
348
  return {
364
349
  schemaVersion: asPositiveInteger(payload.schemaVersion, `${contextLabel}.schemaVersion`),
365
350
  kind: 'ul',
@@ -393,19 +378,25 @@ function normalizeLegacyUlPayload(payload, contextLabel) {
393
378
  TRAINING_WORKLOAD_SURFACE_SUPPORT
394
379
  ),
395
380
  training: normalizeTrainingConfig({
396
- optimizer: payload.training?.optimizer ?? {
397
- type: 'adam',
398
- lr: 2e-4,
381
+ optimizer: {
382
+ ...DEFAULT_TRAINING_OPTIMIZER_CONFIG,
383
+ ...optimizerOverrides,
384
+ scheduler: {
385
+ ...DEFAULT_TRAINING_OPTIMIZER_CONFIG.scheduler,
386
+ ...(isPlainObject(optimizerOverrides.scheduler) ? optimizerOverrides.scheduler : {}),
387
+ },
399
388
  },
400
389
  batchSize: payload.training?.batchSize ?? 1,
401
- accumSteps: payload.training?.accumSteps ?? 1,
390
+ accumSteps: payload.training?.accumSteps ?? DEFAULT_TRAINING_GRADIENT_CONFIG.accumSteps,
402
391
  steps: payload.training?.steps ?? payload.trainingBenchSteps ?? 1,
403
- precision: payload.training?.precision ?? {
404
- activations: 'f16',
405
- gradients: 'f32',
406
- loraParams: 'f32',
392
+ precision: {
393
+ ...DEFAULT_TRAINING_PRECISION_CONFIG,
394
+ ...(payload.training?.precision ?? {}),
395
+ },
396
+ gradientClipping: {
397
+ maxNorm: payload.training?.gradientClipping?.maxNorm
398
+ ?? DEFAULT_TRAINING_GRADIENT_CONFIG.maxNorm,
407
399
  },
408
- gradientClipping: payload.training?.gradientClipping ?? { maxNorm: 1 },
409
400
  }, `${contextLabel}.training`),
410
401
  pipeline: {
411
402
  legacyWorkloadType: 'ul',
@@ -9,7 +9,11 @@ export type ModelArchitecture =
9
9
  | 'gemma'
10
10
  | 'gemma2'
11
11
  | 'gemma3'
12
+ | 'embeddinggemma'
12
13
  | 'functiongemma'
14
+ | 'janus_text'
15
+ | 'lfm2'
16
+ | 'modernbert'
13
17
  | 'qwen2'
14
18
  | 'qwen3'
15
19
  | 'phi3'
@@ -19,6 +23,7 @@ export type ModelArchitecture =
19
23
  | 'deepseek'
20
24
  | 'mamba'
21
25
  | 'kimi_k2'
26
+ | 'translategemma'
22
27
  | 'transformer';
23
28
 
24
29
  /** Attention type variants */
package/src/version.js CHANGED
@@ -1,2 +1,2 @@
1
- export const DOPPLER_VERSION = '0.1.6';
1
+ export const DOPPLER_VERSION = '0.1.7';
2
2
  export const DOPPLER_PROVIDER_VERSION = DOPPLER_VERSION;
@@ -14,42 +14,43 @@ function parseArgs(argv) {
14
14
  const positional = [];
15
15
  for (let i = 0; i < argv.length; i += 1) {
16
16
  const arg = argv[i];
17
- if (arg === '--output-dir') {
18
- out.outputDir = argv[i + 1] ?? null;
17
+ const nextValue = () => {
18
+ const value = argv[i + 1];
19
+ if (value == null || String(value).startsWith('--')) {
20
+ throw new Error(`Missing value for ${arg}.`);
21
+ }
19
22
  i += 1;
23
+ return value;
24
+ };
25
+ if (arg === '--output-dir') {
26
+ out.outputDir = nextValue();
20
27
  continue;
21
28
  }
22
29
  if (arg === '--config') {
23
- out.configPath = argv[i + 1] ?? null;
24
- i += 1;
30
+ out.configPath = nextValue();
25
31
  continue;
26
32
  }
27
33
  if (arg === '--converter-config') {
28
34
  throw new Error('--converter-config has been removed. Use --config <path.json>.');
29
35
  }
30
36
  if (arg === '--workers') {
31
- execution.workers = argv[i + 1] ?? null;
32
- i += 1;
37
+ execution.workers = nextValue();
33
38
  continue;
34
39
  }
35
40
  if (arg === '--worker-policy') {
36
- execution.workerCountPolicy = argv[i + 1] ?? null;
37
- i += 1;
41
+ execution.workerCountPolicy = nextValue();
38
42
  continue;
39
43
  }
40
44
  if (arg === '--row-chunk-rows') {
41
- execution.rowChunkRows = argv[i + 1] ?? null;
42
- i += 1;
45
+ execution.rowChunkRows = nextValue();
43
46
  continue;
44
47
  }
45
48
  if (arg === '--row-chunk-min-tensor-bytes') {
46
- execution.rowChunkMinTensorBytes = argv[i + 1] ?? null;
47
- i += 1;
49
+ execution.rowChunkMinTensorBytes = nextValue();
48
50
  continue;
49
51
  }
50
52
  if (arg === '--max-in-flight-jobs') {
51
- execution.maxInFlightJobs = argv[i + 1] ?? null;
52
- i += 1;
53
+ execution.maxInFlightJobs = nextValue();
53
54
  continue;
54
55
  }
55
56
  if (arg === '--use-gpu-cast') {
@@ -57,12 +58,17 @@ function parseArgs(argv) {
57
58
  continue;
58
59
  }
59
60
  if (arg === '--gpu-cast-min-tensor-bytes') {
60
- execution.gpuCastMinTensorBytes = argv[i + 1] ?? null;
61
- i += 1;
61
+ execution.gpuCastMinTensorBytes = nextValue();
62
62
  continue;
63
63
  }
64
+ if (arg.startsWith('--')) {
65
+ throw new Error(`Unknown flag: ${arg}`);
66
+ }
64
67
  positional.push(arg);
65
68
  }
69
+ if (positional.length > 1) {
70
+ throw new Error(`Unexpected positional arguments: ${positional.slice(1).join(', ')}`);
71
+ }
66
72
  out.inputDir = positional[0] ?? null;
67
73
  out.execution = Object.keys(execution).length > 0 ? execution : null;
68
74
  return out;
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
 
3
+ import { existsSync } from 'node:fs';
3
4
  import fs from 'node:fs/promises';
4
5
  import path from 'node:path';
5
6
  import { fileURLToPath, pathToFileURL } from 'node:url';
@@ -13,7 +14,8 @@ import { createToolingErrorEnvelope } from '../src/tooling/command-envelope.js';
13
14
 
14
15
  const NODE_WEBGPU_INCOMPLETE_MESSAGE = 'node command: WebGPU runtime is incomplete in Node';
15
16
  const CLI_POLICY_PATH = fileURLToPath(new URL('./configs/cli/doppler-cli-policy.json', import.meta.url));
16
- const DEFAULT_EXTERNAL_MODELS_ROOT = process.env.DOPPLER_EXTERNAL_MODELS_ROOT || '/media/x/models';
17
+ const DEFAULT_EXTERNAL_MODELS_ROOT = process.env.DOPPLER_EXTERNAL_MODELS_ROOT
18
+ || (existsSync('/Volumes/models') ? '/Volumes/models' : '/media/x/models');
17
19
  const DEFAULT_EXTERNAL_RDRR_ROOT = path.join(DEFAULT_EXTERNAL_MODELS_ROOT, 'rdrr');
18
20
  const DEFAULT_CLI_POLICY = {
19
21
  defaults: {
@@ -22,8 +24,6 @@ const DEFAULT_CLI_POLICY = {
22
24
  allowed: ['auto', 'node', 'browser'],
23
25
  },
24
26
  bench: {
25
- modelId: 'gemma-3-270m-it-wf16-ef16-hf16',
26
- surface: 'browser',
27
27
  cacheMode: 'warm',
28
28
  },
29
29
  cacheMode: null,
@@ -82,7 +82,7 @@ function usage() {
82
82
  ' - run: CLI-only run controls (surface, browser options, and bench save/compare/manifest settings).',
83
83
  '',
84
84
  'Example:',
85
- ' doppler verify --config \'{"request":{"suite":"inference","modelId":"gemma-3-270m-it-wf16-ef16-hf16"}}\' --json',
85
+ ' doppler verify --config \'{"request":{"suite":"inference","modelId":"gemma-3-270m-it-f16-af32"}}\' --json',
86
86
  ].join('\n');
87
87
  }
88
88
 
@@ -439,10 +439,6 @@ export async function resolveBrowserModelUrl(request, browserOptions = {}) {
439
439
  const staticRootDir = resolveStaticRootDir(browserOptions);
440
440
  const externalModel = await resolveExternalModelDirectory(resolveRdrrRoot(browserOptions), modelId);
441
441
  const candidates = [
442
- {
443
- modelUrl: `/models/curated/${encodedModelId}`,
444
- manifestPath: path.join(staticRootDir, 'models', 'curated', modelId, 'manifest.json'),
445
- },
446
442
  {
447
443
  modelUrl: `/models/local/${encodedModelId}`,
448
444
  manifestPath: path.join(staticRootDir, 'models', 'local', modelId, 'manifest.json'),
@@ -528,6 +524,9 @@ function parseSurface(value, command, policy = DEFAULT_CLI_POLICY) {
528
524
  if (command === 'convert' && normalized === 'browser') {
529
525
  throw new Error('convert is not supported on browser relay. Use --surface node or --surface auto.');
530
526
  }
527
+ if ((command === 'lora' || command === 'distill') && normalized === 'browser') {
528
+ throw new Error(`${command} is not supported on browser relay. Use --surface node or --surface auto.`);
529
+ }
531
530
  return normalized;
532
531
  }
533
532
 
@@ -587,13 +586,10 @@ function resolveBenchRunOptions(runConfig, policy = DEFAULT_CLI_POLICY) {
587
586
  function resolveSurfaceForCommand(command, parsed, runConfig, policy = DEFAULT_CLI_POLICY) {
588
587
  const fromCli = asStringOrNull(parsed.flags.surface);
589
588
  const fromRun = asStringOrNull(runConfig?.surface);
590
- const fromPolicy = command === 'bench'
591
- ? asStringOrNull(policy?.defaults?.bench?.surface)
592
- : null;
593
- return parseSurface(fromCli ?? fromRun ?? fromPolicy, command, policy);
589
+ return parseSurface(fromCli ?? fromRun ?? null, command, policy);
594
590
  }
595
591
 
596
- async function buildRequest(parsed, policy = DEFAULT_CLI_POLICY) {
592
+ export async function buildRequest(parsed, policy = DEFAULT_CLI_POLICY) {
597
593
  const command = parsed.command;
598
594
  if (!command || !TOOLING_COMMANDS.includes(command)) {
599
595
  throw new Error(`Unsupported command "${command || ''}"`);
@@ -612,21 +608,15 @@ async function buildRequest(parsed, policy = DEFAULT_CLI_POLICY) {
612
608
  }
613
609
  requestInput.command = command;
614
610
 
615
- if (command === 'bench' && !asStringOrNull(requestInput.modelId) && !asStringOrNull(requestInput.modelUrl)) {
616
- const benchDefaultModelId = asStringOrNull(policy?.defaults?.bench?.modelId);
617
- if (benchDefaultModelId) {
618
- requestInput.modelId = benchDefaultModelId;
619
- }
620
- }
621
-
622
611
  applyRuntimeFlagOverride(requestInput, runtimeOverride);
623
612
 
624
613
  const surfaceFromCli = asStringOrNull(parsed.flags.surface) !== null;
614
+ const surface = resolveSurfaceForCommand(command, parsed, envelope.run, policy);
625
615
 
626
616
  return {
627
617
  request: normalizeToolingCommandRequest(requestInput),
628
618
  runConfig: envelope.run,
629
- surface: resolveSurfaceForCommand(command, parsed, envelope.run, policy),
619
+ surface,
630
620
  surfaceFromCli,
631
621
  benchRunOptions: resolveBenchRunOptions(envelope.run, policy),
632
622
  };
@@ -723,6 +713,32 @@ function isTrainingCommandFlow(request) {
723
713
  return request.command === 'bench' && request.workloadType === 'training';
724
714
  }
725
715
 
716
+ function resolveErrorSurface(error, fallbackSurface = null) {
717
+ return (
718
+ asStringOrNull(fallbackSurface)
719
+ || asStringOrNull(error?.surface)
720
+ || asStringOrNull(error?.details?.surface)
721
+ || null
722
+ );
723
+ }
724
+
725
+ export function createCliToolingErrorEnvelope(error, context = {}) {
726
+ return createToolingErrorEnvelope(error, {
727
+ surface: resolveErrorSurface(error, context.surface),
728
+ request: context.request ?? null,
729
+ });
730
+ }
731
+
732
+ export function finalizeCliCommandResponse(response, request) {
733
+ if (!isPlainObject(response) || !Object.prototype.hasOwnProperty.call(response, 'request')) {
734
+ return response;
735
+ }
736
+ return {
737
+ ...response,
738
+ request,
739
+ };
740
+ }
741
+
726
742
  async function runCommandOnSurface(request, surface, runConfig, jsonOutput) {
727
743
  if (surface === 'node') {
728
744
  const nodeRequest = await resolveNodeModelUrl(request);
@@ -732,7 +748,8 @@ async function runCommandOnSurface(request, surface, runConfig, jsonOutput) {
732
748
  console.error(`[surface] node resolved modelUrl=${nodeRequest.modelUrl}`);
733
749
  }
734
750
  }
735
- return runNodeCommand(nodeRequest, buildNodeRunOptions(jsonOutput));
751
+ const response = await runNodeCommand(nodeRequest, buildNodeRunOptions(jsonOutput));
752
+ return finalizeCliCommandResponse(response, request);
736
753
  }
737
754
 
738
755
  const browserOptions = buildBrowserRunOptions(runConfig, jsonOutput, request);
@@ -746,7 +763,8 @@ async function runCommandOnSurface(request, surface, runConfig, jsonOutput) {
746
763
  }
747
764
  }
748
765
 
749
- return runBrowserCommandInNode(browserRequest, browserOptions);
766
+ const response = await runBrowserCommandInNode(browserRequest, browserOptions);
767
+ return finalizeCliCommandResponse(response, request);
750
768
  }
751
769
 
752
770
  async function runWithAutoSurface(request, runConfig, jsonOutput, policy = DEFAULT_CLI_POLICY) {
@@ -763,9 +781,12 @@ async function runWithAutoSurface(request, runConfig, jsonOutput, policy = DEFAU
763
781
  }
764
782
  if (isTrainingCommandFlow(request)) {
765
783
  const downgradeError = new Error(
766
- 'Training command auto-surface downgrade is blocked. Re-run with --surface node after fixing Node WebGPU support, or explicitly choose --surface browser.'
784
+ (request.command === 'lora' || request.command === 'distill')
785
+ ? 'Training command auto-surface downgrade is blocked. Re-run with --surface node after fixing Node WebGPU support.'
786
+ : 'Training command auto-surface downgrade is blocked. Re-run with --surface node after fixing Node WebGPU support, or explicitly choose --surface browser.'
767
787
  );
768
788
  downgradeError.code = 'training_surface_downgrade_blocked';
789
+ downgradeError.surface = 'node';
769
790
  downgradeError.command = request.command;
770
791
  downgradeError.suite = request.suite;
771
792
  downgradeError.workloadType = request.workloadType || null;
@@ -1021,7 +1042,7 @@ async function runManifestSweep(manifest, commandContext, jsonOutput, policy = D
1021
1042
  results.push({
1022
1043
  label,
1023
1044
  response: null,
1024
- error: createToolingErrorEnvelope(error, {
1045
+ error: createCliToolingErrorEnvelope(error, {
1025
1046
  surface: surface === 'auto' ? null : surface,
1026
1047
  request,
1027
1048
  }),
@@ -1241,6 +1262,9 @@ function printMetricsSummary(result) {
1241
1262
  `prefill=${formatNumber(metrics.prefillTokensPerSec)} ` +
1242
1263
  `decode=${formatNumber(metrics.decodeTokensPerSec)}`
1243
1264
  );
1265
+ if (typeof result.output === 'string' && result.output.length > 0) {
1266
+ console.log(`[output] ${quoteOneLine(result.output)}`);
1267
+ }
1244
1268
  printExecutionContractSummary(result);
1245
1269
  printExecutionV0GraphSummary(metrics.executionV0GraphContractArtifact);
1246
1270
  return;
@@ -1404,7 +1428,7 @@ async function main() {
1404
1428
  printMetricsSummary(response.result);
1405
1429
  } catch (error) {
1406
1430
  if (jsonOutputRequested) {
1407
- console.log(JSON.stringify(createToolingErrorEnvelope(error, errorContext), null, 2));
1431
+ console.log(JSON.stringify(createCliToolingErrorEnvelope(error, errorContext), null, 2));
1408
1432
  process.exitCode = 1;
1409
1433
  return;
1410
1434
  }