@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -6,18 +6,40 @@ function normalizeRolloutPolicy(policy) {
6
6
  ? policy.rollout
7
7
  : {};
8
8
  const rawMode = String(rollout.mode || 'shadow').trim().toLowerCase().replace(/_/g, '-');
9
- const mode = rawMode === 'default' || rawMode === 'canary' || rawMode === 'opt-in' || rawMode === 'shadow'
10
- ? rawMode
11
- : 'shadow';
12
- const canaryPercent = Number.isFinite(rollout.canaryPercent)
13
- ? Math.min(100, Math.max(0, Number(rollout.canaryPercent)))
14
- : 0;
9
+ if (rawMode !== 'default' && rawMode !== 'canary' && rawMode !== 'opt-in' && rawMode !== 'shadow') {
10
+ throw new Error(
11
+ `hotswap.rollout.mode must be one of default, canary, opt-in, shadow (received "${rollout.mode}")`
12
+ );
13
+ }
14
+ let canaryPercent = 0;
15
+ if (rollout.canaryPercent !== undefined && rollout.canaryPercent !== null) {
16
+ const parsedCanaryPercent = Number(rollout.canaryPercent);
17
+ if (!Number.isFinite(parsedCanaryPercent) || parsedCanaryPercent < 0 || parsedCanaryPercent > 100) {
18
+ throw new Error('hotswap.rollout.canaryPercent must be a number between 0 and 100 when provided.');
19
+ }
20
+ canaryPercent = parsedCanaryPercent;
21
+ }
22
+ if (rollout.cohortSalt !== undefined && rollout.cohortSalt !== null && typeof rollout.cohortSalt !== 'string') {
23
+ throw new Error('hotswap.rollout.cohortSalt must be a string when provided.');
24
+ }
15
25
  const cohortSalt = String(rollout.cohortSalt || 'doppler-hotswap-v1').trim() || 'doppler-hotswap-v1';
26
+ if (rollout.optInAllowlist !== undefined && rollout.optInAllowlist !== null && !Array.isArray(rollout.optInAllowlist)) {
27
+ throw new Error('hotswap.rollout.optInAllowlist must be an array of strings when provided.');
28
+ }
16
29
  const optInAllowlist = Array.isArray(rollout.optInAllowlist)
17
- ? rollout.optInAllowlist.map((entry) => String(entry || '').trim()).filter(Boolean)
30
+ ? rollout.optInAllowlist.map((entry, index) => {
31
+ if (typeof entry !== 'string') {
32
+ throw new Error(`hotswap.rollout.optInAllowlist[${index}] must be a string.`);
33
+ }
34
+ const normalized = entry.trim();
35
+ if (!normalized) {
36
+ throw new Error(`hotswap.rollout.optInAllowlist[${index}] must not be empty.`);
37
+ }
38
+ return normalized;
39
+ })
18
40
  : [];
19
41
  return {
20
- mode,
42
+ mode: rawMode,
21
43
  canaryPercent,
22
44
  cohortSalt,
23
45
  optInAllowlist,
@@ -9,6 +9,20 @@ export {
9
9
  export { MultiModelLoader } from './loader/multi-model-loader.js';
10
10
 
11
11
  export { InferencePipeline, EmbeddingPipeline, createPipeline } from './generation/index.js';
12
+ export {
13
+ StructuredJsonHeadPipeline,
14
+ isStructuredJsonHeadModelType,
15
+ createStructuredJsonHeadPipeline,
16
+ DreamStructuredPipeline,
17
+ isDreamStructuredModelType,
18
+ createDreamStructuredPipeline,
19
+ } from './generation/index.js';
20
+ export {
21
+ EnergyRowHeadPipeline,
22
+ createEnergyRowHeadPipeline,
23
+ DreamEnergyHeadPipeline,
24
+ createDreamEnergyHeadPipeline,
25
+ } from './inference/pipelines/energy-head/row-head-pipeline.js';
12
26
  export { KVCache } from './inference/kv-cache.js';
13
27
  export { Tokenizer } from './inference/tokenizer.js';
14
28
  export { SpeculativeDecoder } from './inference/speculative.js';
@@ -25,6 +39,22 @@ export {
25
39
  mergeMultipleLogits,
26
40
  } from './gpu/kernels/logit-merge.js';
27
41
 
42
+ export type { RDRRManifest, ShardInfo } from './formats/rdrr/index.js';
43
+ export type { TensorLocation, LoadProgress, LoadOptions, LoaderStats } from './loader/doppler-loader.js';
44
+ export type { AdapterSource } from './loader/multi-model-loader.js';
45
+ export type { ParsedModelConfig } from './generation/index.js';
46
+ export type { SamplingOptions } from './generation/index.js';
47
+ export type {
48
+ GenerateOptions,
49
+ GenerationResult,
50
+ KVCacheSnapshot,
51
+ LayerWeights,
52
+ ExpertWeights,
53
+ RouterWeights,
54
+ } from './generation/index.js';
55
+ export type { LoRAAdapter, LoRAModuleName } from './generation/index.js';
56
+ export type { ExpertNode, ExpertTask } from './inference/multi-model-network.js';
57
+
28
58
  export {
29
59
  ADAPTER_MANIFEST_SCHEMA,
30
60
  validateManifest as validateAdapterManifest,
@@ -45,4 +75,18 @@ export {
45
75
  createMemoryRegistry,
46
76
  } from './adapters/index.js';
47
77
 
78
+ export type {
79
+ AdapterManifest,
80
+ AdapterMetadata,
81
+ AdapterTensorSpec,
82
+ LoRALoadOptions,
83
+ LoRAWeightsResult,
84
+ AdapterState,
85
+ EnableAdapterOptions,
86
+ AdapterStackOptions,
87
+ AdapterManagerEvents,
88
+ AdapterRegistryEntry,
89
+ AdapterQueryOptions,
90
+ } from './adapters/index.js';
91
+
48
92
  export * from './tooling-exports.browser.js';
@@ -11,6 +11,20 @@ export { MultiModelLoader } from './loader/multi-model-loader.js';
11
11
 
12
12
  // Inference pipeline
13
13
  export { InferencePipeline, EmbeddingPipeline, createPipeline } from './generation/index.js';
14
+ export {
15
+ StructuredJsonHeadPipeline,
16
+ isStructuredJsonHeadModelType,
17
+ createStructuredJsonHeadPipeline,
18
+ DreamStructuredPipeline,
19
+ isDreamStructuredModelType,
20
+ createDreamStructuredPipeline,
21
+ } from './generation/index.js';
22
+ export {
23
+ EnergyRowHeadPipeline,
24
+ createEnergyRowHeadPipeline,
25
+ DreamEnergyHeadPipeline,
26
+ createDreamEnergyHeadPipeline,
27
+ } from './inference/pipelines/energy-head/row-head-pipeline.js';
14
28
  export { KVCache } from './inference/kv-cache.js';
15
29
  export { Tokenizer } from './inference/tokenizer.js';
16
30
  export { SpeculativeDecoder } from './inference/speculative.js';
@@ -0,0 +1,5 @@
1
+ export declare function buildSuiteContractMetrics(
2
+ suite: string,
3
+ baseMetrics: Record<string, unknown>,
4
+ manifest: Record<string, unknown> | null | undefined
5
+ ): Record<string, unknown>;
@@ -0,0 +1,28 @@
1
+ import { getInferenceLayerPatternContractArtifact } from '../rules/rule-registry.js';
2
+ import { isPlainObject } from '../utils/plain-object.js';
3
+ import { validateBrowserSuiteMetrics } from '../config/schema/browser-suite-metrics.schema.js';
4
+ import { buildExecutionContractArtifact } from '../config/execution-contract-check.js';
5
+ import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
6
+
7
+ export function buildSuiteContractMetrics(suite, baseMetrics, manifest) {
8
+ const executionContractArtifact = buildExecutionContractArtifact(manifest);
9
+ const executionV0GraphContractArtifact = executionContractArtifact?.executionV0?.graph ?? null;
10
+ const layerPatternContractArtifact = getInferenceLayerPatternContractArtifact();
11
+ const requiredInferenceFieldsArtifact = manifest?.modelType === 'transformer'
12
+ && isPlainObject(manifest?.inference?.attention)
13
+ ? buildManifestRequiredInferenceFieldsArtifact(
14
+ manifest?.inference ?? null,
15
+ `${manifest?.modelId ?? 'unknown'}.inference`
16
+ )
17
+ : null;
18
+ return validateBrowserSuiteMetrics({
19
+ ...baseMetrics,
20
+ schemaVersion: 1,
21
+ source: 'doppler',
22
+ suite,
23
+ ...(executionContractArtifact ? { executionContractArtifact } : {}),
24
+ executionV0GraphContractArtifact,
25
+ layerPatternContractArtifact,
26
+ requiredInferenceFieldsArtifact,
27
+ });
28
+ }
@@ -0,0 +1,2 @@
1
+ export declare function runDiffusionSuite(options?: Record<string, unknown>): Promise<Record<string, unknown>>;
2
+ export declare function runEnergySuite(options?: Record<string, unknown>): Promise<Record<string, unknown>>;
@@ -0,0 +1,269 @@
1
+ import { getRuntimeConfig } from '../config/runtime.js';
2
+ import { computeSampleStats } from '../debug/stats.js';
3
+ import { initializeSuiteModel, resolveDeviceInfo } from './browser-harness-model-helpers.js';
4
+ import { buildSuiteContractMetrics } from './browser-harness-contract-helpers.js';
5
+ import { resolvePrompt } from './browser-harness-text-helpers.js';
6
+ import {
7
+ buildSuiteSummary,
8
+ normalizeCacheMode,
9
+ normalizeLoadMode,
10
+ safeStatsValue,
11
+ buildDiffusionPerformanceArtifact,
12
+ buildCanonicalTiming,
13
+ buildTimingDiagnostics,
14
+ } from './browser-harness-suite-helpers.js';
15
+
16
/**
 * Run the diffusion harness suite: validate the diffusion runtime settings,
 * load the model, execute warmup and timed generations, and assemble the
 * suite result (summary, timing, metrics, memory stats, device info).
 *
 * Generation settings (latent width/height, scheduler steps, guidance scale,
 * negative prompt) and the benchmark run counts are read from the runtime
 * config returned by `getRuntimeConfig()`.
 *
 * @param {object} [options] Harness options. Fields read here: `captureOutput`,
 *   `cacheMode`, `loadMode`, `modelUrl`, `modelId`, `keepPipeline`; the whole
 *   object is forwarded to `initializeSuiteModel`.
 * @returns {Promise<object>} Suite summary merged with model id, env, timing,
 *   timing diagnostics, captured output (last run only, when `captureOutput`
 *   is true), validated metrics, memory stats, device info and, when
 *   `keepPipeline` is set, the live pipeline.
 * @throws {Error} When the diffusion config is missing or width, height,
 *   steps or guidance scale is not a positive finite number. Errors raised
 *   while generating are rethrown after the pipeline has been released.
 */
export async function runDiffusionSuite(options = {}) {
  const startTime = performance.now();
  const runtimeConfig = getRuntimeConfig();
  const captureOutput = options.captureOutput === true;
  const cacheMode = normalizeCacheMode(options.cacheMode);
  const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
  const benchConfig = runtimeConfig.shared?.benchmark?.run || {};
  const warmupRuns = Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0));
  const timedRuns = Math.max(1, Math.floor(benchConfig.timedRuns ?? 1));

  const diffusionConfig = runtimeConfig.inference?.diffusion;
  if (!diffusionConfig) {
    throw new Error('runtime.inference.diffusion must be set for diffusion harness runs.');
  }
  const scheduler = diffusionConfig.scheduler;
  const latent = diffusionConfig.latent;
  const prompt = resolvePrompt(runtimeConfig);
  const negativePrompt = diffusionConfig.negativePrompt ?? '';

  const width = Math.floor(latent?.width);
  const height = Math.floor(latent?.height);
  const steps = Math.floor(scheduler?.numSteps);
  const guidanceScale = scheduler?.guidanceScale;

  // Fail fast on unusable settings before paying for the model load.
  if (!Number.isFinite(width) || width <= 0) {
    throw new Error('runtime.inference.diffusion.latent.width must be set for diffusion harness runs.');
  }
  if (!Number.isFinite(height) || height <= 0) {
    throw new Error('runtime.inference.diffusion.latent.height must be set for diffusion harness runs.');
  }
  if (!Number.isFinite(steps) || steps <= 0) {
    throw new Error('runtime.inference.diffusion.scheduler.numSteps must be set for diffusion harness runs.');
  }
  if (!Number.isFinite(guidanceScale) || guidanceScale <= 0) {
    throw new Error('runtime.inference.diffusion.scheduler.guidanceScale must be set for diffusion harness runs.');
  }

  // Records a sample only when the pipeline reported a finite number for it.
  const pushFiniteSample = (samples, value) => {
    if (Number.isFinite(value)) samples.push(value);
  };
  // Rounded arithmetic mean, or 0 when no samples were collected.
  const roundedMeanOrZero = (values) => (
    values.length
      ? Math.round(values.reduce((a, b) => a + b, 0) / values.length)
      : 0
  );

  const harness = await initializeSuiteModel(options);
  const totalMs = [];
  const prefillMs = [];
  const denoiseMs = [];
  const vaeMs = [];
  const prefillTokens = [];
  const decodeTokens = [];
  const gpuTotalMs = [];
  const gpuPrefillMs = [];
  const gpuDenoiseMs = [];
  const gpuVaeMs = [];
  let output = null;
  let memoryStats = null;

  const totalRuns = warmupRuns + timedRuns;
  try {
    for (let i = 0; i < totalRuns; i++) {
      harness.pipeline.reset?.();
      const result = await harness.pipeline.generate({
        prompt,
        negativePrompt,
        steps,
        guidanceScale,
        width,
        height,
      });
      // Only the final run's output is retained, and only on request.
      if (captureOutput && i === totalRuns - 1) {
        output = result;
      }

      // Warmup iterations run the pipeline but contribute no samples.
      if (i < warmupRuns) continue;

      const stats = harness.pipeline.getStats?.() ?? {};
      pushFiniteSample(totalMs, stats.totalTimeMs);
      pushFiniteSample(prefillMs, stats.prefillTimeMs);
      // The pipeline's decode time is reported as the denoise phase.
      pushFiniteSample(denoiseMs, stats.decodeTimeMs);
      pushFiniteSample(vaeMs, stats.vaeTimeMs);
      pushFiniteSample(prefillTokens, stats.prefillTokens);
      pushFiniteSample(decodeTokens, stats.decodeTokens);

      const gpu = stats.gpu ?? null;
      if (gpu?.available) {
        pushFiniteSample(gpuTotalMs, gpu.totalMs);
        pushFiniteSample(gpuPrefillMs, gpu.prefillMs);
        pushFiniteSample(gpuDenoiseMs, gpu.denoiseMs);
        pushFiniteSample(gpuVaeMs, gpu.vaeMs);
      }
    }

    // Memory stats must be read while the pipeline is still loaded.
    memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
      ? harness.pipeline.getMemoryStats()
      : null;
  } catch (error) {
    // A failed run must not leave the pipeline loaded when the caller did not
    // ask to keep it. The unload is best-effort so that the original failure
    // is the error the caller sees.
    if (!options.keepPipeline && typeof harness.pipeline?.unload === 'function') {
      try {
        await harness.pipeline.unload();
      } catch {
        // Ignored on purpose: the generation error below is the one to report.
      }
    }
    throw error;
  }

  if (typeof harness.pipeline.unload === 'function' && !options.keepPipeline) {
    await harness.pipeline.unload();
  }

  const results = [
    {
      name: 'diffusion',
      passed: totalMs.length > 0,
      duration: totalMs.reduce((sum, value) => sum + value, 0),
      error: totalMs.length > 0 ? undefined : 'No diffusion runs completed',
    },
  ];

  const summary = buildSuiteSummary('diffusion', results, startTime);
  const cpuStats = {
    totalMs: computeSampleStats(totalMs),
    prefillMs: computeSampleStats(prefillMs),
    denoiseMs: computeSampleStats(denoiseMs),
    vaeMs: computeSampleStats(vaeMs),
  };
  // GPU stats are reported only when at least one GPU total sample was seen.
  const gpuStats = gpuTotalMs.length > 0
    ? {
      available: true,
      totalMs: computeSampleStats(gpuTotalMs),
      prefillMs: computeSampleStats(gpuPrefillMs),
      denoiseMs: computeSampleStats(gpuDenoiseMs),
      vaeMs: computeSampleStats(gpuVaeMs),
    }
    : { available: false };

  const avgPrefillTokens = roundedMeanOrZero(prefillTokens);
  const avgDecodeTokens = roundedMeanOrZero(decodeTokens);
  const prefillMsMedian = safeStatsValue(cpuStats.prefillMs?.median);
  const denoiseMsMedian = safeStatsValue(cpuStats.denoiseMs?.median);
  const totalMsMedian = safeStatsValue(cpuStats.totalMs?.median);
  const diffusionPerformanceArtifact = buildDiffusionPerformanceArtifact({
    warmupRuns,
    timedRuns,
    width,
    height,
    steps,
    guidanceScale,
    avgPrefillTokens,
    avgDecodeTokens,
    cpuStats,
    gpuStats,
  });
  const timing = buildCanonicalTiming({
    // NOTE(review): initializeSuiteModel returns a measured modelLoadMs, yet 0
    // is reported here; confirm this is intentional before changing it.
    modelLoadMs: 0,
    firstTokenMs: null,
    firstResponseMs: null,
    prefillMs: prefillMsMedian,
    decodeMs: denoiseMsMedian,
    totalRunMs: totalMsMedian,
    prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
    decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
    cacheMode,
    loadMode,
  });
  const timingDiagnostics = buildTimingDiagnostics(timing, {
    source: 'doppler',
    prefillSemantics: 'internal_prefill_phase',
  });
  const metricsWithContracts = buildSuiteContractMetrics(
    'diffusion',
    {
      warmupRuns,
      timedRuns,
      width,
      height,
      steps,
      guidanceScale,
      prompt,
      avgPrefillTokens,
      avgDecodeTokens,
      latency: {
        totalMs: cpuStats.totalMs,
        prefillMs: cpuStats.prefillMs,
        denoiseMs: cpuStats.denoiseMs,
        vaeMs: cpuStats.vaeMs,
      },
      throughput: {
        prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
        decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
        decodeStepsPerSec: diffusionPerformanceArtifact.throughput.decodeStepsPerSec,
      },
      cpu: cpuStats,
      gpu: gpuStats,
      performanceArtifact: diffusionPerformanceArtifact,
    },
    harness.manifest
  );

  // navigator is absent outside browsers, so every field falls back to null.
  const hasNavigator = typeof navigator !== 'undefined';
  return {
    ...summary,
    modelId: options.modelId || harness.manifest?.modelId || 'unknown',
    cacheMode,
    loadMode,
    env: {
      library: 'doppler',
      runtime: 'browser',
      device: 'webgpu',
      browserUserAgent: hasNavigator ? (navigator.userAgent || null) : null,
      browserPlatform: hasNavigator ? (navigator.platform || null) : null,
      browserLanguage: hasNavigator ? (navigator.language || null) : null,
      browserVendor: hasNavigator ? (navigator.vendor || null) : null,
    },
    timing,
    timingDiagnostics,
    output,
    metrics: metricsWithContracts,
    memoryStats,
    deviceInfo: resolveDeviceInfo(),
    pipeline: options.keepPipeline ? harness.pipeline : null,
  };
}
223
+
224
/**
 * Run the energy harness suite: load the model, require an energy manifest,
 * execute a single generation and report its energy metrics.
 *
 * @param {object} [options] Harness options. Fields read here: `modelId` and
 *   `keepPipeline`; the whole object is forwarded to `initializeSuiteModel`.
 * @returns {Promise<object>} Suite summary merged with model id, energy
 *   metrics, memory stats, device info and, when `keepPipeline` is set, the
 *   live pipeline. The single result passes when `result.energy` is finite.
 * @throws {Error} When the loaded manifest's `modelType` is not `'energy'`.
 *   Errors raised while generating are rethrown after the pipeline has been
 *   released.
 */
export async function runEnergySuite(options = {}) {
  const startTime = performance.now();
  const harness = await initializeSuiteModel(options);

  let result;
  let stats;
  let memoryStats;
  try {
    // The model is already loaded at this point, so this check sits inside
    // the guarded region to make sure a mismatch still releases the pipeline.
    if (harness.manifest?.modelType !== 'energy') {
      throw new Error('Energy suite requires an energy model manifest.');
    }

    result = await harness.pipeline.generate();
    stats = harness.pipeline.getStats?.() ?? {};

    // Memory stats must be read while the pipeline is still loaded.
    memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
      ? harness.pipeline.getMemoryStats()
      : null;
  } catch (error) {
    // Best-effort release so a failed run does not leave the pipeline loaded;
    // the original failure stays the error the caller sees.
    if (!options.keepPipeline && typeof harness.pipeline?.unload === 'function') {
      try {
        await harness.pipeline.unload();
      } catch {
        // Ignored on purpose: the error rethrown below is the one to report.
      }
    }
    throw error;
  }

  if (typeof harness.pipeline.unload === 'function' && !options.keepPipeline) {
    await harness.pipeline.unload();
  }

  // A missing energy value is treated as NaN, i.e. as a failed run.
  const energyIsFinite = Number.isFinite(result.energy ?? NaN);
  const results = [
    {
      name: 'energy',
      passed: energyIsFinite,
      // Falls back to wall-clock time when the pipeline reports no total.
      duration: result.totalTimeMs ?? Math.max(0, performance.now() - startTime),
      error: energyIsFinite ? undefined : 'Energy did not converge',
    },
  ];

  const summary = buildSuiteSummary('energy', results, startTime);
  return {
    ...summary,
    modelId: options.modelId || harness.manifest?.modelId || 'unknown',
    metrics: {
      steps: result.steps,
      energy: result.energy ?? null,
      dtype: result.dtype,
      shape: result.shape,
      totalTimeMs: result.totalTimeMs ?? null,
      energyHistory: result.energyHistory ?? [],
      stateStats: result.stateStats ?? null,
      readbackCount: stats.readbackCount ?? null,
    },
    memoryStats,
    deviceInfo: resolveDeviceInfo(),
    pipeline: options.keepPipeline ? harness.pipeline : null,
  };
}
@@ -0,0 +1,16 @@
1
/** Returns the current kernel capabilities, or null when they cannot be read. */
+ export declare function resolveDeviceInfo(): Record<string, unknown> | null;
2
/**
 * Resolves and activates the kernel path for a stored model.
 * Resolves to null when no manifest could be loaded for `options.modelId`.
 */
+ export declare function resolveKernelPathForModel(options?: Record<string, unknown>): Promise<{
3
+ modelId: string | null;
4
+ kernelPath: unknown;
5
+ source: string | null;
6
+ } | null>;
7
/**
 * Creates an inference pipeline from a model manifest held in the model store.
 *
 * @param modelId Identifier of the stored model; required.
 * @param options Harness options (for example `runtime` and `onProgress`).
 * @returns Resolves to an object holding the pipeline, manifest and capabilities.
 */
+ export declare function initializeInferenceFromStorage(
8
+ modelId: string,
9
+ options?: Record<string, unknown>
10
+ ): Promise<Record<string, unknown>>;
11
/**
 * Creates an inference pipeline directly from a local model source path (Node only).
 *
 * @param sourcePath Local filesystem path; URLs are rejected.
 * @param options Harness options (for example `runtime`, `modelId` and `onProgress`).
 * @returns Resolves to an object holding the pipeline, manifest and capabilities.
 */
+ export declare function initializeInferenceFromSourcePath(
12
+ sourcePath: string,
13
+ options?: Record<string, unknown>
14
+ ): Promise<Record<string, unknown>>;
15
/** Resolves `options.harnessOverride` (an object or a function returning one) into a harness with `manifest` and `modelLoadMs` filled in. */
+ export declare function resolveHarnessOverride(options?: Record<string, unknown>): Promise<Record<string, unknown>>;
16
/** Loads the suite's model harness (override, local source path, storage, or model URL) and adds `modelLoadMs` to the result. */
+ export declare function initializeSuiteModel(options?: Record<string, unknown>): Promise<Record<string, unknown>>;
@@ -0,0 +1,217 @@
1
+ import { initializeInference } from './test-harness.js';
2
+ import { setRuntimeConfig } from '../config/runtime.js';
3
+ import { initDevice, getKernelCapabilities, getDevice } from '../gpu/device.js';
4
+ import { createPipeline } from './pipelines/text.js';
5
+ import { parseModelConfigFromManifest } from './pipelines/text/config.js';
6
+ import { resolveKernelPathState, activateKernelPathState } from './pipelines/text/model-load.js';
7
+ import { openModelStore, loadManifestFromStore } from '../storage/shard-manager.js';
8
+ import { parseManifest } from '../formats/rdrr/index.js';
9
+ import { resolveRuntime } from './browser-harness-runtime-helpers.js';
10
+ import { normalizeLoadMode } from './browser-harness-suite-helpers.js';
11
+ import { buildSourceArtifactFingerprint, createStoredSourceArtifactContext } from '../storage/source-artifact-store.js';
12
+
13
// Module specifier that initializeInferenceFromSourcePath loads through a dynamic import(), after its Node-runtime check has passed.
+ const NODE_SOURCE_RUNTIME_MODULE_PATH = '../tooling/node-source-runtime.js';
14
+
15
/**
 * Report whether the code is executing under Node.js.
 *
 * @returns {boolean} True when a global `process` exists and exposes a truthy `versions.node`.
 */
function isNodeRuntime() {
  if (typeof process === 'undefined') {
    return false;
  }
  return Boolean(process.versions?.node);
}
18
+
19
+ function resolveSourceVerifyHashes(options = {}) {
20
+ const explicit = options?.runtime?.runtimeConfig?.loading?.shardCache?.verifyHashes;
21
+ if (explicit == null) {
22
+ return true;
23
+ }
24
+ return explicit === true;
25
+ }
26
+
27
/**
 * Fetch the kernel capabilities for reporting purposes.
 *
 * @returns {object|null} The value returned by `getKernelCapabilities()`, or
 *   null when that call throws.
 */
export function resolveDeviceInfo() {
  let deviceInfo;
  try {
    deviceInfo = getKernelCapabilities();
  } catch {
    // Device info is optional in suite results, so failures map to null.
    deviceInfo = null;
  }
  return deviceInfo;
}
34
+
35
/**
 * Resolve and activate the kernel path for a model held in the model store.
 *
 * @param {object} [options] Harness options; reads `modelId` and
 *   `runtime.runtimeConfig`.
 * @returns {Promise<{modelId: string|null, kernelPath: unknown, source: string|null}|null>}
 *   The resolved kernel path details, or null when no model id was given or
 *   the store holds no manifest for it.
 */
export async function resolveKernelPathForModel(options = {}) {
  // Without a model id there is nothing to open, hence no manifest.
  if (!options.modelId) {
    return null;
  }

  await openModelStore(options.modelId);
  const storedManifestText = await loadManifestFromStore();
  if (!storedManifestText) {
    return null;
  }

  const manifest = parseManifest(storedManifestText);
  // The manifest's own id takes precedence over the requested one.
  const resolvedModelId = manifest.modelId ?? options.modelId;
  if (!manifest) {
    return null;
  }

  const runtimeConfig = options.runtime?.runtimeConfig ?? null;
  const modelConfig = parseModelConfigFromManifest(manifest, runtimeConfig);
  const kernelPathState = resolveKernelPathState({ manifest, runtimeConfig, modelConfig });
  activateKernelPathState(kernelPathState);

  const { resolvedKernelPath, kernelPathSource } = kernelPathState;
  return {
    modelId: resolvedModelId,
    kernelPath: resolvedKernelPath,
    source: kernelPathSource,
  };
}
64
+
65
+ export async function initializeInferenceFromStorage(modelId, options = {}) {
66
+ const { onProgress } = options;
67
+ if (!modelId) {
68
+ throw new Error('modelId is required');
69
+ }
70
+
71
+ if (options.runtime?.runtimeConfig) {
72
+ setRuntimeConfig(options.runtime.runtimeConfig);
73
+ }
74
+
75
+ onProgress?.('storage', 0.05, 'Opening model store...');
76
+ await openModelStore(modelId);
77
+
78
+ onProgress?.('manifest', 0.1, 'Loading manifest...');
79
+ const manifestText = await loadManifestFromStore();
80
+ if (!manifestText) {
81
+ throw new Error('Manifest not found in storage');
82
+ }
83
+ const manifest = parseManifest(manifestText);
84
+
85
+ onProgress?.('gpu', 0.2, 'Initializing WebGPU...');
86
+ await initDevice();
87
+ const device = getDevice();
88
+ const capabilities = getKernelCapabilities();
89
+
90
+ onProgress?.('pipeline', 0.3, 'Creating pipeline...');
91
+ const storage = buildSourceArtifactFingerprint(manifest)
92
+ ? createStoredSourceArtifactContext(manifest, { verifyHashes: true })
93
+ : null;
94
+ const pipeline = await createPipeline(manifest, {
95
+ gpu: { device },
96
+ runtime: options.runtime,
97
+ ...(storage ? { storage } : {}),
98
+ onProgress,
99
+ });
100
+
101
+ return { pipeline, manifest, capabilities };
102
+ }
103
+
104
+ export async function initializeInferenceFromSourcePath(sourcePath, options = {}) {
105
+ const { onProgress } = options;
106
+ if (!sourcePath || typeof sourcePath !== 'string') {
107
+ throw new Error('modelUrl is required for loadMode=memory.');
108
+ }
109
+ if (!isNodeRuntime()) {
110
+ throw new Error('loadMode=memory source runtime is currently supported on Node only.');
111
+ }
112
+ if (/^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//.test(sourcePath)) {
113
+ throw new Error(
114
+ 'loadMode=memory expects a local filesystem path (Safetensors directory or .gguf file), not an URL.'
115
+ );
116
+ }
117
+
118
+ if (options.runtime?.runtimeConfig) {
119
+ setRuntimeConfig(options.runtime.runtimeConfig);
120
+ }
121
+
122
+ onProgress?.('source', 0.05, 'Preparing source runtime bundle...');
123
+ const { resolveNodeSourceRuntimeBundle } = await import(NODE_SOURCE_RUNTIME_MODULE_PATH);
124
+ const sourceBundle = await resolveNodeSourceRuntimeBundle({
125
+ inputPath: sourcePath,
126
+ modelId: options.modelId || null,
127
+ verifyHashes: resolveSourceVerifyHashes(options),
128
+ });
129
+ if (!sourceBundle) {
130
+ throw new Error(
131
+ `No source-runtime model detected at "${sourcePath}". ` +
132
+ 'Expected a Safetensors directory or a .gguf file path.'
133
+ );
134
+ }
135
+
136
+ onProgress?.('gpu', 0.2, 'Initializing WebGPU...');
137
+ await initDevice();
138
+ const device = getDevice();
139
+ const capabilities = getKernelCapabilities();
140
+
141
+ onProgress?.('pipeline', 0.3, 'Creating pipeline...');
142
+ const pipeline = await createPipeline(sourceBundle.manifest, {
143
+ gpu: { device },
144
+ runtime: options.runtime,
145
+ storage: sourceBundle.storageContext,
146
+ onProgress,
147
+ });
148
+
149
+ return {
150
+ pipeline,
151
+ manifest: sourceBundle.manifest,
152
+ capabilities,
153
+ };
154
+ }
155
+
156
+ export async function resolveHarnessOverride(options = {}) {
157
+ const input = typeof options.harnessOverride === 'function'
158
+ ? await options.harnessOverride(options)
159
+ : options.harnessOverride;
160
+
161
+ if (!input || typeof input !== 'object') {
162
+ throw new Error('harnessOverride must resolve to an object.');
163
+ }
164
+
165
+ if (!input.pipeline || typeof input.pipeline.generate !== 'function') {
166
+ throw new Error('harnessOverride.pipeline.generate(request) is required.');
167
+ }
168
+
169
+ const manifest = input.manifest && typeof input.manifest === 'object'
170
+ ? input.manifest
171
+ : {
172
+ modelId: options.modelId || 'diffusion-harness-override',
173
+ modelType: 'diffusion',
174
+ };
175
+
176
+ const modelLoadMs = Number.isFinite(input.modelLoadMs)
177
+ ? Math.max(0, input.modelLoadMs)
178
+ : 0;
179
+
180
+ return {
181
+ ...input,
182
+ manifest,
183
+ modelLoadMs,
184
+ };
185
+ }
186
+
187
/**
 * Load the model harness for a suite run and time the load.
 *
 * Resolution order:
 *   1. `options.harnessOverride` — applied via `resolveHarnessOverride` and
 *      returned as-is (no load timing is added here).
 *   2. load mode `memory` — load from the local path in `options.modelUrl`.
 *   3. `options.modelId` without `options.modelUrl` — load from storage.
 *   4. otherwise — load from `options.modelUrl`.
 *
 * @param {object} [options] Harness options.
 * @returns {Promise<object>} The harness, plus `modelLoadMs` for paths 2-4.
 * @throws {Error} When the chosen path needs `modelUrl` and none was given.
 */
export async function initializeSuiteModel(options = {}) {
  if (options.harnessOverride) {
    const runtimeConfigOverride = options.runtime?.runtimeConfig;
    if (runtimeConfigOverride) {
      setRuntimeConfig(options.runtime.runtimeConfig);
    }
    return resolveHarnessOverride(options);
  }

  const loadStart = performance.now();
  const runtime = resolveRuntime(options);
  const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
  const hasModelUrl = Boolean(options.modelUrl);

  let harness;
  if (loadMode === 'memory') {
    if (!hasModelUrl) {
      throw new Error('loadMode=memory requires modelUrl to be a local model path.');
    }
    harness = await initializeInferenceFromSourcePath(options.modelUrl, { ...options, runtime });
  } else if (options.modelId && !hasModelUrl) {
    harness = await initializeInferenceFromStorage(options.modelId, { ...options, runtime });
  } else {
    if (!hasModelUrl) {
      throw new Error('modelUrl is required for this suite');
    }
    const { onProgress, log } = options;
    harness = await initializeInference(options.modelUrl, { runtime, onProgress, log });
  }

  // Clamp at zero in case the clock reports a negative elapsed time.
  const elapsedMs = performance.now() - loadStart;
  return { ...harness, modelLoadMs: Math.max(0, elapsedMs) };
}
@@ -0,0 +1,7 @@
1
/**
 * Collects training artifacts from a suite result.
 *
 * @param suiteResult Suite result object to read from.
 * @returns Three arrays: `ulArtifacts`, `distillArtifacts` and `checkpointResumeTimeline`.
 */
+ export declare function collectTrainingArtifactsFromSuiteResult(
2
+ suiteResult: Record<string, unknown>
3
+ ): {
4
+ ulArtifacts: Array<Record<string, unknown>>;
5
+ distillArtifacts: Array<Record<string, unknown>>;
6
+ checkpointResumeTimeline: Array<Record<string, unknown>>;
7
+ };