@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -175,103 +175,103 @@ export async function doConv(
175
175
  }
176
176
 
177
177
  // Use the first 2x hidden projection channels as a gated conv-state projection.
178
- const inProj = await doMatmul(
179
- inputTensor,
180
- convInProj,
181
- numTokens,
182
- hiddenSize * 2,
183
- hiddenSize,
184
- {
185
- transposeB: 'auto',
186
- label: `${label}.in_proj`,
187
- layerIdx,
188
- kernelPath,
189
- role: 'conv_in_proj',
190
- },
191
- recorder
192
- );
193
- const activated = await doSiLURowSplit(inProj, {
194
- numTokens,
195
- dim: hiddenSize,
196
- activation: 'silu',
197
- swigluLimit: options.swigluLimit ?? null,
198
- label: `${label}.activation`,
199
- layerIdx,
200
- }, recorder);
201
-
202
- if (recorder) {
203
- recorder.trackTemporaryBuffer(inProj.buffer);
204
- } else {
205
- releaseBuffer(inProj.buffer);
206
- }
207
-
208
- // Optional generic conv2d stage when explicit shape metadata is provided.
209
- // LFM2 depthwise conv kernels use model-specific packing, so this path is best-effort only.
210
- let convInput = activated;
211
- if (convKernel && options.conv2d && options.conv2d.enabled === true) {
212
- const convTensorInput = createTensor(activated.buffer, activated.dtype, [
213
- options.conv2d.inChannels,
214
- options.conv2d.height,
215
- options.conv2d.width,
216
- ], `${label}.conv_input`);
217
- const convOptions = {
218
- inChannels: options.conv2d.inChannels,
219
- outChannels: options.conv2d.outChannels,
220
- height: options.conv2d.height,
221
- width: options.conv2d.width,
222
- kernelH: options.conv2d.kernelH,
223
- kernelW: options.conv2d.kernelW,
224
- stride: options.conv2d.stride ?? 1,
225
- pad: options.conv2d.pad ?? 0,
226
- };
227
- const convResult = recorder
228
- ? await recordConv2D(recorder, convTensorInput, convKernel, null, convOptions)
229
- : await runConv2D(convTensorInput, convKernel, null, convOptions);
230
- convInput = createTensor(
231
- convResult.buffer,
232
- convResult.dtype,
233
- [numTokens, hiddenSize],
234
- `${label}.conv_output`
178
+ let inProj = null;
179
+ let activated = null;
180
+ let convInput = null;
181
+ let outProj = null;
182
+ try {
183
+ inProj = await doMatmul(
184
+ inputTensor,
185
+ convInProj,
186
+ numTokens,
187
+ hiddenSize * 2,
188
+ hiddenSize,
189
+ {
190
+ transposeB: 'auto',
191
+ label: `${label}.in_proj`,
192
+ layerIdx,
193
+ kernelPath,
194
+ role: 'conv_in_proj',
195
+ },
196
+ recorder
235
197
  );
236
- if (recorder) {
237
- recorder.trackTemporaryBuffer(activated.buffer);
238
- } else {
239
- releaseBuffer(activated.buffer);
198
+ activated = await doSiLURowSplit(inProj, {
199
+ numTokens,
200
+ dim: hiddenSize,
201
+ activation: 'silu',
202
+ swigluLimit: options.swigluLimit ?? null,
203
+ label: `${label}.activation`,
204
+ layerIdx,
205
+ }, recorder);
206
+
207
+ releaseOrTrack(recorder, inProj.buffer);
208
+ inProj = null;
209
+
210
+ convInput = activated;
211
+ if (convKernel && options.conv2d && options.conv2d.enabled === true) {
212
+ const convTensorInput = createTensor(activated.buffer, activated.dtype, [
213
+ options.conv2d.inChannels,
214
+ options.conv2d.height,
215
+ options.conv2d.width,
216
+ ], `${label}.conv_input`);
217
+ const convOptions = {
218
+ inChannels: options.conv2d.inChannels,
219
+ outChannels: options.conv2d.outChannels,
220
+ height: options.conv2d.height,
221
+ width: options.conv2d.width,
222
+ kernelH: options.conv2d.kernelH,
223
+ kernelW: options.conv2d.kernelW,
224
+ stride: options.conv2d.stride ?? 1,
225
+ pad: options.conv2d.pad ?? 0,
226
+ };
227
+ const convResult = recorder
228
+ ? await recordConv2D(recorder, convTensorInput, convKernel, null, convOptions)
229
+ : await runConv2D(convTensorInput, convKernel, null, convOptions);
230
+ convInput = createTensor(
231
+ convResult.buffer,
232
+ convResult.dtype,
233
+ [numTokens, hiddenSize],
234
+ `${label}.conv_output`
235
+ );
236
+ releaseOrTrack(recorder, activated.buffer);
237
+ activated = null;
240
238
  }
241
- }
242
239
 
243
- const outProj = await doMatmul(
244
- convInput,
245
- convOutProj,
246
- numTokens,
247
- hiddenSize,
248
- hiddenSize,
249
- {
250
- transposeB: 'auto',
251
- label: `${label}.out_proj`,
252
- layerIdx,
253
- kernelPath,
254
- role: 'conv_out_proj',
255
- },
256
- recorder
257
- );
240
+ outProj = await doMatmul(
241
+ convInput,
242
+ convOutProj,
243
+ numTokens,
244
+ hiddenSize,
245
+ hiddenSize,
246
+ {
247
+ transposeB: 'auto',
248
+ label: `${label}.out_proj`,
249
+ layerIdx,
250
+ kernelPath,
251
+ role: 'conv_out_proj',
252
+ },
253
+ recorder
254
+ );
258
255
 
259
- if (convInput.buffer !== activated.buffer) {
260
- if (recorder) {
261
- recorder.trackTemporaryBuffer(convInput.buffer);
262
- } else {
263
- releaseBuffer(convInput.buffer);
256
+ if (convInput && (!activated || convInput.buffer !== activated.buffer)) {
257
+ releaseOrTrack(recorder, convInput.buffer);
258
+ convInput = null;
259
+ } else if (activated) {
260
+ releaseOrTrack(recorder, activated.buffer);
261
+ activated = null;
264
262
  }
265
- } else if (recorder) {
266
- recorder.trackTemporaryBuffer(activated.buffer);
267
- } else {
268
- releaseBuffer(activated.buffer);
269
- }
270
263
 
271
- if (kernelTrace.enabled && !recorder) {
272
- await traceStep('conv', label, layerIdx, outProj.buffer, [numTokens, hiddenSize]);
264
+ if (kernelTrace.enabled && !recorder) {
265
+ await traceStep('conv', label, layerIdx, outProj.buffer, [numTokens, hiddenSize]);
266
+ }
267
+ return outProj;
268
+ } catch (error) {
269
+ if (outProj) releaseOrTrack(recorder, outProj.buffer);
270
+ if (convInput && (!activated || convInput.buffer !== activated.buffer)) releaseOrTrack(recorder, convInput.buffer);
271
+ if (activated) releaseOrTrack(recorder, activated.buffer);
272
+ if (inProj) releaseOrTrack(recorder, inProj.buffer);
273
+ throw error;
273
274
  }
274
- return outProj;
275
275
  }
276
276
 
277
277
  export async function doCast(input, toDtype, recorder) {
@@ -4,6 +4,7 @@ import { trace } from '../../../debug/index.js';
4
4
  import { getDevice } from '../../../gpu/device.js';
5
5
  import { allowReadback } from '../../../gpu/perf-guards.js';
6
6
  import { f16ToF32 } from '../../../loader/dtype-utils.js';
7
+ import { readBufferSlice } from '../../../memory/buffer-pool.js';
7
8
 
8
9
 
9
10
  const STAGE_DEFAULT_CATEGORY = {
@@ -11,6 +12,11 @@ const STAGE_DEFAULT_CATEGORY = {
11
12
  // Attention stages (per-layer)
12
13
  attn_input: 'attn',
13
14
  attn_normed: 'attn',
15
+ linear_qkv_proj: 'attn',
16
+ linear_z_proj: 'attn',
17
+ linear_a_proj: 'attn',
18
+ linear_b_proj: 'attn',
19
+ linear_core_out: 'attn',
14
20
  q_proj: 'attn',
15
21
  k_proj: 'attn',
16
22
  v_proj: 'attn',
@@ -139,22 +145,16 @@ export async function runProbes(stage, buffer, options) {
139
145
  const alignedOffset = Math.floor(byteOffset / 4) * 4;
140
146
  const offsetWithinRead = byteOffset - alignedOffset;
141
147
  const readSize = 4; // Always read 4 bytes (aligned)
142
- const staging = (device).createBuffer({ size: readSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ });
143
- const enc = (device).createCommandEncoder();
144
- enc.copyBufferToBuffer( (buffer), alignedOffset, staging, 0, readSize);
145
- (device).queue.submit([enc.finish()]);
146
- await staging.mapAsync(GPUMapMode.READ);
148
+ const readback = await readBufferSlice(buffer, alignedOffset, readSize);
147
149
  let value;
148
150
  if (dtype === 'f16') {
149
151
  // offsetWithinRead is 0 or 2 for F16 - extract correct u16
150
- const u16Array = new Uint16Array(staging.getMappedRange().slice(0));
152
+ const u16Array = new Uint16Array(readback);
151
153
  const u16Index = offsetWithinRead / 2;
152
154
  value = f16ToF32(u16Array[u16Index]);
153
155
  } else {
154
- value = new Float32Array(staging.getMappedRange().slice(0))[0];
156
+ value = new Float32Array(readback)[0];
155
157
  }
156
- staging.unmap();
157
- staging.destroy();
158
158
  values.push(`${dimIdx}=${value.toFixed(4)}`);
159
159
  }
160
160
 
@@ -1,7 +1,7 @@
1
1
 
2
2
 
3
3
  import { getDevice } from '../../../gpu/device.js';
4
- import { acquireBuffer } from '../../../memory/buffer-pool.js';
4
+ import { acquireBuffer, releaseBuffer } from '../../../memory/buffer-pool.js';
5
5
  import { log } from '../../../debug/index.js';
6
6
  import { isWeightBuffer, isCpuWeightBuffer, tagBufferDtype } from '../../../gpu/weight-buffer.js';
7
7
 
@@ -53,9 +53,14 @@ export function getWeightBuffer(weight, label) {
53
53
  }
54
54
 
55
55
  const buf = acquireBuffer(data.byteLength, undefined, label);
56
- device.queue.writeBuffer(buf, 0, ( (data)));
57
- tagBufferDtype(buf, bufferDtype);
58
- return buf;
56
+ try {
57
+ device.queue.writeBuffer(buf, 0, ( (data)));
58
+ tagBufferDtype(buf, bufferDtype);
59
+ return buf;
60
+ } catch (error) {
61
+ releaseBuffer(buf);
62
+ throw error;
63
+ }
59
64
  }
60
65
 
61
66
 
@@ -92,9 +97,14 @@ export function getNormWeightBuffer(weight, label, config, debugFlags) {
92
97
  }
93
98
 
94
99
  const buf = acquireBuffer(data.byteLength, undefined, label);
95
- device.queue.writeBuffer(buf, 0, ( (data)));
96
- tagBufferDtype(buf, 'f32');
97
- return buf;
100
+ try {
101
+ device.queue.writeBuffer(buf, 0, ( (data)));
102
+ tagBufferDtype(buf, 'f32');
103
+ return buf;
104
+ } catch (error) {
105
+ releaseBuffer(buf);
106
+ throw error;
107
+ }
98
108
  }
99
109
 
100
110
 
@@ -6,7 +6,7 @@ import { configurePerfGuards } from '../../gpu/perf-guards.js';
6
6
  import { MoERouter } from '../moe-router.js';
7
7
  import { DecodeBufferManager } from '../decode-buffers.js';
8
8
  import { DecodeRing } from '../decode-ring.js';
9
- import { applyPipelineContexts } from './context.js';
9
+ import { applyPipelineContexts, restorePipelineContexts } from './context.js';
10
10
  import { createInitializedPipeline } from './factory.js';
11
11
 
12
12
  // Pipeline sub-modules
@@ -44,6 +44,11 @@ import { getDopplerLoader } from '../../loader/doppler-loader.js';
44
44
  import { registerPipeline, getPipelineFactory } from './registry.js';
45
45
  import { selectRuleValue } from '../../rules/rule-registry.js';
46
46
 
47
+ function destroyMoERouter(router) {
48
+ if (router && typeof router.destroy === 'function') {
49
+ router.destroy();
50
+ }
51
+ }
47
52
 
48
53
 
49
54
  // ============================================================================
@@ -102,6 +107,8 @@ export class InferencePipeline extends PipelineState {
102
107
  this.manifest = manifest;
103
108
  this.decodeRing?.release();
104
109
  this.linearAttentionRuntime = resetLinearAttentionRuntime(this.linearAttentionRuntime);
110
+ destroyMoERouter(this.moeRouter);
111
+ this.moeRouter = null;
105
112
 
106
113
  const executionV0Runtime = applyExecutionV0RuntimeConfig({
107
114
  runtimeConfig: this.runtimeConfig,
@@ -299,9 +306,13 @@ export class InferencePipeline extends PipelineState {
299
306
  const maxSeqLen = config.maxSeqLen;
300
307
  const ropeBuffers = await initRoPEFrequencies({
301
308
  headDim: config.headDim,
309
+ rotaryDim: config.ropeRotaryDim,
302
310
  maxSeqLen,
303
311
  ropeTheta: config.ropeTheta,
304
312
  ropeLocalTheta: config.ropeLocalTheta,
313
+ mropeInterleaved: config.ropeInterleaved,
314
+ mropeSection: config.mropeSection,
315
+ partialRotaryFactor: config.partialRotaryFactor,
305
316
  ropeScale: config.ropeScale,
306
317
  ropeLocalScale: config.ropeLocalScale,
307
318
  ropeScalingType: config.ropeScalingType,
@@ -486,12 +497,15 @@ export class InferencePipeline extends PipelineState {
486
497
  this.expertWeights.clear();
487
498
  this.linearAttentionRuntime = resetLinearAttentionRuntime(this.linearAttentionRuntime);
488
499
  this.lora = null;
500
+ destroyMoERouter(this.moeRouter);
501
+ this.moeRouter = null;
489
502
  if (this.finitenessBuffer) {
490
503
  this.finitenessBuffer.destroy();
491
504
  this.finitenessBuffer = null;
492
505
  }
493
506
  this.isLoaded = false;
494
507
  this.currentSeqLen = 0;
508
+ restorePipelineContexts(this);
495
509
  log.info('Pipeline', 'Unloaded');
496
510
  }
497
511
 
@@ -529,6 +543,8 @@ export class InferencePipeline extends PipelineState {
529
543
  releaseGPUResources() {
530
544
  this.decodeBuffers?.release();
531
545
  this.decodeRing?.release();
546
+ destroyMoERouter(this.moeRouter);
547
+ this.moeRouter = null;
532
548
  if (this.finitenessBuffer) {
533
549
  this.finitenessBuffer.destroy();
534
550
  this.finitenessBuffer = null;
@@ -66,8 +66,8 @@ export interface SpeculativeConfig {
66
66
  enableTreeDraft: boolean;
67
67
  /** Temperature for draft sampling */
68
68
  temperature: number;
69
- /** Optional deterministic seed for speculative sampling */
70
- randomSeed?: number | null;
69
+ /** Deterministic seed for speculative sampling */
70
+ randomSeed: number;
71
71
  }
72
72
 
73
73
  /**
@@ -10,22 +10,6 @@ function createRng(seed) {
10
10
  };
11
11
  }
12
12
 
13
- function createUnseededRng() {
14
- let fallbackState = ((Date.now() >>> 0) ^ 0xa341316c) >>> 0;
15
- return () => {
16
- const cryptoApi = typeof globalThis !== 'undefined' ? globalThis.crypto : null;
17
- if (cryptoApi && typeof cryptoApi.getRandomValues === 'function') {
18
- const random = new Uint32Array(1);
19
- cryptoApi.getRandomValues(random);
20
- return random[0] / 4294967296;
21
- }
22
- fallbackState = (fallbackState + 0x6d2b79f5) | 0;
23
- let t = Math.imul(fallbackState ^ (fallbackState >>> 15), 1 | fallbackState);
24
- t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
25
- return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
26
- };
27
- }
28
-
29
13
  function coerceLogitsVector(value, label) {
30
14
  if (value instanceof Float32Array) {
31
15
  if (value.length === 0) {
@@ -110,6 +94,9 @@ export class SpeculativeDecoder {
110
94
  if (config.temperature == null) {
111
95
  throw new Error('SpeculativeDecoder requires temperature.');
112
96
  }
97
+ if (!Number.isFinite(config.randomSeed)) {
98
+ throw new Error('SpeculativeDecoder requires randomSeed.');
99
+ }
113
100
 
114
101
  assertTemperature(config.temperature, 'temperature');
115
102
  this.numDraftTokens = config.numDraftTokens;
@@ -117,8 +104,7 @@ export class SpeculativeDecoder {
117
104
  this.enableTreeDraft = config.enableTreeDraft;
118
105
  this.temperature = config.temperature;
119
106
 
120
- const seed = Number.isFinite(config.randomSeed) ? Math.floor(config.randomSeed) : null;
121
- this.random = seed === null ? createUnseededRng() : createRng(seed);
107
+ this.random = createRng(Math.floor(config.randomSeed));
122
108
  }
123
109
 
124
110
  setDraftModel(model) {
@@ -74,7 +74,7 @@ export interface InitializeResult {
74
74
  /**
75
75
  * Discover available models from the catalog.json endpoint.
76
76
  *
77
- * @param fallbackModels - Models to return if catalog fails
77
+ * @param fallbackModels - Explicit fallback models to use when catalog fetch is unavailable
78
78
  * @returns Array of model info objects
79
79
  */
80
80
  export declare function discoverModels(
@@ -25,7 +25,7 @@ import {
25
25
 
26
26
 
27
27
  export async function discoverModels(
28
- fallbackModels = ['gemma3-1b-q4', 'mistral-7b-q4', 'llama3-8b-q4']
28
+ fallbackModels
29
29
  ) {
30
30
  try {
31
31
  const resp = await fetch('/models/catalog.json');
@@ -40,10 +40,13 @@ export async function discoverModels(
40
40
  }));
41
41
  }
42
42
  }
43
- } catch (e) {
44
- // Catalog not available, use fallback
43
+ } catch (e) {}
44
+
45
+ if (Array.isArray(fallbackModels) && fallbackModels.length > 0) {
46
+ return fallbackModels.map((id) => ({ id, name: id }));
45
47
  }
46
- return fallbackModels.map((id) => ({ id, name: id }));
48
+
49
+ throw new Error('discoverModels: failed to fetch /models/catalog.json and no explicit fallback model list was provided.');
47
50
  }
48
51
 
49
52
  // ============================================================================
@@ -238,7 +241,13 @@ export async function initializeInference(modelUrl, options = {}) {
238
241
  onProgress('hotswap', 0.05, 'Loading hot-swap manifest...');
239
242
  log(`Hot-swap: loading manifest ${hotSwapConfig.manifestUrl}`);
240
243
  const hotSwapManifest = await fetchHotSwapManifest(hotSwapConfig.manifestUrl);
241
- const verification = await verifyHotSwapManifest(hotSwapManifest, hotSwapConfig);
244
+ const verification = await verifyHotSwapManifest(hotSwapManifest, hotSwapConfig, {
245
+ source: {
246
+ kind: 'remote',
247
+ isLocal: false,
248
+ url: hotSwapConfig.manifestUrl,
249
+ },
250
+ });
242
251
  if (!verification.ok) {
243
252
  throw new Error(`Hot-swap manifest rejected: ${verification.reason}`);
244
253
  }
@@ -309,6 +318,7 @@ export async function initializeInference(modelUrl, options = {}) {
309
318
  const pipeline = await createPipeline( ( (manifest)), {
310
319
  storage: { loadShard },
311
320
  gpu: { device },
321
+ runtime,
312
322
  baseUrl: modelUrl,
313
323
  onProgress: ( progress) => {
314
324
  const pct = 0.2 + progress.percent * 0.8;
@@ -46,11 +46,6 @@ export declare class Tokenizer {
46
46
  */
47
47
  initialize(manifest: ModelManifest, options?: TokenizerInitOptions): Promise<void>;
48
48
 
49
- /**
50
- * Infer HuggingFace model ID from manifest architecture
51
- */
52
- private _inferHuggingFaceModel(manifest: ModelManifest): string | null;
53
-
54
49
  /**
55
50
  * Encode text to token IDs
56
51
  */
@@ -130,14 +130,12 @@ export class Tokenizer {
130
130
  );
131
131
  }
132
132
 
133
- let hfModel = tokenizerConfig.hfModel;
133
+ let hfModel = tokenizerConfig.hfModel ?? tokenizerConfig.modelId ?? null;
134
134
  const allowArchFallback = tokenizerConfig.allowArchFallback === true;
135
135
  if (allowArchFallback && !hfModel) {
136
- const inferred = this._inferHuggingFaceModel(manifest);
137
- if (inferred) {
138
- hfModel = inferred;
139
- log.warn('Tokenizer', `Using inferred HuggingFace model: ${inferred}`);
140
- }
136
+ throw new Error(
137
+ `[Tokenizer] tokenizer.allowArchFallback requires explicit tokenizer.hfModel or tokenizer.modelId for model "${modelId}".`
138
+ );
141
139
  }
142
140
 
143
141
  if (hfModel) {
@@ -212,23 +210,6 @@ export class Tokenizer {
212
210
 
213
211
  this.config = tokenizerConfig;
214
212
  }
215
-
216
-
217
- _inferHuggingFaceModel(manifest) {
218
- const tokenizer = manifest?.tokenizer ?? {};
219
- if (typeof tokenizer.modelId === 'string' && tokenizer.modelId.length > 0) {
220
- return tokenizer.modelId;
221
- }
222
- if (typeof tokenizer.hfModel === 'string' && tokenizer.hfModel.length > 0) {
223
- return tokenizer.hfModel;
224
- }
225
- if (typeof manifest?.modelId === 'string' && manifest.modelId.length > 0) {
226
- return manifest.modelId;
227
- }
228
- return null;
229
- }
230
-
231
-
232
213
  encode(text) {
233
214
  if (!this.backend) {
234
215
  throw new Error('Tokenizer not initialized');
@@ -21,8 +21,17 @@ export class BPETokenizer extends BaseTokenizer {
21
21
  });
22
22
  }
23
23
 
24
+ #resetState() {
25
+ this.#vocab.clear();
26
+ this.#reverseVocab.clear();
27
+ this.#merges = [];
28
+ this.#mergeRanks.clear();
29
+ this.vocabSize = 0;
30
+ }
31
+
24
32
 
25
33
  load(vocab, merges) {
34
+ this.#resetState();
26
35
  // Build vocab maps
27
36
  for (const [token, id] of Object.entries(vocab)) {
28
37
  this.#vocab.set(token, id);