@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355)
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -1,4 +1,4 @@
1
- import { acquireBuffer } from '../../memory/buffer-pool.js';
1
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
2
2
  import { createTensor, dtypeBytes } from '../tensor.js';
3
3
  import { WORKGROUP_SIZES } from './constants.js';
4
4
  import { unifiedKernelWrapper } from './utils.js';
@@ -20,18 +20,26 @@ async function _transpose(target, input, rows, cols, options = {}) {
20
20
  const bytesPerElement = dtypeBytes(input.dtype);
21
21
  const outputSize = rows * cols * bytesPerElement;
22
22
  const outputBuf = outputBuffer || acquireBuffer(outputSize, undefined, 'transpose_output');
23
+ const ownedOutput = outputBuffer ? null : outputBuf;
23
24
  const dispatchPlan = planTransposeDispatch(target, cols);
24
25
 
25
- await unifiedKernelWrapper(
26
- 'transpose',
27
- target,
28
- 'default',
29
- [input, outputBuf],
30
- { rows, cols, _pad0: dispatchPlan.dispatchStride, _pad1: 0 },
31
- [dispatchPlan.workgroups[0], rows, 1]
32
- );
26
+ try {
27
+ await unifiedKernelWrapper(
28
+ 'transpose',
29
+ target,
30
+ 'default',
31
+ [input, outputBuf],
32
+ { rows, cols, _pad0: dispatchPlan.dispatchStride, _pad1: 0 },
33
+ [dispatchPlan.workgroups[0], rows, 1]
34
+ );
33
35
 
34
- return createTensor(outputBuf, input.dtype, [cols, rows], 'transpose_output');
36
+ return createTensor(outputBuf, input.dtype, [cols, rows], 'transpose_output');
37
+ } catch (error) {
38
+ if (ownedOutput) {
39
+ releaseBuffer(ownedOutput);
40
+ }
41
+ throw error;
42
+ }
35
43
  }
36
44
 
37
45
  export async function runTranspose(input, rows, cols, options = {}) {
@@ -20,11 +20,13 @@ struct Uniforms {
20
20
  @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
21
21
  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
22
22
  let dispatch_stride = max(u._pad0, 1u);
23
- let row = gid.y;
24
- let col = gid.x + row * dispatch_stride;
25
- if (row >= u.rows || col >= u.cols) {
23
+ let linear_idx = gid.y * dispatch_stride + gid.x;
24
+ let total = u.rows * u.cols;
25
+ if (linear_idx >= total) {
26
26
  return;
27
27
  }
28
+ let row = linear_idx / u.cols;
29
+ let col = linear_idx % u.cols;
28
30
  let idx = row * u.cols + col;
29
31
  let out_idx = col * u.rows + row;
30
32
  output[out_idx] = input[idx];
@@ -1,4 +1,4 @@
1
- import { acquireBuffer } from '../../memory/buffer-pool.js';
1
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
2
2
  import { createTensor, dtypeBytes } from '../tensor.js';
3
3
  import { unifiedKernelWrapper } from './utils.js';
4
4
  import { selectRuleValue } from './rule-registry.js';
@@ -35,19 +35,27 @@ async function _upsample2d(target, input, options = {}) {
35
35
  const bytesPerElement = dtypeBytes(input.dtype);
36
36
  const outputSize = channels * outHeight * outWidth * bytesPerElement;
37
37
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'upsample2d_output');
38
+ const ownedOutput = outputBuffer ? null : output;
38
39
 
39
- await unifiedKernelWrapper(
40
- 'upsample2d', target, selectUpsample2DVariant(input.dtype === 'f16'),
41
- [input, output],
42
- {
43
- channels, in_height: resolvedHeight, in_width: resolvedWidth,
44
- out_height: outHeight, out_width: outWidth, scale,
45
- _pad0: 0, _pad1: 0,
46
- },
47
- [Math.ceil(outSpatial / WORKGROUP_SIZES.DEFAULT), channels, 1]
48
- );
49
-
50
- return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'upsample2d_output');
40
+ try {
41
+ await unifiedKernelWrapper(
42
+ 'upsample2d', target, selectUpsample2DVariant(input.dtype === 'f16'),
43
+ [input, output],
44
+ {
45
+ channels, in_height: resolvedHeight, in_width: resolvedWidth,
46
+ out_height: outHeight, out_width: outWidth, scale,
47
+ _pad0: 0, _pad1: 0,
48
+ },
49
+ [Math.ceil(outSpatial / WORKGROUP_SIZES.DEFAULT), channels, 1]
50
+ );
51
+
52
+ return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'upsample2d_output');
53
+ } catch (error) {
54
+ if (ownedOutput) {
55
+ releaseBuffer(ownedOutput);
56
+ }
57
+ throw error;
58
+ }
51
59
  }
52
60
 
53
61
  export async function runUpsample2D(input, options = {}) {
@@ -137,21 +137,28 @@ export async function unifiedKernelWrapper(opName, target, variant, bindings, un
137
137
  });
138
138
  }
139
139
 
140
- const bindGroup = device.createBindGroup({
141
- label: `${opName}_bind_group`,
142
- layout: pipeline.getBindGroupLayout(0),
143
- entries: bindGroupEntries,
144
- });
145
-
146
- if (workgroups && typeof workgroups === 'object' && workgroups.indirectBuffer) {
147
- const indirectOffset = workgroups.indirectOffset ?? 0;
148
- if (recorder) {
149
- recordDispatchIndirect(recorder, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
140
+ try {
141
+ const bindGroup = device.createBindGroup({
142
+ label: `${opName}_bind_group`,
143
+ layout: pipeline.getBindGroupLayout(0),
144
+ entries: bindGroupEntries,
145
+ });
146
+
147
+ if (workgroups && typeof workgroups === 'object' && workgroups.indirectBuffer) {
148
+ const indirectOffset = workgroups.indirectOffset ?? 0;
149
+ if (recorder) {
150
+ recordDispatchIndirect(recorder, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
151
+ } else {
152
+ dispatchIndirect(device, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
153
+ }
150
154
  } else {
151
- dispatchIndirect(device, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
155
+ dispatchKernel(target, pipeline, bindGroup, workgroups, opName);
156
+ }
157
+ } catch (error) {
158
+ if (!recorder) {
159
+ uniformBuffer.destroy();
152
160
  }
153
- } else {
154
- dispatchKernel(target, pipeline, bindGroup, workgroups, opName);
161
+ throw error;
155
162
  }
156
163
 
157
164
  if (!recorder) {
@@ -11,10 +11,13 @@ export class PartitionedBufferPool {
11
11
 
12
12
  #expertPools;
13
13
 
14
+ #bufferOwners;
15
+
14
16
 
15
17
  constructor(partitions, schemaConfig = getRuntimeConfig().shared.bufferPool) {
16
18
  this.#sharedPool = new BufferPool(false, schemaConfig);
17
19
  this.#expertPools = new Map();
20
+ this.#bufferOwners = new WeakMap();
18
21
  for (const partition of partitions) {
19
22
  this.#expertPools.set(partition.id, new BufferPool(false, schemaConfig));
20
23
  }
@@ -28,12 +31,17 @@ export class PartitionedBufferPool {
28
31
  label
29
32
  ) {
30
33
  const pool = this.#expertPools.get(partitionId) || this.#sharedPool;
31
- return pool.acquire(size, usage, label);
34
+ const buffer = pool.acquire(size, usage, label);
35
+ this.#bufferOwners.set(buffer, pool);
36
+ return buffer;
32
37
  }
33
38
 
34
39
 
35
40
  release(partitionId, buffer) {
36
- const pool = this.#expertPools.get(partitionId) || this.#sharedPool;
41
+ const pool = this.#bufferOwners.get(buffer)
42
+ || this.#expertPools.get(partitionId)
43
+ || this.#sharedPool;
44
+ this.#bufferOwners.delete(buffer);
37
45
  pool.release(buffer);
38
46
  }
39
47
 
@@ -1,15 +1,8 @@
1
1
 
2
-
3
2
  import { log, trace } from '../debug/index.js';
3
+ import { DEFAULT_PERF_GUARDS_CONFIG } from '../config/schema/debug.schema.js';
4
4
 
5
- // Initial config uses inline defaults; caller should configure via configurePerfGuards()
6
- let config = {
7
- allowGPUReadback: true,
8
- trackSubmitCount: false,
9
- trackAllocations: false,
10
- logExpensiveOps: false,
11
- strictMode: false,
12
- };
5
+ let config = { ...DEFAULT_PERF_GUARDS_CONFIG };
13
6
 
14
7
 
15
8
  let counters = {
@@ -179,6 +179,8 @@ export class GPUProfiler {
179
179
 
180
180
  if (!this.#device || !this.#querySet || !this.#queryBuffer || !this.#readbackBuffer) {
181
181
  log.warn('GPUProfiler', 'Missing required resources for resolve');
182
+ this.#pendingResolves = [];
183
+ this.#nextQueryIndex = 0;
182
184
  return;
183
185
  }
184
186
 
@@ -199,34 +201,35 @@ export class GPUProfiler {
199
201
 
200
202
  this.#device.queue.submit([encoder.finish()]);
201
203
 
202
- if (!allowReadback('GPUProfiler.resolve')) {
203
- return;
204
- }
204
+ let mapped = false;
205
205
 
206
- // Read back timestamps
207
- await this.#readbackBuffer.mapAsync(GPUMapMode.READ);
208
- const timestamps = new BigUint64Array(this.#readbackBuffer.getMappedRange());
206
+ try {
207
+ if (!allowReadback('GPUProfiler.resolve')) {
208
+ return;
209
+ }
209
210
 
210
- // Process pending resolves
211
- for (const pending of this.#pendingResolves) {
212
- const startNs = timestamps[pending.startIndex];
213
- const endNs = timestamps[pending.endIndex];
211
+ await this.#readbackBuffer.mapAsync(GPUMapMode.READ);
212
+ mapped = true;
213
+ const timestamps = new BigUint64Array(this.#readbackBuffer.getMappedRange());
214
214
 
215
- // Convert nanoseconds to milliseconds
216
- const durationMs = Number(endNs - startNs) / 1_000_000;
215
+ for (const pending of this.#pendingResolves) {
216
+ const startNs = timestamps[pending.startIndex];
217
+ const endNs = timestamps[pending.endIndex];
218
+ const durationMs = Number(endNs - startNs) / 1_000_000;
217
219
 
218
- // Sanity check - use CPU timing if GPU timing seems wrong
219
- if (durationMs < 0 || durationMs > this.#maxDurationMs) {
220
- // Fallback to CPU timing
221
- this.#recordResult(pending.label, pending.cpuEndTime - pending.cpuStartTime);
222
- } else {
223
- this.#recordResult(pending.label, durationMs);
220
+ if (durationMs < 0 || durationMs > this.#maxDurationMs) {
221
+ this.#recordResult(pending.label, pending.cpuEndTime - pending.cpuStartTime);
222
+ } else {
223
+ this.#recordResult(pending.label, durationMs);
224
+ }
225
+ }
226
+ } finally {
227
+ if (mapped) {
228
+ this.#readbackBuffer.unmap();
224
229
  }
230
+ this.#pendingResolves = [];
231
+ this.#nextQueryIndex = 0;
225
232
  }
226
-
227
- this.#readbackBuffer.unmap();
228
- this.#pendingResolves = [];
229
- this.#nextQueryIndex = 0;
230
233
  }
231
234
 
232
235
 
@@ -350,6 +353,8 @@ export class GPUProfiler {
350
353
  }
351
354
  this.#results.clear();
352
355
  this.#activeLabels.clear();
356
+ this.#pendingResolves = [];
357
+ this.#nextQueryIndex = 0;
353
358
  }
354
359
  }
355
360
 
@@ -0,0 +1,16 @@
1
+ export interface ReadbackBufferEntry {
2
+ buffer: GPUBuffer;
3
+ destroy?: boolean;
4
+ offset?: number;
5
+ size?: number;
6
+ }
7
+
8
+ export function withMappedReadBuffer<T>(
9
+ buffer: GPUBuffer,
10
+ read: (range: ArrayBuffer) => T | Promise<T>,
11
+ ): Promise<T>;
12
+
13
+ export function withMappedReadBuffers<T>(
14
+ entries: ReadonlyArray<ReadbackBufferEntry>,
15
+ read: (ranges: Array<ArrayBuffer>) => T | Promise<T>,
16
+ ): Promise<T>;
@@ -0,0 +1,41 @@
1
+ export async function withMappedReadBuffer(buffer, read) {
2
+ let mapped = false;
3
+ try {
4
+ await buffer.mapAsync(GPUMapMode.READ);
5
+ mapped = true;
6
+ return await read(buffer.getMappedRange());
7
+ } finally {
8
+ if (mapped) {
9
+ buffer.unmap();
10
+ }
11
+ }
12
+ }
13
+
14
+ export async function withMappedReadBuffers(entries, read) {
15
+ const mappedEntries = [];
16
+ try {
17
+ await Promise.all(entries.map(async (entry) => {
18
+ await entry.buffer.mapAsync(GPUMapMode.READ);
19
+ mappedEntries.push(entry);
20
+ }));
21
+ const ranges = entries.map((entry) => {
22
+ if (entry.offset != null || entry.size != null) {
23
+ return entry.buffer.getMappedRange(
24
+ entry.offset ?? 0,
25
+ entry.size ?? (entry.buffer.size - (entry.offset ?? 0))
26
+ );
27
+ }
28
+ return entry.buffer.getMappedRange();
29
+ });
30
+ return await read(ranges);
31
+ } finally {
32
+ for (let index = mappedEntries.length - 1; index >= 0; index -= 1) {
33
+ mappedEntries[index].buffer.unmap();
34
+ }
35
+ for (const entry of entries) {
36
+ if (entry.destroy === true) {
37
+ entry.buffer.destroy();
38
+ }
39
+ }
40
+ }
41
+ }
@@ -21,6 +21,8 @@ let submitSources = new Map();
21
21
 
22
22
  let currentPhase = 'other';
23
23
 
24
+ const WRAPPED_QUEUE_SENTINEL = Symbol.for('doppler.submitTrackerWrapped');
25
+
24
26
 
25
27
  const phaseStats = {
26
28
  prefill: { count: 0, times: [], totalMs: 0, maxMs: 0, minMs: Infinity, sources: new Map() },
@@ -194,6 +196,10 @@ function extractSourceFromStack() {
194
196
 
195
197
 
196
198
  export function wrapQueueForTracking(queue) {
199
+ if (!queue || queue[WRAPPED_QUEUE_SENTINEL] === true) {
200
+ return queue;
201
+ }
202
+
197
203
  const originalSubmit = queue.submit.bind(queue);
198
204
 
199
205
  (queue).submit = function( commandBuffers) {
@@ -210,6 +216,13 @@ export function wrapQueueForTracking(queue) {
210
216
  return result;
211
217
  };
212
218
 
219
+ Object.defineProperty(queue, WRAPPED_QUEUE_SENTINEL, {
220
+ value: true,
221
+ configurable: true,
222
+ enumerable: false,
223
+ writable: false,
224
+ });
225
+
213
226
  return queue;
214
227
  }
215
228
 
@@ -8,6 +8,7 @@
8
8
 
9
9
  interface UniformCacheEntry {
10
10
  buffer: GPUBuffer;
11
+ bytes: Uint8Array;
11
12
  lastUsed: number;
12
13
  refCount: number;
13
14
  }
@@ -17,6 +17,18 @@ function hashArrayBuffer(data) {
17
17
  return (hash >>> 0).toString(16).padStart(8, '0');
18
18
  }
19
19
 
20
+ function copyUniformBytes(data) {
21
+ return new Uint8Array(data.slice(0));
22
+ }
23
+
24
+ function equalUniformBytes(a, b) {
25
+ if (a.length !== b.length) return false;
26
+ for (let i = 0; i < a.length; i++) {
27
+ if (a[i] !== b[i]) return false;
28
+ }
29
+ return true;
30
+ }
31
+
20
32
 
21
33
  export class UniformBufferCache {
22
34
 
@@ -50,14 +62,22 @@ export class UniformBufferCache {
50
62
 
51
63
 
52
64
  getOrCreate(data, label) {
53
- const hash = hashArrayBuffer(data);
54
- const existing = this.#cache.get(hash);
55
-
56
- if (existing) {
57
- existing.lastUsed = performance.now();
58
- existing.refCount++;
59
- this.#stats.hits++;
60
- return existing.buffer;
65
+ const baseKey = `${data.byteLength}:${hashArrayBuffer(data)}`;
66
+ const dataBytes = copyUniformBytes(data);
67
+ let key = baseKey;
68
+ let suffix = 0;
69
+ let existing = this.#cache.get(key);
70
+
71
+ while (existing) {
72
+ if (equalUniformBytes(existing.bytes, dataBytes)) {
73
+ existing.lastUsed = performance.now();
74
+ existing.refCount++;
75
+ this.#stats.hits++;
76
+ return existing.buffer;
77
+ }
78
+ suffix += 1;
79
+ key = `${baseKey}#${suffix}`;
80
+ existing = this.#cache.get(key);
61
81
  }
62
82
 
63
83
  // Cache miss - create new buffer
@@ -80,8 +100,9 @@ export class UniformBufferCache {
80
100
  this.#evictLRU();
81
101
  }
82
102
 
83
- this.#cache.set(hash, {
103
+ this.#cache.set(key, {
84
104
  buffer,
105
+ bytes: dataBytes,
85
106
  lastUsed: performance.now(),
86
107
  refCount: 1,
87
108
  });
@@ -110,6 +110,6 @@ export function getBuffer(weight: GPUBuffer | WeightBuffer | TensorLike): GPUBuf
110
110
  export function getLayout(weight: GPUBuffer | WeightBuffer | TensorLike): WeightLayout | null;
111
111
 
112
112
  /**
113
- * Get dtype from WeightBuffer, or null for raw GPUBuffer.
113
+ * Get dtype from WeightBuffer, tagged raw GPUBuffer, or TensorLike.
114
114
  */
115
115
  export function getWeightDtype(weight: GPUBuffer | WeightBuffer | TensorLike): WeightDtype | TensorLike['dtype'] | null;
@@ -114,5 +114,5 @@ export function getLayout(weight) {
114
114
  export function getWeightDtype(weight) {
115
115
  if (isWeightBuffer(weight)) return weight.dtype;
116
116
  if (isTensorLike(weight)) return weight.dtype;
117
- return null;
117
+ return getBufferDtype(weight);
118
118
  }
@@ -42,6 +42,9 @@ export async function verifyIntentBundle(bundle, context) {
42
42
  reasons.push('Missing payload.expectedOutputHash');
43
43
  }
44
44
 
45
+ if (baseModelHash && !context?.manifest) {
46
+ reasons.push('Missing verification context manifest');
47
+ }
45
48
  if (context?.manifest && baseModelHash) {
46
49
  const manifestHash = await computeManifestHash(context.manifest);
47
50
  if (manifestHash !== baseModelHash.replace('sha256:', '')) {
@@ -49,6 +52,9 @@ export async function verifyIntentBundle(bundle, context) {
49
52
  }
50
53
  }
51
54
 
55
+ if (kernelRegistryVersion && context?.kernelRegistryVersion == null) {
56
+ reasons.push('Missing verification context kernelRegistryVersion');
57
+ }
52
58
  if (context?.kernelRegistryVersion && kernelRegistryVersion) {
53
59
  if (context.kernelRegistryVersion !== kernelRegistryVersion) {
54
60
  reasons.push('Kernel registry version mismatch');
@@ -23,11 +23,20 @@ export interface HotSwapVerificationResult {
23
23
  signerId?: string;
24
24
  }
25
25
 
26
+ export interface HotSwapVerificationContext {
27
+ source?: {
28
+ kind?: 'local' | 'remote' | string | null;
29
+ isLocal?: boolean | null;
30
+ url?: string | null;
31
+ } | null;
32
+ }
33
+
26
34
  export declare function fetchHotSwapManifest(url: string): Promise<HotSwapManifest>;
27
35
 
28
36
  export declare function verifyHotSwapManifest(
29
37
  manifest: HotSwapManifest,
30
- policy: HotSwapConfigSchema
38
+ policy: HotSwapConfigSchema,
39
+ context?: HotSwapVerificationContext
31
40
  ): Promise<HotSwapVerificationResult>;
32
41
 
33
42
  export declare function serializeHotSwapManifest(manifest: HotSwapManifest): string;
@@ -14,13 +14,23 @@ export async function fetchHotSwapManifest(url) {
14
14
  return response.json();
15
15
  }
16
16
 
17
- export async function verifyHotSwapManifest(manifest, policy) {
17
+ function isExplicitLocalSource(source) {
18
+ if (!source || typeof source !== 'object') {
19
+ return false;
20
+ }
21
+ if (source.isLocal === true) {
22
+ return true;
23
+ }
24
+ return source.kind === 'local';
25
+ }
26
+
27
+ export async function verifyHotSwapManifest(manifest, policy, context = {}) {
18
28
  if (!policy.enabled) {
19
29
  return { ok: false, reason: 'Hot-swap disabled' };
20
30
  }
21
31
 
22
32
  if (!manifest.signature) {
23
- if (policy.localOnly && policy.allowUnsignedLocal) {
33
+ if (policy.localOnly && policy.allowUnsignedLocal && isExplicitLocalSource(context.source)) {
24
34
  return { ok: true, reason: 'Local-only unsigned manifest accepted' };
25
35
  }
26
36
  return { ok: false, reason: 'Signature required' };
@@ -6,18 +6,40 @@ function normalizeRolloutPolicy(policy) {
6
6
  ? policy.rollout
7
7
  : {};
8
8
  const rawMode = String(rollout.mode || 'shadow').trim().toLowerCase().replace(/_/g, '-');
9
- const mode = rawMode === 'default' || rawMode === 'canary' || rawMode === 'opt-in' || rawMode === 'shadow'
10
- ? rawMode
11
- : 'shadow';
12
- const canaryPercent = Number.isFinite(rollout.canaryPercent)
13
- ? Math.min(100, Math.max(0, Number(rollout.canaryPercent)))
14
- : 0;
9
+ if (rawMode !== 'default' && rawMode !== 'canary' && rawMode !== 'opt-in' && rawMode !== 'shadow') {
10
+ throw new Error(
11
+ `hotswap.rollout.mode must be one of default, canary, opt-in, shadow (received "${rollout.mode}")`
12
+ );
13
+ }
14
+ let canaryPercent = 0;
15
+ if (rollout.canaryPercent !== undefined && rollout.canaryPercent !== null) {
16
+ const parsedCanaryPercent = Number(rollout.canaryPercent);
17
+ if (!Number.isFinite(parsedCanaryPercent) || parsedCanaryPercent < 0 || parsedCanaryPercent > 100) {
18
+ throw new Error('hotswap.rollout.canaryPercent must be a number between 0 and 100 when provided.');
19
+ }
20
+ canaryPercent = parsedCanaryPercent;
21
+ }
22
+ if (rollout.cohortSalt !== undefined && rollout.cohortSalt !== null && typeof rollout.cohortSalt !== 'string') {
23
+ throw new Error('hotswap.rollout.cohortSalt must be a string when provided.');
24
+ }
15
25
  const cohortSalt = String(rollout.cohortSalt || 'doppler-hotswap-v1').trim() || 'doppler-hotswap-v1';
26
+ if (rollout.optInAllowlist !== undefined && rollout.optInAllowlist !== null && !Array.isArray(rollout.optInAllowlist)) {
27
+ throw new Error('hotswap.rollout.optInAllowlist must be an array of strings when provided.');
28
+ }
16
29
  const optInAllowlist = Array.isArray(rollout.optInAllowlist)
17
- ? rollout.optInAllowlist.map((entry) => String(entry || '').trim()).filter(Boolean)
30
+ ? rollout.optInAllowlist.map((entry, index) => {
31
+ if (typeof entry !== 'string') {
32
+ throw new Error(`hotswap.rollout.optInAllowlist[${index}] must be a string.`);
33
+ }
34
+ const normalized = entry.trim();
35
+ if (!normalized) {
36
+ throw new Error(`hotswap.rollout.optInAllowlist[${index}] must not be empty.`);
37
+ }
38
+ return normalized;
39
+ })
18
40
  : [];
19
41
  return {
20
- mode,
42
+ mode: rawMode,
21
43
  canaryPercent,
22
44
  cohortSalt,
23
45
  optInAllowlist,
@@ -9,6 +9,20 @@ export {
9
9
  export { MultiModelLoader } from './loader/multi-model-loader.js';
10
10
 
11
11
  export { InferencePipeline, EmbeddingPipeline, createPipeline } from './generation/index.js';
12
+ export {
13
+ StructuredJsonHeadPipeline,
14
+ isStructuredJsonHeadModelType,
15
+ createStructuredJsonHeadPipeline,
16
+ DreamStructuredPipeline,
17
+ isDreamStructuredModelType,
18
+ createDreamStructuredPipeline,
19
+ } from './generation/index.js';
20
+ export {
21
+ EnergyRowHeadPipeline,
22
+ createEnergyRowHeadPipeline,
23
+ DreamEnergyHeadPipeline,
24
+ createDreamEnergyHeadPipeline,
25
+ } from './inference/pipelines/energy-head/row-head-pipeline.js';
12
26
  export { KVCache } from './inference/kv-cache.js';
13
27
  export { Tokenizer } from './inference/tokenizer.js';
14
28
  export { SpeculativeDecoder } from './inference/speculative.js';
@@ -25,6 +39,22 @@ export {
25
39
  mergeMultipleLogits,
26
40
  } from './gpu/kernels/logit-merge.js';
27
41
 
42
+ export type { RDRRManifest, ShardInfo } from './formats/rdrr/index.js';
43
+ export type { TensorLocation, LoadProgress, LoadOptions, LoaderStats } from './loader/doppler-loader.js';
44
+ export type { AdapterSource } from './loader/multi-model-loader.js';
45
+ export type { ParsedModelConfig } from './generation/index.js';
46
+ export type { SamplingOptions } from './generation/index.js';
47
+ export type {
48
+ GenerateOptions,
49
+ GenerationResult,
50
+ KVCacheSnapshot,
51
+ LayerWeights,
52
+ ExpertWeights,
53
+ RouterWeights,
54
+ } from './generation/index.js';
55
+ export type { LoRAAdapter, LoRAModuleName } from './generation/index.js';
56
+ export type { ExpertNode, ExpertTask } from './inference/multi-model-network.js';
57
+
28
58
  export {
29
59
  ADAPTER_MANIFEST_SCHEMA,
30
60
  validateManifest as validateAdapterManifest,
@@ -45,4 +75,18 @@ export {
45
75
  createMemoryRegistry,
46
76
  } from './adapters/index.js';
47
77
 
78
+ export type {
79
+ AdapterManifest,
80
+ AdapterMetadata,
81
+ AdapterTensorSpec,
82
+ LoRALoadOptions,
83
+ LoRAWeightsResult,
84
+ AdapterState,
85
+ EnableAdapterOptions,
86
+ AdapterStackOptions,
87
+ AdapterManagerEvents,
88
+ AdapterRegistryEntry,
89
+ AdapterQueryOptions,
90
+ } from './adapters/index.js';
91
+
48
92
  export * from './tooling-exports.browser.js';
@@ -11,6 +11,20 @@ export { MultiModelLoader } from './loader/multi-model-loader.js';
11
11
 
12
12
  // Inference pipeline
13
13
  export { InferencePipeline, EmbeddingPipeline, createPipeline } from './generation/index.js';
14
+ export {
15
+ StructuredJsonHeadPipeline,
16
+ isStructuredJsonHeadModelType,
17
+ createStructuredJsonHeadPipeline,
18
+ DreamStructuredPipeline,
19
+ isDreamStructuredModelType,
20
+ createDreamStructuredPipeline,
21
+ } from './generation/index.js';
22
+ export {
23
+ EnergyRowHeadPipeline,
24
+ createEnergyRowHeadPipeline,
25
+ DreamEnergyHeadPipeline,
26
+ createDreamEnergyHeadPipeline,
27
+ } from './inference/pipelines/energy-head/row-head-pipeline.js';
14
28
  export { KVCache } from './inference/kv-cache.js';
15
29
  export { Tokenizer } from './inference/tokenizer.js';
16
30
  export { SpeculativeDecoder } from './inference/speculative.js';