@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392)
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +25 -17
  3. package/package.json +20 -4
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +39 -39
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +49 -7
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +43 -4
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +28 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/models/qwen3.json +9 -2
  45. package/src/config/presets/models/transformer.json +5 -0
  46. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  47. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  48. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  49. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  50. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  52. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  54. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  55. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  56. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  57. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  58. package/src/config/required-inference-fields-contract-check.js +6 -0
  59. package/src/config/runtime.js +6 -1
  60. package/src/config/schema/debug.schema.d.ts +5 -0
  61. package/src/config/schema/doppler.schema.js +16 -21
  62. package/src/config/schema/inference-defaults.schema.js +6 -3
  63. package/src/config/schema/inference.schema.d.ts +9 -0
  64. package/src/config/schema/kernel-path.schema.d.ts +11 -1
  65. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  66. package/src/config/schema/manifest.schema.d.ts +8 -1
  67. package/src/config/schema/manifest.schema.js +19 -3
  68. package/src/config/training-defaults.js +30 -22
  69. package/src/converter/conversion-plan.js +94 -9
  70. package/src/converter/core.d.ts +7 -0
  71. package/src/converter/core.js +14 -9
  72. package/src/converter/execution-v0-manifest.js +4 -1
  73. package/src/converter/index.d.ts +1 -0
  74. package/src/converter/index.js +1 -0
  75. package/src/converter/manifest-inference.js +43 -12
  76. package/src/converter/parsers/diffusion.js +0 -3
  77. package/src/converter/quantization-info.js +35 -15
  78. package/src/converter/rope-config.js +42 -0
  79. package/src/converter/shard-packer.d.ts +1 -1
  80. package/src/converter/shard-packer.js +4 -1
  81. package/src/debug/config.js +123 -11
  82. package/src/debug/signals.js +7 -1
  83. package/src/debug/tensor.d.ts +2 -0
  84. package/src/debug/tensor.js +13 -2
  85. package/src/distribution/p2p-control-plane.js +52 -12
  86. package/src/distribution/p2p-observability.js +43 -7
  87. package/src/distribution/p2p-webrtc-browser.js +20 -0
  88. package/src/distribution/shard-delivery.js +77 -26
  89. package/src/formats/gguf/types.js +33 -16
  90. package/src/formats/rdrr/groups.d.ts +12 -4
  91. package/src/formats/rdrr/groups.js +3 -6
  92. package/src/formats/rdrr/parsing.js +39 -2
  93. package/src/formats/rdrr/types.d.ts +2 -1
  94. package/src/gpu/command-recorder.js +86 -61
  95. package/src/gpu/device.d.ts +1 -0
  96. package/src/gpu/device.js +131 -19
  97. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  98. package/src/gpu/kernel-tuner/cache.js +71 -4
  99. package/src/gpu/kernel-tuner/tuner.js +22 -4
  100. package/src/gpu/kernels/attention.js +113 -34
  101. package/src/gpu/kernels/backward/adam.js +62 -58
  102. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  103. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  104. package/src/gpu/kernels/bias_add.wgsl +8 -6
  105. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  106. package/src/gpu/kernels/cast.js +191 -149
  107. package/src/gpu/kernels/check-stop.js +33 -44
  108. package/src/gpu/kernels/conv2d.js +27 -17
  109. package/src/gpu/kernels/conv2d.wgsl +7 -8
  110. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  111. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  112. package/src/gpu/kernels/depthwise_conv2d.js +37 -26
  113. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  114. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  115. package/src/gpu/kernels/dequant.js +178 -126
  116. package/src/gpu/kernels/energy.d.ts +3 -21
  117. package/src/gpu/kernels/energy.js +111 -88
  118. package/src/gpu/kernels/feature-check.js +1 -1
  119. package/src/gpu/kernels/fused_ffn.js +84 -65
  120. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  121. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  122. package/src/gpu/kernels/gather.js +33 -15
  123. package/src/gpu/kernels/gelu.js +19 -11
  124. package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
  125. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  126. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  127. package/src/gpu/kernels/groupnorm.js +34 -23
  128. package/src/gpu/kernels/kv-quantize.js +5 -2
  129. package/src/gpu/kernels/layernorm.js +35 -19
  130. package/src/gpu/kernels/logit-merge.js +5 -3
  131. package/src/gpu/kernels/matmul.js +83 -39
  132. package/src/gpu/kernels/modulate.js +23 -15
  133. package/src/gpu/kernels/moe.js +221 -175
  134. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  135. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  136. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  137. package/src/gpu/kernels/relu.js +31 -10
  138. package/src/gpu/kernels/relu.wgsl +2 -1
  139. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  140. package/src/gpu/kernels/repeat_channels.js +25 -17
  141. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  142. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  143. package/src/gpu/kernels/residual.js +69 -23
  144. package/src/gpu/kernels/residual.wgsl +6 -3
  145. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  146. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  147. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  148. package/src/gpu/kernels/rmsnorm.js +96 -28
  149. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  150. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  151. package/src/gpu/kernels/rope.d.ts +2 -0
  152. package/src/gpu/kernels/rope.js +14 -1
  153. package/src/gpu/kernels/rope.wgsl +56 -40
  154. package/src/gpu/kernels/sample.js +27 -38
  155. package/src/gpu/kernels/sana_linear_attention.js +19 -12
  156. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  157. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  158. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  159. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  160. package/src/gpu/kernels/scale.js +18 -11
  161. package/src/gpu/kernels/shader-cache.js +4 -2
  162. package/src/gpu/kernels/silu.d.ts +1 -0
  163. package/src/gpu/kernels/silu.js +148 -82
  164. package/src/gpu/kernels/silu.wgsl +19 -9
  165. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  166. package/src/gpu/kernels/softmax.js +44 -25
  167. package/src/gpu/kernels/split_qkv.js +23 -13
  168. package/src/gpu/kernels/transpose.js +31 -10
  169. package/src/gpu/kernels/transpose.wgsl +6 -5
  170. package/src/gpu/kernels/upsample2d.js +22 -13
  171. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  172. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  173. package/src/gpu/kernels/utils.js +35 -13
  174. package/src/gpu/partitioned-buffer-pool.js +10 -2
  175. package/src/gpu/perf-guards.js +2 -9
  176. package/src/gpu/profiler.js +27 -22
  177. package/src/gpu/readback-utils.d.ts +16 -0
  178. package/src/gpu/readback-utils.js +41 -0
  179. package/src/gpu/submit-tracker.js +13 -0
  180. package/src/gpu/uniform-cache.d.ts +1 -0
  181. package/src/gpu/uniform-cache.js +30 -9
  182. package/src/hotswap/intent-bundle.js +6 -0
  183. package/src/hotswap/manifest.d.ts +10 -1
  184. package/src/hotswap/manifest.js +12 -2
  185. package/src/hotswap/runtime.js +30 -8
  186. package/src/index-browser.d.ts +44 -0
  187. package/src/index-browser.js +14 -0
  188. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  189. package/src/inference/browser-harness-contract-helpers.js +28 -0
  190. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  191. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  192. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  193. package/src/inference/browser-harness-model-helpers.js +217 -0
  194. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  195. package/src/inference/browser-harness-report-helpers.js +42 -0
  196. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  197. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  198. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  199. package/src/inference/browser-harness-suite-helpers.js +268 -0
  200. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  201. package/src/inference/browser-harness-text-helpers.js +788 -0
  202. package/src/inference/browser-harness.d.ts +6 -0
  203. package/src/inference/browser-harness.js +130 -1950
  204. package/src/inference/kv-cache/base.js +140 -94
  205. package/src/inference/kv-cache/tiered.js +5 -3
  206. package/src/inference/moe-router.js +88 -56
  207. package/src/inference/multi-model-network.js +5 -3
  208. package/src/inference/network-evolution.d.ts +11 -2
  209. package/src/inference/network-evolution.js +20 -21
  210. package/src/inference/pipelines/context.d.ts +3 -0
  211. package/src/inference/pipelines/context.js +142 -2
  212. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  213. package/src/inference/pipelines/diffusion/pipeline.js +17 -7
  214. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  215. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  216. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  217. package/src/inference/pipelines/diffusion/vae.js +3 -7
  218. package/src/inference/pipelines/energy/pipeline.js +27 -21
  219. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  220. package/src/inference/pipelines/energy/quintel.js +11 -0
  221. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  222. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  223. package/src/inference/pipelines/text/attention/projections.js +151 -101
  224. package/src/inference/pipelines/text/attention/record.js +73 -10
  225. package/src/inference/pipelines/text/attention/run.js +73 -10
  226. package/src/inference/pipelines/text/chat-format.js +25 -1
  227. package/src/inference/pipelines/text/config.d.ts +4 -0
  228. package/src/inference/pipelines/text/config.js +71 -5
  229. package/src/inference/pipelines/text/embed.js +2 -8
  230. package/src/inference/pipelines/text/execution-plan.js +64 -50
  231. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  232. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  233. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  234. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  235. package/src/inference/pipelines/text/execution-v0.js +78 -1002
  236. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  237. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  238. package/src/inference/pipelines/text/generator-steps.js +298 -207
  239. package/src/inference/pipelines/text/generator.js +6 -23
  240. package/src/inference/pipelines/text/init.d.ts +4 -0
  241. package/src/inference/pipelines/text/init.js +134 -29
  242. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  243. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  244. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  245. package/src/inference/pipelines/text/layer.js +14 -9
  246. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  247. package/src/inference/pipelines/text/linear-attention.js +80 -6
  248. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  249. package/src/inference/pipelines/text/logits/index.js +10 -11
  250. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  251. package/src/inference/pipelines/text/logits/utils.js +9 -0
  252. package/src/inference/pipelines/text/lora-apply.js +50 -32
  253. package/src/inference/pipelines/text/model-load.js +279 -104
  254. package/src/inference/pipelines/text/moe-cache.js +5 -4
  255. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  256. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  257. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  258. package/src/inference/pipelines/text/ops.js +90 -90
  259. package/src/inference/pipelines/text/probes.js +9 -9
  260. package/src/inference/pipelines/text/weights.js +17 -7
  261. package/src/inference/pipelines/text.js +17 -1
  262. package/src/inference/speculative.d.ts +2 -2
  263. package/src/inference/speculative.js +4 -18
  264. package/src/inference/test-harness.d.ts +1 -1
  265. package/src/inference/test-harness.js +15 -5
  266. package/src/inference/tokenizer.d.ts +0 -5
  267. package/src/inference/tokenizer.js +4 -23
  268. package/src/inference/tokenizers/bpe.js +9 -0
  269. package/src/inference/tokenizers/bundled.js +176 -33
  270. package/src/inference/tokenizers/sentencepiece.js +12 -0
  271. package/src/loader/doppler-loader.js +38 -22
  272. package/src/loader/dtype-utils.js +3 -44
  273. package/src/loader/embedding-loader.js +7 -3
  274. package/src/loader/experts/expert-cache.js +13 -6
  275. package/src/loader/experts/expert-loader.js +10 -6
  276. package/src/loader/final-weights-loader.js +8 -4
  277. package/src/loader/layer-loader.js +2 -1
  278. package/src/loader/loader-state.js +2 -2
  279. package/src/loader/memory-monitor.js +8 -0
  280. package/src/loader/multi-model-loader.d.ts +14 -0
  281. package/src/loader/multi-model-loader.js +70 -24
  282. package/src/loader/shard-cache.js +81 -12
  283. package/src/loader/shard-resolver.js +25 -3
  284. package/src/loader/tensors/tensor-loader.js +209 -144
  285. package/src/loader/tensors/tensor-reader.js +76 -19
  286. package/src/loader/weight-downcast.js +1 -1
  287. package/src/memory/buffer-pool.d.ts +9 -1
  288. package/src/memory/buffer-pool.js +109 -44
  289. package/src/memory/unified-detect.js +1 -1
  290. package/src/rules/inference/kernel-path.rules.json +24 -8
  291. package/src/rules/rule-registry.js +25 -1
  292. package/src/rules/tooling/command-runtime.rules.json +18 -0
  293. package/src/storage/backends/opfs-store.js +68 -24
  294. package/src/storage/downloader.js +364 -83
  295. package/src/storage/index.d.ts +3 -0
  296. package/src/storage/index.js +3 -0
  297. package/src/storage/preflight.d.ts +2 -2
  298. package/src/storage/preflight.js +24 -2
  299. package/src/storage/quickstart-downloader.js +11 -5
  300. package/src/storage/registry.js +10 -4
  301. package/src/storage/reports.js +1 -1
  302. package/src/storage/shard-manager.d.ts +15 -1
  303. package/src/storage/shard-manager.js +51 -3
  304. package/src/storage/source-artifact-store.d.ts +52 -0
  305. package/src/storage/source-artifact-store.js +234 -0
  306. package/src/tooling/command-api-constants.d.ts +9 -0
  307. package/src/tooling/command-api-constants.js +9 -0
  308. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  309. package/src/tooling/command-api-family-normalizers.js +343 -0
  310. package/src/tooling/command-api-helpers.d.ts +25 -0
  311. package/src/tooling/command-api-helpers.js +262 -0
  312. package/src/tooling/command-api.d.ts +27 -1
  313. package/src/tooling/command-api.js +26 -473
  314. package/src/tooling/command-envelope.js +4 -1
  315. package/src/tooling/command-runner-shared.js +52 -18
  316. package/src/tooling/lean-execution-contract.js +150 -3
  317. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  318. package/src/tooling/node-browser-command-runner.js +218 -273
  319. package/src/tooling/node-command-runner.js +44 -3
  320. package/src/tooling/node-converter.js +27 -1
  321. package/src/tooling/node-source-runtime.d.ts +1 -1
  322. package/src/tooling/node-source-runtime.js +84 -3
  323. package/src/tooling/node-webgpu.js +30 -105
  324. package/src/tooling/opfs-cache.js +21 -4
  325. package/src/tooling/runtime-input-composition.d.ts +38 -0
  326. package/src/tooling/runtime-input-composition.js +86 -0
  327. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  328. package/src/tooling/source-runtime-bundle.js +261 -34
  329. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  330. package/src/tooling/source-runtime-materializer.js +93 -0
  331. package/src/training/attention-backward.js +32 -17
  332. package/src/training/autograd.js +80 -52
  333. package/src/training/checkpoint-watch.d.ts +8 -0
  334. package/src/training/checkpoint-watch.js +139 -0
  335. package/src/training/checkpoint.d.ts +6 -1
  336. package/src/training/checkpoint.js +46 -7
  337. package/src/training/clip.js +2 -1
  338. package/src/training/datasets/token-batch.js +20 -8
  339. package/src/training/distillation/artifacts.d.ts +71 -0
  340. package/src/training/distillation/artifacts.js +132 -0
  341. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  342. package/src/training/distillation/checkpoint-watch.js +58 -0
  343. package/src/training/distillation/dataset.d.ts +59 -0
  344. package/src/training/distillation/dataset.js +337 -0
  345. package/src/training/distillation/eval.d.ts +34 -0
  346. package/src/training/distillation/eval.js +310 -0
  347. package/src/training/distillation/index.d.ts +29 -0
  348. package/src/training/distillation/index.js +29 -0
  349. package/src/training/distillation/runtime.d.ts +20 -0
  350. package/src/training/distillation/runtime.js +121 -0
  351. package/src/training/distillation/scoreboard.d.ts +6 -0
  352. package/src/training/distillation/scoreboard.js +8 -0
  353. package/src/training/distillation/stage-a.d.ts +45 -0
  354. package/src/training/distillation/stage-a.js +338 -0
  355. package/src/training/distillation/stage-b.d.ts +24 -0
  356. package/src/training/distillation/stage-b.js +20 -0
  357. package/src/training/distillation/student-fixture.d.ts +22 -0
  358. package/src/training/distillation/student-fixture.js +846 -0
  359. package/src/training/distillation/suite-data.d.ts +45 -0
  360. package/src/training/distillation/suite-data.js +189 -0
  361. package/src/training/index.d.ts +10 -0
  362. package/src/training/index.js +10 -0
  363. package/src/training/lora-pipeline.d.ts +40 -0
  364. package/src/training/lora-pipeline.js +793 -0
  365. package/src/training/lora.js +26 -12
  366. package/src/training/loss.js +5 -6
  367. package/src/training/objectives/cross_entropy.js +2 -5
  368. package/src/training/objectives/distill_kd.js +4 -8
  369. package/src/training/objectives/distill_triplet.js +4 -8
  370. package/src/training/objectives/ul_stage2_base.js +4 -8
  371. package/src/training/operator-artifacts.d.ts +62 -0
  372. package/src/training/operator-artifacts.js +140 -0
  373. package/src/training/operator-command.d.ts +5 -0
  374. package/src/training/operator-command.js +455 -0
  375. package/src/training/operator-eval.d.ts +48 -0
  376. package/src/training/operator-eval.js +230 -0
  377. package/src/training/operator-scoreboard.d.ts +5 -0
  378. package/src/training/operator-scoreboard.js +44 -0
  379. package/src/training/optimizer.js +19 -7
  380. package/src/training/runner.d.ts +52 -0
  381. package/src/training/runner.js +31 -5
  382. package/src/training/suite.d.ts +112 -0
  383. package/src/training/suite.js +24 -984
  384. package/src/training/tensor-factory.d.ts +9 -0
  385. package/src/training/tensor-factory.js +13 -0
  386. package/src/training/trainer.js +3 -5
  387. package/src/training/ul_dataset.js +3 -5
  388. package/src/training/workloads.d.ts +164 -0
  389. package/src/training/workloads.js +530 -0
  390. package/src/version.js +1 -1
  391. package/tools/convert-safetensors-node.js +22 -16
  392. package/tools/doppler-cli.js +179 -63
@@ -64,6 +64,68 @@ function resolveSpecialTokens(specialTokensRaw, fallbackTokens, vocab) {
64
64
  return resolved;
65
65
  }
66
66
 
67
/**
 * Inspects a tokenizer.json `pre_tokenizer` entry for a ByteLevel stage.
 *
 * A `ByteLevel` entry matches directly. A `Sequence` entry is searched
 * depth-first through its `pretokenizers` array and the first ByteLevel hit
 * wins. Everything else (including non-object input) reports "not byte-level".
 *
 * @param {unknown} preTokenizer - The `pre_tokenizer` value, possibly nested.
 * @returns {{ useByteLevel: boolean, addPrefixSpace: boolean | null }}
 *   `addPrefixSpace` is a strict boolean when a ByteLevel stage was found
 *   (true only if `add_prefix_space === true`) and `null` otherwise.
 */
function resolveByteLevelPretokenizerConfig(preTokenizer) {
  const isObject = preTokenizer !== null && typeof preTokenizer === 'object';
  const kind = isObject ? preTokenizer.type : undefined;

  switch (kind) {
    case 'ByteLevel':
      return {
        useByteLevel: true,
        addPrefixSpace: preTokenizer.add_prefix_space === true,
      };

    case 'Sequence': {
      const children = Array.isArray(preTokenizer.pretokenizers)
        ? preTokenizer.pretokenizers
        : [];
      // Stop at the first nested stage that resolves to ByteLevel.
      for (const child of children) {
        const nested = resolveByteLevelPretokenizerConfig(child);
        if (nested.useByteLevel) {
          return nested;
        }
      }
      break;
    }

    default:
      break;
  }

  // No ByteLevel stage anywhere in this entry.
  return {
    useByteLevel: false,
    addPrefixSpace: null,
  };
}
96
+
97
/**
 * Registers tokenizer `added_tokens` entries into the supplied lookup
 * structures and reports the largest token id seen.
 *
 * For every well-formed entry (string `content`, finite numeric `id`):
 *  - the content/id pair is added to `vocab` / `reverseVocab` unless the
 *    content is already present in `vocab`;
 *  - multi-character contents are appended to `patterns` as `{ content, id }`;
 *  - entries flagged `special` have their id added to `specialTokenIds`, and
 *    may fill an empty `bos` / `eos` / `pad` / `unk` slot of
 *    `derivedSpecialTokens` based on the token text.
 *
 * Malformed entries (null, missing/empty/non-string content, non-numeric id)
 * are skipped rather than throwing, and a null/undefined `addedTokens` is
 * treated as an empty list.
 *
 * @param {Iterable<object> | null | undefined} addedTokens - Raw added-token entries.
 * @param {Map<string, number>} vocab - Token text to id map (mutated).
 * @param {Map<number, string>} reverseVocab - Id to token text map (mutated).
 * @param {Array<{ content: string, id: number }>} patterns - Literal match patterns (mutated).
 * @param {Set<number>} specialTokenIds - Ids of special tokens (mutated).
 * @param {{ bos: ?number, eos: ?number, pad: ?number, unk: ?number } | null} [derivedSpecialTokens=null]
 *   Optional slots to fill from special-token text; only empty slots are written.
 * @returns {number} Largest registered id, or -1 when nothing was registered.
 */
function registerAddedTokens(addedTokens, vocab, reverseVocab, patterns, specialTokenIds, derivedSpecialTokens = null) {
  let maxId = -1;
  // A missing added_tokens list simply means there is nothing to register.
  if (addedTokens == null) {
    return maxId;
  }

  for (const token of addedTokens) {
    const content = token?.content;
    const rawId = token?.id;
    const id = typeof rawId === 'number' ? rawId : Number.parseInt(rawId, 10);

    // Only string contents are usable: the code below relies on
    // `content.length` and `content.includes`, which would misbehave or throw
    // for a truthy non-string value.
    if (!Number.isFinite(id) || typeof content !== 'string' || content.length === 0) {
      continue;
    }

    // Never overwrite an id the base vocabulary already assigned.
    if (!vocab.has(content)) {
      vocab.set(content, id);
      reverseVocab.set(id, content);
    }
    if (id > maxId) {
      maxId = id;
    }
    // Single-character tokens are not registered as literal match patterns.
    if (content.length > 1) {
      patterns.push({ content, id });
    }

    if (!token.special) {
      continue;
    }
    specialTokenIds.add(id);
    if (!derivedSpecialTokens) {
      continue;
    }

    // Heuristic role detection: the first role whose slot is still empty and
    // whose name matches the token text claims this token.
    if (derivedSpecialTokens.bos == null && (content === '<bos>' || content === '<s>' || content.includes('bos'))) {
      derivedSpecialTokens.bos = id;
    } else if (derivedSpecialTokens.eos == null && (content === '<eos>' || content === '</s>' || content.includes('eos'))) {
      derivedSpecialTokens.eos = id;
    } else if (derivedSpecialTokens.pad == null && (content === '<pad>' || content.includes('pad'))) {
      derivedSpecialTokens.pad = id;
    } else if (derivedSpecialTokens.unk == null && (content === '<unk>' || content.includes('unk'))) {
      derivedSpecialTokens.unk = id;
    }
  }

  return maxId;
}
128
+
67
129
 
68
130
  export class TransformersTokenizer extends BaseTokenizer {
69
131
 
@@ -156,6 +218,10 @@ export class BundledTokenizer extends BaseTokenizer {
156
218
 
157
219
  #byteDecoder = null;
158
220
 
221
+ #byteEncoder = null;
222
+
223
+ #useByteLevelEncoding = false;
224
+
159
225
 
160
226
  constructor(config = {}) {
161
227
  // BundledTokenizer gets vocabSize from load(), so defer validation
@@ -164,6 +230,25 @@ export class BundledTokenizer extends BaseTokenizer {
164
230
  });
165
231
  }
166
232
 
233
+ #resetState() {
234
+ this.#vocab.clear();
235
+ this.#reverseVocab.clear();
236
+ this.#merges = [];
237
+ this.#mergeRanks.clear();
238
+ this.#scores = [];
239
+ this.#tokenTypes = [];
240
+ this.#type = null;
241
+ this.#byteTokens.clear();
242
+ this.#specialTokenPatterns = [];
243
+ this.#specialTokenIds = new Set();
244
+ this.#addSpacePrefix = true;
245
+ this.#spacePrefixChar = '▁';
246
+ this.#byteDecoder = null;
247
+ this.#byteEncoder = null;
248
+ this.#useByteLevelEncoding = false;
249
+ this.vocabSize = 0;
250
+ }
251
+
167
252
 
168
253
  isSpecialToken(tokenId) {
169
254
  if (this.#specialTokenIds.size > 0) {
@@ -199,13 +284,25 @@ export class BundledTokenizer extends BaseTokenizer {
199
284
  }
200
285
 
201
286
  this.#byteDecoder = new Map();
287
+ this.#byteEncoder = new Map();
202
288
  for (let i = 0; i < base.length; i++) {
203
289
  this.#byteDecoder.set(String.fromCodePoint(chars[i]), base[i]);
290
+ this.#byteEncoder.set(base[i], String.fromCodePoint(chars[i]));
204
291
  }
205
292
  }
206
293
 
294
+ #encodeByteLevelText(text) {
295
+ const bytes = new TextEncoder().encode(text);
296
+ let out = '';
297
+ for (const byte of bytes) {
298
+ out += this.#byteEncoder?.get(byte) ?? String.fromCharCode(byte);
299
+ }
300
+ return out;
301
+ }
302
+
207
303
 
208
304
  load(tokenizerJson) {
305
+ this.#resetState();
209
306
  // Detect format: HuggingFace has model.vocab, bundled has top-level vocab
210
307
  const isHuggingFace = 'model' in tokenizerJson && tokenizerJson.model?.vocab !== undefined;
211
308
 
@@ -290,30 +387,16 @@ export class BundledTokenizer extends BaseTokenizer {
290
387
  eos: null,
291
388
  unk: null,
292
389
  };
293
- for (const token of addedTokens) {
294
- const content = token.content;
295
- const id = typeof token.id === 'number' ? token.id : parseInt( (token.id), 10);
296
- if (!Number.isFinite(id) || !content) continue;
297
- if (!this.#vocab.has(content)) {
298
- this.#vocab.set(content, id);
299
- this.#reverseVocab.set(id, content);
300
- }
301
- if (id > maxId) maxId = id;
302
- if (token.special) {
303
- specialTokenIds.add(id);
304
- if (content.length > 1) {
305
- specialTokenPatterns.push({ content, id });
306
- }
307
- if (derivedSpecialTokens.bos == null && (content === '<bos>' || content === '<s>' || content.includes('bos'))) {
308
- derivedSpecialTokens.bos = id;
309
- } else if (derivedSpecialTokens.eos == null && (content === '<eos>' || content === '</s>' || content.includes('eos'))) {
310
- derivedSpecialTokens.eos = id;
311
- } else if (derivedSpecialTokens.pad == null && (content === '<pad>' || content.includes('pad'))) {
312
- derivedSpecialTokens.pad = id;
313
- } else if (derivedSpecialTokens.unk == null && (content === '<unk>' || content.includes('unk'))) {
314
- derivedSpecialTokens.unk = id;
315
- }
316
- }
390
+ const addedMaxId = registerAddedTokens(
391
+ addedTokens,
392
+ this.#vocab,
393
+ this.#reverseVocab,
394
+ specialTokenPatterns,
395
+ specialTokenIds,
396
+ derivedSpecialTokens
397
+ );
398
+ if (addedMaxId > maxId) {
399
+ maxId = addedMaxId;
317
400
  }
318
401
 
319
402
  const specialTokensRaw = hf.special_tokens_map || hf.specialTokens || hf.special_tokens || null;
@@ -351,6 +434,7 @@ export class BundledTokenizer extends BaseTokenizer {
351
434
 
352
435
  // Handle behavior flags (use HF config if present, else runtime defaults)
353
436
  const runtimeDefaults = getRuntimeConfig().inference.tokenizer;
437
+ const byteLevelPretokenizer = resolveByteLevelPretokenizerConfig(hf.pre_tokenizer);
354
438
  const configuredAddBosToken = this.addBosToken;
355
439
  const configuredAddEosToken = this.addEosToken;
356
440
  this.addBosToken =
@@ -378,9 +462,16 @@ export class BundledTokenizer extends BaseTokenizer {
378
462
  // - runtime config addSpacePrefix (user override or null for auto-detect)
379
463
  const decoderPrepend = hf.decoder?.prepend_scheme === 'always' || hf.decoder?.add_prefix_space === true;
380
464
  const normalizerPrepend = hf.normalizer?.prepend_scheme === 'always' || hf.normalizer?.add_prefix_space === true;
465
+ this.#useByteLevelEncoding = byteLevelPretokenizer.useByteLevel;
381
466
  const runtimeSpacePrefix = runtimeDefaults.addSpacePrefix;
382
467
  // Use explicit runtime config if set (non-null), otherwise auto-detect from tokenizer.json
383
- this.#addSpacePrefix = runtimeSpacePrefix ?? model.add_prefix_space ?? model.add_dummy_prefix ?? decoderPrepend ?? normalizerPrepend ?? false;
468
+ this.#addSpacePrefix = runtimeSpacePrefix
469
+ ?? byteLevelPretokenizer.addPrefixSpace
470
+ ?? model.add_prefix_space
471
+ ?? model.add_dummy_prefix
472
+ ?? decoderPrepend
473
+ ?? normalizerPrepend
474
+ ?? false;
384
475
  log.debug('Tokenizer', `addSpacePrefix=${this.#addSpacePrefix} (runtime=${runtimeSpacePrefix}, model=${model.add_prefix_space ?? model.add_dummy_prefix}, decoder=${decoderPrepend}, normalizer=${normalizerPrepend})`);
385
476
 
386
477
  // Detect space prefix style by checking which WORD tokens exist in vocab
@@ -469,11 +560,47 @@ export class BundledTokenizer extends BaseTokenizer {
469
560
  this.#tokenTypes = tokenizerJson.tokenTypes;
470
561
  }
471
562
 
563
+ let maxId = -1;
564
+ for (const id of this.#vocab.values()) {
565
+ if (Number.isFinite(id) && id > maxId) {
566
+ maxId = id;
567
+ }
568
+ }
569
+
570
+ const addedTokens = Array.isArray(tokenizerJson.added_tokens) ? tokenizerJson.added_tokens : [];
571
+ const tokenPatterns = [];
572
+ const specialTokenIds = new Set();
573
+ const derivedSpecialTokens = {
574
+ pad: null,
575
+ bos: null,
576
+ eos: null,
577
+ unk: null,
578
+ };
579
+ const addedMaxId = registerAddedTokens(
580
+ addedTokens,
581
+ this.#vocab,
582
+ this.#reverseVocab,
583
+ tokenPatterns,
584
+ specialTokenIds,
585
+ derivedSpecialTokens
586
+ );
587
+ if (addedMaxId > maxId) {
588
+ maxId = addedMaxId;
589
+ }
590
+
472
591
  // Set special tokens - support both camelCase and snake_case formats
473
592
  const specialTokensRaw = (tokenizerJson.specialTokens || (tokenizerJson).special_tokens);
474
- this.specialTokens = resolveSpecialTokens(specialTokensRaw, this.specialTokens, this.#vocab);
593
+ this.specialTokens = resolveSpecialTokens(
594
+ specialTokensRaw,
595
+ {
596
+ ...derivedSpecialTokens,
597
+ ...this.specialTokens,
598
+ },
599
+ this.#vocab
600
+ );
475
601
  log.debug('Tokenizer', `Special tokens: BOS=${this.specialTokens.bos}, EOS=${this.specialTokens.eos}`);
476
- this.#specialTokenIds = new Set();
602
+ this.#specialTokenIds = specialTokenIds;
603
+ this.#specialTokenPatterns = tokenPatterns;
477
604
  const builtinSpecials = [
478
605
  this.specialTokens.pad,
479
606
  this.specialTokens.bos,
@@ -485,8 +612,13 @@ export class BundledTokenizer extends BaseTokenizer {
485
612
  this.#specialTokenIds.add(id);
486
613
  }
487
614
  }
615
+ this.#specialTokenPatterns.sort((a, b) => b.content.length - a.content.length);
616
+ if (maxId >= 0) {
617
+ this.vocabSize = Math.max(this.vocabSize, maxId + 1);
618
+ }
488
619
 
489
620
  const runtimeDefaults = getRuntimeConfig().inference.tokenizer;
621
+ const byteLevelPretokenizer = resolveByteLevelPretokenizerConfig(tokenizerJson.pre_tokenizer);
490
622
  const configuredAddBosToken = this.addBosToken;
491
623
  const configuredAddEosToken = this.addEosToken;
492
624
  this.addBosToken =
@@ -505,9 +637,11 @@ export class BundledTokenizer extends BaseTokenizer {
505
637
  if (this.addEosToken && this.specialTokens.eos == null) {
506
638
  throw new Error('[Tokenizer] addEosToken is enabled but eos token is missing.');
507
639
  }
640
+ this.#useByteLevelEncoding = byteLevelPretokenizer.useByteLevel;
508
641
  // NOTE: Default to FALSE - first word shouldn't get space prefix
509
642
  // Space prefixes are only for words that follow a space in original text
510
- this.#addSpacePrefix = tokenizerJson.addSpacePrefix === true;
643
+ this.#addSpacePrefix = tokenizerJson.addSpacePrefix === true
644
+ || byteLevelPretokenizer.addPrefixSpace === true;
511
645
 
512
646
  // Detect space prefix style based on vocab tokens
513
647
  // GPT-style uses 'Ġ' (U+0120), SentencePiece uses '▁' (U+2581)
@@ -548,7 +682,8 @@ export class BundledTokenizer extends BaseTokenizer {
548
682
  ids.push(this.specialTokens.bos);
549
683
  }
550
684
 
551
- // Split text around special tokens and tokenize each segment
685
+ // Split text around literal added tokens and special tokens, then tokenize
686
+ // the remaining plain-text segments normally.
552
687
  const segments = this.#splitOnSpecialTokens(text);
553
688
  for (const seg of segments) {
554
689
  if (seg.isSpecial && seg.id !== undefined) {
@@ -690,11 +825,19 @@ export class BundledTokenizer extends BaseTokenizer {
690
825
  if (text.length === 0) return [];
691
826
 
692
827
  let normalized = text;
693
- if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
694
- normalized = ` ${normalized}`;
828
+ let prefixed;
829
+ if (this.#useByteLevelEncoding) {
830
+ if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
831
+ normalized = ` ${normalized}`;
832
+ }
833
+ prefixed = this.#encodeByteLevelText(normalized);
834
+ } else {
835
+ if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
836
+ normalized = ` ${normalized}`;
837
+ }
838
+ const sp = this.#spacePrefixChar;
839
+ prefixed = normalized.replace(/ /g, sp);
695
840
  }
696
- const sp = this.#spacePrefixChar;
697
- const prefixed = normalized.replace(/ /g, sp);
698
841
 
699
842
  if (this.#mergeRanks.size === 0) {
700
843
  return this.#encodeBPEGreedy(prefixed);
@@ -31,8 +31,18 @@ export class SentencePieceTokenizer extends BaseTokenizer {
31
31
  });
32
32
  }
33
33
 
34
+ #resetState() {
35
+ this.#modelData = null;
36
+ this.#pieces.clear();
37
+ this.#reverseVocab.clear();
38
+ this.#algorithm = 'unigram';
39
+ this.#byteTokens.clear();
40
+ this.vocabSize = 0;
41
+ }
42
+
34
43
 
35
44
  async load(modelData) {
45
+ this.#resetState();
36
46
  this.#modelData = modelData;
37
47
 
38
48
  try {
@@ -42,6 +52,8 @@ export class SentencePieceTokenizer extends BaseTokenizer {
42
52
  } catch (err) {
43
53
  const message = err instanceof Error ? err.message : String(err);
44
54
  log.warn('Tokenizer', `Failed to parse model, using byte fallback: ${message}`);
55
+ this.#resetState();
56
+ this.#modelData = modelData;
45
57
  this.#initByteFallback();
46
58
  }
47
59
  }
@@ -9,7 +9,7 @@ import {
9
9
  verifyIntegrity,
10
10
  loadManifestFromStore,
11
11
  } from '../storage/shard-manager.js';
12
- import { parseManifest } from '../formats/rdrr/index.js';
12
+ import { clearManifest, parseManifest, setManifest as setCurrentManifest } from '../formats/rdrr/index.js';
13
13
  import { initDevice, getDevice, getKernelCapabilities } from '../gpu/device.js';
14
14
  import { acquireBuffer, releaseBuffer, forceBufferPoolReclaim } from '../memory/buffer-pool.js';
15
15
  import { getExpertCache } from './experts/expert-cache.js';
@@ -50,6 +50,10 @@ function hasExpertGroups(manifest) {
50
50
  return Object.keys(manifest.groups).some((groupId) => groupId.includes('.expert.'));
51
51
  }
52
52
 
53
+ function isGpuBufferInstance(value) {
54
+ return typeof GPUBuffer !== 'undefined' && value instanceof GPUBuffer;
55
+ }
56
+
53
57
  // Re-export types for backward compatibility
54
58
  export {
55
59
  // Types are in .d.ts file
@@ -252,6 +256,7 @@ export class DopplerLoader {
252
256
 
253
257
  setManifest(manifest) {
254
258
  this.manifest = manifest;
259
+ setCurrentManifest(manifest);
255
260
  const moeConfig = manifest.moeConfig;
256
261
  this.isMoE = moeConfig != null && (moeConfig.numExperts ?? 0) > 1;
257
262
  if (!this.isMoE && hasExpertGroups(manifest)) {
@@ -259,6 +264,7 @@ export class DopplerLoader {
259
264
  `Manifest "${manifest.modelId ?? 'unknown'}" missing moeConfig for MoE model. Re-convert with moeConfig.`
260
265
  );
261
266
  }
267
+ this.shardCache.setManifest(this.manifest);
262
268
  this.shardCache.configureForModel(this.manifest, this.shardCache.hasCustomLoader);
263
269
  debugTrace.loader('Manifest set externally');
264
270
  }
@@ -679,7 +685,7 @@ export class DopplerLoader {
679
685
  const device = getDevice();
680
686
  if (!device) {
681
687
  log.warn('Loader', 'GPU device not available; falling back to CPU');
682
- if (shardData instanceof GPUBuffer) {
688
+ if (isGpuBufferInstance(shardData)) {
683
689
  releaseBuffer(shardData);
684
690
  shardData = await this.#assembleShardData(location, name);
685
691
  }
@@ -708,7 +714,7 @@ export class DopplerLoader {
708
714
  return result.data;
709
715
  }
710
716
 
711
- if (shardData instanceof GPUBuffer) {
717
+ if (isGpuBufferInstance(shardData)) {
712
718
  // Shouldn't happen (streaming is only used for toGPU), but keep this leak-proof.
713
719
  releaseBuffer(shardData);
714
720
  shardData = await this.#assembleShardData(location, name);
@@ -751,31 +757,40 @@ export class DopplerLoader {
751
757
  // queue.writeBuffer requires 4-byte aligned sizes; we pad the buffer.
752
758
  const alignedSize = Math.ceil(location.size / 4) * 4;
753
759
  const raw = acquireBuffer(alignedSize, undefined, `raw_${name}`);
760
+ let complete = false;
754
761
 
755
- let dstOffset = 0;
756
- const uploadChunk = (bytes) => {
757
- device.queue.writeBuffer(raw, dstOffset, bytes, bytes.byteOffset, bytes.byteLength);
758
- dstOffset += bytes.byteLength;
759
- };
760
- const streamRange = (idx, offset, length) => this.shardCache.streamRange(idx, offset, length, { chunkBytes });
762
+ try {
763
+ let dstOffset = 0;
764
+ const uploadChunk = (bytes) => {
765
+ device.queue.writeBuffer(raw, dstOffset, bytes, bytes.byteOffset, bytes.byteLength);
766
+ dstOffset += bytes.byteLength;
767
+ };
768
+ const streamRange = (idx, offset, length) => this.shardCache.streamRange(idx, offset, length, { chunkBytes });
761
769
 
762
- if (location.spans) {
763
- for (const span of location.spans) {
764
- for await (const chunk of streamRange(span.shardIndex, span.offset, span.size)) {
770
+ if (location.spans) {
771
+ for (const span of location.spans) {
772
+ for await (const chunk of streamRange(span.shardIndex, span.offset, span.size)) {
773
+ uploadChunk(chunk);
774
+ }
775
+ }
776
+ } else {
777
+ for await (const chunk of streamRange(location.shardIndex, location.offset, location.size)) {
765
778
  uploadChunk(chunk);
766
779
  }
767
780
  }
768
- } else {
769
- for await (const chunk of streamRange(location.shardIndex, location.offset, location.size)) {
770
- uploadChunk(chunk);
771
- }
772
- }
773
781
 
774
- if (dstOffset < location.size) {
775
- log.warn('Loader', `Stream upload short read for "${name}": got=${dstOffset}, expected=${location.size}`);
782
+ if (dstOffset !== location.size) {
783
+ throw new Error(
784
+ `Stream upload short read for "${name}": got=${dstOffset}, expected=${location.size}.`
785
+ );
786
+ }
787
+ complete = true;
788
+ return raw;
789
+ } finally {
790
+ if (!complete) {
791
+ releaseBuffer(raw);
792
+ }
776
793
  }
777
-
778
- return raw;
779
794
  }
780
795
 
781
796
 
@@ -950,7 +965,7 @@ export class DopplerLoader {
950
965
  if (!value) return;
951
966
  const gpuBuffer = isWeightBuffer(value)
952
967
  ? value.buffer
953
- : (value instanceof GPUBuffer ? value : null);
968
+ : (isGpuBufferInstance(value) ? value : null);
954
969
  if (!gpuBuffer) return;
955
970
  try {
956
971
  releaseBuffer(gpuBuffer);
@@ -990,6 +1005,7 @@ export class DopplerLoader {
990
1005
  this.lmHead = null;
991
1006
  this.finalNorm = null;
992
1007
  this.manifest = null;
1008
+ clearManifest();
993
1009
  this.modelId = null;
994
1010
  this.loadedShards.clear();
995
1011
  this.isLoaded = false;
@@ -1,7 +1,4 @@
1
1
 
2
-
3
- import { getDevice } from '../gpu/device.js';
4
- import { isTraceEnabled, log, trace as debugTrace } from '../debug/index.js';
5
2
  import { selectRuleValue } from '../rules/rule-registry.js';
6
3
  import { tagBufferDtype } from '../gpu/weight-buffer.js';
7
4
 
@@ -26,46 +23,8 @@ export function f16ToF32(h) {
26
23
 
27
24
 
28
25
  export async function convertBF16ToF32GPU(srcBuffer, numElements, name) {
29
- debugTrace.loader(`[BF16->F32] Importing cast.js...`);
30
- const castModule = await import('../gpu/kernels/cast.js');
31
- debugTrace.loader(`[BF16->F32] castModule keys:`, Object.keys(castModule));
32
- const { runBF16ToF32 } = castModule;
33
- debugTrace.loader(`[BF16->F32] runBF16ToF32 type: ${typeof runBF16ToF32}`);
26
+ const { runBF16ToF32 } = await import('../gpu/kernels/cast.js');
34
27
  const resultTensor = await runBF16ToF32(srcBuffer, [numElements], name);
35
- debugTrace.loader(`[BF16->F32] runBF16ToF32 returned, result.size=${resultTensor.buffer?.size}`);
36
-
37
- // Debug: Verify conversion produced non-zero values
38
- const shouldCheckEmbed = isTraceEnabled('loader') &&
39
- name.includes('embed') &&
40
- name.includes('embed_tokens');
41
- if (shouldCheckEmbed) {
42
- try {
43
- debugTrace.loader(`[BF16->F32] Checking embed buffer for non-zeros...`);
44
- const device = getDevice();
45
- const sampleSize = Math.min(1024, resultTensor.buffer.size);
46
- debugTrace.loader(`[BF16->F32] Creating staging buffer size=${sampleSize}`);
47
- const stagingBuffer = device.createBuffer({
48
- size: sampleSize,
49
- usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
50
- });
51
- debugTrace.loader(`[BF16->F32] Copying to staging buffer...`);
52
- const encoder = device.createCommandEncoder();
53
- encoder.copyBufferToBuffer(resultTensor.buffer, 0, stagingBuffer, 0, sampleSize);
54
- device.queue.submit([encoder.finish()]);
55
- debugTrace.loader(`[BF16->F32] Mapping staging buffer...`);
56
- await stagingBuffer.mapAsync(GPUMapMode.READ);
57
- debugTrace.loader(`[BF16->F32] Reading data...`);
58
- const data = new Float32Array(stagingBuffer.getMappedRange().slice(0));
59
- stagingBuffer.unmap();
60
- stagingBuffer.destroy();
61
- const nonZero = Array.from(data).filter(x => x !== 0);
62
- const nanCount = data.filter(x => !Number.isFinite(x)).length;
63
- debugTrace.loader(`[BF16->F32] nonZero=${nonZero.length}/${data.length}, nan=${nanCount}, sample=[${nonZero.slice(0, 5).map(x => x.toFixed(4)).join(', ')}]`);
64
- } catch (err) {
65
- log.error('Loader', 'BF16->F32 embed buffer check error:', (err).message);
66
- }
67
- }
68
-
69
28
  return resultTensor.buffer;
70
29
  }
71
30
 
@@ -84,11 +43,11 @@ function normalizeBufferDtype(locationDtype, outputDtype) {
84
43
  if (explicit) {
85
44
  return explicit;
86
45
  }
87
- const location = typeof locationDtype === 'string' ? locationDtype.toLowerCase() : null;
46
+ const location = typeof locationDtype === 'string' ? locationDtype.toUpperCase() : null;
88
47
  if (!location) {
89
48
  return null;
90
49
  }
91
- return selectRuleValue('loader', 'weights', 'floatLocationDtype', { locationDtype: locationDtype });
50
+ return selectRuleValue('loader', 'weights', 'floatLocationDtype', { locationDtype: location });
92
51
  }
93
52
 
94
53
  export function applyBufferLayout(buffer, location, outputDtype = null) {
@@ -23,6 +23,10 @@ import { releaseBuffer } from '../memory/buffer-pool.js';
23
23
  const EMBEDDING_ROLE = 'embedding';
24
24
  const EMBEDDING_GROUP = 'embed';
25
25
 
26
+ function isGpuBufferInstance(value) {
27
+ return typeof GPUBuffer !== 'undefined' && value instanceof GPUBuffer;
28
+ }
29
+
26
30
  // ============================================================================
27
31
  // Main Function
28
32
  // ============================================================================
@@ -59,7 +63,7 @@ export async function loadEmbeddings(ctx) {
59
63
  }
60
64
 
61
65
  // Handle valid tensor types
62
- if (tensor instanceof GPUBuffer || isWeightBuffer(tensor) || tensor instanceof Float32Array) {
66
+ if (isGpuBufferInstance(tensor) || isWeightBuffer(tensor) || tensor instanceof Float32Array) {
63
67
  const result = await processEmbeddingTensor(ctx, tensor, name, loc, shouldStream);
64
68
  if (result) {
65
69
  return result;
@@ -107,7 +111,7 @@ async function processEmbeddingTensor(ctx, tensor, name, loc, shouldStream) {
107
111
  }
108
112
 
109
113
  // Raw GPUBuffer - wrap with dtype/layout metadata
110
- if (promoted instanceof GPUBuffer && loc?.shape && loc.shape.length === 2) {
114
+ if (isGpuBufferInstance(promoted) && loc?.shape && loc.shape.length === 2) {
111
115
  const layout = ctx.resolveWeightLayout(loc);
112
116
 
113
117
  const dtype = selectRuleValue('loader', 'weights', 'floatLocationDtype', {
@@ -140,7 +144,7 @@ async function maybePromoteEmbeddingsToF32(ctx, current, name, loc) {
140
144
  return wrapped;
141
145
  }
142
146
 
143
- if (!(current instanceof GPUBuffer)) return current;
147
+ if (!isGpuBufferInstance(current)) return current;
144
148
 
145
149
  const sourceDtype = selectRuleValue('loader', 'weights', 'floatLocationDtype', {
146
150
  locationDtype: loc?.dtype,
@@ -3,6 +3,11 @@
3
3
  import { releaseBuffer } from '../../memory/buffer-pool.js';
4
4
  import { log, trace } from '../../debug/index.js';
5
5
  import { getRuntimeConfig } from '../../config/runtime.js';
6
+ import { isWeightBuffer } from '../../gpu/weight-buffer.js';
7
+
8
+ function isGpuBufferInstance(value) {
9
+ return typeof GPUBuffer !== 'undefined' && value instanceof GPUBuffer;
10
+ }
6
11
 
7
12
 
8
13
 
@@ -256,12 +261,14 @@ export class ExpertCache {
256
261
  ];
257
262
 
258
263
  for (const buf of buffers) {
259
- if (buf instanceof GPUBuffer) {
260
- try {
261
- releaseBuffer(buf);
262
- } catch (e) {
263
- // Buffer may already be released
264
- }
264
+ const gpuBuffer = isWeightBuffer(buf)
265
+ ? buf.buffer
266
+ : (isGpuBufferInstance(buf) ? buf : null);
267
+ if (!gpuBuffer) continue;
268
+ try {
269
+ releaseBuffer(gpuBuffer);
270
+ } catch (e) {
271
+ // Buffer may already be released
265
272
  }
266
273
  }
267
274
  }
@@ -18,7 +18,7 @@ import { releaseBuffer } from '../../memory/buffer-pool.js';
18
18
 
19
19
  export async function preloadShardsForExpert(ctx, layerIdx, expertIdx, options) {
20
20
  // Get required shards from manifest mapping
21
- const shardIndices = getShardsForExpert(layerIdx, expertIdx);
21
+ const shardIndices = getShardsForExpert(layerIdx, expertIdx, ctx.manifest);
22
22
  if (shardIndices.length === 0) {
23
23
  // No mapping available, fall back to loading all shards on demand
24
24
  return;
@@ -69,6 +69,10 @@ export function predictNextLayerExperts(currentExperts) {
69
69
  return currentExperts;
70
70
  }
71
71
 
72
+ function isGpuBufferInstance(value) {
73
+ return typeof GPUBuffer !== 'undefined' && value instanceof GPUBuffer;
74
+ }
75
+
72
76
  // ============================================================================
73
77
  // Expert Loading
74
78
  // ============================================================================
@@ -95,7 +99,7 @@ export async function loadExpert(ctx, layerIdx, expertIdx) {
95
99
  await preloadShardsForExpert(ctx, layerIdx, expertIdx);
96
100
 
97
101
  // Get tensor names from manifest if available (for logging/debugging)
98
- const tensorNames = getTensorsForExpert(layerIdx, expertIdx);
102
+ const tensorNames = getTensorsForExpert(layerIdx, expertIdx, ctx.manifest);
99
103
  if (tensorNames.length > 0) {
100
104
  debugTrace.loader(`Expert ${layerIdx}_${expertIdx} tensors: ${tensorNames.length}`);
101
105
  }
@@ -260,7 +264,7 @@ function getGpuBuffer(value) {
260
264
  if (isWeightBuffer(value)) {
261
265
  return value.buffer;
262
266
  }
263
- if (value instanceof GPUBuffer) {
267
+ if (isGpuBufferInstance(value)) {
264
268
  return value;
265
269
  }
266
270
  return null;
@@ -342,7 +346,7 @@ async function downcastExpertWeights(ctx, weights) {
342
346
  if (!buf) continue;
343
347
 
344
348
  // Only downcast GPUBuffer or WeightBuffer (not Float32Array)
345
- if (!(buf instanceof GPUBuffer) && !isWeightBuffer(buf)) {
349
+ if (!isGpuBufferInstance(buf) && !isWeightBuffer(buf)) {
346
350
  continue;
347
351
  }
348
352
 
@@ -369,13 +373,13 @@ function calculateExpertSize(weights) {
369
373
  const buf = weights[k];
370
374
  if (isWeightBuffer(buf)) {
371
375
  sizeBytes += buf.buffer.size;
372
- } else if (buf instanceof GPUBuffer) {
376
+ } else if (isGpuBufferInstance(buf)) {
373
377
  sizeBytes += buf.size;
374
378
  }
375
379
  }
376
380
 
377
381
  // Use manifest-provided expert size if available, otherwise use calculated
378
- const manifestBytes = getExpertBytes();
382
+ const manifestBytes = getExpertBytes(ctx.manifest);
379
383
  if (manifestBytes > 0) {
380
384
  sizeBytes = manifestBytes;
381
385
  }