@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355)
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -175,103 +175,103 @@ export async function doConv(
175
175
  }
176
176
 
177
177
  // Use the first 2x hidden projection channels as a gated conv-state projection.
178
- const inProj = await doMatmul(
179
- inputTensor,
180
- convInProj,
181
- numTokens,
182
- hiddenSize * 2,
183
- hiddenSize,
184
- {
185
- transposeB: 'auto',
186
- label: `${label}.in_proj`,
187
- layerIdx,
188
- kernelPath,
189
- role: 'conv_in_proj',
190
- },
191
- recorder
192
- );
193
- const activated = await doSiLURowSplit(inProj, {
194
- numTokens,
195
- dim: hiddenSize,
196
- activation: 'silu',
197
- swigluLimit: options.swigluLimit ?? null,
198
- label: `${label}.activation`,
199
- layerIdx,
200
- }, recorder);
201
-
202
- if (recorder) {
203
- recorder.trackTemporaryBuffer(inProj.buffer);
204
- } else {
205
- releaseBuffer(inProj.buffer);
206
- }
207
-
208
- // Optional generic conv2d stage when explicit shape metadata is provided.
209
- // LFM2 depthwise conv kernels use model-specific packing, so this path is best-effort only.
210
- let convInput = activated;
211
- if (convKernel && options.conv2d && options.conv2d.enabled === true) {
212
- const convTensorInput = createTensor(activated.buffer, activated.dtype, [
213
- options.conv2d.inChannels,
214
- options.conv2d.height,
215
- options.conv2d.width,
216
- ], `${label}.conv_input`);
217
- const convOptions = {
218
- inChannels: options.conv2d.inChannels,
219
- outChannels: options.conv2d.outChannels,
220
- height: options.conv2d.height,
221
- width: options.conv2d.width,
222
- kernelH: options.conv2d.kernelH,
223
- kernelW: options.conv2d.kernelW,
224
- stride: options.conv2d.stride ?? 1,
225
- pad: options.conv2d.pad ?? 0,
226
- };
227
- const convResult = recorder
228
- ? await recordConv2D(recorder, convTensorInput, convKernel, null, convOptions)
229
- : await runConv2D(convTensorInput, convKernel, null, convOptions);
230
- convInput = createTensor(
231
- convResult.buffer,
232
- convResult.dtype,
233
- [numTokens, hiddenSize],
234
- `${label}.conv_output`
178
+ let inProj = null;
179
+ let activated = null;
180
+ let convInput = null;
181
+ let outProj = null;
182
+ try {
183
+ inProj = await doMatmul(
184
+ inputTensor,
185
+ convInProj,
186
+ numTokens,
187
+ hiddenSize * 2,
188
+ hiddenSize,
189
+ {
190
+ transposeB: 'auto',
191
+ label: `${label}.in_proj`,
192
+ layerIdx,
193
+ kernelPath,
194
+ role: 'conv_in_proj',
195
+ },
196
+ recorder
235
197
  );
236
- if (recorder) {
237
- recorder.trackTemporaryBuffer(activated.buffer);
238
- } else {
239
- releaseBuffer(activated.buffer);
198
+ activated = await doSiLURowSplit(inProj, {
199
+ numTokens,
200
+ dim: hiddenSize,
201
+ activation: 'silu',
202
+ swigluLimit: options.swigluLimit ?? null,
203
+ label: `${label}.activation`,
204
+ layerIdx,
205
+ }, recorder);
206
+
207
+ releaseOrTrack(recorder, inProj.buffer);
208
+ inProj = null;
209
+
210
+ convInput = activated;
211
+ if (convKernel && options.conv2d && options.conv2d.enabled === true) {
212
+ const convTensorInput = createTensor(activated.buffer, activated.dtype, [
213
+ options.conv2d.inChannels,
214
+ options.conv2d.height,
215
+ options.conv2d.width,
216
+ ], `${label}.conv_input`);
217
+ const convOptions = {
218
+ inChannels: options.conv2d.inChannels,
219
+ outChannels: options.conv2d.outChannels,
220
+ height: options.conv2d.height,
221
+ width: options.conv2d.width,
222
+ kernelH: options.conv2d.kernelH,
223
+ kernelW: options.conv2d.kernelW,
224
+ stride: options.conv2d.stride ?? 1,
225
+ pad: options.conv2d.pad ?? 0,
226
+ };
227
+ const convResult = recorder
228
+ ? await recordConv2D(recorder, convTensorInput, convKernel, null, convOptions)
229
+ : await runConv2D(convTensorInput, convKernel, null, convOptions);
230
+ convInput = createTensor(
231
+ convResult.buffer,
232
+ convResult.dtype,
233
+ [numTokens, hiddenSize],
234
+ `${label}.conv_output`
235
+ );
236
+ releaseOrTrack(recorder, activated.buffer);
237
+ activated = null;
240
238
  }
241
- }
242
239
 
243
- const outProj = await doMatmul(
244
- convInput,
245
- convOutProj,
246
- numTokens,
247
- hiddenSize,
248
- hiddenSize,
249
- {
250
- transposeB: 'auto',
251
- label: `${label}.out_proj`,
252
- layerIdx,
253
- kernelPath,
254
- role: 'conv_out_proj',
255
- },
256
- recorder
257
- );
240
+ outProj = await doMatmul(
241
+ convInput,
242
+ convOutProj,
243
+ numTokens,
244
+ hiddenSize,
245
+ hiddenSize,
246
+ {
247
+ transposeB: 'auto',
248
+ label: `${label}.out_proj`,
249
+ layerIdx,
250
+ kernelPath,
251
+ role: 'conv_out_proj',
252
+ },
253
+ recorder
254
+ );
258
255
 
259
- if (convInput.buffer !== activated.buffer) {
260
- if (recorder) {
261
- recorder.trackTemporaryBuffer(convInput.buffer);
262
- } else {
263
- releaseBuffer(convInput.buffer);
256
+ if (convInput && (!activated || convInput.buffer !== activated.buffer)) {
257
+ releaseOrTrack(recorder, convInput.buffer);
258
+ convInput = null;
259
+ } else if (activated) {
260
+ releaseOrTrack(recorder, activated.buffer);
261
+ activated = null;
264
262
  }
265
- } else if (recorder) {
266
- recorder.trackTemporaryBuffer(activated.buffer);
267
- } else {
268
- releaseBuffer(activated.buffer);
269
- }
270
263
 
271
- if (kernelTrace.enabled && !recorder) {
272
- await traceStep('conv', label, layerIdx, outProj.buffer, [numTokens, hiddenSize]);
264
+ if (kernelTrace.enabled && !recorder) {
265
+ await traceStep('conv', label, layerIdx, outProj.buffer, [numTokens, hiddenSize]);
266
+ }
267
+ return outProj;
268
+ } catch (error) {
269
+ if (outProj) releaseOrTrack(recorder, outProj.buffer);
270
+ if (convInput && (!activated || convInput.buffer !== activated.buffer)) releaseOrTrack(recorder, convInput.buffer);
271
+ if (activated) releaseOrTrack(recorder, activated.buffer);
272
+ if (inProj) releaseOrTrack(recorder, inProj.buffer);
273
+ throw error;
273
274
  }
274
- return outProj;
275
275
  }
276
276
 
277
277
  export async function doCast(input, toDtype, recorder) {
@@ -4,6 +4,7 @@ import { trace } from '../../../debug/index.js';
4
4
  import { getDevice } from '../../../gpu/device.js';
5
5
  import { allowReadback } from '../../../gpu/perf-guards.js';
6
6
  import { f16ToF32 } from '../../../loader/dtype-utils.js';
7
+ import { readBufferSlice } from '../../../memory/buffer-pool.js';
7
8
 
8
9
 
9
10
  const STAGE_DEFAULT_CATEGORY = {
@@ -11,6 +12,11 @@ const STAGE_DEFAULT_CATEGORY = {
11
12
  // Attention stages (per-layer)
12
13
  attn_input: 'attn',
13
14
  attn_normed: 'attn',
15
+ linear_qkv_proj: 'attn',
16
+ linear_z_proj: 'attn',
17
+ linear_a_proj: 'attn',
18
+ linear_b_proj: 'attn',
19
+ linear_core_out: 'attn',
14
20
  q_proj: 'attn',
15
21
  k_proj: 'attn',
16
22
  v_proj: 'attn',
@@ -139,22 +145,16 @@ export async function runProbes(stage, buffer, options) {
139
145
  const alignedOffset = Math.floor(byteOffset / 4) * 4;
140
146
  const offsetWithinRead = byteOffset - alignedOffset;
141
147
  const readSize = 4; // Always read 4 bytes (aligned)
142
- const staging = (device).createBuffer({ size: readSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ });
143
- const enc = (device).createCommandEncoder();
144
- enc.copyBufferToBuffer( (buffer), alignedOffset, staging, 0, readSize);
145
- (device).queue.submit([enc.finish()]);
146
- await staging.mapAsync(GPUMapMode.READ);
148
+ const readback = await readBufferSlice(buffer, alignedOffset, readSize);
147
149
  let value;
148
150
  if (dtype === 'f16') {
149
151
  // offsetWithinRead is 0 or 2 for F16 - extract correct u16
150
- const u16Array = new Uint16Array(staging.getMappedRange().slice(0));
152
+ const u16Array = new Uint16Array(readback);
151
153
  const u16Index = offsetWithinRead / 2;
152
154
  value = f16ToF32(u16Array[u16Index]);
153
155
  } else {
154
- value = new Float32Array(staging.getMappedRange().slice(0))[0];
156
+ value = new Float32Array(readback)[0];
155
157
  }
156
- staging.unmap();
157
- staging.destroy();
158
158
  values.push(`${dimIdx}=${value.toFixed(4)}`);
159
159
  }
160
160
 
@@ -58,6 +58,30 @@ export function softmax(logits) {
58
58
  return exps;
59
59
  }
60
60
 
61
+ function countFiniteCandidates(logits, padTokenId) {
62
+ let finiteCandidateCount = 0;
63
+ for (let i = 0; i < logits.length; i++) {
64
+ if (padTokenId != null && i === padTokenId) {
65
+ continue;
66
+ }
67
+ if (Number.isFinite(logits[i])) {
68
+ finiteCandidateCount += 1;
69
+ }
70
+ }
71
+ return finiteCandidateCount;
72
+ }
73
+
74
+ function assertFiniteSamplingCandidates(logits, padTokenId, label) {
75
+ const finiteCandidateCount = countFiniteCandidates(logits, padTokenId);
76
+ if (finiteCandidateCount > 0) {
77
+ return;
78
+ }
79
+ throw new Error(
80
+ `[Sampling] ${label} has no finite candidate logits after masking the pad token. ` +
81
+ 'Upstream decode likely produced NaN/Inf or an all-masked distribution.'
82
+ );
83
+ }
84
+
61
85
 
62
86
  export function sample(logits, opts) {
63
87
  const { temperature, topP, topK, decode, debug = false, padTokenId, seed } = opts;
@@ -66,16 +90,28 @@ export function sample(logits, opts) {
66
90
  logits[padTokenId] = -Infinity;
67
91
  }
68
92
 
93
+ assertFiniteSamplingCandidates(logits, padTokenId, 'Logits');
94
+
69
95
  // Greedy (argmax) when temperature = 0
70
96
  if (temperature === 0) {
71
- let maxIdx = 0;
72
- let maxVal = logits[0];
73
- for (let i = 1; i < logits.length; i++) {
74
- if (logits[i] > maxVal) {
75
- maxVal = logits[i];
97
+ let maxIdx = -1;
98
+ let maxVal = -Infinity;
99
+ for (let i = 0; i < logits.length; i++) {
100
+ const value = logits[i];
101
+ if (!Number.isFinite(value)) {
102
+ continue;
103
+ }
104
+ if (value > maxVal) {
105
+ maxVal = value;
76
106
  maxIdx = i;
77
107
  }
78
108
  }
109
+ if (maxIdx < 0) {
110
+ throw new Error(
111
+ '[Sampling] Greedy sampling could not find a finite candidate logit. ' +
112
+ 'Upstream decode likely produced NaN/Inf.'
113
+ );
114
+ }
79
115
  if (debug) {
80
116
  const text = decode?.([maxIdx]) ?? '?';
81
117
  trace.sample(`Greedy: id=${maxIdx} "${text}" logit=${maxVal.toFixed(4)}`);
@@ -96,7 +132,17 @@ export function sample(logits, opts) {
96
132
 
97
133
  let candidates = [];
98
134
  for (let i = 0; i < probs.length; i++) {
99
- candidates.push({ token: i, prob: probs[i] });
135
+ const probability = probs[i];
136
+ if (!Number.isFinite(probability) || probability <= 0) {
137
+ continue;
138
+ }
139
+ candidates.push({ token: i, prob: probability });
140
+ }
141
+ if (candidates.length === 0) {
142
+ throw new Error(
143
+ '[Sampling] Softmax produced no finite candidate probabilities. ' +
144
+ 'Upstream decode likely produced NaN/Inf logits.'
145
+ );
100
146
  }
101
147
  candidates.sort((a, b) => b.prob - a.prob);
102
148
 
@@ -1,7 +1,7 @@
1
1
 
2
2
 
3
3
  import { getDevice } from '../../../gpu/device.js';
4
- import { acquireBuffer } from '../../../memory/buffer-pool.js';
4
+ import { acquireBuffer, releaseBuffer } from '../../../memory/buffer-pool.js';
5
5
  import { log } from '../../../debug/index.js';
6
6
  import { isWeightBuffer, isCpuWeightBuffer, tagBufferDtype } from '../../../gpu/weight-buffer.js';
7
7
 
@@ -53,9 +53,14 @@ export function getWeightBuffer(weight, label) {
53
53
  }
54
54
 
55
55
  const buf = acquireBuffer(data.byteLength, undefined, label);
56
- device.queue.writeBuffer(buf, 0, ( (data)));
57
- tagBufferDtype(buf, bufferDtype);
58
- return buf;
56
+ try {
57
+ device.queue.writeBuffer(buf, 0, ( (data)));
58
+ tagBufferDtype(buf, bufferDtype);
59
+ return buf;
60
+ } catch (error) {
61
+ releaseBuffer(buf);
62
+ throw error;
63
+ }
59
64
  }
60
65
 
61
66
 
@@ -92,9 +97,14 @@ export function getNormWeightBuffer(weight, label, config, debugFlags) {
92
97
  }
93
98
 
94
99
  const buf = acquireBuffer(data.byteLength, undefined, label);
95
- device.queue.writeBuffer(buf, 0, ( (data)));
96
- tagBufferDtype(buf, 'f32');
97
- return buf;
100
+ try {
101
+ device.queue.writeBuffer(buf, 0, ( (data)));
102
+ tagBufferDtype(buf, 'f32');
103
+ return buf;
104
+ } catch (error) {
105
+ releaseBuffer(buf);
106
+ throw error;
107
+ }
98
108
  }
99
109
 
100
110
 
@@ -6,7 +6,7 @@ import { configurePerfGuards } from '../../gpu/perf-guards.js';
6
6
  import { MoERouter } from '../moe-router.js';
7
7
  import { DecodeBufferManager } from '../decode-buffers.js';
8
8
  import { DecodeRing } from '../decode-ring.js';
9
- import { applyPipelineContexts } from './context.js';
9
+ import { applyPipelineContexts, restorePipelineContexts } from './context.js';
10
10
  import { createInitializedPipeline } from './factory.js';
11
11
 
12
12
  // Pipeline sub-modules
@@ -44,6 +44,11 @@ import { getDopplerLoader } from '../../loader/doppler-loader.js';
44
44
  import { registerPipeline, getPipelineFactory } from './registry.js';
45
45
  import { selectRuleValue } from '../../rules/rule-registry.js';
46
46
 
47
+ function destroyMoERouter(router) {
48
+ if (router && typeof router.destroy === 'function') {
49
+ router.destroy();
50
+ }
51
+ }
47
52
 
48
53
 
49
54
  // ============================================================================
@@ -102,6 +107,8 @@ export class InferencePipeline extends PipelineState {
102
107
  this.manifest = manifest;
103
108
  this.decodeRing?.release();
104
109
  this.linearAttentionRuntime = resetLinearAttentionRuntime(this.linearAttentionRuntime);
110
+ destroyMoERouter(this.moeRouter);
111
+ this.moeRouter = null;
105
112
 
106
113
  const executionV0Runtime = applyExecutionV0RuntimeConfig({
107
114
  runtimeConfig: this.runtimeConfig,
@@ -490,12 +497,15 @@ export class InferencePipeline extends PipelineState {
490
497
  this.expertWeights.clear();
491
498
  this.linearAttentionRuntime = resetLinearAttentionRuntime(this.linearAttentionRuntime);
492
499
  this.lora = null;
500
+ destroyMoERouter(this.moeRouter);
501
+ this.moeRouter = null;
493
502
  if (this.finitenessBuffer) {
494
503
  this.finitenessBuffer.destroy();
495
504
  this.finitenessBuffer = null;
496
505
  }
497
506
  this.isLoaded = false;
498
507
  this.currentSeqLen = 0;
508
+ restorePipelineContexts(this);
499
509
  log.info('Pipeline', 'Unloaded');
500
510
  }
501
511
 
@@ -533,6 +543,8 @@ export class InferencePipeline extends PipelineState {
533
543
  releaseGPUResources() {
534
544
  this.decodeBuffers?.release();
535
545
  this.decodeRing?.release();
546
+ destroyMoERouter(this.moeRouter);
547
+ this.moeRouter = null;
536
548
  if (this.finitenessBuffer) {
537
549
  this.finitenessBuffer.destroy();
538
550
  this.finitenessBuffer = null;
@@ -66,8 +66,8 @@ export interface SpeculativeConfig {
66
66
  enableTreeDraft: boolean;
67
67
  /** Temperature for draft sampling */
68
68
  temperature: number;
69
- /** Optional deterministic seed for speculative sampling */
70
- randomSeed?: number | null;
69
+ /** Deterministic seed for speculative sampling */
70
+ randomSeed: number;
71
71
  }
72
72
 
73
73
  /**
@@ -10,22 +10,6 @@ function createRng(seed) {
10
10
  };
11
11
  }
12
12
 
13
- function createUnseededRng() {
14
- let fallbackState = ((Date.now() >>> 0) ^ 0xa341316c) >>> 0;
15
- return () => {
16
- const cryptoApi = typeof globalThis !== 'undefined' ? globalThis.crypto : null;
17
- if (cryptoApi && typeof cryptoApi.getRandomValues === 'function') {
18
- const random = new Uint32Array(1);
19
- cryptoApi.getRandomValues(random);
20
- return random[0] / 4294967296;
21
- }
22
- fallbackState = (fallbackState + 0x6d2b79f5) | 0;
23
- let t = Math.imul(fallbackState ^ (fallbackState >>> 15), 1 | fallbackState);
24
- t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
25
- return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
26
- };
27
- }
28
-
29
13
  function coerceLogitsVector(value, label) {
30
14
  if (value instanceof Float32Array) {
31
15
  if (value.length === 0) {
@@ -110,6 +94,9 @@ export class SpeculativeDecoder {
110
94
  if (config.temperature == null) {
111
95
  throw new Error('SpeculativeDecoder requires temperature.');
112
96
  }
97
+ if (!Number.isFinite(config.randomSeed)) {
98
+ throw new Error('SpeculativeDecoder requires randomSeed.');
99
+ }
113
100
 
114
101
  assertTemperature(config.temperature, 'temperature');
115
102
  this.numDraftTokens = config.numDraftTokens;
@@ -117,8 +104,7 @@ export class SpeculativeDecoder {
117
104
  this.enableTreeDraft = config.enableTreeDraft;
118
105
  this.temperature = config.temperature;
119
106
 
120
- const seed = Number.isFinite(config.randomSeed) ? Math.floor(config.randomSeed) : null;
121
- this.random = seed === null ? createUnseededRng() : createRng(seed);
107
+ this.random = createRng(Math.floor(config.randomSeed));
122
108
  }
123
109
 
124
110
  setDraftModel(model) {
@@ -74,7 +74,7 @@ export interface InitializeResult {
74
74
  /**
75
75
  * Discover available models from the catalog.json endpoint.
76
76
  *
77
- * @param fallbackModels - Models to return if catalog fails
77
+ * @param fallbackModels - Explicit fallback models to use when catalog fetch is unavailable
78
78
  * @returns Array of model info objects
79
79
  */
80
80
  export declare function discoverModels(
@@ -1,7 +1,7 @@
1
1
 
2
2
 
3
3
  import { initDevice, getDevice, getKernelCapabilities } from '../gpu/device.js';
4
- import { parseManifest } from '../formats/rdrr/index.js';
4
+ import { parseManifest, getExpectedShardHash } from '../formats/rdrr/index.js';
5
5
  import { createPipeline } from './pipelines/text.js';
6
6
  import { log as debugLog } from '../debug/index.js';
7
7
  import { getRuntimeConfig, setRuntimeConfig } from '../config/runtime.js';
@@ -25,7 +25,7 @@ import {
25
25
 
26
26
 
27
27
  export async function discoverModels(
28
- fallbackModels = ['gemma3-1b-q4', 'mistral-7b-q4', 'llama3-8b-q4']
28
+ fallbackModels
29
29
  ) {
30
30
  try {
31
31
  const resp = await fetch('/models/catalog.json');
@@ -40,10 +40,13 @@ export async function discoverModels(
40
40
  }));
41
41
  }
42
42
  }
43
- } catch (e) {
44
- // Catalog not available, use fallback
43
+ } catch (e) {}
44
+
45
+ if (Array.isArray(fallbackModels) && fallbackModels.length > 0) {
46
+ return fallbackModels.map((id) => ({ id, name: id }));
45
47
  }
46
- return fallbackModels.map((id) => ({ id, name: id }));
48
+
49
+ throw new Error('discoverModels: failed to fetch /models/catalog.json and no explicit fallback model list was provided.');
47
50
  }
48
51
 
49
52
  // ============================================================================
@@ -165,7 +168,7 @@ export function createHttpShardLoader(baseUrl, manifest, log) {
165
168
  distributionConfig,
166
169
  algorithm,
167
170
  requiredEncoding,
168
- expectedHash: shard.hash ?? null,
171
+ expectedHash: getExpectedShardHash(shard, algorithm) || null,
169
172
  expectedSize: Number.isFinite(shard.size) ? Math.floor(shard.size) : null,
170
173
  expectedManifestVersionSet: manifestVersionSet,
171
174
  writeToStore: false,
@@ -238,7 +241,13 @@ export async function initializeInference(modelUrl, options = {}) {
238
241
  onProgress('hotswap', 0.05, 'Loading hot-swap manifest...');
239
242
  log(`Hot-swap: loading manifest ${hotSwapConfig.manifestUrl}`);
240
243
  const hotSwapManifest = await fetchHotSwapManifest(hotSwapConfig.manifestUrl);
241
- const verification = await verifyHotSwapManifest(hotSwapManifest, hotSwapConfig);
244
+ const verification = await verifyHotSwapManifest(hotSwapManifest, hotSwapConfig, {
245
+ source: {
246
+ kind: 'remote',
247
+ isLocal: false,
248
+ url: hotSwapConfig.manifestUrl,
249
+ },
250
+ });
242
251
  if (!verification.ok) {
243
252
  throw new Error(`Hot-swap manifest rejected: ${verification.reason}`);
244
253
  }
@@ -309,6 +318,7 @@ export async function initializeInference(modelUrl, options = {}) {
309
318
  const pipeline = await createPipeline( ( (manifest)), {
310
319
  storage: { loadShard },
311
320
  gpu: { device },
321
+ runtime,
312
322
  baseUrl: modelUrl,
313
323
  onProgress: ( progress) => {
314
324
  const pct = 0.2 + progress.percent * 0.8;
@@ -46,11 +46,6 @@ export declare class Tokenizer {
46
46
  */
47
47
  initialize(manifest: ModelManifest, options?: TokenizerInitOptions): Promise<void>;
48
48
 
49
- /**
50
- * Infer HuggingFace model ID from manifest architecture
51
- */
52
- private _inferHuggingFaceModel(manifest: ModelManifest): string | null;
53
-
54
49
  /**
55
50
  * Encode text to token IDs
56
51
  */
@@ -130,14 +130,12 @@ export class Tokenizer {
130
130
  );
131
131
  }
132
132
 
133
- let hfModel = tokenizerConfig.hfModel;
133
+ let hfModel = tokenizerConfig.hfModel ?? tokenizerConfig.modelId ?? null;
134
134
  const allowArchFallback = tokenizerConfig.allowArchFallback === true;
135
135
  if (allowArchFallback && !hfModel) {
136
- const inferred = this._inferHuggingFaceModel(manifest);
137
- if (inferred) {
138
- hfModel = inferred;
139
- log.warn('Tokenizer', `Using inferred HuggingFace model: ${inferred}`);
140
- }
136
+ throw new Error(
137
+ `[Tokenizer] tokenizer.allowArchFallback requires explicit tokenizer.hfModel or tokenizer.modelId for model "${modelId}".`
138
+ );
141
139
  }
142
140
 
143
141
  if (hfModel) {
@@ -212,23 +210,6 @@ export class Tokenizer {
212
210
 
213
211
  this.config = tokenizerConfig;
214
212
  }
215
-
216
-
217
- _inferHuggingFaceModel(manifest) {
218
- const tokenizer = manifest?.tokenizer ?? {};
219
- if (typeof tokenizer.modelId === 'string' && tokenizer.modelId.length > 0) {
220
- return tokenizer.modelId;
221
- }
222
- if (typeof tokenizer.hfModel === 'string' && tokenizer.hfModel.length > 0) {
223
- return tokenizer.hfModel;
224
- }
225
- if (typeof manifest?.modelId === 'string' && manifest.modelId.length > 0) {
226
- return manifest.modelId;
227
- }
228
- return null;
229
- }
230
-
231
-
232
213
  encode(text) {
233
214
  if (!this.backend) {
234
215
  throw new Error('Tokenizer not initialized');
@@ -21,8 +21,17 @@ export class BPETokenizer extends BaseTokenizer {
21
21
  });
22
22
  }
23
23
 
24
+ #resetState() {
25
+ this.#vocab.clear();
26
+ this.#reverseVocab.clear();
27
+ this.#merges = [];
28
+ this.#mergeRanks.clear();
29
+ this.vocabSize = 0;
30
+ }
31
+
24
32
 
25
33
  load(vocab, merges) {
34
+ this.#resetState();
26
35
  // Build vocab maps
27
36
  for (const [token, id] of Object.entries(vocab)) {
28
37
  this.#vocab.set(token, id);
@@ -230,6 +230,25 @@ export class BundledTokenizer extends BaseTokenizer {
230
230
  });
231
231
  }
232
232
 
233
+ #resetState() {
234
+ this.#vocab.clear();
235
+ this.#reverseVocab.clear();
236
+ this.#merges = [];
237
+ this.#mergeRanks.clear();
238
+ this.#scores = [];
239
+ this.#tokenTypes = [];
240
+ this.#type = null;
241
+ this.#byteTokens.clear();
242
+ this.#specialTokenPatterns = [];
243
+ this.#specialTokenIds = new Set();
244
+ this.#addSpacePrefix = true;
245
+ this.#spacePrefixChar = '▁';
246
+ this.#byteDecoder = null;
247
+ this.#byteEncoder = null;
248
+ this.#useByteLevelEncoding = false;
249
+ this.vocabSize = 0;
250
+ }
251
+
233
252
 
234
253
  isSpecialToken(tokenId) {
235
254
  if (this.#specialTokenIds.size > 0) {
@@ -283,6 +302,7 @@ export class BundledTokenizer extends BaseTokenizer {
283
302
 
284
303
 
285
304
  load(tokenizerJson) {
305
+ this.#resetState();
286
306
  // Detect format: HuggingFace has model.vocab, bundled has top-level vocab
287
307
  const isHuggingFace = 'model' in tokenizerJson && tokenizerJson.model?.vocab !== undefined;
288
308