@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -2,6 +2,10 @@
2
2
  import { DEFAULT_QUANTIZATION_DEFAULTS, DEFAULT_Q4K_LAYOUT } from '../config/index.js';
3
3
  import { classifyTensorRole } from '../formats/rdrr/index.js';
4
4
 
5
+ // Default quantization tag when no explicit dtype is provided.
6
+ // F16 is the canonical unquantized storage format for WebGPU inference.
7
+ const DEFAULT_QUANT_TAG = 'f16';
8
+
5
9
  // Quantization tag aliases mapped to canonical names.
6
10
  // Add new aliases here rather than adding if/else branches.
7
11
  const QUANT_TAG_ALIASES = {
@@ -47,7 +51,7 @@ const QUANT_TAG_ALIASES = {
47
51
  };
48
52
 
49
53
  export function normalizeQuantTag(value) {
50
- if (!value) return 'f16';
54
+ if (!value) return DEFAULT_QUANT_TAG;
51
55
  const lower = value.toLowerCase();
52
56
  return QUANT_TAG_ALIASES[lower] ?? lower;
53
57
  }
@@ -91,6 +95,7 @@ export function buildVariantTag(info) {
91
95
  const weights = info.weights;
92
96
  const embeddings = info.embeddings ?? weights;
93
97
  const lmHead = info.lmHead ?? embeddings;
98
+ const compute = info.compute ? normalizeQuantTag(info.compute) : null;
94
99
  const experts = info.experts ?? null;
95
100
  const layout = info.layout ?? null;
96
101
 
@@ -100,30 +105,42 @@ export function buildVariantTag(info) {
100
105
  ? `${weights}${layout === 'row' ? '' : '-col'}`
101
106
  : weights;
102
107
 
103
- const parts = [`w${weightTag}`];
108
+ const parts = [weightTag];
109
+ const groupedRolesByDtype = new Map();
110
+ const GROUPED_ROLE_ORDER = ['e', 'h', 'a'];
104
111
 
105
- if (embeddings !== weights) {
106
- parts.push(`e${embeddings}`);
107
- }
112
+ const addGroupedRole = (role, dtype) => {
113
+ if (!dtype || dtype === weights) return;
114
+ const existing = groupedRolesByDtype.get(dtype) ?? [];
115
+ if (!existing.includes(role)) {
116
+ existing.push(role);
117
+ groupedRolesByDtype.set(dtype, existing);
118
+ }
119
+ };
108
120
 
109
- if (lmHead !== embeddings) {
110
- parts.push(`h${lmHead}`);
121
+ addGroupedRole('e', embeddings);
122
+ addGroupedRole('h', lmHead);
123
+ addGroupedRole('a', compute);
124
+
125
+ for (const [dtype, roles] of groupedRolesByDtype.entries()) {
126
+ const orderedRoles = GROUPED_ROLE_ORDER.filter((role) => roles.includes(role));
127
+ parts.push(`${orderedRoles.join('')}${dtype}`);
111
128
  }
112
129
 
113
130
  if (experts && experts !== weights) {
114
131
  parts.push(`x${experts}`);
115
132
  }
116
133
 
117
- if (info.vision) {
134
+ if (info.vision && info.vision !== weights) {
118
135
  parts.push(`v${info.vision}`);
119
136
  }
120
- if (info.audio) {
121
- parts.push(`a${info.audio}`);
137
+ if (info.audio && info.audio !== weights) {
138
+ parts.push(`audio${info.audio}`);
122
139
  }
123
- if (info.tts) {
124
- parts.push(`t${info.tts}`);
140
+ if (info.tts && info.tts !== weights) {
141
+ parts.push(`tts${info.tts}`);
125
142
  }
126
- if (info.projector) {
143
+ if (info.projector && info.projector !== weights) {
127
144
  parts.push(`p${info.projector}`);
128
145
  }
129
146
 
@@ -167,9 +184,16 @@ const Q4K_LAYOUT_ALIASES = {
167
184
  };
168
185
 
169
186
  export function normalizeQ4KLayout(value) {
170
- if (!value) return null;
171
- const lower = String(value).toLowerCase().replace(/_/g, '');
172
- return Q4K_LAYOUT_ALIASES[lower] ?? null;
187
+ if (value == null) return null;
188
+ const lower = String(value).trim().toLowerCase().replace(/_/g, '');
189
+ if (!lower) return null;
190
+ const normalized = Q4K_LAYOUT_ALIASES[lower];
191
+ if (!normalized) {
192
+ throw new Error(
193
+ `converter.quantization.q4kLayout must be "row" or "col"; got ${JSON.stringify(value)}.`
194
+ );
195
+ }
196
+ return normalized;
173
197
  }
174
198
 
175
199
  export function buildQuantizationInfo(
@@ -74,9 +74,10 @@ function findMinMax(data, offset, length) {
74
74
  return { min, max };
75
75
  }
76
76
 
77
- export function quantizeQ4KBlock(data, offset) {
77
+ function quantizeQ4KBlockWithValidLength(data, offset, validLength = QK_K) {
78
78
  const block = new Uint8Array(QK4_K_BLOCK_SIZE);
79
79
  const blockView = new DataView(block.buffer);
80
+ const clampedValidLength = Math.max(0, Math.min(QK_K, Math.trunc(validLength)));
80
81
 
81
82
  const scales = new Float32Array(8);
82
83
  const minOffsets = new Float32Array(8);
@@ -84,14 +85,22 @@ export function quantizeQ4KBlock(data, offset) {
84
85
 
85
86
  for (let sb = 0; sb < 8; sb++) {
86
87
  const sbOffset = offset + sb * 32;
87
- const { min, max } = findMinMax(data, sbOffset, 32);
88
+ const subblockStart = sb * 32;
89
+ const validInSubblock = Math.max(0, Math.min(32, clampedValidLength - subblockStart));
90
+ if (validInSubblock === 0) {
91
+ scales[sb] = 0;
92
+ minOffsets[sb] = 0;
93
+ continue;
94
+ }
95
+
96
+ const { min, max } = findMinMax(data, sbOffset, validInSubblock);
88
97
 
89
98
  minOffsets[sb] = -min;
90
99
  const range = max - min;
91
100
  scales[sb] = range > 0 ? range / 15 : 0;
92
101
 
93
102
  const invScale = scales[sb] > 0 ? 1 / scales[sb] : 0;
94
- for (let i = 0; i < 32; i++) {
103
+ for (let i = 0; i < validInSubblock; i++) {
95
104
  const val = data[sbOffset + i];
96
105
  let q = Math.round((val - min) * invScale);
97
106
  q = Math.max(0, Math.min(15, q));
@@ -155,6 +164,10 @@ export function quantizeQ4KBlock(data, offset) {
155
164
  return block;
156
165
  }
157
166
 
167
+ export function quantizeQ4KBlock(data, offset) {
168
+ return quantizeQ4KBlockWithValidLength(data, offset, QK_K);
169
+ }
170
+
158
171
  function dequantizeQ4KBlock(block) {
159
172
  const blockView = new DataView(block.buffer, block.byteOffset);
160
173
  const result = new Float32Array(256);
@@ -245,22 +258,16 @@ export function quantizeToQ4KMRowWise(data, shape) {
245
258
  }
246
259
 
247
260
  const blocksPerRow = Math.ceil(cols / QK_K);
248
- const paddedColsPerRow = blocksPerRow * QK_K;
249
261
  const totalBlocks = rows * blocksPerRow;
250
262
 
251
263
  const quantized = new Uint8Array(totalBlocks * QK4_K_BLOCK_SIZE);
252
264
 
253
265
  for (let row = 0; row < rows; row++) {
254
- // Extract and pad this row
255
- const rowData = new Float32Array(paddedColsPerRow);
256
- const srcOffset = row * cols;
257
- for (let c = 0; c < cols; c++) {
258
- rowData[c] = data[srcOffset + c];
259
- }
260
-
261
266
  // Quantize each block in this row
262
267
  for (let b = 0; b < blocksPerRow; b++) {
263
- const block = quantizeQ4KBlock(rowData, b * QK_K);
268
+ const validLength = Math.max(0, Math.min(QK_K, cols - b * QK_K));
269
+ const srcOffset = row * cols + b * QK_K;
270
+ const block = quantizeQ4KBlockWithValidLength(data, srcOffset, validLength);
264
271
  const dstOffset = (row * blocksPerRow + b) * QK4_K_BLOCK_SIZE;
265
272
  quantized.set(block, dstOffset);
266
273
  }
@@ -1,3 +1,5 @@
1
+ import { DEFAULT_MANIFEST_INFERENCE } from '../config/schema/index.js';
2
+
1
3
  function asObject(value) {
2
4
  if (value == null || typeof value !== 'object' || Array.isArray(value)) {
3
5
  return null;
@@ -50,7 +52,7 @@ function resolveScalingConfig(ropeScalingConfig, options = {}) {
50
52
  }
51
53
  return {
52
54
  ropeScalingType: null,
53
- ropeScalingFactor: 1.0,
55
+ ropeScalingFactor: DEFAULT_MANIFEST_INFERENCE.rope.ropeScalingFactor,
54
56
  yarnBetaFast: null,
55
57
  yarnBetaSlow: null,
56
58
  yarnOriginalMaxPos: null,
@@ -58,7 +60,7 @@ function resolveScalingConfig(ropeScalingConfig, options = {}) {
58
60
  }
59
61
 
60
62
  let ropeScalingType = scalingType;
61
- let ropeScalingFactor = 1.0;
63
+ let ropeScalingFactor = DEFAULT_MANIFEST_INFERENCE.rope.ropeScalingFactor;
62
64
  let yarnBetaFast = null;
63
65
  let yarnBetaSlow = null;
64
66
  let yarnOriginalMaxPos = null;
@@ -110,7 +112,7 @@ function hasScalingDirective(ropeScalingConfig) {
110
112
  function hasMeaningfulScalingConfig(resolvedScaling) {
111
113
  if (!resolvedScaling) return false;
112
114
  return resolvedScaling.ropeScalingType != null
113
- || resolvedScaling.ropeScalingFactor !== 1.0
115
+ || resolvedScaling.ropeScalingFactor !== DEFAULT_MANIFEST_INFERENCE.rope.ropeScalingFactor
114
116
  || resolvedScaling.yarnBetaFast != null
115
117
  || resolvedScaling.yarnBetaSlow != null
116
118
  || resolvedScaling.yarnOriginalMaxPos != null;
@@ -159,7 +161,7 @@ export function buildRoPEConfig(presetInference, config) {
159
161
  ?? null,
160
162
  ropeScalingFactor: presetRoPE.ropeScalingFactor
161
163
  ?? presetAttn?.ropeScalingFactor // Deprecated location
162
- ?? 1.0,
164
+ ?? DEFAULT_MANIFEST_INFERENCE.rope.ropeScalingFactor,
163
165
  yarnBetaFast: presetRoPE.yarnBetaFast ?? null,
164
166
  yarnBetaSlow: presetRoPE.yarnBetaSlow ?? null,
165
167
  yarnOriginalMaxPos: presetRoPE.yarnOriginalMaxPos ?? null,
@@ -223,7 +225,7 @@ export function buildRoPEConfig(presetInference, config) {
223
225
  ?? asFiniteNumber(flatRoPEParameters?.rope_theta)
224
226
  ?? asFiniteNumber(config.rope_theta)
225
227
  ?? presetInference.rope?.ropeTheta
226
- ?? 10000;
228
+ ?? DEFAULT_MANIFEST_INFERENCE.rope.ropeTheta;
227
229
 
228
230
  // For Gemma 3, local sliding attention theta comes from rope_parameters.sliding_attention.
229
231
  const ropeLocalTheta = asFiniteNumber(slidingAttentionRoPE?.rope_theta)
@@ -232,7 +234,7 @@ export function buildRoPEConfig(presetInference, config) {
232
234
 
233
235
  const mropeInterleaved = asBoolean(flatRoPEParameters?.mrope_interleaved)
234
236
  ?? presetInference.rope?.mropeInterleaved
235
- ?? false;
237
+ ?? DEFAULT_MANIFEST_INFERENCE.rope.mropeInterleaved;
236
238
  const mropeSection = asNumberArray(flatRoPEParameters?.mrope_section)
237
239
  ?? presetInference.rope?.mropeSection
238
240
  ?? null;
@@ -126,7 +126,7 @@ export declare class ShardPacker {
126
126
  */
127
127
  export declare function sortTensorsByGroup(
128
128
  tensors: TensorInfoSchema[],
129
- modelType?: ModelType
129
+ modelType: ModelType
130
130
  ): TensorInfoSchema[];
131
131
 
132
132
  /**
@@ -399,7 +399,10 @@ function bytesToHex(bytes) {
399
399
  }
400
400
 
401
401
 
402
- export function sortTensorsByGroup(tensors, modelType = 'transformer') {
402
+ export function sortTensorsByGroup(tensors, modelType) {
403
+ if (typeof modelType !== 'string' || modelType.trim().length === 0) {
404
+ throw new Error('sortTensorsByGroup requires an explicit modelType.');
405
+ }
403
406
  return [...tensors].sort((a, b) => {
404
407
  const groupA = classifyTensor(a.name, modelType);
405
408
  const groupB = classifyTensor(b.name, modelType);
@@ -1,5 +1,6 @@
1
1
  export declare function resolveEosTokenId(options: {
2
2
  config?: Record<string, unknown> | null;
3
+ generationConfig?: Record<string, unknown> | null;
3
4
  tokenizer?: {
4
5
  eosTokenId?: number;
5
6
  eos_token_id?: number;
@@ -1,6 +1,8 @@
1
- export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
1
+ export function resolveEosTokenId({ config, generationConfig, tokenizer, tokenizerJson }) {
2
2
  const nestedTextConfig = getNestedTextConfig(config);
3
3
  const candidateSources = [
4
+ generationConfig?.eos_token_id,
5
+ generationConfig?.eos_token_ids,
4
6
  tokenizer?.eosTokenId,
5
7
  tokenizer?.eos_token_id,
6
8
  tokenizerJson?.specialTokens?.eos,
@@ -19,6 +21,7 @@ export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
19
21
  }
20
22
 
21
23
  const eosTokenStringCandidates = [
24
+ generationConfig?.eos_token,
22
25
  tokenizer?.eosToken,
23
26
  tokenizer?.eos_token,
24
27
  tokenizerJson?.specialTokens?.eos_token,
@@ -50,6 +50,108 @@ const originalConsoleInfo = console.info;
50
50
  const originalConsoleWarn = console.warn;
51
51
  let warnedBenchmarkMode = false;
52
52
 
53
+ function requirePlainObject(value, label) {
54
+ if (!value || typeof value !== 'object' || Array.isArray(value)) {
55
+ throw new Error(`${label} must be an object when provided.`);
56
+ }
57
+ return value;
58
+ }
59
+
60
+ function requireNonNegativeIntegerArray(value, label) {
61
+ if (!Array.isArray(value)) {
62
+ throw new Error(`${label} must be an array of non-negative integers when provided.`);
63
+ }
64
+ return value.map((entry, index) => {
65
+ const parsed = Number(entry);
66
+ if (!Number.isInteger(parsed) || parsed < 0) {
67
+ throw new Error(`${label}[${index}] must be a non-negative integer.`);
68
+ }
69
+ return parsed;
70
+ });
71
+ }
72
+
73
+ function requireNonNegativeInteger(value, label) {
74
+ const parsed = Number(value);
75
+ if (!Number.isInteger(parsed) || parsed < 0) {
76
+ throw new Error(`${label} must be a non-negative integer when provided.`);
77
+ }
78
+ return parsed;
79
+ }
80
+
81
+ function requireBoolean(value, label) {
82
+ if (typeof value !== 'boolean') {
83
+ throw new Error(`${label} must be a boolean when provided.`);
84
+ }
85
+ return value;
86
+ }
87
+
88
+ function normalizeLogLevel(level) {
89
+ if (typeof level !== 'string' || !level.trim()) {
90
+ throw new Error('setLogLevel(level) requires a non-empty log level string.');
91
+ }
92
+ return level.trim().toLowerCase();
93
+ }
94
+
95
+ function normalizeTraceCategories(categories) {
96
+ if (typeof categories === 'string') {
97
+ const values = categories
98
+ .split(',')
99
+ .map((value) => value.trim())
100
+ .filter(Boolean);
101
+ if (values.length === 0) {
102
+ throw new Error('setTrace(categories) requires at least one trace category.');
103
+ }
104
+ return values;
105
+ }
106
+ if (Array.isArray(categories) && categories.length > 0) {
107
+ return categories.map((value, index) => {
108
+ if (typeof value !== 'string' || !value.trim()) {
109
+ throw new Error(`setTrace(categories)[${index}] must be a non-empty string.`);
110
+ }
111
+ return value.trim();
112
+ });
113
+ }
114
+ throw new Error(
115
+ 'setTrace(categories) requires false, a comma-delimited string, or a non-empty string array.'
116
+ );
117
+ }
118
+
119
+ function validateTraceCategoryToken(token) {
120
+ if (token === 'all') {
121
+ return;
122
+ }
123
+ const value = token.startsWith('-') ? token.slice(1) : token;
124
+ if (!TRACE_CATEGORIES.includes(value)) {
125
+ throw new Error(
126
+ `Unknown trace category "${token}". Allowed categories: all, ${TRACE_CATEGORIES.join(', ')}.`
127
+ );
128
+ }
129
+ }
130
+
131
+ function normalizeTraceOptions(options) {
132
+ if (options == null) {
133
+ return {};
134
+ }
135
+ const normalized = requirePlainObject(options, 'setTrace(options)');
136
+ return {
137
+ ...(normalized.layers === undefined ? {} : {
138
+ layers: requireNonNegativeIntegerArray(normalized.layers, 'setTrace(options).layers'),
139
+ }),
140
+ ...(normalized.maxDecodeSteps === undefined ? {} : {
141
+ maxDecodeSteps: requireNonNegativeInteger(
142
+ normalized.maxDecodeSteps,
143
+ 'setTrace(options).maxDecodeSteps'
144
+ ),
145
+ }),
146
+ ...(normalized.breakOnAnomaly === undefined ? {} : {
147
+ breakOnAnomaly: requireBoolean(
148
+ normalized.breakOnAnomaly,
149
+ 'setTrace(options).breakOnAnomaly'
150
+ ),
151
+ }),
152
+ };
153
+ }
154
+
53
155
  export function setLogLevel(level) {
54
156
  const levelMap = {
55
157
  debug: LOG_LEVELS.DEBUG,
@@ -59,8 +161,14 @@ export function setLogLevel(level) {
59
161
  error: LOG_LEVELS.ERROR,
60
162
  silent: LOG_LEVELS.SILENT,
61
163
  };
62
- currentLogLevel = levelMap[level.toLowerCase()] ?? LOG_LEVELS.INFO;
63
- console.log(`[Doppler] Log level set to: ${level.toUpperCase()}`);
164
+ const normalizedLevel = normalizeLogLevel(level);
165
+ if (!Object.prototype.hasOwnProperty.call(levelMap, normalizedLevel)) {
166
+ throw new Error(
167
+ `Unknown log level "${level}". Allowed levels: ${Object.keys(levelMap).join(', ')}.`
168
+ );
169
+ }
170
+ currentLogLevel = levelMap[normalizedLevel];
171
+ console.log(`[Doppler] Log level set to: ${normalizedLevel.toUpperCase()}`);
64
172
  }
65
173
 
66
174
  export function getLogLevel() {
@@ -77,9 +185,11 @@ export function setTrace(categories, options) {
77
185
  return;
78
186
  }
79
187
 
80
- const catArray = typeof categories === 'string'
81
- ? categories.split(',').map(s => s.trim())
82
- : categories;
188
+ const catArray = normalizeTraceCategories(categories);
189
+ const traceOptions = normalizeTraceOptions(options);
190
+ for (const cat of catArray) {
191
+ validateTraceCategoryToken(cat);
192
+ }
83
193
 
84
194
  enabledTraceCategories.clear();
85
195
 
@@ -101,14 +211,14 @@ export function setTrace(categories, options) {
101
211
  }
102
212
  }
103
213
 
104
- if (options?.layers) {
105
- traceLayerFilter = options.layers;
214
+ if (traceOptions.layers !== undefined) {
215
+ traceLayerFilter = traceOptions.layers;
106
216
  }
107
- if (options?.maxDecodeSteps !== undefined) {
108
- traceMaxDecodeSteps = options.maxDecodeSteps;
217
+ if (traceOptions.maxDecodeSteps !== undefined) {
218
+ traceMaxDecodeSteps = traceOptions.maxDecodeSteps;
109
219
  }
110
- if (options?.breakOnAnomaly !== undefined) {
111
- traceBreakOnAnomaly = options.breakOnAnomaly;
220
+ if (traceOptions.breakOnAnomaly !== undefined) {
221
+ traceBreakOnAnomaly = traceOptions.breakOnAnomaly;
112
222
  }
113
223
 
114
224
  const enabled = [...enabledTraceCategories].join(',') || 'none';
@@ -184,11 +294,13 @@ export function setSilentMode(enabled) {
184
294
  console.log = noop;
185
295
  console.debug = noop;
186
296
  console.info = noop;
297
+ console.warn = noop;
187
298
  originalConsoleLog('[Doppler] Silent mode enabled - logging silenced');
188
299
  } else {
189
300
  console.log = originalConsoleLog;
190
301
  console.debug = originalConsoleDebug;
191
302
  console.info = originalConsoleInfo;
303
+ console.warn = originalConsoleWarn;
192
304
  console.log('[Doppler] Silent mode disabled - logging restored');
193
305
  }
194
306
  }