@simulatte/doppler 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +25 -6
  3. package/package.json +25 -38
  4. package/src/browser/browser-converter.js +5 -0
  5. package/src/client/doppler-api.browser.js +6 -0
  6. package/src/client/doppler-api.d.ts +3 -0
  7. package/src/client/doppler-api.js +11 -2
  8. package/src/client/doppler-registry.js +3 -5
  9. package/src/client/doppler-registry.json +2 -2
  10. package/src/config/kernel-path-loader.d.ts +5 -0
  11. package/src/config/kernel-path-loader.js +13 -0
  12. package/src/config/kernels/kernel-ref-digests.js +23 -21
  13. package/src/config/kernels/moe/mixtral.paths.json +46 -0
  14. package/src/config/kernels/registry.json +74 -0
  15. package/src/config/loader.js +9 -0
  16. package/src/config/merge-contract-check.js +7 -0
  17. package/src/config/platforms/loader.js +3 -1
  18. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-nosubgroups.json +16 -16
  19. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-online.json +8 -8
  20. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-small-attn.json +61 -0
  21. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  22. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  23. package/src/config/presets/kernel-paths/registry.json +21 -0
  24. package/src/config/presets/models/gemma2.json +2 -1
  25. package/src/config/presets/models/gemma3.json +4 -1
  26. package/src/config/presets/models/gemma4.json +61 -0
  27. package/src/config/presets/models/granite-docling.json +70 -0
  28. package/src/config/presets/models/lfm2.json +6 -1
  29. package/src/config/presets/models/qwen3.json +4 -3
  30. package/src/config/presets/models/qwen3_5.json +16 -0
  31. package/src/config/presets/models/qwen3_vl.json +40 -0
  32. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +2 -1
  33. package/src/config/presets/runtime/experiments/verify/lfm2-verify.json +46 -0
  34. package/src/config/presets/runtime/experiments/verify/translategemma-verify.json +39 -0
  35. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  36. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  37. package/src/config/presets/runtime/modes/trace-layers.json +1 -0
  38. package/src/config/presets/runtime/tiers/gemma4-16gb.json +69 -0
  39. package/src/config/presets/runtime/tiers/gemma4-24gb.json +66 -0
  40. package/src/config/presets/runtime/tiers/gemma4-32gb.json +66 -0
  41. package/src/config/runtime.js +3 -0
  42. package/src/config/schema/conversion.schema.d.ts +1 -0
  43. package/src/config/schema/debug.schema.d.ts +40 -0
  44. package/src/config/schema/debug.schema.js +28 -0
  45. package/src/config/schema/index.js +2 -0
  46. package/src/config/schema/inference-defaults.schema.js +1 -1
  47. package/src/config/schema/kernel-path.schema.d.ts +1 -0
  48. package/src/config/schema/manifest.schema.d.ts +1 -1
  49. package/src/config/schema/manifest.schema.js +1 -1
  50. package/src/config/schema/memory-limits.schema.js +2 -2
  51. package/src/config/schema/storage.schema.js +2 -2
  52. package/src/converter/conversion-plan.js +11 -3
  53. package/src/converter/core.js +19 -8
  54. package/src/converter/manifest-inference.js +12 -22
  55. package/src/converter/parsers/transformer.js +4 -0
  56. package/src/converter/quantization-info.js +5 -1
  57. package/src/converter/quantizer.d.ts +5 -0
  58. package/src/converter/quantizer.js +34 -12
  59. package/src/converter/rope-config.js +8 -6
  60. package/src/converter/tokenizer-utils.d.ts +1 -0
  61. package/src/converter/tokenizer-utils.js +4 -1
  62. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  63. package/src/distribution/shard-delivery.js +40 -1
  64. package/src/formats/rdrr/classification.js +32 -0
  65. package/src/formats/rdrr/parsing.d.ts +4 -0
  66. package/src/formats/rdrr/parsing.js +14 -1
  67. package/src/gpu/kernel-runtime.js +4 -2
  68. package/src/gpu/kernels/attention.js +2 -1
  69. package/src/gpu/kernels/dequant_f16_out.wgsl +4 -2
  70. package/src/gpu/kernels/dequant_f16_out_vec4.wgsl +5 -2
  71. package/src/gpu/kernels/dequant_shared.wgsl +4 -2
  72. package/src/gpu/kernels/dequant_shared_vec4.wgsl +4 -2
  73. package/src/gpu/kernels/dequant_subgroup.wgsl +6 -2
  74. package/src/gpu/kernels/gated-short-conv.d.ts +63 -0
  75. package/src/gpu/kernels/gated-short-conv.js +284 -0
  76. package/src/gpu/kernels/index.d.ts +8 -0
  77. package/src/gpu/kernels/index.js +6 -0
  78. package/src/gpu/kernels/linear-attention-core.js +37 -17
  79. package/src/gpu/kernels/matmul-selection.js +48 -4
  80. package/src/gpu/kernels/matmul.d.ts +5 -0
  81. package/src/gpu/kernels/matmul.js +71 -2
  82. package/src/gpu/kernels/matmul_gemv_subgroup.wgsl +77 -79
  83. package/src/gpu/kernels/rmsnorm.js +9 -2
  84. package/src/gpu/kernels/sample.js +1 -3
  85. package/src/gpu/kernels/sample.wgsl +39 -9
  86. package/src/gpu/kernels/sample_f16.wgsl +38 -8
  87. package/src/gpu/kernels/shader-cache.js +9 -4
  88. package/src/gpu/kernels/split_qg.d.ts +50 -0
  89. package/src/gpu/kernels/split_qg.js +46 -0
  90. package/src/gpu/kernels/split_qg.wgsl +58 -0
  91. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  92. package/src/gpu/weight-buffer.d.ts +1 -1
  93. package/src/gpu/weight-buffer.js +1 -1
  94. package/src/inference/browser-harness.d.ts +2 -0
  95. package/src/inference/browser-harness.js +20 -1
  96. package/src/inference/kv-cache/base.js +3 -10
  97. package/src/inference/pipelines/diffusion/helpers.js +3 -0
  98. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  99. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +10 -3
  100. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  101. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  102. package/src/inference/pipelines/text/attention/projections.d.ts +13 -1
  103. package/src/inference/pipelines/text/attention/projections.js +54 -13
  104. package/src/inference/pipelines/text/attention/record.js +16 -6
  105. package/src/inference/pipelines/text/attention/run.js +59 -6
  106. package/src/inference/pipelines/text/config.d.ts +1 -0
  107. package/src/inference/pipelines/text/config.js +46 -4
  108. package/src/inference/pipelines/text/embed.js +26 -7
  109. package/src/inference/pipelines/text/execution-plan.js +5 -4
  110. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +10 -3
  111. package/src/inference/pipelines/text/execution-v0.js +12 -1
  112. package/src/inference/pipelines/text/generator-helpers.js +1 -0
  113. package/src/inference/pipelines/text/generator-runtime.js +19 -0
  114. package/src/inference/pipelines/text/generator-steps.d.ts +15 -0
  115. package/src/inference/pipelines/text/generator-steps.js +71 -26
  116. package/src/inference/pipelines/text/generator.d.ts +5 -0
  117. package/src/inference/pipelines/text/generator.js +353 -166
  118. package/src/inference/pipelines/text/init.d.ts +15 -0
  119. package/src/inference/pipelines/text/init.js +35 -10
  120. package/src/inference/pipelines/text/layer.js +38 -8
  121. package/src/inference/pipelines/text/linear-attention.d.ts +5 -0
  122. package/src/inference/pipelines/text/linear-attention.js +33 -3
  123. package/src/inference/pipelines/text/logits/gpu.js +2 -2
  124. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  125. package/src/inference/pipelines/text/logits/index.js +3 -1
  126. package/src/inference/pipelines/text/model-load.js +3 -0
  127. package/src/inference/pipelines/text/moe-gpu.js +21 -3
  128. package/src/inference/pipelines/text/moe-shape-validator.d.ts +9 -0
  129. package/src/inference/pipelines/text/moe-shape-validator.js +31 -11
  130. package/src/inference/pipelines/text/ops.js +123 -53
  131. package/src/inference/pipelines/text/probes.js +1 -0
  132. package/src/inference/pipelines/text/sampling.js +52 -6
  133. package/src/inference/pipelines/text/state.js +2 -0
  134. package/src/inference/pipelines/text.d.ts +5 -0
  135. package/src/inference/pipelines/text.js +59 -1
  136. package/src/inference/pipelines/vision/encoder.js +386 -0
  137. package/src/inference/pipelines/vision/image-preprocess.js +151 -0
  138. package/src/inference/pipelines/vision/index.js +173 -0
  139. package/src/inference/pipelines/vision/ops.js +78 -0
  140. package/src/inference/pipelines/vision/patch-embed.js +151 -0
  141. package/src/inference/test-harness.js +11 -9
  142. package/src/loader/doppler-loader.d.ts +3 -0
  143. package/src/loader/doppler-loader.js +20 -3
  144. package/src/loader/experts/expert-cache.js +6 -2
  145. package/src/loader/experts/expert-loader.js +6 -2
  146. package/src/loader/final-weights-loader.js +2 -0
  147. package/src/loader/layer-loader.js +42 -3
  148. package/src/loader/manifest-config.js +3 -1
  149. package/src/loader/shard-cache.js +3 -2
  150. package/src/loader/tensors/tensor-loader.d.ts +3 -0
  151. package/src/loader/tensors/tensor-loader.js +130 -4
  152. package/src/rules/inference/dtype.rules.json +5 -0
  153. package/src/rules/inference/kernel-path.rules.json +2 -2
  154. package/src/rules/kernels/moe.rules.mixtral.json +75 -0
  155. package/src/rules/kernels/softmax.rules.json +2 -0
  156. package/src/rules/kernels/split-qg.rules.json +6 -0
  157. package/src/rules/rule-registry.d.ts +1 -0
  158. package/src/rules/rule-registry.js +4 -0
  159. package/src/storage/downloader.js +2 -1
  160. package/src/storage/quickstart-downloader.d.ts +3 -0
  161. package/src/storage/quickstart-downloader.js +27 -30
  162. package/src/storage/shard-manager.js +4 -3
  163. package/src/tooling/conversion-config-materializer.js +3 -5
  164. package/src/tooling/node-converter.js +28 -7
  165. package/src/tooling/node-source-runtime.js +65 -5
  166. package/src/tooling/node-webgpu.js +24 -7
  167. package/src/types/model.d.ts +5 -0
  168. package/src/utils/hf-resolve-url.d.ts +16 -0
  169. package/src/utils/hf-resolve-url.js +17 -0
  170. package/src/version.js +1 -1
  171. package/tools/doppler-cli.js +6 -1
  172. package/src/tooling/node-convert.d.ts +0 -54
package/CHANGELOG.md CHANGED
@@ -6,6 +6,38 @@ This changelog is package-facing and release-oriented. Entries before `0.1.7`
6
6
  were retrofitted from package version history, release commits, and release
7
7
  docs so the `0.1.x` line has one conventional npm-visible history surface.
8
8
 
9
+ ## [0.1.8] - 2026-03-18
10
+
11
+ ### Changed
12
+
13
+ - Simplified demo to show only verified Q4K models (Gemma 3 270M, Gemma 3 1B).
14
+ Hid the Translate, Diffusion, and Embedding tabs until models are ready.
15
+ - Split demo monolith (6,680 lines) into focused modules: core, generation,
16
+ storage, translate, diagnostics, routing, utils.
17
+ - Trimmed hosted HF registry and quickstart registry to the two verified models.
18
+ - Aligned catalog, HF registry, and quickstart registry to the canonical
19
+ external support registry as single source of truth for HF revisions.
20
+ - Renamed all `.mjs` tool scripts to `.js` to match `"type": "module"` convention.
21
+ - Switched WebGPU optional dependency from `@simulatte/webgpu` to `webgpu ^0.3.8`.
22
+ - Pruned unused `verify:*` npm scripts for models no longer in the active set.
23
+ - Updated release-claim policy with newly verified models (LFM2, Qwen 3.5,
24
+ TranslateGemma variants).
25
+
26
+ ### Fixed
27
+
28
+ - Fixed Qwen 3.5 conversion configs using wrong model preset (`qwen3` instead
29
+ of `qwen3_5`), which caused support matrix check failures.
30
+ - Fixed Qwen mRoPE conflation: `ropeInterleaved` was incorrectly set from
31
+ `mropeInterleaved`, forcing adjacent-pair RoPE rotation on Qwen models.
32
+ - Fixed catalog lifecycle metadata inconsistencies: corrected `local`, `hf`,
33
+ `curated`, and `demo` fields to match actual artifact availability.
34
+ - Fixed GPU-dependent unit tests failing in non-GPU environments by adding
35
+ proper GPU readiness probes with clear skip reasons.
36
+ - Fixed kernel-ref digest registry drift (222 vs 224 entries).
37
+ - Fixed stale vendor benchmark fixture hashes after compare-engines config update.
38
+ - Removed failing and unverified models from demo visibility (TranslateGemma 4B,
39
+ EmbeddingGemma 300M with broken HF manifest, Qwen 3.5 0.8B/2B, F16 variant).
40
+
9
41
  ## [0.1.7] - 2026-03-10
10
42
 
11
43
  ### Added
package/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  Inference and training on raw WebGPU. Pure JS + WGSL.
4
4
 
5
+ **[Try the live demo](https://d4da.com)** | **[npm](https://www.npmjs.com/package/@simulatte/doppler)** | **[docs](https://github.com/clocksmith/doppler/blob/main/docs/INDEX.md)**
6
+
5
7
  ![Phase-latency comparison on one workload across models](https://raw.githubusercontent.com/clocksmith/doppler/main/benchmarks/vendors/results/compare_1b_multi-workload_favorable_phases.svg)
6
8
 
7
9
  ## Quick start
@@ -28,8 +30,6 @@ Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default.
28
30
  npm install @simulatte/doppler
29
31
  ```
30
32
 
31
- **[Live Demo](https://d4da.com)** · **[npm](https://www.npmjs.com/package/@simulatte/doppler)** · **[docs](https://github.com/clocksmith/doppler/blob/main/docs/INDEX.md)** · **[Project site](https://simulatte.world)**
32
-
33
33
  ## Why Doppler
34
34
 
35
35
  **JS → WGSL → WebGPU.** Direct JavaScript orchestration into native WebGPU kernels, avoiding ONNX runtimes, WASM blobs, and bridge layers.
@@ -46,6 +46,28 @@ Snapshot artifacts:
46
46
  - [g3-1b-p064-d064-t0-k1.compare.json](https://github.com/clocksmith/doppler/blob/main/benchmarks/vendors/fixtures/g3-1b-p064-d064-t0-k1.compare.json)
47
47
  - [lfm2-5-1-2b-p064-d064-t0-k1.compare.json](https://github.com/clocksmith/doppler/blob/main/benchmarks/vendors/fixtures/lfm2-5-1-2b-p064-d064-t0-k1.compare.json)
48
48
 
49
+ ## Supported models
50
+
51
+ All models below are verified with deterministic greedy decoding on WebGPU hardware.
52
+ Registry IDs resolve to hosted RDRR artifacts automatically.
53
+
54
+ | Model | Registry ID | Quant | Params |
55
+ | --- | --- | --- | --- |
56
+ | Gemma 3 270M IT | `gemma3-270m` | Q4K | 270M |
57
+ | Gemma 3 1B IT | `gemma3-1b` | Q4K | 1B |
58
+ | Gemma 3 1B IT (F16) | `gemma-3-1b-it-f16-af32` | F16 | 1B |
59
+ | TranslateGemma 4B IT | `translategemma-4b-it-q4k-ehf16-af32` | Q4K | 4B |
60
+ | TranslateGemma 4B 1B EN-ES | `translategemma-4b-1b-enes-q4k-ehf16-af32` | Q4K | 1B |
61
+ | EmbeddingGemma 300M | `google-embeddinggemma-300m-q4k-ehf16-af32` | Q4K | 300M |
62
+ | Qwen 3.5 0.8B | `qwen-3-5-0-8b-q4k-ehaf16` | Q4K | 0.8B |
63
+ | Qwen 3.5 2B | `qwen-3-5-2b-q4k-ehaf16` | Q4K | 2B |
64
+ | LFM2.5 1.2B Instruct | `lfm2-5-1-2b-instruct-q4k-ehf16-af32` | Q4K | 1.2B |
65
+
66
+ Additional model families (Llama 3, DeepSeek, Gemma 4 MoE, Mixtral, and others) have conversion
67
+ configs ready but are not yet cataloged. See the full
68
+ [model support matrix](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md)
69
+ for details.
70
+
49
71
  ## Under the hood
50
72
 
51
73
  - Sharded weight loading via OPFS moves multi-GB weights into VRAM without blocking the main thread.
@@ -85,10 +107,7 @@ for await (const token of doppler('Hello', { model: 'gemma3-270m' })) {
85
107
  - First-run workflow: [docs/getting-started.md](https://github.com/clocksmith/doppler/blob/main/docs/getting-started.md)
86
108
  - Runtime config contract: [docs/config.md](https://github.com/clocksmith/doppler/blob/main/docs/config.md)
87
109
  - Architecture: [docs/architecture.md](https://github.com/clocksmith/doppler/blob/main/docs/architecture.md)
88
- - Generated model support table: [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md)
89
-
90
- Current model support is generated from the catalog and conversion registry.
91
- See [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md) for the canonical verified, failing, and unverified status table.
110
+ - Model support matrix: [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md)
92
111
 
93
112
  ## Environment requirements
94
113
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@simulatte/doppler",
3
- "version": "0.1.7",
3
+ "version": "0.1.9",
4
4
  "description": "Browser-native WebGPU inference engine for local intent and inference loops",
5
5
  "main": "src/index.js",
6
6
  "types": "src/index.d.ts",
@@ -29,22 +29,22 @@
29
29
  "bench:chart": "node ./benchmarks/vendors/compare-chart.js",
30
30
  "bench:chart:readme": "node ./benchmarks/vendors/compare-chart.js --preset readme-evidence",
31
31
  "bench:architecture:chart": "node ./benchmarks/vendors/generate-architecture-overview-svg.js",
32
- "ci:diffusion:contract": "node tools/ci-diffusion-contract-gates.mjs",
33
- "ci:diffusion:contract:list": "node tools/ci-diffusion-contract-gates.mjs --list",
34
- "ci:training:contract": "node tools/ci-training-contract-gates.mjs",
35
- "ci:training:contract:list": "node tools/ci-training-contract-gates.mjs --list",
36
- "training:contract:delta": "node tools/emit-training-contract-delta.mjs",
37
- "training:workloads:verify": "node tools/verify-training-workload-packs.mjs --registry tools/configs/training-workloads/registry.json",
38
- "training:report-ids:publish": "node tools/publish-training-report-ids.mjs --registry tools/configs/training-workloads/registry.json",
39
- "distill:studio:mvp": "node tools/distill-studio-mvp.mjs",
40
- "distill:quality-gate": "node tools/distill-studio-quality-gate.mjs",
41
- "p2p:observability": "node tools/p2p-delivery-observability.mjs",
42
- "p2p:drill": "node tools/p2p-resilience-drill.mjs",
32
+ "ci:diffusion:contract": "node tools/ci-diffusion-contract-gates.js",
33
+ "ci:diffusion:contract:list": "node tools/ci-diffusion-contract-gates.js --list",
34
+ "ci:training:contract": "node tools/ci-training-contract-gates.js",
35
+ "ci:training:contract:list": "node tools/ci-training-contract-gates.js --list",
36
+ "training:contract:delta": "node tools/emit-training-contract-delta.js",
37
+ "training:workloads:verify": "node tools/verify-training-workload-packs.js --registry tools/configs/training-workloads/registry.json",
38
+ "training:report-ids:publish": "node tools/publish-training-report-ids.js --registry tools/configs/training-workloads/registry.json",
39
+ "distill:studio:mvp": "node tools/distill-studio-mvp.js",
40
+ "distill:quality-gate": "node tools/distill-studio-quality-gate.js",
41
+ "p2p:observability": "node tools/p2p-delivery-observability.js",
42
+ "p2p:drill": "node tools/p2p-resilience-drill.js",
43
43
  "test": "npm run test:unit",
44
- "test:unit": "node tools/run-node-tests.mjs --suite unit",
45
- "test:gpu": "node tools/run-node-tests.mjs --suite gpu",
46
- "test:coverage": "node tools/run-node-coverage.mjs",
47
- "test:coverage:report": "node tools/run-node-coverage.mjs --no-threshold",
44
+ "test:unit": "node tools/run-node-tests.js --suite unit",
45
+ "test:gpu": "node tools/run-node-tests.js --suite gpu",
46
+ "test:coverage": "node tools/run-node-coverage.js",
47
+ "test:coverage:report": "node tools/run-node-coverage.js --no-threshold",
48
48
  "test:gpu:browser": "node tools/doppler-cli.js verify --config '{\"request\":{\"suite\":\"kernels\"},\"run\":{\"surface\":\"browser\",\"browser\":{\"opfsCache\":false,\"headless\":true,\"channel\":\"chromium\",\"browserArgs\":[\"--use-angle=swiftshader\",\"--disable-vulkan-surface\"],\"console\":true}}}'",
49
49
  "agents:verify": "node tools/verify-agent-parity.js",
50
50
  "agents:freshness": "node tools/verify-agent-freshness.js",
@@ -74,9 +74,11 @@
74
74
  "ci:catalog:check": "npm run registry:sync:scripts:check && npm run support:matrix:check && npm run registry:hf:check",
75
75
  "external:rdrr:index": "node tools/sync-external-rdrr-index.js",
76
76
  "external:rdrr:index:check": "node tools/sync-external-rdrr-index.js --check",
77
- "verify:embeddinggemma-300m": "node tools/run-registry-verify.js embeddinggemma-300m",
78
- "verify:gemma-3-1b-it-f16": "node tools/run-registry-verify.js gemma-3-1b-it-f16",
79
- "verify:gemma-3-1b-it-f16-af32": "node tools/run-registry-verify.js gemma-3-1b-it-f16-af32",
77
+ "external:support:sync": "node tools/sync-external-support-registry.js",
78
+ "external:support:promote": "node tools/sync-external-support-registry.js --source-support-file models/catalog.json",
79
+ "external:support:check": "node tools/sync-external-support-registry.js --check",
80
+ "catalog:sync:external": "node tools/sync-catalog-from-external-support.js",
81
+ "catalog:sync:external:check": "node tools/sync-catalog-from-external-support.js --check",
80
82
  "verify:gemma-3-1b-it-q4k-ehf16-af32": "node tools/run-registry-verify.js gemma-3-1b-it-q4k-ehf16-af32",
81
83
  "verify:gemma-3-1b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js gemma-3-1b-it-wq4k-ef16-hf16",
82
84
  "verify:gemma-3-270m-it-q4k-ehf16-af32": "node tools/run-registry-verify.js gemma-3-270m-it-q4k-ehf16-af32",
@@ -84,25 +86,9 @@
84
86
  "verify:gemma-3-270m-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js gemma-3-270m-it-wq4k-ef16-hf16",
85
87
  "verify:gemma-3-270m-it-wq4k-ef16-hf16-f32": "node tools/run-registry-verify.js gemma-3-270m-it-wq4k-ef16-hf16-f32",
86
88
  "verify:gemma3-1b": "node tools/run-registry-verify.js gemma3-1b",
87
- "verify:gemma3-1b-f16": "node tools/run-registry-verify.js gemma3-1b-f16",
88
89
  "verify:gemma3-270m": "node tools/run-registry-verify.js gemma3-270m",
89
- "verify:google-embeddinggemma-300m": "node tools/run-registry-verify.js google-embeddinggemma-300m",
90
- "verify:google-embeddinggemma-300m-q4k-ehf16-af32": "node tools/run-registry-verify.js google-embeddinggemma-300m-q4k-ehf16-af32",
91
- "verify:google-embeddinggemma-300m-wq4k-ef16": "node tools/run-registry-verify.js google-embeddinggemma-300m-wq4k-ef16",
92
90
  "verify:google-gemma-3-1b-it": "node tools/run-registry-verify.js google-gemma-3-1b-it",
93
- "verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it",
94
- "verify:google-translategemma-4b-it": "node tools/run-registry-verify.js google-translategemma-4b-it",
95
- "verify:qwen-3-5-0-8b": "node tools/run-registry-verify.js qwen-3-5-0-8b",
96
- "verify:qwen-3-5-0-8b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-0-8b-wq4k-ef16-hf16-f16",
97
- "verify:qwen-3-5-2b": "node tools/run-registry-verify.js qwen-3-5-2b",
98
- "verify:qwen-3-5-2b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-2b-wq4k-ef16-hf16-f16",
99
- "verify:qwen-qwen3.5-0.8b": "node tools/run-registry-verify.js qwen-qwen3.5-0.8b",
100
- "verify:qwen-qwen3.5-2b": "node tools/run-registry-verify.js qwen-qwen3.5-2b",
101
- "verify:qwen3-0.8b": "node tools/run-registry-verify.js qwen3-0.8b",
102
- "verify:qwen3-2b": "node tools/run-registry-verify.js qwen3-2b",
103
- "verify:translategemma": "node tools/run-registry-verify.js translategemma",
104
- "verify:translategemma-4b": "node tools/run-registry-verify.js translategemma-4b",
105
- "verify:translategemma-4b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js translategemma-4b-it-wq4k-ef16-hf16"
91
+ "verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it"
106
92
  },
107
93
  "exports": {
108
94
  ".": {
@@ -170,12 +156,13 @@
170
156
  "tools/convert-safetensors-node.js"
171
157
  ],
172
158
  "devDependencies": {
173
- "@huggingface/transformers": "^3.8.1",
159
+ "@huggingface/transformers": "4.0.0-next.8",
174
160
  "jest": "^30.2.0",
175
161
  "onnxruntime-web": "^1.24.1",
176
162
  "playwright": "^1.58.2"
177
163
  },
178
164
  "optionalDependencies": {
179
- "@simulatte/webgpu": "0.1.x"
165
+ "@simulatte/webgpu": "0.x.x",
166
+ "webgpu": "^0.3.8"
180
167
  }
181
168
  }
@@ -408,6 +408,7 @@ export async function convertModel(files, options = {}) {
408
408
  // Parse based on format
409
409
  let modelInfo;
410
410
  let config = null;
411
+ let generationConfig = null;
411
412
  let tokenizerJson = null;
412
413
  let tokenizerConfig = null;
413
414
  let tokenizerModel = null;
@@ -455,6 +456,10 @@ export async function convertModel(files, options = {}) {
455
456
  tokenizerConfig = await parseTokenizerConfigJson(auxiliary.tokenizerConfig);
456
457
  modelInfo.tokenizerConfig = tokenizerConfig;
457
458
  }
459
+ if (auxiliary.generationConfig) {
460
+ generationConfig = await parseConfigJson(auxiliary.generationConfig);
461
+ modelInfo.generationConfig = generationConfig;
462
+ }
458
463
  if (auxiliary.tokenizerModel) {
459
464
  const source = normalizeTensorSource(auxiliary.tokenizerModel);
460
465
  tokenizerModel = await source.readRange(0, source.size);
@@ -165,6 +165,12 @@ function createModelHandle(pipeline, resolved) {
165
165
  prefillKV(prompt, options = {}) {
166
166
  return pipeline.prefillKVOnly(prompt, options);
167
167
  },
168
+ prefillWithLogits(prompt, options = {}) {
169
+ return pipeline.prefillWithLogits(prompt, options);
170
+ },
171
+ decodeStepLogits(currentIds, options = {}) {
172
+ return pipeline.decodeStepLogits(currentIds, options);
173
+ },
168
174
  generateWithPrefixKV(prefix, prompt, options = {}) {
169
175
  return pipeline.generateWithPrefixKV(prefix, prompt, options);
170
176
  },
@@ -2,6 +2,7 @@ import type { RDRRManifest } from '../formats/rdrr/index.js';
2
2
  import type { GenerateOptions, KVCacheSnapshot } from '../generation/index.js';
3
3
  import type { ChatMessage } from '../inference/pipelines/text/chat-format.js';
4
4
  import type { LoRAManifest } from '../adapters/lora-loader.js';
5
+ import type { LogitsStepResult, PrefillResult } from '../inference/pipelines/text/types.d.ts';
5
6
 
6
7
  export interface DopplerLoadProgress {
7
8
  phase: 'resolve' | 'manifest' | 'load' | 'ready';
@@ -43,6 +44,8 @@ export interface DopplerModel {
43
44
  readonly deviceInfo: Record<string, unknown> | null;
44
45
  readonly advanced: {
45
46
  prefillKV(prompt: string, options?: GenerateOptions): Promise<KVCacheSnapshot>;
47
+ prefillWithLogits(prompt: string | ChatMessage[] | { messages: ChatMessage[] }, options?: GenerateOptions): Promise<PrefillResult>;
48
+ decodeStepLogits(currentIds: number[], options?: GenerateOptions): Promise<LogitsStepResult>;
46
49
  generateWithPrefixKV(
47
50
  prefix: KVCacheSnapshot,
48
51
  prompt: string,
@@ -199,6 +199,12 @@ function createModelHandle(pipeline, resolved) {
199
199
  prefillKV(prompt, options = {}) {
200
200
  return pipeline.prefillKVOnly(prompt, options);
201
201
  },
202
+ prefillWithLogits(prompt, options = {}) {
203
+ return pipeline.prefillWithLogits(prompt, options);
204
+ },
205
+ decodeStepLogits(currentIds, options = {}) {
206
+ return pipeline.decodeStepLogits(currentIds, options);
207
+ },
202
208
  generateWithPrefixKV(prefix, prompt, options = {}) {
203
209
  return pipeline.generateWithPrefixKV(prefix, prompt, options);
204
210
  },
@@ -272,6 +278,9 @@ export function doppler(prompt, options) {
272
278
 
273
279
  doppler.load = load;
274
280
  doppler.text = async function text(prompt, options) {
281
+ if (!options || typeof options !== 'object' || options.model == null) {
282
+ throw new Error('doppler.text() requires options.model.');
283
+ }
275
284
  assertNoLoadAffectingOptions('doppler.text()', options);
276
285
  return collectText(doppler(prompt, options));
277
286
  };
@@ -299,14 +308,14 @@ doppler.evict = async function evict(model) {
299
308
  if (!cached) {
300
309
  return false;
301
310
  }
302
- convenienceModelCache.delete(resolved.modelId);
303
311
  await cached.unload();
312
+ convenienceModelCache.delete(resolved.modelId);
304
313
  return true;
305
314
  };
306
315
  doppler.evictAll = async function evictAll() {
307
316
  const cached = [...convenienceModelCache.values()];
308
317
  convenienceModelCache.clear();
309
- await Promise.all(cached.map((entry) => entry.unload()));
318
+ await Promise.allSettled(cached.map((entry) => entry.unload()));
310
319
  };
311
320
  doppler.listModels = async function listModels() {
312
321
  const models = await listQuickstartModels();
@@ -1,4 +1,5 @@
1
1
  import { getCdnBasePath } from '../storage/download-types.js';
2
+ import { buildHfResolveBaseUrl } from '../utils/hf-resolve-url.js';
2
3
  import { loadJson } from '../utils/load-json.js';
3
4
 
4
5
  let registryPromise = null;
@@ -80,9 +81,6 @@ export function buildQuickstartModelBaseUrl(entry, options = {}) {
80
81
  }
81
82
  const cdnBasePath = typeof options.cdnBasePath === 'string' && options.cdnBasePath.length > 0
82
83
  ? options.cdnBasePath
83
- : (getCdnBasePath() || 'https://huggingface.co');
84
- const revision = entry.hf.revision || 'main';
85
- const base = cdnBasePath.replace(/\/$/, '');
86
- const path = entry.hf.path.replace(/^\/+/, '');
87
- return `${base}/${entry.hf.repoId}/resolve/${revision}/${path}`;
84
+ : getCdnBasePath();
85
+ return buildHfResolveBaseUrl(entry.hf, { cdnBasePath });
88
86
  }
@@ -16,7 +16,7 @@
16
16
  ],
17
17
  "hf": {
18
18
  "repoId": "Clocksmith/rdrr",
19
- "revision": "cd6c12be0e83e92d6dbd92598a0aa94391ec7e94",
19
+ "revision": "ca6f0dbdf3882d3893a65cf48f2bb6f1520df162",
20
20
  "path": "models/gemma-3-270m-it-q4k-ehf16-af32"
21
21
  }
22
22
  },
@@ -32,7 +32,7 @@
32
32
  ],
33
33
  "hf": {
34
34
  "repoId": "Clocksmith/rdrr",
35
- "revision": "b23aca921ea11729d6f34b9484555968a5ab0e42",
35
+ "revision": "7e79c466d54455bd370c81685956ea9abae0fd30",
36
36
  "path": "models/google-embeddinggemma-300m-q4k-ehf16-af32"
37
37
  }
38
38
  }
@@ -134,6 +134,11 @@ export function getKernelPathStrict(): boolean;
134
134
  */
135
135
  export function isKernelPathFusedQ4K(path?: KernelPathSchema | null): boolean;
136
136
 
137
+ /**
138
+ * Check if a kernel path requires matmul weights to stay in F32.
139
+ */
140
+ export function kernelPathRequiresF32MatmulWeights(path?: KernelPathSchema | null): boolean;
141
+
137
142
  /**
138
143
  * Check if the active kernel path uses fused Q4K matmul.
139
144
  */
@@ -503,6 +503,19 @@ export function isKernelPathFusedQ4K(path = undefined) {
503
503
  return kernelSteps.some((step) => step.kernel.includes('fused_matmul_q4'));
504
504
  }
505
505
 
506
+ export function kernelPathRequiresF32MatmulWeights(path = undefined) {
507
+ const lookupPath = path === undefined ? activeKernelPath : path;
508
+ if (!lookupPath) return false;
509
+ const kernelSteps = [
510
+ ...(lookupPath.decode?.steps ?? []),
511
+ ...(lookupPath.prefill?.steps ?? []),
512
+ ...(lookupPath.preLayer ?? []),
513
+ ...(lookupPath.postLayer ?? []),
514
+ ...(lookupPath.layerOverrides?.flatMap((override) => override.steps) ?? []),
515
+ ];
516
+ return kernelSteps.some((step) => normalizeKernelFile(step.kernel) === 'matmul_f32.wgsl');
517
+ }
518
+
506
519
  export function isActiveKernelPathFusedQ4K() {
507
520
  return isKernelPathFusedQ4K(activeKernelPath);
508
521
  }
@@ -59,8 +59,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
59
59
  "cross_entropy_loss.wgsl#main": "5a48087bdec94184432c90ce5b345e1eadbdfcb13b9793ecee8052bc7392239c",
60
60
  "depthwise_conv2d_f16.wgsl#main": "f7f093a7e6623ed17a675bac729149e94718aece916416966eaf03c1d6939f2a",
61
61
  "depthwise_conv2d.wgsl#main": "cf14cb40d282ad4d4fab160109b97eaeaf12aab62579b73324ac485ac75155b0",
62
- "dequant_f16_out_vec4.wgsl#main_vec4": "61c20e6c71c1c8421b4ec202dbd26292a6300587bd44c314f2a6c6d9d9442c3a",
63
- "dequant_f16_out.wgsl#main": "94d61843d56f9a3bbc6b7c2b95dc6ecbba3f6a262b2c4086a076f69a8c38ccae",
62
+ "dequant_f16_out_vec4.wgsl#main_vec4": "ff729cc220ba5425e17c4c537a9993f25b6541046b6c2553d2a43a8b40ed2ce9",
63
+ "dequant_f16_out.wgsl#main": "caed21e420cbace78d3203548962a5ec3fc36980f153ae775f6a91a31af97d3a",
64
64
  "dequant_f16_rowwise.wgsl#main": "f5bf7cef950b52d65cee6121dbaa176244d3221045b3b6386b3be47f23ce17dc",
65
65
  "dequant_f32_rowwise.wgsl#main": "e73606e1b47e1191203a210bececa8a597bcab8bcc535146718afa6a021cab0d",
66
66
  "dequant_mxfp4_expert_f16.wgsl#main_expert": "96af52551ac40e1b86121a528a3ffaba835c5d0419e06407fed80353d46b17e1",
@@ -69,10 +69,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
69
69
  "dequant_mxfp4.wgsl#main": "885a5f752b684c6ca0bb10e3a1846a396eef14d2158e8c8ad31bd1dd4c74b9ef",
70
70
  "dequant_q6k.wgsl#main": "be0aed027932d8b7dd1e92d0090ced39e4df8be724acf290f52db0004be9a35e",
71
71
  "dequant_q8_0.wgsl#main": "ff5f800da963b0502a9ffab723cbcac0bbb5eb9a02898afc2aba2db215a58da7",
72
- "dequant_shared_vec4.wgsl#main_vec4": "aaf330e814cbd4c2838f07639025e414542ec2d0ddbb34913d7f10d28253baf2",
73
- "dequant_shared.wgsl#main": "d83d83a5d2bbcd29a06f67fb41496edaa45216438ff0dd177e7ec33af08104a4",
74
- "dequant_subgroup.wgsl#main": "6b6296d8b060a9b16e7b3c2bee99dbbcbbf9cc3d557f1e2b000f7f9bfc020309",
75
- "dequant_subgroup.wgsl#main_vec4": "d9de6ea645f558bcdf6db622fe146710e143fc4c2bb56f130342d62858eb456c",
72
+ "dequant_shared_vec4.wgsl#main_vec4": "24820dae36f6669a33f22b428df03791d9c700944c5ae33bd8c88e8cbeffd103",
73
+ "dequant_shared.wgsl#main": "e21284b5b70d4ac88d7c151760e451c2006705f1ea617b3db7f89994af4cc7df",
74
+ "dequant_subgroup.wgsl#main": "cbc2d86a5a2234b4c1691d5df02279263be7a66a1d4a2ad4aec1845a26baa9c9",
75
+ "dequant_subgroup.wgsl#main_vec4": "9e044bd0f44e73872dd8d8aa467e802c5471de86a2044de2cf8efc726e5a1182",
76
76
  "energy_eval_f16.wgsl#main": "09223ae193593f3555866a3acfe76ca35442ef4f3967cae376bdcc211f3054b3",
77
77
  "energy_eval.wgsl#main": "e10d9572397ebece5275aecd907cba5970f6a5c3744dd8b982677efb8982bdd2",
78
78
  "energy_quintel_grad_f16.wgsl#main": "eb87ed8592b46b0a4d866c245b664cadb2bca016f72419e763402a6a721c4951",
@@ -142,9 +142,9 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
142
142
  "matmul_gemv_subgroup_f16a.wgsl#main_multicol": "c8e86ecbbefa27a3b7366af676d89a992c2e951329cdf19abb57b9c90144379e",
143
143
  "matmul_gemv_subgroup_f16a.wgsl#main_vec4": "f227a403cdf9717dd68224c9ea55708ffe14c618d8146f5d48b42af0f253df29",
144
144
  "matmul_gemv_subgroup_f16a.wgsl#main_vec4_cols8": "9e7aba97a6cf199b3f574166e295ea051ebd59e308b5f6f2ce5a4de2d04963ce",
145
- "matmul_gemv_subgroup.wgsl#main": "ff526a7e70d6933f5c7a59aee476cc9c5cf123903222ed2e74f555e86e1ac757",
146
- "matmul_gemv_subgroup.wgsl#main_multicol": "96c38c15e6fed0d7efdc5cd094db5843a8e8ddfe01eee3bc7322fa555dacf3d0",
147
- "matmul_gemv_subgroup.wgsl#main_vec4": "3cee3bed453b40c5564a751d2a917649e10ad52f5268e77cbfecfcee34780457",
145
+ "matmul_gemv_subgroup.wgsl#main": "ac84b6dc88fe077dc885d8547e55526bec2f792074dd8746f907ce4a7c342028",
146
+ "matmul_gemv_subgroup.wgsl#main_multicol": "6631ed8936b6316499e1e1493915dc02a2e137d4f4d2650b62ce63e8805067f1",
147
+ "matmul_gemv_subgroup.wgsl#main_vec4": "de04e5670494401dd975915e77a603e07144aa1c928c47270afe7a806428cbfd",
148
148
  "matmul_gemv.wgsl#main": "dc892efc87edc6d5ddaf191b86c1cc41a603352a332023aa0b1fe55d166673d0",
149
149
  "modulate_f16.wgsl#main": "44a98cda1cc7a3575788f865173b9890be792c94e852ac8311b6b8ffbdc1438d",
150
150
  "modulate.wgsl#main": "dfe88a35b94752573199c16b3d8aecd4e8e7da57dc88d7b342aa61e0122e71ec",
@@ -182,18 +182,18 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
182
182
  "rope.wgsl#rope_ntk_scaled": "818f89865a3d1d6f2d49f671ac882d0fde9709702160a1ae8d9a8ef113afb511",
183
183
  "rope.wgsl#rope_qk": "3d773c8b8c400142edc8a4111afb04a2bf75bdb109b2d41cbe5afdb72a959772",
184
184
  "rope.wgsl#rope_yarn": "cb00e1cf87fac198dcf0fb0d4e2d5f6f99d2fed6dff0a089a96bb459917851d2",
185
- "sample_f16.wgsl#argmax": "30b9f199b49352e5aff91b7aa8016edb423ce33f77481c3a7bc184251856fb27",
186
- "sample_f16.wgsl#argmax_reduce": "a3ca27fc50b10c36c1676bdd5dbfe5edc67850cdd5c1af7a1d3ad70f830dd8a7",
187
- "sample_f16.wgsl#find_topk_phase1": "24e47e5ced28af802959e350ff0a6eec6b9a26f89fb38e222990eeaffb16bd36",
188
- "sample_f16.wgsl#find_topk_phase2": "f40b76d7e1e5606ff6a1e369cf285cb3e802047cfd81377458d4ed0248fcb6a5",
189
- "sample_f16.wgsl#sample_single_pass": "f3551288786f0a47f5222e07ab7f54a646e422c2493a62b8074202601eb06ddb",
190
- "sample_f16.wgsl#softmax_and_sample": "f1201f01af94c5536463be6439fbe7ed166fcb9494ea081f7d393be3125a7efd",
191
- "sample.wgsl#argmax": "bde072faa1ce26e19b6fa1f4ba24c4cd2465909c77a60b175bde1d250113a3b4",
192
- "sample.wgsl#argmax_reduce": "b7fa3aea69d9888b9385b380dc70416350d375564b95688592768cd81560b214",
193
- "sample.wgsl#find_topk_phase1": "d5fbe00d9ad0c89bd3af72a06f7c964c1b6c2634974ed9a76397fd441422827c",
194
- "sample.wgsl#find_topk_phase2": "940b216e605d22096da5aca65950a8030866fc5a39e7fdf484d69a832de1b63a",
195
- "sample.wgsl#sample_single_pass": "4412357e84113ee2f1bc0dc8bf89e314c2ab482c89c14ca016ea9949d16a9d0c",
196
- "sample.wgsl#softmax_and_sample": "7172c60e76430fbe130e530e3564b569b45eccf193987b32d6f52bd6bbcc9f08",
185
+ "sample_f16.wgsl#argmax": "7d7188081953a79b6d71bdf783d75df97a78401e2fe62e6d356cc44756a42c41",
186
+ "sample_f16.wgsl#argmax_reduce": "c0284bc9a50d25e215b21cb8c70e24dae3cb32b578691c2b6df5f3ede68a67c7",
187
+ "sample_f16.wgsl#find_topk_phase1": "8abd0a978d87adb7cce7337bb1b045a151768724f57802ad060df1dad735cff6",
188
+ "sample_f16.wgsl#find_topk_phase2": "ea7684cf0cc6014d39ac821edf9c89e140552b5009a72e6e91b00f8816678568",
189
+ "sample_f16.wgsl#sample_single_pass": "1cd8f0babc5c824b455080d30028109adfe52ce6f79009fbb986fde0d377fcb5",
190
+ "sample_f16.wgsl#softmax_and_sample": "c1d58cb952b704596d7ab6a2aa32b911a6e869e05b42adac3e4a19d898aa17ae",
191
+ "sample.wgsl#argmax": "f68b9cfdd3265a5cc52b216e549b629f1f8209e5aaa2f788142fa03db4c2d538",
192
+ "sample.wgsl#argmax_reduce": "96f8dd75a13db82e1928914e1f40ff1b9e03563eb5f8e3708b230f453b1fc160",
193
+ "sample.wgsl#find_topk_phase1": "736222d54f805b2791ebb803e9574fca93ab2b25fad0a64245f782499ce2d10c",
194
+ "sample.wgsl#find_topk_phase2": "a590107f0b7603b4b9624140dea1b436362062f63d64ed6d77e1628578796e77",
195
+ "sample.wgsl#sample_single_pass": "91c5c30bbc3e034457c1521c1ad576ce798c0868a1fe16e02be5f92706614096",
196
+ "sample.wgsl#softmax_and_sample": "132d67a1393702c81ee896975447f14f9a6a2589b25125d28401bc8ca47a253d",
197
197
  "sana_linear_attention_apply_f16.wgsl#main": "4a7426ce67eccfb70956feeae84275f4d3cc586c50e8442c07eb69993b378ab5",
198
198
  "sana_linear_attention_apply.wgsl#main": "5f69e0bc1d9e2df5a61e13bd819313c8f7ff5dfc4b7d78e71d5152dc23b6a86c",
199
199
  "sana_linear_attention_summary_f16.wgsl#main": "3abb736ead999485b5dac9c6b534143b464cfd0b5300c5e03c56cec03c8fa48e",
@@ -213,6 +213,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
213
213
  "softmax.wgsl#main": "45c5876806b442222d7e190e595f55a0079bae82e07d37586996c1a63790bb7a",
214
214
  "softmax.wgsl#softmax_online": "6c62601ba2f88f7de9dacf026cc2357168df47d009fd108736655b645217cd0a",
215
215
  "softmax.wgsl#softmax_small": "ad75f10e0a3caadd278130504e7d0e4e1b2f0621f8bd390abae5d973e301e47b",
216
+ "split_qg_f16.wgsl#main": "1d19e9d9900a270a3a1bd407347908f891ba98a1cdcee35ae932257a9a1c72cb",
217
+ "split_qg.wgsl#main": "64a8aa855c6246675bebdeab0258dc0e649e3986ef2bba4572d2d7dc1af902f4",
216
218
  "split_qkv_f16.wgsl#main": "bd1a92fcd9382bdcdf00bcc59248a12489444904a4f10845a381f177b6ad649f",
217
219
  "split_qkv.wgsl#main": "bc7c95a47322edc11fec19105efd3774c2adfed151530c849909d03af7503e4c",
218
220
  "topk_f16_weights.wgsl#softmax_topk": "863559c28eb46a2b4dc16f21a19aca2424a5d68fc3430b29461bebdd7ec8f625",
@@ -0,0 +1,46 @@
1
+ {
2
+ "id": "mixtral-moe-v1",
3
+ "description": "Deterministic Mixtral-style MoE kernel routing profile for Q4K/F16 expert weights with standard gate/up/down FFN.",
4
+ "router": {
5
+ "topk": [
6
+ {
7
+ "match": { "hasF16": true, "hasSubgroups": true, "routerDtype": "f32" },
8
+ "value": "softmax_topk_f32_subgroup"
9
+ },
10
+ {
11
+ "match": { "hasF16": true, "routerDtype": "f32" },
12
+ "value": "softmax_topk_f32"
13
+ },
14
+ {
15
+ "match": {},
16
+ "value": "softmax_topk_f32"
17
+ }
18
+ ]
19
+ },
20
+ "dequant": {
21
+ "q4kExpert": [
22
+ {
23
+ "match": { "hasF16": true, "hasSubgroups": true, "outputDtype": "f32" },
24
+ "value": "q4k_expert_dequant_f32_subgroup"
25
+ },
26
+ {
27
+ "match": { "hasF16": true, "outputDtype": "f16" },
28
+ "value": "q4k_expert_dequant_f16"
29
+ },
30
+ {
31
+ "match": {},
32
+ "value": "q4k_expert_dequant_f32"
33
+ }
34
+ ],
35
+ "f16Expert": [
36
+ {
37
+ "match": { "hasF16": true, "outputDtype": "f16" },
38
+ "value": "f16_expert_passthrough"
39
+ },
40
+ {
41
+ "match": {},
42
+ "value": "f16_expert_upcast_f32"
43
+ }
44
+ ]
45
+ }
46
+ }
@@ -4322,6 +4322,80 @@
4322
4322
  }
4323
4323
  }
4324
4324
  },
4325
+ "split_qg": {
4326
+ "description": "De-interleave Q and Gate projections from q_proj output for attentionOutputGate models",
4327
+ "baseBindings": [
4328
+ {
4329
+ "index": 0,
4330
+ "name": "uniforms",
4331
+ "type": "uniform"
4332
+ },
4333
+ {
4334
+ "index": 1,
4335
+ "name": "qg_interleaved",
4336
+ "type": "read-only-storage"
4337
+ },
4338
+ {
4339
+ "index": 2,
4340
+ "name": "Q",
4341
+ "type": "storage"
4342
+ },
4343
+ {
4344
+ "index": 3,
4345
+ "name": "G",
4346
+ "type": "storage"
4347
+ }
4348
+ ],
4349
+ "baseUniforms": {
4350
+ "size": 16,
4351
+ "fields": [
4352
+ {
4353
+ "name": "num_tokens",
4354
+ "type": "u32",
4355
+ "offset": 0
4356
+ },
4357
+ {
4358
+ "name": "num_heads",
4359
+ "type": "u32",
4360
+ "offset": 4
4361
+ },
4362
+ {
4363
+ "name": "head_dim",
4364
+ "type": "u32",
4365
+ "offset": 8
4366
+ },
4367
+ {
4368
+ "name": "_pad",
4369
+ "type": "u32",
4370
+ "offset": 12
4371
+ }
4372
+ ]
4373
+ },
4374
+ "variants": {
4375
+ "default": {
4376
+ "wgsl": "split_qg.wgsl",
4377
+ "entryPoint": "main",
4378
+ "workgroup": [
4379
+ 256,
4380
+ 1,
4381
+ 1
4382
+ ],
4383
+ "requires": []
4384
+ },
4385
+ "f16": {
4386
+ "wgsl": "split_qg_f16.wgsl",
4387
+ "entryPoint": "main",
4388
+ "workgroup": [
4389
+ 256,
4390
+ 1,
4391
+ 1
4392
+ ],
4393
+ "requires": [
4394
+ "shader-f16"
4395
+ ]
4396
+ }
4397
+ }
4398
+ },
4325
4399
  "sample": {
4326
4400
  "description": "GPU-side sampling kernels",
4327
4401
  "baseBindings": [
@@ -12,6 +12,7 @@ const transformerPreset = await loadJson('./presets/models/transformer.json', im
12
12
  const diffusionPreset = await loadJson('./presets/models/diffusion.json', import.meta.url, 'Failed to load preset');
13
13
  const gemma2Preset = await loadJson('./presets/models/gemma2.json', import.meta.url, 'Failed to load preset');
14
14
  const gemma3Preset = await loadJson('./presets/models/gemma3.json', import.meta.url, 'Failed to load preset');
15
+ const gemma4Preset = await loadJson('./presets/models/gemma4.json', import.meta.url, 'Failed to load preset');
15
16
  const translateGemmaPreset = await loadJson('./presets/models/translategemma.json', import.meta.url, 'Failed to load preset');
16
17
  const embeddingGemmaPreset = await loadJson('./presets/models/embeddinggemma.json', import.meta.url, 'Failed to load preset');
17
18
  const functiongemmaPreset = await loadJson('./presets/models/functiongemma.json', import.meta.url, 'Failed to load preset');
@@ -23,6 +24,8 @@ const mambaPreset = await loadJson('./presets/models/mamba.json', import.meta.ur
23
24
  const modernbertPreset = await loadJson('./presets/models/modernbert.json', import.meta.url, 'Failed to load preset');
24
25
  const lfm2Preset = await loadJson('./presets/models/lfm2.json', import.meta.url, 'Failed to load preset');
25
26
  const qwen3Preset = await loadJson('./presets/models/qwen3.json', import.meta.url, 'Failed to load preset');
27
+ const qwen35Preset = await loadJson('./presets/models/qwen3_5.json', import.meta.url, 'Failed to load preset');
28
+ const qwen3VlPreset = await loadJson('./presets/models/qwen3_vl.json', import.meta.url, 'Failed to load preset');
26
29
  const kimiK2Preset = await loadJson('./presets/models/kimi-k2.json', import.meta.url, 'Failed to load preset');
27
30
  const gptOssPreset = await loadJson('./presets/models/gpt-oss.json', import.meta.url, 'Failed to load preset');
28
31
 
@@ -35,6 +38,7 @@ export const PRESET_REGISTRY = {
35
38
  transformer: transformerPreset,
36
39
  gemma2: gemma2Preset,
37
40
  gemma3: gemma3Preset,
41
+ gemma4: gemma4Preset,
38
42
  translategemma: translateGemmaPreset,
39
43
  embeddinggemma: embeddingGemmaPreset,
40
44
  functiongemma: functiongemmaPreset,
@@ -46,6 +50,8 @@ export const PRESET_REGISTRY = {
46
50
  modernbert: modernbertPreset,
47
51
  lfm2: lfm2Preset,
48
52
  qwen3: qwen3Preset,
53
+ qwen3_5: qwen35Preset,
54
+ qwen3_vl: qwen3VlPreset,
49
55
  kimi_k2: kimiK2Preset,
50
56
  gpt_oss: gptOssPreset,
51
57
  };
@@ -93,10 +99,13 @@ export const PRESET_DETECTION_ORDER = [
93
99
  'diffusion',
94
100
  // Model families (check more specific patterns first)
95
101
  'gemma2',
102
+ 'gemma4',
96
103
  'translategemma',
97
104
  'gemma3',
98
105
  'llama3',
99
106
  'lfm2',
107
+ 'qwen3_vl',
108
+ 'qwen3_5',
100
109
  'qwen3',
101
110
  'kimi_k2',
102
111
  'gpt_oss',
@@ -171,6 +171,13 @@ export function buildMergeContractArtifact() {
171
171
  `configA=${isolatedConfigA.runtime.inference.compute.activationDtype}, configB=${isolatedConfigB.runtime.inference.compute.activationDtype}`,
172
172
  'actual'
173
173
  );
174
+ recordCheck(
175
+ checks,
176
+ 'runtime.schema.storage.opfs_sync_access_handle_defaults_off',
177
+ isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle === false,
178
+ `value=${String(isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle)}`,
179
+ 'actual'
180
+ );
174
181
 
175
182
  const calibrateConfig = createDopplerConfig({
176
183
  runtime: {