@simulatte/doppler 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +25 -6
- package/package.json +25 -38
- package/src/browser/browser-converter.js +5 -0
- package/src/client/doppler-api.browser.js +6 -0
- package/src/client/doppler-api.d.ts +3 -0
- package/src/client/doppler-api.js +11 -2
- package/src/client/doppler-registry.js +3 -5
- package/src/client/doppler-registry.json +2 -2
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +13 -0
- package/src/config/kernels/kernel-ref-digests.js +23 -21
- package/src/config/kernels/moe/mixtral.paths.json +46 -0
- package/src/config/kernels/registry.json +74 -0
- package/src/config/loader.js +9 -0
- package/src/config/merge-contract-check.js +7 -0
- package/src/config/platforms/loader.js +3 -1
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-nosubgroups.json +16 -16
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-online.json +8 -8
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-small-attn.json +61 -0
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +21 -0
- package/src/config/presets/models/gemma2.json +2 -1
- package/src/config/presets/models/gemma3.json +4 -1
- package/src/config/presets/models/gemma4.json +61 -0
- package/src/config/presets/models/granite-docling.json +70 -0
- package/src/config/presets/models/lfm2.json +6 -1
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/models/qwen3_vl.json +40 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +2 -1
- package/src/config/presets/runtime/experiments/verify/lfm2-verify.json +46 -0
- package/src/config/presets/runtime/experiments/verify/translategemma-verify.json +39 -0
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/modes/trace-layers.json +1 -0
- package/src/config/presets/runtime/tiers/gemma4-16gb.json +69 -0
- package/src/config/presets/runtime/tiers/gemma4-24gb.json +66 -0
- package/src/config/presets/runtime/tiers/gemma4-32gb.json +66 -0
- package/src/config/runtime.js +3 -0
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +40 -0
- package/src/config/schema/debug.schema.js +28 -0
- package/src/config/schema/index.js +2 -0
- package/src/config/schema/inference-defaults.schema.js +1 -1
- package/src/config/schema/kernel-path.schema.d.ts +1 -0
- package/src/config/schema/manifest.schema.d.ts +1 -1
- package/src/config/schema/manifest.schema.js +1 -1
- package/src/config/schema/memory-limits.schema.js +2 -2
- package/src/config/schema/storage.schema.js +2 -2
- package/src/converter/conversion-plan.js +11 -3
- package/src/converter/core.js +19 -8
- package/src/converter/manifest-inference.js +12 -22
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +5 -1
- package/src/converter/quantizer.d.ts +5 -0
- package/src/converter/quantizer.js +34 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/distribution/shard-delivery.js +40 -1
- package/src/formats/rdrr/classification.js +32 -0
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +14 -1
- package/src/gpu/kernel-runtime.js +4 -2
- package/src/gpu/kernels/attention.js +2 -1
- package/src/gpu/kernels/dequant_f16_out.wgsl +4 -2
- package/src/gpu/kernels/dequant_f16_out_vec4.wgsl +5 -2
- package/src/gpu/kernels/dequant_shared.wgsl +4 -2
- package/src/gpu/kernels/dequant_shared_vec4.wgsl +4 -2
- package/src/gpu/kernels/dequant_subgroup.wgsl +6 -2
- package/src/gpu/kernels/gated-short-conv.d.ts +63 -0
- package/src/gpu/kernels/gated-short-conv.js +284 -0
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/linear-attention-core.js +37 -17
- package/src/gpu/kernels/matmul-selection.js +48 -4
- package/src/gpu/kernels/matmul.d.ts +5 -0
- package/src/gpu/kernels/matmul.js +71 -2
- package/src/gpu/kernels/matmul_gemv_subgroup.wgsl +77 -79
- package/src/gpu/kernels/rmsnorm.js +9 -2
- package/src/gpu/kernels/sample.js +1 -3
- package/src/gpu/kernels/sample.wgsl +39 -9
- package/src/gpu/kernels/sample_f16.wgsl +38 -8
- package/src/gpu/kernels/shader-cache.js +9 -4
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/inference/browser-harness.d.ts +2 -0
- package/src/inference/browser-harness.js +20 -1
- package/src/inference/kv-cache/base.js +3 -10
- package/src/inference/pipelines/diffusion/helpers.js +3 -0
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +10 -3
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +13 -1
- package/src/inference/pipelines/text/attention/projections.js +54 -13
- package/src/inference/pipelines/text/attention/record.js +16 -6
- package/src/inference/pipelines/text/attention/run.js +59 -6
- package/src/inference/pipelines/text/config.d.ts +1 -0
- package/src/inference/pipelines/text/config.js +46 -4
- package/src/inference/pipelines/text/embed.js +26 -7
- package/src/inference/pipelines/text/execution-plan.js +5 -4
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +10 -3
- package/src/inference/pipelines/text/execution-v0.js +12 -1
- package/src/inference/pipelines/text/generator-helpers.js +1 -0
- package/src/inference/pipelines/text/generator-runtime.js +19 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +15 -0
- package/src/inference/pipelines/text/generator-steps.js +71 -26
- package/src/inference/pipelines/text/generator.d.ts +5 -0
- package/src/inference/pipelines/text/generator.js +353 -166
- package/src/inference/pipelines/text/init.d.ts +15 -0
- package/src/inference/pipelines/text/init.js +35 -10
- package/src/inference/pipelines/text/layer.js +38 -8
- package/src/inference/pipelines/text/linear-attention.d.ts +5 -0
- package/src/inference/pipelines/text/linear-attention.js +33 -3
- package/src/inference/pipelines/text/logits/gpu.js +2 -2
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +3 -1
- package/src/inference/pipelines/text/model-load.js +3 -0
- package/src/inference/pipelines/text/moe-gpu.js +21 -3
- package/src/inference/pipelines/text/moe-shape-validator.d.ts +9 -0
- package/src/inference/pipelines/text/moe-shape-validator.js +31 -11
- package/src/inference/pipelines/text/ops.js +123 -53
- package/src/inference/pipelines/text/probes.js +1 -0
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/state.js +2 -0
- package/src/inference/pipelines/text.d.ts +5 -0
- package/src/inference/pipelines/text.js +59 -1
- package/src/inference/pipelines/vision/encoder.js +386 -0
- package/src/inference/pipelines/vision/image-preprocess.js +151 -0
- package/src/inference/pipelines/vision/index.js +173 -0
- package/src/inference/pipelines/vision/ops.js +78 -0
- package/src/inference/pipelines/vision/patch-embed.js +151 -0
- package/src/inference/test-harness.js +11 -9
- package/src/loader/doppler-loader.d.ts +3 -0
- package/src/loader/doppler-loader.js +20 -3
- package/src/loader/experts/expert-cache.js +6 -2
- package/src/loader/experts/expert-loader.js +6 -2
- package/src/loader/final-weights-loader.js +2 -0
- package/src/loader/layer-loader.js +42 -3
- package/src/loader/manifest-config.js +3 -1
- package/src/loader/shard-cache.js +3 -2
- package/src/loader/tensors/tensor-loader.d.ts +3 -0
- package/src/loader/tensors/tensor-loader.js +130 -4
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +2 -2
- package/src/rules/kernels/moe.rules.mixtral.json +75 -0
- package/src/rules/kernels/softmax.rules.json +2 -0
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.d.ts +1 -0
- package/src/rules/rule-registry.js +4 -0
- package/src/storage/downloader.js +2 -1
- package/src/storage/quickstart-downloader.d.ts +3 -0
- package/src/storage/quickstart-downloader.js +27 -30
- package/src/storage/shard-manager.js +4 -3
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/node-converter.js +28 -7
- package/src/tooling/node-source-runtime.js +65 -5
- package/src/tooling/node-webgpu.js +24 -7
- package/src/types/model.d.ts +5 -0
- package/src/utils/hf-resolve-url.d.ts +16 -0
- package/src/utils/hf-resolve-url.js +17 -0
- package/src/version.js +1 -1
- package/tools/doppler-cli.js +6 -1
- package/src/tooling/node-convert.d.ts +0 -54
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,38 @@ This changelog is package-facing and release-oriented. Entries before `0.1.7`
|
|
|
6
6
|
were retrofitted from package version history, release commits, and release
|
|
7
7
|
docs so the `0.1.x` line has one conventional npm-visible history surface.
|
|
8
8
|
|
|
9
|
+
## [0.1.8] - 2026-03-18
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Simplified demo to show only verified Q4K models (Gemma 3 270M, Gemma 3 1B).
|
|
14
|
+
Hidden Translate, Diffusion, and Embedding tabs until models are ready.
|
|
15
|
+
- Split demo monolith (6,680 lines) into focused modules: core, generation,
|
|
16
|
+
storage, translate, diagnostics, routing, utils.
|
|
17
|
+
- Trimmed hosted HF registry and quickstart registry to the two verified models.
|
|
18
|
+
- Aligned catalog, HF registry, and quickstart registry to the canonical
|
|
19
|
+
external support registry as single source of truth for HF revisions.
|
|
20
|
+
- Renamed all `.mjs` tool scripts to `.js` to match `"type": "module"` convention.
|
|
21
|
+
- Switched WebGPU optional dependency from `@simulatte/webgpu` to `webgpu ^0.3.8`.
|
|
22
|
+
- Pruned unused `verify:*` npm scripts for models no longer in the active set.
|
|
23
|
+
- Updated release-claim policy with newly verified models (LFM2, Qwen 3.5,
|
|
24
|
+
TranslateGemma variants).
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- Fixed Qwen 3.5 conversion configs using wrong model preset (`qwen3` instead
|
|
29
|
+
of `qwen3_5`), which caused support matrix check failures.
|
|
30
|
+
- Fixed Qwen mRoPE conflation: `ropeInterleaved` was incorrectly set from
|
|
31
|
+
`mropeInterleaved`, forcing adjacent-pair RoPE rotation on Qwen models.
|
|
32
|
+
- Fixed catalog lifecycle metadata inconsistencies: corrected `local`, `hf`,
|
|
33
|
+
`curated`, and `demo` fields to match actual artifact availability.
|
|
34
|
+
- Fixed GPU-dependent unit tests failing in non-GPU environments by adding
|
|
35
|
+
proper GPU readiness probes with clear skip reasons.
|
|
36
|
+
- Fixed kernel-ref digest registry drift (222 vs 224 entries).
|
|
37
|
+
- Fixed stale vendor benchmark fixture hashes after compare-engines config update.
|
|
38
|
+
- Removed failing and unverified models from demo visibility (TranslateGemma 4B,
|
|
39
|
+
EmbeddingGemma 300M with broken HF manifest, Qwen 3.5 0.8B/2B, F16 variant).
|
|
40
|
+
|
|
9
41
|
## [0.1.7] - 2026-03-10
|
|
10
42
|
|
|
11
43
|
### Added
|
package/README.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
Inference and training on raw WebGPU. Pure JS + WGSL.
|
|
4
4
|
|
|
5
|
+
**[Try the live demo](https://d4da.com)** | **[npm](https://www.npmjs.com/package/@simulatte/doppler)** | **[docs](https://github.com/clocksmith/doppler/blob/main/docs/INDEX.md)**
|
|
6
|
+
|
|
5
7
|

|
|
6
8
|
|
|
7
9
|
## Quick start
|
|
@@ -28,8 +30,6 @@ Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default.
|
|
|
28
30
|
npm install @simulatte/doppler
|
|
29
31
|
```
|
|
30
32
|
|
|
31
|
-
**[Live Demo](https://d4da.com)** · **[npm](https://www.npmjs.com/package/@simulatte/doppler)** · **[docs](https://github.com/clocksmith/doppler/blob/main/docs/INDEX.md)** · **[Project site](https://simulatte.world)**
|
|
32
|
-
|
|
33
33
|
## Why Doppler
|
|
34
34
|
|
|
35
35
|
**JS → WGSL → WebGPU.** Direct JavaScript orchestration into native WebGPU kernels, avoiding ONNX runtimes, WASM blobs, and bridge layers.
|
|
@@ -46,6 +46,28 @@ Snapshot artifacts:
|
|
|
46
46
|
- [g3-1b-p064-d064-t0-k1.compare.json](https://github.com/clocksmith/doppler/blob/main/benchmarks/vendors/fixtures/g3-1b-p064-d064-t0-k1.compare.json)
|
|
47
47
|
- [lfm2-5-1-2b-p064-d064-t0-k1.compare.json](https://github.com/clocksmith/doppler/blob/main/benchmarks/vendors/fixtures/lfm2-5-1-2b-p064-d064-t0-k1.compare.json)
|
|
48
48
|
|
|
49
|
+
## Supported models
|
|
50
|
+
|
|
51
|
+
All models below are verified with deterministic greedy decoding on WebGPU hardware.
|
|
52
|
+
Registry IDs resolve to hosted RDRR artifacts automatically.
|
|
53
|
+
|
|
54
|
+
| Model | Registry ID | Quant | Params |
|
|
55
|
+
| --- | --- | --- | --- |
|
|
56
|
+
| Gemma 3 270M IT | `gemma3-270m` | Q4K | 270M |
|
|
57
|
+
| Gemma 3 1B IT | `gemma3-1b` | Q4K | 1B |
|
|
58
|
+
| Gemma 3 1B IT (F16) | `gemma-3-1b-it-f16-af32` | F16 | 1B |
|
|
59
|
+
| TranslateGemma 4B IT | `translategemma-4b-it-q4k-ehf16-af32` | Q4K | 4B |
|
|
60
|
+
| TranslateGemma 4B 1B EN-ES | `translategemma-4b-1b-enes-q4k-ehf16-af32` | Q4K | 1B |
|
|
61
|
+
| EmbeddingGemma 300M | `google-embeddinggemma-300m-q4k-ehf16-af32` | Q4K | 300M |
|
|
62
|
+
| Qwen 3.5 0.8B | `qwen-3-5-0-8b-q4k-ehaf16` | Q4K | 0.8B |
|
|
63
|
+
| Qwen 3.5 2B | `qwen-3-5-2b-q4k-ehaf16` | Q4K | 2B |
|
|
64
|
+
| LFM2.5 1.2B Instruct | `lfm2-5-1-2b-instruct-q4k-ehf16-af32` | Q4K | 1.2B |
|
|
65
|
+
|
|
66
|
+
Additional model families (Llama 3, DeepSeek, Gemma 4 MoE, Mixtral, and others) have conversion
|
|
67
|
+
configs ready but are not yet cataloged. See the full
|
|
68
|
+
[model support matrix](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md)
|
|
69
|
+
for details.
|
|
70
|
+
|
|
49
71
|
## Under the hood
|
|
50
72
|
|
|
51
73
|
- Sharded weight loading via OPFS moves multi-GB weights into VRAM without blocking the main thread.
|
|
@@ -85,10 +107,7 @@ for await (const token of doppler('Hello', { model: 'gemma3-270m' })) {
|
|
|
85
107
|
- First-run workflow: [docs/getting-started.md](https://github.com/clocksmith/doppler/blob/main/docs/getting-started.md)
|
|
86
108
|
- Runtime config contract: [docs/config.md](https://github.com/clocksmith/doppler/blob/main/docs/config.md)
|
|
87
109
|
- Architecture: [docs/architecture.md](https://github.com/clocksmith/doppler/blob/main/docs/architecture.md)
|
|
88
|
-
-
|
|
89
|
-
|
|
90
|
-
Current model support is generated from the catalog and conversion registry.
|
|
91
|
-
See [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md) for the canonical verified, failing, and unverified status table.
|
|
110
|
+
- Model support matrix: [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md)
|
|
92
111
|
|
|
93
112
|
## Environment requirements
|
|
94
113
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@simulatte/doppler",
|
|
3
|
-
"version": "0.1.7",
|
|
3
|
+
"version": "0.1.9",
|
|
4
4
|
"description": "Browser-native WebGPU inference engine for local intent and inference loops",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "src/index.d.ts",
|
|
@@ -29,22 +29,22 @@
|
|
|
29
29
|
"bench:chart": "node ./benchmarks/vendors/compare-chart.js",
|
|
30
30
|
"bench:chart:readme": "node ./benchmarks/vendors/compare-chart.js --preset readme-evidence",
|
|
31
31
|
"bench:architecture:chart": "node ./benchmarks/vendors/generate-architecture-overview-svg.js",
|
|
32
|
-
"ci:diffusion:contract": "node tools/ci-diffusion-contract-gates.mjs",
|
|
33
|
-
"ci:diffusion:contract:list": "node tools/ci-diffusion-contract-gates.mjs --list",
|
|
34
|
-
"ci:training:contract": "node tools/ci-training-contract-gates.mjs",
|
|
35
|
-
"ci:training:contract:list": "node tools/ci-training-contract-gates.mjs --list",
|
|
36
|
-
"training:contract:delta": "node tools/emit-training-contract-delta.mjs",
|
|
37
|
-
"training:workloads:verify": "node tools/verify-training-workload-packs.mjs --registry tools/configs/training-workloads/registry.json",
|
|
38
|
-
"training:report-ids:publish": "node tools/publish-training-report-ids.mjs --registry tools/configs/training-workloads/registry.json",
|
|
39
|
-
"distill:studio:mvp": "node tools/distill-studio-mvp.mjs",
|
|
40
|
-
"distill:quality-gate": "node tools/distill-studio-quality-gate.mjs",
|
|
41
|
-
"p2p:observability": "node tools/p2p-delivery-observability.mjs",
|
|
42
|
-
"p2p:drill": "node tools/p2p-resilience-drill.mjs",
|
|
32
|
+
"ci:diffusion:contract": "node tools/ci-diffusion-contract-gates.js",
|
|
33
|
+
"ci:diffusion:contract:list": "node tools/ci-diffusion-contract-gates.js --list",
|
|
34
|
+
"ci:training:contract": "node tools/ci-training-contract-gates.js",
|
|
35
|
+
"ci:training:contract:list": "node tools/ci-training-contract-gates.js --list",
|
|
36
|
+
"training:contract:delta": "node tools/emit-training-contract-delta.js",
|
|
37
|
+
"training:workloads:verify": "node tools/verify-training-workload-packs.js --registry tools/configs/training-workloads/registry.json",
|
|
38
|
+
"training:report-ids:publish": "node tools/publish-training-report-ids.js --registry tools/configs/training-workloads/registry.json",
|
|
39
|
+
"distill:studio:mvp": "node tools/distill-studio-mvp.js",
|
|
40
|
+
"distill:quality-gate": "node tools/distill-studio-quality-gate.js",
|
|
41
|
+
"p2p:observability": "node tools/p2p-delivery-observability.js",
|
|
42
|
+
"p2p:drill": "node tools/p2p-resilience-drill.js",
|
|
43
43
|
"test": "npm run test:unit",
|
|
44
|
-
"test:unit": "node tools/run-node-tests.mjs --suite unit",
|
|
45
|
-
"test:gpu": "node tools/run-node-tests.mjs --suite gpu",
|
|
46
|
-
"test:coverage": "node tools/run-node-coverage.mjs",
|
|
47
|
-
"test:coverage:report": "node tools/run-node-coverage.mjs --no-threshold",
|
|
44
|
+
"test:unit": "node tools/run-node-tests.js --suite unit",
|
|
45
|
+
"test:gpu": "node tools/run-node-tests.js --suite gpu",
|
|
46
|
+
"test:coverage": "node tools/run-node-coverage.js",
|
|
47
|
+
"test:coverage:report": "node tools/run-node-coverage.js --no-threshold",
|
|
48
48
|
"test:gpu:browser": "node tools/doppler-cli.js verify --config '{\"request\":{\"suite\":\"kernels\"},\"run\":{\"surface\":\"browser\",\"browser\":{\"opfsCache\":false,\"headless\":true,\"channel\":\"chromium\",\"browserArgs\":[\"--use-angle=swiftshader\",\"--disable-vulkan-surface\"],\"console\":true}}}'",
|
|
49
49
|
"agents:verify": "node tools/verify-agent-parity.js",
|
|
50
50
|
"agents:freshness": "node tools/verify-agent-freshness.js",
|
|
@@ -74,9 +74,11 @@
|
|
|
74
74
|
"ci:catalog:check": "npm run registry:sync:scripts:check && npm run support:matrix:check && npm run registry:hf:check",
|
|
75
75
|
"external:rdrr:index": "node tools/sync-external-rdrr-index.js",
|
|
76
76
|
"external:rdrr:index:check": "node tools/sync-external-rdrr-index.js --check",
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
77
|
+
"external:support:sync": "node tools/sync-external-support-registry.js",
|
|
78
|
+
"external:support:promote": "node tools/sync-external-support-registry.js --source-support-file models/catalog.json",
|
|
79
|
+
"external:support:check": "node tools/sync-external-support-registry.js --check",
|
|
80
|
+
"catalog:sync:external": "node tools/sync-catalog-from-external-support.js",
|
|
81
|
+
"catalog:sync:external:check": "node tools/sync-catalog-from-external-support.js --check",
|
|
80
82
|
"verify:gemma-3-1b-it-q4k-ehf16-af32": "node tools/run-registry-verify.js gemma-3-1b-it-q4k-ehf16-af32",
|
|
81
83
|
"verify:gemma-3-1b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js gemma-3-1b-it-wq4k-ef16-hf16",
|
|
82
84
|
"verify:gemma-3-270m-it-q4k-ehf16-af32": "node tools/run-registry-verify.js gemma-3-270m-it-q4k-ehf16-af32",
|
|
@@ -84,25 +86,9 @@
|
|
|
84
86
|
"verify:gemma-3-270m-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js gemma-3-270m-it-wq4k-ef16-hf16",
|
|
85
87
|
"verify:gemma-3-270m-it-wq4k-ef16-hf16-f32": "node tools/run-registry-verify.js gemma-3-270m-it-wq4k-ef16-hf16-f32",
|
|
86
88
|
"verify:gemma3-1b": "node tools/run-registry-verify.js gemma3-1b",
|
|
87
|
-
"verify:gemma3-1b-f16": "node tools/run-registry-verify.js gemma3-1b-f16",
|
|
88
89
|
"verify:gemma3-270m": "node tools/run-registry-verify.js gemma3-270m",
|
|
89
|
-
"verify:google-embeddinggemma-300m": "node tools/run-registry-verify.js google-embeddinggemma-300m",
|
|
90
|
-
"verify:google-embeddinggemma-300m-q4k-ehf16-af32": "node tools/run-registry-verify.js google-embeddinggemma-300m-q4k-ehf16-af32",
|
|
91
|
-
"verify:google-embeddinggemma-300m-wq4k-ef16": "node tools/run-registry-verify.js google-embeddinggemma-300m-wq4k-ef16",
|
|
92
90
|
"verify:google-gemma-3-1b-it": "node tools/run-registry-verify.js google-gemma-3-1b-it",
|
|
93
|
-
"verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it",
|
|
94
|
-
"verify:google-translategemma-4b-it": "node tools/run-registry-verify.js google-translategemma-4b-it",
|
|
95
|
-
"verify:qwen-3-5-0-8b": "node tools/run-registry-verify.js qwen-3-5-0-8b",
|
|
96
|
-
"verify:qwen-3-5-0-8b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-0-8b-wq4k-ef16-hf16-f16",
|
|
97
|
-
"verify:qwen-3-5-2b": "node tools/run-registry-verify.js qwen-3-5-2b",
|
|
98
|
-
"verify:qwen-3-5-2b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-2b-wq4k-ef16-hf16-f16",
|
|
99
|
-
"verify:qwen-qwen3.5-0.8b": "node tools/run-registry-verify.js qwen-qwen3.5-0.8b",
|
|
100
|
-
"verify:qwen-qwen3.5-2b": "node tools/run-registry-verify.js qwen-qwen3.5-2b",
|
|
101
|
-
"verify:qwen3-0.8b": "node tools/run-registry-verify.js qwen3-0.8b",
|
|
102
|
-
"verify:qwen3-2b": "node tools/run-registry-verify.js qwen3-2b",
|
|
103
|
-
"verify:translategemma": "node tools/run-registry-verify.js translategemma",
|
|
104
|
-
"verify:translategemma-4b": "node tools/run-registry-verify.js translategemma-4b",
|
|
105
|
-
"verify:translategemma-4b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js translategemma-4b-it-wq4k-ef16-hf16"
|
|
91
|
+
"verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it"
|
|
106
92
|
},
|
|
107
93
|
"exports": {
|
|
108
94
|
".": {
|
|
@@ -170,12 +156,13 @@
|
|
|
170
156
|
"tools/convert-safetensors-node.js"
|
|
171
157
|
],
|
|
172
158
|
"devDependencies": {
|
|
173
|
-
"@huggingface/transformers": "
|
|
159
|
+
"@huggingface/transformers": "4.0.0-next.8",
|
|
174
160
|
"jest": "^30.2.0",
|
|
175
161
|
"onnxruntime-web": "^1.24.1",
|
|
176
162
|
"playwright": "^1.58.2"
|
|
177
163
|
},
|
|
178
164
|
"optionalDependencies": {
|
|
179
|
-
"@simulatte/webgpu": "0.
|
|
165
|
+
"@simulatte/webgpu": "0.x.x",
|
|
166
|
+
"webgpu": "^0.3.8"
|
|
180
167
|
}
|
|
181
168
|
}
|
|
@@ -408,6 +408,7 @@ export async function convertModel(files, options = {}) {
|
|
|
408
408
|
// Parse based on format
|
|
409
409
|
let modelInfo;
|
|
410
410
|
let config = null;
|
|
411
|
+
let generationConfig = null;
|
|
411
412
|
let tokenizerJson = null;
|
|
412
413
|
let tokenizerConfig = null;
|
|
413
414
|
let tokenizerModel = null;
|
|
@@ -455,6 +456,10 @@ export async function convertModel(files, options = {}) {
|
|
|
455
456
|
tokenizerConfig = await parseTokenizerConfigJson(auxiliary.tokenizerConfig);
|
|
456
457
|
modelInfo.tokenizerConfig = tokenizerConfig;
|
|
457
458
|
}
|
|
459
|
+
if (auxiliary.generationConfig) {
|
|
460
|
+
generationConfig = await parseConfigJson(auxiliary.generationConfig);
|
|
461
|
+
modelInfo.generationConfig = generationConfig;
|
|
462
|
+
}
|
|
458
463
|
if (auxiliary.tokenizerModel) {
|
|
459
464
|
const source = normalizeTensorSource(auxiliary.tokenizerModel);
|
|
460
465
|
tokenizerModel = await source.readRange(0, source.size);
|
|
@@ -165,6 +165,12 @@ function createModelHandle(pipeline, resolved) {
|
|
|
165
165
|
prefillKV(prompt, options = {}) {
|
|
166
166
|
return pipeline.prefillKVOnly(prompt, options);
|
|
167
167
|
},
|
|
168
|
+
prefillWithLogits(prompt, options = {}) {
|
|
169
|
+
return pipeline.prefillWithLogits(prompt, options);
|
|
170
|
+
},
|
|
171
|
+
decodeStepLogits(currentIds, options = {}) {
|
|
172
|
+
return pipeline.decodeStepLogits(currentIds, options);
|
|
173
|
+
},
|
|
168
174
|
generateWithPrefixKV(prefix, prompt, options = {}) {
|
|
169
175
|
return pipeline.generateWithPrefixKV(prefix, prompt, options);
|
|
170
176
|
},
|
|
@@ -2,6 +2,7 @@ import type { RDRRManifest } from '../formats/rdrr/index.js';
|
|
|
2
2
|
import type { GenerateOptions, KVCacheSnapshot } from '../generation/index.js';
|
|
3
3
|
import type { ChatMessage } from '../inference/pipelines/text/chat-format.js';
|
|
4
4
|
import type { LoRAManifest } from '../adapters/lora-loader.js';
|
|
5
|
+
import type { LogitsStepResult, PrefillResult } from '../inference/pipelines/text/types.d.ts';
|
|
5
6
|
|
|
6
7
|
export interface DopplerLoadProgress {
|
|
7
8
|
phase: 'resolve' | 'manifest' | 'load' | 'ready';
|
|
@@ -43,6 +44,8 @@ export interface DopplerModel {
|
|
|
43
44
|
readonly deviceInfo: Record<string, unknown> | null;
|
|
44
45
|
readonly advanced: {
|
|
45
46
|
prefillKV(prompt: string, options?: GenerateOptions): Promise<KVCacheSnapshot>;
|
|
47
|
+
prefillWithLogits(prompt: string | ChatMessage[] | { messages: ChatMessage[] }, options?: GenerateOptions): Promise<PrefillResult>;
|
|
48
|
+
decodeStepLogits(currentIds: number[], options?: GenerateOptions): Promise<LogitsStepResult>;
|
|
46
49
|
generateWithPrefixKV(
|
|
47
50
|
prefix: KVCacheSnapshot,
|
|
48
51
|
prompt: string,
|
|
@@ -199,6 +199,12 @@ function createModelHandle(pipeline, resolved) {
|
|
|
199
199
|
prefillKV(prompt, options = {}) {
|
|
200
200
|
return pipeline.prefillKVOnly(prompt, options);
|
|
201
201
|
},
|
|
202
|
+
prefillWithLogits(prompt, options = {}) {
|
|
203
|
+
return pipeline.prefillWithLogits(prompt, options);
|
|
204
|
+
},
|
|
205
|
+
decodeStepLogits(currentIds, options = {}) {
|
|
206
|
+
return pipeline.decodeStepLogits(currentIds, options);
|
|
207
|
+
},
|
|
202
208
|
generateWithPrefixKV(prefix, prompt, options = {}) {
|
|
203
209
|
return pipeline.generateWithPrefixKV(prefix, prompt, options);
|
|
204
210
|
},
|
|
@@ -272,6 +278,9 @@ export function doppler(prompt, options) {
|
|
|
272
278
|
|
|
273
279
|
doppler.load = load;
|
|
274
280
|
doppler.text = async function text(prompt, options) {
|
|
281
|
+
if (!options || typeof options !== 'object' || options.model == null) {
|
|
282
|
+
throw new Error('doppler.text() requires options.model.');
|
|
283
|
+
}
|
|
275
284
|
assertNoLoadAffectingOptions('doppler.text()', options);
|
|
276
285
|
return collectText(doppler(prompt, options));
|
|
277
286
|
};
|
|
@@ -299,14 +308,14 @@ doppler.evict = async function evict(model) {
|
|
|
299
308
|
if (!cached) {
|
|
300
309
|
return false;
|
|
301
310
|
}
|
|
302
|
-
convenienceModelCache.delete(resolved.modelId);
|
|
303
311
|
await cached.unload();
|
|
312
|
+
convenienceModelCache.delete(resolved.modelId);
|
|
304
313
|
return true;
|
|
305
314
|
};
|
|
306
315
|
doppler.evictAll = async function evictAll() {
|
|
307
316
|
const cached = [...convenienceModelCache.values()];
|
|
308
317
|
convenienceModelCache.clear();
|
|
309
|
-
await Promise.all(cached.map((entry) => entry.unload()));
|
|
318
|
+
await Promise.allSettled(cached.map((entry) => entry.unload()));
|
|
310
319
|
};
|
|
311
320
|
doppler.listModels = async function listModels() {
|
|
312
321
|
const models = await listQuickstartModels();
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { getCdnBasePath } from '../storage/download-types.js';
|
|
2
|
+
import { buildHfResolveBaseUrl } from '../utils/hf-resolve-url.js';
|
|
2
3
|
import { loadJson } from '../utils/load-json.js';
|
|
3
4
|
|
|
4
5
|
let registryPromise = null;
|
|
@@ -80,9 +81,6 @@ export function buildQuickstartModelBaseUrl(entry, options = {}) {
|
|
|
80
81
|
}
|
|
81
82
|
const cdnBasePath = typeof options.cdnBasePath === 'string' && options.cdnBasePath.length > 0
|
|
82
83
|
? options.cdnBasePath
|
|
83
|
-
:
|
|
84
|
-
|
|
85
|
-
const base = cdnBasePath.replace(/\/$/, '');
|
|
86
|
-
const path = entry.hf.path.replace(/^\/+/, '');
|
|
87
|
-
return `${base}/${entry.hf.repoId}/resolve/${revision}/${path}`;
|
|
84
|
+
: getCdnBasePath();
|
|
85
|
+
return buildHfResolveBaseUrl(entry.hf, { cdnBasePath });
|
|
88
86
|
}
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
],
|
|
17
17
|
"hf": {
|
|
18
18
|
"repoId": "Clocksmith/rdrr",
|
|
19
|
-
"revision": "
|
|
19
|
+
"revision": "ca6f0dbdf3882d3893a65cf48f2bb6f1520df162",
|
|
20
20
|
"path": "models/gemma-3-270m-it-q4k-ehf16-af32"
|
|
21
21
|
}
|
|
22
22
|
},
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
],
|
|
33
33
|
"hf": {
|
|
34
34
|
"repoId": "Clocksmith/rdrr",
|
|
35
|
-
"revision": "
|
|
35
|
+
"revision": "7e79c466d54455bd370c81685956ea9abae0fd30",
|
|
36
36
|
"path": "models/google-embeddinggemma-300m-q4k-ehf16-af32"
|
|
37
37
|
}
|
|
38
38
|
}
|
|
@@ -134,6 +134,11 @@ export function getKernelPathStrict(): boolean;
|
|
|
134
134
|
*/
|
|
135
135
|
export function isKernelPathFusedQ4K(path?: KernelPathSchema | null): boolean;
|
|
136
136
|
|
|
137
|
+
/**
|
|
138
|
+
* Check if a kernel path requires matmul weights to stay in F32.
|
|
139
|
+
*/
|
|
140
|
+
export function kernelPathRequiresF32MatmulWeights(path?: KernelPathSchema | null): boolean;
|
|
141
|
+
|
|
137
142
|
/**
|
|
138
143
|
* Check if the active kernel path uses fused Q4K matmul.
|
|
139
144
|
*/
|
|
@@ -503,6 +503,19 @@ export function isKernelPathFusedQ4K(path = undefined) {
|
|
|
503
503
|
return kernelSteps.some((step) => step.kernel.includes('fused_matmul_q4'));
|
|
504
504
|
}
|
|
505
505
|
|
|
506
|
+
export function kernelPathRequiresF32MatmulWeights(path = undefined) {
|
|
507
|
+
const lookupPath = path === undefined ? activeKernelPath : path;
|
|
508
|
+
if (!lookupPath) return false;
|
|
509
|
+
const kernelSteps = [
|
|
510
|
+
...(lookupPath.decode?.steps ?? []),
|
|
511
|
+
...(lookupPath.prefill?.steps ?? []),
|
|
512
|
+
...(lookupPath.preLayer ?? []),
|
|
513
|
+
...(lookupPath.postLayer ?? []),
|
|
514
|
+
...(lookupPath.layerOverrides?.flatMap((override) => override.steps) ?? []),
|
|
515
|
+
];
|
|
516
|
+
return kernelSteps.some((step) => normalizeKernelFile(step.kernel) === 'matmul_f32.wgsl');
|
|
517
|
+
}
|
|
518
|
+
|
|
506
519
|
export function isActiveKernelPathFusedQ4K() {
|
|
507
520
|
return isKernelPathFusedQ4K(activeKernelPath);
|
|
508
521
|
}
|
|
@@ -59,8 +59,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
59
59
|
"cross_entropy_loss.wgsl#main": "5a48087bdec94184432c90ce5b345e1eadbdfcb13b9793ecee8052bc7392239c",
|
|
60
60
|
"depthwise_conv2d_f16.wgsl#main": "f7f093a7e6623ed17a675bac729149e94718aece916416966eaf03c1d6939f2a",
|
|
61
61
|
"depthwise_conv2d.wgsl#main": "cf14cb40d282ad4d4fab160109b97eaeaf12aab62579b73324ac485ac75155b0",
|
|
62
|
-
"dequant_f16_out_vec4.wgsl#main_vec4": "
|
|
63
|
-
"dequant_f16_out.wgsl#main": "
|
|
62
|
+
"dequant_f16_out_vec4.wgsl#main_vec4": "ff729cc220ba5425e17c4c537a9993f25b6541046b6c2553d2a43a8b40ed2ce9",
|
|
63
|
+
"dequant_f16_out.wgsl#main": "caed21e420cbace78d3203548962a5ec3fc36980f153ae775f6a91a31af97d3a",
|
|
64
64
|
"dequant_f16_rowwise.wgsl#main": "f5bf7cef950b52d65cee6121dbaa176244d3221045b3b6386b3be47f23ce17dc",
|
|
65
65
|
"dequant_f32_rowwise.wgsl#main": "e73606e1b47e1191203a210bececa8a597bcab8bcc535146718afa6a021cab0d",
|
|
66
66
|
"dequant_mxfp4_expert_f16.wgsl#main_expert": "96af52551ac40e1b86121a528a3ffaba835c5d0419e06407fed80353d46b17e1",
|
|
@@ -69,10 +69,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
69
69
|
"dequant_mxfp4.wgsl#main": "885a5f752b684c6ca0bb10e3a1846a396eef14d2158e8c8ad31bd1dd4c74b9ef",
|
|
70
70
|
"dequant_q6k.wgsl#main": "be0aed027932d8b7dd1e92d0090ced39e4df8be724acf290f52db0004be9a35e",
|
|
71
71
|
"dequant_q8_0.wgsl#main": "ff5f800da963b0502a9ffab723cbcac0bbb5eb9a02898afc2aba2db215a58da7",
|
|
72
|
-
"dequant_shared_vec4.wgsl#main_vec4": "
|
|
73
|
-
"dequant_shared.wgsl#main": "
|
|
74
|
-
"dequant_subgroup.wgsl#main": "
|
|
75
|
-
"dequant_subgroup.wgsl#main_vec4": "
|
|
72
|
+
"dequant_shared_vec4.wgsl#main_vec4": "24820dae36f6669a33f22b428df03791d9c700944c5ae33bd8c88e8cbeffd103",
|
|
73
|
+
"dequant_shared.wgsl#main": "e21284b5b70d4ac88d7c151760e451c2006705f1ea617b3db7f89994af4cc7df",
|
|
74
|
+
"dequant_subgroup.wgsl#main": "cbc2d86a5a2234b4c1691d5df02279263be7a66a1d4a2ad4aec1845a26baa9c9",
|
|
75
|
+
"dequant_subgroup.wgsl#main_vec4": "9e044bd0f44e73872dd8d8aa467e802c5471de86a2044de2cf8efc726e5a1182",
|
|
76
76
|
"energy_eval_f16.wgsl#main": "09223ae193593f3555866a3acfe76ca35442ef4f3967cae376bdcc211f3054b3",
|
|
77
77
|
"energy_eval.wgsl#main": "e10d9572397ebece5275aecd907cba5970f6a5c3744dd8b982677efb8982bdd2",
|
|
78
78
|
"energy_quintel_grad_f16.wgsl#main": "eb87ed8592b46b0a4d866c245b664cadb2bca016f72419e763402a6a721c4951",
|
|
@@ -142,9 +142,9 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
142
142
|
"matmul_gemv_subgroup_f16a.wgsl#main_multicol": "c8e86ecbbefa27a3b7366af676d89a992c2e951329cdf19abb57b9c90144379e",
|
|
143
143
|
"matmul_gemv_subgroup_f16a.wgsl#main_vec4": "f227a403cdf9717dd68224c9ea55708ffe14c618d8146f5d48b42af0f253df29",
|
|
144
144
|
"matmul_gemv_subgroup_f16a.wgsl#main_vec4_cols8": "9e7aba97a6cf199b3f574166e295ea051ebd59e308b5f6f2ce5a4de2d04963ce",
|
|
145
|
-
"matmul_gemv_subgroup.wgsl#main": "
|
|
146
|
-
"matmul_gemv_subgroup.wgsl#main_multicol": "
|
|
147
|
-
"matmul_gemv_subgroup.wgsl#main_vec4": "
|
|
145
|
+
"matmul_gemv_subgroup.wgsl#main": "ac84b6dc88fe077dc885d8547e55526bec2f792074dd8746f907ce4a7c342028",
|
|
146
|
+
"matmul_gemv_subgroup.wgsl#main_multicol": "6631ed8936b6316499e1e1493915dc02a2e137d4f4d2650b62ce63e8805067f1",
|
|
147
|
+
"matmul_gemv_subgroup.wgsl#main_vec4": "de04e5670494401dd975915e77a603e07144aa1c928c47270afe7a806428cbfd",
|
|
148
148
|
"matmul_gemv.wgsl#main": "dc892efc87edc6d5ddaf191b86c1cc41a603352a332023aa0b1fe55d166673d0",
|
|
149
149
|
"modulate_f16.wgsl#main": "44a98cda1cc7a3575788f865173b9890be792c94e852ac8311b6b8ffbdc1438d",
|
|
150
150
|
"modulate.wgsl#main": "dfe88a35b94752573199c16b3d8aecd4e8e7da57dc88d7b342aa61e0122e71ec",
|
|
@@ -182,18 +182,18 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
182
182
|
"rope.wgsl#rope_ntk_scaled": "818f89865a3d1d6f2d49f671ac882d0fde9709702160a1ae8d9a8ef113afb511",
|
|
183
183
|
"rope.wgsl#rope_qk": "3d773c8b8c400142edc8a4111afb04a2bf75bdb109b2d41cbe5afdb72a959772",
|
|
184
184
|
"rope.wgsl#rope_yarn": "cb00e1cf87fac198dcf0fb0d4e2d5f6f99d2fed6dff0a089a96bb459917851d2",
|
|
185
|
-
"sample_f16.wgsl#argmax": "
|
|
186
|
-
"sample_f16.wgsl#argmax_reduce": "
|
|
187
|
-
"sample_f16.wgsl#find_topk_phase1": "
|
|
188
|
-
"sample_f16.wgsl#find_topk_phase2": "
|
|
189
|
-
"sample_f16.wgsl#sample_single_pass": "
|
|
190
|
-
"sample_f16.wgsl#softmax_and_sample": "
|
|
191
|
-
"sample.wgsl#argmax": "
|
|
192
|
-
"sample.wgsl#argmax_reduce": "
|
|
193
|
-
"sample.wgsl#find_topk_phase1": "
|
|
194
|
-
"sample.wgsl#find_topk_phase2": "
|
|
195
|
-
"sample.wgsl#sample_single_pass": "
|
|
196
|
-
"sample.wgsl#softmax_and_sample": "
|
|
185
|
+
"sample_f16.wgsl#argmax": "7d7188081953a79b6d71bdf783d75df97a78401e2fe62e6d356cc44756a42c41",
|
|
186
|
+
"sample_f16.wgsl#argmax_reduce": "c0284bc9a50d25e215b21cb8c70e24dae3cb32b578691c2b6df5f3ede68a67c7",
|
|
187
|
+
"sample_f16.wgsl#find_topk_phase1": "8abd0a978d87adb7cce7337bb1b045a151768724f57802ad060df1dad735cff6",
|
|
188
|
+
"sample_f16.wgsl#find_topk_phase2": "ea7684cf0cc6014d39ac821edf9c89e140552b5009a72e6e91b00f8816678568",
|
|
189
|
+
"sample_f16.wgsl#sample_single_pass": "1cd8f0babc5c824b455080d30028109adfe52ce6f79009fbb986fde0d377fcb5",
|
|
190
|
+
"sample_f16.wgsl#softmax_and_sample": "c1d58cb952b704596d7ab6a2aa32b911a6e869e05b42adac3e4a19d898aa17ae",
|
|
191
|
+
"sample.wgsl#argmax": "f68b9cfdd3265a5cc52b216e549b629f1f8209e5aaa2f788142fa03db4c2d538",
|
|
192
|
+
"sample.wgsl#argmax_reduce": "96f8dd75a13db82e1928914e1f40ff1b9e03563eb5f8e3708b230f453b1fc160",
|
|
193
|
+
"sample.wgsl#find_topk_phase1": "736222d54f805b2791ebb803e9574fca93ab2b25fad0a64245f782499ce2d10c",
|
|
194
|
+
"sample.wgsl#find_topk_phase2": "a590107f0b7603b4b9624140dea1b436362062f63d64ed6d77e1628578796e77",
|
|
195
|
+
"sample.wgsl#sample_single_pass": "91c5c30bbc3e034457c1521c1ad576ce798c0868a1fe16e02be5f92706614096",
|
|
196
|
+
"sample.wgsl#softmax_and_sample": "132d67a1393702c81ee896975447f14f9a6a2589b25125d28401bc8ca47a253d",
|
|
197
197
|
"sana_linear_attention_apply_f16.wgsl#main": "4a7426ce67eccfb70956feeae84275f4d3cc586c50e8442c07eb69993b378ab5",
|
|
198
198
|
"sana_linear_attention_apply.wgsl#main": "5f69e0bc1d9e2df5a61e13bd819313c8f7ff5dfc4b7d78e71d5152dc23b6a86c",
|
|
199
199
|
"sana_linear_attention_summary_f16.wgsl#main": "3abb736ead999485b5dac9c6b534143b464cfd0b5300c5e03c56cec03c8fa48e",
|
|
@@ -213,6 +213,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
213
213
|
"softmax.wgsl#main": "45c5876806b442222d7e190e595f55a0079bae82e07d37586996c1a63790bb7a",
|
|
214
214
|
"softmax.wgsl#softmax_online": "6c62601ba2f88f7de9dacf026cc2357168df47d009fd108736655b645217cd0a",
|
|
215
215
|
"softmax.wgsl#softmax_small": "ad75f10e0a3caadd278130504e7d0e4e1b2f0621f8bd390abae5d973e301e47b",
|
|
216
|
+
"split_qg_f16.wgsl#main": "1d19e9d9900a270a3a1bd407347908f891ba98a1cdcee35ae932257a9a1c72cb",
|
|
217
|
+
"split_qg.wgsl#main": "64a8aa855c6246675bebdeab0258dc0e649e3986ef2bba4572d2d7dc1af902f4",
|
|
216
218
|
"split_qkv_f16.wgsl#main": "bd1a92fcd9382bdcdf00bcc59248a12489444904a4f10845a381f177b6ad649f",
|
|
217
219
|
"split_qkv.wgsl#main": "bc7c95a47322edc11fec19105efd3774c2adfed151530c849909d03af7503e4c",
|
|
218
220
|
"topk_f16_weights.wgsl#softmax_topk": "863559c28eb46a2b4dc16f21a19aca2424a5d68fc3430b29461bebdd7ec8f625",
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "mixtral-moe-v1",
|
|
3
|
+
"description": "Deterministic Mixtral-style MoE kernel routing profile for Q4K/F16 expert weights with standard gate/up/down FFN.",
|
|
4
|
+
"router": {
|
|
5
|
+
"topk": [
|
|
6
|
+
{
|
|
7
|
+
"match": { "hasF16": true, "hasSubgroups": true, "routerDtype": "f32" },
|
|
8
|
+
"value": "softmax_topk_f32_subgroup"
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"match": { "hasF16": true, "routerDtype": "f32" },
|
|
12
|
+
"value": "softmax_topk_f32"
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"match": {},
|
|
16
|
+
"value": "softmax_topk_f32"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
"dequant": {
|
|
21
|
+
"q4kExpert": [
|
|
22
|
+
{
|
|
23
|
+
"match": { "hasF16": true, "hasSubgroups": true, "outputDtype": "f32" },
|
|
24
|
+
"value": "q4k_expert_dequant_f32_subgroup"
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"match": { "hasF16": true, "outputDtype": "f16" },
|
|
28
|
+
"value": "q4k_expert_dequant_f16"
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"match": {},
|
|
32
|
+
"value": "q4k_expert_dequant_f32"
|
|
33
|
+
}
|
|
34
|
+
],
|
|
35
|
+
"f16Expert": [
|
|
36
|
+
{
|
|
37
|
+
"match": { "hasF16": true, "outputDtype": "f16" },
|
|
38
|
+
"value": "f16_expert_passthrough"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"match": {},
|
|
42
|
+
"value": "f16_expert_upcast_f32"
|
|
43
|
+
}
|
|
44
|
+
]
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -4322,6 +4322,80 @@
|
|
|
4322
4322
|
}
|
|
4323
4323
|
}
|
|
4324
4324
|
},
|
|
4325
|
+
"split_qg": {
|
|
4326
|
+
"description": "De-interleave Q and Gate projections from q_proj output for attentionOutputGate models",
|
|
4327
|
+
"baseBindings": [
|
|
4328
|
+
{
|
|
4329
|
+
"index": 0,
|
|
4330
|
+
"name": "uniforms",
|
|
4331
|
+
"type": "uniform"
|
|
4332
|
+
},
|
|
4333
|
+
{
|
|
4334
|
+
"index": 1,
|
|
4335
|
+
"name": "qg_interleaved",
|
|
4336
|
+
"type": "read-only-storage"
|
|
4337
|
+
},
|
|
4338
|
+
{
|
|
4339
|
+
"index": 2,
|
|
4340
|
+
"name": "Q",
|
|
4341
|
+
"type": "storage"
|
|
4342
|
+
},
|
|
4343
|
+
{
|
|
4344
|
+
"index": 3,
|
|
4345
|
+
"name": "G",
|
|
4346
|
+
"type": "storage"
|
|
4347
|
+
}
|
|
4348
|
+
],
|
|
4349
|
+
"baseUniforms": {
|
|
4350
|
+
"size": 16,
|
|
4351
|
+
"fields": [
|
|
4352
|
+
{
|
|
4353
|
+
"name": "num_tokens",
|
|
4354
|
+
"type": "u32",
|
|
4355
|
+
"offset": 0
|
|
4356
|
+
},
|
|
4357
|
+
{
|
|
4358
|
+
"name": "num_heads",
|
|
4359
|
+
"type": "u32",
|
|
4360
|
+
"offset": 4
|
|
4361
|
+
},
|
|
4362
|
+
{
|
|
4363
|
+
"name": "head_dim",
|
|
4364
|
+
"type": "u32",
|
|
4365
|
+
"offset": 8
|
|
4366
|
+
},
|
|
4367
|
+
{
|
|
4368
|
+
"name": "_pad",
|
|
4369
|
+
"type": "u32",
|
|
4370
|
+
"offset": 12
|
|
4371
|
+
}
|
|
4372
|
+
]
|
|
4373
|
+
},
|
|
4374
|
+
"variants": {
|
|
4375
|
+
"default": {
|
|
4376
|
+
"wgsl": "split_qg.wgsl",
|
|
4377
|
+
"entryPoint": "main",
|
|
4378
|
+
"workgroup": [
|
|
4379
|
+
256,
|
|
4380
|
+
1,
|
|
4381
|
+
1
|
|
4382
|
+
],
|
|
4383
|
+
"requires": []
|
|
4384
|
+
},
|
|
4385
|
+
"f16": {
|
|
4386
|
+
"wgsl": "split_qg_f16.wgsl",
|
|
4387
|
+
"entryPoint": "main",
|
|
4388
|
+
"workgroup": [
|
|
4389
|
+
256,
|
|
4390
|
+
1,
|
|
4391
|
+
1
|
|
4392
|
+
],
|
|
4393
|
+
"requires": [
|
|
4394
|
+
"shader-f16"
|
|
4395
|
+
]
|
|
4396
|
+
}
|
|
4397
|
+
}
|
|
4398
|
+
},
|
|
4325
4399
|
"sample": {
|
|
4326
4400
|
"description": "GPU-side sampling kernels",
|
|
4327
4401
|
"baseBindings": [
|
package/src/config/loader.js
CHANGED
|
@@ -12,6 +12,7 @@ const transformerPreset = await loadJson('./presets/models/transformer.json', im
|
|
|
12
12
|
const diffusionPreset = await loadJson('./presets/models/diffusion.json', import.meta.url, 'Failed to load preset');
|
|
13
13
|
const gemma2Preset = await loadJson('./presets/models/gemma2.json', import.meta.url, 'Failed to load preset');
|
|
14
14
|
const gemma3Preset = await loadJson('./presets/models/gemma3.json', import.meta.url, 'Failed to load preset');
|
|
15
|
+
const gemma4Preset = await loadJson('./presets/models/gemma4.json', import.meta.url, 'Failed to load preset');
|
|
15
16
|
const translateGemmaPreset = await loadJson('./presets/models/translategemma.json', import.meta.url, 'Failed to load preset');
|
|
16
17
|
const embeddingGemmaPreset = await loadJson('./presets/models/embeddinggemma.json', import.meta.url, 'Failed to load preset');
|
|
17
18
|
const functiongemmaPreset = await loadJson('./presets/models/functiongemma.json', import.meta.url, 'Failed to load preset');
|
|
@@ -23,6 +24,8 @@ const mambaPreset = await loadJson('./presets/models/mamba.json', import.meta.ur
|
|
|
23
24
|
const modernbertPreset = await loadJson('./presets/models/modernbert.json', import.meta.url, 'Failed to load preset');
|
|
24
25
|
const lfm2Preset = await loadJson('./presets/models/lfm2.json', import.meta.url, 'Failed to load preset');
|
|
25
26
|
const qwen3Preset = await loadJson('./presets/models/qwen3.json', import.meta.url, 'Failed to load preset');
|
|
27
|
+
const qwen35Preset = await loadJson('./presets/models/qwen3_5.json', import.meta.url, 'Failed to load preset');
|
|
28
|
+
const qwen3VlPreset = await loadJson('./presets/models/qwen3_vl.json', import.meta.url, 'Failed to load preset');
|
|
26
29
|
const kimiK2Preset = await loadJson('./presets/models/kimi-k2.json', import.meta.url, 'Failed to load preset');
|
|
27
30
|
const gptOssPreset = await loadJson('./presets/models/gpt-oss.json', import.meta.url, 'Failed to load preset');
|
|
28
31
|
|
|
@@ -35,6 +38,7 @@ export const PRESET_REGISTRY = {
|
|
|
35
38
|
transformer: transformerPreset,
|
|
36
39
|
gemma2: gemma2Preset,
|
|
37
40
|
gemma3: gemma3Preset,
|
|
41
|
+
gemma4: gemma4Preset,
|
|
38
42
|
translategemma: translateGemmaPreset,
|
|
39
43
|
embeddinggemma: embeddingGemmaPreset,
|
|
40
44
|
functiongemma: functiongemmaPreset,
|
|
@@ -46,6 +50,8 @@ export const PRESET_REGISTRY = {
|
|
|
46
50
|
modernbert: modernbertPreset,
|
|
47
51
|
lfm2: lfm2Preset,
|
|
48
52
|
qwen3: qwen3Preset,
|
|
53
|
+
qwen3_5: qwen35Preset,
|
|
54
|
+
qwen3_vl: qwen3VlPreset,
|
|
49
55
|
kimi_k2: kimiK2Preset,
|
|
50
56
|
gpt_oss: gptOssPreset,
|
|
51
57
|
};
|
|
@@ -93,10 +99,13 @@ export const PRESET_DETECTION_ORDER = [
|
|
|
93
99
|
'diffusion',
|
|
94
100
|
// Model families (check more specific patterns first)
|
|
95
101
|
'gemma2',
|
|
102
|
+
'gemma4',
|
|
96
103
|
'translategemma',
|
|
97
104
|
'gemma3',
|
|
98
105
|
'llama3',
|
|
99
106
|
'lfm2',
|
|
107
|
+
'qwen3_vl',
|
|
108
|
+
'qwen3_5',
|
|
100
109
|
'qwen3',
|
|
101
110
|
'kimi_k2',
|
|
102
111
|
'gpt_oss',
|
|
@@ -171,6 +171,13 @@ export function buildMergeContractArtifact() {
|
|
|
171
171
|
`configA=${isolatedConfigA.runtime.inference.compute.activationDtype}, configB=${isolatedConfigB.runtime.inference.compute.activationDtype}`,
|
|
172
172
|
'actual'
|
|
173
173
|
);
|
|
174
|
+
recordCheck(
|
|
175
|
+
checks,
|
|
176
|
+
'runtime.schema.storage.opfs_sync_access_handle_defaults_off',
|
|
177
|
+
isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle === false,
|
|
178
|
+
`value=${String(isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle)}`,
|
|
179
|
+
'actual'
|
|
180
|
+
);
|
|
174
181
|
|
|
175
182
|
const calibrateConfig = createDopplerConfig({
|
|
176
183
|
runtime: {
|