@simulatte/doppler 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/package.json +21 -36
- package/src/browser/browser-converter.js +5 -0
- package/src/client/doppler-registry.json +1 -17
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +13 -0
- package/src/config/kernels/registry.json +74 -0
- package/src/config/loader.js +3 -0
- package/src/config/merge-contract-check.js +7 -0
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +14 -0
- package/src/config/presets/models/gemma2.json +2 -1
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/manifest.schema.d.ts +1 -1
- package/src/config/schema/manifest.schema.js +1 -1
- package/src/config/schema/storage.schema.js +1 -1
- package/src/converter/conversion-plan.js +10 -2
- package/src/converter/core.js +2 -0
- package/src/converter/manifest-inference.js +12 -22
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +5 -1
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/distribution/shard-delivery.js +6 -1
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +14 -1
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +1 -1
- package/src/gpu/kernels/rmsnorm.js +9 -2
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/inference/browser-harness.d.ts +2 -0
- package/src/inference/browser-harness.js +20 -1
- package/src/inference/pipelines/diffusion/helpers.js +3 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +41 -11
- package/src/inference/pipelines/text/attention/record.js +15 -6
- package/src/inference/pipelines/text/attention/run.js +50 -6
- package/src/inference/pipelines/text/config.js +14 -0
- package/src/inference/pipelines/text/execution-plan.js +5 -4
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +6 -0
- package/src/inference/pipelines/text/generator-steps.js +43 -15
- package/src/inference/pipelines/text/generator.js +50 -17
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +16 -5
- package/src/inference/pipelines/text/layer.js +1 -0
- package/src/inference/pipelines/text/linear-attention.d.ts +5 -0
- package/src/inference/pipelines/text/linear-attention.js +33 -3
- package/src/inference/pipelines/text/logits/gpu.js +2 -2
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +3 -1
- package/src/inference/pipelines/text/model-load.js +3 -0
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/test-harness.js +2 -2
- package/src/loader/final-weights-loader.js +2 -0
- package/src/loader/shard-cache.js +3 -2
- package/src/loader/tensors/tensor-loader.js +6 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +2 -2
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +2 -0
- package/src/storage/downloader.js +2 -1
- package/src/storage/shard-manager.js +4 -3
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/node-converter.js +3 -0
- package/src/tooling/node-source-runtime.js +36 -0
- package/src/types/model.d.ts +5 -0
- package/tools/doppler-cli.js +6 -1
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,25 @@ This changelog is package-facing and release-oriented. Entries before `0.1.7`
|
|
|
6
6
|
were retrofitted from package version history, release commits, and release
|
|
7
7
|
docs so the `0.1.x` line has one conventional npm-visible history surface.
|
|
8
8
|
|
|
9
|
+
## [0.1.8] - 2026-03-13
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Simplified demo to show only verified Q4K models (Gemma 3 270M, Gemma 3 1B).
|
|
14
|
+
Hidden Translate, Diffusion, and Embedding tabs until models are ready.
|
|
15
|
+
- Trimmed hosted HF registry and quickstart registry to the two verified models.
|
|
16
|
+
- Aligned catalog, HF registry, and quickstart registry to the canonical
|
|
17
|
+
external support registry as single source of truth for HF revisions.
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
|
|
21
|
+
- Fixed Qwen 3.5 conversion configs using wrong model preset (`qwen3` instead
|
|
22
|
+
of `qwen3_5`), which caused support matrix check failures.
|
|
23
|
+
- Fixed catalog lifecycle metadata inconsistencies: corrected `local`, `hf`,
|
|
24
|
+
`curated`, and `demo` fields to match actual artifact availability.
|
|
25
|
+
- Removed failing and unverified models from demo visibility (TranslateGemma 4B,
|
|
26
|
+
EmbeddingGemma 300M with broken HF manifest, Qwen 3.5 0.8B/2B, F16 variant).
|
|
27
|
+
|
|
9
28
|
## [0.1.7] - 2026-03-10
|
|
10
29
|
|
|
11
30
|
### Added
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@simulatte/doppler",
|
|
3
|
-
"version": "0.1.7",
|
|
3
|
+
"version": "0.1.8",
|
|
4
4
|
"description": "Browser-native WebGPU inference engine for local intent and inference loops",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "src/index.d.ts",
|
|
@@ -29,22 +29,22 @@
|
|
|
29
29
|
"bench:chart": "node ./benchmarks/vendors/compare-chart.js",
|
|
30
30
|
"bench:chart:readme": "node ./benchmarks/vendors/compare-chart.js --preset readme-evidence",
|
|
31
31
|
"bench:architecture:chart": "node ./benchmarks/vendors/generate-architecture-overview-svg.js",
|
|
32
|
-
"ci:diffusion:contract": "node tools/ci-diffusion-contract-gates.
|
|
33
|
-
"ci:diffusion:contract:list": "node tools/ci-diffusion-contract-gates.
|
|
34
|
-
"ci:training:contract": "node tools/ci-training-contract-gates.
|
|
35
|
-
"ci:training:contract:list": "node tools/ci-training-contract-gates.
|
|
36
|
-
"training:contract:delta": "node tools/emit-training-contract-delta.
|
|
37
|
-
"training:workloads:verify": "node tools/verify-training-workload-packs.
|
|
38
|
-
"training:report-ids:publish": "node tools/publish-training-report-ids.
|
|
39
|
-
"distill:studio:mvp": "node tools/distill-studio-mvp.
|
|
40
|
-
"distill:quality-gate": "node tools/distill-studio-quality-gate.
|
|
41
|
-
"p2p:observability": "node tools/p2p-delivery-observability.
|
|
42
|
-
"p2p:drill": "node tools/p2p-resilience-drill.
|
|
32
|
+
"ci:diffusion:contract": "node tools/ci-diffusion-contract-gates.js",
|
|
33
|
+
"ci:diffusion:contract:list": "node tools/ci-diffusion-contract-gates.js --list",
|
|
34
|
+
"ci:training:contract": "node tools/ci-training-contract-gates.js",
|
|
35
|
+
"ci:training:contract:list": "node tools/ci-training-contract-gates.js --list",
|
|
36
|
+
"training:contract:delta": "node tools/emit-training-contract-delta.js",
|
|
37
|
+
"training:workloads:verify": "node tools/verify-training-workload-packs.js --registry tools/configs/training-workloads/registry.json",
|
|
38
|
+
"training:report-ids:publish": "node tools/publish-training-report-ids.js --registry tools/configs/training-workloads/registry.json",
|
|
39
|
+
"distill:studio:mvp": "node tools/distill-studio-mvp.js",
|
|
40
|
+
"distill:quality-gate": "node tools/distill-studio-quality-gate.js",
|
|
41
|
+
"p2p:observability": "node tools/p2p-delivery-observability.js",
|
|
42
|
+
"p2p:drill": "node tools/p2p-resilience-drill.js",
|
|
43
43
|
"test": "npm run test:unit",
|
|
44
|
-
"test:unit": "node tools/run-node-tests.
|
|
45
|
-
"test:gpu": "node tools/run-node-tests.
|
|
46
|
-
"test:coverage": "node tools/run-node-coverage.
|
|
47
|
-
"test:coverage:report": "node tools/run-node-coverage.
|
|
44
|
+
"test:unit": "node tools/run-node-tests.js --suite unit",
|
|
45
|
+
"test:gpu": "node tools/run-node-tests.js --suite gpu",
|
|
46
|
+
"test:coverage": "node tools/run-node-coverage.js",
|
|
47
|
+
"test:coverage:report": "node tools/run-node-coverage.js --no-threshold",
|
|
48
48
|
"test:gpu:browser": "node tools/doppler-cli.js verify --config '{\"request\":{\"suite\":\"kernels\"},\"run\":{\"surface\":\"browser\",\"browser\":{\"opfsCache\":false,\"headless\":true,\"channel\":\"chromium\",\"browserArgs\":[\"--use-angle=swiftshader\",\"--disable-vulkan-surface\"],\"console\":true}}}'",
|
|
49
49
|
"agents:verify": "node tools/verify-agent-parity.js",
|
|
50
50
|
"agents:freshness": "node tools/verify-agent-freshness.js",
|
|
@@ -74,9 +74,10 @@
|
|
|
74
74
|
"ci:catalog:check": "npm run registry:sync:scripts:check && npm run support:matrix:check && npm run registry:hf:check",
|
|
75
75
|
"external:rdrr:index": "node tools/sync-external-rdrr-index.js",
|
|
76
76
|
"external:rdrr:index:check": "node tools/sync-external-rdrr-index.js --check",
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
77
|
+
"external:support:sync": "node tools/sync-external-support-registry.js",
|
|
78
|
+
"external:support:check": "node tools/sync-external-support-registry.js --check",
|
|
79
|
+
"catalog:sync:external": "node tools/sync-catalog-from-external-support.js",
|
|
80
|
+
"catalog:sync:external:check": "node tools/sync-catalog-from-external-support.js --check",
|
|
80
81
|
"verify:gemma-3-1b-it-q4k-ehf16-af32": "node tools/run-registry-verify.js gemma-3-1b-it-q4k-ehf16-af32",
|
|
81
82
|
"verify:gemma-3-1b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js gemma-3-1b-it-wq4k-ef16-hf16",
|
|
82
83
|
"verify:gemma-3-270m-it-q4k-ehf16-af32": "node tools/run-registry-verify.js gemma-3-270m-it-q4k-ehf16-af32",
|
|
@@ -84,25 +85,9 @@
|
|
|
84
85
|
"verify:gemma-3-270m-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js gemma-3-270m-it-wq4k-ef16-hf16",
|
|
85
86
|
"verify:gemma-3-270m-it-wq4k-ef16-hf16-f32": "node tools/run-registry-verify.js gemma-3-270m-it-wq4k-ef16-hf16-f32",
|
|
86
87
|
"verify:gemma3-1b": "node tools/run-registry-verify.js gemma3-1b",
|
|
87
|
-
"verify:gemma3-1b-f16": "node tools/run-registry-verify.js gemma3-1b-f16",
|
|
88
88
|
"verify:gemma3-270m": "node tools/run-registry-verify.js gemma3-270m",
|
|
89
|
-
"verify:google-embeddinggemma-300m": "node tools/run-registry-verify.js google-embeddinggemma-300m",
|
|
90
|
-
"verify:google-embeddinggemma-300m-q4k-ehf16-af32": "node tools/run-registry-verify.js google-embeddinggemma-300m-q4k-ehf16-af32",
|
|
91
|
-
"verify:google-embeddinggemma-300m-wq4k-ef16": "node tools/run-registry-verify.js google-embeddinggemma-300m-wq4k-ef16",
|
|
92
89
|
"verify:google-gemma-3-1b-it": "node tools/run-registry-verify.js google-gemma-3-1b-it",
|
|
93
|
-
"verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it"
|
|
94
|
-
"verify:google-translategemma-4b-it": "node tools/run-registry-verify.js google-translategemma-4b-it",
|
|
95
|
-
"verify:qwen-3-5-0-8b": "node tools/run-registry-verify.js qwen-3-5-0-8b",
|
|
96
|
-
"verify:qwen-3-5-0-8b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-0-8b-wq4k-ef16-hf16-f16",
|
|
97
|
-
"verify:qwen-3-5-2b": "node tools/run-registry-verify.js qwen-3-5-2b",
|
|
98
|
-
"verify:qwen-3-5-2b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-2b-wq4k-ef16-hf16-f16",
|
|
99
|
-
"verify:qwen-qwen3.5-0.8b": "node tools/run-registry-verify.js qwen-qwen3.5-0.8b",
|
|
100
|
-
"verify:qwen-qwen3.5-2b": "node tools/run-registry-verify.js qwen-qwen3.5-2b",
|
|
101
|
-
"verify:qwen3-0.8b": "node tools/run-registry-verify.js qwen3-0.8b",
|
|
102
|
-
"verify:qwen3-2b": "node tools/run-registry-verify.js qwen3-2b",
|
|
103
|
-
"verify:translategemma": "node tools/run-registry-verify.js translategemma",
|
|
104
|
-
"verify:translategemma-4b": "node tools/run-registry-verify.js translategemma-4b",
|
|
105
|
-
"verify:translategemma-4b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js translategemma-4b-it-wq4k-ef16-hf16"
|
|
90
|
+
"verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it"
|
|
106
91
|
},
|
|
107
92
|
"exports": {
|
|
108
93
|
".": {
|
|
@@ -408,6 +408,7 @@ export async function convertModel(files, options = {}) {
|
|
|
408
408
|
// Parse based on format
|
|
409
409
|
let modelInfo;
|
|
410
410
|
let config = null;
|
|
411
|
+
let generationConfig = null;
|
|
411
412
|
let tokenizerJson = null;
|
|
412
413
|
let tokenizerConfig = null;
|
|
413
414
|
let tokenizerModel = null;
|
|
@@ -455,6 +456,10 @@ export async function convertModel(files, options = {}) {
|
|
|
455
456
|
tokenizerConfig = await parseTokenizerConfigJson(auxiliary.tokenizerConfig);
|
|
456
457
|
modelInfo.tokenizerConfig = tokenizerConfig;
|
|
457
458
|
}
|
|
459
|
+
if (auxiliary.generationConfig) {
|
|
460
|
+
generationConfig = await parseConfigJson(auxiliary.generationConfig);
|
|
461
|
+
modelInfo.generationConfig = generationConfig;
|
|
462
|
+
}
|
|
458
463
|
if (auxiliary.tokenizerModel) {
|
|
459
464
|
const source = normalizeTensorSource(auxiliary.tokenizerModel);
|
|
460
465
|
tokenizerModel = await source.readRange(0, source.size);
|
|
@@ -16,25 +16,9 @@
|
|
|
16
16
|
],
|
|
17
17
|
"hf": {
|
|
18
18
|
"repoId": "Clocksmith/rdrr",
|
|
19
|
-
"revision": "
|
|
19
|
+
"revision": "ca6f0dbdf3882d3893a65cf48f2bb6f1520df162",
|
|
20
20
|
"path": "models/gemma-3-270m-it-q4k-ehf16-af32"
|
|
21
21
|
}
|
|
22
|
-
},
|
|
23
|
-
{
|
|
24
|
-
"modelId": "google-embeddinggemma-300m-q4k-ehf16-af32",
|
|
25
|
-
"aliases": [
|
|
26
|
-
"embeddinggemma-300m",
|
|
27
|
-
"google/embeddinggemma-300m",
|
|
28
|
-
"google-embeddinggemma-300m-wq4k-ef16"
|
|
29
|
-
],
|
|
30
|
-
"modes": [
|
|
31
|
-
"embedding"
|
|
32
|
-
],
|
|
33
|
-
"hf": {
|
|
34
|
-
"repoId": "Clocksmith/rdrr",
|
|
35
|
-
"revision": "b23aca921ea11729d6f34b9484555968a5ab0e42",
|
|
36
|
-
"path": "models/google-embeddinggemma-300m-q4k-ehf16-af32"
|
|
37
|
-
}
|
|
38
22
|
}
|
|
39
23
|
]
|
|
40
24
|
}
|
|
@@ -134,6 +134,11 @@ export function getKernelPathStrict(): boolean;
|
|
|
134
134
|
*/
|
|
135
135
|
export function isKernelPathFusedQ4K(path?: KernelPathSchema | null): boolean;
|
|
136
136
|
|
|
137
|
+
/**
|
|
138
|
+
* Check if a kernel path requires matmul weights to stay in F32.
|
|
139
|
+
*/
|
|
140
|
+
export function kernelPathRequiresF32MatmulWeights(path?: KernelPathSchema | null): boolean;
|
|
141
|
+
|
|
137
142
|
/**
|
|
138
143
|
* Check if the active kernel path uses fused Q4K matmul.
|
|
139
144
|
*/
|
|
@@ -503,6 +503,19 @@ export function isKernelPathFusedQ4K(path = undefined) {
|
|
|
503
503
|
return kernelSteps.some((step) => step.kernel.includes('fused_matmul_q4'));
|
|
504
504
|
}
|
|
505
505
|
|
|
506
|
+
export function kernelPathRequiresF32MatmulWeights(path = undefined) {
|
|
507
|
+
const lookupPath = path === undefined ? activeKernelPath : path;
|
|
508
|
+
if (!lookupPath) return false;
|
|
509
|
+
const kernelSteps = [
|
|
510
|
+
...(lookupPath.decode?.steps ?? []),
|
|
511
|
+
...(lookupPath.prefill?.steps ?? []),
|
|
512
|
+
...(lookupPath.preLayer ?? []),
|
|
513
|
+
...(lookupPath.postLayer ?? []),
|
|
514
|
+
...(lookupPath.layerOverrides?.flatMap((override) => override.steps) ?? []),
|
|
515
|
+
];
|
|
516
|
+
return kernelSteps.some((step) => normalizeKernelFile(step.kernel) === 'matmul_f32.wgsl');
|
|
517
|
+
}
|
|
518
|
+
|
|
506
519
|
export function isActiveKernelPathFusedQ4K() {
|
|
507
520
|
return isKernelPathFusedQ4K(activeKernelPath);
|
|
508
521
|
}
|
|
@@ -4322,6 +4322,80 @@
|
|
|
4322
4322
|
}
|
|
4323
4323
|
}
|
|
4324
4324
|
},
|
|
4325
|
+
"split_qg": {
|
|
4326
|
+
"description": "De-interleave Q and Gate projections from q_proj output for attentionOutputGate models",
|
|
4327
|
+
"baseBindings": [
|
|
4328
|
+
{
|
|
4329
|
+
"index": 0,
|
|
4330
|
+
"name": "uniforms",
|
|
4331
|
+
"type": "uniform"
|
|
4332
|
+
},
|
|
4333
|
+
{
|
|
4334
|
+
"index": 1,
|
|
4335
|
+
"name": "qg_interleaved",
|
|
4336
|
+
"type": "read-only-storage"
|
|
4337
|
+
},
|
|
4338
|
+
{
|
|
4339
|
+
"index": 2,
|
|
4340
|
+
"name": "Q",
|
|
4341
|
+
"type": "storage"
|
|
4342
|
+
},
|
|
4343
|
+
{
|
|
4344
|
+
"index": 3,
|
|
4345
|
+
"name": "G",
|
|
4346
|
+
"type": "storage"
|
|
4347
|
+
}
|
|
4348
|
+
],
|
|
4349
|
+
"baseUniforms": {
|
|
4350
|
+
"size": 16,
|
|
4351
|
+
"fields": [
|
|
4352
|
+
{
|
|
4353
|
+
"name": "num_tokens",
|
|
4354
|
+
"type": "u32",
|
|
4355
|
+
"offset": 0
|
|
4356
|
+
},
|
|
4357
|
+
{
|
|
4358
|
+
"name": "num_heads",
|
|
4359
|
+
"type": "u32",
|
|
4360
|
+
"offset": 4
|
|
4361
|
+
},
|
|
4362
|
+
{
|
|
4363
|
+
"name": "head_dim",
|
|
4364
|
+
"type": "u32",
|
|
4365
|
+
"offset": 8
|
|
4366
|
+
},
|
|
4367
|
+
{
|
|
4368
|
+
"name": "_pad",
|
|
4369
|
+
"type": "u32",
|
|
4370
|
+
"offset": 12
|
|
4371
|
+
}
|
|
4372
|
+
]
|
|
4373
|
+
},
|
|
4374
|
+
"variants": {
|
|
4375
|
+
"default": {
|
|
4376
|
+
"wgsl": "split_qg.wgsl",
|
|
4377
|
+
"entryPoint": "main",
|
|
4378
|
+
"workgroup": [
|
|
4379
|
+
256,
|
|
4380
|
+
1,
|
|
4381
|
+
1
|
|
4382
|
+
],
|
|
4383
|
+
"requires": []
|
|
4384
|
+
},
|
|
4385
|
+
"f16": {
|
|
4386
|
+
"wgsl": "split_qg_f16.wgsl",
|
|
4387
|
+
"entryPoint": "main",
|
|
4388
|
+
"workgroup": [
|
|
4389
|
+
256,
|
|
4390
|
+
1,
|
|
4391
|
+
1
|
|
4392
|
+
],
|
|
4393
|
+
"requires": [
|
|
4394
|
+
"shader-f16"
|
|
4395
|
+
]
|
|
4396
|
+
}
|
|
4397
|
+
}
|
|
4398
|
+
},
|
|
4325
4399
|
"sample": {
|
|
4326
4400
|
"description": "GPU-side sampling kernels",
|
|
4327
4401
|
"baseBindings": [
|
package/src/config/loader.js
CHANGED
|
@@ -23,6 +23,7 @@ const mambaPreset = await loadJson('./presets/models/mamba.json', import.meta.ur
|
|
|
23
23
|
const modernbertPreset = await loadJson('./presets/models/modernbert.json', import.meta.url, 'Failed to load preset');
|
|
24
24
|
const lfm2Preset = await loadJson('./presets/models/lfm2.json', import.meta.url, 'Failed to load preset');
|
|
25
25
|
const qwen3Preset = await loadJson('./presets/models/qwen3.json', import.meta.url, 'Failed to load preset');
|
|
26
|
+
const qwen35Preset = await loadJson('./presets/models/qwen3_5.json', import.meta.url, 'Failed to load preset');
|
|
26
27
|
const kimiK2Preset = await loadJson('./presets/models/kimi-k2.json', import.meta.url, 'Failed to load preset');
|
|
27
28
|
const gptOssPreset = await loadJson('./presets/models/gpt-oss.json', import.meta.url, 'Failed to load preset');
|
|
28
29
|
|
|
@@ -46,6 +47,7 @@ export const PRESET_REGISTRY = {
|
|
|
46
47
|
modernbert: modernbertPreset,
|
|
47
48
|
lfm2: lfm2Preset,
|
|
48
49
|
qwen3: qwen3Preset,
|
|
50
|
+
qwen3_5: qwen35Preset,
|
|
49
51
|
kimi_k2: kimiK2Preset,
|
|
50
52
|
gpt_oss: gptOssPreset,
|
|
51
53
|
};
|
|
@@ -97,6 +99,7 @@ export const PRESET_DETECTION_ORDER = [
|
|
|
97
99
|
'gemma3',
|
|
98
100
|
'llama3',
|
|
99
101
|
'lfm2',
|
|
102
|
+
'qwen3_5',
|
|
100
103
|
'qwen3',
|
|
101
104
|
'kimi_k2',
|
|
102
105
|
'gpt_oss',
|
|
@@ -171,6 +171,13 @@ export function buildMergeContractArtifact() {
|
|
|
171
171
|
`configA=${isolatedConfigA.runtime.inference.compute.activationDtype}, configB=${isolatedConfigB.runtime.inference.compute.activationDtype}`,
|
|
172
172
|
'actual'
|
|
173
173
|
);
|
|
174
|
+
recordCheck(
|
|
175
|
+
checks,
|
|
176
|
+
'runtime.schema.storage.opfs_sync_access_handle_defaults_off',
|
|
177
|
+
isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle === false,
|
|
178
|
+
`value=${String(isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle)}`,
|
|
179
|
+
'actual'
|
|
180
|
+
);
|
|
174
181
|
|
|
175
182
|
const calibrateConfig = createDopplerConfig({
|
|
176
183
|
runtime: {
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "gemma3-q4k-dequant-f32w-f32a-online",
|
|
3
|
+
"name": "Gemma 3 Q4K Dequant (F32 projection weights, F32 activations, online decode)",
|
|
4
|
+
"description": "Q4K projection weights dequantized to F32 with F32 activations. Tied embeddings and LM head stay on the native F16 path. Decode uses online attention; prefill uses streaming attention.",
|
|
5
|
+
"activationDtype": "f32",
|
|
6
|
+
"kvDtype": "f16",
|
|
7
|
+
"decode": {
|
|
8
|
+
"steps": [
|
|
9
|
+
{ "op": "input_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
10
|
+
{ "op": "q_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.q_proj" },
|
|
11
|
+
{ "op": "k_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.k_proj" },
|
|
12
|
+
{ "op": "v_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.v_proj" },
|
|
13
|
+
{ "op": "rope_q", "kernel": "rope.wgsl", "entry": "main" },
|
|
14
|
+
{ "op": "rope_k", "kernel": "rope.wgsl", "entry": "main" },
|
|
15
|
+
{ "op": "attention", "kernel": "attention_decode_online_f16kv.wgsl", "entry": "main" },
|
|
16
|
+
{ "op": "o_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.o_proj" },
|
|
17
|
+
{ "op": "attn_residual", "kernel": "residual.wgsl", "entry": "main" },
|
|
18
|
+
{ "op": "post_attn_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
19
|
+
{ "op": "gate_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.gate_proj" },
|
|
20
|
+
{ "op": "up_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.up_proj" },
|
|
21
|
+
{ "op": "activation", "kernel": "gelu.wgsl", "entry": "main", "constants": { "HAS_GATE": true } },
|
|
22
|
+
{ "op": "down_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.down_proj" },
|
|
23
|
+
{ "op": "ffn_residual", "kernel": "residual.wgsl", "entry": "main" }
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
"prefill": {
|
|
27
|
+
"steps": [
|
|
28
|
+
{ "op": "input_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
29
|
+
{ "op": "q_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.q_proj" },
|
|
30
|
+
{ "op": "k_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.k_proj" },
|
|
31
|
+
{ "op": "v_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.v_proj" },
|
|
32
|
+
{ "op": "rope_q", "kernel": "rope.wgsl", "entry": "main" },
|
|
33
|
+
{ "op": "rope_k", "kernel": "rope.wgsl", "entry": "main" },
|
|
34
|
+
{ "op": "attention", "kernel": "attention_streaming_f16kv.wgsl", "entry": "main" },
|
|
35
|
+
{ "op": "o_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.o_proj" },
|
|
36
|
+
{ "op": "attn_residual", "kernel": "residual.wgsl", "entry": "main" },
|
|
37
|
+
{ "op": "post_attn_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
38
|
+
{ "op": "gate_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.gate_proj" },
|
|
39
|
+
{ "op": "up_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.up_proj" },
|
|
40
|
+
{ "op": "activation", "kernel": "gelu.wgsl", "entry": "main", "constants": { "HAS_GATE": true } },
|
|
41
|
+
{ "op": "down_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.down_proj" },
|
|
42
|
+
{ "op": "ffn_residual", "kernel": "residual.wgsl", "entry": "main" }
|
|
43
|
+
]
|
|
44
|
+
},
|
|
45
|
+
"preLayer": [
|
|
46
|
+
{ "op": "embed", "kernel": "gather_f16.wgsl", "entry": "main", "weights": "embed_tokens" }
|
|
47
|
+
],
|
|
48
|
+
"postLayer": [
|
|
49
|
+
{ "op": "final_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
50
|
+
{ "op": "lm_head", "kernel": "matmul_gemv_subgroup.wgsl", "entry": "main_multicol", "weights": "lm_head", "constants": { "MULTICOL_COLS_PER_WG": 64, "MULTICOL_THREADS_PER_COL": 4 } },
|
|
51
|
+
{ "op": "lm_head_prefill", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "lm_head" }
|
|
52
|
+
],
|
|
53
|
+
"sampling": [
|
|
54
|
+
{ "op": "sample", "kernel": "sample.wgsl", "entry": "sample_single_pass" }
|
|
55
|
+
]
|
|
56
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "lfm2-q4k-dequant-f32a-nosubgroups",
|
|
3
|
+
"name": "LFM2 Q4K Dequant (F32 activations, no subgroups)",
|
|
4
|
+
"description": "Subgroup-free LFM2 Q4K path: F32 activations with tiled prefill matmul and small-kernel prefill attention. Still requires shader-f16 kernels.",
|
|
5
|
+
"activationDtype": "f32",
|
|
6
|
+
"kvDtype": "f16",
|
|
7
|
+
|
|
8
|
+
"decode": {
|
|
9
|
+
"steps": [
|
|
10
|
+
{ "op": "input_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
11
|
+
{ "op": "q_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.q_proj" },
|
|
12
|
+
{ "op": "k_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.k_proj" },
|
|
13
|
+
{ "op": "v_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.v_proj" },
|
|
14
|
+
{ "op": "rope_q", "kernel": "rope.wgsl", "entry": "main" },
|
|
15
|
+
{ "op": "rope_k", "kernel": "rope.wgsl", "entry": "main" },
|
|
16
|
+
{ "op": "attention", "kernel": "attention_decode_chunked_f16kv.wgsl", "entry": "main" },
|
|
17
|
+
{ "op": "o_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.o_proj" },
|
|
18
|
+
{ "op": "attn_residual", "kernel": "residual.wgsl", "entry": "main" },
|
|
19
|
+
{ "op": "post_attn_norm","kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
20
|
+
{ "op": "gate_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.mlp.gate_proj" },
|
|
21
|
+
{ "op": "up_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.mlp.up_proj" },
|
|
22
|
+
{ "op": "activation", "kernel": "gelu.wgsl", "entry": "main", "constants": { "HAS_GATE": true } },
|
|
23
|
+
{ "op": "down_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.mlp.down_proj" },
|
|
24
|
+
{ "op": "ffn_residual", "kernel": "residual.wgsl", "entry": "main" }
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
|
|
28
|
+
"prefill": {
|
|
29
|
+
"steps": [
|
|
30
|
+
{ "op": "input_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
31
|
+
{ "op": "q_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.q_proj" },
|
|
32
|
+
{ "op": "k_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.k_proj" },
|
|
33
|
+
{ "op": "v_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.v_proj" },
|
|
34
|
+
{ "op": "rope_q", "kernel": "rope.wgsl", "entry": "main" },
|
|
35
|
+
{ "op": "rope_k", "kernel": "rope.wgsl", "entry": "main" },
|
|
36
|
+
{ "op": "attention", "kernel": "attention_small_f16kv.wgsl", "entry": "main" },
|
|
37
|
+
{ "op": "o_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.o_proj" },
|
|
38
|
+
{ "op": "attn_residual", "kernel": "residual.wgsl", "entry": "main" },
|
|
39
|
+
{ "op": "post_attn_norm","kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
40
|
+
{ "op": "gate_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.mlp.gate_proj" },
|
|
41
|
+
{ "op": "up_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.mlp.up_proj" },
|
|
42
|
+
{ "op": "activation", "kernel": "gelu.wgsl", "entry": "main", "constants": { "HAS_GATE": true } },
|
|
43
|
+
{ "op": "down_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.mlp.down_proj" },
|
|
44
|
+
{ "op": "ffn_residual", "kernel": "residual.wgsl", "entry": "main" }
|
|
45
|
+
]
|
|
46
|
+
},
|
|
47
|
+
|
|
48
|
+
"preLayer": [
|
|
49
|
+
{ "op": "embed", "kernel": "gather_f16.wgsl", "entry": "main", "weights": "embed_tokens" }
|
|
50
|
+
],
|
|
51
|
+
|
|
52
|
+
"postLayer": [
|
|
53
|
+
{ "op": "final_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
|
|
54
|
+
{ "op": "lm_head", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "lm_head" },
|
|
55
|
+
{ "op": "lm_head_prefill", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "lm_head" }
|
|
56
|
+
],
|
|
57
|
+
|
|
58
|
+
"sampling": [
|
|
59
|
+
{ "op": "sample", "kernel": "sample.wgsl", "entry": "sample_single_pass" }
|
|
60
|
+
]
|
|
61
|
+
}
|
|
@@ -92,6 +92,13 @@
|
|
|
92
92
|
"statusReason": "default",
|
|
93
93
|
"notes": "Gemma 3 Q4K dequant default: subgroup GEMV + online attention + tuned lm_head multicol, F32 activations."
|
|
94
94
|
},
|
|
95
|
+
{
|
|
96
|
+
"id": "gemma3-q4k-dequant-f32w-f32a-online",
|
|
97
|
+
"file": "gemma3-q4k-dequant-f32w-f32a-online.json",
|
|
98
|
+
"status": "experimental",
|
|
99
|
+
"statusReason": "accuracy-probe",
|
|
100
|
+
"notes": "Gemma 3 Q4K dequant path that keeps matmul weights in F32 and runs F32 matmul kernels for numeric-sensitivity debugging."
|
|
101
|
+
},
|
|
95
102
|
{
|
|
96
103
|
"id": "lfm2-q4k-dequant-f32a-online",
|
|
97
104
|
"file": "lfm2-q4k-dequant-f32a-online.json",
|
|
@@ -99,6 +106,13 @@
|
|
|
99
106
|
"statusReason": "default",
|
|
100
107
|
"notes": "LFM2 Q4K default: subgroup GEMV decode with tiled fast-prefill path and F32 activations."
|
|
101
108
|
},
|
|
109
|
+
{
|
|
110
|
+
"id": "lfm2-q4k-dequant-f32a-nosubgroups",
|
|
111
|
+
"file": "lfm2-q4k-dequant-f32a-nosubgroups.json",
|
|
112
|
+
"status": "canonical",
|
|
113
|
+
"statusReason": "subgroup-free",
|
|
114
|
+
"notes": "Subgroup-free LFM2 Q4K dequant path with F32 activations and tiled prefill. Still requires shader-f16 kernels."
|
|
115
|
+
},
|
|
102
116
|
{
|
|
103
117
|
"id": "embeddinggemma-f16-f32a",
|
|
104
118
|
"file": "embeddinggemma-f16-f32a.json",
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
"inference": {
|
|
11
11
|
"attention": {
|
|
12
|
+
"queryPreAttnScalar": 256,
|
|
12
13
|
"slidingWindow": 4096,
|
|
13
14
|
"attnLogitSoftcapping": 50.0,
|
|
14
15
|
"queryKeyNorm": false
|
|
@@ -40,7 +41,7 @@
|
|
|
40
41
|
"f32": "gemma2-f16-f32a"
|
|
41
42
|
},
|
|
42
43
|
"q4k": {
|
|
43
|
-
"f16": "gemma2-q4k-dequant-
|
|
44
|
+
"f16": "gemma2-q4k-dequant-f16a",
|
|
44
45
|
"f32": "gemma2-q4k-dequant-f32a-nosubgroups"
|
|
45
46
|
}
|
|
46
47
|
}
|
|
@@ -10,7 +10,8 @@
|
|
|
10
10
|
"inference": {
|
|
11
11
|
"attention": {
|
|
12
12
|
"slidingWindow": null,
|
|
13
|
-
"queryKeyNorm": true
|
|
13
|
+
"queryKeyNorm": true,
|
|
14
|
+
"attentionOutputGate": true
|
|
14
15
|
},
|
|
15
16
|
"output": {
|
|
16
17
|
"scaleEmbeddings": false
|
|
@@ -39,8 +40,8 @@
|
|
|
39
40
|
},
|
|
40
41
|
|
|
41
42
|
"detection": {
|
|
42
|
-
"architecturePatterns": ["qwen3", "
|
|
43
|
-
"modelTypePatterns": ["
|
|
43
|
+
"architecturePatterns": ["qwen3", "Qwen3ForCausalLM", "Qwen2ForCausalLM"],
|
|
44
|
+
"modelTypePatterns": ["qwen3", "qwen2"],
|
|
44
45
|
"configPatterns": {
|
|
45
46
|
"model_type": "qwen2"
|
|
46
47
|
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "qwen3_5",
|
|
3
|
+
"name": "Qwen 3.5",
|
|
4
|
+
"extends": "qwen3",
|
|
5
|
+
|
|
6
|
+
"inference": {
|
|
7
|
+
"normalization": {
|
|
8
|
+
"rmsNormWeightOffset": true
|
|
9
|
+
}
|
|
10
|
+
},
|
|
11
|
+
|
|
12
|
+
"detection": {
|
|
13
|
+
"architecturePatterns": ["qwen3_5", "Qwen3_5ForCausalLM", "Qwen3_5ForConditionalGeneration"],
|
|
14
|
+
"modelTypePatterns": ["qwen3_5", "qwen3_5_text"]
|
|
15
|
+
}
|
|
16
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "model/qwen3-5-layer-probe",
|
|
3
|
+
"name": "qwen3-5-layer-probe",
|
|
4
|
+
"description": "Probe all 24 layer outputs in Qwen 3.5 to isolate where the hidden state distribution collapses.",
|
|
5
|
+
"intent": "investigate",
|
|
6
|
+
"stability": "canonical",
|
|
7
|
+
"owner": "doppler-core",
|
|
8
|
+
"createdAtUtc": "2026-03-13T00:00:00Z",
|
|
9
|
+
"extends": "modes/debug",
|
|
10
|
+
"runtime": {
|
|
11
|
+
"inference": {
|
|
12
|
+
"prompt": "What color is the sky on a clear day? Answer in one word.",
|
|
13
|
+
"batching": {
|
|
14
|
+
"maxTokens": 1
|
|
15
|
+
},
|
|
16
|
+
"sampling": {
|
|
17
|
+
"temperature": 0
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"shared": {
|
|
21
|
+
"debug": {
|
|
22
|
+
"trace": {
|
|
23
|
+
"enabled": true,
|
|
24
|
+
"categories": ["attn", "ffn", "logits"],
|
|
25
|
+
"layers": null,
|
|
26
|
+
"maxDecodeSteps": 1
|
|
27
|
+
},
|
|
28
|
+
"probes": [
|
|
29
|
+
{
|
|
30
|
+
"id": "embed",
|
|
31
|
+
"stage": "embed_out",
|
|
32
|
+
"tokens": [-1],
|
|
33
|
+
"dims": [0, 1, 2, 3, 512, 513]
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"id": "layer_out",
|
|
37
|
+
"stage": "layer_out",
|
|
38
|
+
"layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
|
|
39
|
+
"tokens": [-1],
|
|
40
|
+
"dims": [0, 1, 2, 3]
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"id": "logits",
|
|
44
|
+
"stage": "logits_final",
|
|
45
|
+
"tokens": [-1],
|
|
46
|
+
"dims": [271, 0, 1, 2, 3, 496, 138]
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|