@simulatte/doppler 0.1.7 → 0.1.8

Files changed (88)
  1. package/CHANGELOG.md +19 -0
  2. package/package.json +21 -36
  3. package/src/browser/browser-converter.js +5 -0
  4. package/src/client/doppler-registry.json +1 -17
  5. package/src/config/kernel-path-loader.d.ts +5 -0
  6. package/src/config/kernel-path-loader.js +13 -0
  7. package/src/config/kernels/registry.json +74 -0
  8. package/src/config/loader.js +3 -0
  9. package/src/config/merge-contract-check.js +7 -0
  10. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  11. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  12. package/src/config/presets/kernel-paths/registry.json +14 -0
  13. package/src/config/presets/models/gemma2.json +2 -1
  14. package/src/config/presets/models/gemma3.json +2 -0
  15. package/src/config/presets/models/qwen3.json +4 -3
  16. package/src/config/presets/models/qwen3_5.json +16 -0
  17. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  18. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  19. package/src/config/schema/conversion.schema.d.ts +1 -0
  20. package/src/config/schema/manifest.schema.d.ts +1 -1
  21. package/src/config/schema/manifest.schema.js +1 -1
  22. package/src/config/schema/storage.schema.js +1 -1
  23. package/src/converter/conversion-plan.js +10 -2
  24. package/src/converter/core.js +2 -0
  25. package/src/converter/manifest-inference.js +12 -22
  26. package/src/converter/parsers/transformer.js +4 -0
  27. package/src/converter/quantization-info.js +5 -1
  28. package/src/converter/quantizer.js +19 -12
  29. package/src/converter/rope-config.js +8 -6
  30. package/src/converter/tokenizer-utils.d.ts +1 -0
  31. package/src/converter/tokenizer-utils.js +4 -1
  32. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  33. package/src/distribution/shard-delivery.js +6 -1
  34. package/src/formats/rdrr/parsing.d.ts +4 -0
  35. package/src/formats/rdrr/parsing.js +14 -1
  36. package/src/gpu/kernels/index.d.ts +8 -0
  37. package/src/gpu/kernels/index.js +6 -0
  38. package/src/gpu/kernels/matmul-selection.js +47 -4
  39. package/src/gpu/kernels/matmul.d.ts +2 -0
  40. package/src/gpu/kernels/matmul.js +1 -1
  41. package/src/gpu/kernels/rmsnorm.js +9 -2
  42. package/src/gpu/kernels/split_qg.d.ts +50 -0
  43. package/src/gpu/kernels/split_qg.js +46 -0
  44. package/src/gpu/kernels/split_qg.wgsl +58 -0
  45. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  46. package/src/gpu/weight-buffer.d.ts +1 -1
  47. package/src/gpu/weight-buffer.js +1 -1
  48. package/src/inference/browser-harness.d.ts +2 -0
  49. package/src/inference/browser-harness.js +20 -1
  50. package/src/inference/pipelines/diffusion/helpers.js +3 -0
  51. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  52. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  53. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  54. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  55. package/src/inference/pipelines/text/attention/projections.js +41 -11
  56. package/src/inference/pipelines/text/attention/record.js +15 -6
  57. package/src/inference/pipelines/text/attention/run.js +50 -6
  58. package/src/inference/pipelines/text/config.js +14 -0
  59. package/src/inference/pipelines/text/execution-plan.js +5 -4
  60. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  61. package/src/inference/pipelines/text/generator-steps.d.ts +6 -0
  62. package/src/inference/pipelines/text/generator-steps.js +43 -15
  63. package/src/inference/pipelines/text/generator.js +50 -17
  64. package/src/inference/pipelines/text/init.d.ts +13 -0
  65. package/src/inference/pipelines/text/init.js +16 -5
  66. package/src/inference/pipelines/text/layer.js +1 -0
  67. package/src/inference/pipelines/text/linear-attention.d.ts +5 -0
  68. package/src/inference/pipelines/text/linear-attention.js +33 -3
  69. package/src/inference/pipelines/text/logits/gpu.js +2 -2
  70. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  71. package/src/inference/pipelines/text/logits/index.js +3 -1
  72. package/src/inference/pipelines/text/model-load.js +3 -0
  73. package/src/inference/pipelines/text/sampling.js +52 -6
  74. package/src/inference/test-harness.js +2 -2
  75. package/src/loader/final-weights-loader.js +2 -0
  76. package/src/loader/shard-cache.js +3 -2
  77. package/src/loader/tensors/tensor-loader.js +6 -1
  78. package/src/rules/inference/dtype.rules.json +5 -0
  79. package/src/rules/inference/kernel-path.rules.json +2 -2
  80. package/src/rules/kernels/split-qg.rules.json +6 -0
  81. package/src/rules/rule-registry.js +2 -0
  82. package/src/storage/downloader.js +2 -1
  83. package/src/storage/shard-manager.js +4 -3
  84. package/src/tooling/conversion-config-materializer.js +3 -5
  85. package/src/tooling/node-converter.js +3 -0
  86. package/src/tooling/node-source-runtime.js +36 -0
  87. package/src/types/model.d.ts +5 -0
  88. package/tools/doppler-cli.js +6 -1
package/CHANGELOG.md CHANGED
@@ -6,6 +6,25 @@ This changelog is package-facing and release-oriented. Entries before `0.1.7`
 were retrofitted from package version history, release commits, and release
 docs so the `0.1.x` line has one conventional npm-visible history surface.
 
+## [0.1.8] - 2026-03-13
+
+### Changed
+
+- Simplified demo to show only verified Q4K models (Gemma 3 270M, Gemma 3 1B).
+  Hidden Translate, Diffusion, and Embedding tabs until models are ready.
+- Trimmed hosted HF registry and quickstart registry to the two verified models.
+- Aligned catalog, HF registry, and quickstart registry to the canonical
+  external support registry as single source of truth for HF revisions.
+
+### Fixed
+
+- Fixed Qwen 3.5 conversion configs using wrong model preset (`qwen3` instead
+  of `qwen3_5`), which caused support matrix check failures.
+- Fixed catalog lifecycle metadata inconsistencies: corrected `local`, `hf`,
+  `curated`, and `demo` fields to match actual artifact availability.
+- Removed failing and unverified models from demo visibility (TranslateGemma 4B,
+  EmbeddingGemma 300M with broken HF manifest, Qwen 3.5 0.8B/2B, F16 variant).
+
 ## [0.1.7] - 2026-03-10
 
 ### Added
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@simulatte/doppler",
-  "version": "0.1.7",
+  "version": "0.1.8",
   "description": "Browser-native WebGPU inference engine for local intent and inference loops",
   "main": "src/index.js",
   "types": "src/index.d.ts",
@@ -29,22 +29,22 @@
     "bench:chart": "node ./benchmarks/vendors/compare-chart.js",
     "bench:chart:readme": "node ./benchmarks/vendors/compare-chart.js --preset readme-evidence",
     "bench:architecture:chart": "node ./benchmarks/vendors/generate-architecture-overview-svg.js",
-    "ci:diffusion:contract": "node tools/ci-diffusion-contract-gates.mjs",
-    "ci:diffusion:contract:list": "node tools/ci-diffusion-contract-gates.mjs --list",
-    "ci:training:contract": "node tools/ci-training-contract-gates.mjs",
-    "ci:training:contract:list": "node tools/ci-training-contract-gates.mjs --list",
-    "training:contract:delta": "node tools/emit-training-contract-delta.mjs",
-    "training:workloads:verify": "node tools/verify-training-workload-packs.mjs --registry tools/configs/training-workloads/registry.json",
-    "training:report-ids:publish": "node tools/publish-training-report-ids.mjs --registry tools/configs/training-workloads/registry.json",
-    "distill:studio:mvp": "node tools/distill-studio-mvp.mjs",
-    "distill:quality-gate": "node tools/distill-studio-quality-gate.mjs",
-    "p2p:observability": "node tools/p2p-delivery-observability.mjs",
-    "p2p:drill": "node tools/p2p-resilience-drill.mjs",
+    "ci:diffusion:contract": "node tools/ci-diffusion-contract-gates.js",
+    "ci:diffusion:contract:list": "node tools/ci-diffusion-contract-gates.js --list",
+    "ci:training:contract": "node tools/ci-training-contract-gates.js",
+    "ci:training:contract:list": "node tools/ci-training-contract-gates.js --list",
+    "training:contract:delta": "node tools/emit-training-contract-delta.js",
+    "training:workloads:verify": "node tools/verify-training-workload-packs.js --registry tools/configs/training-workloads/registry.json",
+    "training:report-ids:publish": "node tools/publish-training-report-ids.js --registry tools/configs/training-workloads/registry.json",
+    "distill:studio:mvp": "node tools/distill-studio-mvp.js",
+    "distill:quality-gate": "node tools/distill-studio-quality-gate.js",
+    "p2p:observability": "node tools/p2p-delivery-observability.js",
+    "p2p:drill": "node tools/p2p-resilience-drill.js",
     "test": "npm run test:unit",
-    "test:unit": "node tools/run-node-tests.mjs --suite unit",
-    "test:gpu": "node tools/run-node-tests.mjs --suite gpu",
-    "test:coverage": "node tools/run-node-coverage.mjs",
-    "test:coverage:report": "node tools/run-node-coverage.mjs --no-threshold",
+    "test:unit": "node tools/run-node-tests.js --suite unit",
+    "test:gpu": "node tools/run-node-tests.js --suite gpu",
+    "test:coverage": "node tools/run-node-coverage.js",
+    "test:coverage:report": "node tools/run-node-coverage.js --no-threshold",
     "test:gpu:browser": "node tools/doppler-cli.js verify --config '{\"request\":{\"suite\":\"kernels\"},\"run\":{\"surface\":\"browser\",\"browser\":{\"opfsCache\":false,\"headless\":true,\"channel\":\"chromium\",\"browserArgs\":[\"--use-angle=swiftshader\",\"--disable-vulkan-surface\"],\"console\":true}}}'",
     "agents:verify": "node tools/verify-agent-parity.js",
     "agents:freshness": "node tools/verify-agent-freshness.js",
@@ -74,9 +74,10 @@
     "ci:catalog:check": "npm run registry:sync:scripts:check && npm run support:matrix:check && npm run registry:hf:check",
     "external:rdrr:index": "node tools/sync-external-rdrr-index.js",
     "external:rdrr:index:check": "node tools/sync-external-rdrr-index.js --check",
-    "verify:embeddinggemma-300m": "node tools/run-registry-verify.js embeddinggemma-300m",
-    "verify:gemma-3-1b-it-f16": "node tools/run-registry-verify.js gemma-3-1b-it-f16",
-    "verify:gemma-3-1b-it-f16-af32": "node tools/run-registry-verify.js gemma-3-1b-it-f16-af32",
+    "external:support:sync": "node tools/sync-external-support-registry.js",
+    "external:support:check": "node tools/sync-external-support-registry.js --check",
+    "catalog:sync:external": "node tools/sync-catalog-from-external-support.js",
+    "catalog:sync:external:check": "node tools/sync-catalog-from-external-support.js --check",
     "verify:gemma-3-1b-it-q4k-ehf16-af32": "node tools/run-registry-verify.js gemma-3-1b-it-q4k-ehf16-af32",
     "verify:gemma-3-1b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js gemma-3-1b-it-wq4k-ef16-hf16",
     "verify:gemma-3-270m-it-q4k-ehf16-af32": "node tools/run-registry-verify.js gemma-3-270m-it-q4k-ehf16-af32",
@@ -84,25 +85,9 @@
     "verify:gemma-3-270m-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js gemma-3-270m-it-wq4k-ef16-hf16",
     "verify:gemma-3-270m-it-wq4k-ef16-hf16-f32": "node tools/run-registry-verify.js gemma-3-270m-it-wq4k-ef16-hf16-f32",
     "verify:gemma3-1b": "node tools/run-registry-verify.js gemma3-1b",
-    "verify:gemma3-1b-f16": "node tools/run-registry-verify.js gemma3-1b-f16",
     "verify:gemma3-270m": "node tools/run-registry-verify.js gemma3-270m",
-    "verify:google-embeddinggemma-300m": "node tools/run-registry-verify.js google-embeddinggemma-300m",
-    "verify:google-embeddinggemma-300m-q4k-ehf16-af32": "node tools/run-registry-verify.js google-embeddinggemma-300m-q4k-ehf16-af32",
-    "verify:google-embeddinggemma-300m-wq4k-ef16": "node tools/run-registry-verify.js google-embeddinggemma-300m-wq4k-ef16",
     "verify:google-gemma-3-1b-it": "node tools/run-registry-verify.js google-gemma-3-1b-it",
-    "verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it",
-    "verify:google-translategemma-4b-it": "node tools/run-registry-verify.js google-translategemma-4b-it",
-    "verify:qwen-3-5-0-8b": "node tools/run-registry-verify.js qwen-3-5-0-8b",
-    "verify:qwen-3-5-0-8b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-0-8b-wq4k-ef16-hf16-f16",
-    "verify:qwen-3-5-2b": "node tools/run-registry-verify.js qwen-3-5-2b",
-    "verify:qwen-3-5-2b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-2b-wq4k-ef16-hf16-f16",
-    "verify:qwen-qwen3.5-0.8b": "node tools/run-registry-verify.js qwen-qwen3.5-0.8b",
-    "verify:qwen-qwen3.5-2b": "node tools/run-registry-verify.js qwen-qwen3.5-2b",
-    "verify:qwen3-0.8b": "node tools/run-registry-verify.js qwen3-0.8b",
-    "verify:qwen3-2b": "node tools/run-registry-verify.js qwen3-2b",
-    "verify:translategemma": "node tools/run-registry-verify.js translategemma",
-    "verify:translategemma-4b": "node tools/run-registry-verify.js translategemma-4b",
-    "verify:translategemma-4b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js translategemma-4b-it-wq4k-ef16-hf16"
+    "verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it"
   },
   "exports": {
     ".": {
package/src/browser/browser-converter.js CHANGED
@@ -408,6 +408,7 @@ export async function convertModel(files, options = {}) {
   // Parse based on format
   let modelInfo;
   let config = null;
+  let generationConfig = null;
   let tokenizerJson = null;
   let tokenizerConfig = null;
   let tokenizerModel = null;
@@ -455,6 +456,10 @@
     tokenizerConfig = await parseTokenizerConfigJson(auxiliary.tokenizerConfig);
     modelInfo.tokenizerConfig = tokenizerConfig;
   }
+  if (auxiliary.generationConfig) {
+    generationConfig = await parseConfigJson(auxiliary.generationConfig);
+    modelInfo.generationConfig = generationConfig;
+  }
   if (auxiliary.tokenizerModel) {
     const source = normalizeTensorSource(auxiliary.tokenizerModel);
     tokenizerModel = await source.readRange(0, source.size);
package/src/client/doppler-registry.json CHANGED
@@ -16,25 +16,9 @@
       ],
       "hf": {
         "repoId": "Clocksmith/rdrr",
-        "revision": "cd6c12be0e83e92d6dbd92598a0aa94391ec7e94",
+        "revision": "ca6f0dbdf3882d3893a65cf48f2bb6f1520df162",
         "path": "models/gemma-3-270m-it-q4k-ehf16-af32"
       }
-    },
-    {
-      "modelId": "google-embeddinggemma-300m-q4k-ehf16-af32",
-      "aliases": [
-        "embeddinggemma-300m",
-        "google/embeddinggemma-300m",
-        "google-embeddinggemma-300m-wq4k-ef16"
-      ],
-      "modes": [
-        "embedding"
-      ],
-      "hf": {
-        "repoId": "Clocksmith/rdrr",
-        "revision": "b23aca921ea11729d6f34b9484555968a5ab0e42",
-        "path": "models/google-embeddinggemma-300m-q4k-ehf16-af32"
-      }
     }
   ]
 }
package/src/config/kernel-path-loader.d.ts CHANGED
@@ -134,6 +134,11 @@ export function getKernelPathStrict(): boolean;
  */
 export function isKernelPathFusedQ4K(path?: KernelPathSchema | null): boolean;
 
+/**
+ * Check if a kernel path requires matmul weights to stay in F32.
+ */
+export function kernelPathRequiresF32MatmulWeights(path?: KernelPathSchema | null): boolean;
+
 /**
  * Check if the active kernel path uses fused Q4K matmul.
  */
package/src/config/kernel-path-loader.js CHANGED
@@ -503,6 +503,19 @@ export function isKernelPathFusedQ4K(path = undefined) {
   return kernelSteps.some((step) => step.kernel.includes('fused_matmul_q4'));
 }
 
+export function kernelPathRequiresF32MatmulWeights(path = undefined) {
+  const lookupPath = path === undefined ? activeKernelPath : path;
+  if (!lookupPath) return false;
+  const kernelSteps = [
+    ...(lookupPath.decode?.steps ?? []),
+    ...(lookupPath.prefill?.steps ?? []),
+    ...(lookupPath.preLayer ?? []),
+    ...(lookupPath.postLayer ?? []),
+    ...(lookupPath.layerOverrides?.flatMap((override) => override.steps) ?? []),
+  ];
+  return kernelSteps.some((step) => normalizeKernelFile(step.kernel) === 'matmul_f32.wgsl');
+}
+
 export function isActiveKernelPathFusedQ4K() {
   return isKernelPathFusedQ4K(activeKernelPath);
 }
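The new predicate mirrors `isKernelPathFusedQ4K`: it flattens every step list in a kernel path and reports whether any step resolves to `matmul_f32.wgsl`. A minimal sketch of how a loader might branch on it; the caller, the import path, and the dtype strings are hypothetical, not part of this diff:

    // Hypothetical caller: pick the dequant target for Q4K projection weights.
    // Paths that run matmul_f32.wgsl need weights materialized as F32;
    // other paths can keep the F16 weight buffers.
    import { kernelPathRequiresF32MatmulWeights } from './src/config/kernel-path-loader.js';

    function chooseMatmulWeightDtype(kernelPath) {
      return kernelPathRequiresF32MatmulWeights(kernelPath) ? 'f32' : 'f16';
    }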
package/src/config/kernels/registry.json CHANGED
@@ -4322,6 +4322,80 @@
       }
     }
   },
+  "split_qg": {
+    "description": "De-interleave Q and Gate projections from q_proj output for attentionOutputGate models",
+    "baseBindings": [
+      {
+        "index": 0,
+        "name": "uniforms",
+        "type": "uniform"
+      },
+      {
+        "index": 1,
+        "name": "qg_interleaved",
+        "type": "read-only-storage"
+      },
+      {
+        "index": 2,
+        "name": "Q",
+        "type": "storage"
+      },
+      {
+        "index": 3,
+        "name": "G",
+        "type": "storage"
+      }
+    ],
+    "baseUniforms": {
+      "size": 16,
+      "fields": [
+        {
+          "name": "num_tokens",
+          "type": "u32",
+          "offset": 0
+        },
+        {
+          "name": "num_heads",
+          "type": "u32",
+          "offset": 4
+        },
+        {
+          "name": "head_dim",
+          "type": "u32",
+          "offset": 8
+        },
+        {
+          "name": "_pad",
+          "type": "u32",
+          "offset": 12
+        }
+      ]
+    },
+    "variants": {
+      "default": {
+        "wgsl": "split_qg.wgsl",
+        "entryPoint": "main",
+        "workgroup": [
+          256,
+          1,
+          1
+        ],
+        "requires": []
+      },
+      "f16": {
+        "wgsl": "split_qg_f16.wgsl",
+        "entryPoint": "main",
+        "workgroup": [
+          256,
+          1,
+          1
+        ],
+        "requires": [
+          "shader-f16"
+        ]
+      }
+    }
+  },
   "sample": {
     "description": "GPU-side sampling kernels",
     "baseBindings": [
  "baseBindings": [
@@ -23,6 +23,7 @@ const mambaPreset = await loadJson('./presets/models/mamba.json', import.meta.ur
23
23
  const modernbertPreset = await loadJson('./presets/models/modernbert.json', import.meta.url, 'Failed to load preset');
24
24
  const lfm2Preset = await loadJson('./presets/models/lfm2.json', import.meta.url, 'Failed to load preset');
25
25
  const qwen3Preset = await loadJson('./presets/models/qwen3.json', import.meta.url, 'Failed to load preset');
26
+ const qwen35Preset = await loadJson('./presets/models/qwen3_5.json', import.meta.url, 'Failed to load preset');
26
27
  const kimiK2Preset = await loadJson('./presets/models/kimi-k2.json', import.meta.url, 'Failed to load preset');
27
28
  const gptOssPreset = await loadJson('./presets/models/gpt-oss.json', import.meta.url, 'Failed to load preset');
28
29
 
@@ -46,6 +47,7 @@ export const PRESET_REGISTRY = {
46
47
  modernbert: modernbertPreset,
47
48
  lfm2: lfm2Preset,
48
49
  qwen3: qwen3Preset,
50
+ qwen3_5: qwen35Preset,
49
51
  kimi_k2: kimiK2Preset,
50
52
  gpt_oss: gptOssPreset,
51
53
  };
@@ -97,6 +99,7 @@ export const PRESET_DETECTION_ORDER = [
97
99
  'gemma3',
98
100
  'llama3',
99
101
  'lfm2',
102
+ 'qwen3_5',
100
103
  'qwen3',
101
104
  'kimi_k2',
102
105
  'gpt_oss',
package/src/config/merge-contract-check.js CHANGED
@@ -171,6 +171,13 @@ export function buildMergeContractArtifact() {
     `configA=${isolatedConfigA.runtime.inference.compute.activationDtype}, configB=${isolatedConfigB.runtime.inference.compute.activationDtype}`,
     'actual'
   );
+  recordCheck(
+    checks,
+    'runtime.schema.storage.opfs_sync_access_handle_defaults_off',
+    isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle === false,
+    `value=${String(isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle)}`,
+    'actual'
+  );
 
   const calibrateConfig = createDopplerConfig({
     runtime: {
package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json ADDED
@@ -0,0 +1,56 @@
+{
+  "id": "gemma3-q4k-dequant-f32w-f32a-online",
+  "name": "Gemma 3 Q4K Dequant (F32 projection weights, F32 activations, online decode)",
+  "description": "Q4K projection weights dequantized to F32 with F32 activations. Tied embeddings and LM head stay on the native F16 path. Decode uses online attention; prefill uses streaming attention.",
+  "activationDtype": "f32",
+  "kvDtype": "f16",
+  "decode": {
+    "steps": [
+      { "op": "input_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+      { "op": "q_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.q_proj" },
+      { "op": "k_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.k_proj" },
+      { "op": "v_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.v_proj" },
+      { "op": "rope_q", "kernel": "rope.wgsl", "entry": "main" },
+      { "op": "rope_k", "kernel": "rope.wgsl", "entry": "main" },
+      { "op": "attention", "kernel": "attention_decode_online_f16kv.wgsl", "entry": "main" },
+      { "op": "o_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.o_proj" },
+      { "op": "attn_residual", "kernel": "residual.wgsl", "entry": "main" },
+      { "op": "post_attn_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+      { "op": "gate_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.gate_proj" },
+      { "op": "up_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.up_proj" },
+      { "op": "activation", "kernel": "gelu.wgsl", "entry": "main", "constants": { "HAS_GATE": true } },
+      { "op": "down_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.down_proj" },
+      { "op": "ffn_residual", "kernel": "residual.wgsl", "entry": "main" }
+    ]
+  },
+  "prefill": {
+    "steps": [
+      { "op": "input_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+      { "op": "q_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.q_proj" },
+      { "op": "k_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.k_proj" },
+      { "op": "v_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.v_proj" },
+      { "op": "rope_q", "kernel": "rope.wgsl", "entry": "main" },
+      { "op": "rope_k", "kernel": "rope.wgsl", "entry": "main" },
+      { "op": "attention", "kernel": "attention_streaming_f16kv.wgsl", "entry": "main" },
+      { "op": "o_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.o_proj" },
+      { "op": "attn_residual", "kernel": "residual.wgsl", "entry": "main" },
+      { "op": "post_attn_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+      { "op": "gate_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.gate_proj" },
+      { "op": "up_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.up_proj" },
+      { "op": "activation", "kernel": "gelu.wgsl", "entry": "main", "constants": { "HAS_GATE": true } },
+      { "op": "down_proj", "kernel": "matmul_f32.wgsl", "entry": "main", "weights": "layer.{L}.mlp.down_proj" },
+      { "op": "ffn_residual", "kernel": "residual.wgsl", "entry": "main" }
    ]
+  },
+  "preLayer": [
+    { "op": "embed", "kernel": "gather_f16.wgsl", "entry": "main", "weights": "embed_tokens" }
+  ],
+  "postLayer": [
+    { "op": "final_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+    { "op": "lm_head", "kernel": "matmul_gemv_subgroup.wgsl", "entry": "main_multicol", "weights": "lm_head", "constants": { "MULTICOL_COLS_PER_WG": 64, "MULTICOL_THREADS_PER_COL": 4 } },
+    { "op": "lm_head_prefill", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "lm_head" }
+  ],
+  "sampling": [
+    { "op": "sample", "kernel": "sample.wgsl", "entry": "sample_single_pass" }
+  ]
+}
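Kernel-path presets reference per-layer weights through a `{L}` placeholder (`layer.{L}.self_attn.q_proj` and so on). A sketch of the substitution a runtime would perform when materializing one layer's steps; the helper name is hypothetical, not from this package:

    // Expand a step list for a concrete layer: every "{L}" in a weights
    // reference becomes the layer index.
    function materializeSteps(steps, layerIndex) {
      return steps.map((step) =>
        step.weights
          ? { ...step, weights: step.weights.replaceAll('{L}', String(layerIndex)) }
          : step
      );
    }

    // materializeSteps(preset.decode.steps, 3)[1].weights → "layer.3.self_attn.q_proj"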
package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json ADDED
@@ -0,0 +1,61 @@
+{
+  "id": "lfm2-q4k-dequant-f32a-nosubgroups",
+  "name": "LFM2 Q4K Dequant (F32 activations, no subgroups)",
+  "description": "Subgroup-free LFM2 Q4K path: F32 activations with tiled prefill matmul and small-kernel prefill attention. Still requires shader-f16 kernels.",
+  "activationDtype": "f32",
+  "kvDtype": "f16",
+
+  "decode": {
+    "steps": [
+      { "op": "input_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+      { "op": "q_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.q_proj" },
+      { "op": "k_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.k_proj" },
+      { "op": "v_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.v_proj" },
+      { "op": "rope_q", "kernel": "rope.wgsl", "entry": "main" },
+      { "op": "rope_k", "kernel": "rope.wgsl", "entry": "main" },
+      { "op": "attention", "kernel": "attention_decode_chunked_f16kv.wgsl", "entry": "main" },
+      { "op": "o_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.o_proj" },
+      { "op": "attn_residual", "kernel": "residual.wgsl", "entry": "main" },
+      { "op": "post_attn_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+      { "op": "gate_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.mlp.gate_proj" },
+      { "op": "up_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.mlp.up_proj" },
+      { "op": "activation", "kernel": "gelu.wgsl", "entry": "main", "constants": { "HAS_GATE": true } },
+      { "op": "down_proj", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "layer.{L}.mlp.down_proj" },
+      { "op": "ffn_residual", "kernel": "residual.wgsl", "entry": "main" }
+    ]
+  },
+
+  "prefill": {
+    "steps": [
+      { "op": "input_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+      { "op": "q_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.q_proj" },
+      { "op": "k_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.k_proj" },
+      { "op": "v_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.v_proj" },
+      { "op": "rope_q", "kernel": "rope.wgsl", "entry": "main" },
+      { "op": "rope_k", "kernel": "rope.wgsl", "entry": "main" },
+      { "op": "attention", "kernel": "attention_small_f16kv.wgsl", "entry": "main" },
+      { "op": "o_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.self_attn.o_proj" },
+      { "op": "attn_residual", "kernel": "residual.wgsl", "entry": "main" },
+      { "op": "post_attn_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+      { "op": "gate_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.mlp.gate_proj" },
+      { "op": "up_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.mlp.up_proj" },
+      { "op": "activation", "kernel": "gelu.wgsl", "entry": "main", "constants": { "HAS_GATE": true } },
+      { "op": "down_proj", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "layer.{L}.mlp.down_proj" },
+      { "op": "ffn_residual", "kernel": "residual.wgsl", "entry": "main" }
+    ]
+  },
+
+  "preLayer": [
+    { "op": "embed", "kernel": "gather_f16.wgsl", "entry": "main", "weights": "embed_tokens" }
+  ],
+
+  "postLayer": [
+    { "op": "final_norm", "kernel": "rmsnorm.wgsl", "entry": "main" },
+    { "op": "lm_head", "kernel": "matmul_f16w_f32a.wgsl", "entry": "main", "weights": "lm_head" },
+    { "op": "lm_head_prefill", "kernel": "matmul_f16w_f32a_tiled.wgsl", "entry": "main", "weights": "lm_head" }
+  ],
+
+  "sampling": [
+    { "op": "sample", "kernel": "sample.wgsl", "entry": "sample_single_pass" }
+  ]
+}
package/src/config/presets/kernel-paths/registry.json CHANGED
@@ -92,6 +92,13 @@
       "statusReason": "default",
       "notes": "Gemma 3 Q4K dequant default: subgroup GEMV + online attention + tuned lm_head multicol, F32 activations."
     },
+    {
+      "id": "gemma3-q4k-dequant-f32w-f32a-online",
+      "file": "gemma3-q4k-dequant-f32w-f32a-online.json",
+      "status": "experimental",
+      "statusReason": "accuracy-probe",
+      "notes": "Gemma 3 Q4K dequant path that keeps matmul weights in F32 and runs F32 matmul kernels for numeric-sensitivity debugging."
+    },
     {
       "id": "lfm2-q4k-dequant-f32a-online",
       "file": "lfm2-q4k-dequant-f32a-online.json",
@@ -99,6 +106,13 @@
       "statusReason": "default",
       "notes": "LFM2 Q4K default: subgroup GEMV decode with tiled fast-prefill path and F32 activations."
     },
+    {
+      "id": "lfm2-q4k-dequant-f32a-nosubgroups",
+      "file": "lfm2-q4k-dequant-f32a-nosubgroups.json",
+      "status": "canonical",
+      "statusReason": "subgroup-free",
+      "notes": "Subgroup-free LFM2 Q4K dequant path with F32 activations and tiled prefill. Still requires shader-f16 kernels."
+    },
     {
       "id": "embeddinggemma-f16-f32a",
       "file": "embeddinggemma-f16-f32a.json",
package/src/config/presets/models/gemma2.json CHANGED
@@ -9,6 +9,7 @@
 
   "inference": {
     "attention": {
+      "queryPreAttnScalar": 256,
      "slidingWindow": 4096,
       "attnLogitSoftcapping": 50.0,
       "queryKeyNorm": false
@@ -40,7 +41,7 @@
       "f32": "gemma2-f16-f32a"
     },
     "q4k": {
-      "f16": "gemma2-q4k-dequant-f32a-nosubgroups",
+      "f16": "gemma2-q4k-dequant-f16a",
       "f32": "gemma2-q4k-dequant-f32a-nosubgroups"
     }
   }
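`queryPreAttnScalar` and `attnLogitSoftcapping` mirror the Gemma-family conventions from the upstream HF configs: queries are scaled by 1/sqrt(query_pre_attn_scalar) rather than 1/sqrt(head_dim), and attention logits pass through a tanh softcap. A sketch of both as conventionally defined; treat it as illustrative of the config fields, not a rendering of Doppler's WGSL kernels:

    // Gemma-style query scaling: 1/sqrt(256) here instead of 1/sqrt(head_dim).
    function scaleQuery(q, queryPreAttnScalar) {
      const scale = 1 / Math.sqrt(queryPreAttnScalar);
      return q.map((x) => x * scale);
    }

    // Tanh softcap on attention logits; null disables it (as in gemma3.json).
    function softcapLogit(score, cap) {
      return cap == null ? score : cap * Math.tanh(score / cap);
    }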
package/src/config/presets/models/gemma3.json CHANGED
@@ -8,7 +8,9 @@
   },
   "inference": {
     "attention": {
+      "queryPreAttnScalar": 256,
       "attnLogitSoftcapping": null,
+      "slidingWindow": 512,
       "queryKeyNorm": true
     },
     "normalization": {
package/src/config/presets/models/qwen3.json CHANGED
@@ -10,7 +10,8 @@
   "inference": {
     "attention": {
       "slidingWindow": null,
-      "queryKeyNorm": true
+      "queryKeyNorm": true,
+      "attentionOutputGate": true
     },
     "output": {
       "scaleEmbeddings": false
@@ -39,8 +40,8 @@
   },
 
   "detection": {
-    "architecturePatterns": ["qwen3", "qwen3_5", "Qwen3ForCausalLM", "Qwen3_5ForCausalLM", "Qwen2ForCausalLM"],
-    "modelTypePatterns": ["qwen3_5", "qwen3_5_text", "qwen2"],
+    "architecturePatterns": ["qwen3", "Qwen3ForCausalLM", "Qwen2ForCausalLM"],
+    "modelTypePatterns": ["qwen3", "qwen2"],
     "configPatterns": {
       "model_type": "qwen2"
     }
package/src/config/presets/models/qwen3_5.json ADDED
@@ -0,0 +1,16 @@
+{
+  "id": "qwen3_5",
+  "name": "Qwen 3.5",
+  "extends": "qwen3",
+
+  "inference": {
+    "normalization": {
+      "rmsNormWeightOffset": true
+    }
+  },
+
+  "detection": {
+    "architecturePatterns": ["qwen3_5", "Qwen3_5ForCausalLM", "Qwen3_5ForConditionalGeneration"],
+    "modelTypePatterns": ["qwen3_5", "qwen3_5_text"]
+  }
+}
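The only inference override Qwen 3.5 adds over its `qwen3` base is `rmsNormWeightOffset`. As the flag name suggests (and as in Gemma-style norms), the learned RMSNorm weight is applied as `(1 + w)` rather than `w`; a reference sketch under that assumption:

    // RMSNorm with an optional +1 weight offset, assuming rmsNormWeightOffset
    // means weights are stored as deltas around 1.0.
    function rmsNorm(x, weight, eps = 1e-6, weightOffset = true) {
      let sumSq = 0;
      for (const v of x) sumSq += v * v;
      const inv = 1 / Math.sqrt(sumSq / x.length + eps);
      return x.map((v, i) => v * inv * (weightOffset ? 1 + weight[i] : weight[i]));
    }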
package/src/config/presets/runtime/model/qwen3-5-layer-probe.json ADDED
@@ -0,0 +1,52 @@
+{
+  "id": "model/qwen3-5-layer-probe",
+  "name": "qwen3-5-layer-probe",
+  "description": "Probe all 24 layer outputs in Qwen 3.5 to isolate where the hidden state distribution collapses.",
+  "intent": "investigate",
+  "stability": "canonical",
+  "owner": "doppler-core",
+  "createdAtUtc": "2026-03-13T00:00:00Z",
+  "extends": "modes/debug",
+  "runtime": {
+    "inference": {
+      "prompt": "What color is the sky on a clear day? Answer in one word.",
+      "batching": {
+        "maxTokens": 1
+      },
+      "sampling": {
+        "temperature": 0
+      }
+    },
+    "shared": {
+      "debug": {
+        "trace": {
+          "enabled": true,
+          "categories": ["attn", "ffn", "logits"],
+          "layers": null,
+          "maxDecodeSteps": 1
+        },
+        "probes": [
+          {
+            "id": "embed",
+            "stage": "embed_out",
+            "tokens": [-1],
+            "dims": [0, 1, 2, 3, 512, 513]
+          },
+          {
+            "id": "layer_out",
+            "stage": "layer_out",
+            "layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
+            "tokens": [-1],
+            "dims": [0, 1, 2, 3]
+          },
+          {
+            "id": "logits",
+            "stage": "logits_final",
+            "tokens": [-1],
+            "dims": [271, 0, 1, 2, 3, 496, 138]
+          }
+        ]
+      }
+    }
+  }
+}