@simulatte/doppler 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +16 -23
- package/package.json +14 -1
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +7 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +12 -2
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +2 -1
- package/src/config/schema/manifest.schema.js +16 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +58 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +57 -41
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +62 -8
- package/src/inference/pipelines/text/attention/run.js +62 -8
- package/src/inference/pipelines/text/config.js +3 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +41 -19
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.js +78 -20
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +3 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +44 -25
|
@@ -15,9 +15,14 @@ export function chooseDefinedWithSource(path, overrideValue, fallbackValue, sour
|
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
export function mergeShallowObject(base, override) {
|
|
18
|
-
if (
|
|
18
|
+
if (override === undefined) {
|
|
19
19
|
return base;
|
|
20
20
|
}
|
|
21
|
+
if (override === null || typeof override !== 'object' || Array.isArray(override)) {
|
|
22
|
+
throw new Error(
|
|
23
|
+
'DopplerConfigError: shallow object overrides must be plain objects when provided explicitly.'
|
|
24
|
+
);
|
|
25
|
+
}
|
|
21
26
|
return { ...base, ...override };
|
|
22
27
|
}
|
|
23
28
|
|
|
@@ -29,17 +34,133 @@ export function replaceSubtree(overrideValue, fallbackValue) {
|
|
|
29
34
|
return chooseNullish(overrideValue, fallbackValue);
|
|
30
35
|
}
|
|
31
36
|
|
|
37
|
+
const DEFAULT_KERNEL_PATH_POLICY = Object.freeze({
|
|
38
|
+
mode: 'locked',
|
|
39
|
+
sourceScope: Object.freeze(['model', 'manifest']),
|
|
40
|
+
onIncompatible: 'error',
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
const VALID_KERNEL_PATH_POLICY_SOURCES = new Set([
|
|
44
|
+
'model',
|
|
45
|
+
'manifest',
|
|
46
|
+
'config',
|
|
47
|
+
'execution-v0',
|
|
48
|
+
]);
|
|
49
|
+
|
|
50
|
+
function normalizeKernelPathPolicyMode(value) {
|
|
51
|
+
if (value === undefined) {
|
|
52
|
+
return DEFAULT_KERNEL_PATH_POLICY.mode;
|
|
53
|
+
}
|
|
54
|
+
const normalized = String(value).trim().toLowerCase();
|
|
55
|
+
if (normalized === 'locked' || normalized === 'capability-aware') {
|
|
56
|
+
return normalized;
|
|
57
|
+
}
|
|
58
|
+
throw new Error(
|
|
59
|
+
`DopplerConfigError: runtime.inference.kernelPathPolicy.mode must be "locked" or "capability-aware"; got ${JSON.stringify(value)}.`
|
|
60
|
+
);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function normalizeKernelPathPolicySource(source) {
|
|
64
|
+
const normalized = String(source ?? '').trim().toLowerCase();
|
|
65
|
+
if (!normalized) {
|
|
66
|
+
throw new Error(
|
|
67
|
+
'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be non-empty strings.'
|
|
68
|
+
);
|
|
69
|
+
}
|
|
70
|
+
if (normalized === 'runtime') {
|
|
71
|
+
throw new Error(
|
|
72
|
+
'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "runtime". Use "config".'
|
|
73
|
+
);
|
|
74
|
+
}
|
|
75
|
+
if (normalized === 'execution_v0') {
|
|
76
|
+
throw new Error(
|
|
77
|
+
'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "execution_v0". Use "execution-v0".'
|
|
78
|
+
);
|
|
79
|
+
}
|
|
80
|
+
if (!VALID_KERNEL_PATH_POLICY_SOURCES.has(normalized)) {
|
|
81
|
+
throw new Error(
|
|
82
|
+
`DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be model|manifest|config|execution-v0; got ${JSON.stringify(source)}.`
|
|
83
|
+
);
|
|
84
|
+
}
|
|
85
|
+
return normalized;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
function normalizeKernelPathPolicySourceScope(value) {
|
|
89
|
+
if (value === undefined) {
|
|
90
|
+
return [...DEFAULT_KERNEL_PATH_POLICY.sourceScope];
|
|
91
|
+
}
|
|
92
|
+
if (!Array.isArray(value) || value.length === 0) {
|
|
93
|
+
throw new Error(
|
|
94
|
+
'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope must be a non-empty array.'
|
|
95
|
+
);
|
|
96
|
+
}
|
|
97
|
+
return [...new Set(value.map((source) => normalizeKernelPathPolicySource(source)))];
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function normalizeKernelPathPolicyOnIncompatible(value) {
|
|
101
|
+
if (value === undefined) {
|
|
102
|
+
return DEFAULT_KERNEL_PATH_POLICY.onIncompatible;
|
|
103
|
+
}
|
|
104
|
+
const normalized = String(value).trim().toLowerCase();
|
|
105
|
+
if (normalized === 'error' || normalized === 'remap') {
|
|
106
|
+
return normalized;
|
|
107
|
+
}
|
|
108
|
+
throw new Error(
|
|
109
|
+
`DopplerConfigError: runtime.inference.kernelPathPolicy.onIncompatible must be "error" or "remap"; got ${JSON.stringify(value)}.`
|
|
110
|
+
);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
function assertKernelPathPolicyObject(value, label) {
|
|
114
|
+
if (value === undefined) {
|
|
115
|
+
return;
|
|
116
|
+
}
|
|
117
|
+
if (value === null) {
|
|
118
|
+
throw new Error(`DopplerConfigError: ${label} must not be null.`);
|
|
119
|
+
}
|
|
120
|
+
if (typeof value !== 'object' || Array.isArray(value)) {
|
|
121
|
+
throw new Error(
|
|
122
|
+
`DopplerConfigError: ${label} must be an object.`
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
function assertKernelPathPolicySourceAliasesCompatible(policy, label) {
|
|
128
|
+
if (!policy || policy.sourceScope === undefined || policy.allowSources === undefined) {
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
const sourceScope = normalizeKernelPathPolicySourceScope(policy.sourceScope);
|
|
133
|
+
const allowSources = normalizeKernelPathPolicySourceScope(policy.allowSources);
|
|
134
|
+
const aliasesMatch = sourceScope.length === allowSources.length
|
|
135
|
+
&& sourceScope.every((value, index) => value === allowSources[index]);
|
|
136
|
+
|
|
137
|
+
if (!aliasesMatch) {
|
|
138
|
+
throw new Error(
|
|
139
|
+
`DopplerConfigError: ${label}.sourceScope and ${label}.allowSources must match exactly when both are provided.`
|
|
140
|
+
);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
32
144
|
export function mergeKernelPathPolicy(basePolicy, overridePolicy) {
|
|
145
|
+
assertKernelPathPolicyObject(basePolicy, 'runtime.inference.kernelPathPolicy');
|
|
146
|
+
assertKernelPathPolicyObject(overridePolicy, 'runtime.inference.kernelPathPolicy');
|
|
147
|
+
assertKernelPathPolicySourceAliasesCompatible(basePolicy, 'runtime.inference.kernelPathPolicy');
|
|
148
|
+
assertKernelPathPolicySourceAliasesCompatible(overridePolicy, 'runtime.inference.kernelPathPolicy');
|
|
33
149
|
const base = basePolicy ?? {};
|
|
34
150
|
const override = overridePolicy ?? {};
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
151
|
+
const sourceScope = normalizeKernelPathPolicySourceScope(
|
|
152
|
+
override.sourceScope
|
|
153
|
+
?? override.allowSources
|
|
154
|
+
?? base.sourceScope
|
|
155
|
+
?? base.allowSources
|
|
156
|
+
);
|
|
38
157
|
return {
|
|
39
|
-
mode: override.mode ?? base.mode,
|
|
158
|
+
mode: normalizeKernelPathPolicyMode(override.mode ?? base.mode),
|
|
40
159
|
sourceScope,
|
|
41
|
-
allowSources: sourceScope,
|
|
42
|
-
onIncompatible:
|
|
160
|
+
allowSources: [...sourceScope],
|
|
161
|
+
onIncompatible: normalizeKernelPathPolicyOnIncompatible(
|
|
162
|
+
override.onIncompatible ?? base.onIncompatible
|
|
163
|
+
),
|
|
43
164
|
};
|
|
44
165
|
}
|
|
45
166
|
|
package/src/config/merge.d.ts
CHANGED
|
@@ -54,6 +54,7 @@ export interface MergedInferenceConfig {
|
|
|
54
54
|
ffn: ManifestFFNSchema;
|
|
55
55
|
rope: ManifestRoPESchema;
|
|
56
56
|
output: ManifestOutputSchema;
|
|
57
|
+
pipeline: ManifestInferenceSchema['pipeline'];
|
|
57
58
|
layerPattern: ManifestLayerPatternSchema | null;
|
|
58
59
|
chatTemplate: ManifestChatTemplateSchema;
|
|
59
60
|
defaultKernelPath: string | null;
|
package/src/config/merge.js
CHANGED
|
@@ -333,12 +333,22 @@ export function mergeConfig(
|
|
|
333
333
|
sources
|
|
334
334
|
);
|
|
335
335
|
|
|
336
|
+
let pipeline = manifestInf.pipeline;
|
|
337
|
+
const runtimePipeline = runtimeOverrides?.pipeline;
|
|
338
|
+
if (runtimePipeline !== undefined) {
|
|
339
|
+
pipeline = runtimePipeline;
|
|
340
|
+
sources.set('inference.pipeline', 'runtime');
|
|
341
|
+
} else {
|
|
342
|
+
sources.set('inference.pipeline', 'manifest');
|
|
343
|
+
}
|
|
344
|
+
|
|
336
345
|
const inference = {
|
|
337
346
|
attention: mergeAttention(manifestInf.attention, runtimeOverrides?.attention, sources),
|
|
338
347
|
normalization: mergeNormalization(manifestInf.normalization, runtimeOverrides?.normalization, sources),
|
|
339
348
|
ffn: mergeFFN(manifestInf.ffn, runtimeOverrides?.ffn, sources),
|
|
340
349
|
rope: mergeRoPE(manifestInf.rope, runtimeOverrides?.rope, sources),
|
|
341
350
|
output: mergeOutput(manifestInf.output, runtimeOverrides?.output, sources),
|
|
351
|
+
pipeline,
|
|
342
352
|
layerPattern,
|
|
343
353
|
chatTemplate,
|
|
344
354
|
defaultKernelPath,
|
|
@@ -2,6 +2,7 @@ import { log } from '../debug/index.js';
|
|
|
2
2
|
import { PARAM_CATEGORIES, CategoryRules } from './param-categories.js';
|
|
3
3
|
import { TOOLING_INTENTS, TOOLING_DIAGNOSTICS } from './schema/tooling.schema.js';
|
|
4
4
|
import { validateEcosystemConfig } from './schema/ecosystem.schema.js';
|
|
5
|
+
import { isPlainObject } from '../utils/plain-object.js';
|
|
5
6
|
|
|
6
7
|
export function validateCallTimeOptions(options) {
|
|
7
8
|
if (!options) return;
|
|
@@ -33,7 +34,23 @@ export function validateCallTimeOptions(options) {
|
|
|
33
34
|
}
|
|
34
35
|
|
|
35
36
|
export function validateRuntimeOverrides(overrides) {
|
|
37
|
+
if (!isPlainObject(overrides)) {
|
|
38
|
+
throw new Error('DopplerConfigError: runtime overrides must be an object when provided.');
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
assertRequiredRuntimeOverrideNotNull(overrides, 'shared');
|
|
42
|
+
assertRequiredRuntimeOverrideNotNull(overrides, 'loading');
|
|
43
|
+
assertRequiredRuntimeOverrideNotNull(overrides, 'inference');
|
|
44
|
+
assertRequiredRuntimeOverrideNotNull(overrides, 'emulation');
|
|
45
|
+
assertRequiredRuntimeOverrideNotNull(overrides?.inference, 'batching', 'runtime.inference');
|
|
46
|
+
assertRequiredRuntimeOverrideNotNull(overrides?.inference, 'compute', 'runtime.inference');
|
|
47
|
+
assertRequiredRuntimeOverrideNotNull(overrides?.inference, 'generation', 'runtime.inference');
|
|
48
|
+
assertRequiredRuntimeOverrideNotNull(overrides?.inference, 'kernelPathPolicy', 'runtime.inference');
|
|
49
|
+
|
|
36
50
|
const modelOverrides = overrides?.inference?.modelOverrides;
|
|
51
|
+
if (modelOverrides !== undefined && modelOverrides !== null && !isPlainObject(modelOverrides)) {
|
|
52
|
+
throw new Error('DopplerConfigError: runtime.inference.modelOverrides must be an object when provided.');
|
|
53
|
+
}
|
|
37
54
|
if (!modelOverrides) return;
|
|
38
55
|
|
|
39
56
|
const params = flattenObject(modelOverrides);
|
|
@@ -214,6 +231,15 @@ function validateKernelPathPolicy(label, value) {
|
|
|
214
231
|
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
|
215
232
|
throw new Error(`DopplerConfigError: ${label} must be an object.`);
|
|
216
233
|
}
|
|
234
|
+
if (
|
|
235
|
+
value.sourceScope !== undefined
|
|
236
|
+
&& value.allowSources !== undefined
|
|
237
|
+
&& !arraysEqual(value.sourceScope, value.allowSources)
|
|
238
|
+
) {
|
|
239
|
+
throw new Error(
|
|
240
|
+
`DopplerConfigError: ${label}.sourceScope and ${label}.allowSources must match exactly when both are provided.`
|
|
241
|
+
);
|
|
242
|
+
}
|
|
217
243
|
if (value.mode !== 'locked' && value.mode !== 'capability-aware') {
|
|
218
244
|
throw new Error(`DopplerConfigError: ${label}.mode must be "locked" or "capability-aware".`);
|
|
219
245
|
}
|
|
@@ -224,12 +250,31 @@ function validateKernelPathPolicy(label, value) {
|
|
|
224
250
|
if (value.onIncompatible !== 'error' && value.onIncompatible !== 'remap') {
|
|
225
251
|
throw new Error(`DopplerConfigError: ${label}.onIncompatible must be "error" or "remap".`);
|
|
226
252
|
}
|
|
227
|
-
const validSources = new Set(['model', 'manifest', 'config', '
|
|
253
|
+
const validSources = new Set(['model', 'manifest', 'config', 'execution-v0']);
|
|
228
254
|
for (const source of sourceScope) {
|
|
229
255
|
if (!validSources.has(source)) {
|
|
230
256
|
throw new Error(
|
|
231
|
-
`DopplerConfigError: ${label}.sourceScope entries must be model|manifest|config|
|
|
257
|
+
`DopplerConfigError: ${label}.sourceScope entries must be model|manifest|config|execution-v0.`
|
|
232
258
|
);
|
|
233
259
|
}
|
|
234
260
|
}
|
|
235
261
|
}
|
|
262
|
+
|
|
263
|
+
function assertRequiredRuntimeOverrideNotNull(container, key, prefix = 'runtime') {
|
|
264
|
+
if (!isPlainObject(container) || !Object.prototype.hasOwnProperty.call(container, key)) {
|
|
265
|
+
return;
|
|
266
|
+
}
|
|
267
|
+
if (container[key] === null) {
|
|
268
|
+
throw new Error(`DopplerConfigError: ${prefix}.${key} must not be null.`);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
function arraysEqual(left, right) {
|
|
273
|
+
if (!Array.isArray(left) || !Array.isArray(right)) {
|
|
274
|
+
return false;
|
|
275
|
+
}
|
|
276
|
+
if (left.length !== right.length) {
|
|
277
|
+
return false;
|
|
278
|
+
}
|
|
279
|
+
return left.every((value, index) => value === right[index]);
|
|
280
|
+
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
|
-
"id": "gemma2-q4k-dequant-f32a",
|
|
3
|
-
"name": "Gemma 2 Q4K Dequant (F32 activations)",
|
|
4
|
-
"description": "Q4K weights dequantized to F16 with F32 activations. Non-fused
|
|
2
|
+
"id": "gemma2-q4k-dequant-f32a-nosubgroups",
|
|
3
|
+
"name": "Gemma 2 Q4K Dequant (F32 activations, no subgroups)",
|
|
4
|
+
"description": "Q4K weights dequantized to F16 with F32 activations. Non-fused path with no subgroup requirement that still requires shader-f16 kernels.",
|
|
5
5
|
"activationDtype": "f32",
|
|
6
6
|
|
|
7
7
|
"decode": {
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "gemma3-f16-fused-f32a-online-streamingprefill",
|
|
3
|
+
"name": "Gemma 3 F16 (F32 activations, online, streaming prefill)",
|
|
4
|
+
"description": "F16 weights with F32 activations, online decode attention, and streaming prefill attention for Gemma 3 stability-sensitive runs.",
|
|
5
|
+
"activationDtype": "f32",
|
|
6
|
+
"kvDtype": "f16",
|
|
7
|
+
"decode": {
|
|
8
|
+
"steps": [
|
|
9
|
+
{
|
|
10
|
+
"op": "input_norm",
|
|
11
|
+
"kernel": "rmsnorm.wgsl",
|
|
12
|
+
"entry": "main"
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"op": "q_proj",
|
|
16
|
+
"kernel": "matmul_gemv_subgroup.wgsl",
|
|
17
|
+
"entry": "main_vec4",
|
|
18
|
+
"weights": "layer.{L}.self_attn.q_proj"
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"op": "k_proj",
|
|
22
|
+
"kernel": "matmul_gemv_subgroup.wgsl",
|
|
23
|
+
"entry": "main_vec4",
|
|
24
|
+
"weights": "layer.{L}.self_attn.k_proj"
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"op": "v_proj",
|
|
28
|
+
"kernel": "matmul_gemv_subgroup.wgsl",
|
|
29
|
+
"entry": "main_vec4",
|
|
30
|
+
"weights": "layer.{L}.self_attn.v_proj"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"op": "rope_q",
|
|
34
|
+
"kernel": "rope.wgsl",
|
|
35
|
+
"entry": "main"
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"op": "rope_k",
|
|
39
|
+
"kernel": "rope.wgsl",
|
|
40
|
+
"entry": "main"
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"op": "attention",
|
|
44
|
+
"kernel": "attention_decode_online_f16kv.wgsl",
|
|
45
|
+
"entry": "main"
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"op": "o_proj",
|
|
49
|
+
"kernel": "matmul_gemv_subgroup.wgsl",
|
|
50
|
+
"entry": "main_vec4",
|
|
51
|
+
"weights": "layer.{L}.self_attn.o_proj"
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"op": "attn_residual",
|
|
55
|
+
"kernel": "residual.wgsl",
|
|
56
|
+
"entry": "main"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"op": "post_attn_norm",
|
|
60
|
+
"kernel": "rmsnorm.wgsl",
|
|
61
|
+
"entry": "main"
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"op": "gate_proj",
|
|
65
|
+
"kernel": "matmul_gemv_subgroup.wgsl",
|
|
66
|
+
"entry": "main_vec4",
|
|
67
|
+
"weights": "layer.{L}.mlp.gate_proj"
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"op": "up_proj",
|
|
71
|
+
"kernel": "matmul_gemv_subgroup.wgsl",
|
|
72
|
+
"entry": "main_vec4",
|
|
73
|
+
"weights": "layer.{L}.mlp.up_proj"
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"op": "activation",
|
|
77
|
+
"kernel": "gelu.wgsl",
|
|
78
|
+
"entry": "main",
|
|
79
|
+
"constants": {
|
|
80
|
+
"HAS_GATE": true
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"op": "down_proj",
|
|
85
|
+
"kernel": "matmul_gemv_subgroup.wgsl",
|
|
86
|
+
"entry": "main_vec4",
|
|
87
|
+
"weights": "layer.{L}.mlp.down_proj"
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"op": "ffn_residual",
|
|
91
|
+
"kernel": "residual.wgsl",
|
|
92
|
+
"entry": "main"
|
|
93
|
+
}
|
|
94
|
+
]
|
|
95
|
+
},
|
|
96
|
+
"prefill": {
|
|
97
|
+
"steps": [
|
|
98
|
+
{
|
|
99
|
+
"op": "input_norm",
|
|
100
|
+
"kernel": "rmsnorm.wgsl",
|
|
101
|
+
"entry": "main"
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"op": "q_proj",
|
|
105
|
+
"kernel": "matmul_f16w_f32a_tiled.wgsl",
|
|
106
|
+
"entry": "main",
|
|
107
|
+
"weights": "layer.{L}.self_attn.q_proj"
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"op": "k_proj",
|
|
111
|
+
"kernel": "matmul_f16w_f32a_tiled.wgsl",
|
|
112
|
+
"entry": "main",
|
|
113
|
+
"weights": "layer.{L}.self_attn.k_proj"
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"op": "v_proj",
|
|
117
|
+
"kernel": "matmul_f16w_f32a_tiled.wgsl",
|
|
118
|
+
"entry": "main",
|
|
119
|
+
"weights": "layer.{L}.self_attn.v_proj"
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"op": "rope_q",
|
|
123
|
+
"kernel": "rope.wgsl",
|
|
124
|
+
"entry": "main"
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
"op": "rope_k",
|
|
128
|
+
"kernel": "rope.wgsl",
|
|
129
|
+
"entry": "main"
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"op": "attention",
|
|
133
|
+
"kernel": "attention_streaming_f16kv.wgsl",
|
|
134
|
+
"entry": "main"
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
"op": "o_proj",
|
|
138
|
+
"kernel": "matmul_f16w_f32a_tiled.wgsl",
|
|
139
|
+
"entry": "main",
|
|
140
|
+
"weights": "layer.{L}.self_attn.o_proj"
|
|
141
|
+
},
|
|
142
|
+
{
|
|
143
|
+
"op": "attn_residual",
|
|
144
|
+
"kernel": "residual.wgsl",
|
|
145
|
+
"entry": "main"
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
"op": "post_attn_norm",
|
|
149
|
+
"kernel": "rmsnorm.wgsl",
|
|
150
|
+
"entry": "main"
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
"op": "gate_proj",
|
|
154
|
+
"kernel": "matmul_f16w_f32a_tiled.wgsl",
|
|
155
|
+
"entry": "main",
|
|
156
|
+
"weights": "layer.{L}.mlp.gate_proj"
|
|
157
|
+
},
|
|
158
|
+
{
|
|
159
|
+
"op": "up_proj",
|
|
160
|
+
"kernel": "matmul_f16w_f32a_tiled.wgsl",
|
|
161
|
+
"entry": "main",
|
|
162
|
+
"weights": "layer.{L}.mlp.up_proj"
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
"op": "activation",
|
|
166
|
+
"kernel": "gelu.wgsl",
|
|
167
|
+
"entry": "main",
|
|
168
|
+
"constants": {
|
|
169
|
+
"HAS_GATE": true
|
|
170
|
+
}
|
|
171
|
+
},
|
|
172
|
+
{
|
|
173
|
+
"op": "down_proj",
|
|
174
|
+
"kernel": "matmul_f16w_f32a_tiled.wgsl",
|
|
175
|
+
"entry": "main",
|
|
176
|
+
"weights": "layer.{L}.mlp.down_proj"
|
|
177
|
+
},
|
|
178
|
+
{
|
|
179
|
+
"op": "ffn_residual",
|
|
180
|
+
"kernel": "residual.wgsl",
|
|
181
|
+
"entry": "main"
|
|
182
|
+
}
|
|
183
|
+
]
|
|
184
|
+
},
|
|
185
|
+
"preLayer": [
|
|
186
|
+
{
|
|
187
|
+
"op": "embed",
|
|
188
|
+
"kernel": "gather.wgsl",
|
|
189
|
+
"entry": "main",
|
|
190
|
+
"weights": "embed_tokens"
|
|
191
|
+
}
|
|
192
|
+
],
|
|
193
|
+
"postLayer": [
|
|
194
|
+
{
|
|
195
|
+
"op": "final_norm",
|
|
196
|
+
"kernel": "rmsnorm.wgsl",
|
|
197
|
+
"entry": "main"
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
"op": "lm_head",
|
|
201
|
+
"kernel": "matmul_gemv_subgroup.wgsl",
|
|
202
|
+
"entry": "main_multicol",
|
|
203
|
+
"weights": "lm_head",
|
|
204
|
+
"constants": {
|
|
205
|
+
"MULTICOL_COLS_PER_WG": 64,
|
|
206
|
+
"MULTICOL_THREADS_PER_COL": 4
|
|
207
|
+
}
|
|
208
|
+
},
|
|
209
|
+
{
|
|
210
|
+
"op": "lm_head_prefill",
|
|
211
|
+
"kernel": "matmul_f16w_f32a_tiled.wgsl",
|
|
212
|
+
"entry": "main",
|
|
213
|
+
"weights": "lm_head"
|
|
214
|
+
}
|
|
215
|
+
],
|
|
216
|
+
"sampling": [
|
|
217
|
+
{
|
|
218
|
+
"op": "sample",
|
|
219
|
+
"kernel": "sample.wgsl",
|
|
220
|
+
"entry": "sample_single_pass"
|
|
221
|
+
}
|
|
222
|
+
]
|
|
223
|
+
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
|
-
"id": "gemma3-q4k-dequant-f32a",
|
|
3
|
-
"name": "Gemma 3 Q4K Dequant (F32 activations)",
|
|
4
|
-
"description": "Q4K weights dequantized to F16 with F32 activations
|
|
2
|
+
"id": "gemma3-q4k-dequant-f32a-nosubgroups",
|
|
3
|
+
"name": "Gemma 3 Q4K Dequant (F32 activations, no subgroups)",
|
|
4
|
+
"description": "Q4K weights dequantized to F16 with F32 activations. Subgroup-free non-online path for Gemma 3 that still requires shader-f16 kernels.",
|
|
5
5
|
"activationDtype": "f32",
|
|
6
6
|
"kvDtype": "f16",
|
|
7
7
|
|
|
@@ -16,11 +16,18 @@
|
|
|
16
16
|
"notes": "Default Gemma 2 Q4K dequant path for baseline F16 activation workflows."
|
|
17
17
|
},
|
|
18
18
|
{
|
|
19
|
-
"id": "gemma2-q4k-dequant-f32a",
|
|
20
|
-
"file": "gemma2-q4k-dequant-f32a.json",
|
|
19
|
+
"id": "gemma2-q4k-dequant-f32a-nosubgroups",
|
|
20
|
+
"file": "gemma2-q4k-dequant-f32a-nosubgroups.json",
|
|
21
21
|
"status": "canonical",
|
|
22
|
-
"statusReason": "
|
|
23
|
-
"notes": "Subgroup-free Gemma 2 Q4K dequant path with F32 activations."
|
|
22
|
+
"statusReason": "subgroup-free",
|
|
23
|
+
"notes": "Subgroup-free Gemma 2 Q4K dequant path with F32 activations. Still requires shader-f16 kernels."
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"id": "gemma2-q4k-dequant-f32a",
|
|
27
|
+
"aliasOf": "gemma2-q4k-dequant-f32a-nosubgroups",
|
|
28
|
+
"status": "legacy",
|
|
29
|
+
"statusReason": "compatibility-alias",
|
|
30
|
+
"notes": "Legacy alias for gemma2-q4k-dequant-f32a-nosubgroups."
|
|
24
31
|
},
|
|
25
32
|
{
|
|
26
33
|
"id": "gemma2-f16-f16a",
|
|
@@ -50,6 +57,13 @@
|
|
|
50
57
|
"statusReason": "benchmark-probe",
|
|
51
58
|
"notes": "Experimental fused F32 pipeline variant for fast decode."
|
|
52
59
|
},
|
|
60
|
+
{
|
|
61
|
+
"id": "gemma3-f16-fused-f32a-online-streamingprefill",
|
|
62
|
+
"file": "gemma3-f16-fused-f32a-online-streamingprefill.json",
|
|
63
|
+
"status": "experimental",
|
|
64
|
+
"statusReason": "stability-probe",
|
|
65
|
+
"notes": "Gemma 3 F16/F32 online path with streaming prefill attention instead of the small-tile prefill kernel."
|
|
66
|
+
},
|
|
53
67
|
{
|
|
54
68
|
"id": "gemma3-q4k-dequant-f16a-online",
|
|
55
69
|
"file": "gemma3-q4k-dequant-f16a-online.json",
|
|
@@ -58,11 +72,18 @@
|
|
|
58
72
|
"notes": "Experimental Gemma 3 Q4K path using online decode attention on subgroup-capable GPUs."
|
|
59
73
|
},
|
|
60
74
|
{
|
|
61
|
-
"id": "gemma3-q4k-dequant-f32a",
|
|
62
|
-
"file": "gemma3-q4k-dequant-f32a.json",
|
|
75
|
+
"id": "gemma3-q4k-dequant-f32a-nosubgroups",
|
|
76
|
+
"file": "gemma3-q4k-dequant-f32a-nosubgroups.json",
|
|
63
77
|
"status": "canonical",
|
|
64
|
-
"statusReason": "
|
|
65
|
-
"notes": "Gemma 3 Q4K dequant path with F32
|
|
78
|
+
"statusReason": "subgroup-free",
|
|
79
|
+
"notes": "Subgroup-free Gemma 3 Q4K dequant path with F32 activations. Still requires shader-f16 kernels."
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"id": "gemma3-q4k-dequant-f32a",
|
|
83
|
+
"aliasOf": "gemma3-q4k-dequant-f32a-nosubgroups",
|
|
84
|
+
"status": "legacy",
|
|
85
|
+
"statusReason": "compatibility-alias",
|
|
86
|
+
"notes": "Legacy alias for gemma3-q4k-dequant-f32a-nosubgroups."
|
|
66
87
|
},
|
|
67
88
|
{
|
|
68
89
|
"id": "gemma3-q4k-dequant-f32a-online",
|
|
@@ -1,20 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "kernels/dequant-f16-q4k",
|
|
3
3
|
"name": "dequant-f16-q4k",
|
|
4
|
-
"description": "
|
|
4
|
+
"description": "Deprecated alias for kernels/gemma2-q4k-dequant-f16a.",
|
|
5
5
|
"intent": "investigate",
|
|
6
|
-
"stability": "
|
|
6
|
+
"stability": "deprecated",
|
|
7
7
|
"owner": "doppler-core",
|
|
8
8
|
"createdAtUtc": "2026-02-25T00:00:00Z",
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
"kernelPathPolicy": {
|
|
14
|
-
"mode": "capability-aware",
|
|
15
|
-
"sourceScope": ["config", "model", "manifest", "execution-v0"],
|
|
16
|
-
"onIncompatible": "remap"
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
}
|
|
9
|
+
"deprecatedAtUtc": "2026-03-08T00:00:00Z",
|
|
10
|
+
"replacementId": "kernels/gemma2-q4k-dequant-f16a",
|
|
11
|
+
"extends": "kernels/gemma2-q4k-dequant-f16a",
|
|
12
|
+
"runtime": {}
|
|
20
13
|
}
|