@simulatte/doppler 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +16 -23
- package/package.json +30 -32
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +31 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +5 -20
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +18 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +81 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +15 -2
- package/src/config/merge-contract-check.js +66 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +43 -8
- package/src/config/presets/models/gemma2.json +3 -2
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +3 -2
- package/src/config/schema/manifest.schema.js +17 -4
- package/src/config/schema/storage.schema.js +1 -1
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +104 -11
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +16 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +50 -29
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +40 -16
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +83 -27
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +53 -3
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +59 -40
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +66 -43
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +8 -0
- package/src/inference/browser-harness.js +149 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +10 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +192 -112
- package/src/inference/pipelines/text/attention/record.js +77 -14
- package/src/inference/pipelines/text/attention/run.js +112 -14
- package/src/inference/pipelines/text/config.js +17 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +46 -23
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
- package/src/inference/pipelines/text/generator-steps.js +340 -221
- package/src/inference/pipelines/text/generator.js +56 -40
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +94 -25
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +4 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
- package/src/inference/pipelines/text/linear-attention.js +113 -9
- package/src/inference/pipelines/text/logits/gpu.js +12 -7
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +13 -12
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +282 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +17 -7
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +10 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +84 -14
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +214 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +27 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +365 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +55 -6
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +30 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +120 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/types/model.d.ts +5 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +50 -26
|
@@ -3,6 +3,7 @@ import { KERNEL_CONFIGS } from '../gpu/kernels/utils.js';
|
|
|
3
3
|
import { selectByRules } from '../gpu/kernels/rule-matcher.js';
|
|
4
4
|
import { loadJson } from '../utils/load-json.js';
|
|
5
5
|
import { buildKernelPathContractArtifact } from './kernel-path-contract-check.js';
|
|
6
|
+
import { mergeKernelPathPolicy } from './merge-helpers.js';
|
|
6
7
|
|
|
7
8
|
// =============================================================================
|
|
8
9
|
// Built-in Kernel Paths (imported at build time)
|
|
@@ -454,49 +455,17 @@ export function getKernelPathAttentionVariant(
|
|
|
454
455
|
|
|
455
456
|
let activeKernelPath = null;
|
|
456
457
|
let activeKernelPathSource = 'none';
|
|
457
|
-
|
|
458
|
+
const DEFAULT_ACTIVE_KERNEL_PATH_POLICY = {
|
|
458
459
|
mode: 'locked',
|
|
459
460
|
sourceScope: ['model', 'manifest'],
|
|
460
461
|
onIncompatible: 'error',
|
|
461
462
|
};
|
|
463
|
+
let activeKernelPathPolicy = DEFAULT_ACTIVE_KERNEL_PATH_POLICY;
|
|
462
464
|
|
|
463
|
-
function
|
|
464
|
-
const normalized = String(source ?? '').trim().toLowerCase();
|
|
465
|
-
if (normalized === 'runtime') return 'config';
|
|
466
|
-
if (normalized === 'execution_v0') return 'execution-v0';
|
|
467
|
-
return normalized;
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
function normalizeKernelPathPolicy(policy) {
|
|
471
|
-
if (!policy || typeof policy !== 'object' || Array.isArray(policy)) {
|
|
472
|
-
return {
|
|
473
|
-
mode: 'locked',
|
|
474
|
-
sourceScope: ['model', 'manifest'],
|
|
475
|
-
onIncompatible: 'error',
|
|
476
|
-
};
|
|
477
|
-
}
|
|
478
|
-
const mode = String(policy.mode ?? '').trim().toLowerCase() === 'capability-aware'
|
|
479
|
-
? 'capability-aware'
|
|
480
|
-
: 'locked';
|
|
481
|
-
const sourceScope = Array.isArray(policy.sourceScope ?? policy.allowSources)
|
|
482
|
-
? (policy.sourceScope ?? policy.allowSources)
|
|
483
|
-
.map((source) => normalizeKernelPathSource(source))
|
|
484
|
-
.filter((source) => source.length > 0)
|
|
485
|
-
: ['model', 'manifest'];
|
|
486
|
-
const onIncompatible = String(policy.onIncompatible ?? '').trim().toLowerCase() === 'remap'
|
|
487
|
-
? 'remap'
|
|
488
|
-
: 'error';
|
|
489
|
-
return {
|
|
490
|
-
mode,
|
|
491
|
-
sourceScope: sourceScope.length > 0 ? [...new Set(sourceScope)] : ['model', 'manifest'],
|
|
492
|
-
onIncompatible,
|
|
493
|
-
};
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
export function setActiveKernelPath(path, source = 'none', policy = null) {
|
|
465
|
+
export function setActiveKernelPath(path, source = 'none', policy = undefined) {
|
|
497
466
|
activeKernelPath = path;
|
|
498
467
|
activeKernelPathSource = path ? source : 'none';
|
|
499
|
-
activeKernelPathPolicy =
|
|
468
|
+
activeKernelPathPolicy = mergeKernelPathPolicy(DEFAULT_ACTIVE_KERNEL_PATH_POLICY, policy);
|
|
500
469
|
}
|
|
501
470
|
|
|
502
471
|
export function getActiveKernelPath() {
|
|
@@ -534,6 +503,19 @@ export function isKernelPathFusedQ4K(path = undefined) {
|
|
|
534
503
|
return kernelSteps.some((step) => step.kernel.includes('fused_matmul_q4'));
|
|
535
504
|
}
|
|
536
505
|
|
|
506
|
+
export function kernelPathRequiresF32MatmulWeights(path = undefined) {
|
|
507
|
+
const lookupPath = path === undefined ? activeKernelPath : path;
|
|
508
|
+
if (!lookupPath) return false;
|
|
509
|
+
const kernelSteps = [
|
|
510
|
+
...(lookupPath.decode?.steps ?? []),
|
|
511
|
+
...(lookupPath.prefill?.steps ?? []),
|
|
512
|
+
...(lookupPath.preLayer ?? []),
|
|
513
|
+
...(lookupPath.postLayer ?? []),
|
|
514
|
+
...(lookupPath.layerOverrides?.flatMap((override) => override.steps) ?? []),
|
|
515
|
+
];
|
|
516
|
+
return kernelSteps.some((step) => normalizeKernelFile(step.kernel) === 'matmul_f32.wgsl');
|
|
517
|
+
}
|
|
518
|
+
|
|
537
519
|
export function isActiveKernelPathFusedQ4K() {
|
|
538
520
|
return isKernelPathFusedQ4K(activeKernelPath);
|
|
539
521
|
}
|
|
@@ -220,7 +220,7 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
220
220
|
"topk.wgsl#main": "a18763303cd18e8a020e647f8a52f65403526849faf835d9f9394f634c3c97eb",
|
|
221
221
|
"topk.wgsl#softmax_topk": "95ff3517da909e4bd4d0ff8d85b619bd250522943aeb9276375edc59f67e9604",
|
|
222
222
|
"topk.wgsl#topk_2_small": "289eaa5c4f005e0aaf37dfe5343aeda30d9ab3929979dbf0cc3553f23e136807",
|
|
223
|
-
"transpose.wgsl#main": "
|
|
223
|
+
"transpose.wgsl#main": "8caf8664dfc579b4e92edce50783263c535764006290cc7902108f26586113a2",
|
|
224
224
|
"upsample2d_f16.wgsl#main": "43cee5f2503cb4b6caea45e9842f8961ce313b02eb8ed23a97d6967113ce521c",
|
|
225
225
|
"upsample2d.wgsl#main": "6de9172ad3d6940dd3c94470a105755a33760e66a84d6e9e96ec4d6a07dc4a25"
|
|
226
226
|
});
|
|
@@ -4,6 +4,17 @@ let cachedRegistry = null;
|
|
|
4
4
|
|
|
5
5
|
let registryUrl = null;
|
|
6
6
|
|
|
7
|
+
function deepFreeze(value, seen = new WeakSet()) {
|
|
8
|
+
if (!value || typeof value !== 'object' || seen.has(value)) {
|
|
9
|
+
return value;
|
|
10
|
+
}
|
|
11
|
+
seen.add(value);
|
|
12
|
+
for (const entry of Object.values(value)) {
|
|
13
|
+
deepFreeze(entry, seen);
|
|
14
|
+
}
|
|
15
|
+
return Object.freeze(value);
|
|
16
|
+
}
|
|
17
|
+
|
|
7
18
|
export function setRegistryUrl(url) {
|
|
8
19
|
registryUrl = url;
|
|
9
20
|
cachedRegistry = null;
|
|
@@ -15,7 +26,9 @@ export async function getRegistry() {
|
|
|
15
26
|
}
|
|
16
27
|
|
|
17
28
|
const source = registryUrl || './registry.json';
|
|
18
|
-
cachedRegistry =
|
|
29
|
+
cachedRegistry = deepFreeze(
|
|
30
|
+
await loadJson(source, import.meta.url, 'Failed to load kernel registry')
|
|
31
|
+
);
|
|
19
32
|
return cachedRegistry;
|
|
20
33
|
}
|
|
21
34
|
|
|
@@ -2174,7 +2174,7 @@
|
|
|
2174
2174
|
}
|
|
2175
2175
|
],
|
|
2176
2176
|
"baseUniforms": {
|
|
2177
|
-
"size":
|
|
2177
|
+
"size": 32,
|
|
2178
2178
|
"fields": [
|
|
2179
2179
|
{
|
|
2180
2180
|
"name": "M",
|
|
@@ -2202,7 +2202,9 @@
|
|
|
2202
2202
|
1,
|
|
2203
2203
|
1
|
|
2204
2204
|
],
|
|
2205
|
-
"requires": [
|
|
2205
|
+
"requires": [
|
|
2206
|
+
"shader-f16"
|
|
2207
|
+
]
|
|
2206
2208
|
},
|
|
2207
2209
|
"f16": {
|
|
2208
2210
|
"wgsl": "matmul_gemv_residual_f16.wgsl",
|
|
@@ -2377,12 +2379,12 @@
|
|
|
2377
2379
|
"offset": 20
|
|
2378
2380
|
},
|
|
2379
2381
|
{
|
|
2380
|
-
"name": "
|
|
2382
|
+
"name": "rotary_dim",
|
|
2381
2383
|
"type": "u32",
|
|
2382
2384
|
"offset": 24
|
|
2383
2385
|
},
|
|
2384
2386
|
{
|
|
2385
|
-
"name": "
|
|
2387
|
+
"name": "interleaved",
|
|
2386
2388
|
"type": "u32",
|
|
2387
2389
|
"offset": 28
|
|
2388
2390
|
}
|
|
@@ -4116,7 +4118,7 @@
|
|
|
4116
4118
|
}
|
|
4117
4119
|
],
|
|
4118
4120
|
"baseUniforms": {
|
|
4119
|
-
"size":
|
|
4121
|
+
"size": 32,
|
|
4120
4122
|
"fields": [
|
|
4121
4123
|
{
|
|
4122
4124
|
"name": "num_tokens",
|
|
@@ -4320,6 +4322,80 @@
|
|
|
4320
4322
|
}
|
|
4321
4323
|
}
|
|
4322
4324
|
},
|
|
4325
|
+
"split_qg": {
|
|
4326
|
+
"description": "De-interleave Q and Gate projections from q_proj output for attentionOutputGate models",
|
|
4327
|
+
"baseBindings": [
|
|
4328
|
+
{
|
|
4329
|
+
"index": 0,
|
|
4330
|
+
"name": "uniforms",
|
|
4331
|
+
"type": "uniform"
|
|
4332
|
+
},
|
|
4333
|
+
{
|
|
4334
|
+
"index": 1,
|
|
4335
|
+
"name": "qg_interleaved",
|
|
4336
|
+
"type": "read-only-storage"
|
|
4337
|
+
},
|
|
4338
|
+
{
|
|
4339
|
+
"index": 2,
|
|
4340
|
+
"name": "Q",
|
|
4341
|
+
"type": "storage"
|
|
4342
|
+
},
|
|
4343
|
+
{
|
|
4344
|
+
"index": 3,
|
|
4345
|
+
"name": "G",
|
|
4346
|
+
"type": "storage"
|
|
4347
|
+
}
|
|
4348
|
+
],
|
|
4349
|
+
"baseUniforms": {
|
|
4350
|
+
"size": 16,
|
|
4351
|
+
"fields": [
|
|
4352
|
+
{
|
|
4353
|
+
"name": "num_tokens",
|
|
4354
|
+
"type": "u32",
|
|
4355
|
+
"offset": 0
|
|
4356
|
+
},
|
|
4357
|
+
{
|
|
4358
|
+
"name": "num_heads",
|
|
4359
|
+
"type": "u32",
|
|
4360
|
+
"offset": 4
|
|
4361
|
+
},
|
|
4362
|
+
{
|
|
4363
|
+
"name": "head_dim",
|
|
4364
|
+
"type": "u32",
|
|
4365
|
+
"offset": 8
|
|
4366
|
+
},
|
|
4367
|
+
{
|
|
4368
|
+
"name": "_pad",
|
|
4369
|
+
"type": "u32",
|
|
4370
|
+
"offset": 12
|
|
4371
|
+
}
|
|
4372
|
+
]
|
|
4373
|
+
},
|
|
4374
|
+
"variants": {
|
|
4375
|
+
"default": {
|
|
4376
|
+
"wgsl": "split_qg.wgsl",
|
|
4377
|
+
"entryPoint": "main",
|
|
4378
|
+
"workgroup": [
|
|
4379
|
+
256,
|
|
4380
|
+
1,
|
|
4381
|
+
1
|
|
4382
|
+
],
|
|
4383
|
+
"requires": []
|
|
4384
|
+
},
|
|
4385
|
+
"f16": {
|
|
4386
|
+
"wgsl": "split_qg_f16.wgsl",
|
|
4387
|
+
"entryPoint": "main",
|
|
4388
|
+
"workgroup": [
|
|
4389
|
+
256,
|
|
4390
|
+
1,
|
|
4391
|
+
1
|
|
4392
|
+
],
|
|
4393
|
+
"requires": [
|
|
4394
|
+
"shader-f16"
|
|
4395
|
+
]
|
|
4396
|
+
}
|
|
4397
|
+
}
|
|
4398
|
+
},
|
|
4323
4399
|
"sample": {
|
|
4324
4400
|
"description": "GPU-side sampling kernels",
|
|
4325
4401
|
"baseBindings": [
|
package/src/config/loader.d.ts
CHANGED
package/src/config/loader.js
CHANGED
|
@@ -23,6 +23,7 @@ const mambaPreset = await loadJson('./presets/models/mamba.json', import.meta.ur
|
|
|
23
23
|
const modernbertPreset = await loadJson('./presets/models/modernbert.json', import.meta.url, 'Failed to load preset');
|
|
24
24
|
const lfm2Preset = await loadJson('./presets/models/lfm2.json', import.meta.url, 'Failed to load preset');
|
|
25
25
|
const qwen3Preset = await loadJson('./presets/models/qwen3.json', import.meta.url, 'Failed to load preset');
|
|
26
|
+
const qwen35Preset = await loadJson('./presets/models/qwen3_5.json', import.meta.url, 'Failed to load preset');
|
|
26
27
|
const kimiK2Preset = await loadJson('./presets/models/kimi-k2.json', import.meta.url, 'Failed to load preset');
|
|
27
28
|
const gptOssPreset = await loadJson('./presets/models/gpt-oss.json', import.meta.url, 'Failed to load preset');
|
|
28
29
|
|
|
@@ -46,6 +47,7 @@ export const PRESET_REGISTRY = {
|
|
|
46
47
|
modernbert: modernbertPreset,
|
|
47
48
|
lfm2: lfm2Preset,
|
|
48
49
|
qwen3: qwen3Preset,
|
|
50
|
+
qwen3_5: qwen35Preset,
|
|
49
51
|
kimi_k2: kimiK2Preset,
|
|
50
52
|
gpt_oss: gptOssPreset,
|
|
51
53
|
};
|
|
@@ -97,6 +99,7 @@ export const PRESET_DETECTION_ORDER = [
|
|
|
97
99
|
'gemma3',
|
|
98
100
|
'llama3',
|
|
99
101
|
'lfm2',
|
|
102
|
+
'qwen3_5',
|
|
100
103
|
'qwen3',
|
|
101
104
|
'kimi_k2',
|
|
102
105
|
'gpt_oss',
|
|
@@ -161,8 +164,7 @@ export function detectPreset(
|
|
|
161
164
|
}
|
|
162
165
|
}
|
|
163
166
|
|
|
164
|
-
|
|
165
|
-
return 'transformer';
|
|
167
|
+
return null;
|
|
166
168
|
}
|
|
167
169
|
|
|
168
170
|
// =============================================================================
|
|
@@ -178,6 +180,17 @@ export function resolveConfig(
|
|
|
178
180
|
(manifest.config || {}),
|
|
179
181
|
manifest.modelType
|
|
180
182
|
);
|
|
183
|
+
if (!id) {
|
|
184
|
+
const modelId = String(manifest?.modelId ?? 'unknown').trim() || 'unknown';
|
|
185
|
+
const modelType = String(manifest?.config?.model_type ?? 'unknown').trim() || 'unknown';
|
|
186
|
+
const architecture = String(manifest?.modelType ?? 'unknown').trim() || 'unknown';
|
|
187
|
+
throw createDopplerError(
|
|
188
|
+
ERROR_CODES.CONFIG_PRESET_UNKNOWN,
|
|
189
|
+
`Could not detect a preset for manifest "${modelId}" ` +
|
|
190
|
+
`(architecture="${architecture}", model_type="${modelType}"). ` +
|
|
191
|
+
'Provide an explicit presetId instead of relying on the generic transformer fallback.'
|
|
192
|
+
);
|
|
193
|
+
}
|
|
181
194
|
|
|
182
195
|
// Get resolved preset
|
|
183
196
|
const preset = resolvePreset(id);
|
|
@@ -77,6 +77,7 @@ function buildWitnessMergeManifest() {
|
|
|
77
77
|
embeddingTranspose: false,
|
|
78
78
|
embeddingVocabSize: 1024,
|
|
79
79
|
},
|
|
80
|
+
pipeline: 'decode-only',
|
|
80
81
|
layerPattern: null,
|
|
81
82
|
chatTemplate: {
|
|
82
83
|
type: 'gemma',
|
|
@@ -114,6 +115,13 @@ export function buildMergeContractArtifact() {
|
|
|
114
115
|
&& mergedUndefined._sources.get('inference.defaultKernelPath') === 'manifest',
|
|
115
116
|
`value=${mergedUndefined.inference.defaultKernelPath}, source=${mergedUndefined._sources.get('inference.defaultKernelPath')}`
|
|
116
117
|
);
|
|
118
|
+
recordCheck(
|
|
119
|
+
checks,
|
|
120
|
+
'runtime.mergeConfig.pipeline_preserves_manifest_value',
|
|
121
|
+
mergedUndefined.inference.pipeline === 'decode-only'
|
|
122
|
+
&& mergedUndefined._sources.get('inference.pipeline') === 'manifest',
|
|
123
|
+
`value=${String(mergedUndefined.inference.pipeline)}, source=${mergedUndefined._sources.get('inference.pipeline')}`
|
|
124
|
+
);
|
|
117
125
|
|
|
118
126
|
const mergedNull = mergeConfig(buildWitnessMergeManifest(), {
|
|
119
127
|
defaultKernelPath: null,
|
|
@@ -152,6 +160,42 @@ export function buildMergeContractArtifact() {
|
|
|
152
160
|
`value=${String(runtimeConfig.runtime.inference.chatTemplate.enabled)}`
|
|
153
161
|
);
|
|
154
162
|
|
|
163
|
+
const isolatedConfigA = createDopplerConfig();
|
|
164
|
+
isolatedConfigA.runtime.inference.compute.activationDtype = 'f32';
|
|
165
|
+
const isolatedConfigB = createDopplerConfig();
|
|
166
|
+
recordCheck(
|
|
167
|
+
checks,
|
|
168
|
+
'runtime.schema.defaults_are_isolated_per_instance',
|
|
169
|
+
isolatedConfigB.runtime.inference.compute.activationDtype !== 'f32'
|
|
170
|
+
&& isolatedConfigA.runtime.inference.compute !== isolatedConfigB.runtime.inference.compute,
|
|
171
|
+
`configA=${isolatedConfigA.runtime.inference.compute.activationDtype}, configB=${isolatedConfigB.runtime.inference.compute.activationDtype}`,
|
|
172
|
+
'actual'
|
|
173
|
+
);
|
|
174
|
+
recordCheck(
|
|
175
|
+
checks,
|
|
176
|
+
'runtime.schema.storage.opfs_sync_access_handle_defaults_off',
|
|
177
|
+
isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle === false,
|
|
178
|
+
`value=${String(isolatedConfigB.runtime.loading.storage.backend.opfs.useSyncAccessHandle)}`,
|
|
179
|
+
'actual'
|
|
180
|
+
);
|
|
181
|
+
|
|
182
|
+
const calibrateConfig = createDopplerConfig({
|
|
183
|
+
runtime: {
|
|
184
|
+
shared: {
|
|
185
|
+
tooling: {
|
|
186
|
+
intent: 'calibrate',
|
|
187
|
+
},
|
|
188
|
+
},
|
|
189
|
+
},
|
|
190
|
+
});
|
|
191
|
+
recordCheck(
|
|
192
|
+
checks,
|
|
193
|
+
'runtime.schema.calibrate_does_not_mutate_kernel_warmup_defaults',
|
|
194
|
+
calibrateConfig.runtime.shared.kernelWarmup.prewarm === false,
|
|
195
|
+
`prewarm=${String(calibrateConfig.runtime.shared.kernelWarmup.prewarm)}`,
|
|
196
|
+
'actual'
|
|
197
|
+
);
|
|
198
|
+
|
|
155
199
|
const overlaySources = new Map();
|
|
156
200
|
const chosenRuntimeValue = chooseDefinedWithSource(
|
|
157
201
|
'inference.defaultKernelPath',
|
|
@@ -252,6 +296,24 @@ export function buildMergeContractArtifact() {
|
|
|
252
296
|
'actual'
|
|
253
297
|
);
|
|
254
298
|
|
|
299
|
+
let invalidShallowOverrideError = null;
|
|
300
|
+
try {
|
|
301
|
+
mergeShallowObject(
|
|
302
|
+
{ type: 'base', enabled: true },
|
|
303
|
+
null
|
|
304
|
+
);
|
|
305
|
+
} catch (error) {
|
|
306
|
+
invalidShallowOverrideError = error;
|
|
307
|
+
}
|
|
308
|
+
recordCheck(
|
|
309
|
+
checks,
|
|
310
|
+
'runtime.mergeShallowObject.invalid_explicit_override_fails_closed',
|
|
311
|
+
invalidShallowOverrideError instanceof Error
|
|
312
|
+
&& /shallow object overrides must be plain objects/.test(invalidShallowOverrideError.message),
|
|
313
|
+
`error=${invalidShallowOverrideError?.message ?? 'none'}`,
|
|
314
|
+
'actual'
|
|
315
|
+
);
|
|
316
|
+
|
|
255
317
|
const layeredAttention = mergeLayeredShallowObjects(
|
|
256
318
|
{ slidingWindow: 4096, attentionBias: false },
|
|
257
319
|
{ slidingWindow: 2048 },
|
|
@@ -273,7 +335,7 @@ export function buildMergeContractArtifact() {
|
|
|
273
335
|
onIncompatible: 'error',
|
|
274
336
|
},
|
|
275
337
|
{
|
|
276
|
-
allowSources: ['
|
|
338
|
+
allowSources: ['config', 'execution-v0'],
|
|
277
339
|
onIncompatible: 'remap',
|
|
278
340
|
}
|
|
279
341
|
);
|
|
@@ -283,7 +345,7 @@ export function buildMergeContractArtifact() {
|
|
|
283
345
|
Array.isArray(mergedKernelPathPolicy.sourceScope)
|
|
284
346
|
&& Array.isArray(mergedKernelPathPolicy.allowSources)
|
|
285
347
|
&& mergedKernelPathPolicy.sourceScope.length === 2
|
|
286
|
-
&& mergedKernelPathPolicy.sourceScope[0] === '
|
|
348
|
+
&& mergedKernelPathPolicy.sourceScope[0] === 'config'
|
|
287
349
|
&& mergedKernelPathPolicy.allowSources[1] === 'execution-v0'
|
|
288
350
|
&& mergedKernelPathPolicy.onIncompatible === 'remap',
|
|
289
351
|
`sourceScope=${JSON.stringify(mergedKernelPathPolicy.sourceScope)}, allowSources=${JSON.stringify(mergedKernelPathPolicy.allowSources)}`,
|
|
@@ -294,7 +356,7 @@ export function buildMergeContractArtifact() {
|
|
|
294
356
|
runtime: {
|
|
295
357
|
inference: {
|
|
296
358
|
kernelPathPolicy: {
|
|
297
|
-
allowSources: ['
|
|
359
|
+
allowSources: ['config', 'execution-v0'],
|
|
298
360
|
},
|
|
299
361
|
},
|
|
300
362
|
},
|
|
@@ -303,7 +365,7 @@ export function buildMergeContractArtifact() {
|
|
|
303
365
|
checks,
|
|
304
366
|
'runtime.schema.kernelPathPolicy.helper_is_used',
|
|
305
367
|
Array.isArray(runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope)
|
|
306
|
-
&& runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope[0] === '
|
|
368
|
+
&& runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope[0] === 'config'
|
|
307
369
|
&& runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.allowSources[1] === 'execution-v0',
|
|
308
370
|
`policy=${JSON.stringify(runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy)}`,
|
|
309
371
|
'actual'
|
|
@@ -15,9 +15,14 @@ export function chooseDefinedWithSource(path, overrideValue, fallbackValue, sour
|
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
/**
 * Shallow-merge an explicit override object on top of a base object.
 *
 * @param {object} base - Fallback values; returned as-is (same reference) when
 *   no override is supplied.
 * @param {object} [override] - Own enumerable keys of this object win over `base`.
 * @returns {object} `base` itself when `override` is `undefined`, otherwise a
 *   new object built from `{ ...base, ...override }`.
 * @throws {Error} When `override` is provided but is `null`, a non-object, or
 *   an array.
 */
export function mergeShallowObject(base, override) {
  // Only `undefined` means "no override"; every other non-object value is
  // rejected instead of being silently ignored.
  if (override === undefined) {
    return base;
  }
  if (override === null || typeof override !== 'object' || Array.isArray(override)) {
    throw new Error(
      'DopplerConfigError: shallow object overrides must be plain objects when provided explicitly.'
    );
  }
  return { ...base, ...override };
}
|
|
23
28
|
|
|
@@ -29,17 +34,133 @@ export function replaceSubtree(overrideValue, fallbackValue) {
|
|
|
29
34
|
return chooseNullish(overrideValue, fallbackValue);
|
|
30
35
|
}
|
|
31
36
|
|
|
37
|
+
// Values used for any kernel-path policy field that is left undefined.
// Both the policy object and its sourceScope array are frozen.
const DEFAULT_KERNEL_PATH_POLICY = Object.freeze({
  mode: 'locked',
  sourceScope: Object.freeze(['model', 'manifest']),
  onIncompatible: 'error',
});

// Canonical source identifiers accepted as sourceScope entries.
const VALID_KERNEL_PATH_POLICY_SOURCES = new Set(['model', 'manifest', 'config', 'execution-v0']);
|
|
49
|
+
|
|
50
|
+
/**
 * Validate and canonicalize `runtime.inference.kernelPathPolicy.mode`.
 *
 * @param {unknown} value - Raw mode; `undefined` selects
 *   `DEFAULT_KERNEL_PATH_POLICY.mode`.
 * @returns {string} The default mode, or `'locked'` / `'capability-aware'`
 *   after trimming and lower-casing.
 * @throws {Error} When `value` is defined but is not one of the accepted strings.
 */
function normalizeKernelPathPolicyMode(value) {
  if (value === undefined) {
    return DEFAULT_KERNEL_PATH_POLICY.mode;
  }
  // Only real strings are normalized. Coercing with String() would accept a
  // one-element array such as ['locked'], because it stringifies to 'locked'.
  const normalized = typeof value === 'string' ? value.trim().toLowerCase() : null;
  if (normalized === 'locked' || normalized === 'capability-aware') {
    return normalized;
  }
  throw new Error(
    `DopplerConfigError: runtime.inference.kernelPathPolicy.mode must be "locked" or "capability-aware"; got ${JSON.stringify(value)}.`
  );
}
|
|
62
|
+
|
|
63
|
+
/**
 * Validate and canonicalize one kernel-path policy source entry.
 *
 * @param {unknown} source - Raw entry from a sourceScope-style array.
 * @returns {string} The trimmed, lower-cased source name.
 * @throws {Error} When the entry is not a non-empty string, is one of the
 *   legacy spellings `"runtime"` / `"execution_v0"`, or is not contained in
 *   `VALID_KERNEL_PATH_POLICY_SOURCES`.
 */
function normalizeKernelPathPolicySource(source) {
  // Non-strings are treated as empty. Coercing with String() would accept a
  // nested array such as ['config'], because it stringifies to 'config'.
  const normalized = typeof source === 'string' ? source.trim().toLowerCase() : '';
  if (!normalized) {
    throw new Error(
      'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be non-empty strings.'
    );
  }
  // Legacy spellings get a dedicated message that names the replacement.
  if (normalized === 'runtime') {
    throw new Error(
      'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "runtime". Use "config".'
    );
  }
  if (normalized === 'execution_v0') {
    throw new Error(
      'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "execution_v0". Use "execution-v0".'
    );
  }
  if (!VALID_KERNEL_PATH_POLICY_SOURCES.has(normalized)) {
    throw new Error(
      `DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be model|manifest|config|execution-v0; got ${JSON.stringify(source)}.`
    );
  }
  return normalized;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Canonicalize a kernel-path policy source list.
 *
 * @param {unknown} value - Raw list; `undefined` yields a fresh copy of
 *   `DEFAULT_KERNEL_PATH_POLICY.sourceScope`.
 * @returns {string[]} Normalized entries with duplicates removed, first
 *   occurrence order kept.
 * @throws {Error} When `value` is defined but is not a non-empty array, or when
 *   `normalizeKernelPathPolicySource` rejects an entry.
 */
function normalizeKernelPathPolicySourceScope(value) {
  if (value === undefined) {
    return [...DEFAULT_KERNEL_PATH_POLICY.sourceScope];
  }

  const isNonEmptyList = Array.isArray(value) && value.length !== 0;
  if (!isNonEmptyList) {
    throw new Error(
      'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope must be a non-empty array.'
    );
  }

  // Normalize first, then dedupe, so differently-cased spellings collapse.
  const normalizedEntries = value.map((entry) => normalizeKernelPathPolicySource(entry));
  const uniqueEntries = new Set(normalizedEntries);
  return [...uniqueEntries];
}
|
|
99
|
+
|
|
100
|
+
/**
 * Validate and canonicalize `runtime.inference.kernelPathPolicy.onIncompatible`.
 *
 * @param {unknown} value - Raw setting; `undefined` selects
 *   `DEFAULT_KERNEL_PATH_POLICY.onIncompatible`.
 * @returns {string} The default, or `'error'` / `'remap'` after trimming and
 *   lower-casing.
 * @throws {Error} When `value` is defined but is not one of the accepted strings.
 */
function normalizeKernelPathPolicyOnIncompatible(value) {
  if (value === undefined) {
    return DEFAULT_KERNEL_PATH_POLICY.onIncompatible;
  }
  // Only real strings are normalized. Coercing with String() would accept a
  // one-element array such as ['remap'], because it stringifies to 'remap'.
  const normalized = typeof value === 'string' ? value.trim().toLowerCase() : null;
  if (normalized === 'error' || normalized === 'remap') {
    return normalized;
  }
  throw new Error(
    `DopplerConfigError: runtime.inference.kernelPathPolicy.onIncompatible must be "error" or "remap"; got ${JSON.stringify(value)}.`
  );
}
|
|
112
|
+
|
|
113
|
+
/**
 * Guard that a kernel-path policy value is either absent or an object.
 *
 * @param {unknown} value - Candidate policy; `undefined` is accepted.
 * @param {string} label - Config path used as the prefix of the error message.
 * @returns {void}
 * @throws {Error} When `value` is `null`, a primitive, or an array.
 */
function assertKernelPathPolicyObject(value, label) {
  if (value === undefined) {
    return;
  }

  // null gets its own message, separate from the generic "must be an object".
  if (value === null) {
    throw new Error(`DopplerConfigError: ${label} must not be null.`);
  }

  const isObjectShaped = typeof value === 'object' && !Array.isArray(value);
  if (!isObjectShaped) {
    throw new Error(
      `DopplerConfigError: ${label} must be an object.`
    );
  }
}
|
|
126
|
+
|
|
127
|
+
/**
 * Reject a policy that sets both `sourceScope` and `allowSources` to
 * different lists.
 *
 * Nothing is checked when the policy is falsy or when either field is
 * `undefined`. Otherwise both lists are normalized and compared element by
 * element, so order matters.
 *
 * @param {object} [policy] - Policy object to inspect.
 * @param {string} label - Config path used in the error message.
 * @returns {void}
 * @throws {Error} When the normalized lists differ, or when normalizing either
 *   list throws.
 */
function assertKernelPathPolicySourceAliasesCompatible(policy, label) {
  const bothProvided = Boolean(policy)
    && policy.sourceScope !== undefined
    && policy.allowSources !== undefined;
  if (!bothProvided) {
    return;
  }

  const scopeEntries = normalizeKernelPathPolicySourceScope(policy.sourceScope);
  const allowEntries = normalizeKernelPathPolicySourceScope(policy.allowSources);

  const sameLength = scopeEntries.length === allowEntries.length;
  const identical = sameLength
    && scopeEntries.every((entry, position) => entry === allowEntries[position]);
  if (identical) {
    return;
  }

  throw new Error(
    `DopplerConfigError: ${label}.sourceScope and ${label}.allowSources must match exactly when both are provided.`
  );
}
|
|
143
|
+
|
|
32
144
|
/**
 * Merge an override kernel-path policy over a base policy and normalize the
 * result.
 *
 * Each field is taken from the override when it is not nullish, otherwise from
 * the base. For the source list the lookup order is `override.sourceScope`,
 * `override.allowSources`, `base.sourceScope`, `base.allowSources`.
 *
 * @param {object} [basePolicy] - Lower-priority policy; `undefined` is treated
 *   as an empty object.
 * @param {object} [overridePolicy] - Higher-priority policy; `undefined` is
 *   treated as an empty object.
 * @returns {{mode: string, sourceScope: string[], allowSources: string[], onIncompatible: string}}
 *   Normalized policy. `allowSources` is a separate copy of `sourceScope`, so
 *   mutating one array does not affect the other.
 * @throws {Error} When either argument fails the object or alias checks, or
 *   when a field fails normalization.
 */
export function mergeKernelPathPolicy(basePolicy, overridePolicy) {
  const label = 'runtime.inference.kernelPathPolicy';

  // Validate both inputs before reading any field from them.
  assertKernelPathPolicyObject(basePolicy, label);
  assertKernelPathPolicyObject(overridePolicy, label);
  assertKernelPathPolicySourceAliasesCompatible(basePolicy, label);
  assertKernelPathPolicySourceAliasesCompatible(overridePolicy, label);

  const base = basePolicy ?? {};
  const override = overridePolicy ?? {};
  const sourceScope = normalizeKernelPathPolicySourceScope(
    override.sourceScope
      ?? override.allowSources
      ?? base.sourceScope
      ?? base.allowSources
  );

  return {
    mode: normalizeKernelPathPolicyMode(override.mode ?? base.mode),
    sourceScope,
    allowSources: [...sourceScope],
    onIncompatible: normalizeKernelPathPolicyOnIncompatible(
      override.onIncompatible ?? base.onIncompatible
    ),
  };
}
|
|
45
166
|
|
package/src/config/merge.d.ts
CHANGED
|
@@ -54,6 +54,7 @@ export interface MergedInferenceConfig {
|
|
|
54
54
|
ffn: ManifestFFNSchema;
|
|
55
55
|
rope: ManifestRoPESchema;
|
|
56
56
|
output: ManifestOutputSchema;
|
|
57
|
+
pipeline: ManifestInferenceSchema['pipeline'];
|
|
57
58
|
layerPattern: ManifestLayerPatternSchema | null;
|
|
58
59
|
chatTemplate: ManifestChatTemplateSchema;
|
|
59
60
|
defaultKernelPath: string | null;
|
package/src/config/merge.js
CHANGED
|
@@ -333,12 +333,22 @@ export function mergeConfig(
|
|
|
333
333
|
sources
|
|
334
334
|
);
|
|
335
335
|
|
|
336
|
+
let pipeline = manifestInf.pipeline;
|
|
337
|
+
const runtimePipeline = runtimeOverrides?.pipeline;
|
|
338
|
+
if (runtimePipeline !== undefined) {
|
|
339
|
+
pipeline = runtimePipeline;
|
|
340
|
+
sources.set('inference.pipeline', 'runtime');
|
|
341
|
+
} else {
|
|
342
|
+
sources.set('inference.pipeline', 'manifest');
|
|
343
|
+
}
|
|
344
|
+
|
|
336
345
|
const inference = {
|
|
337
346
|
attention: mergeAttention(manifestInf.attention, runtimeOverrides?.attention, sources),
|
|
338
347
|
normalization: mergeNormalization(manifestInf.normalization, runtimeOverrides?.normalization, sources),
|
|
339
348
|
ffn: mergeFFN(manifestInf.ffn, runtimeOverrides?.ffn, sources),
|
|
340
349
|
rope: mergeRoPE(manifestInf.rope, runtimeOverrides?.rope, sources),
|
|
341
350
|
output: mergeOutput(manifestInf.output, runtimeOverrides?.output, sources),
|
|
351
|
+
pipeline,
|
|
342
352
|
layerPattern,
|
|
343
353
|
chatTemplate,
|
|
344
354
|
defaultKernelPath,
|