@simulatte/doppler 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +16 -23
- package/package.json +30 -32
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +31 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +5 -20
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +18 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +81 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +15 -2
- package/src/config/merge-contract-check.js +66 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +43 -8
- package/src/config/presets/models/gemma2.json +3 -2
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +3 -2
- package/src/config/schema/manifest.schema.js +17 -4
- package/src/config/schema/storage.schema.js +1 -1
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +104 -11
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +16 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +50 -29
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +40 -16
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +83 -27
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +53 -3
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +59 -40
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +66 -43
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +8 -0
- package/src/inference/browser-harness.js +149 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +10 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +192 -112
- package/src/inference/pipelines/text/attention/record.js +77 -14
- package/src/inference/pipelines/text/attention/run.js +112 -14
- package/src/inference/pipelines/text/config.js +17 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +46 -23
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
- package/src/inference/pipelines/text/generator-steps.js +340 -221
- package/src/inference/pipelines/text/generator.js +56 -40
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +94 -25
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +4 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
- package/src/inference/pipelines/text/linear-attention.js +113 -9
- package/src/inference/pipelines/text/logits/gpu.js +12 -7
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +13 -12
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +282 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +17 -7
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +10 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +84 -14
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +214 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +27 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +365 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +55 -6
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +30 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +120 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/types/model.d.ts +5 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +50 -26
|
@@ -1,1011 +1,37 @@
|
|
|
1
1
|
import { mergeRuntimeValues } from '../../../config/runtime-merge.js';
|
|
2
|
+
import { buildExecutionV0FromKernelPath } from '../../../converter/execution-v0-manifest.js';
|
|
2
3
|
import {
|
|
3
|
-
|
|
4
|
-
indexExecutionV0KernelProfiles,
|
|
5
|
-
normalizeExecutionV0Dtype,
|
|
6
|
-
resolveExecutionV0KernelProfile,
|
|
7
|
-
resolveExecutionV0KVIO,
|
|
8
|
-
resolveExecutionV0Precision,
|
|
9
|
-
} from '../../../config/execution-v0-contract-check.js';
|
|
10
|
-
import { selectRuleValue } from '../../../rules/rule-registry.js';
|
|
11
|
-
import {
|
|
12
|
-
EXECUTION_V0_SCHEMA_ID,
|
|
4
|
+
DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS,
|
|
13
5
|
DEFAULT_EXECUTION_V0_POLICIES,
|
|
14
6
|
DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
|
|
15
|
-
isExecutionV0Digest,
|
|
16
|
-
isExecutionV0Semver,
|
|
17
7
|
} from '../../../config/schema/execution-v0.schema.js';
|
|
18
|
-
import {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
}
|
|
46
|
-
const outputDtype = config?.outputDtype;
|
|
47
|
-
if (typeof outputDtype === 'string' && outputDtype.length > 0) {
|
|
48
|
-
byKernelEntry.get(key).add(String(outputDtype).toLowerCase());
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
return byKernelEntry;
|
|
53
|
-
})();
|
|
54
|
-
|
|
55
|
-
function getKernelOutputCapabilities(step) {
|
|
56
|
-
const kernel = String(step?.kernel ?? '').trim();
|
|
57
|
-
const entry = String(step?.entry ?? 'main').trim() || 'main';
|
|
58
|
-
if (!kernel) {
|
|
59
|
-
return null;
|
|
60
|
-
}
|
|
61
|
-
return KERNEL_OUTPUT_CAPABILITIES.get(`${kernel}#${entry}`) ?? null;
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
function cloneJson(value) {
|
|
65
|
-
if (typeof structuredClone === 'function') {
|
|
66
|
-
return structuredClone(value);
|
|
67
|
-
}
|
|
68
|
-
return JSON.parse(JSON.stringify(value));
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
const normalizeDtype = normalizeExecutionV0Dtype;
|
|
72
|
-
const resolvePrecision = resolveExecutionV0Precision;
|
|
73
|
-
const resolveKVIO = resolveExecutionV0KVIO;
|
|
74
|
-
|
|
75
|
-
function normalizePhase(value, label) {
|
|
76
|
-
const normalized = String(value ?? '').trim().toLowerCase();
|
|
77
|
-
if (normalized !== 'prefill' && normalized !== 'decode' && normalized !== 'both') {
|
|
78
|
-
throw new Error(`[ExecutionV0] ${label} must be prefill|decode|both; got "${value}"`);
|
|
79
|
-
}
|
|
80
|
-
return normalized;
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
function normalizeSection(value, label) {
|
|
84
|
-
const normalized = String(value ?? '').trim();
|
|
85
|
-
if (!['preLayer', 'layer', 'postLayer', 'sampling'].includes(normalized)) {
|
|
86
|
-
throw new Error(`[ExecutionV0] ${label} must be preLayer|layer|postLayer|sampling; got "${value}"`);
|
|
87
|
-
}
|
|
88
|
-
return normalized;
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
function normalizeKVLayout(value, label) {
|
|
92
|
-
if (value == null) {
|
|
93
|
-
return null;
|
|
94
|
-
}
|
|
95
|
-
const normalized = String(value).trim().toLowerCase();
|
|
96
|
-
if (!normalized) {
|
|
97
|
-
return null;
|
|
98
|
-
}
|
|
99
|
-
return normalized;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
function assertKernelRef(kernelRef, label) {
|
|
103
|
-
if (!kernelRef) return;
|
|
104
|
-
if (typeof kernelRef.id !== 'string' || kernelRef.id.trim().length === 0) {
|
|
105
|
-
throw new Error(`[ExecutionV0] ${label}.id is required`);
|
|
106
|
-
}
|
|
107
|
-
if (!isExecutionV0Semver(kernelRef.version)) {
|
|
108
|
-
throw new Error(`[ExecutionV0] ${label}.version must be semver; got "${kernelRef.version}"`);
|
|
109
|
-
}
|
|
110
|
-
if (!isExecutionV0Digest(kernelRef.digest)) {
|
|
111
|
-
throw new Error(`[ExecutionV0] ${label}.digest must match sha256:<64-hex>`);
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
function isPhaseMatch(phase, targetPhase) {
|
|
116
|
-
return phase === 'both' || phase === targetPhase;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
function stepHasLayer(step, layerIdx) {
|
|
120
|
-
if (step.layers === 'all') return true;
|
|
121
|
-
if (!Array.isArray(step.layers)) return false;
|
|
122
|
-
return step.layers.includes(layerIdx);
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
const buildKernelProfileKey = buildExecutionV0KernelProfileKey;
|
|
126
|
-
|
|
127
|
-
function normalizeSlot(value, label) {
|
|
128
|
-
if (typeof value !== 'string' || value.trim().length === 0) {
|
|
129
|
-
throw new Error(`[ExecutionV0] ${label} must be a non-empty string`);
|
|
130
|
-
}
|
|
131
|
-
return value.trim();
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
function assertKernelPrecisionCapability(step, resolvedPrecision, policies) {
|
|
135
|
-
if (step.op === 'cast') {
|
|
136
|
-
return;
|
|
137
|
-
}
|
|
138
|
-
if (policies.unsupportedPrecision !== 'error') {
|
|
139
|
-
return;
|
|
140
|
-
}
|
|
141
|
-
const kernel = String(step.kernel ?? '').trim();
|
|
142
|
-
const entry = String(step.entry ?? 'main').trim() || 'main';
|
|
143
|
-
const supportedOutputDtypes = getKernelOutputCapabilities(step);
|
|
144
|
-
if (!supportedOutputDtypes) {
|
|
145
|
-
throw new Error(
|
|
146
|
-
`[ExecutionV0] step "${step.id}" kernel "${kernel}#${entry}" ` +
|
|
147
|
-
'is not present in kernel registry; cannot validate precision capability.'
|
|
148
|
-
);
|
|
149
|
-
}
|
|
150
|
-
if (supportedOutputDtypes.size === 0) {
|
|
151
|
-
// Some kernels do not declare output dtype metadata yet; treat as unknown.
|
|
152
|
-
return;
|
|
153
|
-
}
|
|
154
|
-
const outputDtype = normalizeDtype(resolvedPrecision.outputDtype, `${step.id}.precision.outputDtype`);
|
|
155
|
-
if (!supportedOutputDtypes.has(outputDtype)) {
|
|
156
|
-
throw new Error(
|
|
157
|
-
`[ExecutionV0] step "${step.id}" outputDtype=${outputDtype} is unsupported by ` +
|
|
158
|
-
`kernel "${kernel}#${entry}" (supported: ${[...supportedOutputDtypes].join(', ') || 'none'}).`
|
|
159
|
-
);
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
function createSourceTrace() {
|
|
164
|
-
return {
|
|
165
|
-
session: {},
|
|
166
|
-
steps: {},
|
|
167
|
-
};
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
function setSourceTrace(trace, path, source) {
|
|
171
|
-
if (!trace || typeof path !== 'string' || path.length === 0) return;
|
|
172
|
-
trace[path] = { source };
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
function setStepSourceTrace(trace, stepId, path, source) {
|
|
176
|
-
if (!trace || !stepId || !path) return;
|
|
177
|
-
if (!trace.steps[stepId]) {
|
|
178
|
-
trace.steps[stepId] = {};
|
|
179
|
-
}
|
|
180
|
-
trace.steps[stepId][path] = { source };
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
function isPlainObject(value) {
|
|
184
|
-
return value != null && typeof value === 'object' && !Array.isArray(value);
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
function collectLeafPaths(value, prefix = [], out = []) {
|
|
188
|
-
if (Array.isArray(value)) {
|
|
189
|
-
if (prefix.length > 0) {
|
|
190
|
-
out.push(prefix);
|
|
191
|
-
}
|
|
192
|
-
return out;
|
|
193
|
-
}
|
|
194
|
-
if (!isPlainObject(value)) {
|
|
195
|
-
if (prefix.length > 0) {
|
|
196
|
-
out.push(prefix);
|
|
197
|
-
}
|
|
198
|
-
return out;
|
|
199
|
-
}
|
|
200
|
-
for (const [key, child] of Object.entries(value)) {
|
|
201
|
-
collectLeafPaths(child, [...prefix, key], out);
|
|
202
|
-
}
|
|
203
|
-
return out;
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
function hasDefinedPath(root, pathSegments) {
|
|
207
|
-
let current = root;
|
|
208
|
-
for (const segment of pathSegments) {
|
|
209
|
-
if (!isPlainObject(current) || !Object.prototype.hasOwnProperty.call(current, segment)) {
|
|
210
|
-
return false;
|
|
211
|
-
}
|
|
212
|
-
current = current[segment];
|
|
213
|
-
}
|
|
214
|
-
return current !== undefined;
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
const indexKernelProfiles = indexExecutionV0KernelProfiles;
|
|
218
|
-
|
|
219
|
-
function resolveProfile(profileIndex, step) {
|
|
220
|
-
return resolveExecutionV0KernelProfile(profileIndex, step);
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
function validateStepShape(step, index) {
|
|
224
|
-
if (!step || typeof step !== 'object') {
|
|
225
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}] must be an object`);
|
|
226
|
-
}
|
|
227
|
-
if (typeof step.id !== 'string' || step.id.trim().length === 0) {
|
|
228
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}].id is required`);
|
|
229
|
-
}
|
|
230
|
-
if (typeof step.op !== 'string' || step.op.trim().length === 0) {
|
|
231
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}].op is required`);
|
|
232
|
-
}
|
|
233
|
-
normalizePhase(step.phase, `execution.steps[${index}].phase`);
|
|
234
|
-
normalizeSection(step.section, `execution.steps[${index}].section`);
|
|
235
|
-
normalizeSlot(step.src, `execution.steps[${index}].src`);
|
|
236
|
-
normalizeSlot(step.dst, `execution.steps[${index}].dst`);
|
|
237
|
-
if (step.layers !== 'all' && !Array.isArray(step.layers)) {
|
|
238
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}].layers must be "all" or number[]`);
|
|
239
|
-
}
|
|
240
|
-
if (step.layers !== 'all') {
|
|
241
|
-
for (const layer of step.layers) {
|
|
242
|
-
if (!Number.isInteger(layer) || layer < 0) {
|
|
243
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}].layers must contain non-negative integers`);
|
|
244
|
-
}
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
if (step.op === 'cast') {
|
|
248
|
-
normalizeDtype(step.toDtype, `execution.steps[${index}].toDtype`);
|
|
249
|
-
if (step.fromDtype != null) {
|
|
250
|
-
normalizeDtype(step.fromDtype, `execution.steps[${index}].fromDtype`);
|
|
251
|
-
}
|
|
252
|
-
} else {
|
|
253
|
-
if (typeof step.kernel !== 'string' || step.kernel.trim().length === 0) {
|
|
254
|
-
throw new Error(
|
|
255
|
-
`[ExecutionV0] execution.steps[${index}] "${step.id}" requires kernel (non-cast op)`
|
|
256
|
-
);
|
|
257
|
-
}
|
|
258
|
-
if (!step.kernelRef || typeof step.kernelRef !== 'object' || Array.isArray(step.kernelRef)) {
|
|
259
|
-
throw new Error(
|
|
260
|
-
`[ExecutionV0] execution.steps[${index}] "${step.id}" requires kernelRef {id, version, digest} (non-cast op)`
|
|
261
|
-
);
|
|
262
|
-
}
|
|
263
|
-
assertKernelRef(step.kernelRef, `execution.steps[${index}].kernelRef`);
|
|
264
|
-
const entry = String(step.entry ?? 'main').trim() || 'main';
|
|
265
|
-
let expectedKernelRef;
|
|
266
|
-
try {
|
|
267
|
-
expectedKernelRef = buildKernelRefFromKernelEntry(step.kernel, entry);
|
|
268
|
-
} catch (error) {
|
|
269
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
270
|
-
throw new Error(
|
|
271
|
-
`[ExecutionV0] execution.steps[${index}] "${step.id}" kernel "${step.kernel}#${entry}" ` +
|
|
272
|
-
`cannot be content-pinned: ${message}`
|
|
273
|
-
);
|
|
274
|
-
}
|
|
275
|
-
if (!isKernelRefBoundToKernel(step.kernelRef, step.kernel, entry)) {
|
|
276
|
-
throw new Error(
|
|
277
|
-
`[ExecutionV0] execution.steps[${index}] "${step.id}" kernelRef does not match kernel binding ` +
|
|
278
|
-
`("${step.kernel}#${entry}"). Expected ${expectedKernelRef.id}@${expectedKernelRef.version} ${expectedKernelRef.digest}.`
|
|
279
|
-
);
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
function assertExecutionRuntimeOverlay(runtimeInference) {
|
|
285
|
-
if (!runtimeInference || typeof runtimeInference !== 'object') {
|
|
286
|
-
return;
|
|
287
|
-
}
|
|
288
|
-
const unknownKeys = Object.keys(runtimeInference).filter((key) => !EXECUTION_V0_RUNTIME_KEYS.has(key));
|
|
289
|
-
if (unknownKeys.length > 0) {
|
|
290
|
-
throw new Error(
|
|
291
|
-
`[ExecutionV0] runtime.inference overlay supports only ${[...EXECUTION_V0_RUNTIME_KEYS].join(', ')}; ` +
|
|
292
|
-
`got unsupported keys: ${unknownKeys.join(', ')}.`
|
|
293
|
-
);
|
|
294
|
-
}
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
function validateUniqueStepIds(steps) {
|
|
298
|
-
const ids = new Set();
|
|
299
|
-
for (const step of steps) {
|
|
300
|
-
if (ids.has(step.id)) {
|
|
301
|
-
throw new Error(`[ExecutionV0] duplicate step id "${step.id}"`);
|
|
302
|
-
}
|
|
303
|
-
ids.add(step.id);
|
|
304
|
-
}
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
/**
 * Verifies that a manifest carrying an execution section declares the
 * execution-v0 schema id.
 *
 * Manifests for which hasExecutionV0() is false are skipped.
 *
 * @param {object|null|undefined} manifestInference - manifest.inference section.
 * @throws {Error} When `schema` differs from EXECUTION_V0_SCHEMA_ID.
 */
function assertExecutionV0Schema(manifestInference) {
  if (hasExecutionV0(manifestInference)) {
    // A missing schema is reported as null in the error message.
    const declaredSchema = manifestInference?.schema ?? null;
    if (declaredSchema === EXECUTION_V0_SCHEMA_ID) {
      return;
    }
    throw new Error(
      `[ExecutionV0] manifest.inference.schema must be "${EXECUTION_V0_SCHEMA_ID}" ` +
      `when execution is present; got "${declaredSchema}".`
    );
  }
}
|
|
317
|
-
|
|
318
|
-
/**
 * Applies an execution patch (`set`, `remove`, `add`) to a copy of the base steps.
 *
 * All `set` and `remove` entries are validated before anything is modified, and
 * the base array is never mutated: work happens on a cloneJson() copy.
 * Operations are applied in the order set -> remove -> add. Several
 * `insertAfter` additions on the same anchor keep their declaration order.
 *
 * @param {Array<object>} baseSteps - Manifest steps.
 * @param {{set?: Array, remove?: Array, add?: Array}|null|undefined} patch
 * @returns {Array<object>} `baseSteps` itself when there is no patch, otherwise
 *   a new patched step list.
 * @throws {Error} On malformed entries, unknown targets/anchors, immutable
 *   fields, or duplicate step ids.
 */
function applyExecutionPatchAtomic(baseSteps, patch) {
  if (!patch) {
    return baseSteps;
  }

  const workingSteps = cloneJson(baseSteps);
  const positionById = new Map();
  workingSteps.forEach((step, position) => {
    positionById.set(step.id, position);
  });

  const hasStringId = (candidate) => (
    Boolean(candidate) && typeof candidate === 'object' && typeof candidate.id === 'string'
  );

  // Pass 1: validate every `set` entry.
  for (const setEntry of patch.set ?? []) {
    if (!hasStringId(setEntry)) {
      throw new Error('[ExecutionV0] executionPatch.set entries require id');
    }
    if (!positionById.has(setEntry.id)) {
      throw new Error(`[ExecutionV0] executionPatch.set target "${setEntry.id}" does not exist`);
    }
    const forbiddenKey = Object.keys(setEntry).find(
      (key) => key !== 'id' && !PATCH_SET_MUTABLE_FIELDS.has(key)
    );
    if (forbiddenKey !== undefined) {
      throw new Error(`[ExecutionV0] executionPatch.set "${setEntry.id}" cannot mutate "${forbiddenKey}"`);
    }
  }

  // Pass 2: validate every `remove` entry.
  for (const removeEntry of patch.remove ?? []) {
    if (!hasStringId(removeEntry)) {
      throw new Error('[ExecutionV0] executionPatch.remove entries require id');
    }
    if (!positionById.has(removeEntry.id)) {
      throw new Error(`[ExecutionV0] executionPatch.remove target "${removeEntry.id}" does not exist`);
    }
  }

  // Pass 3: apply `set`. Object-valued fields are cloned; `entry` is copied as-is.
  const clonedFields = ['precision', 'kvIO', 'constants'];
  for (const setEntry of patch.set ?? []) {
    const target = workingSteps[positionById.get(setEntry.id)];
    for (const field of clonedFields) {
      if (setEntry[field] !== undefined) {
        target[field] = cloneJson(setEntry[field]);
      }
    }
    if (setEntry.entry !== undefined) {
      target.entry = setEntry.entry;
    }
  }

  // Pass 4: drop removed steps. `filter` yields a fresh array that is safe to splice.
  const removedIds = new Set((patch.remove ?? []).map((removeEntry) => removeEntry.id));
  const current = workingSteps.filter((step) => !removedIds.has(step.id));

  // Pass 5: insert added steps relative to their anchors.
  const idsInsertedAfterAnchor = new Map();
  for (const addEntry of patch.add ?? []) {
    if (!addEntry?.step || typeof addEntry.step !== 'object') {
      throw new Error('[ExecutionV0] executionPatch.add requires a step payload');
    }
    const hasBefore = typeof addEntry.insertBefore === 'string' && addEntry.insertBefore.length > 0;
    const hasAfter = typeof addEntry.insertAfter === 'string' && addEntry.insertAfter.length > 0;
    if (hasBefore === hasAfter) {
      throw new Error('[ExecutionV0] executionPatch.add requires exactly one of insertBefore or insertAfter');
    }
    if (current.some((step) => step.id === addEntry.step.id)) {
      throw new Error(`[ExecutionV0] executionPatch.add step id "${addEntry.step.id}" already exists`);
    }

    const anchorId = hasBefore ? addEntry.insertBefore : addEntry.insertAfter;
    const anchorIndex = current.findIndex((step) => step.id === anchorId);
    if (anchorIndex < 0) {
      throw new Error(`[ExecutionV0] executionPatch.add anchor "${anchorId}" not found`);
    }

    if (hasBefore) {
      current.splice(anchorIndex, 0, cloneJson(addEntry.step));
      continue;
    }

    let earlierInserts = idsInsertedAfterAnchor.get(anchorId);
    if (!earlierInserts) {
      earlierInserts = [];
      idsInsertedAfterAnchor.set(anchorId, earlierInserts);
    }
    // Skip past steps already inserted after this anchor so order follows the patch.
    let insertIndex = anchorIndex + 1;
    while (insertIndex < current.length && earlierInserts.includes(current[insertIndex].id)) {
      insertIndex += 1;
    }
    current.splice(insertIndex, 0, cloneJson(addEntry.step));
    earlierInserts.push(addEntry.step.id);
  }

  validateUniqueStepIds(current);
  return current;
}
|
|
402
|
-
|
|
403
|
-
/**
 * Summarises which steps a runtime patch adds and which precision / kvIO
 * fields it sets.
 *
 * @param {{add?: Array, set?: Array}|null|undefined} patch - Runtime execution patch.
 * @returns {{addedSteps: Set<string>, precisionFieldsByStep: Map<string, Set<string>>, kvIOFieldsByStep: Set<string>}}
 *   `addedSteps` holds ids of added steps, `precisionFieldsByStep` maps a step
 *   id to the precision keys set on it, and `kvIOFieldsByStep` holds ids whose
 *   kvIO is set. All three are empty when `patch` is not an object.
 */
function indexRuntimePatchMeta(patch) {
  const addedSteps = new Set();
  const precisionFieldsByStep = new Map();
  const kvIOFieldsByStep = new Set();
  const meta = { addedSteps, precisionFieldsByStep, kvIOFieldsByStep };

  if (!patch || typeof patch !== 'object') {
    return meta;
  }

  const isUsableId = (value) => typeof value === 'string' && value.length > 0;

  for (const addEntry of patch.add ?? []) {
    const addedId = addEntry?.step?.id;
    if (isUsableId(addedId)) {
      addedSteps.add(addedId);
    }
  }

  for (const setEntry of patch.set ?? []) {
    const targetId = setEntry?.id;
    if (!isUsableId(targetId)) {
      continue;
    }
    const { precision, kvIO } = setEntry;
    if (precision && typeof precision === 'object') {
      precisionFieldsByStep.set(targetId, new Set(Object.keys(precision)));
    }
    if (kvIO && typeof kvIO === 'object') {
      kvIOFieldsByStep.add(targetId);
    }
  }

  return meta;
}
|
|
432
|
-
|
|
433
|
-
/**
 * Builds the starting slot -> dtype map, which contains only the `state` slot.
 *
 * The dtype comes from sessionDefaults.compute.defaults.activationDtype and
 * falls back to 'f16' when that is null or undefined.
 *
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @returns {Map<string, *>} Map with a single `state` entry holding the value
 *   returned by normalizeDtype().
 */
function createInitialSlotDtypes(sessionDefaults) {
  const configuredActivation = sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16';
  const slotDtypes = new Map();
  slotDtypes.set(
    'state',
    normalizeDtype(configuredActivation, 'sessionDefaults.compute.defaults.activationDtype')
  );
  return slotDtypes;
}
|
|
440
|
-
|
|
441
|
-
/**
 * Checks that an attention step's KV read/write dtypes equal the session's
 * kvcache.kvDtype.
 *
 * Nothing is checked for non-attention steps, when `kvIO` is falsy, or when
 * the session does not declare kvcache.kvDtype.
 *
 * @param {{op: string, id: string}} step - Step being resolved.
 * @param {{readDtype: string, writeDtype: string}|null} kvIO - Resolved KV I/O.
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @throws {Error} When either dtype differs from the normalized session kvDtype.
 */
function ensureCompatibleKV(step, kvIO, sessionDefaults) {
  const isAttentionWithKV = step.op === 'attention' && Boolean(kvIO);
  if (!isAttentionWithKV) {
    return;
  }

  const declaredKvDtype = sessionDefaults?.kvcache?.kvDtype;
  if (declaredKvDtype == null) {
    return;
  }

  const expectedDtype = normalizeDtype(declaredKvDtype, 'sessionDefaults.kvcache.kvDtype');
  const { readDtype, writeDtype } = kvIO;
  const bothMatch = [readDtype, writeDtype].every((dtype) => dtype === expectedDtype);
  if (bothMatch) {
    return;
  }

  throw new Error(
    `[ExecutionV0] step "${step.id}" kvIO read/write (${readDtype}/${writeDtype}) ` +
    `must match sessionDefaults.kvcache.kvDtype (${expectedDtype}).`
  );
}
|
|
457
|
-
|
|
458
|
-
/**
 * Resolves precision, slot dtypes and KV I/O for every step that belongs to
 * `phase`, walking the steps in order and tracking the dtype each slot holds.
 *
 * @param {string} phase - Phase to resolve; steps are selected via isPhaseMatch().
 * @param {Array<object>} steps - Execution steps (all phases).
 * @param {object} sessionDefaults - Resolved session defaults.
 * @param {*} profileIndex - Kernel profile index handed to resolveProfile().
 * @param {{unresolvedKernel?: string, dtypeTransition?: string}} policies
 * @param {object} [options]
 * @param {Iterable} [options.initialSlotDtypes] - Slot dtypes to start from
 *   (copied); defaults to createInitialSlotDtypes(sessionDefaults).
 * @param {*} [options.sourceTrace] - When truthy, per-field sources are recorded
 *   through setStepSourceTrace().
 * @param {object} [options.sessionDefaultSources] - Recorded origin of each
 *   session default, used to refine 'sessionDefault' markers.
 * @param {object} [options.runtimePatchMeta] - Result of indexRuntimePatchMeta().
 * @returns {{steps: Array<object>, finalSlotDtypes: Map}} Resolved steps plus
 *   the slot dtype map after the last step.
 * @throws {Error} On unresolved kernel profiles (policy 'error'), reads of
 *   unproduced slots, implicit dtype transitions (policy 'require_cast_step'),
 *   and cast steps whose fromDtype disagrees with the slot.
 */
function resolvePhaseSteps(phase, steps, sessionDefaults, profileIndex, policies, options = {}) {
  const slotDtypes = options.initialSlotDtypes
    ? new Map(options.initialSlotDtypes)
    : createInitialSlotDtypes(sessionDefaults);
  const sourceTrace = options.sourceTrace ?? null;
  const sessionDefaultSources = options.sessionDefaultSources ?? {};
  const patchMeta = options.runtimePatchMeta ?? {
    addedSteps: new Set(),
    precisionFieldsByStep: new Map(),
    kvIOFieldsByStep: new Set(),
  };
  const resolvedSteps = [];

  // Replaces a 'sessionDefault' marker with the recorded origin of that default.
  const originOf = (source, defaultsKey) => (
    source === 'sessionDefault' ? (sessionDefaultSources[defaultsKey] ?? 'derived') : source
  );

  for (const step of steps) {
    const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
    if (!isPhaseMatch(stepPhase, phase)) {
      continue;
    }

    const profile = resolveProfile(profileIndex, step);
    const lacksProfile = Boolean(step.kernelRef) && !profile;
    if (lacksProfile && policies.unresolvedKernel === 'error') {
      throw new Error(
        `[ExecutionV0] step "${step.id}" references kernel profile ` +
        `${step.kernelRef.id}@${step.kernelRef.version} (${step.kernelRef.digest}) ` +
        'but no matching sessionDefaults.compute.kernelProfiles entry was found.'
      );
    }

    const { precision, sources: precisionSources } = resolvePrecision(step, profile, sessionDefaults);
    const src = normalizeSlot(step.src, `${step.id}.src`);
    const dst = normalizeSlot(step.dst, `${step.id}.dst`);
    if (!slotDtypes.has(src)) {
      throw new Error(
        `[ExecutionV0] step "${step.id}" reads slot "${src}" before it is produced. ` +
        'Add an explicit producer step or cast/load bridge.'
      );
    }

    const slotDtype = slotDtypes.get(src);
    const inputDtype = normalizeDtype(precision.inputDtype ?? slotDtype, `${step.id}.precision.inputDtype`);
    const isCast = step.op === 'cast';

    if (policies.dtypeTransition === 'require_cast_step' && !isCast && inputDtype !== slotDtype) {
      throw new Error(
        `[ExecutionV0] step "${step.id}" requires inputDtype=${inputDtype} ` +
        `but slot "${src}" currently holds ${slotDtype}. Insert explicit cast step.`
      );
    }

    let outputDtype = normalizeDtype(precision.outputDtype, `${step.id}.precision.outputDtype`);
    let outputDtypeSource = precisionSources.outputDtype;
    if (isCast) {
      // Cast steps take their output dtype from the step itself.
      outputDtype = normalizeDtype(step.toDtype, `${step.id}.toDtype`);
      outputDtypeSource = 'manifest';
      const fromDtype = step.fromDtype
        ? normalizeDtype(step.fromDtype, `${step.id}.fromDtype`)
        : slotDtype;
      if (fromDtype !== slotDtype) {
        throw new Error(
          `[ExecutionV0] cast step "${step.id}" fromDtype=${fromDtype} does not match slot "${src}" dtype=${slotDtype}`
        );
      }
    } else if (outputDtypeSource === 'sessionDefault') {
      // A kernel declaring exactly one output dtype overrides the session default.
      const declaredOutputs = getKernelOutputCapabilities(step);
      if (declaredOutputs && declaredOutputs.size === 1) {
        [outputDtype] = [...declaredOutputs];
        outputDtypeSource = 'derived';
      }
    }

    const resolvedPrecision = {
      inputDtype,
      mathDtype: normalizeDtype(precision.mathDtype, `${step.id}.precision.mathDtype`),
      accumDtype: normalizeDtype(precision.accumDtype, `${step.id}.precision.accumDtype`),
      outputDtype,
    };
    assertKernelPrecisionCapability(step, resolvedPrecision, policies);
    slotDtypes.set(dst, outputDtype);

    const kvIOResolved = step.op === 'attention'
      ? resolveKVIO(step, profile, sessionDefaults)
      : null;
    const kvIO = kvIOResolved?.value ?? null;
    ensureCompatibleKV(step, kvIO, sessionDefaults);

    if (sourceTrace) {
      const patchedFields = patchMeta.precisionFieldsByStep.get(step.id) ?? new Set();
      const isAddedStep = patchMeta.addedSteps.has(step.id);
      // Added steps count as patched for every precision field they declare.
      const wasPatched = (field) => (
        isAddedStep ? step.precision?.[field] != null : patchedFields.has(field)
      );
      const record = (tracePath, patched, fallbackSource) => {
        setStepSourceTrace(sourceTrace, step.id, tracePath, patched ? 'runtime.patch' : fallbackSource);
      };

      record(
        'precision.inputDtype',
        wasPatched('inputDtype'),
        precision.inputDtype != null ? precisionSources.inputDtype : 'derived'
      );
      record('precision.mathDtype', wasPatched('mathDtype'), originOf(precisionSources.mathDtype, 'mathDtype'));
      record('precision.accumDtype', wasPatched('accumDtype'), originOf(precisionSources.accumDtype, 'accumDtype'));
      record('precision.outputDtype', wasPatched('outputDtype'), originOf(outputDtypeSource, 'outputDtype'));

      if (step.op === 'attention') {
        const kvPatched = patchMeta.kvIOFieldsByStep.has(step.id) || (isAddedStep && !!step.kvIO);
        const kvSource = originOf(kvIOResolved?.source ?? 'derived', 'kvDtype');
        record('kvIO.readDtype', kvPatched, kvSource);
        record('kvIO.writeDtype', kvPatched, kvSource);
      }
    }

    resolvedSteps.push({
      ...step,
      src,
      dst,
      phase: stepPhase,
      section: normalizeSection(step.section, `${step.id}.section`),
      precision: resolvedPrecision,
      kvIO,
    });
  }

  return {
    steps: resolvedSteps,
    finalSlotDtypes: slotDtypes,
  };
}
|
|
609
|
-
|
|
610
|
-
/**
 * Removes dtype defaults from a runtime `compute` section for every dtype key
 * that the manifest's compute defaults pin to a non-null value.
 *
 * The input is never mutated. When the manifest pins at least one dtype key,
 * a new object is returned even if nothing was actually removed.
 *
 * @param {object|null|undefined} compute - Runtime session compute section.
 * @param {object|null|undefined} manifestComputeDefaults - Manifest compute defaults.
 * @returns {object|null|undefined} `compute` itself when nothing applies; a
 *   copy without the pinned keys otherwise; `null` when the copy ends up empty.
 */
function stripPresetComputeDefaults(compute, manifestComputeDefaults) {
  if (!compute?.defaults || !manifestComputeDefaults) {
    return compute;
  }

  const pinnedKeys = ['activationDtype', 'mathDtype', 'accumDtype', 'outputDtype']
    .filter((key) => manifestComputeDefaults[key] != null);
  if (pinnedKeys.length === 0) {
    return compute;
  }

  const remainingDefaults = { ...compute.defaults };
  pinnedKeys.forEach((key) => {
    delete remainingDefaults[key];
  });

  if (Object.keys(remainingDefaults).length > 0) {
    return { ...compute, defaults: remainingDefaults };
  }

  // Every default was pinned by the manifest: drop `defaults` altogether.
  const computeWithoutDefaults = { ...compute };
  delete computeWithoutDefaults.defaults;
  return Object.keys(computeWithoutDefaults).length === 0 ? null : computeWithoutDefaults;
}
|
|
634
|
-
|
|
635
|
-
/**
 * Strips preset placeholders from a runtime session so that manifest session
 * defaults can take effect when the two are merged.
 *
 * Three things are stripped, each only when the manifest supplies a value:
 *  - compute dtype defaults pinned by the manifest (via stripPresetComputeDefaults);
 *  - an empty `compute.kernelProfiles` array;
 *  - `kvcache: null` and `decodeLoop: null`.
 *
 * @param {object|null|undefined} runtimeSession - runtime.inference.session.
 * @param {object|null|undefined} manifestInference - manifest.inference section.
 * @returns {object|null|undefined} `runtimeSession` itself when nothing was
 *   stripped (or it is not an object), otherwise a shallow copy without the
 *   stripped entries.
 */
function normalizeRuntimeSessionForExecutionV0(runtimeSession, manifestInference) {
  if (!runtimeSession || typeof runtimeSession !== 'object') {
    return runtimeSession;
  }

  const manifestDefaults = manifestInference?.sessionDefaults ?? null;
  const manifestComputeDefaults = manifestDefaults?.compute?.defaults ?? null;
  const manifestProfiles = manifestDefaults?.compute?.kernelProfiles;
  const manifestHasProfiles = Array.isArray(manifestProfiles) && manifestProfiles.length > 0;

  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
  const readOwn = (key) => (hasOwn(runtimeSession, key) ? runtimeSession[key] : undefined);

  let changed = false;
  let compute = runtimeSession.compute ?? null;

  // Preset dtype defaults must not shadow model-specific manifest dtypes.
  if (manifestComputeDefaults) {
    const strippedCompute = stripPresetComputeDefaults(compute, manifestComputeDefaults);
    if (strippedCompute !== compute) {
      compute = strippedCompute;
      changed = true;
    }
  }

  // An empty kernelProfiles list is a placeholder when the manifest has profiles.
  if (compute && hasOwn(compute, 'kernelProfiles')) {
    const runtimeProfiles = compute.kernelProfiles;
    const isEmptyPlaceholder = Array.isArray(runtimeProfiles) && runtimeProfiles.length === 0;
    if (isEmptyPlaceholder && manifestHasProfiles) {
      const { kernelProfiles: _placeholder, ...computeWithoutProfiles } = compute;
      compute = Object.keys(computeWithoutProfiles).length === 0 ? null : computeWithoutProfiles;
      changed = true;
    }
  }

  // Explicit nulls are preset placeholders when the manifest declares the section.
  let kvcache = readOwn('kvcache');
  if (kvcache === null && manifestDefaults?.kvcache != null) {
    kvcache = undefined;
    changed = true;
  }

  let decodeLoop = readOwn('decodeLoop');
  if (decodeLoop === null && manifestDefaults?.decodeLoop != null) {
    decodeLoop = undefined;
    changed = true;
  }

  if (!changed) {
    return runtimeSession;
  }

  const normalized = { ...runtimeSession };
  if (compute) {
    normalized.compute = compute;
  } else {
    delete normalized.compute;
  }
  for (const [key, value] of [['kvcache', kvcache], ['decodeLoop', decodeLoop]]) {
    if (value === undefined) {
      delete normalized[key];
    } else {
      normalized[key] = value;
    }
  }

  return Object.keys(normalized).length === 0 ? {} : normalized;
}
|
|
714
|
-
|
|
715
|
-
/**
 * Checks that decode steps reading a slot carried over from prefill expect
 * the dtype prefill left in it.
 *
 * A slot counts as carried while no earlier decode step has written it and
 * prefill's final slot map contains it. Cast steps are exempt.
 *
 * @param {object} options
 * @param {Array<object>} options.steps - All execution steps.
 * @param {Map} options.prefillFinalSlotDtypes - Slot dtypes after prefill.
 * @param {Map} options.decodeInitialSlotDtypes - Slot dtypes decode starts from.
 * @param {object} options.sessionDefaults - Resolved session defaults.
 * @param {*} options.profileIndex - Kernel profile index for resolveProfile().
 * @param {object} options.policies - Accepted for signature parity; unused here.
 * @throws {Error} When a decode step's input dtype differs from the carried dtype.
 */
function validatePhaseBoundaryCompatibility(options) {
  const {
    steps,
    prefillFinalSlotDtypes,
    decodeInitialSlotDtypes,
    sessionDefaults,
    profileIndex,
  } = options;

  const decodeSteps = steps.filter((step) => {
    const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
    return isPhaseMatch(stepPhase, 'decode');
  });
  if (decodeSteps.length === 0) {
    return;
  }

  const assertCarriedDtype = (step, slot) => {
    const profile = resolveProfile(profileIndex, step);
    const { precision } = resolvePrecision(step, profile, sessionDefaults);
    const carriedDtype = prefillFinalSlotDtypes.get(slot);
    const expectedInput = normalizeDtype(
      precision.inputDtype ?? carriedDtype ?? decodeInitialSlotDtypes.get(slot),
      `${step.id}.precision.inputDtype`
    );
    if (expectedInput === carriedDtype) {
      return;
    }
    throw new Error(
      `[ExecutionV0] decode step "${step.id}" reads carried slot "${slot}" as ${expectedInput} ` +
      `but prefill left ${carriedDtype}. Add explicit cast at phase boundary.`
    );
  };

  const slotsWrittenInDecode = new Set();
  for (const step of decodeSteps) {
    const src = normalizeSlot(step.src, `${step.id}.src`);
    const dst = normalizeSlot(step.dst, `${step.id}.dst`);
    const isCarried = !slotsWrittenInDecode.has(src) && prefillFinalSlotDtypes.has(src);
    if (isCarried && step.op !== 'cast') {
      assertCarriedDtype(step, src);
    }
    slotsWrittenInDecode.add(dst);
  }
}
|
|
753
|
-
|
|
754
|
-
/**
 * Rejects execution contracts that combine the "bdpa" KV cache layout with an
 * attention step matching the prefill phase.
 *
 * @param {Array<object>} steps - Execution steps.
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @throws {Error} Naming the first prefill attention step when the layout is "bdpa".
 */
function assertKVLayoutExecutionCompatibility(steps, sessionDefaults) {
  const layout = normalizeKVLayout(sessionDefaults?.kvcache?.layout, 'sessionDefaults.kvcache.layout');
  if (layout !== 'bdpa') {
    return;
  }

  const isPrefillAttention = (step) => {
    if (step?.op !== 'attention') {
      return false;
    }
    return isPhaseMatch(normalizePhase(step.phase, `${step.id}.phase`), 'prefill');
  };

  const offendingStep = steps.find(isPrefillAttention);
  if (offendingStep) {
    throw new Error(
      `[ExecutionV0] sessionDefaults.kvcache.layout="bdpa" is decode-only, ` +
      `but step "${offendingStep.id}" declares prefill attention. ` +
      'Use a non-BDPA KV layout for prefill-capable models or remove prefill attention from the execution contract.'
    );
  }
}
|
|
772
|
-
|
|
773
|
-
/**
 * Converts a resolved execution step into a kernel-path step.
 *
 * Cast steps and steps without a kernel have no kernel-path counterpart.
 *
 * The entry point is normalized the same way the kernelRef binding check in
 * this file does it (`String(entry ?? 'main').trim() || 'main'`), so a blank
 * or whitespace-padded entry resolves to the kernel entry that was validated.
 *
 * @param {{op: string, kernel?: string, entry?: string, weights?: *, constants?: *}} step
 * @returns {{op: string, kernel: string, entry: string, weights?: *, constants?: *}|null}
 *   The kernel-path step, or `null` for cast / kernel-less steps.
 */
function toKernelPathStep(step) {
  if (step.op === 'cast' || !step.kernel) {
    return null;
  }
  const entry = String(step.entry ?? 'main').trim() || 'main';
  return {
    op: step.op,
    kernel: step.kernel,
    entry,
    // Optional fields are only emitted when truthy on the source step.
    ...(step.weights ? { weights: step.weights } : {}),
    ...(step.constants ? { constants: step.constants } : {}),
  };
}
|
|
784
|
-
|
|
785
|
-
/**
 * Collects kernel-path steps for one section, optionally restricted to a phase.
 *
 * @param {Array<object>} steps - Resolved execution steps.
 * @param {string} section - Section the step's `section` must equal.
 * @param {string|null} [phase=null] - When truthy, only steps for which
 *   isPhaseMatch(step.phase, phase) holds are kept.
 * @returns {Array<object>} Kernel-path steps; steps that toKernelPathStep()
 *   maps to null/undefined are dropped.
 */
function getSectionSteps(steps, section, phase = null) {
  const inSection = steps.filter((step) => step.section === section);
  const inPhase = phase
    ? inSection.filter((step) => isPhaseMatch(step.phase, phase))
    : inSection;
  return inPhase.reduce((kernelSteps, step) => {
    const kernelStep = toKernelPathStep(step);
    if (kernelStep != null) {
      kernelSteps.push(kernelStep);
    }
    return kernelSteps;
  }, []);
}
|
|
792
|
-
|
|
793
|
-
/**
 * Collects kernel-path steps of the `layer` section that match a phase and
 * apply to one layer index.
 *
 * @param {Array<object>} steps - Resolved execution steps.
 * @param {string} phase - Phase passed to isPhaseMatch().
 * @param {number} layerIdx - Layer index passed to stepHasLayer().
 * @returns {Array<object>} Kernel-path steps; steps that toKernelPathStep()
 *   maps to null/undefined are dropped.
 */
function buildLayerPhaseSteps(steps, phase, layerIdx) {
  const appliesToLayer = (step) => (
    step.section === 'layer' && isPhaseMatch(step.phase, phase)
  );
  const layerSteps = steps
    .filter(appliesToLayer)
    .filter((step) => stepHasLayer(step, layerIdx));

  const kernelSteps = [];
  for (const step of layerSteps) {
    const kernelStep = toKernelPathStep(step);
    if (kernelStep != null) {
      kernelSteps.push(kernelStep);
    }
  }
  return kernelSteps;
}
|
|
800
|
-
|
|
801
|
-
/**
 * Flattens every step list of an inline kernel path into one array.
 *
 * Order: preLayer, decode.steps, prefill.steps, postLayer, sampling, then the
 * steps of each layer override. Missing lists contribute nothing.
 *
 * @param {object|null|undefined} path - Inline kernel path.
 * @returns {Array<object>} All steps in the order above.
 */
function getInlineKernelPathSteps(path) {
  const stepLists = [
    path?.preLayer,
    path?.decode?.steps,
    path?.prefill?.steps,
    path?.postLayer,
    path?.sampling,
    path?.layerOverrides?.flatMap((override) => override.steps ?? []),
  ];

  const allSteps = [];
  for (const stepList of stepLists) {
    allSteps.push(...(stepList ?? []));
  }
  return allSteps;
}
|
|
811
|
-
|
|
812
|
-
/**
 * Checks that each attention kernel of an inline kernel path is compatible
 * with the resolved activation and KV cache dtypes.
 *
 * The required dtypes are derived from the kernel name:
 *  - contains `_f16kv` -> activation f32, KV f16;
 *  - otherwise contains `_f16` -> activation f16, KV f16;
 *  - otherwise -> activation f32, KV f32.
 * Kernels whose name does not start with `attention` are not checked.
 *
 * @param {object|null|undefined} path - Inline kernel path; falsy paths are skipped.
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults,
 *   used as fallback for dtypes the path does not declare.
 * @throws {Error} On the first attention kernel whose requirement is not met.
 */
function assertInlineKernelPathSessionCompatibility(path, sessionDefaults) {
  if (!path) {
    return;
  }

  const activationDtype = normalizeDtype(
    path.activationDtype ?? sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
    'inlineKernelPath.activationDtype'
  );
  const kvDtype = normalizeDtype(
    path.kvDtype ?? sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
    'inlineKernelPath.kvDtype'
  );

  // `_f16kv` must be tested first because such names also contain `_f16`.
  const requirementFor = (kernelName) => {
    if (kernelName.includes('_f16kv')) {
      return { activation: 'f32', kv: 'f16' };
    }
    if (kernelName.includes('_f16')) {
      return { activation: 'f16', kv: 'f16' };
    }
    return { activation: 'f32', kv: 'f32' };
  };

  for (const step of getInlineKernelPathSteps(path)) {
    const kernel = String(step?.kernel ?? '').trim();
    if (!kernel.startsWith('attention')) {
      continue;
    }
    const required = requirementFor(kernel);
    if (activationDtype === required.activation && kvDtype === required.kv) {
      continue;
    }
    throw new Error(
      `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
      `activationDtype="${required.activation}" and kvcache.kvDtype="${required.kv}", but resolved ` +
      `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
    );
  }
}
|
|
859
|
-
|
|
860
|
-
/**
 * Builds an inline kernel path from resolved execution steps.
 *
 * Layer 0 supplies the default decode/prefill step lists; if one phase has no
 * steps it reuses the other phase's list. Every layer whose decode or prefill
 * steps differ from those defaults gets a `layerOverrides` entry.
 * preLayer, postLayer and (decode-phase) sampling sections are attached when
 * non-empty, and the result is checked with
 * assertInlineKernelPathSessionCompatibility() before being returned.
 *
 * @param {Array<object>} steps - Resolved execution steps.
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @param {string} modelId - Used for the path id; falsy values become 'model'.
 * @param {number} numLayers - Number of layers to scan for overrides.
 * @param {string|null} [finitenessFallbackKernelPathId=null] - Attached to the
 *   path only when it is a non-empty string.
 * @returns {object|null} The kernel path, or `null` when layer 0 has neither
 *   decode nor prefill steps.
 */
function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers, finitenessFallbackKernelPathId = null) {
  const activationDtype = normalizeDtype(
    sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
    'sessionDefaults.compute.defaults.activationDtype'
  );
  const kvDtype = normalizeDtype(
    sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
    'sessionDefaults.kvcache.kvDtype'
  );
  const decodeSteps = buildLayerPhaseSteps(steps, 'decode', 0);
  const prefillSteps = buildLayerPhaseSteps(steps, 'prefill', 0);
  if (decodeSteps.length === 0 && prefillSteps.length === 0) {
    return null;
  }

  const hasFallbackId = typeof finitenessFallbackKernelPathId === 'string'
    && finitenessFallbackKernelPathId.length > 0;
  const path = {
    id: `${modelId || 'model'}-execution-v0`,
    name: 'Execution v0 inline kernel path',
    description: 'Generated from manifest.inference.execution.steps',
    activationDtype,
    kvDtype,
    ...(hasFallbackId ? { finitenessFallbackKernelPathId } : {}),
    decode: {
      steps: decodeSteps.length > 0 ? decodeSteps : prefillSteps,
    },
    prefill: {
      steps: prefillSteps.length > 0 ? prefillSteps : decodeSteps,
    },
  };

  if (numLayers > 0) {
    // The default step lists do not change inside the loop, so serialize them once.
    const defaultDecodeJson = JSON.stringify(path.decode.steps);
    const defaultPrefillJson = JSON.stringify(path.prefill.steps);
    const overrides = [];
    for (let layerIdx = 0; layerIdx < numLayers; layerIdx++) {
      const decodeLayerSteps = buildLayerPhaseSteps(steps, 'decode', layerIdx);
      const prefillLayerSteps = buildLayerPhaseSteps(steps, 'prefill', layerIdx);
      const hasCustomDecode = JSON.stringify(decodeLayerSteps) !== defaultDecodeJson;
      const hasCustomPrefill = JSON.stringify(prefillLayerSteps) !== defaultPrefillJson;
      if (!hasCustomDecode && !hasCustomPrefill) {
        continue;
      }
      // Kernel path layerOverrides are single-step lists per layer.
      const mergedLayerSteps = decodeLayerSteps.length > 0
        ? decodeLayerSteps
        : prefillLayerSteps;
      if (mergedLayerSteps.length > 0) {
        overrides.push({
          layers: [layerIdx],
          steps: mergedLayerSteps,
        });
      }
    }
    if (overrides.length > 0) {
      path.layerOverrides = overrides;
    }
  }

  const preLayer = getSectionSteps(steps, 'preLayer');
  if (preLayer.length > 0) {
    path.preLayer = preLayer;
  }
  const postLayer = getSectionSteps(steps, 'postLayer');
  if (postLayer.length > 0) {
    path.postLayer = postLayer;
  }
  const sampling = getSectionSteps(steps, 'sampling', 'decode');
  if (sampling.length > 0) {
    path.sampling = sampling;
  }

  assertInlineKernelPathSessionCompatibility(path, sessionDefaults);
  return path;
}
|
|
932
|
-
|
|
933
|
-
/**
 * Projects the `layer` section of an execution contract onto a layer pipeline.
 *
 * @param {Array<object>} steps - Resolved execution steps.
 * @returns {{steps: Array<object>, overrides: Array}|null} The pipeline, or
 *   `null` when there are no layer steps or any layer step uses an op outside
 *   PIPELINE_COMPATIBLE_OPS.
 */
function buildLayerPipelineFromExecution(steps) {
  const layerSectionSteps = steps.filter((step) => step.section === 'layer');
  if (layerSectionSteps.length === 0) {
    return null;
  }
  const allCompatible = layerSectionSteps.every((step) => PIPELINE_COMPATIBLE_OPS.has(step.op));
  if (!allCompatible) {
    return null;
  }

  // Copied whenever defined (falsy values such as false/0 are preserved).
  const copyIfDefined = ['residual', 'a', 'b', 'variant', 'skipInputNorm'];
  // Copied only when truthy.
  const copyIfTruthy = ['fromDtype', 'toDtype', 'probeStage', 'name', 'weight'];

  const toPipelineStep = (step) => {
    const pipelineStep = {
      op: step.op,
      phase: step.phase,
      src: step.src ?? 'state',
      dst: step.dst ?? 'state',
    };
    for (const key of copyIfDefined) {
      if (step[key] !== undefined) {
        pipelineStep[key] = step[key];
      }
    }
    if (step.precision?.inputDtype) {
      pipelineStep.inputDtype = step.precision.inputDtype;
    }
    if (step.precision?.outputDtype) {
      pipelineStep.outputDtype = step.precision.outputDtype;
    }
    for (const key of copyIfTruthy) {
      if (step[key]) {
        pipelineStep[key] = step[key];
      }
    }
    return pipelineStep;
  };

  return {
    steps: layerSectionSteps.map((step) => toPipelineStep(step)),
    overrides: [],
  };
}
|
|
967
|
-
|
|
968
|
-
/**
 * Derives a runtime session patch from resolved session defaults.
 *
 * - `compute.activationDtype` is set when a truthy activation dtype is declared.
 * - `compute.defaults` carries the truthy math/accum/output dtypes, if any.
 * - `kvcache` is passed through by reference when truthy.
 * - `batching` copies six decode-loop fields (absent ones become `undefined`).
 *
 * @param {object|null|undefined} sessionDefaults - Resolved session defaults.
 * @returns {{compute?: object, kvcache?: object, batching?: object}} Patch
 *   containing only the sections that apply; `{}` when none do.
 */
function buildSessionRuntimePatch(sessionDefaults) {
  const patch = {};
  const computeDefaults = sessionDefaults?.compute?.defaults ?? null;

  const computePatch = {};
  if (computeDefaults?.activationDtype) {
    computePatch.activationDtype = computeDefaults.activationDtype;
  }
  const precisionDefaults = {};
  for (const key of ['mathDtype', 'accumDtype', 'outputDtype']) {
    if (computeDefaults?.[key]) {
      precisionDefaults[key] = computeDefaults[key];
    }
  }
  if (Object.keys(precisionDefaults).length > 0) {
    computePatch.defaults = precisionDefaults;
  }
  if (Object.keys(computePatch).length > 0) {
    patch.compute = computePatch;
  }

  if (sessionDefaults?.kvcache) {
    patch.kvcache = sessionDefaults.kvcache;
  }

  if (sessionDefaults?.decodeLoop) {
    const {
      batchSize,
      stopCheckMode,
      readbackInterval,
      ringTokens,
      ringStop,
      ringStaging,
    } = sessionDefaults.decodeLoop;
    patch.batching = {
      batchSize,
      stopCheckMode,
      readbackInterval,
      ringTokens,
      ringStop,
      ringStaging,
    };
  }

  return patch;
}
|
|
1001
|
-
|
|
1002
|
-
/**
 * Returns a cloneJson() copy of manifest.inference.model for use as runtime
 * model overrides.
 *
 * @param {object|null|undefined} manifestInference - manifest.inference section.
 * @returns {object|null} The cloned model section, or `null` when it is
 *   missing or not an object.
 */
function buildModelRuntimeOverrides(manifestInference) {
  const modelSection = manifestInference?.model;
  const isModelObject = Boolean(modelSection) && typeof modelSection === 'object';
  return isModelObject ? cloneJson(modelSection) : null;
}
|
|
8
|
+
import {
|
|
9
|
+
applyExecutionPatchAtomic,
|
|
10
|
+
assertExecutionRuntimeOverlay,
|
|
11
|
+
assertExecutionV0Schema,
|
|
12
|
+
assertKVLayoutExecutionCompatibility,
|
|
13
|
+
collectLeafPaths,
|
|
14
|
+
createInitialSlotDtypes,
|
|
15
|
+
createSourceTrace,
|
|
16
|
+
hasDefinedPath,
|
|
17
|
+
indexKernelProfiles,
|
|
18
|
+
indexRuntimePatchMeta,
|
|
19
|
+
normalizeRuntimeSessionForExecutionV0,
|
|
20
|
+
resolvePhaseSteps,
|
|
21
|
+
setSourceTrace,
|
|
22
|
+
validateManifestSessionDefaultsContract,
|
|
23
|
+
validatePhaseBoundaryCompatibility,
|
|
24
|
+
validateStepShape,
|
|
25
|
+
validateUniqueStepIds,
|
|
26
|
+
cloneJson,
|
|
27
|
+
} from './execution-v0-contract-helpers.js';
|
|
28
|
+
import {
|
|
29
|
+
buildInlineKernelPath,
|
|
30
|
+
buildLayerPipelineFromExecution,
|
|
31
|
+
buildModelRuntimeOverrides,
|
|
32
|
+
buildSessionRuntimePatch,
|
|
33
|
+
resolveFinitenessFallbackKernelPathId,
|
|
34
|
+
} from './execution-v0-runtime-builders.js';
|
|
1009
35
|
|
|
1010
36
|
export function hasExecutionV0(manifestInference) {
|
|
1011
37
|
return !!manifestInference?.execution && Array.isArray(manifestInference.execution.steps);
|
|
@@ -1017,6 +43,7 @@ export function compileExecutionV0(options = {}) {
|
|
|
1017
43
|
return null;
|
|
1018
44
|
}
|
|
1019
45
|
assertExecutionV0Schema(manifestInference);
|
|
46
|
+
validateManifestSessionDefaultsContract(manifestInference);
|
|
1020
47
|
|
|
1021
48
|
const modelId = options.modelId ?? 'model';
|
|
1022
49
|
const numLayers = Number.isInteger(options.numLayers) ? options.numLayers : 0;
|
|
@@ -1028,7 +55,8 @@ export function compileExecutionV0(options = {}) {
|
|
|
1028
55
|
};
|
|
1029
56
|
const normalizedRuntimeSession = normalizeRuntimeSessionForExecutionV0(
|
|
1030
57
|
runtimeInference.session ?? {},
|
|
1031
|
-
manifestInference
|
|
58
|
+
manifestInference,
|
|
59
|
+
DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS
|
|
1032
60
|
);
|
|
1033
61
|
const sessionDefaults = mergeRuntimeValues(
|
|
1034
62
|
DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
|
|
@@ -1115,14 +143,7 @@ export function compileExecutionV0(options = {}) {
|
|
|
1115
143
|
&& manifestInference.defaultKernelPath.trim().length > 0
|
|
1116
144
|
? manifestInference.defaultKernelPath.trim()
|
|
1117
145
|
: null;
|
|
1118
|
-
const finitenessFallbackKernelPathId = defaultKernelPathId
|
|
1119
|
-
? selectRuleValue(
|
|
1120
|
-
'inference',
|
|
1121
|
-
'kernelPath',
|
|
1122
|
-
'finitenessFallback',
|
|
1123
|
-
{ kernelPathId: defaultKernelPathId }
|
|
1124
|
-
)
|
|
1125
|
-
: null;
|
|
146
|
+
const finitenessFallbackKernelPathId = resolveFinitenessFallbackKernelPathId(defaultKernelPathId);
|
|
1126
147
|
|
|
1127
148
|
const kernelPath = buildInlineKernelPath(
|
|
1128
149
|
patchedSteps,
|
|
@@ -1167,13 +188,23 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
|
|
|
1167
188
|
}
|
|
1168
189
|
|
|
1169
190
|
const runtimeInference = runtimeConfig.inference ?? {};
|
|
191
|
+
const kernelPathExecution = runtimeInference.kernelPath !== undefined
|
|
192
|
+
? buildExecutionV0FromKernelPath(runtimeInference.kernelPath)
|
|
193
|
+
: null;
|
|
194
|
+
const manifestInference = kernelPathExecution
|
|
195
|
+
? {
|
|
196
|
+
...manifest.inference,
|
|
197
|
+
...kernelPathExecution,
|
|
198
|
+
defaultKernelPath: runtimeInference.kernelPath,
|
|
199
|
+
}
|
|
200
|
+
: manifest.inference;
|
|
1170
201
|
const runtimeExecutionOverlay = {
|
|
1171
202
|
...(runtimeInference.session ? { session: runtimeInference.session } : {}),
|
|
1172
203
|
...(runtimeInference.executionPatch ? { executionPatch: runtimeInference.executionPatch } : {}),
|
|
1173
204
|
};
|
|
1174
205
|
|
|
1175
206
|
const executionV0State = compileExecutionV0({
|
|
1176
|
-
manifestInference
|
|
207
|
+
manifestInference,
|
|
1177
208
|
runtimeInference: runtimeExecutionOverlay,
|
|
1178
209
|
modelId: options.modelId ?? manifest.modelId ?? 'model',
|
|
1179
210
|
numLayers: Number.isInteger(options.numLayers)
|
|
@@ -1184,7 +215,13 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
|
|
|
1184
215
|
return { runtimeConfig, executionV0State: null };
|
|
1185
216
|
}
|
|
1186
217
|
|
|
218
|
+
const compiledKernelPathSource = runtimeInference.kernelPath !== undefined
|
|
219
|
+
? 'config'
|
|
220
|
+
: 'manifest';
|
|
1187
221
|
const runtimeInferencePatch = { ...executionV0State.runtimeInferencePatch };
|
|
222
|
+
if (runtimeInferencePatch.kernelPathSource) {
|
|
223
|
+
runtimeInferencePatch.kernelPathSource = compiledKernelPathSource;
|
|
224
|
+
}
|
|
1188
225
|
if (runtimeInference.kernelPath !== undefined) {
|
|
1189
226
|
delete runtimeInferencePatch.kernelPath;
|
|
1190
227
|
delete runtimeInferencePatch.kernelPathSource;
|
|
@@ -1195,6 +232,18 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
|
|
|
1195
232
|
runtimeInference.modelOverrides ?? {}
|
|
1196
233
|
);
|
|
1197
234
|
}
|
|
235
|
+
if (runtimeInference.kernelPath !== undefined && runtimeInference.compute) {
|
|
236
|
+
runtimeInferencePatch.compute = mergeRuntimeValues(
|
|
237
|
+
runtimeInferencePatch.compute ?? {},
|
|
238
|
+
runtimeInference.compute
|
|
239
|
+
);
|
|
240
|
+
}
|
|
241
|
+
if (runtimeInference.kernelPath !== undefined && runtimeInference.kvcache) {
|
|
242
|
+
runtimeInferencePatch.kvcache = mergeRuntimeValues(
|
|
243
|
+
runtimeInferencePatch.kvcache ?? {},
|
|
244
|
+
runtimeInference.kvcache
|
|
245
|
+
);
|
|
246
|
+
}
|
|
1198
247
|
|
|
1199
248
|
return {
|
|
1200
249
|
runtimeConfig: {
|