@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
|
@@ -1,1007 +1,37 @@
|
|
|
1
1
|
import { mergeRuntimeValues } from '../../../config/runtime-merge.js';
|
|
2
|
+
import { buildExecutionV0FromKernelPath } from '../../../converter/execution-v0-manifest.js';
|
|
2
3
|
import {
|
|
3
|
-
|
|
4
|
-
indexExecutionV0KernelProfiles,
|
|
5
|
-
normalizeExecutionV0Dtype,
|
|
6
|
-
resolveExecutionV0KernelProfile,
|
|
7
|
-
resolveExecutionV0KVIO,
|
|
8
|
-
resolveExecutionV0Precision,
|
|
9
|
-
} from '../../../config/execution-v0-contract-check.js';
|
|
10
|
-
import {
|
|
11
|
-
EXECUTION_V0_SCHEMA_ID,
|
|
4
|
+
DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS,
|
|
12
5
|
DEFAULT_EXECUTION_V0_POLICIES,
|
|
13
6
|
DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
|
|
14
|
-
isExecutionV0Digest,
|
|
15
|
-
isExecutionV0Semver,
|
|
16
7
|
} from '../../../config/schema/execution-v0.schema.js';
|
|
17
|
-
import {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
}
|
|
45
|
-
const outputDtype = config?.outputDtype;
|
|
46
|
-
if (typeof outputDtype === 'string' && outputDtype.length > 0) {
|
|
47
|
-
byKernelEntry.get(key).add(String(outputDtype).toLowerCase());
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
return byKernelEntry;
|
|
52
|
-
})();
|
|
53
|
-
|
|
54
|
-
function getKernelOutputCapabilities(step) {
|
|
55
|
-
const kernel = String(step?.kernel ?? '').trim();
|
|
56
|
-
const entry = String(step?.entry ?? 'main').trim() || 'main';
|
|
57
|
-
if (!kernel) {
|
|
58
|
-
return null;
|
|
59
|
-
}
|
|
60
|
-
return KERNEL_OUTPUT_CAPABILITIES.get(`${kernel}#${entry}`) ?? null;
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
function cloneJson(value) {
|
|
64
|
-
if (typeof structuredClone === 'function') {
|
|
65
|
-
return structuredClone(value);
|
|
66
|
-
}
|
|
67
|
-
return JSON.parse(JSON.stringify(value));
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
const normalizeDtype = normalizeExecutionV0Dtype;
|
|
71
|
-
const resolvePrecision = resolveExecutionV0Precision;
|
|
72
|
-
const resolveKVIO = resolveExecutionV0KVIO;
|
|
73
|
-
|
|
74
|
-
function normalizePhase(value, label) {
|
|
75
|
-
const normalized = String(value ?? '').trim().toLowerCase();
|
|
76
|
-
if (normalized !== 'prefill' && normalized !== 'decode' && normalized !== 'both') {
|
|
77
|
-
throw new Error(`[ExecutionV0] ${label} must be prefill|decode|both; got "${value}"`);
|
|
78
|
-
}
|
|
79
|
-
return normalized;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
function normalizeSection(value, label) {
|
|
83
|
-
const normalized = String(value ?? '').trim();
|
|
84
|
-
if (!['preLayer', 'layer', 'postLayer', 'sampling'].includes(normalized)) {
|
|
85
|
-
throw new Error(`[ExecutionV0] ${label} must be preLayer|layer|postLayer|sampling; got "${value}"`);
|
|
86
|
-
}
|
|
87
|
-
return normalized;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
function normalizeKVLayout(value, label) {
|
|
91
|
-
if (value == null) {
|
|
92
|
-
return null;
|
|
93
|
-
}
|
|
94
|
-
const normalized = String(value).trim().toLowerCase();
|
|
95
|
-
if (!normalized) {
|
|
96
|
-
return null;
|
|
97
|
-
}
|
|
98
|
-
return normalized;
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
function assertKernelRef(kernelRef, label) {
|
|
102
|
-
if (!kernelRef) return;
|
|
103
|
-
if (typeof kernelRef.id !== 'string' || kernelRef.id.trim().length === 0) {
|
|
104
|
-
throw new Error(`[ExecutionV0] ${label}.id is required`);
|
|
105
|
-
}
|
|
106
|
-
if (!isExecutionV0Semver(kernelRef.version)) {
|
|
107
|
-
throw new Error(`[ExecutionV0] ${label}.version must be semver; got "${kernelRef.version}"`);
|
|
108
|
-
}
|
|
109
|
-
if (!isExecutionV0Digest(kernelRef.digest)) {
|
|
110
|
-
throw new Error(`[ExecutionV0] ${label}.digest must match sha256:<64-hex>`);
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
function isPhaseMatch(phase, targetPhase) {
|
|
115
|
-
return phase === 'both' || phase === targetPhase;
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
function stepHasLayer(step, layerIdx) {
|
|
119
|
-
if (step.layers === 'all') return true;
|
|
120
|
-
if (!Array.isArray(step.layers)) return false;
|
|
121
|
-
return step.layers.includes(layerIdx);
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
const buildKernelProfileKey = buildExecutionV0KernelProfileKey;
|
|
125
|
-
|
|
126
|
-
function normalizeSlot(value, label) {
|
|
127
|
-
if (typeof value !== 'string' || value.trim().length === 0) {
|
|
128
|
-
throw new Error(`[ExecutionV0] ${label} must be a non-empty string`);
|
|
129
|
-
}
|
|
130
|
-
return value.trim();
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
function assertKernelPrecisionCapability(step, resolvedPrecision, policies) {
|
|
134
|
-
if (step.op === 'cast') {
|
|
135
|
-
return;
|
|
136
|
-
}
|
|
137
|
-
if (policies.unsupportedPrecision !== 'error') {
|
|
138
|
-
return;
|
|
139
|
-
}
|
|
140
|
-
const kernel = String(step.kernel ?? '').trim();
|
|
141
|
-
const entry = String(step.entry ?? 'main').trim() || 'main';
|
|
142
|
-
const supportedOutputDtypes = getKernelOutputCapabilities(step);
|
|
143
|
-
if (!supportedOutputDtypes) {
|
|
144
|
-
throw new Error(
|
|
145
|
-
`[ExecutionV0] step "${step.id}" kernel "${kernel}#${entry}" ` +
|
|
146
|
-
'is not present in kernel registry; cannot validate precision capability.'
|
|
147
|
-
);
|
|
148
|
-
}
|
|
149
|
-
if (supportedOutputDtypes.size === 0) {
|
|
150
|
-
// Some kernels do not declare output dtype metadata yet; treat as unknown.
|
|
151
|
-
return;
|
|
152
|
-
}
|
|
153
|
-
const outputDtype = normalizeDtype(resolvedPrecision.outputDtype, `${step.id}.precision.outputDtype`);
|
|
154
|
-
if (!supportedOutputDtypes.has(outputDtype)) {
|
|
155
|
-
throw new Error(
|
|
156
|
-
`[ExecutionV0] step "${step.id}" outputDtype=${outputDtype} is unsupported by ` +
|
|
157
|
-
`kernel "${kernel}#${entry}" (supported: ${[...supportedOutputDtypes].join(', ') || 'none'}).`
|
|
158
|
-
);
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
function createSourceTrace() {
|
|
163
|
-
return {
|
|
164
|
-
session: {},
|
|
165
|
-
steps: {},
|
|
166
|
-
};
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
function setSourceTrace(trace, path, source) {
|
|
170
|
-
if (!trace || typeof path !== 'string' || path.length === 0) return;
|
|
171
|
-
trace[path] = { source };
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
function setStepSourceTrace(trace, stepId, path, source) {
|
|
175
|
-
if (!trace || !stepId || !path) return;
|
|
176
|
-
if (!trace.steps[stepId]) {
|
|
177
|
-
trace.steps[stepId] = {};
|
|
178
|
-
}
|
|
179
|
-
trace.steps[stepId][path] = { source };
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
function isPlainObject(value) {
|
|
183
|
-
return value != null && typeof value === 'object' && !Array.isArray(value);
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
function collectLeafPaths(value, prefix = [], out = []) {
|
|
187
|
-
if (Array.isArray(value)) {
|
|
188
|
-
if (prefix.length > 0) {
|
|
189
|
-
out.push(prefix);
|
|
190
|
-
}
|
|
191
|
-
return out;
|
|
192
|
-
}
|
|
193
|
-
if (!isPlainObject(value)) {
|
|
194
|
-
if (prefix.length > 0) {
|
|
195
|
-
out.push(prefix);
|
|
196
|
-
}
|
|
197
|
-
return out;
|
|
198
|
-
}
|
|
199
|
-
for (const [key, child] of Object.entries(value)) {
|
|
200
|
-
collectLeafPaths(child, [...prefix, key], out);
|
|
201
|
-
}
|
|
202
|
-
return out;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
function hasDefinedPath(root, pathSegments) {
|
|
206
|
-
let current = root;
|
|
207
|
-
for (const segment of pathSegments) {
|
|
208
|
-
if (!isPlainObject(current) || !Object.prototype.hasOwnProperty.call(current, segment)) {
|
|
209
|
-
return false;
|
|
210
|
-
}
|
|
211
|
-
current = current[segment];
|
|
212
|
-
}
|
|
213
|
-
return current !== undefined;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
const indexKernelProfiles = indexExecutionV0KernelProfiles;
|
|
217
|
-
|
|
218
|
-
function resolveProfile(profileIndex, step) {
|
|
219
|
-
return resolveExecutionV0KernelProfile(profileIndex, step);
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
function validateStepShape(step, index) {
|
|
223
|
-
if (!step || typeof step !== 'object') {
|
|
224
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}] must be an object`);
|
|
225
|
-
}
|
|
226
|
-
if (typeof step.id !== 'string' || step.id.trim().length === 0) {
|
|
227
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}].id is required`);
|
|
228
|
-
}
|
|
229
|
-
if (typeof step.op !== 'string' || step.op.trim().length === 0) {
|
|
230
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}].op is required`);
|
|
231
|
-
}
|
|
232
|
-
normalizePhase(step.phase, `execution.steps[${index}].phase`);
|
|
233
|
-
normalizeSection(step.section, `execution.steps[${index}].section`);
|
|
234
|
-
normalizeSlot(step.src, `execution.steps[${index}].src`);
|
|
235
|
-
normalizeSlot(step.dst, `execution.steps[${index}].dst`);
|
|
236
|
-
if (step.layers !== 'all' && !Array.isArray(step.layers)) {
|
|
237
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}].layers must be "all" or number[]`);
|
|
238
|
-
}
|
|
239
|
-
if (step.layers !== 'all') {
|
|
240
|
-
for (const layer of step.layers) {
|
|
241
|
-
if (!Number.isInteger(layer) || layer < 0) {
|
|
242
|
-
throw new Error(`[ExecutionV0] execution.steps[${index}].layers must contain non-negative integers`);
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
|
-
}
|
|
246
|
-
if (step.op === 'cast') {
|
|
247
|
-
normalizeDtype(step.toDtype, `execution.steps[${index}].toDtype`);
|
|
248
|
-
if (step.fromDtype != null) {
|
|
249
|
-
normalizeDtype(step.fromDtype, `execution.steps[${index}].fromDtype`);
|
|
250
|
-
}
|
|
251
|
-
} else {
|
|
252
|
-
if (typeof step.kernel !== 'string' || step.kernel.trim().length === 0) {
|
|
253
|
-
throw new Error(
|
|
254
|
-
`[ExecutionV0] execution.steps[${index}] "${step.id}" requires kernel (non-cast op)`
|
|
255
|
-
);
|
|
256
|
-
}
|
|
257
|
-
if (!step.kernelRef || typeof step.kernelRef !== 'object' || Array.isArray(step.kernelRef)) {
|
|
258
|
-
throw new Error(
|
|
259
|
-
`[ExecutionV0] execution.steps[${index}] "${step.id}" requires kernelRef {id, version, digest} (non-cast op)`
|
|
260
|
-
);
|
|
261
|
-
}
|
|
262
|
-
assertKernelRef(step.kernelRef, `execution.steps[${index}].kernelRef`);
|
|
263
|
-
const entry = String(step.entry ?? 'main').trim() || 'main';
|
|
264
|
-
let expectedKernelRef;
|
|
265
|
-
try {
|
|
266
|
-
expectedKernelRef = buildKernelRefFromKernelEntry(step.kernel, entry);
|
|
267
|
-
} catch (error) {
|
|
268
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
269
|
-
throw new Error(
|
|
270
|
-
`[ExecutionV0] execution.steps[${index}] "${step.id}" kernel "${step.kernel}#${entry}" ` +
|
|
271
|
-
`cannot be content-pinned: ${message}`
|
|
272
|
-
);
|
|
273
|
-
}
|
|
274
|
-
if (!isKernelRefBoundToKernel(step.kernelRef, step.kernel, entry)) {
|
|
275
|
-
throw new Error(
|
|
276
|
-
`[ExecutionV0] execution.steps[${index}] "${step.id}" kernelRef does not match kernel binding ` +
|
|
277
|
-
`("${step.kernel}#${entry}"). Expected ${expectedKernelRef.id}@${expectedKernelRef.version} ${expectedKernelRef.digest}.`
|
|
278
|
-
);
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
/**
 * Rejects a runtime.inference overlay that carries keys outside
 * EXECUTION_V0_RUNTIME_KEYS.
 *
 * Falsy or non-object overlays are accepted without inspection.
 *
 * @param {*} runtimeInference - Candidate overlay object.
 * @throws {Error} When at least one key is not in EXECUTION_V0_RUNTIME_KEYS.
 */
function assertExecutionRuntimeOverlay(runtimeInference) {
  const isInspectable = Boolean(runtimeInference) && typeof runtimeInference === 'object';
  if (!isInspectable) return;

  const rejected = [];
  for (const name of Object.keys(runtimeInference)) {
    if (!EXECUTION_V0_RUNTIME_KEYS.has(name)) rejected.push(name);
  }
  if (rejected.length === 0) return;

  const supported = [...EXECUTION_V0_RUNTIME_KEYS].join(', ');
  throw new Error(
    `[ExecutionV0] runtime.inference overlay supports only ${supported}; ` +
    `got unsupported keys: ${rejected.join(', ')}.`
  );
}
|
|
295
|
-
|
|
296
|
-
/**
 * Ensures no two steps share the same `id`.
 *
 * @param {Iterable<{id: *}>} steps - Steps to check, in order.
 * @throws {Error} On the first step whose id was already seen.
 */
function validateUniqueStepIds(steps) {
  const seen = new Set();
  for (const step of steps) {
    const sizeBefore = seen.size;
    seen.add(step.id);
    // A Set only grows when the value is new, so an unchanged size means a repeat.
    if (seen.size === sizeBefore) {
      throw new Error(`[ExecutionV0] duplicate step id "${step.id}"`);
    }
  }
}
|
|
305
|
-
|
|
306
|
-
/**
 * Checks the schema discriminator of a manifest inference block.
 *
 * Nothing is checked unless hasExecutionV0 reports true for the block; in that
 * case `schema` must equal EXECUTION_V0_SCHEMA_ID.
 *
 * @param {object|null|undefined} manifestInference - manifest.inference section.
 * @throws {Error} When execution is present but the schema id does not match.
 */
function assertExecutionV0Schema(manifestInference) {
  if (hasExecutionV0(manifestInference)) {
    // A missing schema is reported as the literal "null" in the message.
    const declaredSchema = manifestInference?.schema ?? null;
    if (declaredSchema === EXECUTION_V0_SCHEMA_ID) return;
    throw new Error(
      `[ExecutionV0] manifest.inference.schema must be "${EXECUTION_V0_SCHEMA_ID}" ` +
      `when execution is present; got "${declaredSchema}".`
    );
  }
}
|
|
316
|
-
|
|
317
|
-
/**
 * Applies an execution patch (`set`, `remove`, `add`) to a list of steps.
 *
 * All `set` and `remove` entries are validated before anything is applied.
 * Work happens on a cloneJson copy of `baseSteps`, so the input is not
 * modified. Without a patch the original `baseSteps` reference is returned.
 *
 * Order of application: `set` field overrides, then `remove`, then `add`.
 * Several `insertAfter` additions against the same anchor keep the order in
 * which they appear in the patch.
 *
 * @param {object[]} baseSteps - Steps to patch.
 * @param {{set?: object[], remove?: object[], add?: object[]}|null|undefined} patch
 * @returns {object[]} Patched step list (or `baseSteps` itself when no patch).
 * @throws {Error} On malformed entries, unknown targets/anchors, immutable
 *   fields in `set`, or duplicate ids.
 */
function applyExecutionPatchAtomic(baseSteps, patch) {
  if (!patch) return baseSteps;

  const setEntries = patch.set ?? [];
  const removeEntries = patch.remove ?? [];
  const addEntries = patch.add ?? [];

  const workingSteps = cloneJson(baseSteps);
  const indexById = new Map();
  workingSteps.forEach((step, position) => indexById.set(step.id, position));

  // Shared shape/target validation for set and remove entries.
  const requireExistingTarget = (entry, kind) => {
    const hasId = Boolean(entry) && typeof entry === 'object' && typeof entry.id === 'string';
    if (!hasId) {
      throw new Error(`[ExecutionV0] executionPatch.${kind} entries require id`);
    }
    if (!indexById.has(entry.id)) {
      throw new Error(`[ExecutionV0] executionPatch.${kind} target "${entry.id}" does not exist`);
    }
  };

  for (const entry of setEntries) {
    requireExistingTarget(entry, 'set');
    const illegalKey = Object.keys(entry).find(
      (key) => key !== 'id' && !PATCH_SET_MUTABLE_FIELDS.has(key)
    );
    if (illegalKey !== undefined) {
      throw new Error(`[ExecutionV0] executionPatch.set "${entry.id}" cannot mutate "${illegalKey}"`);
    }
  }

  for (const entry of removeEntries) {
    requireExistingTarget(entry, 'remove');
  }

  // Validation passed: apply the field overrides.
  for (const entry of setEntries) {
    const target = workingSteps[indexById.get(entry.id)];
    for (const field of ['precision', 'kvIO', 'constants']) {
      if (entry[field] !== undefined) target[field] = cloneJson(entry[field]);
    }
    if (entry.entry !== undefined) target.entry = entry.entry;
  }

  const doomedIds = new Set(removeEntries.map((entry) => entry.id));
  const result = workingSteps.filter((step) => !doomedIds.has(step.id));

  // anchor id -> ids already inserted after that anchor, in insertion order.
  const appendedAfter = new Map();
  for (const addition of addEntries) {
    if (!addition?.step || typeof addition.step !== 'object') {
      throw new Error('[ExecutionV0] executionPatch.add requires a step payload');
    }
    const wantsBefore = typeof addition.insertBefore === 'string' && addition.insertBefore.length > 0;
    const wantsAfter = typeof addition.insertAfter === 'string' && addition.insertAfter.length > 0;
    if (wantsBefore === wantsAfter) {
      throw new Error('[ExecutionV0] executionPatch.add requires exactly one of insertBefore or insertAfter');
    }
    const newId = addition.step.id;
    if (result.some((step) => step.id === newId)) {
      throw new Error(`[ExecutionV0] executionPatch.add step id "${newId}" already exists`);
    }
    const anchorId = wantsBefore ? addition.insertBefore : addition.insertAfter;
    const anchorAt = result.findIndex((step) => step.id === anchorId);
    if (anchorAt < 0) {
      throw new Error(`[ExecutionV0] executionPatch.add anchor "${anchorId}" not found`);
    }

    if (wantsBefore) {
      result.splice(anchorAt, 0, cloneJson(addition.step));
      continue;
    }

    // Skip past steps previously inserted after the same anchor so patch order is kept.
    const earlier = appendedAfter.get(anchorId) ?? [];
    let insertAt = anchorAt + 1;
    while (insertAt < result.length && earlier.includes(result[insertAt].id)) {
      insertAt += 1;
    }
    result.splice(insertAt, 0, cloneJson(addition.step));
    earlier.push(newId);
    appendedAfter.set(anchorId, earlier);
  }

  validateUniqueStepIds(result);
  return result;
}
|
|
401
|
-
|
|
402
|
-
/**
 * Summarises which steps a runtime patch adds and which precision / kvIO
 * fields it overrides.
 *
 * @param {{add?: object[], set?: object[]}|null|undefined} patch
 * @returns {{addedSteps: Set<string>, precisionFieldsByStep: Map<string, Set<string>>, kvIOFieldsByStep: Set<string>}}
 *   Empty collections when `patch` is missing or not an object.
 */
function indexRuntimePatchMeta(patch) {
  const addedSteps = new Set();
  const precisionFieldsByStep = new Map();
  const kvIOFieldsByStep = new Set();
  const meta = { addedSteps, precisionFieldsByStep, kvIOFieldsByStep };

  if (!patch || typeof patch !== 'object') return meta;

  const isUsableId = (candidate) => typeof candidate === 'string' && candidate.length > 0;

  for (const addition of patch.add ?? []) {
    const addedId = addition?.step?.id;
    if (isUsableId(addedId)) addedSteps.add(addedId);
  }

  for (const override of patch.set ?? []) {
    const targetId = override?.id;
    if (!isUsableId(targetId)) continue;
    const { precision, kvIO } = override;
    if (precision && typeof precision === 'object') {
      precisionFieldsByStep.set(targetId, new Set(Object.keys(precision)));
    }
    if (kvIO && typeof kvIO === 'object') {
      kvIOFieldsByStep.add(targetId);
    }
  }

  return meta;
}
|
|
431
|
-
|
|
432
|
-
/**
 * Builds the starting slot -> dtype table, containing only the `state` slot.
 *
 * The dtype is `sessionDefaults.compute.defaults.activationDtype` passed
 * through normalizeDtype, with 'f16' used when that value is null/undefined.
 *
 * @param {object|null|undefined} sessionDefaults
 * @returns {Map<string, *>} Map with a single `state` entry.
 */
function createInitialSlotDtypes(sessionDefaults) {
  const configured = sessionDefaults?.compute?.defaults?.activationDtype;
  const stateDtype = normalizeDtype(
    configured ?? 'f16',
    'sessionDefaults.compute.defaults.activationDtype'
  );
  const slots = new Map();
  slots.set('state', stateDtype);
  return slots;
}
|
|
439
|
-
|
|
440
|
-
/**
 * Verifies that an attention step's kvIO dtypes agree with the session KV dtype.
 *
 * The check is skipped for non-attention steps, when `kvIO` is falsy, or when
 * `sessionDefaults.kvcache.kvDtype` is null/undefined.
 *
 * @param {{op: string, id: *}} step
 * @param {{readDtype: *, writeDtype: *}|null} kvIO
 * @param {object|null|undefined} sessionDefaults
 * @throws {Error} When readDtype or writeDtype differs from the normalized
 *   session KV dtype.
 */
function ensureCompatibleKV(step, kvIO, sessionDefaults) {
  const appliesToStep = step.op === 'attention' && Boolean(kvIO);
  if (!appliesToStep) return;

  const configuredKvDtype = sessionDefaults?.kvcache?.kvDtype;
  if (configuredKvDtype == null) return;

  const expected = normalizeDtype(configuredKvDtype, 'sessionDefaults.kvcache.kvDtype');
  const { readDtype, writeDtype } = kvIO;
  if (readDtype === expected && writeDtype === expected) return;

  throw new Error(
    `[ExecutionV0] step "${step.id}" kvIO read/write (${readDtype}/${writeDtype}) ` +
    `must match sessionDefaults.kvcache.kvDtype (${expected}).`
  );
}
|
|
456
|
-
|
|
457
|
-
/**
 * Resolves every step that participates in `phase`, walking the steps in
 * order while tracking which dtype each slot currently holds.
 *
 * For each matching step this resolves its kernel profile and precision,
 * checks that the source slot has been produced, enforces the dtype
 * transition policy, computes the output dtype (cast steps use `toDtype`),
 * resolves kvIO for attention steps, and, when a source trace is supplied,
 * records where each resolved precision / kvIO field came from.
 *
 * @param {string} phase - Phase to resolve; matched through isPhaseMatch.
 * @param {object[]} steps - Execution steps in declaration order.
 * @param {object} sessionDefaults - Session defaults forwarded to the resolvers.
 * @param {*} profileIndex - Lookup handed to resolveProfile.
 * @param {object} policies - Reads `unresolvedKernel` and `dtypeTransition`.
 * @param {object} [options] - Optional `initialSlotDtypes`, `sourceTrace`,
 *   `sessionDefaultSources` and `runtimePatchMeta`.
 * @returns {{steps: object[], finalSlotDtypes: Map}} Resolved steps and the
 *   slot dtype table after the last step.
 * @throws {Error} On an unresolved kernel profile (policy 'error'), a read of
 *   an unproduced slot, an implicit dtype change (policy 'require_cast_step'),
 *   or a cast whose `fromDtype` disagrees with the slot.
 */
function resolvePhaseSteps(phase, steps, sessionDefaults, profileIndex, policies, options = {}) {
  let slotDtypes;
  if (options.initialSlotDtypes) {
    // Copy so the caller's table is not mutated.
    slotDtypes = new Map(options.initialSlotDtypes);
  } else {
    slotDtypes = createInitialSlotDtypes(sessionDefaults);
  }
  const sourceTrace = options.sourceTrace ?? null;
  const sessionDefaultSources = options.sessionDefaultSources ?? {};
  const runtimePatchMeta = options.runtimePatchMeta ?? {
    addedSteps: new Set(),
    precisionFieldsByStep: new Map(),
    kvIOFieldsByStep: new Set(),
  };

  // Replaces a "sessionDefault" provenance with where that default itself came from.
  const viaSessionDefault = (source, defaultKey) => (
    source === 'sessionDefault'
      ? (sessionDefaultSources[defaultKey] ?? 'derived')
      : source
  );

  const resolvedSteps = [];

  for (const step of steps) {
    const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
    if (!isPhaseMatch(stepPhase, phase)) continue;

    const profile = resolveProfile(profileIndex, step);
    const profileMissing = Boolean(step.kernelRef) && !profile;
    if (profileMissing && policies.unresolvedKernel === 'error') {
      const { id: refId, version: refVersion, digest: refDigest } = step.kernelRef;
      throw new Error(
        `[ExecutionV0] step "${step.id}" references kernel profile ` +
        `${refId}@${refVersion} (${refDigest}) ` +
        'but no matching sessionDefaults.compute.kernelProfiles entry was found.'
      );
    }

    const { precision, sources: precisionSources } = resolvePrecision(step, profile, sessionDefaults);
    const src = normalizeSlot(step.src, `${step.id}.src`);
    const dst = normalizeSlot(step.dst, `${step.id}.dst`);
    if (!slotDtypes.has(src)) {
      throw new Error(
        `[ExecutionV0] step "${step.id}" reads slot "${src}" before it is produced. ` +
        'Add an explicit producer step or cast/load bridge.'
      );
    }

    const slotDtype = slotDtypes.get(src);
    const inputDtype = normalizeDtype(precision.inputDtype ?? slotDtype, `${step.id}.precision.inputDtype`);
    const isCast = step.op === 'cast';

    const implicitTransition = policies.dtypeTransition === 'require_cast_step'
      && !isCast
      && inputDtype !== slotDtype;
    if (implicitTransition) {
      throw new Error(
        `[ExecutionV0] step "${step.id}" requires inputDtype=${inputDtype} ` +
        `but slot "${src}" currently holds ${slotDtype}. Insert explicit cast step.`
      );
    }

    // The declared output dtype is normalized for every step, including casts.
    let outputDtype = normalizeDtype(precision.outputDtype, `${step.id}.precision.outputDtype`);
    let outputDtypeSource = precisionSources.outputDtype;
    if (isCast) {
      outputDtype = normalizeDtype(step.toDtype, `${step.id}.toDtype`);
      outputDtypeSource = 'manifest';
      const fromDtype = step.fromDtype
        ? normalizeDtype(step.fromDtype, `${step.id}.fromDtype`)
        : slotDtype;
      if (fromDtype !== slotDtype) {
        throw new Error(
          `[ExecutionV0] cast step "${step.id}" fromDtype=${fromDtype} does not match slot "${src}" dtype=${slotDtype}`
        );
      }
    } else if (outputDtypeSource === 'sessionDefault') {
      // A kernel that declares exactly one output dtype overrides the session default.
      const declaredOutputs = getKernelOutputCapabilities(step);
      if (declaredOutputs && declaredOutputs.size === 1) {
        outputDtype = [...declaredOutputs][0];
        outputDtypeSource = 'derived';
      }
    }

    const resolvedPrecision = {
      inputDtype,
      mathDtype: normalizeDtype(precision.mathDtype, `${step.id}.precision.mathDtype`),
      accumDtype: normalizeDtype(precision.accumDtype, `${step.id}.precision.accumDtype`),
      outputDtype,
    };
    assertKernelPrecisionCapability(step, resolvedPrecision, policies);
    slotDtypes.set(dst, outputDtype);

    const kvIOResolved = step.op === 'attention'
      ? resolveKVIO(step, profile, sessionDefaults)
      : null;
    const kvIO = kvIOResolved?.value ?? null;
    ensureCompatibleKV(step, kvIO, sessionDefaults);

    if (sourceTrace) {
      const patchedFields = runtimePatchMeta.precisionFieldsByStep.get(step.id) ?? new Set();
      const isAddedStep = runtimePatchMeta.addedSteps.has(step.id);
      // Steps added by the patch count as patched for every precision field they declare.
      const isPatched = (field) => (
        isAddedStep ? step.precision?.[field] != null : patchedFields.has(field)
      );
      const record = (field, source) => setStepSourceTrace(sourceTrace, step.id, field, source);

      const inputSource = precision.inputDtype != null ? precisionSources.inputDtype : 'derived';
      const mathSource = viaSessionDefault(precisionSources.mathDtype, 'mathDtype');
      const accumSource = viaSessionDefault(precisionSources.accumDtype, 'accumDtype');
      const outputSource = precisionSources.outputDtype === 'sessionDefault'
        ? viaSessionDefault(outputDtypeSource, 'outputDtype')
        : outputDtypeSource;

      record('precision.inputDtype', isPatched('inputDtype') ? 'runtime.patch' : inputSource);
      record('precision.mathDtype', isPatched('mathDtype') ? 'runtime.patch' : mathSource);
      record('precision.accumDtype', isPatched('accumDtype') ? 'runtime.patch' : accumSource);
      record('precision.outputDtype', isPatched('outputDtype') ? 'runtime.patch' : outputSource);

      if (step.op === 'attention') {
        const kvPatched = runtimePatchMeta.kvIOFieldsByStep.has(step.id)
          || (isAddedStep && !!step.kvIO);
        const kvSource = kvPatched
          ? 'runtime.patch'
          : (viaSessionDefault(kvIOResolved?.source, 'kvDtype') ?? 'derived');
        record('kvIO.readDtype', kvSource);
        record('kvIO.writeDtype', kvSource);
      }
    }

    resolvedSteps.push({
      ...step,
      src,
      dst,
      phase: stepPhase,
      section: normalizeSection(step.section, `${step.id}.section`),
      precision: resolvedPrecision,
      kvIO,
    });
  }

  return {
    steps: resolvedSteps,
    finalSlotDtypes: slotDtypes,
  };
}
|
|
608
|
-
|
|
609
|
-
/**
 * Removes dtype defaults from `compute.defaults` for every dtype key the
 * manifest supplies a non-null value for.
 *
 * Returns the original `compute` reference when nothing needs stripping.
 * When stripping empties `defaults`, the `defaults` key is dropped; when that
 * leaves `compute` empty too, `null` is returned. The input is never mutated.
 *
 * @param {object|null|undefined} compute - Runtime session compute section.
 * @param {object|null|undefined} manifestComputeDefaults - Manifest compute defaults.
 * @returns {object|null|undefined} Stripped copy, `null`, or the original value.
 */
function stripPresetComputeDefaults(compute, manifestComputeDefaults) {
  if (!compute?.defaults || !manifestComputeDefaults) return compute;

  const manifestOwnedKeys = ['activationDtype', 'mathDtype', 'accumDtype', 'outputDtype']
    .filter((key) => manifestComputeDefaults[key] != null);
  if (manifestOwnedKeys.length === 0) return compute;

  const remainingDefaults = { ...compute.defaults };
  manifestOwnedKeys.forEach((key) => {
    delete remainingDefaults[key];
  });

  if (Object.keys(remainingDefaults).length > 0) {
    return { ...compute, defaults: remainingDefaults };
  }

  const withoutDefaults = { ...compute };
  delete withoutDefaults.defaults;
  if (Object.keys(withoutDefaults).length === 0) return null;
  return withoutDefaults;
}
|
|
633
|
-
|
|
634
|
-
/**
 * Removes preset placeholders from a runtime session so that values supplied
 * by the manifest's sessionDefaults can take effect.
 *
 * Three kinds of placeholder are stripped:
 *  - compute dtype defaults the manifest also declares (via
 *    stripPresetComputeDefaults);
 *  - an empty `compute.kernelProfiles` array when the manifest lists profiles;
 *  - `kvcache: null` / `decodeLoop: null` when the manifest defines them.
 *
 * The input object is returned unchanged (same reference) when nothing was
 * stripped; otherwise a shallow copy is returned.
 *
 * @param {object|null|undefined} runtimeSession
 * @param {object|null|undefined} manifestInference
 * @returns {object|null|undefined}
 */
function normalizeRuntimeSessionForExecutionV0(runtimeSession, manifestInference) {
  if (!runtimeSession || typeof runtimeSession !== 'object') {
    return runtimeSession;
  }

  // Reads an own property only; inherited values are treated as absent.
  const ownValue = (holder, key) => (
    Object.prototype.hasOwnProperty.call(holder, key) ? holder[key] : undefined
  );

  const manifestDefaults = manifestInference?.sessionDefaults ?? null;
  const manifestProfiles = manifestDefaults?.compute?.kernelProfiles;
  const manifestHasProfiles = Array.isArray(manifestProfiles) && manifestProfiles.length > 0;
  const manifestComputeDefaults = manifestDefaults?.compute?.defaults ?? null;
  const manifestHasKVCache = manifestDefaults?.kvcache != null;
  const manifestHasDecodeLoop = manifestDefaults?.decodeLoop != null;

  let compute = runtimeSession.compute ?? null;
  let kvcache = ownValue(runtimeSession, 'kvcache');
  let decodeLoop = ownValue(runtimeSession, 'decodeLoop');
  let changed = false;

  // Manifest-declared compute dtypes must win over baked-in preset defaults.
  if (manifestComputeDefaults) {
    const stripped = stripPresetComputeDefaults(compute, manifestComputeDefaults);
    changed = stripped !== compute;
    compute = stripped;
  }

  // An empty preset kernelProfiles list must not shadow the manifest's profiles.
  const presetProfiles = compute ? ownValue(compute, 'kernelProfiles') : undefined;
  if (Array.isArray(presetProfiles) && presetProfiles.length === 0 && manifestHasProfiles) {
    const withoutProfiles = { ...compute };
    delete withoutProfiles.kernelProfiles;
    compute = Object.keys(withoutProfiles).length === 0 ? null : withoutProfiles;
    changed = true;
  }

  // Preset nulls are dropped so the manifest session defaults apply.
  if (kvcache === null && manifestHasKVCache) {
    kvcache = undefined;
    changed = true;
  }
  if (decodeLoop === null && manifestHasDecodeLoop) {
    decodeLoop = undefined;
    changed = true;
  }

  if (!changed) {
    return runtimeSession;
  }

  const normalized = { ...runtimeSession };
  if (compute) {
    normalized.compute = compute;
  } else {
    delete normalized.compute;
  }
  if (kvcache !== undefined) {
    normalized.kvcache = kvcache;
  } else {
    delete normalized.kvcache;
  }
  if (decodeLoop !== undefined) {
    normalized.decodeLoop = decodeLoop;
  } else {
    delete normalized.decodeLoop;
  }

  if (Object.keys(normalized).length === 0) {
    return {};
  }
  return normalized;
}
|
|
713
|
-
|
|
714
|
-
/**
 * Checks that decode steps which read a slot carried over from prefill expect
 * the dtype that prefill left in that slot.
 *
 * A slot counts as "carried" when prefill's final table contains it and no
 * earlier decode step has written it. Cast steps are exempt.
 *
 * @param {object} options
 * @param {object[]} options.steps - All execution steps.
 * @param {Map} options.prefillFinalSlotDtypes - Slot dtypes after prefill.
 * @param {Map} options.decodeInitialSlotDtypes - Last-resort fallback for the
 *   decode input dtype.
 * @param {object} options.sessionDefaults - Forwarded to resolvePrecision.
 * @param {*} options.profileIndex - Forwarded to resolveProfile.
 * @throws {Error} When a decode step reads a carried slot with a different dtype.
 */
function validatePhaseBoundaryCompatibility(options) {
  const {
    steps,
    prefillFinalSlotDtypes,
    decodeInitialSlotDtypes,
    sessionDefaults,
    profileIndex,
  } = options;

  // Phases are normalized for every step before any boundary check runs.
  const decodeSteps = steps.filter((step) => (
    isPhaseMatch(normalizePhase(step.phase, `${step.id}.phase`), 'decode')
  ));

  const producedInDecode = new Set();
  for (const step of decodeSteps) {
    const src = normalizeSlot(step.src, `${step.id}.src`);
    const dst = normalizeSlot(step.dst, `${step.id}.dst`);

    const isCarried = !producedInDecode.has(src) && prefillFinalSlotDtypes.has(src);
    if (isCarried && step.op !== 'cast') {
      const profile = resolveProfile(profileIndex, step);
      const { precision } = resolvePrecision(step, profile, sessionDefaults);
      const carriedDtype = prefillFinalSlotDtypes.get(src);
      const expectedInput = precision.inputDtype
        ?? carriedDtype
        ?? decodeInitialSlotDtypes.get(src);
      const decodeInput = normalizeDtype(expectedInput, `${step.id}.precision.inputDtype`);
      if (decodeInput !== carriedDtype) {
        throw new Error(
          `[ExecutionV0] decode step "${step.id}" reads carried slot "${src}" as ${decodeInput} ` +
          `but prefill left ${carriedDtype}. Add explicit cast at phase boundary.`
        );
      }
    }

    producedInDecode.add(dst);
  }
}
|
|
752
|
-
|
|
753
|
-
/**
 * Rejects execution contracts that combine the "bdpa" KV layout with an
 * attention step that runs in the prefill phase.
 *
 * @param {object[]} steps - Execution steps.
 * @param {object|null|undefined} sessionDefaults - Supplies `kvcache.layout`.
 * @throws {Error} Naming the first attention step that matches prefill when
 *   the normalized layout is "bdpa".
 */
function assertKVLayoutExecutionCompatibility(steps, sessionDefaults) {
  const layout = normalizeKVLayout(sessionDefaults?.kvcache?.layout, 'sessionDefaults.kvcache.layout');
  if (layout !== 'bdpa') return;

  for (const step of steps) {
    if (step?.op !== 'attention') continue;
    const stepPhase = normalizePhase(step.phase, `${step.id}.phase`);
    if (!isPhaseMatch(stepPhase, 'prefill')) continue;

    throw new Error(
      `[ExecutionV0] sessionDefaults.kvcache.layout="bdpa" is decode-only, ` +
      `but step "${step.id}" declares prefill attention. ` +
      'Use a non-BDPA KV layout for prefill-capable models or remove prefill attention from the execution contract.'
    );
  }
}
|
|
771
|
-
|
|
772
|
-
/**
 * Converts an execution step into a kernel-path step.
 *
 * @param {object} step - Execution step.
 * @returns {object|null} `{op, kernel, entry[, weights][, constants]}`, or
 *   `null` for cast steps and steps without a kernel. `entry` falls back to
 *   'main' when null/undefined.
 */
function toKernelPathStep(step) {
  if (step.op === 'cast' || !step.kernel) return null;

  const converted = {
    op: step.op,
    kernel: step.kernel,
    entry: step.entry ?? 'main',
  };
  // Optional fields are copied only when truthy.
  if (step.weights) converted.weights = step.weights;
  if (step.constants) converted.constants = step.constants;
  return converted;
}
|
|
783
|
-
|
|
784
|
-
/**
 * Collects the kernel-path steps of one section, optionally limited to a phase.
 *
 * @param {object[]} steps - Execution steps.
 * @param {string} section - Section name compared with `step.section`.
 * @param {string|null} [phase=null] - When truthy, only steps for which
 *   isPhaseMatch(step.phase, phase) holds are kept.
 * @returns {object[]} Converted steps; steps that toKernelPathStep maps to
 *   null/undefined are omitted.
 */
function getSectionSteps(steps, section, phase = null) {
  const inSection = steps.filter((step) => step.section === section);
  const inPhase = phase
    ? inSection.filter((step) => isPhaseMatch(step.phase, phase))
    : inSection;

  const converted = [];
  for (const step of inPhase) {
    const pathStep = toKernelPathStep(step);
    if (pathStep != null) converted.push(pathStep);
  }
  return converted;
}
|
|
791
|
-
|
|
792
|
-
/**
 * Collects the kernel-path steps of the `layer` section that apply to one
 * phase and one layer index.
 *
 * @param {object[]} steps - Execution steps.
 * @param {string} phase - Phase matched through isPhaseMatch.
 * @param {number} layerIdx - Layer index checked through stepHasLayer.
 * @returns {object[]} Converted steps; steps that toKernelPathStep maps to
 *   null/undefined are omitted.
 */
function buildLayerPhaseSteps(steps, phase, layerIdx) {
  const layerStepsInPhase = steps.filter(
    (step) => step.section === 'layer' && isPhaseMatch(step.phase, phase)
  );
  const stepsForLayer = layerStepsInPhase.filter((step) => stepHasLayer(step, layerIdx));

  const converted = [];
  for (const step of stepsForLayer) {
    const pathStep = toKernelPathStep(step);
    if (pathStep != null) converted.push(pathStep);
  }
  return converted;
}
|
|
799
|
-
|
|
800
|
-
/**
 * Flattens every step list of an inline kernel path into a single array.
 *
 * Order: preLayer, decode.steps, prefill.steps, postLayer, sampling, then the
 * steps of each layer override. Missing lists contribute nothing.
 *
 * @param {object|null|undefined} path - Inline kernel path.
 * @returns {Array} All steps in the order above.
 */
function getInlineKernelPathSteps(path) {
  const stepLists = [
    path?.preLayer,
    path?.decode?.steps,
    path?.prefill?.steps,
    path?.postLayer,
    path?.sampling,
  ];

  const flattened = [];
  for (const list of stepLists) {
    flattened.push(...(list ?? []));
  }

  const overrideSteps = path?.layerOverrides?.flatMap((override) => override.steps ?? []) ?? [];
  flattened.push(...overrideSteps);
  return flattened;
}
|
|
810
|
-
|
|
811
|
-
/**
 * Checks that every attention kernel named in an inline kernel path matches
 * the resolved activation and KV-cache dtypes.
 *
 * Requirements are picked from the kernel name (after trimming), for kernels
 * whose name starts with "attention":
 *  - contains "_f16kv": activation f32, KV f16;
 *  - otherwise contains "_f16": activation f16, KV f16;
 *  - otherwise: activation f32, KV f32.
 *
 * Dtypes resolve from the path first, then sessionDefaults, then 'f16' for
 * activation and the activation dtype for KV.
 *
 * @param {object|null|undefined} path - Inline kernel path; falsy skips the check.
 * @param {object|null|undefined} sessionDefaults
 * @throws {Error} On the first attention kernel whose requirement is not met.
 */
function assertInlineKernelPathSessionCompatibility(path, sessionDefaults) {
  if (!path) return;

  const activationDtype = normalizeDtype(
    path.activationDtype ?? sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
    'inlineKernelPath.activationDtype'
  );
  const kvDtype = normalizeDtype(
    path.kvDtype ?? sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
    'inlineKernelPath.kvDtype'
  );

  // "_f16kv" must be tested first because such names also contain "_f16".
  const requirementFor = (kernelName) => {
    if (kernelName.includes('_f16kv')) return { activation: 'f32', kv: 'f16' };
    if (kernelName.includes('_f16')) return { activation: 'f16', kv: 'f16' };
    return { activation: 'f32', kv: 'f32' };
  };

  for (const step of getInlineKernelPathSteps(path)) {
    const kernel = String(step?.kernel ?? '').trim();
    if (!kernel.startsWith('attention')) continue;

    const required = requirementFor(kernel);
    if (activationDtype === required.activation && kvDtype === required.kv) continue;

    throw new Error(
      `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
      `activationDtype="${required.activation}" and kvcache.kvDtype="${required.kv}", but resolved ` +
      `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
    );
  }
}
|
|
858
|
-
|
|
859
|
-
/**
 * Builds an inline kernel path from resolved execution steps.
 *
 * The base decode/prefill step lists are taken from layer 0; when one phase
 * has no steps it reuses the other phase's list. Every layer whose decode or
 * prefill steps differ from the base lists gets a `layerOverrides` entry
 * (decode steps preferred, prefill steps as fallback). preLayer, postLayer
 * and decode-phase sampling steps are attached when non-empty.
 *
 * @param {object[]} steps - Execution steps.
 * @param {object|null|undefined} sessionDefaults - Source of activation / KV dtypes.
 * @param {string} modelId - Used for the path id; 'model' when falsy.
 * @param {number} numLayers - Number of layers to inspect for overrides.
 * @returns {object|null} The kernel path, or `null` when layer 0 has neither
 *   decode nor prefill steps.
 * @throws {Error} Whatever assertInlineKernelPathSessionCompatibility raises
 *   for the generated path.
 */
function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers) {
  const activationDtype = normalizeDtype(
    sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
    'sessionDefaults.compute.defaults.activationDtype'
  );
  const kvDtype = normalizeDtype(
    sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
    'sessionDefaults.kvcache.kvDtype'
  );
  const decodeSteps = buildLayerPhaseSteps(steps, 'decode', 0);
  const prefillSteps = buildLayerPhaseSteps(steps, 'prefill', 0);
  if (decodeSteps.length === 0 && prefillSteps.length === 0) {
    return null;
  }

  const path = {
    id: `${modelId || 'model'}-execution-v0`,
    name: 'Execution v0 inline kernel path',
    description: 'Generated from manifest.inference.execution.steps',
    activationDtype,
    kvDtype,
    decode: {
      steps: decodeSteps.length > 0 ? decodeSteps : prefillSteps,
    },
    prefill: {
      steps: prefillSteps.length > 0 ? prefillSteps : decodeSteps,
    },
  };

  if (numLayers > 0) {
    // The base lists never change inside the loop, so serialize them once
    // instead of once per layer.
    const baseDecodeKey = JSON.stringify(path.decode.steps);
    const basePrefillKey = JSON.stringify(path.prefill.steps);

    const overrides = [];
    for (let layerIdx = 0; layerIdx < numLayers; layerIdx++) {
      const decodeLayerSteps = buildLayerPhaseSteps(steps, 'decode', layerIdx);
      const prefillLayerSteps = buildLayerPhaseSteps(steps, 'prefill', layerIdx);
      const hasCustomDecode = JSON.stringify(decodeLayerSteps) !== baseDecodeKey;
      const hasCustomPrefill = JSON.stringify(prefillLayerSteps) !== basePrefillKey;
      if (!hasCustomDecode && !hasCustomPrefill) continue;
      // Kernel path layerOverrides are single-step lists per layer.
      const mergedLayerSteps = decodeLayerSteps.length > 0
        ? decodeLayerSteps
        : prefillLayerSteps;
      if (mergedLayerSteps.length > 0) {
        overrides.push({
          layers: [layerIdx],
          steps: mergedLayerSteps,
        });
      }
    }
    if (overrides.length > 0) {
      path.layerOverrides = overrides;
    }
  }

  const preLayer = getSectionSteps(steps, 'preLayer');
  if (preLayer.length > 0) {
    path.preLayer = preLayer;
  }
  const postLayer = getSectionSteps(steps, 'postLayer');
  if (postLayer.length > 0) {
    path.postLayer = postLayer;
  }
  const sampling = getSectionSteps(steps, 'sampling', 'decode');
  if (sampling.length > 0) {
    path.sampling = sampling;
  }

  assertInlineKernelPathSessionCompatibility(path, sessionDefaults);
  return path;
}
|
|
928
|
-
|
|
929
|
-
/**
 * Derives a layer pipeline description from the `layer` section of the
 * execution steps.
 *
 * @param {object[]} steps - Execution steps.
 * @returns {{steps: object[], overrides: Array}|null} `null` when there are no
 *   layer steps or when any layer step's op is not in PIPELINE_COMPATIBLE_OPS.
 */
function buildLayerPipelineFromExecution(steps) {
  const layerSectionSteps = steps.filter((step) => step.section === 'layer');
  if (layerSectionSteps.length === 0) return null;

  const allCompatible = layerSectionSteps.every((step) => PIPELINE_COMPATIBLE_OPS.has(step.op));
  if (!allCompatible) return null;

  // Copied whenever the value is not `undefined` (null / false are kept).
  const copyWhenDefined = ['residual', 'a', 'b', 'variant', 'skipInputNorm'];
  // Copied only when the value is truthy.
  const copyWhenTruthy = ['fromDtype', 'toDtype', 'probeStage', 'name', 'weight'];

  const toPipelineStep = (step) => {
    const pipelineStep = {
      op: step.op,
      phase: step.phase,
      src: step.src ?? 'state',
      dst: step.dst ?? 'state',
    };
    for (const field of copyWhenDefined) {
      if (step[field] !== undefined) pipelineStep[field] = step[field];
    }
    if (step.precision?.inputDtype) pipelineStep.inputDtype = step.precision.inputDtype;
    if (step.precision?.outputDtype) pipelineStep.outputDtype = step.precision.outputDtype;
    for (const field of copyWhenTruthy) {
      if (step[field]) pipelineStep[field] = step[field];
    }
    return pipelineStep;
  };

  return {
    steps: layerSectionSteps.map(toPipelineStep),
    overrides: [],
  };
}
|
|
963
|
-
|
|
964
|
-
/**
 * Translates manifest session defaults into a runtime patch object.
 *
 * - `compute.activationDtype` is set from the activation default when truthy.
 * - `compute.defaults` carries whichever of mathDtype / accumDtype /
 *   outputDtype are truthy, and is omitted when none are.
 * - `kvcache` is passed through by reference when truthy.
 * - `decodeLoop` is mapped onto `batching` with the six loop fields.
 *
 * @param {object|null|undefined} sessionDefaults
 * @returns {object} Patch with any of `compute`, `kvcache`, `batching`.
 */
function buildSessionRuntimePatch(sessionDefaults) {
  const computeDefaults = sessionDefaults?.compute?.defaults ?? null;

  const computePatch = {};
  if (computeDefaults?.activationDtype) {
    computePatch.activationDtype = computeDefaults.activationDtype;
  }

  const dtypeDefaults = {};
  for (const key of ['mathDtype', 'accumDtype', 'outputDtype']) {
    if (computeDefaults?.[key]) dtypeDefaults[key] = computeDefaults[key];
  }
  if (Object.keys(dtypeDefaults).length > 0) {
    computePatch.defaults = dtypeDefaults;
  }

  const patch = {};
  if (Object.keys(computePatch).length > 0) {
    patch.compute = computePatch;
  }
  if (sessionDefaults?.kvcache) {
    patch.kvcache = sessionDefaults.kvcache;
  }
  if (sessionDefaults?.decodeLoop) {
    const {
      batchSize,
      stopCheckMode,
      readbackInterval,
      ringTokens,
      ringStop,
      ringStaging,
    } = sessionDefaults.decodeLoop;
    patch.batching = {
      batchSize,
      stopCheckMode,
      readbackInterval,
      ringTokens,
      ringStop,
      ringStaging,
    };
  }
  return patch;
}
|
|
997
|
-
|
|
998
|
-
/**
 * Returns a copy of the manifest's `model` section for use as runtime overrides.
 *
 * @param {object|null|undefined} manifestInference - Manifest inference config.
 * @returns {object|null} `cloneJson(manifestInference.model)` when `model` is a
 *   non-null object (arrays included); otherwise `null`.
 */
function buildModelRuntimeOverrides(manifestInference) {
  const modelSection = manifestInference?.model;
  // `typeof null === 'object'`, so the truthiness check is what rejects null.
  const isObjectLike = Boolean(modelSection) && typeof modelSection === 'object';
  return isObjectLike ? cloneJson(modelSection) : null;
}
|
|
8
|
+
import {
|
|
9
|
+
applyExecutionPatchAtomic,
|
|
10
|
+
assertExecutionRuntimeOverlay,
|
|
11
|
+
assertExecutionV0Schema,
|
|
12
|
+
assertKVLayoutExecutionCompatibility,
|
|
13
|
+
collectLeafPaths,
|
|
14
|
+
createInitialSlotDtypes,
|
|
15
|
+
createSourceTrace,
|
|
16
|
+
hasDefinedPath,
|
|
17
|
+
indexKernelProfiles,
|
|
18
|
+
indexRuntimePatchMeta,
|
|
19
|
+
normalizeRuntimeSessionForExecutionV0,
|
|
20
|
+
resolvePhaseSteps,
|
|
21
|
+
setSourceTrace,
|
|
22
|
+
validateManifestSessionDefaultsContract,
|
|
23
|
+
validatePhaseBoundaryCompatibility,
|
|
24
|
+
validateStepShape,
|
|
25
|
+
validateUniqueStepIds,
|
|
26
|
+
cloneJson,
|
|
27
|
+
} from './execution-v0-contract-helpers.js';
|
|
28
|
+
import {
|
|
29
|
+
buildInlineKernelPath,
|
|
30
|
+
buildLayerPipelineFromExecution,
|
|
31
|
+
buildModelRuntimeOverrides,
|
|
32
|
+
buildSessionRuntimePatch,
|
|
33
|
+
resolveFinitenessFallbackKernelPathId,
|
|
34
|
+
} from './execution-v0-runtime-builders.js';
|
|
1005
35
|
|
|
1006
36
|
export function hasExecutionV0(manifestInference) {
|
|
1007
37
|
return !!manifestInference?.execution && Array.isArray(manifestInference.execution.steps);
|
|
@@ -1013,6 +43,7 @@ export function compileExecutionV0(options = {}) {
|
|
|
1013
43
|
return null;
|
|
1014
44
|
}
|
|
1015
45
|
assertExecutionV0Schema(manifestInference);
|
|
46
|
+
validateManifestSessionDefaultsContract(manifestInference);
|
|
1016
47
|
|
|
1017
48
|
const modelId = options.modelId ?? 'model';
|
|
1018
49
|
const numLayers = Number.isInteger(options.numLayers) ? options.numLayers : 0;
|
|
@@ -1024,7 +55,8 @@ export function compileExecutionV0(options = {}) {
|
|
|
1024
55
|
};
|
|
1025
56
|
const normalizedRuntimeSession = normalizeRuntimeSessionForExecutionV0(
|
|
1026
57
|
runtimeInference.session ?? {},
|
|
1027
|
-
manifestInference
|
|
58
|
+
manifestInference,
|
|
59
|
+
DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS
|
|
1028
60
|
);
|
|
1029
61
|
const sessionDefaults = mergeRuntimeValues(
|
|
1030
62
|
DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
|
|
@@ -1107,7 +139,19 @@ export function compileExecutionV0(options = {}) {
|
|
|
1107
139
|
...resolvedDecodeSteps.filter((step) => step.phase === 'decode'),
|
|
1108
140
|
];
|
|
1109
141
|
|
|
1110
|
-
const
|
|
142
|
+
const defaultKernelPathId = typeof manifestInference.defaultKernelPath === 'string'
|
|
143
|
+
&& manifestInference.defaultKernelPath.trim().length > 0
|
|
144
|
+
? manifestInference.defaultKernelPath.trim()
|
|
145
|
+
: null;
|
|
146
|
+
const finitenessFallbackKernelPathId = resolveFinitenessFallbackKernelPathId(defaultKernelPathId);
|
|
147
|
+
|
|
148
|
+
const kernelPath = buildInlineKernelPath(
|
|
149
|
+
patchedSteps,
|
|
150
|
+
resolvedSession,
|
|
151
|
+
modelId,
|
|
152
|
+
numLayers,
|
|
153
|
+
finitenessFallbackKernelPathId
|
|
154
|
+
);
|
|
1111
155
|
const layerPipeline = buildLayerPipelineFromExecution(resolvedSteps);
|
|
1112
156
|
const sessionPatch = buildSessionRuntimePatch(resolvedSession);
|
|
1113
157
|
const modelOverrides = buildModelRuntimeOverrides(manifestInference);
|
|
@@ -1144,13 +188,23 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
|
|
|
1144
188
|
}
|
|
1145
189
|
|
|
1146
190
|
const runtimeInference = runtimeConfig.inference ?? {};
|
|
191
|
+
const kernelPathExecution = runtimeInference.kernelPath !== undefined
|
|
192
|
+
? buildExecutionV0FromKernelPath(runtimeInference.kernelPath)
|
|
193
|
+
: null;
|
|
194
|
+
const manifestInference = kernelPathExecution
|
|
195
|
+
? {
|
|
196
|
+
...manifest.inference,
|
|
197
|
+
...kernelPathExecution,
|
|
198
|
+
defaultKernelPath: runtimeInference.kernelPath,
|
|
199
|
+
}
|
|
200
|
+
: manifest.inference;
|
|
1147
201
|
const runtimeExecutionOverlay = {
|
|
1148
202
|
...(runtimeInference.session ? { session: runtimeInference.session } : {}),
|
|
1149
203
|
...(runtimeInference.executionPatch ? { executionPatch: runtimeInference.executionPatch } : {}),
|
|
1150
204
|
};
|
|
1151
205
|
|
|
1152
206
|
const executionV0State = compileExecutionV0({
|
|
1153
|
-
manifestInference
|
|
207
|
+
manifestInference,
|
|
1154
208
|
runtimeInference: runtimeExecutionOverlay,
|
|
1155
209
|
modelId: options.modelId ?? manifest.modelId ?? 'model',
|
|
1156
210
|
numLayers: Number.isInteger(options.numLayers)
|
|
@@ -1161,13 +215,35 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
|
|
|
1161
215
|
return { runtimeConfig, executionV0State: null };
|
|
1162
216
|
}
|
|
1163
217
|
|
|
218
|
+
const compiledKernelPathSource = runtimeInference.kernelPath !== undefined
|
|
219
|
+
? 'config'
|
|
220
|
+
: 'manifest';
|
|
1164
221
|
const runtimeInferencePatch = { ...executionV0State.runtimeInferencePatch };
|
|
222
|
+
if (runtimeInferencePatch.kernelPathSource) {
|
|
223
|
+
runtimeInferencePatch.kernelPathSource = compiledKernelPathSource;
|
|
224
|
+
}
|
|
225
|
+
if (runtimeInference.kernelPath !== undefined) {
|
|
226
|
+
delete runtimeInferencePatch.kernelPath;
|
|
227
|
+
delete runtimeInferencePatch.kernelPathSource;
|
|
228
|
+
}
|
|
1165
229
|
if (runtimeInferencePatch.modelOverrides) {
|
|
1166
230
|
runtimeInferencePatch.modelOverrides = mergeRuntimeValues(
|
|
1167
231
|
runtimeInferencePatch.modelOverrides,
|
|
1168
232
|
runtimeInference.modelOverrides ?? {}
|
|
1169
233
|
);
|
|
1170
234
|
}
|
|
235
|
+
if (runtimeInference.kernelPath !== undefined && runtimeInference.compute) {
|
|
236
|
+
runtimeInferencePatch.compute = mergeRuntimeValues(
|
|
237
|
+
runtimeInferencePatch.compute ?? {},
|
|
238
|
+
runtimeInference.compute
|
|
239
|
+
);
|
|
240
|
+
}
|
|
241
|
+
if (runtimeInference.kernelPath !== undefined && runtimeInference.kvcache) {
|
|
242
|
+
runtimeInferencePatch.kvcache = mergeRuntimeValues(
|
|
243
|
+
runtimeInferencePatch.kvcache ?? {},
|
|
244
|
+
runtimeInference.kvcache
|
|
245
|
+
);
|
|
246
|
+
}
|
|
1171
247
|
|
|
1172
248
|
return {
|
|
1173
249
|
runtimeConfig: {
|