@simulatte/doppler 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +16 -23
- package/package.json +30 -32
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +31 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +5 -20
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +18 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +81 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +15 -2
- package/src/config/merge-contract-check.js +66 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +43 -8
- package/src/config/presets/models/gemma2.json +3 -2
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +3 -2
- package/src/config/schema/manifest.schema.js +17 -4
- package/src/config/schema/storage.schema.js +1 -1
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +104 -11
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +16 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +50 -29
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +40 -16
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +83 -27
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +53 -3
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +59 -40
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +66 -43
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +8 -0
- package/src/inference/browser-harness.js +149 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +10 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +192 -112
- package/src/inference/pipelines/text/attention/record.js +77 -14
- package/src/inference/pipelines/text/attention/run.js +112 -14
- package/src/inference/pipelines/text/config.js +17 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +46 -23
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
- package/src/inference/pipelines/text/generator-steps.js +340 -221
- package/src/inference/pipelines/text/generator.js +56 -40
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +94 -25
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +4 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
- package/src/inference/pipelines/text/linear-attention.js +113 -9
- package/src/inference/pipelines/text/logits/gpu.js +12 -7
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +13 -12
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +282 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +17 -7
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +10 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +84 -14
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +214 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +27 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +365 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +55 -6
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +30 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +120 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/types/model.d.ts +5 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +50 -26
|
@@ -2,6 +2,7 @@ import {
|
|
|
2
2
|
createConverterConfig,
|
|
3
3
|
HEADER_READ_SIZE,
|
|
4
4
|
} from '../../config/schema/index.js';
|
|
5
|
+
import { DEFAULT_EXECUTION_V0_SESSION_DEFAULTS } from '../../config/schema/execution-v0.schema.js';
|
|
5
6
|
import { extractArchitecture } from '../../converter/core.js';
|
|
6
7
|
import {
|
|
7
8
|
inferSourceWeightQuantization,
|
|
@@ -13,9 +14,11 @@ import { parseTransformerModel } from '../../converter/parsers/transformer.js';
|
|
|
13
14
|
import { parseGGUFHeader } from '../../formats/gguf/types.js';
|
|
14
15
|
import { parseSafetensorsHeader } from '../../formats/safetensors/types.js';
|
|
15
16
|
import { log } from '../../debug/index.js';
|
|
17
|
+
import { computeHash } from '../../storage/shard-manager.js';
|
|
16
18
|
import {
|
|
17
19
|
buildSourceRuntimeBundle,
|
|
18
20
|
createSourceStorageContext,
|
|
21
|
+
getSourceRuntimeMetadata,
|
|
19
22
|
} from '../../tooling/source-runtime-bundle.js';
|
|
20
23
|
|
|
21
24
|
const SUPPORTED_SOURCE_DTYPES = new Set([
|
|
@@ -42,6 +45,15 @@ const SOURCE_RUNTIME_EXECUTION_OVERRIDE = {
|
|
|
42
45
|
],
|
|
43
46
|
};
|
|
44
47
|
|
|
48
|
+
const SOURCE_RUNTIME_SESSION_DEFAULTS = {
|
|
49
|
+
compute: {
|
|
50
|
+
defaults: { ...DEFAULT_EXECUTION_V0_SESSION_DEFAULTS.compute.defaults },
|
|
51
|
+
kernelProfiles: [],
|
|
52
|
+
},
|
|
53
|
+
kvcache: null,
|
|
54
|
+
decodeLoop: null,
|
|
55
|
+
};
|
|
56
|
+
|
|
45
57
|
function normalizeRelativePath(value) {
|
|
46
58
|
return String(value || '')
|
|
47
59
|
.replace(/\\/g, '/')
|
|
@@ -166,6 +178,14 @@ async function readBridgeRange(bridgeClient, fileEntry, offset, length) {
|
|
|
166
178
|
return bridgeClient.read(fileEntry.absolutePath, offset, length);
|
|
167
179
|
}
|
|
168
180
|
|
|
181
|
+
async function readBridgeAllBytes(bridgeClient, fileEntry, label) {
|
|
182
|
+
const size = Number(fileEntry?.size) || 0;
|
|
183
|
+
if (size < 0) {
|
|
184
|
+
throw new Error(`Invalid bridge file size for ${label}.`);
|
|
185
|
+
}
|
|
186
|
+
return readBridgeRange(bridgeClient, fileEntry, 0, size);
|
|
187
|
+
}
|
|
188
|
+
|
|
169
189
|
async function readBridgeTextFile(bridgeClient, fileEntry, label) {
|
|
170
190
|
const size = Number(fileEntry?.size) || 0;
|
|
171
191
|
if (size <= 0) {
|
|
@@ -274,7 +294,39 @@ async function parseBridgeSafetensorsModel(bridgeClient, fileIndex) {
|
|
|
274
294
|
}
|
|
275
295
|
return { path, size: entry.size };
|
|
276
296
|
}),
|
|
297
|
+
auxiliaryFiles: [
|
|
298
|
+
{ path: 'config.json', size: Number(fileIndex.get('config.json')?.size || 0), kind: 'config' },
|
|
299
|
+
...(fileIndex.has('model.safetensors.index.json')
|
|
300
|
+
? [{
|
|
301
|
+
path: 'model.safetensors.index.json',
|
|
302
|
+
size: Number(fileIndex.get('model.safetensors.index.json')?.size || 0),
|
|
303
|
+
kind: 'safetensors_index',
|
|
304
|
+
}]
|
|
305
|
+
: []),
|
|
306
|
+
...(fileIndex.has('tokenizer.json')
|
|
307
|
+
? [{
|
|
308
|
+
path: 'tokenizer.json',
|
|
309
|
+
size: Number(fileIndex.get('tokenizer.json')?.size || 0),
|
|
310
|
+
kind: 'tokenizer_json',
|
|
311
|
+
}]
|
|
312
|
+
: []),
|
|
313
|
+
...(fileIndex.has('tokenizer_config.json')
|
|
314
|
+
? [{
|
|
315
|
+
path: 'tokenizer_config.json',
|
|
316
|
+
size: Number(fileIndex.get('tokenizer_config.json')?.size || 0),
|
|
317
|
+
kind: 'tokenizer_config',
|
|
318
|
+
}]
|
|
319
|
+
: []),
|
|
320
|
+
...(fileIndex.has('tokenizer.model')
|
|
321
|
+
? [{
|
|
322
|
+
path: 'tokenizer.model',
|
|
323
|
+
size: Number(fileIndex.get('tokenizer.model')?.size || 0),
|
|
324
|
+
kind: 'tokenizer_model',
|
|
325
|
+
}]
|
|
326
|
+
: []),
|
|
327
|
+
],
|
|
277
328
|
tokenizerJsonPath: fileIndex.has('tokenizer.json') ? 'tokenizer.json' : null,
|
|
329
|
+
tokenizerConfigPath: fileIndex.has('tokenizer_config.json') ? 'tokenizer_config.json' : null,
|
|
278
330
|
tokenizerModelPath: fileIndex.has('tokenizer.model') ? 'tokenizer.model' : null,
|
|
279
331
|
};
|
|
280
332
|
}
|
|
@@ -339,7 +391,9 @@ async function parseBridgeGGUFModel(bridgeClient, fileIndex, ggufRelativePath) {
|
|
|
339
391
|
tokenizerConfig: null,
|
|
340
392
|
tokenizerModelName: null,
|
|
341
393
|
sourceFiles: [{ path: ggufRelativePath, size: ggufEntry.size }],
|
|
394
|
+
auxiliaryFiles: [],
|
|
342
395
|
tokenizerJsonPath: null,
|
|
396
|
+
tokenizerConfigPath: null,
|
|
343
397
|
tokenizerModelPath: null,
|
|
344
398
|
};
|
|
345
399
|
}
|
|
@@ -391,14 +445,7 @@ function createBridgeFileReaders(bridgeClient, fileMap, rootPath) {
|
|
|
391
445
|
return null;
|
|
392
446
|
}
|
|
393
447
|
const direct = map.get(hint);
|
|
394
|
-
|
|
395
|
-
return direct;
|
|
396
|
-
}
|
|
397
|
-
const basename = hint.split('/').pop();
|
|
398
|
-
if (basename && map.has(basename)) {
|
|
399
|
-
return map.get(basename);
|
|
400
|
-
}
|
|
401
|
-
return null;
|
|
448
|
+
return direct || null;
|
|
402
449
|
};
|
|
403
450
|
|
|
404
451
|
const readRange = async (relativePath, offset, length) => {
|
|
@@ -432,10 +479,53 @@ function createBridgeFileReaders(bridgeClient, fileMap, rootPath) {
|
|
|
432
479
|
};
|
|
433
480
|
}
|
|
434
481
|
|
|
482
|
+
async function addHashesToBridgeFiles(bridgeClient, fileIndex, entries, hashAlgorithm) {
|
|
483
|
+
const hashedEntries = [];
|
|
484
|
+
for (const entry of Array.isArray(entries) ? entries : []) {
|
|
485
|
+
const relativePath = normalizeRelativePath(entry?.path);
|
|
486
|
+
if (!relativePath) continue;
|
|
487
|
+
const fileEntry = fileIndex.get(relativePath);
|
|
488
|
+
if (!fileEntry) {
|
|
489
|
+
throw new Error(`Missing bridge file entry for "${relativePath}"`);
|
|
490
|
+
}
|
|
491
|
+
const bytes = await readBridgeAllBytes(bridgeClient, fileEntry, `bridge source asset (${relativePath})`);
|
|
492
|
+
hashedEntries.push({
|
|
493
|
+
...entry,
|
|
494
|
+
path: relativePath,
|
|
495
|
+
size: Number.isFinite(entry?.size) ? Math.max(0, Math.floor(Number(entry.size))) : fileEntry.size,
|
|
496
|
+
hash: await computeHash(toUint8Array(bytes), hashAlgorithm),
|
|
497
|
+
hashAlgorithm,
|
|
498
|
+
});
|
|
499
|
+
}
|
|
500
|
+
return hashedEntries;
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
async function resolveBridgeStorageContext(options = {}) {
|
|
504
|
+
const bridgeClient = options.bridgeClient;
|
|
505
|
+
const localPath = options.localPath;
|
|
506
|
+
const manifest = options.manifest;
|
|
507
|
+
const sourceRuntime = getSourceRuntimeMetadata(manifest);
|
|
508
|
+
if (!sourceRuntime) {
|
|
509
|
+
return null;
|
|
510
|
+
}
|
|
511
|
+
const files = await listBridgeFilesRecursive(bridgeClient, localPath);
|
|
512
|
+
const fileMap = indexBridgeFiles(files);
|
|
513
|
+
const readers = createBridgeFileReaders(bridgeClient, fileMap, localPath);
|
|
514
|
+
return createSourceStorageContext({
|
|
515
|
+
manifest,
|
|
516
|
+
readRange: readers.readRange,
|
|
517
|
+
readText: readers.readText,
|
|
518
|
+
readBinary: readers.readBinary,
|
|
519
|
+
verifyHashes: options.verifyHashes !== false,
|
|
520
|
+
});
|
|
521
|
+
}
|
|
522
|
+
|
|
435
523
|
export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
436
524
|
const bridgeClient = options.bridgeClient;
|
|
437
525
|
const localPath = options.localPath;
|
|
438
526
|
const requestedModelId = options.modelId || null;
|
|
527
|
+
const verifyHashes = options.verifyHashes !== false;
|
|
528
|
+
const existingManifest = options.manifest ?? null;
|
|
439
529
|
|
|
440
530
|
if (!bridgeClient || typeof bridgeClient.read !== 'function' || typeof bridgeClient.list !== 'function') {
|
|
441
531
|
throw new Error('Bridge source runtime requires a connected bridge client with read/list support.');
|
|
@@ -444,6 +534,21 @@ export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
|
444
534
|
throw new Error('Bridge source runtime requires localPath.');
|
|
445
535
|
}
|
|
446
536
|
|
|
537
|
+
if (existingManifest && getSourceRuntimeMetadata(existingManifest)) {
|
|
538
|
+
const storageContext = await resolveBridgeStorageContext({
|
|
539
|
+
bridgeClient,
|
|
540
|
+
localPath,
|
|
541
|
+
manifest: existingManifest,
|
|
542
|
+
verifyHashes,
|
|
543
|
+
});
|
|
544
|
+
return {
|
|
545
|
+
manifest: existingManifest,
|
|
546
|
+
storageContext,
|
|
547
|
+
sourceKind: getSourceRuntimeMetadata(existingManifest)?.sourceKind ?? 'safetensors',
|
|
548
|
+
sourceRoot: localPath,
|
|
549
|
+
};
|
|
550
|
+
}
|
|
551
|
+
|
|
447
552
|
options.onProgress?.({
|
|
448
553
|
stage: 'source-discovery',
|
|
449
554
|
message: 'Scanning source files via bridge...',
|
|
@@ -461,6 +566,7 @@ export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
|
461
566
|
modelBaseId: requestedModelId || null,
|
|
462
567
|
},
|
|
463
568
|
inference: {
|
|
569
|
+
sessionDefaults: SOURCE_RUNTIME_SESSION_DEFAULTS,
|
|
464
570
|
execution: SOURCE_RUNTIME_EXECUTION_OVERRIDE,
|
|
465
571
|
},
|
|
466
572
|
});
|
|
@@ -476,26 +582,39 @@ export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
|
476
582
|
});
|
|
477
583
|
|
|
478
584
|
const modelId = resolveModelIdHint(requestedModelId, plan, parsed.sourceKind);
|
|
585
|
+
const hashAlgorithm = converterConfig.manifest.hashAlgorithm;
|
|
586
|
+
const files = await listBridgeFilesRecursive(bridgeClient, localPath);
|
|
587
|
+
const fileMap = indexBridgeFiles(files);
|
|
588
|
+
const sourceFiles = await addHashesToBridgeFiles(bridgeClient, fileMap, parsed.sourceFiles, hashAlgorithm);
|
|
589
|
+
const auxiliaryFiles = await addHashesToBridgeFiles(
|
|
590
|
+
bridgeClient,
|
|
591
|
+
fileMap,
|
|
592
|
+
parsed.auxiliaryFiles,
|
|
593
|
+
hashAlgorithm
|
|
594
|
+
);
|
|
479
595
|
const { manifest, shardSources } = await buildSourceRuntimeBundle({
|
|
480
596
|
modelId,
|
|
481
597
|
modelName: modelId,
|
|
482
598
|
modelType: plan.modelType,
|
|
599
|
+
sourceKind: parsed.sourceKind,
|
|
483
600
|
architecture: parsed.architecture,
|
|
484
601
|
architectureHint: parsed.architectureHint,
|
|
485
602
|
rawConfig: parsed.config,
|
|
486
603
|
inference: plan.manifestInference,
|
|
487
604
|
tensors: parsed.tensors,
|
|
488
|
-
sourceFiles
|
|
605
|
+
sourceFiles,
|
|
606
|
+
auxiliaryFiles,
|
|
489
607
|
sourceQuantization: parsed.sourceQuantization,
|
|
490
608
|
quantizationInfo: plan.quantizationInfo,
|
|
491
|
-
hashAlgorithm
|
|
609
|
+
hashAlgorithm,
|
|
492
610
|
tokenizerJson: parsed.tokenizerJson,
|
|
493
611
|
tokenizerConfig: parsed.tokenizerConfig,
|
|
494
612
|
tokenizerModelName: parsed.tokenizerModelName,
|
|
613
|
+
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
614
|
+
tokenizerConfigPath: parsed.tokenizerConfigPath,
|
|
615
|
+
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
495
616
|
});
|
|
496
617
|
|
|
497
|
-
const files = await listBridgeFilesRecursive(bridgeClient, localPath);
|
|
498
|
-
const fileMap = indexBridgeFiles(files);
|
|
499
618
|
const readers = createBridgeFileReaders(bridgeClient, fileMap, localPath);
|
|
500
619
|
const storageContext = createSourceStorageContext({
|
|
501
620
|
manifest,
|
|
@@ -505,7 +624,7 @@ export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
|
505
624
|
readBinary: readers.readBinary,
|
|
506
625
|
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
507
626
|
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
508
|
-
verifyHashes
|
|
627
|
+
verifyHashes,
|
|
509
628
|
});
|
|
510
629
|
|
|
511
630
|
log.info(
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 1,
|
|
3
|
-
"source": "
|
|
3
|
+
"source": "models/catalog.json",
|
|
4
4
|
"models": [
|
|
5
5
|
{
|
|
6
|
-
"modelId": "gemma-3-270m-it-
|
|
6
|
+
"modelId": "gemma-3-270m-it-q4k-ehf16-af32",
|
|
7
7
|
"aliases": [
|
|
8
8
|
"gemma3-270m",
|
|
9
9
|
"google/gemma-3-270m-it",
|
|
10
10
|
"gemma-3-270m-it-wq4k-ef16",
|
|
11
|
+
"gemma-3-270m-it-wq4k-ef16-hf16",
|
|
11
12
|
"gemma-3-270m-it-wq4k-ef16-hf16-f32"
|
|
12
13
|
],
|
|
13
14
|
"modes": [
|
|
@@ -15,24 +16,8 @@
|
|
|
15
16
|
],
|
|
16
17
|
"hf": {
|
|
17
18
|
"repoId": "Clocksmith/rdrr",
|
|
18
|
-
"revision": "
|
|
19
|
-
"path": "models/gemma-3-270m-it-
|
|
20
|
-
}
|
|
21
|
-
},
|
|
22
|
-
{
|
|
23
|
-
"modelId": "google-embeddinggemma-300m-wq4k-ef16",
|
|
24
|
-
"aliases": [
|
|
25
|
-
"embeddinggemma-300m",
|
|
26
|
-
"google/embeddinggemma-300m",
|
|
27
|
-
"google-embeddinggemma-300m-wq4k-ef16"
|
|
28
|
-
],
|
|
29
|
-
"modes": [
|
|
30
|
-
"embedding"
|
|
31
|
-
],
|
|
32
|
-
"hf": {
|
|
33
|
-
"repoId": "Clocksmith/rdrr",
|
|
34
|
-
"revision": "4efe64a914892e98be50842aeb16c3b648cc68a5",
|
|
35
|
-
"path": "models/google-embeddinggemma-300m-wq4k-ef16"
|
|
19
|
+
"revision": "ca6f0dbdf3882d3893a65cf48f2bb6f1520df162",
|
|
20
|
+
"path": "models/gemma-3-270m-it-q4k-ehf16-af32"
|
|
36
21
|
}
|
|
37
22
|
}
|
|
38
23
|
]
|
|
@@ -1,8 +1,23 @@
|
|
|
1
1
|
import { loadJson } from '../utils/load-json.js';
|
|
2
2
|
import { validateBackwardRegistry } from './schema/backward-registry.schema.js';
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
function deepFreeze(value, seen = new WeakSet()) {
|
|
5
|
+
if (!value || typeof value !== 'object' || seen.has(value)) {
|
|
6
|
+
return value;
|
|
7
|
+
}
|
|
8
|
+
seen.add(value);
|
|
9
|
+
for (const entry of Object.values(value)) {
|
|
10
|
+
deepFreeze(entry, seen);
|
|
11
|
+
}
|
|
12
|
+
return Object.freeze(value);
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
// Load the backward-kernel registry JSON once at module scope (top-level
// await), run it through the schema validator, then deep-freeze the result.
// loadBackwardRegistry() returns this frozen object.
const rawBackwardRegistry = await loadJson(
  './kernels/backward-registry.json',
  import.meta.url,
  'Failed to load json'
);
const validatedBackwardRegistry = validateBackwardRegistry(rawBackwardRegistry);
const backwardRegistryData = deepFreeze(validatedBackwardRegistry);
|
|
5
20
|
|
|
6
21
|
export function loadBackwardRegistry() {
|
|
7
|
-
return
|
|
22
|
+
return backwardRegistryData;
|
|
8
23
|
}
|
|
@@ -1,8 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS,
|
|
3
|
-
isExecutionV0Digest,
|
|
4
|
-
isExecutionV0Semver,
|
|
5
|
-
} from './schema/execution-v0.schema.js';
|
|
1
|
+
import { isExecutionV0Digest, isExecutionV0Semver } from './schema/execution-v0.schema.js';
|
|
6
2
|
|
|
7
3
|
function normalizeDtype(value, label) {
|
|
8
4
|
const normalized = String(value ?? '').trim().toLowerCase();
|
|
@@ -32,6 +28,107 @@ function assertExecutionV0KernelRef(kernelRef, label) {
|
|
|
32
28
|
}
|
|
33
29
|
}
|
|
34
30
|
|
|
31
|
+
/**
 * Asserts that `value` is a non-null, non-array object and hands it back.
 *
 * @param value Candidate value.
 * @param label Field path used in the error message.
 * @returns The same `value` reference when the check passes.
 * @throws {Error} `[ExecutionV0Contract] <label> must be an object.` for
 *   falsy values, non-object types, and arrays.
 */
function requirePlainObject(value, label) {
  const isNonArrayObject =
    typeof value === 'object' && value !== null && !Array.isArray(value);
  if (isNonArrayObject) {
    return value;
  }
  throw new Error(`[ExecutionV0Contract] ${label} must be an object.`);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Returns `root[key]`, insisting that `key` is an own property of `root`.
 *
 * The property only has to exist: `undefined` and `null` values are returned
 * as-is, and inherited properties do not count.
 *
 * @param root  Container expected to own the property.
 * @param key   Property name to look up.
 * @param label Field path used in the error message.
 * @returns The value stored under `key`.
 * @throws {Error} `[ExecutionV0Contract] <label> is required.` when the
 *   property is absent or when `root` itself is null/undefined.
 */
function requireOwnProperty(root, key, label) {
  // A null/undefined container cannot own anything. Report that as a missing
  // contract field instead of letting hasOwnProperty.call raise an opaque
  // "Cannot convert undefined or null to object" TypeError.
  if (root == null || !Object.prototype.hasOwnProperty.call(root, key)) {
    throw new Error(`[ExecutionV0Contract] ${label} is required.`);
  }
  return root[key];
}
|
|
44
|
+
|
|
45
|
+
/**
 * Reads a property that must be present on `root` and must hold either an
 * explicit `null` or a non-array object.
 *
 * @param root  Container expected to own the property.
 * @param key   Property name to look up.
 * @param label Field path used in error messages.
 * @returns `null` when the stored value is `null`, otherwise the stored object.
 * @throws {Error} When the property is absent (via requireOwnProperty) or its
 *   value is neither `null` nor an object (via requirePlainObject).
 */
function requireNullableObject(root, key, label) {
  const candidate = requireOwnProperty(root, key, label);
  return candidate === null ? null : requirePlainObject(candidate, label);
}
|
|
52
|
+
|
|
53
|
+
/**
 * Reads a property that must be present on `root` and must hold an array.
 *
 * @param root  Container expected to own the property.
 * @param key   Property name to look up.
 * @param label Field path used in error messages.
 * @returns The stored array, unchanged.
 * @throws {Error} When the property is absent (via requireOwnProperty), or
 *   `[ExecutionV0Contract] <label> must be an array.` when it is not an array.
 */
function requireArrayProperty(root, key, label) {
  const candidate = requireOwnProperty(root, key, label);
  if (Array.isArray(candidate)) {
    return candidate;
  }
  throw new Error(`[ExecutionV0Contract] ${label} must be an array.`);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Reads a dtype property that must be present on `root` and must not be
 * `null` or `undefined`, then passes it through `normalizeDtype`.
 *
 * @param root  Container expected to own the property.
 * @param key   Property name to look up.
 * @param label Field path used in error messages and forwarded to normalizeDtype.
 * @returns Whatever `normalizeDtype(value, label)` returns.
 * @throws {Error} `[ExecutionV0Contract] <label> is required.` when the
 *   property is absent or nullish; normalizeDtype may raise its own errors.
 */
function requireDtypeProperty(root, key, label) {
  const rawDtype = requireOwnProperty(root, key, label);
  // An explicitly nullish dtype is treated the same as a missing one.
  if (rawDtype === null || rawDtype === undefined) {
    throw new Error(`[ExecutionV0Contract] ${label} is required.`);
  }
  return normalizeDtype(rawDtype, label);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Checks that `sessionDefaults` explicitly declares every field the
 * execution-v0 contract reads, throwing on the first violation.
 *
 * Checks run in this order:
 *   1. `sessionDefaults` is an object.
 *   2. `compute` and `compute.defaults` are present and are objects.
 *   3. `compute.defaults` carries `activationDtype`, `mathDtype`,
 *      `accumDtype` and `outputDtype`.
 *   4. `compute.kernelProfiles` is present and is an array.
 *   5. `kvcache` is present and is `null` or an object; when it is an object
 *      it must carry `kvDtype`.
 *   6. `decodeLoop` is present and is `null` or an object.
 *
 * The normalized dtype values produced during validation are discarded;
 * nothing is written back into the input.
 *
 * @param sessionDefaults Session defaults taken from the manifest.
 * @returns The same `sessionDefaults` object that was passed in.
 * @throws {Error} An `[ExecutionV0Contract] ...` error from the first helper
 *   whose check fails.
 */
function validateExecutionV0SessionDefaults(sessionDefaults = {}) {
  const root = requirePlainObject(sessionDefaults, 'manifest.inference.sessionDefaults');

  const computeSection = requirePlainObject(
    requireOwnProperty(root, 'compute', 'sessionDefaults.compute'),
    'sessionDefaults.compute'
  );
  const defaultsSection = requirePlainObject(
    requireOwnProperty(computeSection, 'defaults', 'sessionDefaults.compute.defaults'),
    'sessionDefaults.compute.defaults'
  );

  // Validated in this exact order so the first reported error is stable.
  const requiredDtypes = [
    ['activationDtype', 'sessionDefaults.compute.defaults.activationDtype'],
    ['mathDtype', 'sessionDefaults.compute.defaults.mathDtype'],
    ['accumDtype', 'sessionDefaults.compute.defaults.accumDtype'],
    ['outputDtype', 'sessionDefaults.compute.defaults.outputDtype'],
  ];
  for (const [field, fieldLabel] of requiredDtypes) {
    requireDtypeProperty(defaultsSection, field, fieldLabel);
  }

  requireArrayProperty(computeSection, 'kernelProfiles', 'sessionDefaults.compute.kernelProfiles');

  const kvcacheSection = requireNullableObject(root, 'kvcache', 'sessionDefaults.kvcache');
  if (kvcacheSection !== null) {
    requireDtypeProperty(kvcacheSection, 'kvDtype', 'sessionDefaults.kvcache.kvDtype');
  }

  requireNullableObject(root, 'decodeLoop', 'sessionDefaults.decodeLoop');

  return root;
}
|
|
131
|
+
|
|
35
132
|
function createPrecisionSources(step, profile) {
|
|
36
133
|
return {
|
|
37
134
|
inputDtype: step.precision?.inputDtype != null
|
|
@@ -89,10 +186,10 @@ export function resolveExecutionV0KernelProfile(profileIndex, step) {
|
|
|
89
186
|
}
|
|
90
187
|
|
|
91
188
|
export function resolveExecutionV0Precision(step, profile, sessionDefaults = {}) {
|
|
92
|
-
const defaults =
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
189
|
+
const defaults = requirePlainObject(
|
|
190
|
+
sessionDefaults?.compute?.defaults,
|
|
191
|
+
'sessionDefaults.compute.defaults'
|
|
192
|
+
);
|
|
96
193
|
const precision = {
|
|
97
194
|
inputDtype: step.precision?.inputDtype
|
|
98
195
|
?? profile?.precision?.inputDtype
|
|
@@ -132,12 +229,12 @@ export function resolveExecutionV0KVIO(step, profile, sessionDefaults = {}) {
|
|
|
132
229
|
source: 'kernelProfile',
|
|
133
230
|
};
|
|
134
231
|
}
|
|
135
|
-
const defaults = {
|
|
136
|
-
...DEFAULT_EXECUTION_V0_COMPUTE_DEFAULTS,
|
|
137
|
-
...(sessionDefaults?.compute?.defaults ?? {}),
|
|
138
|
-
};
|
|
139
232
|
const kvDtype = normalizeDtype(
|
|
140
|
-
|
|
233
|
+
requireOwnProperty(
|
|
234
|
+
requireNullableObject(sessionDefaults, 'kvcache', 'sessionDefaults.kvcache') ?? {},
|
|
235
|
+
'kvDtype',
|
|
236
|
+
'sessionDefaults.kvcache.kvDtype'
|
|
237
|
+
),
|
|
141
238
|
`${step.id}.sessionDefaults.kvcache.kvDtype`
|
|
142
239
|
);
|
|
143
240
|
return {
|
|
@@ -155,10 +252,11 @@ export function buildExecutionV0ContractArtifact(manifestInference, options = {}
|
|
|
155
252
|
const checks = [];
|
|
156
253
|
const errors = [];
|
|
157
254
|
const perStep = {};
|
|
158
|
-
|
|
255
|
+
let sessionDefaults = manifestInference.sessionDefaults ?? {};
|
|
159
256
|
let profileIndex;
|
|
160
257
|
|
|
161
258
|
try {
|
|
259
|
+
sessionDefaults = validateExecutionV0SessionDefaults(sessionDefaults);
|
|
162
260
|
profileIndex = indexExecutionV0KernelProfiles(sessionDefaults);
|
|
163
261
|
} catch (error) {
|
|
164
262
|
errors.push(error instanceof Error ? error.message : String(error));
|
|
@@ -36,6 +36,11 @@ function normalizeRegistryEntry(entry, index) {
|
|
|
36
36
|
? entry.aliasOf.trim()
|
|
37
37
|
: null;
|
|
38
38
|
const hasFile = typeof entry.file === 'string' && entry.file.trim() !== '';
|
|
39
|
+
if (aliasOf && hasFile) {
|
|
40
|
+
throw new Error(
|
|
41
|
+
`kernel-path contract: entries[${index}] must not include both file and aliasOf.`
|
|
42
|
+
);
|
|
43
|
+
}
|
|
39
44
|
if (!aliasOf && !hasFile) {
|
|
40
45
|
throw new Error(
|
|
41
46
|
`kernel-path contract: entries[${index}] must include file or aliasOf.`
|
|
@@ -111,12 +116,16 @@ function normalizeAutoSelectRule(rule, index) {
|
|
|
111
116
|
const hasSubgroups = typeof match.hasSubgroups === 'boolean'
|
|
112
117
|
? match.hasSubgroups
|
|
113
118
|
: null;
|
|
119
|
+
const hasF16 = typeof match.hasF16 === 'boolean'
|
|
120
|
+
? match.hasF16
|
|
121
|
+
: null;
|
|
114
122
|
const value = rule.value;
|
|
115
123
|
if (typeof value === 'string' && value.trim() !== '') {
|
|
116
124
|
return {
|
|
117
125
|
matchKernelPathRef,
|
|
118
126
|
allowCapabilityAutoSelection,
|
|
119
127
|
hasSubgroups,
|
|
128
|
+
hasF16,
|
|
120
129
|
valueKind: 'string',
|
|
121
130
|
value: value.trim(),
|
|
122
131
|
isDefault: Object.keys(match).length === 0,
|
|
@@ -127,6 +136,7 @@ function normalizeAutoSelectRule(rule, index) {
|
|
|
127
136
|
matchKernelPathRef,
|
|
128
137
|
allowCapabilityAutoSelection,
|
|
129
138
|
hasSubgroups,
|
|
139
|
+
hasF16,
|
|
130
140
|
valueKind: 'context',
|
|
131
141
|
value: value.context.trim(),
|
|
132
142
|
isDefault: Object.keys(match).length === 0,
|
|
@@ -380,9 +390,9 @@ export function validateKernelPathContractFacts(facts) {
|
|
|
380
390
|
autoSelectShapeErrors += 1;
|
|
381
391
|
errors.push('[KernelPathContract] non-default autoSelect rules must match on kernelPathRef.');
|
|
382
392
|
}
|
|
383
|
-
if (rule.hasSubgroups == null) {
|
|
393
|
+
if (rule.hasSubgroups == null && rule.hasF16 == null) {
|
|
384
394
|
autoSelectShapeErrors += 1;
|
|
385
|
-
errors.push('[KernelPathContract] non-default autoSelect rules must match on hasSubgroups.');
|
|
395
|
+
errors.push('[KernelPathContract] non-default autoSelect rules must match on hasSubgroups and/or hasF16.');
|
|
386
396
|
}
|
|
387
397
|
if (rule.valueKind === 'context') {
|
|
388
398
|
autoSelectShapeErrors += 1;
|
|
@@ -401,36 +411,54 @@ export function validateKernelPathContractFacts(facts) {
|
|
|
401
411
|
);
|
|
402
412
|
}
|
|
403
413
|
}
|
|
404
|
-
const resolvedAutoSelectRules = autoSelectRules.map((rule) =>
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
: rule.
|
|
415
|
-
|
|
414
|
+
const resolvedAutoSelectRules = autoSelectRules.map((rule) => {
|
|
415
|
+
if (rule.isDefault) {
|
|
416
|
+
return {
|
|
417
|
+
match: {},
|
|
418
|
+
value: rule.valueKind === 'context'
|
|
419
|
+
? { context: rule.value }
|
|
420
|
+
: rule.value,
|
|
421
|
+
};
|
|
422
|
+
}
|
|
423
|
+
const match = {
|
|
424
|
+
allowCapabilityAutoSelection: rule.allowCapabilityAutoSelection,
|
|
425
|
+
kernelPathRef: rule.matchKernelPathRef,
|
|
426
|
+
};
|
|
427
|
+
if (rule.hasSubgroups != null) {
|
|
428
|
+
match.hasSubgroups = rule.hasSubgroups;
|
|
429
|
+
}
|
|
430
|
+
if (rule.hasF16 != null) {
|
|
431
|
+
match.hasF16 = rule.hasF16;
|
|
432
|
+
}
|
|
433
|
+
return {
|
|
434
|
+
match,
|
|
435
|
+
value: rule.valueKind === 'context'
|
|
436
|
+
? { context: rule.value }
|
|
437
|
+
: rule.value,
|
|
438
|
+
};
|
|
439
|
+
});
|
|
416
440
|
for (const entry of entries) {
|
|
417
441
|
for (const allowCapabilityAutoSelection of [true, false]) {
|
|
418
442
|
for (const hasSubgroups of [true, false]) {
|
|
419
|
-
const
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
443
|
+
for (const hasF16 of [true, false]) {
|
|
444
|
+
const selected = selectByRules(resolvedAutoSelectRules, {
|
|
445
|
+
kernelPathRef: entry.id,
|
|
446
|
+
allowCapabilityAutoSelection,
|
|
447
|
+
hasSubgroups,
|
|
448
|
+
hasF16,
|
|
449
|
+
});
|
|
450
|
+
const resolved = isPlainObject(selected) && selected.context === 'kernelPathRef'
|
|
451
|
+
? entry.id
|
|
452
|
+
: selected;
|
|
453
|
+
if (typeof resolved !== 'string' || !resolved.length || !entriesById.has(resolved)) {
|
|
454
|
+
autoSelectCoverageErrors += 1;
|
|
455
|
+
errors.push(
|
|
456
|
+
`[KernelPathContract] autoSelect rules did not yield a valid kernel path for ` +
|
|
457
|
+
`"${entry.id}" (allowCapabilityAutoSelection=${allowCapabilityAutoSelection}, ` +
|
|
458
|
+
`hasSubgroups=${hasSubgroups}, hasF16=${hasF16}).`
|
|
459
|
+
);
|
|
460
|
+
break;
|
|
461
|
+
}
|
|
434
462
|
}
|
|
435
463
|
}
|
|
436
464
|
}
|
|
@@ -134,6 +134,11 @@ export function getKernelPathStrict(): boolean;
|
|
|
134
134
|
*/
|
|
135
135
|
export function isKernelPathFusedQ4K(path?: KernelPathSchema | null): boolean;
|
|
136
136
|
|
|
137
|
+
/**
|
|
138
|
+
* Check if a kernel path requires matmul weights to stay in F32.
|
|
139
|
+
*/
|
|
140
|
+
export function kernelPathRequiresF32MatmulWeights(path?: KernelPathSchema | null): boolean;
|
|
141
|
+
|
|
137
142
|
/**
|
|
138
143
|
* Check if the active kernel path uses fused Q4K matmul.
|
|
139
144
|
*/
|