@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
|
@@ -224,6 +224,29 @@ function formatChatML(messages) {
|
|
|
224
224
|
return parts.join('');
|
|
225
225
|
}
|
|
226
226
|
|
|
227
|
+
function formatQwen(messages) {
|
|
228
|
+
// Qwen 3.5 chat format is ChatML-like, but the generation prelude includes
|
|
229
|
+
// an explicit empty thinking block before assistant output.
|
|
230
|
+
const parts = [];
|
|
231
|
+
for (const [index, message] of messages.entries()) {
|
|
232
|
+
const role = normalizeChatRole(message?.role);
|
|
233
|
+
assertSupportedChatRole(role, 'Qwen', index);
|
|
234
|
+
if (role === 'system' && index !== 0) {
|
|
235
|
+
throw new Error('Qwen template requires any system message to appear first.');
|
|
236
|
+
}
|
|
237
|
+
const content = normalizeChatMessageContent(message?.content);
|
|
238
|
+
if (role === 'system') {
|
|
239
|
+
parts.push(`<|im_start|>system\n${content}<|im_end|>\n`);
|
|
240
|
+
} else if (role === 'user') {
|
|
241
|
+
parts.push(`<|im_start|>user\n${content}<|im_end|>\n`);
|
|
242
|
+
} else if (role === 'assistant') {
|
|
243
|
+
parts.push(`<|im_start|>assistant\n${content}<|im_end|>\n`);
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
parts.push('<|im_start|>assistant\n<think>\n\n</think>\n\n');
|
|
247
|
+
return parts.join('');
|
|
248
|
+
}
|
|
249
|
+
|
|
227
250
|
function formatTranslateGemmaUserPrompt(content) {
|
|
228
251
|
if (!Array.isArray(content) || content.length !== 1) {
|
|
229
252
|
throw new Error(
|
|
@@ -345,7 +368,7 @@ const CHAT_FORMATTERS = {
|
|
|
345
368
|
'llama3': formatHeaderBased,
|
|
346
369
|
'gpt-oss': formatChannelBased,
|
|
347
370
|
'chatml': formatChatML,
|
|
348
|
-
'qwen':
|
|
371
|
+
'qwen': formatQwen,
|
|
349
372
|
'translategemma': formatTranslateGemma,
|
|
350
373
|
};
|
|
351
374
|
|
|
@@ -363,4 +386,5 @@ export function formatChatMessages(messages, templateType) {
|
|
|
363
386
|
export const formatGemmaChat = formatTurnBased;
|
|
364
387
|
export const formatLlama3Chat = formatHeaderBased;
|
|
365
388
|
export const formatGptOssChat = formatChannelBased;
|
|
389
|
+
export const formatQwenChat = formatQwen;
|
|
366
390
|
export const formatTranslateGemmaChat = formatTranslateGemma;
|
|
@@ -148,6 +148,10 @@ export interface ParsedModelConfig {
|
|
|
148
148
|
slidingWindow: number | null;
|
|
149
149
|
ropeTheta: number;
|
|
150
150
|
ropeLocalTheta: number | null;
|
|
151
|
+
ropeRotaryDim: number;
|
|
152
|
+
ropeInterleaved: boolean;
|
|
153
|
+
mropeSection: number[] | null;
|
|
154
|
+
partialRotaryFactor: number | null;
|
|
151
155
|
ropeScale: number;
|
|
152
156
|
ropeLocalScale: number;
|
|
153
157
|
ropeScalingType: string | null;
|
|
@@ -21,6 +21,28 @@ function assertSupportedRuntimeModelType(manifest) {
|
|
|
21
21
|
);
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
+
function resolveRotaryDim(headDim, partialRotaryFactor, modelId) {
|
|
25
|
+
if (partialRotaryFactor == null) {
|
|
26
|
+
return headDim;
|
|
27
|
+
}
|
|
28
|
+
if (typeof partialRotaryFactor !== 'number' || Number.isNaN(partialRotaryFactor)) {
|
|
29
|
+
throw new Error(`Manifest "${modelId}" has invalid rope.partialRotaryFactor.`);
|
|
30
|
+
}
|
|
31
|
+
if (partialRotaryFactor <= 0 || partialRotaryFactor > 1) {
|
|
32
|
+
throw new Error(
|
|
33
|
+
`Manifest "${modelId}" requires 0 < rope.partialRotaryFactor <= 1; got ${partialRotaryFactor}.`
|
|
34
|
+
);
|
|
35
|
+
}
|
|
36
|
+
const rotaryDim = Math.trunc(headDim * partialRotaryFactor);
|
|
37
|
+
if (rotaryDim <= 0 || (rotaryDim % 2) !== 0) {
|
|
38
|
+
throw new Error(
|
|
39
|
+
`Manifest "${modelId}" resolves rope rotary dim ${rotaryDim} from headDim=${headDim} ` +
|
|
40
|
+
`and partialRotaryFactor=${partialRotaryFactor}, but rotary dim must be a positive even integer.`
|
|
41
|
+
);
|
|
42
|
+
}
|
|
43
|
+
return rotaryDim;
|
|
44
|
+
}
|
|
45
|
+
|
|
24
46
|
export function getStopTokenIds(manifest) {
|
|
25
47
|
const eosTokenId = manifest?.eos_token_id;
|
|
26
48
|
if (Array.isArray(eosTokenId)) return eosTokenId;
|
|
@@ -112,11 +134,10 @@ function resolveIntermediateSizeForRuntime(manifest, inf, arch, modelId) {
|
|
|
112
134
|
if (inferred == null || inferred === fromArch) {
|
|
113
135
|
return fromArch;
|
|
114
136
|
}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
137
|
+
throw new Error(
|
|
138
|
+
`Manifest "${modelId}" has intermediateSize=${fromArch}, but FFN tensors imply ${inferred}. ` +
|
|
139
|
+
'Re-convert the model so manifest architecture matches the weights.'
|
|
118
140
|
);
|
|
119
|
-
return inferred;
|
|
120
141
|
}
|
|
121
142
|
|
|
122
143
|
// =============================================================================
|
|
@@ -130,7 +151,14 @@ export function hasManifestInference(manifest) {
|
|
|
130
151
|
|
|
131
152
|
|
|
132
153
|
export function validateRequiredInferenceFields(inf, modelId) {
|
|
133
|
-
|
|
154
|
+
inf = inf ?? {};
|
|
155
|
+
inf.attention = inf.attention ?? {};
|
|
156
|
+
inf.normalization = inf.normalization ?? {};
|
|
157
|
+
inf.ffn = inf.ffn ?? {};
|
|
158
|
+
inf.rope = inf.rope ?? {};
|
|
159
|
+
inf.output = inf.output ?? {};
|
|
160
|
+
inf.layerPattern = inf.layerPattern ?? {};
|
|
161
|
+
inf.chatTemplate = inf.chatTemplate ?? {};
|
|
134
162
|
const errors = [];
|
|
135
163
|
|
|
136
164
|
// Attention fields - non-nullable required
|
|
@@ -201,6 +229,20 @@ export function validateRequiredInferenceFields(inf, modelId) {
|
|
|
201
229
|
if (inf.rope.ropeLocalTheta === undefined) {
|
|
202
230
|
errors.push('rope.ropeLocalTheta must be explicitly set (null for no local theta, or number)');
|
|
203
231
|
}
|
|
232
|
+
if (inf.rope.mropeInterleaved == null) {
|
|
233
|
+
errors.push('rope.mropeInterleaved is required');
|
|
234
|
+
}
|
|
235
|
+
if (inf.rope.mropeSection === undefined) {
|
|
236
|
+
errors.push('rope.mropeSection must be explicitly set (null when unused, or an array of positive integers)');
|
|
237
|
+
}
|
|
238
|
+
if (inf.rope.partialRotaryFactor === undefined) {
|
|
239
|
+
errors.push('rope.partialRotaryFactor must be explicitly set (null when unused, or a number in (0, 1])');
|
|
240
|
+
} else {
|
|
241
|
+
const factor = inf.rope.partialRotaryFactor;
|
|
242
|
+
if (factor !== null && (typeof factor !== 'number' || Number.isNaN(factor) || factor <= 0 || factor > 1)) {
|
|
243
|
+
errors.push('rope.partialRotaryFactor must be a number in (0, 1] or null');
|
|
244
|
+
}
|
|
245
|
+
}
|
|
204
246
|
|
|
205
247
|
// Output fields - non-nullable required
|
|
206
248
|
if (inf.output.tieWordEmbeddings == null) {
|
|
@@ -458,6 +500,26 @@ export function toParsedConfigFromMerged(merged, manifest) {
|
|
|
458
500
|
const ropeScalingType = inf.rope.ropeScalingType;
|
|
459
501
|
const ropeLocalScale = inf.rope.ropeLocalScalingFactor ?? ropeScale;
|
|
460
502
|
const ropeLocalScalingType = inf.rope.ropeLocalScalingType ?? ropeScalingType;
|
|
503
|
+
const partialRotaryFactor = inf.rope.partialRotaryFactor;
|
|
504
|
+
const ropeInterleaved = inf.rope.mropeInterleaved === true;
|
|
505
|
+
const mropeSection = Array.isArray(inf.rope.mropeSection)
|
|
506
|
+
? inf.rope.mropeSection.map((entry) => Math.trunc(Number(entry)))
|
|
507
|
+
: null;
|
|
508
|
+
const ropeRotaryDim = resolveRotaryDim(arch.headDim, partialRotaryFactor, merged.modelId);
|
|
509
|
+
if (mropeSection && mropeSection.some((entry) => !Number.isFinite(entry) || entry <= 0)) {
|
|
510
|
+
throw new Error(
|
|
511
|
+
`Manifest "${merged.modelId}" has invalid rope.mropeSection; expected positive integers.`
|
|
512
|
+
);
|
|
513
|
+
}
|
|
514
|
+
if (ropeInterleaved && mropeSection) {
|
|
515
|
+
const doubledMropeDim = mropeSection.reduce((sum, entry) => sum + entry, 0) * 2;
|
|
516
|
+
if (doubledMropeDim !== ropeRotaryDim) {
|
|
517
|
+
throw new Error(
|
|
518
|
+
`Manifest "${merged.modelId}" declares rope.mropeSection=${JSON.stringify(mropeSection)}, ` +
|
|
519
|
+
`which expands to rotary dim ${doubledMropeDim}, but the resolved rotary dim is ${ropeRotaryDim}.`
|
|
520
|
+
);
|
|
521
|
+
}
|
|
522
|
+
}
|
|
461
523
|
|
|
462
524
|
// Build ropeScaling object from manifest values if scaling is enabled
|
|
463
525
|
// Include YARN params when present
|
|
@@ -532,6 +594,10 @@ export function toParsedConfigFromMerged(merged, manifest) {
|
|
|
532
594
|
slidingWindow: inf.attention.slidingWindow,
|
|
533
595
|
ropeTheta: inf.rope.ropeTheta,
|
|
534
596
|
ropeLocalTheta: inf.rope.ropeLocalTheta,
|
|
597
|
+
ropeRotaryDim,
|
|
598
|
+
ropeInterleaved,
|
|
599
|
+
mropeSection,
|
|
600
|
+
partialRotaryFactor,
|
|
535
601
|
ropeScale,
|
|
536
602
|
ropeLocalScale,
|
|
537
603
|
ropeScalingType,
|
|
@@ -319,14 +319,8 @@ export async function embed(tokenIds, embedBuffer, config) {
|
|
|
319
319
|
const firstTokenId = tokenIdArray[0];
|
|
320
320
|
const bytesPerElement = useF16 ? 2 : 4;
|
|
321
321
|
const sampleSize = Math.min(32 * bytesPerElement, hiddenSize * bytesPerElement);
|
|
322
|
-
const
|
|
323
|
-
const
|
|
324
|
-
enc.copyBufferToBuffer(gatherOutput.buffer, 0, staging, 0, sampleSize);
|
|
325
|
-
device.queue.submit([enc.finish()]);
|
|
326
|
-
await staging.mapAsync(GPUMapMode.READ);
|
|
327
|
-
const data = decodeReadback(staging.getMappedRange().slice(0), gatherOptions.outputDtype);
|
|
328
|
-
staging.unmap();
|
|
329
|
-
staging.destroy();
|
|
322
|
+
const readback = await readBuffer(gatherOutput.buffer, sampleSize);
|
|
323
|
+
const data = decodeReadback(readback, gatherOptions.outputDtype);
|
|
330
324
|
|
|
331
325
|
// Compute statistics
|
|
332
326
|
let sum = 0, sumSq = 0;
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { log } from '../../../debug/index.js';
|
|
2
1
|
import { resolveKernelPath } from '../../../config/kernel-path-loader.js';
|
|
3
2
|
import { selectRuleValue } from '../../../rules/rule-registry.js';
|
|
4
3
|
import {
|
|
@@ -9,19 +8,36 @@ import {
|
|
|
9
8
|
export const PRIMARY_EXECUTION_PLAN_ID = 'primary';
|
|
10
9
|
export const FINITENESS_FALLBACK_EXECUTION_PLAN_ID = 'finiteness_fallback';
|
|
11
10
|
|
|
12
|
-
function
|
|
13
|
-
if (
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
11
|
+
function assertOptionalBoolean(value, label) {
|
|
12
|
+
if (value === undefined) {
|
|
13
|
+
return undefined;
|
|
14
|
+
}
|
|
15
|
+
if (typeof value !== 'boolean') {
|
|
16
|
+
throw new Error(`[ExecutionPlan] ${label} must be boolean when provided; got ${JSON.stringify(value)}.`);
|
|
17
|
+
}
|
|
18
|
+
return value;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
function assertOptionalPositiveInt(value, label) {
|
|
22
|
+
if (value === undefined) {
|
|
23
|
+
return undefined;
|
|
24
|
+
}
|
|
25
|
+
if (!Number.isInteger(value) || value < 1) {
|
|
26
|
+
throw new Error(`[ExecutionPlan] ${label} must be a positive integer when provided; got ${JSON.stringify(value)}.`);
|
|
27
|
+
}
|
|
28
|
+
return value;
|
|
18
29
|
}
|
|
19
30
|
|
|
20
|
-
function
|
|
21
|
-
if (value ===
|
|
22
|
-
return
|
|
31
|
+
function assertOptionalStopCheckMode(value) {
|
|
32
|
+
if (value === undefined) {
|
|
33
|
+
return undefined;
|
|
23
34
|
}
|
|
24
|
-
|
|
35
|
+
if (value !== 'batch' && value !== 'per-token') {
|
|
36
|
+
throw new Error(
|
|
37
|
+
`[ExecutionPlan] stopCheckMode must be "batch" or "per-token" when provided; got ${JSON.stringify(value)}.`
|
|
38
|
+
);
|
|
39
|
+
}
|
|
40
|
+
return value;
|
|
25
41
|
}
|
|
26
42
|
|
|
27
43
|
function resolveFallbackActivationDtype(primaryActivationDtype) {
|
|
@@ -42,56 +58,48 @@ function resolveFallbackActivationDtype(primaryActivationDtype) {
|
|
|
42
58
|
function resolveFallbackKernelPath(primaryKernelPath) {
|
|
43
59
|
const primaryKernelPathId = primaryKernelPath?.id ?? null;
|
|
44
60
|
if (!primaryKernelPathId) {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
};
|
|
61
|
+
throw new Error(
|
|
62
|
+
'[ExecutionPlan] F16 finiteness fallback requires a primary kernel path with a stable id. ' +
|
|
63
|
+
'Add a registered kernelPath id and a finiteness fallback rule.'
|
|
64
|
+
);
|
|
50
65
|
}
|
|
51
66
|
|
|
52
|
-
const
|
|
67
|
+
const explicitFallbackKernelPathId = typeof primaryKernelPath?.finitenessFallbackKernelPathId === 'string'
|
|
68
|
+
&& primaryKernelPath.finitenessFallbackKernelPathId.length > 0
|
|
69
|
+
? primaryKernelPath.finitenessFallbackKernelPathId
|
|
70
|
+
: null;
|
|
53
71
|
|
|
54
|
-
const fallbackKernelPathId = selectRuleValue(
|
|
72
|
+
const fallbackKernelPathId = explicitFallbackKernelPathId ?? selectRuleValue(
|
|
55
73
|
'inference',
|
|
56
74
|
'kernelPath',
|
|
57
75
|
'finitenessFallback',
|
|
58
76
|
{ kernelPathId: primaryKernelPathId }
|
|
59
77
|
);
|
|
60
78
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
79
|
+
if (typeof fallbackKernelPathId !== 'string' || fallbackKernelPathId.length === 0) {
|
|
80
|
+
throw new Error(
|
|
81
|
+
`[ExecutionPlan] Missing finiteness fallback kernel path mapping for "${primaryKernelPathId}". ` +
|
|
82
|
+
'Add an explicit rule in src/rules/inference/kernel-path.rules.json.'
|
|
83
|
+
);
|
|
84
|
+
}
|
|
65
85
|
|
|
66
|
-
if (
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
`
|
|
86
|
+
if (fallbackKernelPathId === primaryKernelPathId) {
|
|
87
|
+
throw new Error(
|
|
88
|
+
`[ExecutionPlan] Invalid finiteness fallback mapping for "${primaryKernelPathId}": ` +
|
|
89
|
+
`fallback kernel path resolves to itself. Add an explicit widening path.`
|
|
70
90
|
);
|
|
71
91
|
}
|
|
72
92
|
|
|
73
93
|
try {
|
|
74
|
-
const kernelPath = resolveKernelPath(
|
|
94
|
+
const kernelPath = resolveKernelPath(fallbackKernelPathId);
|
|
75
95
|
return {
|
|
76
96
|
kernelPath,
|
|
77
|
-
kernelPathId:
|
|
78
|
-
kernelPathSource,
|
|
97
|
+
kernelPathId: fallbackKernelPathId,
|
|
98
|
+
kernelPathSource: 'rule',
|
|
79
99
|
};
|
|
80
100
|
} catch (error) {
|
|
81
|
-
if (primaryKernelPathIsObject) {
|
|
82
|
-
log.warn(
|
|
83
|
-
'Pipeline',
|
|
84
|
-
`[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${resolvedKernelPathId}" ` +
|
|
85
|
-
`for "${primaryKernelPathId}", using inline kernel path as fallback. ${error?.message || error}`
|
|
86
|
-
);
|
|
87
|
-
return {
|
|
88
|
-
kernelPath: primaryKernelPath,
|
|
89
|
-
kernelPathId: primaryKernelPathId,
|
|
90
|
-
kernelPathSource,
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
101
|
throw new Error(
|
|
94
|
-
`[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${
|
|
102
|
+
`[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${fallbackKernelPathId}" ` +
|
|
95
103
|
`(from "${primaryKernelPathId}"): ${error?.message || error}`
|
|
96
104
|
);
|
|
97
105
|
}
|
|
@@ -252,11 +260,17 @@ export function activateFallbackExecutionPlan(container) {
|
|
|
252
260
|
|
|
253
261
|
function resolveExecutionOverrides(options = {}) {
|
|
254
262
|
return {
|
|
255
|
-
disableCommandBatching:
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
263
|
+
disableCommandBatching: assertOptionalBoolean(
|
|
264
|
+
options.disableCommandBatching,
|
|
265
|
+
'disableCommandBatching'
|
|
266
|
+
),
|
|
267
|
+
disableMultiTokenDecode: assertOptionalBoolean(
|
|
268
|
+
options.disableMultiTokenDecode,
|
|
269
|
+
'disableMultiTokenDecode'
|
|
270
|
+
),
|
|
271
|
+
batchSize: assertOptionalPositiveInt(options.batchSize, 'batchSize'),
|
|
272
|
+
stopCheckMode: assertOptionalStopCheckMode(options.stopCheckMode),
|
|
273
|
+
maxTokens: assertOptionalPositiveInt(options.maxTokens, 'maxTokens'),
|
|
260
274
|
};
|
|
261
275
|
}
|
|
262
276
|
|
|
@@ -276,9 +290,9 @@ export function resolveExecutionSessionPlan(container, options = {}) {
|
|
|
276
290
|
deferredRoundingWindowTokens: activePlan.deferredRoundingWindowTokens,
|
|
277
291
|
disableCommandBatching: overrides.disableCommandBatching ?? activePlan.defaultDisableCommandBatching,
|
|
278
292
|
disableMultiTokenDecode: overrides.disableMultiTokenDecode ?? activePlan.defaultDisableMultiTokenDecode,
|
|
279
|
-
batchSize:
|
|
280
|
-
stopCheckMode:
|
|
281
|
-
maxTokens:
|
|
293
|
+
batchSize: overrides.batchSize ?? activePlan.defaultBatchSize,
|
|
294
|
+
stopCheckMode: overrides.stopCheckMode ?? activePlan.defaultStopCheckMode,
|
|
295
|
+
maxTokens: overrides.maxTokens ?? activePlan.defaultMaxTokens,
|
|
282
296
|
readbackInterval: activePlan.readbackInterval,
|
|
283
297
|
ringTokens: activePlan.ringTokens,
|
|
284
298
|
ringStop: activePlan.ringStop,
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
export declare function cloneJson<T>(value: T): T;
|
|
2
|
+
export declare function validateManifestSessionDefaultsContract(manifestInference: Record<string, unknown> | null): void;
|
|
3
|
+
export declare function isPhaseMatch(phase: string, targetPhase: string): boolean;
|
|
4
|
+
export declare function stepHasLayer(step: Record<string, unknown>, layerIdx: number): boolean;
|
|
5
|
+
export declare function normalizePhase(value: unknown, label: string): string;
|
|
6
|
+
export declare function normalizeSection(value: unknown, label: string): string;
|
|
7
|
+
export declare function normalizeSlot(value: unknown, label: string): string;
|
|
8
|
+
export declare function createSourceTrace(): { session: Record<string, unknown>; steps: Record<string, unknown> };
|
|
9
|
+
export declare function setSourceTrace(trace: Record<string, unknown>, path: string, source: string): void;
|
|
10
|
+
export declare function collectLeafPaths(value: unknown, prefix?: string[], out?: string[][]): string[][];
|
|
11
|
+
export declare function hasDefinedPath(root: unknown, pathSegments: string[]): boolean;
|
|
12
|
+
export declare function validateStepShape(step: Record<string, unknown>, index: number): void;
|
|
13
|
+
export declare function assertExecutionRuntimeOverlay(runtimeInference: Record<string, unknown> | null | undefined): void;
|
|
14
|
+
export declare function validateUniqueStepIds(steps: Array<Record<string, unknown>>): void;
|
|
15
|
+
export declare function hasExecutionV0(manifestInference: Record<string, unknown> | null | undefined): boolean;
|
|
16
|
+
export declare function assertExecutionV0Schema(manifestInference: Record<string, unknown> | null | undefined): void;
|
|
17
|
+
export declare function applyExecutionPatchAtomic(
|
|
18
|
+
baseSteps: Array<Record<string, unknown>>,
|
|
19
|
+
patch: Record<string, unknown> | null | undefined
|
|
20
|
+
): Array<Record<string, unknown>>;
|
|
21
|
+
export declare function indexRuntimePatchMeta(
|
|
22
|
+
patch: Record<string, unknown> | null | undefined
|
|
23
|
+
): {
|
|
24
|
+
addedSteps: Set<string>;
|
|
25
|
+
precisionFieldsByStep: Map<string, Set<string>>;
|
|
26
|
+
kvIOFieldsByStep: Set<string>;
|
|
27
|
+
};
|
|
28
|
+
export declare function requireSessionActivationDtype(
|
|
29
|
+
sessionDefaults: Record<string, unknown> | null | undefined,
|
|
30
|
+
label?: string
|
|
31
|
+
): string;
|
|
32
|
+
export declare function createInitialSlotDtypes(sessionDefaults: Record<string, unknown>): Map<string, string>;
|
|
33
|
+
export declare function resolvePhaseSteps(
|
|
34
|
+
phase: string,
|
|
35
|
+
steps: Array<Record<string, unknown>>,
|
|
36
|
+
sessionDefaults: Record<string, unknown>,
|
|
37
|
+
profileIndex: Map<string, unknown>,
|
|
38
|
+
policies: Record<string, unknown>,
|
|
39
|
+
options?: Record<string, unknown>
|
|
40
|
+
): {
|
|
41
|
+
steps: Array<Record<string, unknown>>;
|
|
42
|
+
finalSlotDtypes: Map<string, string>;
|
|
43
|
+
};
|
|
44
|
+
export declare function normalizeRuntimeSessionForExecutionV0(
|
|
45
|
+
runtimeSession: Record<string, unknown> | null | undefined,
|
|
46
|
+
manifestInference: Record<string, unknown> | null | undefined,
|
|
47
|
+
defaultComputeDefaults: Record<string, unknown>
|
|
48
|
+
): Record<string, unknown> | null | undefined;
|
|
49
|
+
export declare function validatePhaseBoundaryCompatibility(options: Record<string, unknown>): void;
|
|
50
|
+
export declare function assertKVLayoutExecutionCompatibility(
|
|
51
|
+
steps: Array<Record<string, unknown>>,
|
|
52
|
+
sessionDefaults: Record<string, unknown>
|
|
53
|
+
): void;
|
|
54
|
+
export declare const buildKernelProfileKey: (
|
|
55
|
+
kernelRef: Record<string, unknown> | null | undefined,
|
|
56
|
+
step?: Record<string, unknown> | null | undefined
|
|
57
|
+
) => string;
|
|
58
|
+
export declare const indexKernelProfiles: (sessionDefaults: Record<string, unknown>) => Map<string, unknown>;
|
|
59
|
+
export declare const normalizeDtype: (value: unknown, label: string) => string;
|