@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
|
@@ -49,16 +49,16 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
49
49
|
"backward/upsample2d_backward.wgsl#main": "3f3a27fc880d3c4cba49889cafb4be66d30132cfeec9c2105751cc12a5a44ae3",
|
|
50
50
|
"bf16_to_f16.wgsl#main": "91c009d291a205fc42e7b45942e58630442eb67f8cb4bacc2e54160fa5f25c35",
|
|
51
51
|
"bf16_to_f32.wgsl#main": "2c5c08476d40aeb64287b8b31d1ef088c63e8bb4568bf7b7c5faf4a4ed493576",
|
|
52
|
-
"bias_add_f16.wgsl#main": "
|
|
53
|
-
"bias_add.wgsl#main": "
|
|
52
|
+
"bias_add_f16.wgsl#main": "f4c5b6e6495bcaae4582c3eb11c7fe39466a9625bbe57eede116c24db71682f6",
|
|
53
|
+
"bias_add.wgsl#main": "e7d3b4a5f9efc8b0569c56bcdeb63e921fa6d1d006901deabc734cab40346715",
|
|
54
54
|
"cast_f16_to_f32.wgsl#main": "98a0b31fcba2bad945e9f8522d643ae75135bfdf6b39a824565f57d5d4fd965d",
|
|
55
55
|
"cast_f32_to_f16.wgsl#main": "0b669e7812c09a2f44e219548e11ca0dfd8af921d30017e2a5c79d54f792542f",
|
|
56
56
|
"clamp.wgsl#main": "9d8039a590d102133636d67de8ae76ca8e9129bf220c48f7a08e9d82fcc48bad",
|
|
57
|
-
"conv2d_f16.wgsl#main": "
|
|
58
|
-
"conv2d.wgsl#main": "
|
|
57
|
+
"conv2d_f16.wgsl#main": "1e26a7e45e434fc45bdcd1913b0b97d2ed7e80b3c0bb1beae3d762ee457841c7",
|
|
58
|
+
"conv2d.wgsl#main": "140ed68bf1554ebfadbad7bcf1dfdff7f95aff18588459205f14b05cb432eee2",
|
|
59
59
|
"cross_entropy_loss.wgsl#main": "5a48087bdec94184432c90ce5b345e1eadbdfcb13b9793ecee8052bc7392239c",
|
|
60
|
-
"depthwise_conv2d_f16.wgsl#main": "
|
|
61
|
-
"depthwise_conv2d.wgsl#main": "
|
|
60
|
+
"depthwise_conv2d_f16.wgsl#main": "f7f093a7e6623ed17a675bac729149e94718aece916416966eaf03c1d6939f2a",
|
|
61
|
+
"depthwise_conv2d.wgsl#main": "cf14cb40d282ad4d4fab160109b97eaeaf12aab62579b73324ac485ac75155b0",
|
|
62
62
|
"dequant_f16_out_vec4.wgsl#main_vec4": "61c20e6c71c1c8421b4ec202dbd26292a6300587bd44c314f2a6c6d9d9442c3a",
|
|
63
63
|
"dequant_f16_out.wgsl#main": "94d61843d56f9a3bbc6b7c2b95dc6ecbba3f6a262b2c4086a076f69a8c38ccae",
|
|
64
64
|
"dequant_f16_rowwise.wgsl#main": "f5bf7cef950b52d65cee6121dbaa176244d3221045b3b6386b3be47f23ce17dc",
|
|
@@ -118,8 +118,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
118
118
|
"gelu.wgsl#main": "a9007ea08aaff98f9be08f1e0490a6bcf252883eac5513de876ab9ce918865e6",
|
|
119
119
|
"gptoss_mxfp4_expert_fused.wgsl#main_expert": "3159e8cd81da13f909cf905e6d35307fefe1dcbbdf1b2b8e8ff0ce923bd71180",
|
|
120
120
|
"gptoss_router_topk.wgsl#softmax_topk": "86e4ea709c0c0084d09c6a4cd07710dc14f380e03f91b8ed9ec871b310be49f1",
|
|
121
|
-
"grouped_pointwise_conv2d_f16.wgsl#main": "
|
|
122
|
-
"grouped_pointwise_conv2d.wgsl#main": "
|
|
121
|
+
"grouped_pointwise_conv2d_f16.wgsl#main": "578a0fcb3362864feb0de0e800b2df49e66e42db4360a08189e5f815ead944c4",
|
|
122
|
+
"grouped_pointwise_conv2d.wgsl#main": "9cf77937b10dd96f3aedc1793183ef2fff05d075fac3884fad5cd5869d0d9181",
|
|
123
123
|
"groupnorm_apply_f16.wgsl#main": "cfd850b87944ac1c03ba7bd98136db556dadd8a70611e351d82d297299a7cd02",
|
|
124
124
|
"groupnorm_apply.wgsl#main": "b09b8f2f57dcdfa1a0366daa30d3910feb134204652c711d2ba564e566b5a334",
|
|
125
125
|
"groupnorm_stats_f16.wgsl#main": "fb76f78ce668ea8459110335698fe4b09a2425fc71deed3bab67efd7641c3199",
|
|
@@ -155,33 +155,33 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
155
155
|
"moe_gather.wgsl#gather_single_pass": "8dbd0c38a323ba6c73af3ecaad297d79d861b817152e6e2c12fcd9db6f767f1e",
|
|
156
156
|
"moe_gather.wgsl#gather_tokens": "58e40a63a39c2f99c47c75dee71767f98482217afd74607cee1b7efc2c85738b",
|
|
157
157
|
"moe_offsets.wgsl#build_offsets": "3ea004145fa234659408cdeb0d4d802adff1037c9c5c03af146b3734cc69dd27",
|
|
158
|
-
"pixel_shuffle_f16.wgsl#main": "
|
|
159
|
-
"pixel_shuffle.wgsl#main": "
|
|
160
|
-
"relu_f16.wgsl#main": "
|
|
161
|
-
"relu.wgsl#main": "
|
|
162
|
-
"repeat_channels_f16.wgsl#main": "
|
|
163
|
-
"repeat_channels.wgsl#main": "
|
|
164
|
-
"residual_f16_vec4.wgsl#add_vec4": "
|
|
165
|
-
"residual_f16.wgsl#main": "
|
|
166
|
-
"residual_vec4.wgsl#add_vec4": "
|
|
167
|
-
"residual.wgsl#main": "
|
|
168
|
-
"rmsnorm_f16.wgsl#main": "
|
|
169
|
-
"rmsnorm_f16.wgsl#rmsnorm_small_f16": "
|
|
170
|
-
"rmsnorm.wgsl#main": "
|
|
171
|
-
"rmsnorm.wgsl#main_cached": "
|
|
172
|
-
"rmsnorm.wgsl#main_small": "
|
|
173
|
-
"rmsnorm.wgsl#main_small_subgroup": "
|
|
174
|
-
"rmsnorm.wgsl#main_subgroup": "
|
|
158
|
+
"pixel_shuffle_f16.wgsl#main": "1d1202cdaa8f7a94e015c5e2212bc98881d00d548a12fe7a8e91c4e17f2ba723",
|
|
159
|
+
"pixel_shuffle.wgsl#main": "91c017f0642132229768a2be6c8d47ad26d486f5d40e7dbf9e2349c847e527b8",
|
|
160
|
+
"relu_f16.wgsl#main": "baac3a33693e5a72e60c7ec9b4a4dbcec10a83ca1fac2972988b07a75780f14a",
|
|
161
|
+
"relu.wgsl#main": "52ffa66f9898a4f291758ae93055000cb1eb488c22a993e6b0568a29d9d3ad28",
|
|
162
|
+
"repeat_channels_f16.wgsl#main": "6eee5f9f74d8c17a71f6f422b503bbfb872350a9486ec0065fd1f67718311883",
|
|
163
|
+
"repeat_channels.wgsl#main": "bce0cb33beed50ce388b2d069961ae0fbe361dd4930b9344cb066b3390475a51",
|
|
164
|
+
"residual_f16_vec4.wgsl#add_vec4": "bf8cd304a1d4c5500143b0bc52d0236a7e8bbc4cc2d51d54ef0d4fce209f503b",
|
|
165
|
+
"residual_f16.wgsl#main": "3ca5c562fb5baf495e31e662f85fb26863f8f6d4ad29bb119c484e2ffdee7791",
|
|
166
|
+
"residual_vec4.wgsl#add_vec4": "f2f30d2dec9d90062bf5fce1f141951e8e6b54f56698b87ffb9bf6662c8acce2",
|
|
167
|
+
"residual.wgsl#main": "f1abd88c959c5d8dd27b9353d487e37b2a96850ed9d90c365212e260399cc2a7",
|
|
168
|
+
"rmsnorm_f16.wgsl#main": "7ae20c01b0453306504f777c4a8de37364a8b45bef3c569b0572c7863740a6bd",
|
|
169
|
+
"rmsnorm_f16.wgsl#rmsnorm_small_f16": "e9ce0a7427831e4d1280691eb9ca0daab55d917d4f0d9975d4bcd7e8fe960941",
|
|
170
|
+
"rmsnorm.wgsl#main": "f516b3e4bde2015f2a207c3ca5b8c9820c7809fa8f8d0786f90c568e0f1ac077",
|
|
171
|
+
"rmsnorm.wgsl#main_cached": "bcae48e93d63e11701386850559fec5d4924128ad9d6ac1de27d1b8c34fc3dff",
|
|
172
|
+
"rmsnorm.wgsl#main_small": "95c65dcb443717c821c44de87dc89cdd4f6da97e08cccf8a9526f5cdd19dd33d",
|
|
173
|
+
"rmsnorm.wgsl#main_small_subgroup": "8ff72e445b662b1820be25a594fb0558007bfca7e50d2d1bc915df5774a76f6a",
|
|
174
|
+
"rmsnorm.wgsl#main_subgroup": "0aac7dd6455bf8f3d11917ec87fa71bb5fa9ef4f8e5bba02dfbfd11b92ccd01f",
|
|
175
175
|
"rope_f16.wgsl#main": "a87f2964b77e851a2fbcc88305adeecaf8eba372291d83a71b817c8ef3da5c58",
|
|
176
176
|
"rope_f16.wgsl#rope_compute_freqs": "c7aa2cb50420ab2709b20e0a33e93ab6aa4f50d2fa8d9f79b0bfcfefb2f7abab",
|
|
177
177
|
"rope_f16.wgsl#rope_ntk_scaled": "46d2574b46539b289371c0c37a0b4e2fb21279134126b36a1fc30b98523905dd",
|
|
178
178
|
"rope_f16.wgsl#rope_qk": "3bc01e167dc3cc5397bd7751e493311b4d3f5c6c0f6fad30234a740bb4c4507b",
|
|
179
179
|
"rope_f16.wgsl#rope_yarn": "9b788dd05a1598aadcba5b0218d1666ce75faadbe32b71ee1def65ec23fb7dfe",
|
|
180
|
-
"rope.wgsl#main": "
|
|
181
|
-
"rope.wgsl#rope_compute_freqs": "
|
|
182
|
-
"rope.wgsl#rope_ntk_scaled": "
|
|
183
|
-
"rope.wgsl#rope_qk": "
|
|
184
|
-
"rope.wgsl#rope_yarn": "
|
|
180
|
+
"rope.wgsl#main": "4c803ad5e0dd065d5572c7aecc1def277c43884dcc02f22a9676914c10111400",
|
|
181
|
+
"rope.wgsl#rope_compute_freqs": "c9338316a31c8d467acbf8d512cb9616ee902d2619fa9187639f8ff5d78414ac",
|
|
182
|
+
"rope.wgsl#rope_ntk_scaled": "818f89865a3d1d6f2d49f671ac882d0fde9709702160a1ae8d9a8ef113afb511",
|
|
183
|
+
"rope.wgsl#rope_qk": "3d773c8b8c400142edc8a4111afb04a2bf75bdb109b2d41cbe5afdb72a959772",
|
|
184
|
+
"rope.wgsl#rope_yarn": "cb00e1cf87fac198dcf0fb0d4e2d5f6f99d2fed6dff0a089a96bb459917851d2",
|
|
185
185
|
"sample_f16.wgsl#argmax": "30b9f199b49352e5aff91b7aa8016edb423ce33f77481c3a7bc184251856fb27",
|
|
186
186
|
"sample_f16.wgsl#argmax_reduce": "a3ca27fc50b10c36c1676bdd5dbfe5edc67850cdd5c1af7a1d3ad70f830dd8a7",
|
|
187
187
|
"sample_f16.wgsl#find_topk_phase1": "24e47e5ced28af802959e350ff0a6eec6b9a26f89fb38e222990eeaffb16bd36",
|
|
@@ -194,10 +194,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
194
194
|
"sample.wgsl#find_topk_phase2": "940b216e605d22096da5aca65950a8030866fc5a39e7fdf484d69a832de1b63a",
|
|
195
195
|
"sample.wgsl#sample_single_pass": "4412357e84113ee2f1bc0dc8bf89e314c2ab482c89c14ca016ea9949d16a9d0c",
|
|
196
196
|
"sample.wgsl#softmax_and_sample": "7172c60e76430fbe130e530e3564b569b45eccf193987b32d6f52bd6bbcc9f08",
|
|
197
|
-
"sana_linear_attention_apply_f16.wgsl#main": "
|
|
198
|
-
"sana_linear_attention_apply.wgsl#main": "
|
|
199
|
-
"sana_linear_attention_summary_f16.wgsl#main": "
|
|
200
|
-
"sana_linear_attention_summary.wgsl#main": "
|
|
197
|
+
"sana_linear_attention_apply_f16.wgsl#main": "4a7426ce67eccfb70956feeae84275f4d3cc586c50e8442c07eb69993b378ab5",
|
|
198
|
+
"sana_linear_attention_apply.wgsl#main": "5f69e0bc1d9e2df5a61e13bd819313c8f7ff5dfc4b7d78e71d5152dc23b6a86c",
|
|
199
|
+
"sana_linear_attention_summary_f16.wgsl#main": "3abb736ead999485b5dac9c6b534143b464cfd0b5300c5e03c56cec03c8fa48e",
|
|
200
|
+
"sana_linear_attention_summary.wgsl#main": "be9c1fe861dcb5ea46927749764267656a69160bc8b732c6eb1a1bcb0c075589",
|
|
201
201
|
"scale.wgsl#main": "44ec481452b586307957163e3d65c9d02561d3f2f3db633f906f5488b1ea1ca4",
|
|
202
202
|
"scale.wgsl#main_inplace": "020824c7118a59c461ce81f1c2cd01b7c2a3f1aab326392b7d48d4448a0c2ed1",
|
|
203
203
|
"scatter_add_dynamic_f16_weights.wgsl#scatter_add_dynamic": "42799e745bc445b199b1cbc384bc12bb9372ed1599af3260a803cefc8dd35497",
|
|
@@ -206,8 +206,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
206
206
|
"scatter_add_vec4.wgsl#scatter_add_vec4": "247c4f23129cdfbb19593b17c5833d85048da117d77141c74bc4e16e691d94e1",
|
|
207
207
|
"scatter_add.wgsl#main": "dea947b8014e9b674e4fec8f15fac6c926e8a3a4d8eff104b953d77f35a1ac35",
|
|
208
208
|
"scatter_add.wgsl#scatter_add_accumulate": "561800af22dedae63f1abe69b757b0ef6c7832a2bff228c2262e0b7111d89247",
|
|
209
|
-
"silu_f16.wgsl#main": "
|
|
210
|
-
"silu.wgsl#main": "
|
|
209
|
+
"silu_f16.wgsl#main": "867634b20dcb75969e001966836892a2b7e01782b0028d94779c6ec21c254ae0",
|
|
210
|
+
"silu.wgsl#main": "7b52d30fb741beef2dbf728e0c4ecffe5b08d9661d63c306caecb4cb3ced85e5",
|
|
211
211
|
"softmax_subgroup.wgsl#main_subgroup": "88472c0dab5f81c5f045f0ee79c4c3bb484791a4a2b84af398c019851438f091",
|
|
212
212
|
"softmax_subgroup.wgsl#softmax_small_subgroup": "5d7bd1b698910a437197bf6c8b7f8b259036dd006ad5470f767b539dba8538f8",
|
|
213
213
|
"softmax.wgsl#main": "45c5876806b442222d7e190e595f55a0079bae82e07d37586996c1a63790bb7a",
|
|
@@ -220,7 +220,7 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
220
220
|
"topk.wgsl#main": "a18763303cd18e8a020e647f8a52f65403526849faf835d9f9394f634c3c97eb",
|
|
221
221
|
"topk.wgsl#softmax_topk": "95ff3517da909e4bd4d0ff8d85b619bd250522943aeb9276375edc59f67e9604",
|
|
222
222
|
"topk.wgsl#topk_2_small": "289eaa5c4f005e0aaf37dfe5343aeda30d9ab3929979dbf0cc3553f23e136807",
|
|
223
|
-
"transpose.wgsl#main": "
|
|
224
|
-
"upsample2d_f16.wgsl#main": "
|
|
225
|
-
"upsample2d.wgsl#main": "
|
|
223
|
+
"transpose.wgsl#main": "8caf8664dfc579b4e92edce50783263c535764006290cc7902108f26586113a2",
|
|
224
|
+
"upsample2d_f16.wgsl#main": "43cee5f2503cb4b6caea45e9842f8961ce313b02eb8ed23a97d6967113ce521c",
|
|
225
|
+
"upsample2d.wgsl#main": "6de9172ad3d6940dd3c94470a105755a33760e66a84d6e9e96ec4d6a07dc4a25"
|
|
226
226
|
});
|
|
@@ -4,6 +4,17 @@ let cachedRegistry = null;
|
|
|
4
4
|
|
|
5
5
|
let registryUrl = null;
|
|
6
6
|
|
|
7
|
+
function deepFreeze(value, seen = new WeakSet()) {
|
|
8
|
+
if (!value || typeof value !== 'object' || seen.has(value)) {
|
|
9
|
+
return value;
|
|
10
|
+
}
|
|
11
|
+
seen.add(value);
|
|
12
|
+
for (const entry of Object.values(value)) {
|
|
13
|
+
deepFreeze(entry, seen);
|
|
14
|
+
}
|
|
15
|
+
return Object.freeze(value);
|
|
16
|
+
}
|
|
17
|
+
|
|
7
18
|
export function setRegistryUrl(url) {
|
|
8
19
|
registryUrl = url;
|
|
9
20
|
cachedRegistry = null;
|
|
@@ -15,7 +26,9 @@ export async function getRegistry() {
|
|
|
15
26
|
}
|
|
16
27
|
|
|
17
28
|
const source = registryUrl || './registry.json';
|
|
18
|
-
cachedRegistry =
|
|
29
|
+
cachedRegistry = deepFreeze(
|
|
30
|
+
await loadJson(source, import.meta.url, 'Failed to load kernel registry')
|
|
31
|
+
);
|
|
19
32
|
return cachedRegistry;
|
|
20
33
|
}
|
|
21
34
|
|
|
@@ -1826,7 +1826,7 @@
|
|
|
1826
1826
|
}
|
|
1827
1827
|
],
|
|
1828
1828
|
"baseUniforms": {
|
|
1829
|
-
"size":
|
|
1829
|
+
"size": 32,
|
|
1830
1830
|
"fields": [
|
|
1831
1831
|
{
|
|
1832
1832
|
"name": "hidden_size",
|
|
@@ -1847,6 +1847,26 @@
|
|
|
1847
1847
|
"name": "has_residual",
|
|
1848
1848
|
"type": "u32",
|
|
1849
1849
|
"offset": 12
|
|
1850
|
+
},
|
|
1851
|
+
{
|
|
1852
|
+
"name": "token_stride",
|
|
1853
|
+
"type": "u32",
|
|
1854
|
+
"offset": 16
|
|
1855
|
+
},
|
|
1856
|
+
{
|
|
1857
|
+
"name": "_pad0",
|
|
1858
|
+
"type": "u32",
|
|
1859
|
+
"offset": 20
|
|
1860
|
+
},
|
|
1861
|
+
{
|
|
1862
|
+
"name": "_pad1",
|
|
1863
|
+
"type": "u32",
|
|
1864
|
+
"offset": 24
|
|
1865
|
+
},
|
|
1866
|
+
{
|
|
1867
|
+
"name": "_pad2",
|
|
1868
|
+
"type": "u32",
|
|
1869
|
+
"offset": 28
|
|
1850
1870
|
}
|
|
1851
1871
|
]
|
|
1852
1872
|
},
|
|
@@ -2154,7 +2174,7 @@
|
|
|
2154
2174
|
}
|
|
2155
2175
|
],
|
|
2156
2176
|
"baseUniforms": {
|
|
2157
|
-
"size":
|
|
2177
|
+
"size": 32,
|
|
2158
2178
|
"fields": [
|
|
2159
2179
|
{
|
|
2160
2180
|
"name": "M",
|
|
@@ -2182,7 +2202,9 @@
|
|
|
2182
2202
|
1,
|
|
2183
2203
|
1
|
|
2184
2204
|
],
|
|
2185
|
-
"requires": [
|
|
2205
|
+
"requires": [
|
|
2206
|
+
"shader-f16"
|
|
2207
|
+
]
|
|
2186
2208
|
},
|
|
2187
2209
|
"f16": {
|
|
2188
2210
|
"wgsl": "matmul_gemv_residual_f16.wgsl",
|
|
@@ -2357,12 +2379,12 @@
|
|
|
2357
2379
|
"offset": 20
|
|
2358
2380
|
},
|
|
2359
2381
|
{
|
|
2360
|
-
"name": "
|
|
2382
|
+
"name": "rotary_dim",
|
|
2361
2383
|
"type": "u32",
|
|
2362
2384
|
"offset": 24
|
|
2363
2385
|
},
|
|
2364
2386
|
{
|
|
2365
|
-
"name": "
|
|
2387
|
+
"name": "interleaved",
|
|
2366
2388
|
"type": "u32",
|
|
2367
2389
|
"offset": 28
|
|
2368
2390
|
}
|
|
@@ -3637,7 +3659,7 @@
|
|
|
3637
3659
|
}
|
|
3638
3660
|
],
|
|
3639
3661
|
"baseUniforms": {
|
|
3640
|
-
"size":
|
|
3662
|
+
"size": 32,
|
|
3641
3663
|
"fields": [
|
|
3642
3664
|
{
|
|
3643
3665
|
"name": "num_tokens",
|
|
@@ -4096,7 +4118,7 @@
|
|
|
4096
4118
|
}
|
|
4097
4119
|
],
|
|
4098
4120
|
"baseUniforms": {
|
|
4099
|
-
"size":
|
|
4121
|
+
"size": 32,
|
|
4100
4122
|
"fields": [
|
|
4101
4123
|
{
|
|
4102
4124
|
"name": "num_tokens",
|
|
@@ -4117,6 +4139,26 @@
|
|
|
4117
4139
|
"name": "bias_offset",
|
|
4118
4140
|
"type": "u32",
|
|
4119
4141
|
"offset": 12
|
|
4142
|
+
},
|
|
4143
|
+
{
|
|
4144
|
+
"name": "token_stride",
|
|
4145
|
+
"type": "u32",
|
|
4146
|
+
"offset": 16
|
|
4147
|
+
},
|
|
4148
|
+
{
|
|
4149
|
+
"name": "_pad0",
|
|
4150
|
+
"type": "u32",
|
|
4151
|
+
"offset": 20
|
|
4152
|
+
},
|
|
4153
|
+
{
|
|
4154
|
+
"name": "_pad1",
|
|
4155
|
+
"type": "u32",
|
|
4156
|
+
"offset": 24
|
|
4157
|
+
},
|
|
4158
|
+
{
|
|
4159
|
+
"name": "_pad2",
|
|
4160
|
+
"type": "u32",
|
|
4161
|
+
"offset": 28
|
|
4120
4162
|
}
|
|
4121
4163
|
]
|
|
4122
4164
|
},
|
package/src/config/loader.d.ts
CHANGED
package/src/config/loader.js
CHANGED
|
@@ -161,8 +161,7 @@ export function detectPreset(
|
|
|
161
161
|
}
|
|
162
162
|
}
|
|
163
163
|
|
|
164
|
-
|
|
165
|
-
return 'transformer';
|
|
164
|
+
return null;
|
|
166
165
|
}
|
|
167
166
|
|
|
168
167
|
// =============================================================================
|
|
@@ -178,6 +177,17 @@ export function resolveConfig(
|
|
|
178
177
|
(manifest.config || {}),
|
|
179
178
|
manifest.modelType
|
|
180
179
|
);
|
|
180
|
+
if (!id) {
|
|
181
|
+
const modelId = String(manifest?.modelId ?? 'unknown').trim() || 'unknown';
|
|
182
|
+
const modelType = String(manifest?.config?.model_type ?? 'unknown').trim() || 'unknown';
|
|
183
|
+
const architecture = String(manifest?.modelType ?? 'unknown').trim() || 'unknown';
|
|
184
|
+
throw createDopplerError(
|
|
185
|
+
ERROR_CODES.CONFIG_PRESET_UNKNOWN,
|
|
186
|
+
`Could not detect a preset for manifest "${modelId}" ` +
|
|
187
|
+
`(architecture="${architecture}", model_type="${modelType}"). ` +
|
|
188
|
+
'Provide an explicit presetId instead of relying on the generic transformer fallback.'
|
|
189
|
+
);
|
|
190
|
+
}
|
|
181
191
|
|
|
182
192
|
// Get resolved preset
|
|
183
193
|
const preset = resolvePreset(id);
|
|
@@ -326,6 +336,8 @@ function assertArchitecture(manifest, architecture) {
|
|
|
326
336
|
|
|
327
337
|
function extractArchitectureFromConfig(config) {
|
|
328
338
|
const nestedTextConfig = getNestedTextConfig(config);
|
|
339
|
+
const topLevelRoPEParameters = getFlatRoPEParameters(config);
|
|
340
|
+
const nestedRoPEParameters = getFlatRoPEParameters(nestedTextConfig);
|
|
329
341
|
return {
|
|
330
342
|
numLayers: config.num_hidden_layers ?? nestedTextConfig?.num_hidden_layers ?? config.n_layer ?? config.blockCount,
|
|
331
343
|
hiddenSize: config.hidden_size ?? nestedTextConfig?.hidden_size ?? config.n_embd ?? config.embeddingLength,
|
|
@@ -335,13 +347,20 @@ function extractArchitectureFromConfig(config) {
|
|
|
335
347
|
headDim: config.head_dim ?? nestedTextConfig?.head_dim,
|
|
336
348
|
vocabSize: config.vocab_size ?? nestedTextConfig?.vocab_size ?? config.vocabSize,
|
|
337
349
|
maxSeqLen: config.max_position_embeddings ?? nestedTextConfig?.max_position_embeddings ?? config.n_positions ?? config.contextLength,
|
|
338
|
-
ropeTheta:
|
|
350
|
+
ropeTheta: topLevelRoPEParameters?.rope_theta
|
|
351
|
+
?? nestedRoPEParameters?.rope_theta
|
|
352
|
+
?? config.rope_theta
|
|
353
|
+
?? nestedTextConfig?.rope_theta
|
|
354
|
+
?? config.ropeFreqBase,
|
|
339
355
|
rmsNormEps: config.rms_norm_eps ?? nestedTextConfig?.rms_norm_eps ?? config.attentionLayerNormRMSEpsilon,
|
|
340
356
|
};
|
|
341
357
|
}
|
|
342
358
|
|
|
343
359
|
function extractInferenceFromConfig(config) {
|
|
344
360
|
const nestedTextConfig = getNestedTextConfig(config);
|
|
361
|
+
const topLevelRoPEParameters = getFlatRoPEParameters(config);
|
|
362
|
+
const nestedRoPEParameters = getFlatRoPEParameters(nestedTextConfig);
|
|
363
|
+
const ropeParameters = nestedRoPEParameters ?? topLevelRoPEParameters;
|
|
345
364
|
return {
|
|
346
365
|
attention: {
|
|
347
366
|
slidingWindow: config.sliding_window ?? nestedTextConfig?.sliding_window,
|
|
@@ -355,7 +374,13 @@ function extractInferenceFromConfig(config) {
|
|
|
355
374
|
},
|
|
356
375
|
pipeline: config.pipeline ?? nestedTextConfig?.pipeline,
|
|
357
376
|
rope: {
|
|
358
|
-
ropeTheta:
|
|
377
|
+
ropeTheta: ropeParameters?.rope_theta
|
|
378
|
+
?? config.rope_theta
|
|
379
|
+
?? nestedTextConfig?.rope_theta
|
|
380
|
+
?? config.ropeFreqBase,
|
|
381
|
+
mropeInterleaved: ropeParameters?.mrope_interleaved,
|
|
382
|
+
mropeSection: Array.isArray(ropeParameters?.mrope_section) ? ropeParameters.mrope_section : undefined,
|
|
383
|
+
partialRotaryFactor: ropeParameters?.partial_rotary_factor,
|
|
359
384
|
ropeScalingType: config.rope_scaling_type ?? nestedTextConfig?.rope_scaling_type,
|
|
360
385
|
ropeScalingFactor: config.rope_scaling_factor ?? nestedTextConfig?.rope_scaling_factor,
|
|
361
386
|
},
|
|
@@ -375,6 +400,20 @@ function getNestedTextConfig(config) {
|
|
|
375
400
|
return null;
|
|
376
401
|
}
|
|
377
402
|
|
|
403
|
+
function getFlatRoPEParameters(config) {
|
|
404
|
+
if (!config || typeof config !== 'object' || Array.isArray(config)) {
|
|
405
|
+
return null;
|
|
406
|
+
}
|
|
407
|
+
const ropeParameters = config.rope_parameters;
|
|
408
|
+
if (!ropeParameters || typeof ropeParameters !== 'object' || Array.isArray(ropeParameters)) {
|
|
409
|
+
return null;
|
|
410
|
+
}
|
|
411
|
+
if (ropeParameters.full_attention || ropeParameters.sliding_attention) {
|
|
412
|
+
return null;
|
|
413
|
+
}
|
|
414
|
+
return ropeParameters;
|
|
415
|
+
}
|
|
416
|
+
|
|
378
417
|
function extractTokenizerFromManifest(manifest) {
|
|
379
418
|
if (!manifest.tokenizer) return {};
|
|
380
419
|
|
|
@@ -77,6 +77,7 @@ function buildWitnessMergeManifest() {
|
|
|
77
77
|
embeddingTranspose: false,
|
|
78
78
|
embeddingVocabSize: 1024,
|
|
79
79
|
},
|
|
80
|
+
pipeline: 'decode-only',
|
|
80
81
|
layerPattern: null,
|
|
81
82
|
chatTemplate: {
|
|
82
83
|
type: 'gemma',
|
|
@@ -114,6 +115,13 @@ export function buildMergeContractArtifact() {
|
|
|
114
115
|
&& mergedUndefined._sources.get('inference.defaultKernelPath') === 'manifest',
|
|
115
116
|
`value=${mergedUndefined.inference.defaultKernelPath}, source=${mergedUndefined._sources.get('inference.defaultKernelPath')}`
|
|
116
117
|
);
|
|
118
|
+
recordCheck(
|
|
119
|
+
checks,
|
|
120
|
+
'runtime.mergeConfig.pipeline_preserves_manifest_value',
|
|
121
|
+
mergedUndefined.inference.pipeline === 'decode-only'
|
|
122
|
+
&& mergedUndefined._sources.get('inference.pipeline') === 'manifest',
|
|
123
|
+
`value=${String(mergedUndefined.inference.pipeline)}, source=${mergedUndefined._sources.get('inference.pipeline')}`
|
|
124
|
+
);
|
|
117
125
|
|
|
118
126
|
const mergedNull = mergeConfig(buildWitnessMergeManifest(), {
|
|
119
127
|
defaultKernelPath: null,
|
|
@@ -152,6 +160,35 @@ export function buildMergeContractArtifact() {
|
|
|
152
160
|
`value=${String(runtimeConfig.runtime.inference.chatTemplate.enabled)}`
|
|
153
161
|
);
|
|
154
162
|
|
|
163
|
+
const isolatedConfigA = createDopplerConfig();
|
|
164
|
+
isolatedConfigA.runtime.inference.compute.activationDtype = 'f32';
|
|
165
|
+
const isolatedConfigB = createDopplerConfig();
|
|
166
|
+
recordCheck(
|
|
167
|
+
checks,
|
|
168
|
+
'runtime.schema.defaults_are_isolated_per_instance',
|
|
169
|
+
isolatedConfigB.runtime.inference.compute.activationDtype !== 'f32'
|
|
170
|
+
&& isolatedConfigA.runtime.inference.compute !== isolatedConfigB.runtime.inference.compute,
|
|
171
|
+
`configA=${isolatedConfigA.runtime.inference.compute.activationDtype}, configB=${isolatedConfigB.runtime.inference.compute.activationDtype}`,
|
|
172
|
+
'actual'
|
|
173
|
+
);
|
|
174
|
+
|
|
175
|
+
const calibrateConfig = createDopplerConfig({
|
|
176
|
+
runtime: {
|
|
177
|
+
shared: {
|
|
178
|
+
tooling: {
|
|
179
|
+
intent: 'calibrate',
|
|
180
|
+
},
|
|
181
|
+
},
|
|
182
|
+
},
|
|
183
|
+
});
|
|
184
|
+
recordCheck(
|
|
185
|
+
checks,
|
|
186
|
+
'runtime.schema.calibrate_does_not_mutate_kernel_warmup_defaults',
|
|
187
|
+
calibrateConfig.runtime.shared.kernelWarmup.prewarm === false,
|
|
188
|
+
`prewarm=${String(calibrateConfig.runtime.shared.kernelWarmup.prewarm)}`,
|
|
189
|
+
'actual'
|
|
190
|
+
);
|
|
191
|
+
|
|
155
192
|
const overlaySources = new Map();
|
|
156
193
|
const chosenRuntimeValue = chooseDefinedWithSource(
|
|
157
194
|
'inference.defaultKernelPath',
|
|
@@ -252,6 +289,24 @@ export function buildMergeContractArtifact() {
|
|
|
252
289
|
'actual'
|
|
253
290
|
);
|
|
254
291
|
|
|
292
|
+
let invalidShallowOverrideError = null;
|
|
293
|
+
try {
|
|
294
|
+
mergeShallowObject(
|
|
295
|
+
{ type: 'base', enabled: true },
|
|
296
|
+
null
|
|
297
|
+
);
|
|
298
|
+
} catch (error) {
|
|
299
|
+
invalidShallowOverrideError = error;
|
|
300
|
+
}
|
|
301
|
+
recordCheck(
|
|
302
|
+
checks,
|
|
303
|
+
'runtime.mergeShallowObject.invalid_explicit_override_fails_closed',
|
|
304
|
+
invalidShallowOverrideError instanceof Error
|
|
305
|
+
&& /shallow object overrides must be plain objects/.test(invalidShallowOverrideError.message),
|
|
306
|
+
`error=${invalidShallowOverrideError?.message ?? 'none'}`,
|
|
307
|
+
'actual'
|
|
308
|
+
);
|
|
309
|
+
|
|
255
310
|
const layeredAttention = mergeLayeredShallowObjects(
|
|
256
311
|
{ slidingWindow: 4096, attentionBias: false },
|
|
257
312
|
{ slidingWindow: 2048 },
|
|
@@ -273,7 +328,7 @@ export function buildMergeContractArtifact() {
|
|
|
273
328
|
onIncompatible: 'error',
|
|
274
329
|
},
|
|
275
330
|
{
|
|
276
|
-
allowSources: ['
|
|
331
|
+
allowSources: ['config', 'execution-v0'],
|
|
277
332
|
onIncompatible: 'remap',
|
|
278
333
|
}
|
|
279
334
|
);
|
|
@@ -283,7 +338,7 @@ export function buildMergeContractArtifact() {
|
|
|
283
338
|
Array.isArray(mergedKernelPathPolicy.sourceScope)
|
|
284
339
|
&& Array.isArray(mergedKernelPathPolicy.allowSources)
|
|
285
340
|
&& mergedKernelPathPolicy.sourceScope.length === 2
|
|
286
|
-
&& mergedKernelPathPolicy.sourceScope[0] === '
|
|
341
|
+
&& mergedKernelPathPolicy.sourceScope[0] === 'config'
|
|
287
342
|
&& mergedKernelPathPolicy.allowSources[1] === 'execution-v0'
|
|
288
343
|
&& mergedKernelPathPolicy.onIncompatible === 'remap',
|
|
289
344
|
`sourceScope=${JSON.stringify(mergedKernelPathPolicy.sourceScope)}, allowSources=${JSON.stringify(mergedKernelPathPolicy.allowSources)}`,
|
|
@@ -294,7 +349,7 @@ export function buildMergeContractArtifact() {
|
|
|
294
349
|
runtime: {
|
|
295
350
|
inference: {
|
|
296
351
|
kernelPathPolicy: {
|
|
297
|
-
allowSources: ['
|
|
352
|
+
allowSources: ['config', 'execution-v0'],
|
|
298
353
|
},
|
|
299
354
|
},
|
|
300
355
|
},
|
|
@@ -303,7 +358,7 @@ export function buildMergeContractArtifact() {
|
|
|
303
358
|
checks,
|
|
304
359
|
'runtime.schema.kernelPathPolicy.helper_is_used',
|
|
305
360
|
Array.isArray(runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope)
|
|
306
|
-
&& runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope[0] === '
|
|
361
|
+
&& runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.sourceScope[0] === 'config'
|
|
307
362
|
&& runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy.allowSources[1] === 'execution-v0',
|
|
308
363
|
`policy=${JSON.stringify(runtimeConfigWithKernelPathPolicy.runtime.inference.kernelPathPolicy)}`,
|
|
309
364
|
'actual'
|
|
@@ -15,9 +15,14 @@ export function chooseDefinedWithSource(path, overrideValue, fallbackValue, sour
|
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
export function mergeShallowObject(base, override) {
|
|
18
|
-
if (
|
|
18
|
+
if (override === undefined) {
|
|
19
19
|
return base;
|
|
20
20
|
}
|
|
21
|
+
if (override === null || typeof override !== 'object' || Array.isArray(override)) {
|
|
22
|
+
throw new Error(
|
|
23
|
+
'DopplerConfigError: shallow object overrides must be plain objects when provided explicitly.'
|
|
24
|
+
);
|
|
25
|
+
}
|
|
21
26
|
return { ...base, ...override };
|
|
22
27
|
}
|
|
23
28
|
|
|
@@ -29,17 +34,133 @@ export function replaceSubtree(overrideValue, fallbackValue) {
|
|
|
29
34
|
return chooseNullish(overrideValue, fallbackValue);
|
|
30
35
|
}
|
|
31
36
|
|
|
37
|
+
const DEFAULT_KERNEL_PATH_POLICY = Object.freeze({
|
|
38
|
+
mode: 'locked',
|
|
39
|
+
sourceScope: Object.freeze(['model', 'manifest']),
|
|
40
|
+
onIncompatible: 'error',
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
const VALID_KERNEL_PATH_POLICY_SOURCES = new Set([
|
|
44
|
+
'model',
|
|
45
|
+
'manifest',
|
|
46
|
+
'config',
|
|
47
|
+
'execution-v0',
|
|
48
|
+
]);
|
|
49
|
+
|
|
50
|
+
function normalizeKernelPathPolicyMode(value) {
|
|
51
|
+
if (value === undefined) {
|
|
52
|
+
return DEFAULT_KERNEL_PATH_POLICY.mode;
|
|
53
|
+
}
|
|
54
|
+
const normalized = String(value).trim().toLowerCase();
|
|
55
|
+
if (normalized === 'locked' || normalized === 'capability-aware') {
|
|
56
|
+
return normalized;
|
|
57
|
+
}
|
|
58
|
+
throw new Error(
|
|
59
|
+
`DopplerConfigError: runtime.inference.kernelPathPolicy.mode must be "locked" or "capability-aware"; got ${JSON.stringify(value)}.`
|
|
60
|
+
);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function normalizeKernelPathPolicySource(source) {
|
|
64
|
+
const normalized = String(source ?? '').trim().toLowerCase();
|
|
65
|
+
if (!normalized) {
|
|
66
|
+
throw new Error(
|
|
67
|
+
'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be non-empty strings.'
|
|
68
|
+
);
|
|
69
|
+
}
|
|
70
|
+
if (normalized === 'runtime') {
|
|
71
|
+
throw new Error(
|
|
72
|
+
'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "runtime". Use "config".'
|
|
73
|
+
);
|
|
74
|
+
}
|
|
75
|
+
if (normalized === 'execution_v0') {
|
|
76
|
+
throw new Error(
|
|
77
|
+
'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope does not accept legacy "execution_v0". Use "execution-v0".'
|
|
78
|
+
);
|
|
79
|
+
}
|
|
80
|
+
if (!VALID_KERNEL_PATH_POLICY_SOURCES.has(normalized)) {
|
|
81
|
+
throw new Error(
|
|
82
|
+
`DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope entries must be model|manifest|config|execution-v0; got ${JSON.stringify(source)}.`
|
|
83
|
+
);
|
|
84
|
+
}
|
|
85
|
+
return normalized;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
function normalizeKernelPathPolicySourceScope(value) {
|
|
89
|
+
if (value === undefined) {
|
|
90
|
+
return [...DEFAULT_KERNEL_PATH_POLICY.sourceScope];
|
|
91
|
+
}
|
|
92
|
+
if (!Array.isArray(value) || value.length === 0) {
|
|
93
|
+
throw new Error(
|
|
94
|
+
'DopplerConfigError: runtime.inference.kernelPathPolicy.sourceScope must be a non-empty array.'
|
|
95
|
+
);
|
|
96
|
+
}
|
|
97
|
+
return [...new Set(value.map((source) => normalizeKernelPathPolicySource(source)))];
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function normalizeKernelPathPolicyOnIncompatible(value) {
|
|
101
|
+
if (value === undefined) {
|
|
102
|
+
return DEFAULT_KERNEL_PATH_POLICY.onIncompatible;
|
|
103
|
+
}
|
|
104
|
+
const normalized = String(value).trim().toLowerCase();
|
|
105
|
+
if (normalized === 'error' || normalized === 'remap') {
|
|
106
|
+
return normalized;
|
|
107
|
+
}
|
|
108
|
+
throw new Error(
|
|
109
|
+
`DopplerConfigError: runtime.inference.kernelPathPolicy.onIncompatible must be "error" or "remap"; got ${JSON.stringify(value)}.`
|
|
110
|
+
);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
function assertKernelPathPolicyObject(value, label) {
|
|
114
|
+
if (value === undefined) {
|
|
115
|
+
return;
|
|
116
|
+
}
|
|
117
|
+
if (value === null) {
|
|
118
|
+
throw new Error(`DopplerConfigError: ${label} must not be null.`);
|
|
119
|
+
}
|
|
120
|
+
if (typeof value !== 'object' || Array.isArray(value)) {
|
|
121
|
+
throw new Error(
|
|
122
|
+
`DopplerConfigError: ${label} must be an object.`
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
function assertKernelPathPolicySourceAliasesCompatible(policy, label) {
|
|
128
|
+
if (!policy || policy.sourceScope === undefined || policy.allowSources === undefined) {
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
const sourceScope = normalizeKernelPathPolicySourceScope(policy.sourceScope);
|
|
133
|
+
const allowSources = normalizeKernelPathPolicySourceScope(policy.allowSources);
|
|
134
|
+
const aliasesMatch = sourceScope.length === allowSources.length
|
|
135
|
+
&& sourceScope.every((value, index) => value === allowSources[index]);
|
|
136
|
+
|
|
137
|
+
if (!aliasesMatch) {
|
|
138
|
+
throw new Error(
|
|
139
|
+
`DopplerConfigError: ${label}.sourceScope and ${label}.allowSources must match exactly when both are provided.`
|
|
140
|
+
);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
32
144
|
export function mergeKernelPathPolicy(basePolicy, overridePolicy) {
|
|
145
|
+
assertKernelPathPolicyObject(basePolicy, 'runtime.inference.kernelPathPolicy');
|
|
146
|
+
assertKernelPathPolicyObject(overridePolicy, 'runtime.inference.kernelPathPolicy');
|
|
147
|
+
assertKernelPathPolicySourceAliasesCompatible(basePolicy, 'runtime.inference.kernelPathPolicy');
|
|
148
|
+
assertKernelPathPolicySourceAliasesCompatible(overridePolicy, 'runtime.inference.kernelPathPolicy');
|
|
33
149
|
const base = basePolicy ?? {};
|
|
34
150
|
const override = overridePolicy ?? {};
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
151
|
+
const sourceScope = normalizeKernelPathPolicySourceScope(
|
|
152
|
+
override.sourceScope
|
|
153
|
+
?? override.allowSources
|
|
154
|
+
?? base.sourceScope
|
|
155
|
+
?? base.allowSources
|
|
156
|
+
);
|
|
38
157
|
return {
|
|
39
|
-
mode: override.mode ?? base.mode,
|
|
158
|
+
mode: normalizeKernelPathPolicyMode(override.mode ?? base.mode),
|
|
40
159
|
sourceScope,
|
|
41
|
-
allowSources: sourceScope,
|
|
42
|
-
onIncompatible:
|
|
160
|
+
allowSources: [...sourceScope],
|
|
161
|
+
onIncompatible: normalizeKernelPathPolicyOnIncompatible(
|
|
162
|
+
override.onIncompatible ?? base.onIncompatible
|
|
163
|
+
),
|
|
43
164
|
};
|
|
44
165
|
}
|
|
45
166
|
|
package/src/config/merge.d.ts
CHANGED
|
@@ -54,6 +54,7 @@ export interface MergedInferenceConfig {
|
|
|
54
54
|
ffn: ManifestFFNSchema;
|
|
55
55
|
rope: ManifestRoPESchema;
|
|
56
56
|
output: ManifestOutputSchema;
|
|
57
|
+
pipeline: ManifestInferenceSchema['pipeline'];
|
|
57
58
|
layerPattern: ManifestLayerPatternSchema | null;
|
|
58
59
|
chatTemplate: ManifestChatTemplateSchema;
|
|
59
60
|
defaultKernelPath: string | null;
|