@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "kernels/gemma2-q4k-dequant-f16a",
|
|
3
|
+
"name": "gemma2-q4k-dequant-f16a",
|
|
4
|
+
"description": "Explicit runtime override preset for the Gemma 2 Q4K dequant-to-F16 kernel path.",
|
|
5
|
+
"intent": "investigate",
|
|
6
|
+
"stability": "canonical",
|
|
7
|
+
"owner": "doppler-core",
|
|
8
|
+
"createdAtUtc": "2026-03-08T00:00:00Z",
|
|
9
|
+
"extends": "default",
|
|
10
|
+
"runtime": {
|
|
11
|
+
"inference": {
|
|
12
|
+
"compute": {
|
|
13
|
+
"activationDtype": "f16"
|
|
14
|
+
},
|
|
15
|
+
"kvcache": {
|
|
16
|
+
"kvDtype": "f16"
|
|
17
|
+
},
|
|
18
|
+
"session": {
|
|
19
|
+
"compute": {
|
|
20
|
+
"defaults": {
|
|
21
|
+
"outputDtype": "f16"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"kernelPath": "gemma2-q4k-dequant-f16a",
|
|
26
|
+
"kernelPathPolicy": {
|
|
27
|
+
"mode": "capability-aware",
|
|
28
|
+
"sourceScope": ["config", "model", "manifest", "execution-v0"],
|
|
29
|
+
"onIncompatible": "remap"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "kernels/gemma2-q4k-dequant-f32a-nosubgroups",
|
|
3
|
+
"name": "gemma2-q4k-dequant-f32a-nosubgroups",
|
|
4
|
+
"description": "Explicit runtime override preset for the Gemma 2 Q4K dequant-to-F32 no-subgroups kernel path.",
|
|
5
|
+
"intent": "investigate",
|
|
6
|
+
"stability": "canonical",
|
|
7
|
+
"owner": "doppler-core",
|
|
8
|
+
"createdAtUtc": "2026-03-08T00:00:00Z",
|
|
9
|
+
"extends": "default",
|
|
10
|
+
"runtime": {
|
|
11
|
+
"inference": {
|
|
12
|
+
"compute": {
|
|
13
|
+
"activationDtype": "f32"
|
|
14
|
+
},
|
|
15
|
+
"kvcache": {
|
|
16
|
+
"kvDtype": "f32"
|
|
17
|
+
},
|
|
18
|
+
"session": {
|
|
19
|
+
"compute": {
|
|
20
|
+
"defaults": {
|
|
21
|
+
"outputDtype": "f32"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"kernelPath": "gemma2-q4k-dequant-f32a-nosubgroups",
|
|
26
|
+
"kernelPathPolicy": {
|
|
27
|
+
"mode": "capability-aware",
|
|
28
|
+
"sourceScope": ["config", "model", "manifest", "execution-v0"],
|
|
29
|
+
"onIncompatible": "remap"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "kernels/gemma2-q4k-fused-f32a",
|
|
3
|
+
"name": "gemma2-q4k-fused-f32a",
|
|
4
|
+
"description": "Explicit runtime override preset for the Gemma 2 fused Q4K F32 kernel path.",
|
|
5
|
+
"intent": "investigate",
|
|
6
|
+
"stability": "canonical",
|
|
7
|
+
"owner": "doppler-core",
|
|
8
|
+
"createdAtUtc": "2026-03-08T00:00:00Z",
|
|
9
|
+
"extends": "default",
|
|
10
|
+
"runtime": {
|
|
11
|
+
"inference": {
|
|
12
|
+
"compute": {
|
|
13
|
+
"activationDtype": "f32"
|
|
14
|
+
},
|
|
15
|
+
"kvcache": {
|
|
16
|
+
"kvDtype": "f32"
|
|
17
|
+
},
|
|
18
|
+
"session": {
|
|
19
|
+
"compute": {
|
|
20
|
+
"defaults": {
|
|
21
|
+
"outputDtype": "f32"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"kernelPath": "gemma2-q4k-fused-f32a",
|
|
26
|
+
"kernelPathPolicy": {
|
|
27
|
+
"mode": "capability-aware",
|
|
28
|
+
"sourceScope": ["config", "model", "manifest", "execution-v0"],
|
|
29
|
+
"onIncompatible": "remap"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
@@ -1,20 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "kernels/safe-q4k",
|
|
3
3
|
"name": "safe-q4k",
|
|
4
|
-
"description": "
|
|
4
|
+
"description": "Deprecated alias for kernels/gemma2-q4k-dequant-f32a-nosubgroups.",
|
|
5
5
|
"intent": "investigate",
|
|
6
|
-
"stability": "
|
|
6
|
+
"stability": "deprecated",
|
|
7
7
|
"owner": "doppler-core",
|
|
8
8
|
"createdAtUtc": "2026-02-25T00:00:00Z",
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
"kernelPathPolicy": {
|
|
14
|
-
"mode": "capability-aware",
|
|
15
|
-
"sourceScope": ["config", "model", "manifest", "execution-v0"],
|
|
16
|
-
"onIncompatible": "remap"
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
}
|
|
9
|
+
"deprecatedAtUtc": "2026-03-08T00:00:00Z",
|
|
10
|
+
"replacementId": "kernels/gemma2-q4k-dequant-f32a-nosubgroups",
|
|
11
|
+
"extends": "kernels/gemma2-q4k-dequant-f32a-nosubgroups",
|
|
12
|
+
"runtime": {}
|
|
20
13
|
}
|
|
@@ -50,6 +50,9 @@ function createValidInferenceFixture() {
|
|
|
50
50
|
ropeScalingFactor: 1.0,
|
|
51
51
|
ropeScalingType: null,
|
|
52
52
|
ropeLocalTheta: null,
|
|
53
|
+
mropeInterleaved: false,
|
|
54
|
+
mropeSection: null,
|
|
55
|
+
partialRotaryFactor: null,
|
|
53
56
|
yarnBetaFast: null,
|
|
54
57
|
yarnBetaSlow: null,
|
|
55
58
|
yarnOriginalMaxPos: null,
|
|
@@ -94,6 +97,9 @@ const FIELD_CASES = Object.freeze([
|
|
|
94
97
|
{ kind: 'nonNullable', path: ['rope', 'ropeScalingFactor'], message: 'rope.ropeScalingFactor is required' },
|
|
95
98
|
{ kind: 'nullable', path: ['rope', 'ropeScalingType'], message: 'rope.ropeScalingType must be explicitly set' },
|
|
96
99
|
{ kind: 'nullable', path: ['rope', 'ropeLocalTheta'], message: 'rope.ropeLocalTheta must be explicitly set' },
|
|
100
|
+
{ kind: 'nonNullable', path: ['rope', 'mropeInterleaved'], message: 'rope.mropeInterleaved is required' },
|
|
101
|
+
{ kind: 'nullable', path: ['rope', 'mropeSection'], message: 'rope.mropeSection must be explicitly set' },
|
|
102
|
+
{ kind: 'nullable', path: ['rope', 'partialRotaryFactor'], message: 'rope.partialRotaryFactor must be explicitly set' },
|
|
97
103
|
{ kind: 'nullable', path: ['rope', 'yarnBetaFast'], message: 'rope.yarnBetaFast must be explicitly set' },
|
|
98
104
|
{ kind: 'nullable', path: ['rope', 'yarnBetaSlow'], message: 'rope.yarnBetaSlow must be explicitly set' },
|
|
99
105
|
{ kind: 'nullable', path: ['rope', 'yarnOriginalMaxPos'], message: 'rope.yarnOriginalMaxPos must be explicitly set' },
|
package/src/config/runtime.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { createDopplerConfig, setKernelThresholds } from './schema/index.js';
|
|
2
2
|
import { validateRuntimeConfig, validateRuntimeOverrides } from './param-validator.js';
|
|
3
|
+
import { isPlainObject } from '../utils/plain-object.js';
|
|
3
4
|
|
|
4
5
|
let runtimeConfig = createDopplerConfig().runtime;
|
|
5
6
|
setKernelThresholds(runtimeConfig.shared.kernelThresholds);
|
|
@@ -9,12 +10,16 @@ export function getRuntimeConfig() {
|
|
|
9
10
|
}
|
|
10
11
|
|
|
11
12
|
export function setRuntimeConfig(overrides) {
|
|
12
|
-
if (
|
|
13
|
+
if (overrides === undefined || overrides === null) {
|
|
13
14
|
runtimeConfig = createDopplerConfig().runtime;
|
|
14
15
|
setKernelThresholds(runtimeConfig.shared.kernelThresholds);
|
|
15
16
|
return runtimeConfig;
|
|
16
17
|
}
|
|
17
18
|
|
|
19
|
+
if (!isPlainObject(overrides)) {
|
|
20
|
+
throw new Error('DopplerConfigError: runtime overrides must be an object when provided.');
|
|
21
|
+
}
|
|
22
|
+
|
|
18
23
|
assertNoDeprecatedRuntimeKeys(overrides);
|
|
19
24
|
validateRuntimeOverrides(overrides);
|
|
20
25
|
|
|
@@ -181,6 +181,11 @@ export type ProbeStage =
|
|
|
181
181
|
// Attention stages (per-layer)
|
|
182
182
|
| 'attn_input' // Input to attention (after residual from previous layer)
|
|
183
183
|
| 'attn_normed' // After input RMSNorm
|
|
184
|
+
| 'linear_qkv_proj' // Linear-attention fused QKV projection output
|
|
185
|
+
| 'linear_z_proj' // Linear-attention z projection output
|
|
186
|
+
| 'linear_a_proj' // Linear-attention a projection output
|
|
187
|
+
| 'linear_b_proj' // Linear-attention b projection output
|
|
188
|
+
| 'linear_core_out' // Linear-attention recurrent core output (before o_proj)
|
|
184
189
|
| 'q_proj' // Q projection output
|
|
185
190
|
| 'k_proj' // K projection output
|
|
186
191
|
| 'v_proj' // V projection output
|
|
@@ -5,6 +5,7 @@ import { DEFAULT_EMULATION_CONFIG, createEmulationConfig } from './emulation.sch
|
|
|
5
5
|
import { mergeEcosystemConfig } from './ecosystem.schema.js';
|
|
6
6
|
import {
|
|
7
7
|
chooseNullish,
|
|
8
|
+
chooseDefined,
|
|
8
9
|
mergeExecutionPatchLists,
|
|
9
10
|
mergeKernelPathPolicy,
|
|
10
11
|
mergeShallowObject,
|
|
@@ -31,6 +32,13 @@ export const DEFAULT_DOPPLER_CONFIG = {
|
|
|
31
32
|
runtime: DEFAULT_RUNTIME_CONFIG,
|
|
32
33
|
};
|
|
33
34
|
|
|
35
|
+
function cloneConfigTree(value) {
|
|
36
|
+
if (typeof structuredClone === 'function') {
|
|
37
|
+
return structuredClone(value);
|
|
38
|
+
}
|
|
39
|
+
return JSON.parse(JSON.stringify(value));
|
|
40
|
+
}
|
|
41
|
+
|
|
34
42
|
// =============================================================================
|
|
35
43
|
// Factory Function
|
|
36
44
|
// =============================================================================
|
|
@@ -39,19 +47,21 @@ export function createDopplerConfig(
|
|
|
39
47
|
overrides
|
|
40
48
|
) {
|
|
41
49
|
if (!overrides) {
|
|
42
|
-
return {
|
|
50
|
+
return {
|
|
51
|
+
model: DEFAULT_DOPPLER_CONFIG.model,
|
|
52
|
+
runtime: cloneConfigTree(DEFAULT_RUNTIME_CONFIG),
|
|
53
|
+
};
|
|
43
54
|
}
|
|
44
55
|
|
|
45
56
|
const runtimeOverrides = overrides.runtime ?? {};
|
|
57
|
+
const runtimeBase = cloneConfigTree(DEFAULT_RUNTIME_CONFIG);
|
|
46
58
|
const runtime = overrides.runtime
|
|
47
|
-
? mergeRuntimeConfig(
|
|
48
|
-
:
|
|
59
|
+
? mergeRuntimeConfig(runtimeBase, runtimeOverrides)
|
|
60
|
+
: runtimeBase;
|
|
49
61
|
const config = {
|
|
50
62
|
model: overrides.model ?? DEFAULT_DOPPLER_CONFIG.model,
|
|
51
63
|
runtime,
|
|
52
64
|
};
|
|
53
|
-
|
|
54
|
-
applyCalibrateDefaults(config.runtime, runtimeOverrides);
|
|
55
65
|
return config;
|
|
56
66
|
}
|
|
57
67
|
|
|
@@ -239,7 +249,7 @@ function mergeInferenceConfig(
|
|
|
239
249
|
speculative: { ...base.speculative, ...overrides.speculative },
|
|
240
250
|
generation: { ...base.generation, ...overrides.generation },
|
|
241
251
|
pipeline: overrides.pipeline ?? base.pipeline,
|
|
242
|
-
kernelPath: overrides.kernelPath
|
|
252
|
+
kernelPath: chooseDefined(overrides.kernelPath, base.kernelPath),
|
|
243
253
|
kernelPathSource: overrides.kernelPathSource ?? base.kernelPathSource,
|
|
244
254
|
kernelPathPolicy: mergeKernelPathPolicy(baseKernelPathPolicy, overrideKernelPathPolicy),
|
|
245
255
|
chatTemplate: mergeShallowObject(base.chatTemplate, overrides.chatTemplate),
|
|
@@ -302,21 +312,6 @@ function mergeDebugConfig(
|
|
|
302
312
|
};
|
|
303
313
|
}
|
|
304
314
|
|
|
305
|
-
function applyCalibrateDefaults(runtime, runtimeOverrides) {
|
|
306
|
-
const intent = runtime?.shared?.tooling?.intent;
|
|
307
|
-
if (intent !== 'calibrate') return;
|
|
308
|
-
|
|
309
|
-
const warmupOverrides = runtimeOverrides?.shared?.kernelWarmup;
|
|
310
|
-
const hasPrewarmOverride = warmupOverrides
|
|
311
|
-
&& Object.prototype.hasOwnProperty.call(warmupOverrides, 'prewarm');
|
|
312
|
-
if (!hasPrewarmOverride) {
|
|
313
|
-
runtime.shared.kernelWarmup = {
|
|
314
|
-
...runtime.shared.kernelWarmup,
|
|
315
|
-
prewarm: true,
|
|
316
|
-
};
|
|
317
|
-
}
|
|
318
|
-
}
|
|
319
|
-
|
|
320
315
|
function mergeBenchmarkConfig(
|
|
321
316
|
base,
|
|
322
317
|
overrides
|
|
@@ -165,13 +165,16 @@ export const DEFAULT_PRESET_INFERENCE_CONFIG = {
|
|
|
165
165
|
rope: {
|
|
166
166
|
ropeTheta: 10000,
|
|
167
167
|
ropeLocalTheta: null,
|
|
168
|
+
mropeInterleaved: false,
|
|
169
|
+
mropeSection: null,
|
|
170
|
+
partialRotaryFactor: null,
|
|
168
171
|
ropeScalingType: null,
|
|
169
172
|
ropeScalingFactor: 1.0,
|
|
170
173
|
ropeLocalScalingType: null,
|
|
171
174
|
ropeLocalScalingFactor: 1.0,
|
|
172
|
-
yarnBetaFast:
|
|
173
|
-
yarnBetaSlow:
|
|
174
|
-
yarnOriginalMaxPos:
|
|
175
|
+
yarnBetaFast: null,
|
|
176
|
+
yarnBetaSlow: null,
|
|
177
|
+
yarnOriginalMaxPos: null,
|
|
175
178
|
ropeLocalYarnBetaFast: null,
|
|
176
179
|
ropeLocalYarnBetaSlow: null,
|
|
177
180
|
ropeLocalYarnOriginalMaxPos: null,
|
|
@@ -18,6 +18,15 @@ export interface RoPEConfigSchema {
|
|
|
18
18
|
/** Local RoPE theta for sliding window layers (Gemma 3 uses 10000) */
|
|
19
19
|
ropeLocalTheta?: number;
|
|
20
20
|
|
|
21
|
+
/** Apply adjacent-pair rotary layout instead of rotate-half layout. */
|
|
22
|
+
mropeInterleaved?: boolean;
|
|
23
|
+
|
|
24
|
+
/** mRoPE section sizes before the Qwen doubling step. */
|
|
25
|
+
mropeSection?: number[] | null;
|
|
26
|
+
|
|
27
|
+
/** Fraction of the head dimension that participates in rotary embedding. */
|
|
28
|
+
partialRotaryFactor?: number | null;
|
|
29
|
+
|
|
21
30
|
/** RoPE scaling type */
|
|
22
31
|
ropeScalingType?: 'linear' | 'dynamic' | 'yarn' | null;
|
|
23
32
|
|
|
@@ -105,6 +105,12 @@ export interface KernelPathSchema {
|
|
|
105
105
|
/** KV cache dtype for this path; defaults to activationDtype when omitted. */
|
|
106
106
|
kvDtype?: string;
|
|
107
107
|
|
|
108
|
+
/**
|
|
109
|
+
* Explicit widening target used by the finiteness fallback execution plan.
|
|
110
|
+
* Required for inline/generated kernel paths that do not have a stable registry id.
|
|
111
|
+
*/
|
|
112
|
+
finitenessFallbackKernelPathId?: string;
|
|
113
|
+
|
|
108
114
|
/**
|
|
109
115
|
* Prefill phase kernel sequence (M > 1).
|
|
110
116
|
* If not specified, uses decode with batched variants.
|
|
@@ -145,13 +151,17 @@ export interface KernelPathSchema {
|
|
|
145
151
|
export type BuiltinKernelPathId =
|
|
146
152
|
| 'gemma2-q4k-fused-f32a' // Gemma 2 Q4K weights, fused matmul, F32 activations
|
|
147
153
|
| 'gemma2-q4k-dequant-f16a' // Gemma 2 Q4K -> F16 dequant, F16 activations
|
|
154
|
+
| 'gemma2-q4k-dequant-f32a-nosubgroups' // Gemma 2 Q4K -> F32 dequant path with no subgroup requirement
|
|
155
|
+
| 'gemma2-q4k-dequant-f32a' // Legacy alias for gemma2-q4k-dequant-f32a-nosubgroups
|
|
148
156
|
| 'gemma2-f16-f16a' // Gemma 2 F16 weights, F16 activations
|
|
149
157
|
| 'gemma2-f16-f32a' // Gemma 2 F16 weights, F32 activations
|
|
150
158
|
| 'gemma3-f16-fused-f16a-online' // Gemma 3 F16 fused FFN online path
|
|
151
159
|
| 'gemma3-f16-fused-f32a-online' // Gemma 3 F16 fused FFN online path with F32 activations
|
|
160
|
+
| 'gemma3-f16-fused-f32a-online-streamingprefill' // Gemma 3 F16 fused FFN online path with streaming prefill attention
|
|
152
161
|
| 'gemma3-q4k-dequant-f16a-online' // Gemma 3 Q4K dequant online path (F16 activations)
|
|
153
162
|
| 'gemma3-q4k-dequant-f32a-online' // Gemma 3 Q4K dequant online path with F32 activations
|
|
154
|
-
| 'gemma3-q4k-dequant-f32a' // Gemma 3 Q4K dequant path with
|
|
163
|
+
| 'gemma3-q4k-dequant-f32a-nosubgroups' // Gemma 3 Q4K dequant path with no subgroup requirement
|
|
164
|
+
| 'gemma3-q4k-dequant-f32a' // Legacy alias for gemma3-q4k-dequant-f32a-nosubgroups
|
|
155
165
|
| 'lfm2-q4k-dequant-f32a-online' // LFM2 Q4K path with F32 activations and fast prefill
|
|
156
166
|
| 'embeddinggemma-f16-f32a' // EmbeddingGemma F16 weights, F32 activations
|
|
157
167
|
| 'embeddinggemma-f32-f32a' // EmbeddingGemma F32 weights, F32 activations
|
|
@@ -155,18 +155,25 @@ export const DEFAULT_KERNEL_THRESHOLDS = {
|
|
|
155
155
|
tuner: DEFAULT_TUNER_LIMITS,
|
|
156
156
|
};
|
|
157
157
|
|
|
158
|
+
function cloneThresholdTree(value) {
|
|
159
|
+
if (typeof structuredClone === 'function') {
|
|
160
|
+
return structuredClone(value);
|
|
161
|
+
}
|
|
162
|
+
return JSON.parse(JSON.stringify(value));
|
|
163
|
+
}
|
|
164
|
+
|
|
158
165
|
// =============================================================================
|
|
159
166
|
// Runtime Access
|
|
160
167
|
// =============================================================================
|
|
161
168
|
|
|
162
|
-
let currentThresholds =
|
|
169
|
+
let currentThresholds = cloneThresholdTree(DEFAULT_KERNEL_THRESHOLDS);
|
|
163
170
|
|
|
164
171
|
export function getKernelThresholds() {
|
|
165
|
-
return currentThresholds;
|
|
172
|
+
return cloneThresholdTree(currentThresholds);
|
|
166
173
|
}
|
|
167
174
|
|
|
168
175
|
export function setKernelThresholds(overrides) {
|
|
169
|
-
|
|
176
|
+
const nextThresholds = {
|
|
170
177
|
...currentThresholds,
|
|
171
178
|
...overrides,
|
|
172
179
|
matmul: { ...currentThresholds.matmul, ...overrides.matmul },
|
|
@@ -180,8 +187,9 @@ export function setKernelThresholds(overrides) {
|
|
|
180
187
|
cast: { ...currentThresholds.cast, ...overrides.cast },
|
|
181
188
|
tuner: { ...currentThresholds.tuner, ...overrides.tuner },
|
|
182
189
|
};
|
|
190
|
+
currentThresholds = cloneThresholdTree(nextThresholds);
|
|
183
191
|
}
|
|
184
192
|
|
|
185
193
|
export function resetKernelThresholds() {
|
|
186
|
-
currentThresholds =
|
|
194
|
+
currentThresholds = cloneThresholdTree(DEFAULT_KERNEL_THRESHOLDS);
|
|
187
195
|
}
|
|
@@ -88,7 +88,8 @@ export interface QuantizationInfoSchema {
|
|
|
88
88
|
tts?: QuantizationValue; // TTS decoder
|
|
89
89
|
projector?: QuantizationValue; // Cross-modal projection layers
|
|
90
90
|
|
|
91
|
-
// Runtime hints
|
|
91
|
+
// Runtime hints. `compute` may be included in variantTag when artifact naming
|
|
92
|
+
// treats activation dtype as part of the published variant identity.
|
|
92
93
|
kvCache?: QuantizationValue;
|
|
93
94
|
compute?: QuantizationValue;
|
|
94
95
|
|
|
@@ -217,6 +218,12 @@ export interface ManifestRoPESchema {
|
|
|
217
218
|
ropeTheta: number;
|
|
218
219
|
/** Local theta for sliding window layers (null = same as ropeTheta) */
|
|
219
220
|
ropeLocalTheta: number | null;
|
|
221
|
+
/** Use adjacent-pair rotary layout instead of rotate-half layout. */
|
|
222
|
+
mropeInterleaved: boolean;
|
|
223
|
+
/** mRoPE section sizes before the Qwen doubling step. */
|
|
224
|
+
mropeSection: number[] | null;
|
|
225
|
+
/** Fraction of the head dimension that participates in rotary embedding. */
|
|
226
|
+
partialRotaryFactor: number | null;
|
|
220
227
|
/** RoPE scaling type (null = no scaling, 'linear', 'dynamic', 'yarn') */
|
|
221
228
|
ropeScalingType: string | null;
|
|
222
229
|
/** RoPE scaling factor (1.0 if no scaling) */
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { MB } from './units.schema.js';
|
|
2
|
+
import { validateRequiredInferenceFields } from '../../inference/pipelines/text/config.js';
|
|
2
3
|
|
|
3
4
|
// =============================================================================
|
|
4
5
|
// Hash & Versioning
|
|
@@ -62,14 +63,17 @@ export const DEFAULT_MANIFEST_INFERENCE = {
|
|
|
62
63
|
rope: {
|
|
63
64
|
ropeTheta: 10000,
|
|
64
65
|
ropeLocalTheta: null, // Same as ropeTheta (null = use ropeTheta)
|
|
66
|
+
mropeInterleaved: false,
|
|
67
|
+
mropeSection: null,
|
|
68
|
+
partialRotaryFactor: null,
|
|
65
69
|
ropeScalingType: null, // No scaling (null = disabled)
|
|
66
70
|
ropeScalingFactor: 1.0,
|
|
67
71
|
ropeLocalScalingType: null, // Local scaling policy (null = no scaling)
|
|
68
72
|
ropeLocalScalingFactor: 1.0,
|
|
69
73
|
// YARN parameters - only relevant when ropeScalingType='yarn'
|
|
70
|
-
yarnBetaFast:
|
|
71
|
-
yarnBetaSlow:
|
|
72
|
-
yarnOriginalMaxPos:
|
|
74
|
+
yarnBetaFast: null,
|
|
75
|
+
yarnBetaSlow: null,
|
|
76
|
+
yarnOriginalMaxPos: null,
|
|
73
77
|
// Local YARN parameters - only relevant when ropeLocalScalingType='yarn'
|
|
74
78
|
ropeLocalYarnBetaFast: null,
|
|
75
79
|
ropeLocalYarnBetaSlow: null,
|
|
@@ -121,6 +125,18 @@ export function validateManifestInference(
|
|
|
121
125
|
`Please re-convert the model using the latest converter.`
|
|
122
126
|
);
|
|
123
127
|
}
|
|
128
|
+
|
|
129
|
+
if (manifest.modelType === 'diffusion' || manifest.modelType === 'energy') {
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const inference = typeof structuredClone === 'function'
|
|
134
|
+
? structuredClone(manifest.inference)
|
|
135
|
+
: JSON.parse(JSON.stringify(manifest.inference));
|
|
136
|
+
validateRequiredInferenceFields(
|
|
137
|
+
inference,
|
|
138
|
+
manifest.modelId ?? 'unknown'
|
|
139
|
+
);
|
|
124
140
|
}
|
|
125
141
|
|
|
126
142
|
export function hasInferenceConfig(
|
|
@@ -2,9 +2,17 @@ import { createDopplerConfig, DEFAULT_TRAINING_SETTINGS } from './schema/index.j
|
|
|
2
2
|
import { validateDistillTrainingConfig } from './schema/distill-training.schema.js';
|
|
3
3
|
import { validateUlTrainingConfig } from './schema/ul-training.schema.js';
|
|
4
4
|
|
|
5
|
+
function cloneConfigTree(value) {
|
|
6
|
+
if (typeof structuredClone === 'function') {
|
|
7
|
+
return structuredClone(value);
|
|
8
|
+
}
|
|
9
|
+
return JSON.parse(JSON.stringify(value));
|
|
10
|
+
}
|
|
11
|
+
|
|
5
12
|
function mergeTrainingSettings(base, overrides) {
|
|
13
|
+
const baseConfig = cloneConfigTree(base);
|
|
6
14
|
if (!overrides) {
|
|
7
|
-
const merged =
|
|
15
|
+
const merged = baseConfig;
|
|
8
16
|
validateDistillTrainingConfig(merged.distill);
|
|
9
17
|
validateUlTrainingConfig(merged.ul);
|
|
10
18
|
if (merged.distill.enabled === true && merged.ul.enabled === true) {
|
|
@@ -14,42 +22,42 @@ function mergeTrainingSettings(base, overrides) {
|
|
|
14
22
|
}
|
|
15
23
|
|
|
16
24
|
const merged = {
|
|
17
|
-
enabled: overrides.enabled ??
|
|
18
|
-
lora: { ...
|
|
25
|
+
enabled: overrides.enabled ?? baseConfig.enabled,
|
|
26
|
+
lora: { ...baseConfig.lora, ...overrides.lora },
|
|
19
27
|
optimizer: {
|
|
20
|
-
...
|
|
28
|
+
...baseConfig.optimizer,
|
|
21
29
|
...overrides.optimizer,
|
|
22
|
-
scheduler: { ...
|
|
30
|
+
scheduler: { ...baseConfig.optimizer.scheduler, ...overrides.optimizer?.scheduler },
|
|
23
31
|
},
|
|
24
|
-
gradient: { ...
|
|
25
|
-
precision: { ...
|
|
26
|
-
attention: { ...
|
|
32
|
+
gradient: { ...baseConfig.gradient, ...overrides.gradient },
|
|
33
|
+
precision: { ...baseConfig.precision, ...overrides.precision },
|
|
34
|
+
attention: { ...baseConfig.attention, ...overrides.attention },
|
|
27
35
|
telemetry: {
|
|
28
|
-
...
|
|
36
|
+
...baseConfig.telemetry,
|
|
29
37
|
...overrides.telemetry,
|
|
30
38
|
alerts: {
|
|
31
|
-
...
|
|
39
|
+
...baseConfig.telemetry.alerts,
|
|
32
40
|
...overrides.telemetry?.alerts,
|
|
33
41
|
thresholds: {
|
|
34
|
-
...
|
|
42
|
+
...baseConfig.telemetry.alerts.thresholds,
|
|
35
43
|
...overrides.telemetry?.alerts?.thresholds,
|
|
36
44
|
},
|
|
37
45
|
},
|
|
38
46
|
},
|
|
39
|
-
lossScaling: { ...
|
|
47
|
+
lossScaling: { ...baseConfig.lossScaling, ...overrides.lossScaling },
|
|
40
48
|
distill: {
|
|
41
|
-
...
|
|
49
|
+
...baseConfig.distill,
|
|
42
50
|
...overrides.distill,
|
|
43
|
-
freeze: { ...
|
|
51
|
+
freeze: { ...baseConfig.distill.freeze, ...overrides.distill?.freeze },
|
|
44
52
|
},
|
|
45
53
|
ul: {
|
|
46
|
-
...
|
|
54
|
+
...baseConfig.ul,
|
|
47
55
|
...overrides.ul,
|
|
48
|
-
noiseSchedule: { ...
|
|
49
|
-
priorAlignment: { ...
|
|
50
|
-
decoderSigmoidWeight: { ...
|
|
51
|
-
lossWeights: { ...
|
|
52
|
-
freeze: { ...
|
|
56
|
+
noiseSchedule: { ...baseConfig.ul.noiseSchedule, ...overrides.ul?.noiseSchedule },
|
|
57
|
+
priorAlignment: { ...baseConfig.ul.priorAlignment, ...overrides.ul?.priorAlignment },
|
|
58
|
+
decoderSigmoidWeight: { ...baseConfig.ul.decoderSigmoidWeight, ...overrides.ul?.decoderSigmoidWeight },
|
|
59
|
+
lossWeights: { ...baseConfig.ul.lossWeights, ...overrides.ul?.lossWeights },
|
|
60
|
+
freeze: { ...baseConfig.ul.freeze, ...overrides.ul?.freeze },
|
|
53
61
|
},
|
|
54
62
|
};
|
|
55
63
|
validateDistillTrainingConfig(merged.distill);
|
|
@@ -74,7 +82,7 @@ export function createTrainingConfig(overrides = {}) {
|
|
|
74
82
|
|
|
75
83
|
export const DEFAULT_TRAINING_CONFIG = createTrainingConfig();
|
|
76
84
|
|
|
77
|
-
let trainingConfig =
|
|
85
|
+
let trainingConfig = createTrainingConfig();
|
|
78
86
|
|
|
79
87
|
export function getTrainingConfig() {
|
|
80
88
|
return trainingConfig;
|
|
@@ -86,6 +94,6 @@ export function setTrainingConfig(overrides) {
|
|
|
86
94
|
}
|
|
87
95
|
|
|
88
96
|
export function resetTrainingConfig() {
|
|
89
|
-
trainingConfig =
|
|
97
|
+
trainingConfig = createTrainingConfig();
|
|
90
98
|
return trainingConfig;
|
|
91
99
|
}
|