@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
package/tools/doppler-cli.js
CHANGED
|
@@ -22,8 +22,6 @@ const DEFAULT_CLI_POLICY = {
|
|
|
22
22
|
allowed: ['auto', 'node', 'browser'],
|
|
23
23
|
},
|
|
24
24
|
bench: {
|
|
25
|
-
modelId: 'gemma-3-270m-it-wf16-ef16-hf16',
|
|
26
|
-
surface: 'browser',
|
|
27
25
|
cacheMode: 'warm',
|
|
28
26
|
},
|
|
29
27
|
cacheMode: null,
|
|
@@ -66,6 +64,8 @@ function usage() {
|
|
|
66
64
|
' doppler debug --config <path.json|json> [--runtime-config <path|url|json>] [--surface auto|node|browser]',
|
|
67
65
|
' doppler bench --config <path.json|json> [--runtime-config <path|url|json>] [--surface auto|node|browser]',
|
|
68
66
|
' doppler verify --config <path.json|json> [--runtime-config <path|url|json>] [--surface auto|node|browser]',
|
|
67
|
+
' doppler lora --config <path.json|json> [--surface auto|node]',
|
|
68
|
+
' doppler distill --config <path.json|json> [--surface auto|node]',
|
|
69
69
|
'',
|
|
70
70
|
'Flags:',
|
|
71
71
|
' --config <path|json> Required command config payload (file path or JSON object string).',
|
|
@@ -80,7 +80,7 @@ function usage() {
|
|
|
80
80
|
' - run: CLI-only run controls (surface, browser options, and bench save/compare/manifest settings).',
|
|
81
81
|
'',
|
|
82
82
|
'Example:',
|
|
83
|
-
' doppler verify --config \'{"request":{"suite":"inference","modelId":"gemma-3-270m-it-
|
|
83
|
+
' doppler verify --config \'{"request":{"suite":"inference","modelId":"gemma-3-270m-it-f16-af32"}}\' --json',
|
|
84
84
|
].join('\n');
|
|
85
85
|
}
|
|
86
86
|
|
|
@@ -337,7 +337,89 @@ function resolveStaticRootDir(browserOptions = {}) {
|
|
|
337
337
|
return process.cwd();
|
|
338
338
|
}
|
|
339
339
|
|
|
340
|
-
|
|
340
|
+
function resolveRdrrRoot(options = {}) {
|
|
341
|
+
return path.resolve(asStringOrNull(options.rdrrRoot) || DEFAULT_EXTERNAL_RDRR_ROOT);
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
async function findResolvableModelCandidate(candidates) {
|
|
345
|
+
const discoveredManifestCandidates = [];
|
|
346
|
+
|
|
347
|
+
for (const candidate of candidates) {
|
|
348
|
+
if (!await pathExists(candidate.manifestPath)) {
|
|
349
|
+
continue;
|
|
350
|
+
}
|
|
351
|
+
discoveredManifestCandidates.push(candidate);
|
|
352
|
+
|
|
353
|
+
const modelDir = path.dirname(candidate.manifestPath);
|
|
354
|
+
try {
|
|
355
|
+
const files = await fs.readdir(modelDir, { withFileTypes: true });
|
|
356
|
+
const hasShards = files.some((entry) =>
|
|
357
|
+
entry.isFile() && /^shard_\d+\.bin$/u.test(entry.name)
|
|
358
|
+
);
|
|
359
|
+
if (hasShards) {
|
|
360
|
+
return { candidate, discoveredManifestCandidates };
|
|
361
|
+
}
|
|
362
|
+
} catch {
|
|
363
|
+
return { candidate, discoveredManifestCandidates };
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
return { candidate: null, discoveredManifestCandidates };
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
async function resolveExternalModelDirectory(rdrrRoot, modelId) {
|
|
371
|
+
const directModelDir = path.join(rdrrRoot, modelId);
|
|
372
|
+
const directManifestPath = path.join(directModelDir, 'manifest.json');
|
|
373
|
+
if (await pathExists(directManifestPath)) {
|
|
374
|
+
return {
|
|
375
|
+
modelDir: directModelDir,
|
|
376
|
+
manifestPath: directManifestPath,
|
|
377
|
+
};
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
let entries = [];
|
|
381
|
+
try {
|
|
382
|
+
entries = await fs.readdir(rdrrRoot, { withFileTypes: true });
|
|
383
|
+
} catch {
|
|
384
|
+
return null;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
const matches = [];
|
|
388
|
+
for (const entry of entries) {
|
|
389
|
+
if (!entry.isDirectory()) {
|
|
390
|
+
continue;
|
|
391
|
+
}
|
|
392
|
+
const manifestPath = path.join(rdrrRoot, entry.name, 'manifest.json');
|
|
393
|
+
if (!await pathExists(manifestPath)) {
|
|
394
|
+
continue;
|
|
395
|
+
}
|
|
396
|
+
let manifest = null;
|
|
397
|
+
try {
|
|
398
|
+
manifest = JSON.parse(await fs.readFile(manifestPath, 'utf8'));
|
|
399
|
+
} catch {
|
|
400
|
+
continue;
|
|
401
|
+
}
|
|
402
|
+
if (manifest?.modelId !== modelId) {
|
|
403
|
+
continue;
|
|
404
|
+
}
|
|
405
|
+
matches.push({
|
|
406
|
+
modelDir: path.join(rdrrRoot, entry.name),
|
|
407
|
+
manifestPath,
|
|
408
|
+
});
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
if (matches.length > 1) {
|
|
412
|
+
const matchPaths = matches.map((match) => match.modelDir).join(', ');
|
|
413
|
+
throw new Error(
|
|
414
|
+
`Model "${modelId}" matched multiple external directories. ` +
|
|
415
|
+
`Disambiguate by setting request.modelUrl in --config. Matches: ${matchPaths}`
|
|
416
|
+
);
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
return matches[0] || null;
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
export async function resolveBrowserModelUrl(request, browserOptions = {}) {
|
|
341
423
|
if (request.modelUrl || !request.modelId) {
|
|
342
424
|
return request;
|
|
343
425
|
}
|
|
@@ -353,49 +435,28 @@ async function resolveBrowserModelUrl(request, browserOptions = {}) {
|
|
|
353
435
|
}
|
|
354
436
|
|
|
355
437
|
const staticRootDir = resolveStaticRootDir(browserOptions);
|
|
356
|
-
const
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
};
|
|
360
|
-
const localCandidate = {
|
|
438
|
+
const externalModel = await resolveExternalModelDirectory(resolveRdrrRoot(browserOptions), modelId);
|
|
439
|
+
const candidates = [
|
|
440
|
+
{
|
|
361
441
|
modelUrl: `/models/local/${encodedModelId}`,
|
|
362
442
|
manifestPath: path.join(staticRootDir, 'models', 'local', modelId, 'manifest.json'),
|
|
363
|
-
|
|
364
|
-
|
|
443
|
+
},
|
|
444
|
+
{
|
|
365
445
|
modelUrl: `/models/${encodedModelId}`,
|
|
366
446
|
manifestPath: path.join(staticRootDir, 'models', modelId, 'manifest.json'),
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
447
|
+
},
|
|
448
|
+
{
|
|
449
|
+
modelUrl: `/models/external/${encodeURIComponent(path.basename(externalModel?.modelDir || modelId))}`,
|
|
450
|
+
manifestPath: externalModel?.manifestPath || path.join(resolveRdrrRoot(browserOptions), modelId, 'manifest.json'),
|
|
451
|
+
},
|
|
372
452
|
];
|
|
373
|
-
const discoveredManifestCandidates = [];
|
|
374
|
-
|
|
375
|
-
for (const candidate of candidates) {
|
|
376
|
-
if (!await pathExists(candidate.manifestPath)) {
|
|
377
|
-
continue;
|
|
378
|
-
}
|
|
379
|
-
discoveredManifestCandidates.push(candidate);
|
|
380
453
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
if (hasShards) {
|
|
388
|
-
return {
|
|
389
|
-
...request,
|
|
390
|
-
modelUrl: candidate.modelUrl,
|
|
391
|
-
};
|
|
392
|
-
}
|
|
393
|
-
} catch {
|
|
394
|
-
return {
|
|
395
|
-
...request,
|
|
396
|
-
modelUrl: candidate.modelUrl,
|
|
397
|
-
};
|
|
398
|
-
}
|
|
454
|
+
const { candidate, discoveredManifestCandidates } = await findResolvableModelCandidate(candidates);
|
|
455
|
+
if (candidate) {
|
|
456
|
+
return {
|
|
457
|
+
...request,
|
|
458
|
+
modelUrl: candidate.modelUrl,
|
|
459
|
+
};
|
|
399
460
|
}
|
|
400
461
|
|
|
401
462
|
if (discoveredManifestCandidates.length > 0) {
|
|
@@ -421,13 +482,13 @@ export async function resolveNodeModelUrl(request, options = {}) {
|
|
|
421
482
|
}
|
|
422
483
|
|
|
423
484
|
const modelId = String(request.modelId);
|
|
424
|
-
const rdrrRoot =
|
|
425
|
-
const
|
|
426
|
-
if (!
|
|
485
|
+
const rdrrRoot = resolveRdrrRoot(options);
|
|
486
|
+
const externalModel = await resolveExternalModelDirectory(rdrrRoot, modelId);
|
|
487
|
+
if (!externalModel) {
|
|
427
488
|
return request;
|
|
428
489
|
}
|
|
429
490
|
|
|
430
|
-
const modelDir =
|
|
491
|
+
const modelDir = externalModel.modelDir;
|
|
431
492
|
try {
|
|
432
493
|
const files = await fs.readdir(modelDir, { withFileTypes: true });
|
|
433
494
|
const hasShards = files.some((entry) =>
|
|
@@ -461,6 +522,9 @@ function parseSurface(value, command, policy = DEFAULT_CLI_POLICY) {
|
|
|
461
522
|
if (command === 'convert' && normalized === 'browser') {
|
|
462
523
|
throw new Error('convert is not supported on browser relay. Use --surface node or --surface auto.');
|
|
463
524
|
}
|
|
525
|
+
if ((command === 'lora' || command === 'distill') && normalized === 'browser') {
|
|
526
|
+
throw new Error(`${command} is not supported on browser relay. Use --surface node or --surface auto.`);
|
|
527
|
+
}
|
|
464
528
|
return normalized;
|
|
465
529
|
}
|
|
466
530
|
|
|
@@ -520,13 +584,10 @@ function resolveBenchRunOptions(runConfig, policy = DEFAULT_CLI_POLICY) {
|
|
|
520
584
|
function resolveSurfaceForCommand(command, parsed, runConfig, policy = DEFAULT_CLI_POLICY) {
|
|
521
585
|
const fromCli = asStringOrNull(parsed.flags.surface);
|
|
522
586
|
const fromRun = asStringOrNull(runConfig?.surface);
|
|
523
|
-
|
|
524
|
-
? asStringOrNull(policy?.defaults?.bench?.surface)
|
|
525
|
-
: null;
|
|
526
|
-
return parseSurface(fromCli ?? fromRun ?? fromPolicy, command, policy);
|
|
587
|
+
return parseSurface(fromCli ?? fromRun ?? null, command, policy);
|
|
527
588
|
}
|
|
528
589
|
|
|
529
|
-
async function buildRequest(parsed, policy = DEFAULT_CLI_POLICY) {
|
|
590
|
+
export async function buildRequest(parsed, policy = DEFAULT_CLI_POLICY) {
|
|
530
591
|
const command = parsed.command;
|
|
531
592
|
if (!command || !TOOLING_COMMANDS.includes(command)) {
|
|
532
593
|
throw new Error(`Unsupported command "${command || ''}"`);
|
|
@@ -545,21 +606,15 @@ async function buildRequest(parsed, policy = DEFAULT_CLI_POLICY) {
|
|
|
545
606
|
}
|
|
546
607
|
requestInput.command = command;
|
|
547
608
|
|
|
548
|
-
if (command === 'bench' && !asStringOrNull(requestInput.modelId) && !asStringOrNull(requestInput.modelUrl)) {
|
|
549
|
-
const benchDefaultModelId = asStringOrNull(policy?.defaults?.bench?.modelId);
|
|
550
|
-
if (benchDefaultModelId) {
|
|
551
|
-
requestInput.modelId = benchDefaultModelId;
|
|
552
|
-
}
|
|
553
|
-
}
|
|
554
|
-
|
|
555
609
|
applyRuntimeFlagOverride(requestInput, runtimeOverride);
|
|
556
610
|
|
|
557
611
|
const surfaceFromCli = asStringOrNull(parsed.flags.surface) !== null;
|
|
612
|
+
const surface = resolveSurfaceForCommand(command, parsed, envelope.run, policy);
|
|
558
613
|
|
|
559
614
|
return {
|
|
560
615
|
request: normalizeToolingCommandRequest(requestInput),
|
|
561
616
|
runConfig: envelope.run,
|
|
562
|
-
surface
|
|
617
|
+
surface,
|
|
563
618
|
surfaceFromCli,
|
|
564
619
|
benchRunOptions: resolveBenchRunOptions(envelope.run, policy),
|
|
565
620
|
};
|
|
@@ -593,10 +648,18 @@ function buildBrowserRunOptions(runConfig, jsonOutput, request = {}) {
|
|
|
593
648
|
executablePath: asStringOrNull(browser.executablePath),
|
|
594
649
|
runnerPath: asStringOrNull(browser.runnerPath),
|
|
595
650
|
staticRootDir: asStringOrNull(browser.staticRootDir),
|
|
651
|
+
rdrrRoot: asStringOrNull(browser.rdrrRoot),
|
|
596
652
|
baseUrl: asStringOrNull(browser.baseUrl),
|
|
597
653
|
browserArgs: parseBrowserArgs(browser.browserArgs),
|
|
598
654
|
headless: headed ? false : (explicitHeadless ?? true),
|
|
599
655
|
};
|
|
656
|
+
const rdrrRoot = resolveRdrrRoot(options);
|
|
657
|
+
options.staticMounts = [
|
|
658
|
+
{
|
|
659
|
+
urlPrefix: '/models/external',
|
|
660
|
+
rootDir: rdrrRoot,
|
|
661
|
+
},
|
|
662
|
+
];
|
|
600
663
|
|
|
601
664
|
const port = parseNumberFlag(browser.port, 'run.browser.port');
|
|
602
665
|
if (port !== null) {
|
|
@@ -644,9 +707,36 @@ function isNodeWebGPUFallbackCandidate(error, fallbackPolicy = DEFAULT_CLI_POLIC
|
|
|
644
707
|
/**
 * Reports whether a tooling request belongs to a training flow.
 *
 * A request is treated as training when any of the following holds:
 *   - its `suite` is 'training';
 *   - its `command` is 'lora' or 'distill';
 *   - its `command` is 'bench' and its `workloadType` is 'training'.
 *
 * @param {object} request - Tooling request; any falsy or non-object value yields false.
 * @returns {boolean} True when the request matches one of the training rules above.
 */
function isTrainingCommandFlow(request) {
  const isObjectLike = Boolean(request) && typeof request === 'object';
  if (!isObjectLike) {
    return false;
  }
  if (request.suite === 'training') {
    return true;
  }
  switch (request.command) {
    case 'lora':
    case 'distill':
      return true;
    case 'bench':
      // Bench only counts as training for the training workload type.
      return request.workloadType === 'training';
    default:
      return false;
  }
}
|
|
649
713
|
|
|
714
|
+
/**
 * Picks the surface name to report for an error.
 *
 * Candidates are tried in this order, each passed through `asStringOrNull`,
 * and the first truthy result wins:
 *   1. `fallbackSurface` (despite its name, it is consulted first);
 *   2. `error.surface`;
 *   3. `error.details.surface`.
 *
 * @param {*} error - Error-like value; may be null or undefined.
 * @param {*} [fallbackSurface=null] - Surface supplied by the caller's context.
 * @returns {string|null} The first usable surface, or null when none is found.
 */
function resolveErrorSurface(error, fallbackSurface = null) {
  const fromContext = asStringOrNull(fallbackSurface);
  if (fromContext) {
    return fromContext;
  }

  const fromError = asStringOrNull(error?.surface);
  if (fromError) {
    return fromError;
  }

  const fromDetails = asStringOrNull(error?.details?.surface);
  return fromDetails || null;
}
|
|
722
|
+
|
|
723
|
+
/**
 * Wraps an error in a tooling error envelope using CLI context.
 *
 * The surface is resolved through `resolveErrorSurface(error, context.surface)`
 * and the request defaults to null when `context.request` is null or undefined.
 * The envelope itself is produced by `createToolingErrorEnvelope`.
 *
 * @param {*} error - The error being reported.
 * @param {{surface?: *, request?: *}} [context={}] - Optional CLI context.
 * @returns {*} Whatever `createToolingErrorEnvelope` returns for these inputs.
 */
export function createCliToolingErrorEnvelope(error, context = {}) {
  const surface = resolveErrorSurface(error, context.surface);
  const request = context.request ?? null;
  return createToolingErrorEnvelope(error, { surface, request });
}
|
|
729
|
+
|
|
730
|
+
/**
 * Replaces the `request` field of a command response with the given request.
 *
 * The response is returned untouched unless it passes `isPlainObject` and has
 * its own `request` property. Otherwise a shallow copy is returned with
 * `request` overwritten; the input response object is not mutated.
 *
 * @param {*} response - Command response to finalize.
 * @param {*} request - Request value to place on the returned response.
 * @returns {*} The original response, or a shallow copy carrying `request`.
 */
export function finalizeCliCommandResponse(response, request) {
  const carriesRequest = isPlainObject(response)
    && Object.prototype.hasOwnProperty.call(response, 'request');
  return carriesRequest ? { ...response, request } : response;
}
|
|
739
|
+
|
|
650
740
|
async function runCommandOnSurface(request, surface, runConfig, jsonOutput) {
|
|
651
741
|
if (surface === 'node') {
|
|
652
742
|
const nodeRequest = await resolveNodeModelUrl(request);
|
|
@@ -656,7 +746,8 @@ async function runCommandOnSurface(request, surface, runConfig, jsonOutput) {
|
|
|
656
746
|
console.error(`[surface] node resolved modelUrl=${nodeRequest.modelUrl}`);
|
|
657
747
|
}
|
|
658
748
|
}
|
|
659
|
-
|
|
749
|
+
const response = await runNodeCommand(nodeRequest, buildNodeRunOptions(jsonOutput));
|
|
750
|
+
return finalizeCliCommandResponse(response, request);
|
|
660
751
|
}
|
|
661
752
|
|
|
662
753
|
const browserOptions = buildBrowserRunOptions(runConfig, jsonOutput, request);
|
|
@@ -670,7 +761,8 @@ async function runCommandOnSurface(request, surface, runConfig, jsonOutput) {
|
|
|
670
761
|
}
|
|
671
762
|
}
|
|
672
763
|
|
|
673
|
-
|
|
764
|
+
const response = await runBrowserCommandInNode(browserRequest, browserOptions);
|
|
765
|
+
return finalizeCliCommandResponse(response, request);
|
|
674
766
|
}
|
|
675
767
|
|
|
676
768
|
async function runWithAutoSurface(request, runConfig, jsonOutput, policy = DEFAULT_CLI_POLICY) {
|
|
@@ -687,9 +779,12 @@ async function runWithAutoSurface(request, runConfig, jsonOutput, policy = DEFAU
|
|
|
687
779
|
}
|
|
688
780
|
if (isTrainingCommandFlow(request)) {
|
|
689
781
|
const downgradeError = new Error(
|
|
690
|
-
|
|
782
|
+
(request.command === 'lora' || request.command === 'distill')
|
|
783
|
+
? 'Training command auto-surface downgrade is blocked. Re-run with --surface node after fixing Node WebGPU support.'
|
|
784
|
+
: 'Training command auto-surface downgrade is blocked. Re-run with --surface node after fixing Node WebGPU support, or explicitly choose --surface browser.'
|
|
691
785
|
);
|
|
692
786
|
downgradeError.code = 'training_surface_downgrade_blocked';
|
|
787
|
+
downgradeError.surface = 'node';
|
|
693
788
|
downgradeError.command = request.command;
|
|
694
789
|
downgradeError.suite = request.suite;
|
|
695
790
|
downgradeError.workloadType = request.workloadType || null;
|
|
@@ -726,6 +821,13 @@ function toSummary(result) {
|
|
|
726
821
|
return `converted ${result.manifest.modelId} (${result.tensorCount} tensors, ${result.shardCount} shards)${contractStatus}${graphStatus}`;
|
|
727
822
|
}
|
|
728
823
|
|
|
824
|
+
if (result.kind === 'lora' || result.kind === 'distill') {
|
|
825
|
+
const workloadId = result.workloadId || 'unknown';
|
|
826
|
+
const action = result.action || 'run';
|
|
827
|
+
const runRoot = result.runRoot || 'n/a';
|
|
828
|
+
return `${result.kind} ${action} workload=${workloadId} runRoot=${runRoot}`;
|
|
829
|
+
}
|
|
830
|
+
|
|
729
831
|
const suite = result.suite || result.report?.suite || 'suite';
|
|
730
832
|
const modelId = result.modelId || result.report?.modelId || 'unknown';
|
|
731
833
|
const passed = Number.isFinite(result.passed) ? result.passed : null;
|
|
@@ -938,7 +1040,7 @@ async function runManifestSweep(manifest, commandContext, jsonOutput, policy = D
|
|
|
938
1040
|
results.push({
|
|
939
1041
|
label,
|
|
940
1042
|
response: null,
|
|
941
|
-
error:
|
|
1043
|
+
error: createCliToolingErrorEnvelope(error, {
|
|
942
1044
|
surface: surface === 'auto' ? null : surface,
|
|
943
1045
|
request,
|
|
944
1046
|
}),
|
|
@@ -1122,6 +1224,20 @@ function printConvertReportSummary(result) {
|
|
|
1122
1224
|
|
|
1123
1225
|
function printMetricsSummary(result) {
|
|
1124
1226
|
if (!result || typeof result !== 'object') return;
|
|
1227
|
+
if (result.kind === 'distill') {
|
|
1228
|
+
const stageCount = Array.isArray(result.stageResults) ? result.stageResults.length : 0;
|
|
1229
|
+
console.log(
|
|
1230
|
+
`[metrics] kind=distill action=${result.action || 'run'} stages=${stageCount} runRoot=${quoteOneLine(result.runRoot)}`
|
|
1231
|
+
);
|
|
1232
|
+
return;
|
|
1233
|
+
}
|
|
1234
|
+
if (result.kind === 'lora') {
|
|
1235
|
+
const exportCount = Array.isArray(result.exports) ? result.exports.length : 0;
|
|
1236
|
+
console.log(
|
|
1237
|
+
`[metrics] kind=lora action=${result.action || 'run'} exports=${exportCount} runRoot=${quoteOneLine(result.runRoot)}`
|
|
1238
|
+
);
|
|
1239
|
+
return;
|
|
1240
|
+
}
|
|
1125
1241
|
const suite = String(result.suite || '');
|
|
1126
1242
|
const metrics = result.metrics;
|
|
1127
1243
|
if (!metrics || typeof metrics !== 'object') return;
|
|
@@ -1307,7 +1423,7 @@ async function main() {
|
|
|
1307
1423
|
printMetricsSummary(response.result);
|
|
1308
1424
|
} catch (error) {
|
|
1309
1425
|
if (jsonOutputRequested) {
|
|
1310
|
-
console.log(JSON.stringify(
|
|
1426
|
+
console.log(JSON.stringify(createCliToolingErrorEnvelope(error, errorContext), null, 2));
|
|
1311
1427
|
process.exitCode = 1;
|
|
1312
1428
|
return;
|
|
1313
1429
|
}
|