@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
|
@@ -53,6 +53,78 @@ async function readTextFromSource(source) {
|
|
|
53
53
|
return new TextDecoder().decode(buffer);
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
+
function resolveIndexedShardLayout(indexJson, fileMap) {
|
|
57
|
+
const weightMap = indexJson?.weight_map;
|
|
58
|
+
if (!weightMap || typeof weightMap !== 'object') {
|
|
59
|
+
throw new Error('Safetensors index JSON must include a weight_map object for sharded parsing.');
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
const referencedShards = new Set();
|
|
63
|
+
for (const [tensorName, shardNameRaw] of Object.entries(weightMap)) {
|
|
64
|
+
if (typeof tensorName !== 'string' || !tensorName.trim()) {
|
|
65
|
+
throw new Error('Safetensors index JSON weight_map contains an invalid tensor name.');
|
|
66
|
+
}
|
|
67
|
+
if (typeof shardNameRaw !== 'string' || !shardNameRaw.trim()) {
|
|
68
|
+
throw new Error(`Safetensors index JSON weight_map entry for "${tensorName}" must reference a shard filename.`);
|
|
69
|
+
}
|
|
70
|
+
referencedShards.add(shardNameRaw);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (referencedShards.size === 0) {
|
|
74
|
+
throw new Error('Safetensors index JSON weight_map must reference at least one shard.');
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const missingShards = [...referencedShards].filter((shardName) => !fileMap.has(shardName));
|
|
78
|
+
if (missingShards.length > 0) {
|
|
79
|
+
throw new Error(
|
|
80
|
+
`Safetensors sharded parse is missing indexed shard files: ${missingShards.join(', ')}`
|
|
81
|
+
);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const extraShards = [...fileMap.keys()].filter((shardName) => !referencedShards.has(shardName));
|
|
85
|
+
if (extraShards.length > 0) {
|
|
86
|
+
throw new Error(
|
|
87
|
+
`Safetensors sharded parse received shard files not referenced by index JSON: ${extraShards.join(', ')}`
|
|
88
|
+
);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
return {
|
|
92
|
+
weightMap,
|
|
93
|
+
referencedShards,
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
function validateParsedShardsAgainstIndex(parsedShards, weightMap) {
|
|
98
|
+
const seenTensorNames = new Set();
|
|
99
|
+
for (const parsedShard of parsedShards) {
|
|
100
|
+
for (const tensor of parsedShard.parsed.tensors) {
|
|
101
|
+
seenTensorNames.add(tensor.name);
|
|
102
|
+
const mappedShard = weightMap[tensor.name];
|
|
103
|
+
if (typeof mappedShard !== 'string' || !mappedShard.trim()) {
|
|
104
|
+
throw new Error(
|
|
105
|
+
`Safetensors index JSON is missing a weight_map entry for tensor "${tensor.name}".`
|
|
106
|
+
);
|
|
107
|
+
}
|
|
108
|
+
if (mappedShard !== parsedShard.source.name) {
|
|
109
|
+
throw new Error(
|
|
110
|
+
`Safetensors index JSON routes tensor "${tensor.name}" to "${mappedShard}", ` +
|
|
111
|
+
`but it was found in "${parsedShard.source.name}".`
|
|
112
|
+
);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
const missingTensorMappings = Object.entries(weightMap)
|
|
118
|
+
.filter(([tensorName]) => !seenTensorNames.has(tensorName))
|
|
119
|
+
.map(([tensorName]) => tensorName);
|
|
120
|
+
if (missingTensorMappings.length > 0) {
|
|
121
|
+
throw new Error(
|
|
122
|
+
`Safetensors index JSON references tensors not found in provided shard files: ` +
|
|
123
|
+
`${missingTensorMappings.join(', ')}`
|
|
124
|
+
);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
56
128
|
export async function parseSafetensorsFile(file) {
|
|
57
129
|
const source = normalizeTensorSource(file);
|
|
58
130
|
const headerSizeBuffer = await source.readRange(0, 8);
|
|
@@ -108,8 +180,15 @@ export async function parseSafetensorsSharded(
|
|
|
108
180
|
metadata = indexJson.metadata || {};
|
|
109
181
|
}
|
|
110
182
|
|
|
111
|
-
|
|
183
|
+
let safetensorsSources = sources
|
|
112
184
|
.filter((source) => source.name.endsWith('.safetensors'));
|
|
185
|
+
let weightMap = null;
|
|
186
|
+
if (indexJson) {
|
|
187
|
+
const indexedLayout = resolveIndexedShardLayout(indexJson, fileMap);
|
|
188
|
+
weightMap = indexedLayout.weightMap;
|
|
189
|
+
safetensorsSources = safetensorsSources
|
|
190
|
+
.filter((source) => indexedLayout.referencedShards.has(source.name));
|
|
191
|
+
}
|
|
113
192
|
const parsedShards = await Promise.all(
|
|
114
193
|
safetensorsSources.map(async (source) => {
|
|
115
194
|
const parsed = await parseSafetensorsFile(source);
|
|
@@ -135,6 +214,10 @@ export async function parseSafetensorsSharded(
|
|
|
135
214
|
}
|
|
136
215
|
}
|
|
137
216
|
|
|
217
|
+
if (weightMap) {
|
|
218
|
+
validateParsedShardsAgainstIndex(parsedShards, weightMap);
|
|
219
|
+
}
|
|
220
|
+
|
|
138
221
|
return {
|
|
139
222
|
metadata,
|
|
140
223
|
shards,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
|
|
3
3
|
import { generateShardFilename } from '../formats/rdrr/index.js';
|
|
4
|
-
import { createStreamingHasher } from '../storage/shard-manager.js';
|
|
4
|
+
import { createStreamingHasher, getOpfsPathConfig } from '../storage/shard-manager.js';
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
export class BrowserShardIO {
|
|
@@ -13,7 +13,7 @@ export class BrowserShardIO {
|
|
|
13
13
|
|
|
14
14
|
static async create(modelId, options = {}) {
|
|
15
15
|
const opfsRoot = await navigator.storage.getDirectory();
|
|
16
|
-
const modelsDir = await opfsRoot.getDirectoryHandle(
|
|
16
|
+
const modelsDir = await opfsRoot.getDirectoryHandle(getOpfsPathConfig().opfsRootDir, { create: true });
|
|
17
17
|
const modelDir = await modelsDir.getDirectoryHandle(modelId, { create: true });
|
|
18
18
|
return new BrowserShardIO(modelDir, options);
|
|
19
19
|
}
|
|
@@ -229,9 +229,15 @@ export async function createRemoteTensorSource(url, options = {}) {
|
|
|
229
229
|
try {
|
|
230
230
|
const source = await createHttpTensorSource(url, options);
|
|
231
231
|
return { source, size: source.size, supportsRange: true };
|
|
232
|
-
} catch (
|
|
232
|
+
} catch (error) {
|
|
233
233
|
if (options.allowDownloadFallback === false) {
|
|
234
|
-
throw
|
|
234
|
+
throw error;
|
|
235
|
+
}
|
|
236
|
+
if (options.allowDownloadFallback !== true) {
|
|
237
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
238
|
+
throw new Error(
|
|
239
|
+
`HTTP tensor source failed for "${url}" and download fallback is not explicitly enabled: ${message}`
|
|
240
|
+
);
|
|
235
241
|
}
|
|
236
242
|
const downloaded = await createDownloadTensorSource(url, options);
|
|
237
243
|
return { ...downloaded, supportsRange: false };
|
|
@@ -61,7 +61,7 @@ export async function probeHttpRange(url, options = {}) {
|
|
|
61
61
|
acceptRanges,
|
|
62
62
|
contentEncoding,
|
|
63
63
|
};
|
|
64
|
-
} catch (
|
|
64
|
+
} catch (error) {
|
|
65
65
|
return {
|
|
66
66
|
ok: false,
|
|
67
67
|
status: 0,
|
|
@@ -69,6 +69,7 @@ export async function probeHttpRange(url, options = {}) {
|
|
|
69
69
|
size: null,
|
|
70
70
|
acceptRanges: null,
|
|
71
71
|
contentEncoding: null,
|
|
72
|
+
error: error instanceof Error ? error.message : String(error),
|
|
72
73
|
};
|
|
73
74
|
}
|
|
74
75
|
}
|
|
@@ -76,6 +77,9 @@ export async function probeHttpRange(url, options = {}) {
|
|
|
76
77
|
export async function createHttpTensorSource(url, options = {}) {
|
|
77
78
|
const { headers, signal, name: overrideName } = options;
|
|
78
79
|
const probe = await probeHttpRange(url, { headers, signal });
|
|
80
|
+
if (!probe.ok && probe.status === 0 && probe.error) {
|
|
81
|
+
throw new Error(`HTTP tensor source probe failed for "${url}": ${probe.error}`);
|
|
82
|
+
}
|
|
79
83
|
if (!probe.supportsRange || probe.size == null) {
|
|
80
84
|
throw new Error('HTTP range requests not supported for tensor source');
|
|
81
85
|
}
|
|
@@ -227,9 +227,7 @@ async function* dopplerGenerate(prompt, options = {}) {
|
|
|
227
227
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
228
228
|
throw new Error('doppler() requires options.model.');
|
|
229
229
|
}
|
|
230
|
-
|
|
231
|
-
throw new Error('doppler() does not accept load-affecting options. Use doppler.load(model, options) instead.');
|
|
232
|
-
}
|
|
230
|
+
assertNoLoadAffectingOptions('doppler()', options);
|
|
233
231
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
234
232
|
yield* model.generate(prompt, options);
|
|
235
233
|
}
|
|
@@ -240,10 +238,26 @@ export function doppler(prompt, options) {
|
|
|
240
238
|
|
|
241
239
|
doppler.load = load;
|
|
242
240
|
|
|
241
|
+
function assertNoLoadAffectingOptions(apiName, options) {
|
|
242
|
+
if (!options || typeof options !== 'object') {
|
|
243
|
+
return;
|
|
244
|
+
}
|
|
245
|
+
if (
|
|
246
|
+
options.runtimeConfig !== undefined
|
|
247
|
+
|| options.runtimePreset !== undefined
|
|
248
|
+
|| options.runtimeConfigUrl !== undefined
|
|
249
|
+
) {
|
|
250
|
+
throw new Error(
|
|
251
|
+
`${apiName} does not accept load-affecting options. Use doppler.load(model, options) instead.`
|
|
252
|
+
);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
243
256
|
doppler.text = async function text(prompt, options = {}) {
|
|
244
257
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
245
258
|
throw new Error('doppler.text() requires options.model.');
|
|
246
259
|
}
|
|
260
|
+
assertNoLoadAffectingOptions('doppler.text()', options);
|
|
247
261
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
248
262
|
return model.generateText(prompt, options);
|
|
249
263
|
};
|
|
@@ -252,6 +266,7 @@ doppler.chat = function chat(messages, options = {}) {
|
|
|
252
266
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
253
267
|
throw new Error('doppler.chat() requires options.model.');
|
|
254
268
|
}
|
|
269
|
+
assertNoLoadAffectingOptions('doppler.chat()', options);
|
|
255
270
|
return (async function* run() {
|
|
256
271
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
257
272
|
yield* model.chat(messages, options);
|
|
@@ -262,6 +277,7 @@ doppler.chatText = async function chatText(messages, options = {}) {
|
|
|
262
277
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
263
278
|
throw new Error('doppler.chatText() requires options.model.');
|
|
264
279
|
}
|
|
280
|
+
assertNoLoadAffectingOptions('doppler.chatText()', options);
|
|
265
281
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
266
282
|
return model.chatText(messages, options);
|
|
267
283
|
};
|
|
@@ -284,5 +300,5 @@ doppler.evictAll = async function evictAll() {
|
|
|
284
300
|
|
|
285
301
|
doppler.listModels = async function listModels() {
|
|
286
302
|
const models = await listQuickstartModels();
|
|
287
|
-
return models.map((entry) => entry.
|
|
303
|
+
return models.map((entry) => entry.modelId);
|
|
288
304
|
};
|
|
@@ -128,6 +128,21 @@ async function collectText(iterable) {
|
|
|
128
128
|
return output;
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
function assertNoLoadAffectingOptions(apiName, options) {
|
|
132
|
+
if (!options || typeof options !== 'object') {
|
|
133
|
+
return;
|
|
134
|
+
}
|
|
135
|
+
if (
|
|
136
|
+
options.runtimeConfig !== undefined
|
|
137
|
+
|| options.runtimePreset !== undefined
|
|
138
|
+
|| options.runtimeConfigUrl !== undefined
|
|
139
|
+
) {
|
|
140
|
+
throw new Error(
|
|
141
|
+
`${apiName} does not accept load-affecting options. Use doppler.load(model, options) instead.`
|
|
142
|
+
);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
131
146
|
function createModelHandle(pipeline, resolved) {
|
|
132
147
|
return {
|
|
133
148
|
generate(prompt, options = {}) {
|
|
@@ -246,9 +261,7 @@ async function* dopplerGenerate(prompt, options = {}) {
|
|
|
246
261
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
247
262
|
throw new Error('doppler() requires options.model.');
|
|
248
263
|
}
|
|
249
|
-
|
|
250
|
-
throw new Error('doppler() does not accept load-affecting options. Use doppler.load(model, options) instead.');
|
|
251
|
-
}
|
|
264
|
+
assertNoLoadAffectingOptions('doppler()', options);
|
|
252
265
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
253
266
|
yield* model.generate(prompt, options);
|
|
254
267
|
}
|
|
@@ -259,12 +272,14 @@ export function doppler(prompt, options) {
|
|
|
259
272
|
|
|
260
273
|
doppler.load = load;
|
|
261
274
|
/**
 * Convenience wrapper that runs `doppler(prompt, options)` and hands the
 * result to collectText, returning whatever collectText resolves to.
 *
 * Load-affecting options are rejected up front via
 * assertNoLoadAffectingOptions before anything else is invoked.
 *
 * @param prompt  Prompt forwarded unchanged to `doppler`.
 * @param options Option bag forwarded unchanged to `doppler`.
 */
doppler.text = async function text(prompt, options) {
  assertNoLoadAffectingOptions('doppler.text()', options);
  const stream = doppler(prompt, options);
  return collectText(stream);
};
|
|
264
278
|
doppler.chat = function chat(messages, options = {}) {
|
|
265
279
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
266
280
|
throw new Error('doppler.chat() requires options.model.');
|
|
267
281
|
}
|
|
282
|
+
assertNoLoadAffectingOptions('doppler.chat()', options);
|
|
268
283
|
return (async function* () {
|
|
269
284
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
270
285
|
yield* model.chat(messages, options);
|
|
@@ -274,6 +289,7 @@ doppler.chatText = async function chatText(messages, options = {}) {
|
|
|
274
289
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
275
290
|
throw new Error('doppler.chatText() requires options.model.');
|
|
276
291
|
}
|
|
292
|
+
assertNoLoadAffectingOptions('doppler.chatText()', options);
|
|
277
293
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
278
294
|
return model.chatText(messages, options);
|
|
279
295
|
};
|
|
@@ -11,6 +11,15 @@ import { getPipeline } from './model-manager.js';
|
|
|
11
11
|
|
|
12
12
|
export { formatGemmaChat, formatLlama3Chat, formatGptOssChat };
|
|
13
13
|
|
|
14
|
+
function assertSupportedGenerateOptions(options = {}) {
|
|
15
|
+
if (Array.isArray(options?.stopTokens) && options.stopTokens.length > 0) {
|
|
16
|
+
throw new Error(
|
|
17
|
+
'Doppler provider generate options do not support stopTokens on this surface. ' +
|
|
18
|
+
'Use stopSequences instead.'
|
|
19
|
+
);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
14
23
|
function resolveChatTemplate(pipeline, options) {
|
|
15
24
|
const override = options?.useChatTemplate;
|
|
16
25
|
const runtimeEnabled = pipeline?.runtimeConfig?.inference?.chatTemplate?.enabled;
|
|
@@ -21,6 +30,7 @@ function resolveChatTemplate(pipeline, options) {
|
|
|
21
30
|
}
|
|
22
31
|
|
|
23
32
|
export async function* generate(prompt, options = {}) {
|
|
33
|
+
assertSupportedGenerateOptions(options);
|
|
24
34
|
const pipeline = getPipeline();
|
|
25
35
|
if (!pipeline) {
|
|
26
36
|
throw new Error('No model loaded. Call loadModel() first.');
|
|
@@ -52,6 +62,7 @@ export async function* generate(prompt, options = {}) {
|
|
|
52
62
|
}
|
|
53
63
|
|
|
54
64
|
export async function prefillKV(prompt, options = {}) {
|
|
65
|
+
assertSupportedGenerateOptions(options);
|
|
55
66
|
const pipeline = getPipeline();
|
|
56
67
|
if (!pipeline) {
|
|
57
68
|
throw new Error('No model loaded. Call loadModel() first.');
|
|
@@ -61,6 +72,7 @@ export async function prefillKV(prompt, options = {}) {
|
|
|
61
72
|
}
|
|
62
73
|
|
|
63
74
|
export async function* generateWithPrefixKV(prefix, prompt, options = {}) {
|
|
75
|
+
assertSupportedGenerateOptions(options);
|
|
64
76
|
const pipeline = getPipeline();
|
|
65
77
|
if (!pipeline) {
|
|
66
78
|
throw new Error('No model loaded. Call loadModel() first.');
|
|
@@ -10,6 +10,16 @@ export declare function getPipeline(): InferencePipeline | null;
|
|
|
10
10
|
|
|
11
11
|
export declare function getCurrentModelId(): string | null;
|
|
12
12
|
|
|
13
|
+
export declare function verifyExplicitModelUrlMatch(
|
|
14
|
+
localManifest: RDRRManifest | Record<string, unknown> | null | undefined,
|
|
15
|
+
modelUrl: string | null | undefined,
|
|
16
|
+
fetchRemoteManifest?: (modelUrl: string) => Promise<RDRRManifest | Record<string, unknown> | null>
|
|
17
|
+
): Promise<void>;
|
|
18
|
+
|
|
19
|
+
export declare function shouldAutoTuneKernels(
|
|
20
|
+
runtimeConfig?: Record<string, unknown> | null
|
|
21
|
+
): boolean;
|
|
22
|
+
|
|
13
23
|
export declare function extractTextModelConfig(manifest: RDRRManifest): TextModelConfig;
|
|
14
24
|
|
|
15
25
|
export declare function readOPFSFile(path: string): Promise<ArrayBuffer>;
|
|
@@ -20,6 +20,12 @@ import { log } from '../../debug/index.js';
|
|
|
20
20
|
import { DopplerCapabilities } from './types.js';
|
|
21
21
|
import { GB, HEADER_READ_SIZE } from '../../config/schema/index.js';
|
|
22
22
|
import { resolveBridgeSourceRuntimeBundle } from './source-runtime.js';
|
|
23
|
+
import { getRuntimeConfig } from '../../config/runtime.js';
|
|
24
|
+
import {
|
|
25
|
+
buildSourceArtifactFingerprint,
|
|
26
|
+
createStoredSourceArtifactContext,
|
|
27
|
+
verifyStoredSourceArtifact,
|
|
28
|
+
} from '../../storage/source-artifact-store.js';
|
|
23
29
|
|
|
24
30
|
let pipeline = null;
|
|
25
31
|
let currentModelId = null;
|
|
@@ -34,6 +40,9 @@ function manifestsDiffer(localManifest, remoteManifest) {
|
|
|
34
40
|
const localShards = Array.isArray(localManifest.shards) ? localManifest.shards : [];
|
|
35
41
|
const remoteShards = Array.isArray(remoteManifest.shards) ? remoteManifest.shards : [];
|
|
36
42
|
if (localShards.length !== remoteShards.length) return true;
|
|
43
|
+
if (buildSourceArtifactFingerprint(localManifest) !== buildSourceArtifactFingerprint(remoteManifest)) {
|
|
44
|
+
return true;
|
|
45
|
+
}
|
|
37
46
|
|
|
38
47
|
for (let i = 0; i < localShards.length; i++) {
|
|
39
48
|
const local = localShards[i];
|
|
@@ -61,6 +70,34 @@ async function tryFetchRemoteManifest(modelUrl) {
|
|
|
61
70
|
return manifest;
|
|
62
71
|
}
|
|
63
72
|
|
|
73
|
+
export async function verifyExplicitModelUrlMatch(
|
|
74
|
+
localManifest,
|
|
75
|
+
modelUrl,
|
|
76
|
+
fetchRemoteManifest = tryFetchRemoteManifest
|
|
77
|
+
) {
|
|
78
|
+
if (!localManifest || !modelUrl) {
|
|
79
|
+
return;
|
|
80
|
+
}
|
|
81
|
+
let remoteManifest = null;
|
|
82
|
+
try {
|
|
83
|
+
remoteManifest = await fetchRemoteManifest(modelUrl);
|
|
84
|
+
} catch (error) {
|
|
85
|
+
throw new Error(
|
|
86
|
+
`Could not compare cached manifest with explicit modelUrl "${modelUrl}": ${error.message}`
|
|
87
|
+
);
|
|
88
|
+
}
|
|
89
|
+
if (remoteManifest && manifestsDiffer(localManifest, remoteManifest)) {
|
|
90
|
+
throw new Error(
|
|
91
|
+
`Explicit modelUrl "${modelUrl}" does not match the cached manifest for "${localManifest.modelId ?? 'unknown'}". ` +
|
|
92
|
+
'Clear the cache or load the matching source explicitly.'
|
|
93
|
+
);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
export function shouldAutoTuneKernels(runtimeConfig = getRuntimeConfig()) {
|
|
98
|
+
return runtimeConfig?.shared?.kernelWarmup?.autoTune === true;
|
|
99
|
+
}
|
|
100
|
+
|
|
64
101
|
export function getPipeline() {
|
|
65
102
|
return pipeline;
|
|
66
103
|
}
|
|
@@ -69,6 +106,14 @@ export function getCurrentModelId() {
|
|
|
69
106
|
return currentModelId;
|
|
70
107
|
}
|
|
71
108
|
|
|
109
|
+
function requireManifestQuantization(manifest) {
|
|
110
|
+
const quantization = String(manifest?.quantization ?? '').trim();
|
|
111
|
+
if (!quantization) {
|
|
112
|
+
throw new Error('Manifest is missing quantization; re-convert the model.');
|
|
113
|
+
}
|
|
114
|
+
return quantization.toUpperCase();
|
|
115
|
+
}
|
|
116
|
+
|
|
72
117
|
export function extractTextModelConfig(manifest) {
|
|
73
118
|
const arch = (manifest.architecture && typeof manifest.architecture === 'object')
|
|
74
119
|
? manifest.architecture
|
|
@@ -86,12 +131,12 @@ export function extractTextModelConfig(manifest) {
|
|
|
86
131
|
headDim: arch.headDim,
|
|
87
132
|
vocabSize: arch.vocabSize,
|
|
88
133
|
maxSeqLen: arch.maxSeqLen,
|
|
89
|
-
quantization: (manifest
|
|
134
|
+
quantization: requireManifestQuantization(manifest),
|
|
90
135
|
};
|
|
91
136
|
}
|
|
92
137
|
|
|
93
138
|
function estimateDequantizedWeightsBytes(manifest) {
|
|
94
|
-
const q = (manifest
|
|
139
|
+
const q = requireManifestQuantization(manifest);
|
|
95
140
|
const total = manifest?.totalSize || 0;
|
|
96
141
|
if (q.startsWith('Q4')) {
|
|
97
142
|
return total * 8;
|
|
@@ -243,6 +288,24 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
243
288
|
manifest = parseManifest(manifestJson);
|
|
244
289
|
log.info('DopplerProvider', `Loaded manifest via bridge: ${manifest.modelId}`);
|
|
245
290
|
if (onProgress) onProgress({ stage: 'manifest', message: 'Manifest loaded via bridge' });
|
|
291
|
+
const persistedSourceBundle = await resolveBridgeSourceRuntimeBundle({
|
|
292
|
+
bridgeClient,
|
|
293
|
+
localPath,
|
|
294
|
+
modelId,
|
|
295
|
+
manifest,
|
|
296
|
+
verifyHashes: true,
|
|
297
|
+
onProgress: (progress) => onProgress?.(progress),
|
|
298
|
+
});
|
|
299
|
+
if (persistedSourceBundle) {
|
|
300
|
+
bridgeStorageContext = persistedSourceBundle.storageContext;
|
|
301
|
+
bridgeSourceMode = true;
|
|
302
|
+
if (onProgress) {
|
|
303
|
+
onProgress({
|
|
304
|
+
stage: 'manifest',
|
|
305
|
+
message: `Direct-source manifest ready (${persistedSourceBundle.sourceKind} artifact mode)`,
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
}
|
|
246
309
|
} catch (manifestError) {
|
|
247
310
|
log.warn(
|
|
248
311
|
'DopplerProvider',
|
|
@@ -252,6 +315,7 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
252
315
|
bridgeClient,
|
|
253
316
|
localPath,
|
|
254
317
|
modelId,
|
|
318
|
+
verifyHashes: true,
|
|
255
319
|
onProgress: (progress) => onProgress?.(progress),
|
|
256
320
|
});
|
|
257
321
|
if (!sourceBundle) {
|
|
@@ -286,25 +350,26 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
286
350
|
|
|
287
351
|
let integrity = { valid: false, missingShards: [] };
|
|
288
352
|
if (manifest) {
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
353
|
+
const sourceArtifactFingerprint = buildSourceArtifactFingerprint(manifest);
|
|
354
|
+
if (sourceArtifactFingerprint) {
|
|
355
|
+
const sourceIntegrity = await verifyStoredSourceArtifact(manifest, { checkHashes: false }).catch(() => ({
|
|
356
|
+
valid: false,
|
|
357
|
+
missingFiles: [],
|
|
358
|
+
}));
|
|
359
|
+
integrity = {
|
|
360
|
+
valid: sourceIntegrity.valid,
|
|
361
|
+
missingShards: Array.isArray(sourceIntegrity.missingFiles) ? sourceIntegrity.missingFiles : [],
|
|
362
|
+
};
|
|
363
|
+
} else {
|
|
364
|
+
integrity = await verifyIntegrity({ checkHashes: false }).catch(() => ({
|
|
365
|
+
valid: false,
|
|
366
|
+
missingShards: [],
|
|
367
|
+
}));
|
|
368
|
+
}
|
|
293
369
|
}
|
|
294
370
|
|
|
295
371
|
if (integrity.valid && manifest && modelUrl) {
|
|
296
|
-
|
|
297
|
-
const remoteManifest = await tryFetchRemoteManifest(modelUrl);
|
|
298
|
-
if (remoteManifest && manifestsDiffer(manifest, remoteManifest)) {
|
|
299
|
-
log.info('DopplerProvider', 'Cached model differs from source URL manifest; refreshing cache');
|
|
300
|
-
integrity = { valid: false, missingShards: [] };
|
|
301
|
-
}
|
|
302
|
-
} catch (error) {
|
|
303
|
-
log.warn(
|
|
304
|
-
'DopplerProvider',
|
|
305
|
-
`Could not compare cached manifest with source URL (${error.message}); using cached model`
|
|
306
|
-
);
|
|
307
|
-
}
|
|
372
|
+
await verifyExplicitModelUrlMatch(manifest, modelUrl);
|
|
308
373
|
}
|
|
309
374
|
|
|
310
375
|
if (!integrity.valid && modelUrl) {
|
|
@@ -365,7 +430,11 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
365
430
|
DopplerCapabilities.kernelsWarmed = true;
|
|
366
431
|
}
|
|
367
432
|
|
|
368
|
-
if (
|
|
433
|
+
if (
|
|
434
|
+
!DopplerCapabilities.kernelsTuned
|
|
435
|
+
&& shouldAutoTuneKernels()
|
|
436
|
+
&& typeof setTimeout !== 'undefined'
|
|
437
|
+
) {
|
|
369
438
|
DopplerCapabilities.kernelsTuned = true;
|
|
370
439
|
const tuneConfig = extractTextModelConfig(manifest);
|
|
371
440
|
setTimeout(() => {
|
|
@@ -389,6 +458,9 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
389
458
|
const memCaps = await getMemoryCapabilities();
|
|
390
459
|
|
|
391
460
|
let storageContext = bridgeStorageContext;
|
|
461
|
+
if (!storageContext && buildSourceArtifactFingerprint(manifest)) {
|
|
462
|
+
storageContext = createStoredSourceArtifactContext(manifest, { verifyHashes: true });
|
|
463
|
+
}
|
|
392
464
|
if (!storageContext && useBridge && DopplerCapabilities.bridgeClient && DopplerCapabilities.localPath) {
|
|
393
465
|
const bridgeClient = DopplerCapabilities.bridgeClient;
|
|
394
466
|
const basePath = DopplerCapabilities.localPath.endsWith('/')
|
|
@@ -6,7 +6,9 @@ export interface ResolveBridgeSourceRuntimeBundleOptions {
|
|
|
6
6
|
bridgeClient: ExtensionBridgeClient;
|
|
7
7
|
localPath: string;
|
|
8
8
|
modelId?: string | null;
|
|
9
|
+
manifest?: RDRRManifest | null;
|
|
9
10
|
onProgress?: (info: { stage: string; message: string }) => void;
|
|
11
|
+
verifyHashes?: boolean;
|
|
10
12
|
}
|
|
11
13
|
|
|
12
14
|
export interface BridgeSourceRuntimeBundle {
|
|
@@ -19,4 +21,3 @@ export interface BridgeSourceRuntimeBundle {
|
|
|
19
21
|
export declare function resolveBridgeSourceRuntimeBundle(
|
|
20
22
|
options: ResolveBridgeSourceRuntimeBundleOptions
|
|
21
23
|
): Promise<BridgeSourceRuntimeBundle | null>;
|
|
22
|
-
|