@simulatte/doppler 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +16 -23
- package/package.json +30 -32
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +31 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +5 -20
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +18 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +81 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +15 -2
- package/src/config/merge-contract-check.js +66 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +43 -8
- package/src/config/presets/models/gemma2.json +3 -2
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +3 -2
- package/src/config/schema/manifest.schema.js +17 -4
- package/src/config/schema/storage.schema.js +1 -1
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +104 -11
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +16 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +50 -29
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +40 -16
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +83 -27
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +53 -3
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +59 -40
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +66 -43
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +8 -0
- package/src/inference/browser-harness.js +149 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +10 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +192 -112
- package/src/inference/pipelines/text/attention/record.js +77 -14
- package/src/inference/pipelines/text/attention/run.js +112 -14
- package/src/inference/pipelines/text/config.js +17 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +46 -23
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
- package/src/inference/pipelines/text/generator-steps.js +340 -221
- package/src/inference/pipelines/text/generator.js +56 -40
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +94 -25
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +4 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
- package/src/inference/pipelines/text/linear-attention.js +113 -9
- package/src/inference/pipelines/text/logits/gpu.js +12 -7
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +13 -12
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +282 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +17 -7
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +10 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +84 -14
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +214 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +27 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +365 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +55 -6
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +30 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +120 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/types/model.d.ts +5 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +50 -26
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
computeHash,
|
|
6
6
|
getStorageBackendType,
|
|
7
7
|
} from '../storage/shard-manager.js';
|
|
8
|
+
import { getExpectedShardHash } from '../formats/rdrr/index.js';
|
|
8
9
|
import { formatBytes } from '../storage/quota.js';
|
|
9
10
|
import { log, trace as debugTrace } from '../debug/index.js';
|
|
10
11
|
import { getRuntimeConfig } from '../config/runtime.js';
|
|
@@ -23,6 +24,7 @@ export class ShardCache {
|
|
|
23
24
|
#inFlightLoads = 0;
|
|
24
25
|
#highPriorityQueue = [];
|
|
25
26
|
#lowPriorityQueue = [];
|
|
27
|
+
#epoch = 0;
|
|
26
28
|
|
|
27
29
|
lastSource = null;
|
|
28
30
|
|
|
@@ -123,6 +125,7 @@ export class ShardCache {
|
|
|
123
125
|
const shardInfo = this.#manifest?.shards?.[shardIndex];
|
|
124
126
|
const sizeStr = shardInfo ? formatBytes(shardInfo.size) : '';
|
|
125
127
|
const priority = options.priority === 'low' ? 'low' : 'high';
|
|
128
|
+
const epoch = this.#epoch;
|
|
126
129
|
|
|
127
130
|
// 1. Check cache first
|
|
128
131
|
if (this.#cache.has(shardIndex)) {
|
|
@@ -136,24 +139,29 @@ export class ShardCache {
|
|
|
136
139
|
}
|
|
137
140
|
|
|
138
141
|
// 2. Check if fetch is already in-flight - deduplicate concurrent requests
|
|
139
|
-
|
|
142
|
+
const inFlight = this.#fetchPromises.get(shardIndex);
|
|
143
|
+
if (inFlight && inFlight.epoch === epoch) {
|
|
140
144
|
log.verbose('ShardCache', `Shard ${shardIndex}: waiting for in-flight fetch`);
|
|
141
|
-
return
|
|
145
|
+
return inFlight.promise;
|
|
142
146
|
}
|
|
143
147
|
|
|
144
148
|
// 3. Start the actual fetch and store the promise for deduplication
|
|
145
149
|
const fetchPromise = this.#scheduleLoad(
|
|
146
150
|
priority,
|
|
147
|
-
|
|
151
|
+
epoch,
|
|
152
|
+
() => this.#doLoad(shardIndex, sizeStr, epoch)
|
|
148
153
|
);
|
|
149
|
-
|
|
154
|
+
const fetchEntry = { epoch, promise: fetchPromise };
|
|
155
|
+
this.#fetchPromises.set(shardIndex, fetchEntry);
|
|
150
156
|
|
|
151
157
|
try {
|
|
152
158
|
const result = await fetchPromise;
|
|
153
159
|
return result;
|
|
154
160
|
} finally {
|
|
155
161
|
// Remove from in-flight map when done (success or error)
|
|
156
|
-
this.#fetchPromises.
|
|
162
|
+
if (this.#fetchPromises.get(shardIndex) === fetchEntry) {
|
|
163
|
+
this.#fetchPromises.delete(shardIndex);
|
|
164
|
+
}
|
|
157
165
|
}
|
|
158
166
|
}
|
|
159
167
|
|
|
@@ -195,6 +203,13 @@ export class ShardCache {
|
|
|
195
203
|
throw new Error('Custom shard loader must return ArrayBuffer or Uint8Array.');
|
|
196
204
|
}
|
|
197
205
|
|
|
206
|
+
#throwShortStreamRead(shardIndex, start, want, produced, path) {
|
|
207
|
+
throw new Error(
|
|
208
|
+
`Shard ${shardIndex} short stream read via ${path}: ` +
|
|
209
|
+
`offset=${start}, expected=${want}, got=${produced}.`
|
|
210
|
+
);
|
|
211
|
+
}
|
|
212
|
+
|
|
198
213
|
async loadRange(shardIndex, offset = 0, length = null, options = {}) {
|
|
199
214
|
const start = this.#toRangeOffset(offset);
|
|
200
215
|
const want = length == null ? null : this.#toRangeOffset(length);
|
|
@@ -276,9 +291,15 @@ export class ShardCache {
|
|
|
276
291
|
this.#setLastSource('RAM', 0, 'stream', 'cache');
|
|
277
292
|
const view = new Uint8Array(cached);
|
|
278
293
|
const end = want == null ? view.length : Math.min(view.length, start + want);
|
|
294
|
+
let produced = 0;
|
|
279
295
|
for (let cursor = start; cursor < end; cursor += chunkBytes) {
|
|
280
296
|
const sliceEnd = Math.min(end, cursor + chunkBytes);
|
|
281
|
-
|
|
297
|
+
const chunk = view.slice(cursor, sliceEnd);
|
|
298
|
+
produced += chunk.byteLength;
|
|
299
|
+
yield chunk;
|
|
300
|
+
}
|
|
301
|
+
if (want != null && produced < want) {
|
|
302
|
+
this.#throwShortStreamRead(shardIndex, start, want, produced, 'cache');
|
|
282
303
|
}
|
|
283
304
|
return;
|
|
284
305
|
}
|
|
@@ -323,6 +344,15 @@ export class ShardCache {
|
|
|
323
344
|
resumed += bytes.byteLength;
|
|
324
345
|
yield bytes;
|
|
325
346
|
}
|
|
347
|
+
if (want != null && produced + resumed < want) {
|
|
348
|
+
this.#throwShortStreamRead(
|
|
349
|
+
shardIndex,
|
|
350
|
+
start,
|
|
351
|
+
want,
|
|
352
|
+
produced + resumed,
|
|
353
|
+
'custom-range-fallback'
|
|
354
|
+
);
|
|
355
|
+
}
|
|
326
356
|
const elapsed = (performance.now() - streamStart) / 1000;
|
|
327
357
|
this.#setLastSource(
|
|
328
358
|
'custom',
|
|
@@ -358,6 +388,15 @@ export class ShardCache {
|
|
|
358
388
|
resumed += bytes.byteLength;
|
|
359
389
|
yield bytes;
|
|
360
390
|
}
|
|
391
|
+
if (produced + resumed < want) {
|
|
392
|
+
this.#throwShortStreamRead(
|
|
393
|
+
shardIndex,
|
|
394
|
+
start,
|
|
395
|
+
want,
|
|
396
|
+
produced + resumed,
|
|
397
|
+
'custom-range-fallback'
|
|
398
|
+
);
|
|
399
|
+
}
|
|
361
400
|
const elapsed = (performance.now() - streamStart) / 1000;
|
|
362
401
|
this.#setLastSource(
|
|
363
402
|
'custom',
|
|
@@ -369,6 +408,9 @@ export class ShardCache {
|
|
|
369
408
|
return;
|
|
370
409
|
}
|
|
371
410
|
|
|
411
|
+
if (want != null && produced < want) {
|
|
412
|
+
this.#throwShortStreamRead(shardIndex, start, want, produced, 'custom-stream');
|
|
413
|
+
}
|
|
372
414
|
const elapsed = (performance.now() - streamStart) / 1000;
|
|
373
415
|
this.#setLastSource('custom', elapsed, 'stream', 'custom-stream');
|
|
374
416
|
return;
|
|
@@ -403,6 +445,9 @@ export class ShardCache {
|
|
|
403
445
|
}
|
|
404
446
|
}
|
|
405
447
|
}
|
|
448
|
+
if (want != null && produced < want) {
|
|
449
|
+
this.#throwShortStreamRead(shardIndex, start, want, produced, 'custom-range');
|
|
450
|
+
}
|
|
406
451
|
this.#setLastSource(
|
|
407
452
|
'custom',
|
|
408
453
|
(performance.now() - rangeStart) / 1000,
|
|
@@ -414,8 +459,14 @@ export class ShardCache {
|
|
|
414
459
|
}
|
|
415
460
|
|
|
416
461
|
const streamStart = performance.now();
|
|
462
|
+
let produced = 0;
|
|
417
463
|
for await (const chunk of streamShardRangeFromStore(shardIndex, start, want, { chunkBytes })) {
|
|
418
|
-
|
|
464
|
+
const bytes = chunk instanceof Uint8Array ? chunk : new Uint8Array(chunk);
|
|
465
|
+
produced += bytes.byteLength;
|
|
466
|
+
yield bytes;
|
|
467
|
+
}
|
|
468
|
+
if (want != null && produced < want) {
|
|
469
|
+
this.#throwShortStreamRead(shardIndex, start, want, produced, 'backend-stream');
|
|
419
470
|
}
|
|
420
471
|
const elapsed = (performance.now() - streamStart) / 1000;
|
|
421
472
|
const backend = getStorageBackendType() ?? 'storage';
|
|
@@ -426,7 +477,7 @@ export class ShardCache {
|
|
|
426
477
|
return this.load(shardIndex, { priority: 'low' });
|
|
427
478
|
}
|
|
428
479
|
|
|
429
|
-
async #doLoad(shardIndex, sizeStr) {
|
|
480
|
+
async #doLoad(shardIndex, sizeStr, epoch) {
|
|
430
481
|
if (this.#customLoader) {
|
|
431
482
|
const startTime = performance.now();
|
|
432
483
|
let data = await this.#customLoader(shardIndex);
|
|
@@ -434,11 +485,11 @@ export class ShardCache {
|
|
|
434
485
|
// Verify hash if enabled
|
|
435
486
|
if (this.#verifyHashes && this.#manifest) {
|
|
436
487
|
const shardInfo = this.#manifest.shards?.[shardIndex];
|
|
437
|
-
const
|
|
488
|
+
const algorithm = shardInfo?.hashAlgorithm ?? this.#manifest.hashAlgorithm;
|
|
489
|
+
const expectedHash = getExpectedShardHash(shardInfo, algorithm);
|
|
438
490
|
if (!expectedHash) {
|
|
439
491
|
throw new Error(`Shard ${shardIndex} missing hash in manifest.`);
|
|
440
492
|
}
|
|
441
|
-
const algorithm = shardInfo?.hashAlgorithm ?? this.#manifest.hashAlgorithm;
|
|
442
493
|
if (!algorithm) {
|
|
443
494
|
throw new Error(`Manifest missing hashAlgorithm for shard ${shardIndex}.`);
|
|
444
495
|
}
|
|
@@ -453,7 +504,9 @@ export class ShardCache {
|
|
|
453
504
|
// Normalize to ArrayBuffer for downstream slicing
|
|
454
505
|
const arrayBuffer = this.#toArrayBuffer(data);
|
|
455
506
|
|
|
456
|
-
this.#
|
|
507
|
+
if (epoch === this.#epoch) {
|
|
508
|
+
this.#add(shardIndex, arrayBuffer);
|
|
509
|
+
}
|
|
457
510
|
|
|
458
511
|
const elapsed = (performance.now() - startTime) / 1000;
|
|
459
512
|
this.#setLastSource('custom', elapsed, 'full', 'custom-loader');
|
|
@@ -463,7 +516,9 @@ export class ShardCache {
|
|
|
463
516
|
|
|
464
517
|
const storageStart = performance.now();
|
|
465
518
|
const data = await loadShardFromStore(shardIndex);
|
|
466
|
-
this.#
|
|
519
|
+
if (epoch === this.#epoch) {
|
|
520
|
+
this.#add(shardIndex, data);
|
|
521
|
+
}
|
|
467
522
|
const elapsed = (performance.now() - storageStart) / 1000;
|
|
468
523
|
const backend = getStorageBackendType() ?? 'storage';
|
|
469
524
|
this.#setLastSource(backend, elapsed, 'full', 'backend-full');
|
|
@@ -471,12 +526,15 @@ export class ShardCache {
|
|
|
471
526
|
return data;
|
|
472
527
|
}
|
|
473
528
|
|
|
474
|
-
async #scheduleLoad(priority, task) {
|
|
529
|
+
async #scheduleLoad(priority, epoch, task) {
|
|
475
530
|
const limit = this.#maxConcurrentLoads > 0
|
|
476
531
|
? this.#maxConcurrentLoads
|
|
477
532
|
: Number.POSITIVE_INFINITY;
|
|
478
533
|
|
|
479
534
|
if (this.#inFlightLoads < limit) {
|
|
535
|
+
if (epoch !== this.#epoch) {
|
|
536
|
+
throw new Error('Shard load invalidated by cache clear().');
|
|
537
|
+
}
|
|
480
538
|
this.#inFlightLoads++;
|
|
481
539
|
try {
|
|
482
540
|
return await task();
|
|
@@ -487,7 +545,7 @@ export class ShardCache {
|
|
|
487
545
|
}
|
|
488
546
|
|
|
489
547
|
return new Promise((resolve, reject) => {
|
|
490
|
-
const entry = { task, resolve, reject };
|
|
548
|
+
const entry = { task, resolve, reject, epoch };
|
|
491
549
|
if (priority === 'low') {
|
|
492
550
|
this.#lowPriorityQueue.push(entry);
|
|
493
551
|
} else {
|
|
@@ -504,6 +562,10 @@ export class ShardCache {
|
|
|
504
562
|
while (this.#inFlightLoads < limit) {
|
|
505
563
|
const entry = this.#highPriorityQueue.shift() ?? this.#lowPriorityQueue.shift();
|
|
506
564
|
if (!entry) return;
|
|
565
|
+
if (entry.epoch !== this.#epoch) {
|
|
566
|
+
entry.reject(new Error('Shard load invalidated by cache clear().'));
|
|
567
|
+
continue;
|
|
568
|
+
}
|
|
507
569
|
|
|
508
570
|
this.#inFlightLoads++;
|
|
509
571
|
Promise.resolve()
|
|
@@ -529,6 +591,14 @@ export class ShardCache {
|
|
|
529
591
|
clear() {
|
|
530
592
|
const count = this.#cache.size;
|
|
531
593
|
const bytes = this.totalBytes;
|
|
594
|
+
this.#epoch++;
|
|
595
|
+
const queued = [...this.#highPriorityQueue, ...this.#lowPriorityQueue];
|
|
596
|
+
this.#highPriorityQueue = [];
|
|
597
|
+
this.#lowPriorityQueue = [];
|
|
598
|
+
this.#fetchPromises.clear();
|
|
599
|
+
for (const entry of queued) {
|
|
600
|
+
entry.reject(new Error('Shard load invalidated by cache clear().'));
|
|
601
|
+
}
|
|
532
602
|
this.#cache.clear();
|
|
533
603
|
debugTrace.loader(`Cleared shard cache: ${count} shards, ${formatBytes(bytes)} freed`);
|
|
534
604
|
}
|
|
@@ -2,6 +2,28 @@ import { loadTensorsFromStore } from '../storage/shard-manager.js';
|
|
|
2
2
|
import { parseTensorMap } from '../formats/rdrr/index.js';
|
|
3
3
|
import { log, trace as debugTrace } from '../debug/index.js';
|
|
4
4
|
|
|
5
|
+
/**
 * Normalize a tensor's optional `spans` list into `{ shardIndex, offset, size }`
 * records.
 *
 * Each span may name its shard with either a numeric `shardIndex` or a
 * `shard` field; the output always uses `shardIndex`. Any other properties on
 * the input span are dropped. `offset` and `size` are copied through as-is.
 *
 * @param {Array<object>|undefined} spans - Raw spans from the tensor entry.
 * @param {string} name - Tensor name, used only in error messages.
 * @param {string} sourceLabel - Where the entry came from (e.g. 'tensors.json'),
 *   used only in error messages.
 * @returns {Array<{shardIndex: number, offset: *, size: *}>|undefined}
 *   `undefined` when `spans` is `undefined`, otherwise a new array.
 * @throws {Error} If `spans` is defined but not an array, if a span has no
 *   numeric shard index, or if the shard index is not a non-negative integer.
 */
function normalizeLocationSpans(spans, name, sourceLabel) {
  if (spans === undefined) {
    return undefined;
  }
  if (!Array.isArray(spans)) {
    throw new Error(`Tensor "${name}" has invalid spans in ${sourceLabel}`);
  }
  return spans.map((span, spanIndex) => {
    const shardIndex = typeof span?.shardIndex === 'number'
      ? span.shardIndex
      : span?.shard;
    if (typeof shardIndex !== 'number') {
      throw new Error(`Tensor "${name}" span[${spanIndex}] missing shard index in ${sourceLabel}`);
    }
    // `typeof` alone also admits NaN, Infinity, fractions and negatives, none
    // of which can address a shard; reject them here instead of letting a
    // bad index surface later as a confusing lookup failure.
    if (!Number.isInteger(shardIndex) || shardIndex < 0) {
      throw new Error(
        `Tensor "${name}" span[${spanIndex}] has invalid shard index ${shardIndex} in ${sourceLabel}`
      );
    }
    return {
      shardIndex,
      offset: span.offset,
      size: span.size,
    };
  });
}
|
|
26
|
+
|
|
5
27
|
export async function buildTensorLocations(manifest, options = {}) {
|
|
6
28
|
const locations = new Map();
|
|
7
29
|
|
|
@@ -37,14 +59,14 @@ export async function buildTensorLocations(manifest, options = {}) {
|
|
|
37
59
|
throw new Error(`Tensor "${name}" missing role in tensors.json`);
|
|
38
60
|
}
|
|
39
61
|
locations.set(name, {
|
|
40
|
-
shardIndex: info.shard,
|
|
62
|
+
shardIndex: info.shardIndex ?? info.shard,
|
|
41
63
|
offset: info.offset,
|
|
42
64
|
size: info.size,
|
|
43
65
|
shape: info.shape,
|
|
44
66
|
dtype: info.dtype,
|
|
45
67
|
role: info.role,
|
|
46
68
|
group: info.group,
|
|
47
|
-
spans: info.spans,
|
|
69
|
+
spans: normalizeLocationSpans(info.spans, name, 'tensors.json'),
|
|
48
70
|
layout: info.layout,
|
|
49
71
|
originalShape: info.originalShape,
|
|
50
72
|
});
|
|
@@ -73,7 +95,7 @@ export async function buildTensorLocations(manifest, options = {}) {
|
|
|
73
95
|
dtype: tensorInfo.dtype,
|
|
74
96
|
role: tensorInfo.role,
|
|
75
97
|
group: tensorInfo.group,
|
|
76
|
-
spans: tensorInfo.spans,
|
|
98
|
+
spans: normalizeLocationSpans(tensorInfo.spans, name, 'manifest.tensors'),
|
|
77
99
|
layout: tensorInfo.layout,
|
|
78
100
|
originalShape: tensorInfo.originalShape,
|
|
79
101
|
});
|