@simulatte/doppler 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +16 -23
- package/package.json +30 -32
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +31 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +5 -20
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +18 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +81 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +15 -2
- package/src/config/merge-contract-check.js +66 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +43 -8
- package/src/config/presets/models/gemma2.json +3 -2
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +3 -2
- package/src/config/schema/manifest.schema.js +17 -4
- package/src/config/schema/storage.schema.js +1 -1
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +104 -11
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +16 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +50 -29
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +40 -16
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +83 -27
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +53 -3
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +59 -40
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +66 -43
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +8 -0
- package/src/inference/browser-harness.js +149 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +10 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +192 -112
- package/src/inference/pipelines/text/attention/record.js +77 -14
- package/src/inference/pipelines/text/attention/run.js +112 -14
- package/src/inference/pipelines/text/config.js +17 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +46 -23
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
- package/src/inference/pipelines/text/generator-steps.js +340 -221
- package/src/inference/pipelines/text/generator.js +56 -40
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +94 -25
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +4 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
- package/src/inference/pipelines/text/linear-attention.js +113 -9
- package/src/inference/pipelines/text/logits/gpu.js +12 -7
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +13 -12
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +282 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +17 -7
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +10 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +84 -14
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +214 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +27 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +365 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +55 -6
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +30 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +120 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/types/model.d.ts +5 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +50 -26
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { log } from '../debug/index.js';
|
|
2
|
+
import { getExpectedShardHash } from '../formats/rdrr/index.js';
|
|
2
3
|
import {
|
|
3
4
|
computeHash,
|
|
4
5
|
createStreamingHasher,
|
|
@@ -55,22 +56,30 @@ const inFlightDeliveries = new Map();
|
|
|
55
56
|
const p2pTransportPolicyState = new WeakMap();
|
|
56
57
|
|
|
57
58
|
/**
 * Normalize a configured `distribution.sourceOrder` list.
 *
 * Each entry is stringified, trimmed and lower-cased, validated against
 * DISTRIBUTION_SOURCES, and de-duplicated while keeping first-seen order.
 *
 * @param {unknown} [rawSources] - Configured source order. `undefined` or
 *   `null` (including an omitted argument) selects a copy of
 *   DISTRIBUTION_SOURCES.
 * @returns {string[]} The normalized, de-duplicated source order.
 * @throws {Error} If `rawSources` is provided but is not an array, contains
 *   an entry that is not in DISTRIBUTION_SOURCES, or is an empty array.
 */
function normalizeDistributionSourceOrder(rawSources) {
  // Deliberately no `= []` parameter default: a default would replace
  // `undefined` before this guard runs, making the guard unreachable for
  // omitted values and sending them into the "at least one source" throw.
  if (rawSources === undefined || rawSources === null) {
    return [...DISTRIBUTION_SOURCES];
  }
  if (!Array.isArray(rawSources)) {
    throw new Error('distribution.sourceOrder must be an array when provided.');
  }

  const normalized = [];
  const seen = new Set();

  for (const value of rawSources) {
    const source = String(value || '').trim().toLowerCase();
    if (!DISTRIBUTION_SOURCES.includes(source)) {
      // Fall back to the raw value so blank/falsy entries still show up in the message.
      throw new Error(`distribution.sourceOrder contains unsupported source "${source || value}".`);
    }
    if (seen.has(source)) continue;
    seen.add(source);
    normalized.push(source);
  }

  // Only reachable for an explicitly provided empty array.
  if (normalized.length === 0) {
    throw new Error('distribution.sourceOrder must include at least one supported source.');
  }
  return normalized;
}
|
|
75
84
|
|
|
76
85
|
function normalizeInteger(value, fallback, allowZero = false) {
|
|
@@ -81,6 +90,23 @@ function normalizeInteger(value, fallback, allowZero = false) {
|
|
|
81
90
|
: fallback;
|
|
82
91
|
}
|
|
83
92
|
|
|
93
|
+
/**
 * Validate an integer configuration value, throwing on anything invalid.
 *
 * @param {unknown} value - Raw configured value. Numbers, bigints and
 *   non-blank numeric strings are accepted.
 * @param {string} label - Config path used as the prefix of error messages.
 * @param {{ allowZero?: boolean, fallback?: number | null }} [options]
 *   `allowZero` lowers the minimum from 1 to 0. `fallback` is returned as-is
 *   when `value` is `undefined`/`null`; a `null` fallback means "no fallback".
 * @returns {number} The parsed integer, or `fallback` when `value` is absent.
 * @throws {Error} If `value` is absent with no fallback, or is not an integer
 *   at or above the minimum.
 */
function normalizeRequiredInteger(value, label, { allowZero = false, fallback = null } = {}) {
  if (value === undefined || value === null) {
    if (fallback !== null) {
      return fallback;
    }
    throw new Error(`${label} is required.`);
  }

  const requirement =
    `${label} must be a ${allowZero ? 'non-negative' : 'positive'} integer when provided.`;

  // Number() coerces '', [] and false to 0 and true to 1, which would let
  // those values pass validation; only parse genuinely numeric input kinds.
  const isNonBlankString = typeof value === 'string' && value.trim() !== '';
  if (typeof value !== 'number' && typeof value !== 'bigint' && !isNonBlankString) {
    throw new Error(requirement);
  }

  const parsed = Number(value);
  const min = allowZero ? 0 : 1;
  // isSafeInteger also rejects magnitudes that cannot be represented exactly.
  if (!Number.isSafeInteger(parsed) || parsed < min) {
    throw new Error(requirement);
  }
  return parsed;
}
|
|
109
|
+
|
|
84
110
|
function normalizeContentEncodings(value) {
|
|
85
111
|
if (!value) return [];
|
|
86
112
|
return value
|
|
@@ -95,13 +121,17 @@ function normalizeManifestVersionSet(value) {
|
|
|
95
121
|
return normalized || null;
|
|
96
122
|
}
|
|
97
123
|
|
|
98
|
-
/**
 * Validate a sampling rate in the closed range [0, 1].
 *
 * @param {unknown} value - Raw configured rate. Numbers and non-blank numeric
 *   strings are accepted.
 * @param {number} [fallback=1] - Returned as-is when `value` is `undefined`/`null`.
 * @param {string} [label] - Config path used as the prefix of error messages.
 * @returns {number} The parsed rate, or `fallback` when `value` is absent.
 * @throws {Error} If `value` is not a finite number or lies outside [0, 1].
 */
function normalizeSamplingRate(value, fallback = 1, label = 'distribution.sourceDecision.trace.samplingRate') {
  if (value === undefined || value === null) {
    return fallback;
  }

  // Number() coerces '', [] and false to 0 and true to 1, which would be
  // silently accepted as valid rates; treat such inputs as non-numeric.
  const isNonBlankString = typeof value === 'string' && value.trim() !== '';
  const parsed = typeof value === 'number' || isNonBlankString ? Number(value) : Number.NaN;

  if (!Number.isFinite(parsed)) {
    throw new Error(`${label} must be a finite number between 0 and 1 when provided.`);
  }
  if (parsed < 0 || parsed > 1) {
    throw new Error(`${label} must be between 0 and 1 when provided.`);
  }
  return parsed;
}
|
|
107
137
|
|
|
@@ -479,19 +509,28 @@ function normalizeP2PConfig(config = {}) {
|
|
|
479
509
|
|
|
480
510
|
return {
|
|
481
511
|
enabled,
|
|
482
|
-
timeoutMs:
|
|
483
|
-
|
|
484
|
-
|
|
512
|
+
timeoutMs: normalizeRequiredInteger(
|
|
513
|
+
rawTimeoutMs,
|
|
514
|
+
'distribution.p2p.timeoutMs',
|
|
515
|
+
{ fallback: DEFAULT_P2P_TIMEOUT_MS }
|
|
516
|
+
),
|
|
517
|
+
maxRetries: normalizeRequiredInteger(
|
|
518
|
+
rawMaxRetries,
|
|
519
|
+
'distribution.p2p.maxRetries',
|
|
520
|
+
{ allowZero: true, fallback: DEFAULT_P2P_MAX_RETRIES }
|
|
521
|
+
),
|
|
522
|
+
retryDelayMs: normalizeRequiredInteger(
|
|
523
|
+
rawRetryDelayMs,
|
|
524
|
+
'distribution.p2p.retryDelayMs',
|
|
525
|
+
{ allowZero: true, fallback: DEFAULT_P2P_RETRY_DELAY_MS }
|
|
526
|
+
),
|
|
485
527
|
transport,
|
|
486
528
|
contractVersion,
|
|
487
529
|
controlPlane: normalizeP2PControlPlaneConfig({
|
|
488
530
|
...DEFAULT_DISTRIBUTION_CONFIG.p2p.controlPlane,
|
|
489
531
|
...rawControlPlane,
|
|
490
|
-
tokenRefreshSkewMs:
|
|
491
|
-
|
|
492
|
-
DEFAULT_P2P_CONTROL_PLANE_TOKEN_REFRESH_SKEW_MS,
|
|
493
|
-
true
|
|
494
|
-
),
|
|
532
|
+
tokenRefreshSkewMs: rawControlPlane.tokenRefreshSkewMs
|
|
533
|
+
?? DEFAULT_P2P_CONTROL_PLANE_TOKEN_REFRESH_SKEW_MS,
|
|
495
534
|
}),
|
|
496
535
|
security: {
|
|
497
536
|
requireSessionToken: rawSecurity.requireSessionToken === true,
|
|
@@ -499,19 +538,20 @@ function normalizeP2PConfig(config = {}) {
|
|
|
499
538
|
tokenExpiresAtMs: normalizeOptionalTimestamp(rawSecurity.tokenExpiresAtMs),
|
|
500
539
|
},
|
|
501
540
|
abuse: {
|
|
502
|
-
rateLimitPerMinute:
|
|
541
|
+
rateLimitPerMinute: normalizeRequiredInteger(
|
|
503
542
|
rawAbuse.rateLimitPerMinute,
|
|
504
|
-
|
|
505
|
-
true
|
|
543
|
+
'distribution.p2p.abuse.rateLimitPerMinute',
|
|
544
|
+
{ allowZero: true, fallback: DEFAULT_P2P_RATE_LIMIT_PER_MINUTE }
|
|
506
545
|
),
|
|
507
|
-
maxConsecutiveFailures:
|
|
546
|
+
maxConsecutiveFailures: normalizeRequiredInteger(
|
|
508
547
|
rawAbuse.maxConsecutiveFailures,
|
|
509
|
-
|
|
548
|
+
'distribution.p2p.abuse.maxConsecutiveFailures',
|
|
549
|
+
{ fallback: DEFAULT_P2P_MAX_CONSECUTIVE_FAILURES }
|
|
510
550
|
),
|
|
511
|
-
quarantineMs:
|
|
551
|
+
quarantineMs: normalizeRequiredInteger(
|
|
512
552
|
rawAbuse.quarantineMs,
|
|
513
|
-
|
|
514
|
-
true
|
|
553
|
+
'distribution.p2p.abuse.quarantineMs',
|
|
554
|
+
{ allowZero: true, fallback: DEFAULT_P2P_QUARANTINE_MS }
|
|
515
555
|
),
|
|
516
556
|
},
|
|
517
557
|
};
|
|
@@ -1293,9 +1333,21 @@ async function downloadShardFromHttp(baseUrl, shardInfo, shardIndex, options = {
|
|
|
1293
1333
|
const startTime = performance.now();
|
|
1294
1334
|
const url = buildShardUrl(baseUrl, shardInfo);
|
|
1295
1335
|
let lastError;
|
|
1296
|
-
const maxRetries =
|
|
1297
|
-
|
|
1298
|
-
|
|
1336
|
+
const maxRetries = normalizeRequiredInteger(
|
|
1337
|
+
options.maxRetries,
|
|
1338
|
+
'download.maxRetries',
|
|
1339
|
+
{ allowZero: true, fallback: 3 }
|
|
1340
|
+
);
|
|
1341
|
+
const initialRetryDelayMs = normalizeRequiredInteger(
|
|
1342
|
+
options.initialRetryDelayMs,
|
|
1343
|
+
'download.initialRetryDelayMs',
|
|
1344
|
+
{ allowZero: true, fallback: 1000 }
|
|
1345
|
+
);
|
|
1346
|
+
const maxRetryDelayMs = normalizeRequiredInteger(
|
|
1347
|
+
options.maxRetryDelayMs,
|
|
1348
|
+
'download.maxRetryDelayMs',
|
|
1349
|
+
{ allowZero: true, fallback: 30000 }
|
|
1350
|
+
);
|
|
1299
1351
|
const progressTotalBytes = Number.isFinite(options.expectedSize)
|
|
1300
1352
|
? Math.floor(options.expectedSize)
|
|
1301
1353
|
: (Number.isFinite(shardInfo?.size) ? Math.floor(shardInfo.size) : 0);
|
|
@@ -1967,7 +2019,11 @@ export async function downloadShard(
|
|
|
1967
2019
|
onDeliveryMetrics,
|
|
1968
2020
|
signal,
|
|
1969
2021
|
requiredEncoding: requiredEncoding ?? activeConfig.requiredContentEncoding ?? null,
|
|
1970
|
-
expectedHash:
|
|
2022
|
+
expectedHash:
|
|
2023
|
+
options.expectedHash
|
|
2024
|
+
?? getExpectedShardHash(shardInfo, algorithm)
|
|
2025
|
+
?? activeConfig.expectedHash
|
|
2026
|
+
?? null,
|
|
1971
2027
|
expectedSize: expectedSize ?? shardInfo?.size ?? null,
|
|
1972
2028
|
expectedManifestVersionSet: options.expectedManifestVersionSet ?? null,
|
|
1973
2029
|
writeToStore,
|
|
@@ -94,6 +94,8 @@ export const GGML_TYPE_SIZE = {
|
|
|
94
94
|
const GGUF_MAGIC = 0x46554747;
|
|
95
95
|
const GGUF_VERSION_MIN = 2;
|
|
96
96
|
const GGUF_VERSION_MAX = 3;
|
|
97
|
+
const MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER);
|
|
98
|
+
const MIN_SAFE_BIGINT = BigInt(Number.MIN_SAFE_INTEGER);
|
|
97
99
|
|
|
98
100
|
const {
|
|
99
101
|
contextLength: DEFAULT_GGUF_CONTEXT_LENGTH,
|
|
@@ -102,6 +104,13 @@ const {
|
|
|
102
104
|
ropeFreqBase: DEFAULT_ROPE_FREQ_BASE,
|
|
103
105
|
} = DEFAULT_GGUF_PARSER_DEFAULTS;
|
|
104
106
|
|
|
107
|
+
/**
 * Convert a 64-bit value read from a GGUF file into a plain Number.
 *
 * `value` is compared against MAX_SAFE_BIGINT / MIN_SAFE_BIGINT (the BigInt
 * forms of Number.MAX_SAFE_INTEGER / Number.MIN_SAFE_INTEGER); anything
 * outside that range throws instead of being converted with precision loss.
 * `label` names the field in the error message.
 */
function toSafeInteger(value, label) {
|
|
108
|
+
if (value > MAX_SAFE_BIGINT || value < MIN_SAFE_BIGINT) {
|
|
109
|
+
throw new Error(`GGUF ${label} exceeds JavaScript safe integer range: ${value.toString()}`);
|
|
110
|
+
}
|
|
111
|
+
return Number(value);
|
|
112
|
+
}
|
|
113
|
+
|
|
105
114
|
class GGUFReader {
|
|
106
115
|
constructor(buffer) {
|
|
107
116
|
this.view = new DataView(buffer);
|
|
@@ -144,18 +153,26 @@ class GGUFReader {
|
|
|
144
153
|
return value;
|
|
145
154
|
}
|
|
146
155
|
|
|
147
|
-
|
|
148
|
-
const low = this.view.getUint32(this.offset, true);
|
|
149
|
-
const high = this.view.getUint32(this.offset + 4, true);
|
|
156
|
+
/**
 * Read an unsigned 64-bit integer at the current offset as a BigInt and
 * advance the offset by 8 bytes. The value is assembled from two
 * little-endian 32-bit words (low word first).
 */
readUint64BigInt() {
|
|
157
|
+
const low = BigInt(this.view.getUint32(this.offset, true));
|
|
158
|
+
const high = BigInt(this.view.getUint32(this.offset + 4, true));
|
|
150
159
|
this.offset += 8;
|
|
151
|
-
return high
|
|
160
|
+
// Combine in BigInt space so no bits are lost above 2^53.
return (high << 32n) | low;
|
|
152
161
|
}
|
|
153
162
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
163
|
+
/**
 * Read an unsigned 64-bit integer and return it as a Number.
 * Delegates to readUint64BigInt() and toSafeInteger(); `label` is passed
 * through to toSafeInteger for use in its out-of-range error message.
 */
readUint64(label = 'u64 value') {
|
|
164
|
+
return toSafeInteger(this.readUint64BigInt(), label);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/**
 * Read a signed 64-bit integer at the current offset as a BigInt and
 * advance the offset by 8 bytes. The low word is read unsigned and the
 * high word signed (both little-endian), so the sign comes from the high word.
 */
readInt64BigInt() {
|
|
168
|
+
const low = BigInt(this.view.getUint32(this.offset, true));
|
|
169
|
+
const high = BigInt(this.view.getInt32(this.offset + 4, true));
|
|
157
170
|
this.offset += 8;
|
|
158
|
-
return high
|
|
171
|
+
// Combine in BigInt space so no bits are lost above 2^53.
return (high << 32n) | low;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/**
 * Read a signed 64-bit integer and return it as a Number.
 * Delegates to readInt64BigInt() and toSafeInteger(); `label` is passed
 * through to toSafeInteger for use in its out-of-range error message.
 */
readInt64(label = 'i64 value') {
|
|
175
|
+
return toSafeInteger(this.readInt64BigInt(), label);
|
|
159
176
|
}
|
|
160
177
|
|
|
161
178
|
readFloat32() {
|
|
@@ -175,7 +192,7 @@ class GGUFReader {
|
|
|
175
192
|
}
|
|
176
193
|
|
|
177
194
|
readString() {
|
|
178
|
-
const length = this.readUint64();
|
|
195
|
+
const length = this.readUint64('string length');
|
|
179
196
|
const bytes = new Uint8Array(this.view.buffer, this.offset, length);
|
|
180
197
|
this.offset += length;
|
|
181
198
|
return new TextDecoder().decode(bytes);
|
|
@@ -196,9 +213,9 @@ class GGUFReader {
|
|
|
196
213
|
case GGUFValueType.INT32:
|
|
197
214
|
return this.readInt32();
|
|
198
215
|
case GGUFValueType.UINT64:
|
|
199
|
-
return this.readUint64();
|
|
216
|
+
return this.readUint64('metadata uint64');
|
|
200
217
|
case GGUFValueType.INT64:
|
|
201
|
-
return this.readInt64();
|
|
218
|
+
return this.readInt64('metadata int64');
|
|
202
219
|
case GGUFValueType.FLOAT32:
|
|
203
220
|
return this.readFloat32();
|
|
204
221
|
case GGUFValueType.FLOAT64:
|
|
@@ -216,7 +233,7 @@ class GGUFReader {
|
|
|
216
233
|
|
|
217
234
|
readArray() {
|
|
218
235
|
const elementType = this.readUint32();
|
|
219
|
-
const length = this.readUint64();
|
|
236
|
+
const length = this.readUint64('array length');
|
|
220
237
|
if (length > 10000000) {
|
|
221
238
|
throw new Error(`Array too long: ${length}`);
|
|
222
239
|
}
|
|
@@ -331,8 +348,8 @@ export function parseGGUF(buffer) {
|
|
|
331
348
|
throw new Error(`Unsupported GGUF version: ${version}`);
|
|
332
349
|
}
|
|
333
350
|
|
|
334
|
-
const tensorCount = reader.readUint64();
|
|
335
|
-
const metadataKVCount = reader.readUint64();
|
|
351
|
+
const tensorCount = reader.readUint64('tensor count');
|
|
352
|
+
const metadataKVCount = reader.readUint64('metadata count');
|
|
336
353
|
|
|
337
354
|
const metadata = {};
|
|
338
355
|
for (let i = 0; i < metadataKVCount; i++) {
|
|
@@ -351,10 +368,10 @@ export function parseGGUF(buffer) {
|
|
|
351
368
|
const nDims = reader.readUint32();
|
|
352
369
|
const shape = [];
|
|
353
370
|
for (let d = 0; d < nDims; d++) {
|
|
354
|
-
shape.push(reader.readUint64());
|
|
371
|
+
shape.push(reader.readUint64(`tensor "${name}" shape[${d}]`));
|
|
355
372
|
}
|
|
356
373
|
const type = reader.readUint32();
|
|
357
|
-
const offset = reader.readUint64();
|
|
374
|
+
const offset = reader.readUint64(`tensor "${name}" offset`);
|
|
358
375
|
|
|
359
376
|
tensors.push({
|
|
360
377
|
name,
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* @module formats/rdrr/groups
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import type { ComponentGroup } from './types.js';
|
|
9
|
+
import type { ComponentGroup, RDRRManifest } from './types.js';
|
|
10
10
|
|
|
11
11
|
export declare function getGroup(groupId: string): ComponentGroup | null;
|
|
12
12
|
|
|
@@ -16,11 +16,19 @@ export declare function getShardsForGroup(groupId: string): number[];
|
|
|
16
16
|
|
|
17
17
|
export declare function getTensorsForGroup(groupId: string): string[];
|
|
18
18
|
|
|
19
|
-
export declare function getShardsForExpert(
|
|
19
|
+
export declare function getShardsForExpert(
|
|
20
|
+
layerIdx: number,
|
|
21
|
+
expertIdx: number,
|
|
22
|
+
manifest?: RDRRManifest | null
|
|
23
|
+
): number[];
|
|
20
24
|
|
|
21
|
-
export declare function getTensorsForExpert(
|
|
25
|
+
export declare function getTensorsForExpert(
|
|
26
|
+
layerIdx: number,
|
|
27
|
+
expertIdx: number,
|
|
28
|
+
manifest?: RDRRManifest | null
|
|
29
|
+
): string[];
|
|
22
30
|
|
|
23
|
-
export declare function getExpertBytes(): number;
|
|
31
|
+
export declare function getExpertBytes(manifest?: RDRRManifest | null): number;
|
|
24
32
|
|
|
25
33
|
export declare function getLayerGroupIds(): string[];
|
|
26
34
|
|
|
@@ -19,8 +19,7 @@ export function getTensorsForGroup(groupId) {
|
|
|
19
19
|
return getManifest()?.groups?.[groupId]?.tensors ?? [];
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
export function getShardsForExpert(layerIdx, expertIdx) {
|
|
23
|
-
const manifest = getManifest();
|
|
22
|
+
export function getShardsForExpert(layerIdx, expertIdx, manifest = getManifest()) {
|
|
24
23
|
const groupId = `layer.${layerIdx}.expert.${expertIdx}`;
|
|
25
24
|
const group = manifest?.groups?.[groupId];
|
|
26
25
|
if (group) {
|
|
@@ -29,8 +28,7 @@ export function getShardsForExpert(layerIdx, expertIdx) {
|
|
|
29
28
|
throw new Error(`Missing expert group mapping: ${groupId}`);
|
|
30
29
|
}
|
|
31
30
|
|
|
32
|
-
export function getTensorsForExpert(layerIdx, expertIdx) {
|
|
33
|
-
const manifest = getManifest();
|
|
31
|
+
export function getTensorsForExpert(layerIdx, expertIdx, manifest = getManifest()) {
|
|
34
32
|
const groupId = `layer.${layerIdx}.expert.${expertIdx}`;
|
|
35
33
|
const group = manifest?.groups?.[groupId];
|
|
36
34
|
if (group) {
|
|
@@ -39,8 +37,7 @@ export function getTensorsForExpert(layerIdx, expertIdx) {
|
|
|
39
37
|
throw new Error(`Missing expert group mapping: ${groupId}`);
|
|
40
38
|
}
|
|
41
39
|
|
|
42
|
-
export function getExpertBytes() {
|
|
43
|
-
const manifest = getManifest();
|
|
40
|
+
export function getExpertBytes(manifest = getManifest()) {
|
|
44
41
|
const expertGroups = Object.entries(manifest?.groups || {})
|
|
45
42
|
.filter(([id]) => id.includes('.expert.'));
|
|
46
43
|
|
|
@@ -7,6 +7,10 @@
|
|
|
7
7
|
import type { RDRRManifest, ShardInfo, TensorMap } from './types.js';
|
|
8
8
|
|
|
9
9
|
export declare function parseManifest(jsonString: string): RDRRManifest;
|
|
10
|
+
export declare function getExpectedShardHash(
|
|
11
|
+
shard: Partial<ShardInfo> | Record<string, unknown> | null | undefined,
|
|
12
|
+
manifestHashAlgorithm?: string | null
|
|
13
|
+
): string;
|
|
10
14
|
|
|
11
15
|
export declare function parseTensorMap(jsonString: string): TensorMap;
|
|
12
16
|
|
|
@@ -4,6 +4,19 @@ import { validateManifest } from './validation.js';
|
|
|
4
4
|
|
|
5
5
|
let currentManifest = null;
|
|
6
6
|
|
|
7
|
+
export function getExpectedShardHash(shard, manifestHashAlgorithm = null) {
|
|
8
|
+
if (!shard || typeof shard !== 'object' || Array.isArray(shard)) {
|
|
9
|
+
return '';
|
|
10
|
+
}
|
|
11
|
+
const algorithm = typeof manifestHashAlgorithm === 'string'
|
|
12
|
+
? manifestHashAlgorithm.trim().toLowerCase()
|
|
13
|
+
: '';
|
|
14
|
+
if (algorithm === 'blake3') {
|
|
15
|
+
return shard.blake3 || shard.hash || '';
|
|
16
|
+
}
|
|
17
|
+
return shard.hash || shard.blake3 || '';
|
|
18
|
+
}
|
|
19
|
+
|
|
7
20
|
export function parseManifest(jsonString) {
|
|
8
21
|
let manifest;
|
|
9
22
|
|
|
@@ -21,7 +34,7 @@ export function parseManifest(jsonString) {
|
|
|
21
34
|
index: shard.index ?? i,
|
|
22
35
|
filename: shard.filename || shard.fileName || '',
|
|
23
36
|
size: shard.size,
|
|
24
|
-
hash: shard
|
|
37
|
+
hash: getExpectedShardHash(shard, manifest.hashAlgorithm),
|
|
25
38
|
blake3: shard.blake3 || shard.hash,
|
|
26
39
|
offset: shard.offset ?? offset,
|
|
27
40
|
hashAlgorithm: shard.hashAlgorithm,
|
|
@@ -44,9 +57,13 @@ export function parseManifest(jsonString) {
|
|
|
44
57
|
export function parseTensorMap(jsonString) {
|
|
45
58
|
try {
|
|
46
59
|
const tensorMap = JSON.parse(jsonString);
|
|
60
|
+
const normalizedTensorMap = {};
|
|
47
61
|
|
|
48
62
|
for (const [name, loc] of Object.entries(tensorMap)) {
|
|
49
|
-
|
|
63
|
+
const shardIndex = typeof loc.shardIndex === 'number'
|
|
64
|
+
? loc.shardIndex
|
|
65
|
+
: loc.shard;
|
|
66
|
+
if (typeof shardIndex !== 'number') {
|
|
50
67
|
throw new Error(`Tensor '${name}' missing shard index`);
|
|
51
68
|
}
|
|
52
69
|
if (typeof loc.offset !== 'number') {
|
|
@@ -61,9 +78,42 @@ export function parseTensorMap(jsonString) {
|
|
|
61
78
|
if (typeof loc.role !== 'string') {
|
|
62
79
|
throw new Error(`Tensor '${name}' missing role`);
|
|
63
80
|
}
|
|
81
|
+
|
|
82
|
+
let spans = undefined;
|
|
83
|
+
if (loc.spans !== undefined) {
|
|
84
|
+
if (!Array.isArray(loc.spans)) {
|
|
85
|
+
throw new Error(`Tensor '${name}' has invalid spans array`);
|
|
86
|
+
}
|
|
87
|
+
spans = loc.spans.map((span, spanIndex) => {
|
|
88
|
+
const spanShardIndex = typeof span?.shardIndex === 'number'
|
|
89
|
+
? span.shardIndex
|
|
90
|
+
: span?.shard;
|
|
91
|
+
if (typeof spanShardIndex !== 'number') {
|
|
92
|
+
throw new Error(`Tensor '${name}' span[${spanIndex}] missing shard index`);
|
|
93
|
+
}
|
|
94
|
+
if (typeof span?.offset !== 'number') {
|
|
95
|
+
throw new Error(`Tensor '${name}' span[${spanIndex}] missing offset`);
|
|
96
|
+
}
|
|
97
|
+
if (typeof span?.size !== 'number') {
|
|
98
|
+
throw new Error(`Tensor '${name}' span[${spanIndex}] missing size`);
|
|
99
|
+
}
|
|
100
|
+
return {
|
|
101
|
+
shardIndex: spanShardIndex,
|
|
102
|
+
offset: span.offset,
|
|
103
|
+
size: span.size,
|
|
104
|
+
};
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
normalizedTensorMap[name] = {
|
|
109
|
+
...loc,
|
|
110
|
+
shard: shardIndex,
|
|
111
|
+
shardIndex,
|
|
112
|
+
spans,
|
|
113
|
+
};
|
|
64
114
|
}
|
|
65
115
|
|
|
66
|
-
return
|
|
116
|
+
return normalizedTensorMap;
|
|
67
117
|
} catch (e) {
|
|
68
118
|
if (e instanceof Error && e.message.includes('Tensor')) {
|
|
69
119
|
throw e;
|
|
@@ -75,13 +75,14 @@ export interface ComponentGroup extends ComponentGroupSchema {}
|
|
|
75
75
|
|
|
76
76
|
export interface TensorLocation {
|
|
77
77
|
shard: number;
|
|
78
|
+
shardIndex?: number;
|
|
78
79
|
offset: number;
|
|
79
80
|
size: number;
|
|
80
81
|
shape: number[];
|
|
81
82
|
dtype: string;
|
|
82
83
|
role: TensorRole;
|
|
83
84
|
group?: string;
|
|
84
|
-
spans?: Array<{ shardIndex
|
|
85
|
+
spans?: Array<{ shard?: number; shardIndex?: number; offset: number; size: number }>;
|
|
85
86
|
layout?: WeightLayout;
|
|
86
87
|
originalShape?: number[];
|
|
87
88
|
}
|