@simulatte/doppler 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +16 -23
- package/package.json +14 -1
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +7 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +12 -2
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +2 -1
- package/src/config/schema/manifest.schema.js +16 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +58 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +57 -41
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +62 -8
- package/src/inference/pipelines/text/attention/run.js +62 -8
- package/src/inference/pipelines/text/config.js +3 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +41 -19
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.js +78 -20
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +3 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +44 -25
package/src/debug/tensor.js
CHANGED
|
@@ -202,7 +202,13 @@ export const tensor = {
|
|
|
202
202
|
|
|
203
203
|
export async function snapshotTensor(buffer, shape, dtype = 'f32') {
|
|
204
204
|
try {
|
|
205
|
-
if (
|
|
205
|
+
if (
|
|
206
|
+
!gpuDevice
|
|
207
|
+
|| typeof gpuDevice.createBuffer !== 'function'
|
|
208
|
+
|| typeof gpuDevice.createCommandEncoder !== 'function'
|
|
209
|
+
|| !gpuDevice.queue
|
|
210
|
+
|| typeof gpuDevice.queue.submit !== 'function'
|
|
211
|
+
) {
|
|
206
212
|
throw new Error('GPU device not initialized');
|
|
207
213
|
}
|
|
208
214
|
const elementSize = dtype === 'f16' ? 2 : 4;
|
|
@@ -224,8 +230,11 @@ export async function snapshotTensor(buffer, shape, dtype = 'f32') {
|
|
|
224
230
|
staging.destroy();
|
|
225
231
|
const arr = new Float32Array(data);
|
|
226
232
|
return snapshotFromArray(arr, shape ?? [arr.length], dtype);
|
|
227
|
-
} catch {
|
|
233
|
+
} catch (error) {
|
|
234
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
228
235
|
return {
|
|
236
|
+
ok: false,
|
|
237
|
+
error: message,
|
|
229
238
|
shape: shape ?? [0],
|
|
230
239
|
dtype,
|
|
231
240
|
stats: { min: 0, max: 0, maxAbs: 0, mean: 0, std: 0 },
|
|
@@ -241,6 +250,8 @@ export function snapshotFromArray(arr, shape, dtype = 'f32') {
|
|
|
241
250
|
const stats = computeArrayStats(arr, Math.min(arr.length, numElements));
|
|
242
251
|
|
|
243
252
|
return {
|
|
253
|
+
ok: true,
|
|
254
|
+
error: null,
|
|
244
255
|
shape,
|
|
245
256
|
dtype,
|
|
246
257
|
stats: {
|
|
@@ -38,10 +38,17 @@ function asOptionalTimestamp(value, label) {
|
|
|
38
38
|
return Math.floor(parsed);
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
-
function
|
|
41
|
+
function asOptionalNonNegativeInteger(value, label) {
|
|
42
|
+
if (value === undefined || value === null) {
|
|
43
|
+
return null;
|
|
44
|
+
}
|
|
42
45
|
const parsed = Number(value);
|
|
43
46
|
if (!Number.isInteger(parsed) || parsed < 0) {
|
|
44
|
-
|
|
47
|
+
throw createP2PTransportError(
|
|
48
|
+
P2P_TRANSPORT_ERROR_CODES.payloadInvalid,
|
|
49
|
+
`${label} must be a non-negative integer when provided.`,
|
|
50
|
+
{ label }
|
|
51
|
+
);
|
|
45
52
|
}
|
|
46
53
|
return parsed;
|
|
47
54
|
}
|
|
@@ -104,12 +111,11 @@ export function normalizeControlPlaneSessionUpdate(value, label = 'p2p control-p
|
|
|
104
111
|
|
|
105
112
|
export function normalizeP2PPolicyDecision(value, label = 'p2p control-plane policy decision') {
|
|
106
113
|
if (value === undefined || value === null) {
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
};
|
|
114
|
+
throw createP2PTransportError(
|
|
115
|
+
P2P_TRANSPORT_ERROR_CODES.payloadInvalid,
|
|
116
|
+
`${label} must return an explicit boolean or object decision.`,
|
|
117
|
+
{ label }
|
|
118
|
+
);
|
|
113
119
|
}
|
|
114
120
|
|
|
115
121
|
if (typeof value === 'boolean') {
|
|
@@ -129,9 +135,40 @@ export function normalizeP2PPolicyDecision(value, label = 'p2p control-plane pol
|
|
|
129
135
|
);
|
|
130
136
|
}
|
|
131
137
|
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
|
|
138
|
+
const hasAllow = Object.prototype.hasOwnProperty.call(value, 'allow');
|
|
139
|
+
const hasDeny = Object.prototype.hasOwnProperty.call(value, 'deny');
|
|
140
|
+
if (!hasAllow && !hasDeny) {
|
|
141
|
+
throw createP2PTransportError(
|
|
142
|
+
P2P_TRANSPORT_ERROR_CODES.payloadInvalid,
|
|
143
|
+
`${label} must include allow or deny.`,
|
|
144
|
+
{ label }
|
|
145
|
+
);
|
|
146
|
+
}
|
|
147
|
+
if (hasAllow && typeof value.allow !== 'boolean') {
|
|
148
|
+
throw createP2PTransportError(
|
|
149
|
+
P2P_TRANSPORT_ERROR_CODES.payloadInvalid,
|
|
150
|
+
`${label}.allow must be a boolean when provided.`,
|
|
151
|
+
{ label }
|
|
152
|
+
);
|
|
153
|
+
}
|
|
154
|
+
if (hasDeny && typeof value.deny !== 'boolean') {
|
|
155
|
+
throw createP2PTransportError(
|
|
156
|
+
P2P_TRANSPORT_ERROR_CODES.payloadInvalid,
|
|
157
|
+
`${label}.deny must be a boolean when provided.`,
|
|
158
|
+
{ label }
|
|
159
|
+
);
|
|
160
|
+
}
|
|
161
|
+
if (hasAllow && hasDeny && value.allow === value.deny) {
|
|
162
|
+
throw createP2PTransportError(
|
|
163
|
+
P2P_TRANSPORT_ERROR_CODES.payloadInvalid,
|
|
164
|
+
`${label} has conflicting allow/deny values.`,
|
|
165
|
+
{ label }
|
|
166
|
+
);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
const allow = hasAllow
|
|
170
|
+
? value.allow
|
|
171
|
+
: value.deny !== true;
|
|
135
172
|
const reason = asOptionalString(value.reason, `${label}.reason`);
|
|
136
173
|
const sessionUpdate = normalizeControlPlaneSessionUpdate(
|
|
137
174
|
{
|
|
@@ -180,7 +217,10 @@ export function normalizeP2PControlPlaneConfig(config = {}) {
|
|
|
180
217
|
contractVersion: assertSupportedP2PControlPlaneContract(
|
|
181
218
|
raw.contractVersion ?? P2P_CONTROL_PLANE_CONTRACT_VERSION
|
|
182
219
|
),
|
|
183
|
-
tokenRefreshSkewMs:
|
|
220
|
+
tokenRefreshSkewMs: asOptionalNonNegativeInteger(
|
|
221
|
+
raw.tokenRefreshSkewMs,
|
|
222
|
+
'p2p.controlPlane.tokenRefreshSkewMs'
|
|
223
|
+
) ?? DEFAULT_TOKEN_REFRESH_SKEW_MS,
|
|
184
224
|
tokenProvider,
|
|
185
225
|
policyEvaluator,
|
|
186
226
|
};
|
|
@@ -12,6 +12,14 @@ function asFiniteNumber(value, fallback = 0) {
|
|
|
12
12
|
return Number.isFinite(parsed) ? parsed : fallback;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
function assertFiniteNumber(value, label) {
|
|
16
|
+
const parsed = Number(value);
|
|
17
|
+
if (!Number.isFinite(parsed)) {
|
|
18
|
+
throw new Error(`P2P observability ${label} must be a finite number.`);
|
|
19
|
+
}
|
|
20
|
+
return parsed;
|
|
21
|
+
}
|
|
22
|
+
|
|
15
23
|
function asNonNegativeInteger(value, fallback = 0) {
|
|
16
24
|
const parsed = Number(value);
|
|
17
25
|
if (!Number.isInteger(parsed) || parsed < 0) {
|
|
@@ -69,15 +77,43 @@ function percentile(values, ratio) {
|
|
|
69
77
|
}
|
|
70
78
|
|
|
71
79
|
function resolveSLOTargets(options = {}) {
|
|
72
|
-
const
|
|
73
|
-
|
|
74
|
-
|
|
80
|
+
const hasExplicitTargets = Object.hasOwn(options, 'targets');
|
|
81
|
+
if (hasExplicitTargets && (options.targets == null || typeof options.targets !== 'object' || Array.isArray(options.targets))) {
|
|
82
|
+
throw new Error('P2P observability targets must be an object when provided.');
|
|
83
|
+
}
|
|
84
|
+
const targets = hasExplicitTargets ? options.targets : {};
|
|
85
|
+
|
|
86
|
+
const minAvailability = Object.hasOwn(targets, 'minAvailability')
|
|
87
|
+
? assertFiniteNumber(targets.minAvailability, 'targets.minAvailability')
|
|
88
|
+
: DEFAULT_SLO_TARGETS.minAvailability;
|
|
89
|
+
const minP2PHitRate = Object.hasOwn(targets, 'minP2PHitRate')
|
|
90
|
+
? assertFiniteNumber(targets.minP2PHitRate, 'targets.minP2PHitRate')
|
|
91
|
+
: DEFAULT_SLO_TARGETS.minP2PHitRate;
|
|
92
|
+
const maxHttpFallbackRate = Object.hasOwn(targets, 'maxHttpFallbackRate')
|
|
93
|
+
? assertFiniteNumber(targets.maxHttpFallbackRate, 'targets.maxHttpFallbackRate')
|
|
94
|
+
: DEFAULT_SLO_TARGETS.maxHttpFallbackRate;
|
|
95
|
+
const maxP95LatencyMs = Object.hasOwn(targets, 'maxP95LatencyMs')
|
|
96
|
+
? assertFiniteNumber(targets.maxP95LatencyMs, 'targets.maxP95LatencyMs')
|
|
97
|
+
: DEFAULT_SLO_TARGETS.maxP95LatencyMs;
|
|
98
|
+
|
|
99
|
+
if (minAvailability < 0 || minAvailability > 1) {
|
|
100
|
+
throw new Error('P2P observability targets.minAvailability must be between 0 and 1.');
|
|
101
|
+
}
|
|
102
|
+
if (minP2PHitRate < 0 || minP2PHitRate > 1) {
|
|
103
|
+
throw new Error('P2P observability targets.minP2PHitRate must be between 0 and 1.');
|
|
104
|
+
}
|
|
105
|
+
if (maxHttpFallbackRate < 0 || maxHttpFallbackRate > 1) {
|
|
106
|
+
throw new Error('P2P observability targets.maxHttpFallbackRate must be between 0 and 1.');
|
|
107
|
+
}
|
|
108
|
+
if (maxP95LatencyMs < 0) {
|
|
109
|
+
throw new Error('P2P observability targets.maxP95LatencyMs must be >= 0.');
|
|
110
|
+
}
|
|
75
111
|
|
|
76
112
|
return {
|
|
77
|
-
minAvailability
|
|
78
|
-
minP2PHitRate
|
|
79
|
-
maxHttpFallbackRate
|
|
80
|
-
maxP95LatencyMs
|
|
113
|
+
minAvailability,
|
|
114
|
+
minP2PHitRate,
|
|
115
|
+
maxHttpFallbackRate,
|
|
116
|
+
maxP95LatencyMs,
|
|
81
117
|
};
|
|
82
118
|
}
|
|
83
119
|
|
|
@@ -200,6 +200,16 @@ function assertOpenDataChannel(channel, peerId) {
|
|
|
200
200
|
}
|
|
201
201
|
|
|
202
202
|
function toRequestMessage(requestId, context) {
|
|
203
|
+
if (context?.contractVersion !== P2P_WEBRTC_DATA_PLANE_CONTRACT_VERSION) {
|
|
204
|
+
throw createP2PTransportError(
|
|
205
|
+
P2P_TRANSPORT_ERROR_CODES.payloadInvalid,
|
|
206
|
+
`Unexpected WebRTC data-plane contractVersion "${context?.contractVersion}".`,
|
|
207
|
+
{
|
|
208
|
+
expectedContractVersion: P2P_WEBRTC_DATA_PLANE_CONTRACT_VERSION,
|
|
209
|
+
actualContractVersion: context?.contractVersion ?? null,
|
|
210
|
+
}
|
|
211
|
+
);
|
|
212
|
+
}
|
|
203
213
|
return {
|
|
204
214
|
schemaVersion: P2P_WEBRTC_MESSAGE_SCHEMA_VERSION,
|
|
205
215
|
contractVersion: P2P_WEBRTC_DATA_PLANE_CONTRACT_VERSION,
|
|
@@ -377,6 +387,16 @@ export function createBrowserWebRTCDataPlaneTransport(config = {}) {
|
|
|
377
387
|
const maxPayloadBytes = Math.max(1, asNonNegativeInteger(config.maxPayloadBytes, DEFAULT_MAX_PAYLOAD_BYTES));
|
|
378
388
|
|
|
379
389
|
return async function webRtcDataPlaneTransport(context) {
|
|
390
|
+
if (context?.contractVersion !== P2P_WEBRTC_DATA_PLANE_CONTRACT_VERSION) {
|
|
391
|
+
throw createP2PTransportError(
|
|
392
|
+
P2P_TRANSPORT_ERROR_CODES.contractUnsupported,
|
|
393
|
+
`Unsupported p2p.webrtc contractVersion "${context?.contractVersion}". Supported: ${P2P_WEBRTC_DATA_PLANE_CONTRACT_VERSION}.`,
|
|
394
|
+
{
|
|
395
|
+
contractVersion: context?.contractVersion ?? null,
|
|
396
|
+
}
|
|
397
|
+
);
|
|
398
|
+
}
|
|
399
|
+
|
|
380
400
|
const selection = normalizePeerSelectionResult(
|
|
381
401
|
selectPeer ? await selectPeer(context) : { peerId: staticPeerId }
|
|
382
402
|
);
|
|
@@ -55,22 +55,30 @@ const inFlightDeliveries = new Map();
|
|
|
55
55
|
const p2pTransportPolicyState = new WeakMap();
|
|
56
56
|
|
|
57
57
|
function normalizeDistributionSourceOrder(rawSources = []) {
|
|
58
|
-
if (
|
|
58
|
+
if (rawSources === undefined || rawSources === null) {
|
|
59
59
|
return [...DISTRIBUTION_SOURCES];
|
|
60
60
|
}
|
|
61
|
+
if (!Array.isArray(rawSources)) {
|
|
62
|
+
throw new Error('distribution.sourceOrder must be an array when provided.');
|
|
63
|
+
}
|
|
61
64
|
|
|
62
65
|
const normalized = [];
|
|
63
66
|
const seen = new Set();
|
|
64
67
|
|
|
65
68
|
for (const value of rawSources) {
|
|
66
69
|
const source = String(value || '').trim().toLowerCase();
|
|
67
|
-
if (!DISTRIBUTION_SOURCES.includes(source))
|
|
70
|
+
if (!DISTRIBUTION_SOURCES.includes(source)) {
|
|
71
|
+
throw new Error(`distribution.sourceOrder contains unsupported source "${source || value}".`);
|
|
72
|
+
}
|
|
68
73
|
if (seen.has(source)) continue;
|
|
69
74
|
seen.add(source);
|
|
70
75
|
normalized.push(source);
|
|
71
76
|
}
|
|
72
77
|
|
|
73
|
-
|
|
78
|
+
if (normalized.length === 0) {
|
|
79
|
+
throw new Error('distribution.sourceOrder must include at least one supported source.');
|
|
80
|
+
}
|
|
81
|
+
return normalized;
|
|
74
82
|
}
|
|
75
83
|
|
|
76
84
|
function normalizeInteger(value, fallback, allowZero = false) {
|
|
@@ -81,6 +89,23 @@ function normalizeInteger(value, fallback, allowZero = false) {
|
|
|
81
89
|
: fallback;
|
|
82
90
|
}
|
|
83
91
|
|
|
92
|
+
function normalizeRequiredInteger(value, label, { allowZero = false, fallback = null } = {}) {
|
|
93
|
+
if (value === undefined || value === null) {
|
|
94
|
+
if (fallback !== null) {
|
|
95
|
+
return fallback;
|
|
96
|
+
}
|
|
97
|
+
throw new Error(`${label} is required.`);
|
|
98
|
+
}
|
|
99
|
+
const parsed = Number(value);
|
|
100
|
+
const min = allowZero ? 0 : 1;
|
|
101
|
+
if (!Number.isInteger(parsed) || parsed < min) {
|
|
102
|
+
throw new Error(
|
|
103
|
+
`${label} must be a ${allowZero ? 'non-negative' : 'positive'} integer when provided.`
|
|
104
|
+
);
|
|
105
|
+
}
|
|
106
|
+
return parsed;
|
|
107
|
+
}
|
|
108
|
+
|
|
84
109
|
function normalizeContentEncodings(value) {
|
|
85
110
|
if (!value) return [];
|
|
86
111
|
return value
|
|
@@ -95,13 +120,17 @@ function normalizeManifestVersionSet(value) {
|
|
|
95
120
|
return normalized || null;
|
|
96
121
|
}
|
|
97
122
|
|
|
98
|
-
function normalizeSamplingRate(value, fallback = 1) {
|
|
123
|
+
function normalizeSamplingRate(value, fallback = 1, label = 'distribution.sourceDecision.trace.samplingRate') {
|
|
124
|
+
if (value === undefined || value === null) {
|
|
125
|
+
return fallback;
|
|
126
|
+
}
|
|
99
127
|
const parsed = Number(value);
|
|
100
128
|
if (!Number.isFinite(parsed)) {
|
|
101
|
-
|
|
129
|
+
throw new Error(`${label} must be a finite number between 0 and 1 when provided.`);
|
|
130
|
+
}
|
|
131
|
+
if (parsed < 0 || parsed > 1) {
|
|
132
|
+
throw new Error(`${label} must be between 0 and 1 when provided.`);
|
|
102
133
|
}
|
|
103
|
-
if (parsed <= 0) return 0;
|
|
104
|
-
if (parsed >= 1) return 1;
|
|
105
134
|
return parsed;
|
|
106
135
|
}
|
|
107
136
|
|
|
@@ -479,19 +508,28 @@ function normalizeP2PConfig(config = {}) {
|
|
|
479
508
|
|
|
480
509
|
return {
|
|
481
510
|
enabled,
|
|
482
|
-
timeoutMs:
|
|
483
|
-
|
|
484
|
-
|
|
511
|
+
timeoutMs: normalizeRequiredInteger(
|
|
512
|
+
rawTimeoutMs,
|
|
513
|
+
'distribution.p2p.timeoutMs',
|
|
514
|
+
{ fallback: DEFAULT_P2P_TIMEOUT_MS }
|
|
515
|
+
),
|
|
516
|
+
maxRetries: normalizeRequiredInteger(
|
|
517
|
+
rawMaxRetries,
|
|
518
|
+
'distribution.p2p.maxRetries',
|
|
519
|
+
{ allowZero: true, fallback: DEFAULT_P2P_MAX_RETRIES }
|
|
520
|
+
),
|
|
521
|
+
retryDelayMs: normalizeRequiredInteger(
|
|
522
|
+
rawRetryDelayMs,
|
|
523
|
+
'distribution.p2p.retryDelayMs',
|
|
524
|
+
{ allowZero: true, fallback: DEFAULT_P2P_RETRY_DELAY_MS }
|
|
525
|
+
),
|
|
485
526
|
transport,
|
|
486
527
|
contractVersion,
|
|
487
528
|
controlPlane: normalizeP2PControlPlaneConfig({
|
|
488
529
|
...DEFAULT_DISTRIBUTION_CONFIG.p2p.controlPlane,
|
|
489
530
|
...rawControlPlane,
|
|
490
|
-
tokenRefreshSkewMs:
|
|
491
|
-
|
|
492
|
-
DEFAULT_P2P_CONTROL_PLANE_TOKEN_REFRESH_SKEW_MS,
|
|
493
|
-
true
|
|
494
|
-
),
|
|
531
|
+
tokenRefreshSkewMs: rawControlPlane.tokenRefreshSkewMs
|
|
532
|
+
?? DEFAULT_P2P_CONTROL_PLANE_TOKEN_REFRESH_SKEW_MS,
|
|
495
533
|
}),
|
|
496
534
|
security: {
|
|
497
535
|
requireSessionToken: rawSecurity.requireSessionToken === true,
|
|
@@ -499,19 +537,20 @@ function normalizeP2PConfig(config = {}) {
|
|
|
499
537
|
tokenExpiresAtMs: normalizeOptionalTimestamp(rawSecurity.tokenExpiresAtMs),
|
|
500
538
|
},
|
|
501
539
|
abuse: {
|
|
502
|
-
rateLimitPerMinute:
|
|
540
|
+
rateLimitPerMinute: normalizeRequiredInteger(
|
|
503
541
|
rawAbuse.rateLimitPerMinute,
|
|
504
|
-
|
|
505
|
-
true
|
|
542
|
+
'distribution.p2p.abuse.rateLimitPerMinute',
|
|
543
|
+
{ allowZero: true, fallback: DEFAULT_P2P_RATE_LIMIT_PER_MINUTE }
|
|
506
544
|
),
|
|
507
|
-
maxConsecutiveFailures:
|
|
545
|
+
maxConsecutiveFailures: normalizeRequiredInteger(
|
|
508
546
|
rawAbuse.maxConsecutiveFailures,
|
|
509
|
-
|
|
547
|
+
'distribution.p2p.abuse.maxConsecutiveFailures',
|
|
548
|
+
{ fallback: DEFAULT_P2P_MAX_CONSECUTIVE_FAILURES }
|
|
510
549
|
),
|
|
511
|
-
quarantineMs:
|
|
550
|
+
quarantineMs: normalizeRequiredInteger(
|
|
512
551
|
rawAbuse.quarantineMs,
|
|
513
|
-
|
|
514
|
-
true
|
|
552
|
+
'distribution.p2p.abuse.quarantineMs',
|
|
553
|
+
{ allowZero: true, fallback: DEFAULT_P2P_QUARANTINE_MS }
|
|
515
554
|
),
|
|
516
555
|
},
|
|
517
556
|
};
|
|
@@ -1293,9 +1332,21 @@ async function downloadShardFromHttp(baseUrl, shardInfo, shardIndex, options = {
|
|
|
1293
1332
|
const startTime = performance.now();
|
|
1294
1333
|
const url = buildShardUrl(baseUrl, shardInfo);
|
|
1295
1334
|
let lastError;
|
|
1296
|
-
const maxRetries =
|
|
1297
|
-
|
|
1298
|
-
|
|
1335
|
+
const maxRetries = normalizeRequiredInteger(
|
|
1336
|
+
options.maxRetries,
|
|
1337
|
+
'download.maxRetries',
|
|
1338
|
+
{ allowZero: true, fallback: 3 }
|
|
1339
|
+
);
|
|
1340
|
+
const initialRetryDelayMs = normalizeRequiredInteger(
|
|
1341
|
+
options.initialRetryDelayMs,
|
|
1342
|
+
'download.initialRetryDelayMs',
|
|
1343
|
+
{ allowZero: true, fallback: 1000 }
|
|
1344
|
+
);
|
|
1345
|
+
const maxRetryDelayMs = normalizeRequiredInteger(
|
|
1346
|
+
options.maxRetryDelayMs,
|
|
1347
|
+
'download.maxRetryDelayMs',
|
|
1348
|
+
{ allowZero: true, fallback: 30000 }
|
|
1349
|
+
);
|
|
1299
1350
|
const progressTotalBytes = Number.isFinite(options.expectedSize)
|
|
1300
1351
|
? Math.floor(options.expectedSize)
|
|
1301
1352
|
: (Number.isFinite(shardInfo?.size) ? Math.floor(shardInfo.size) : 0);
|
|
@@ -94,6 +94,8 @@ export const GGML_TYPE_SIZE = {
|
|
|
94
94
|
const GGUF_MAGIC = 0x46554747;
|
|
95
95
|
const GGUF_VERSION_MIN = 2;
|
|
96
96
|
const GGUF_VERSION_MAX = 3;
|
|
97
|
+
const MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER);
|
|
98
|
+
const MIN_SAFE_BIGINT = BigInt(Number.MIN_SAFE_INTEGER);
|
|
97
99
|
|
|
98
100
|
const {
|
|
99
101
|
contextLength: DEFAULT_GGUF_CONTEXT_LENGTH,
|
|
@@ -102,6 +104,13 @@ const {
|
|
|
102
104
|
ropeFreqBase: DEFAULT_ROPE_FREQ_BASE,
|
|
103
105
|
} = DEFAULT_GGUF_PARSER_DEFAULTS;
|
|
104
106
|
|
|
107
|
+
function toSafeInteger(value, label) {
|
|
108
|
+
if (value > MAX_SAFE_BIGINT || value < MIN_SAFE_BIGINT) {
|
|
109
|
+
throw new Error(`GGUF ${label} exceeds JavaScript safe integer range: ${value.toString()}`);
|
|
110
|
+
}
|
|
111
|
+
return Number(value);
|
|
112
|
+
}
|
|
113
|
+
|
|
105
114
|
class GGUFReader {
|
|
106
115
|
constructor(buffer) {
|
|
107
116
|
this.view = new DataView(buffer);
|
|
@@ -144,18 +153,26 @@ class GGUFReader {
|
|
|
144
153
|
return value;
|
|
145
154
|
}
|
|
146
155
|
|
|
147
|
-
|
|
148
|
-
const low = this.view.getUint32(this.offset, true);
|
|
149
|
-
const high = this.view.getUint32(this.offset + 4, true);
|
|
156
|
+
readUint64BigInt() {
|
|
157
|
+
const low = BigInt(this.view.getUint32(this.offset, true));
|
|
158
|
+
const high = BigInt(this.view.getUint32(this.offset + 4, true));
|
|
150
159
|
this.offset += 8;
|
|
151
|
-
return high
|
|
160
|
+
return (high << 32n) | low;
|
|
152
161
|
}
|
|
153
162
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
163
|
+
readUint64(label = 'u64 value') {
|
|
164
|
+
return toSafeInteger(this.readUint64BigInt(), label);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
readInt64BigInt() {
|
|
168
|
+
const low = BigInt(this.view.getUint32(this.offset, true));
|
|
169
|
+
const high = BigInt(this.view.getInt32(this.offset + 4, true));
|
|
157
170
|
this.offset += 8;
|
|
158
|
-
return high
|
|
171
|
+
return (high << 32n) | low;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
readInt64(label = 'i64 value') {
|
|
175
|
+
return toSafeInteger(this.readInt64BigInt(), label);
|
|
159
176
|
}
|
|
160
177
|
|
|
161
178
|
readFloat32() {
|
|
@@ -175,7 +192,7 @@ class GGUFReader {
|
|
|
175
192
|
}
|
|
176
193
|
|
|
177
194
|
readString() {
|
|
178
|
-
const length = this.readUint64();
|
|
195
|
+
const length = this.readUint64('string length');
|
|
179
196
|
const bytes = new Uint8Array(this.view.buffer, this.offset, length);
|
|
180
197
|
this.offset += length;
|
|
181
198
|
return new TextDecoder().decode(bytes);
|
|
@@ -196,9 +213,9 @@ class GGUFReader {
|
|
|
196
213
|
case GGUFValueType.INT32:
|
|
197
214
|
return this.readInt32();
|
|
198
215
|
case GGUFValueType.UINT64:
|
|
199
|
-
return this.readUint64();
|
|
216
|
+
return this.readUint64('metadata uint64');
|
|
200
217
|
case GGUFValueType.INT64:
|
|
201
|
-
return this.readInt64();
|
|
218
|
+
return this.readInt64('metadata int64');
|
|
202
219
|
case GGUFValueType.FLOAT32:
|
|
203
220
|
return this.readFloat32();
|
|
204
221
|
case GGUFValueType.FLOAT64:
|
|
@@ -216,7 +233,7 @@ class GGUFReader {
|
|
|
216
233
|
|
|
217
234
|
readArray() {
|
|
218
235
|
const elementType = this.readUint32();
|
|
219
|
-
const length = this.readUint64();
|
|
236
|
+
const length = this.readUint64('array length');
|
|
220
237
|
if (length > 10000000) {
|
|
221
238
|
throw new Error(`Array too long: ${length}`);
|
|
222
239
|
}
|
|
@@ -331,8 +348,8 @@ export function parseGGUF(buffer) {
|
|
|
331
348
|
throw new Error(`Unsupported GGUF version: ${version}`);
|
|
332
349
|
}
|
|
333
350
|
|
|
334
|
-
const tensorCount = reader.readUint64();
|
|
335
|
-
const metadataKVCount = reader.readUint64();
|
|
351
|
+
const tensorCount = reader.readUint64('tensor count');
|
|
352
|
+
const metadataKVCount = reader.readUint64('metadata count');
|
|
336
353
|
|
|
337
354
|
const metadata = {};
|
|
338
355
|
for (let i = 0; i < metadataKVCount; i++) {
|
|
@@ -351,10 +368,10 @@ export function parseGGUF(buffer) {
|
|
|
351
368
|
const nDims = reader.readUint32();
|
|
352
369
|
const shape = [];
|
|
353
370
|
for (let d = 0; d < nDims; d++) {
|
|
354
|
-
shape.push(reader.readUint64());
|
|
371
|
+
shape.push(reader.readUint64(`tensor "${name}" shape[${d}]`));
|
|
355
372
|
}
|
|
356
373
|
const type = reader.readUint32();
|
|
357
|
-
const offset = reader.readUint64();
|
|
374
|
+
const offset = reader.readUint64(`tensor "${name}" offset`);
|
|
358
375
|
|
|
359
376
|
tensors.push({
|
|
360
377
|
name,
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* @module formats/rdrr/groups
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import type { ComponentGroup } from './types.js';
|
|
9
|
+
import type { ComponentGroup, RDRRManifest } from './types.js';
|
|
10
10
|
|
|
11
11
|
export declare function getGroup(groupId: string): ComponentGroup | null;
|
|
12
12
|
|
|
@@ -16,11 +16,19 @@ export declare function getShardsForGroup(groupId: string): number[];
|
|
|
16
16
|
|
|
17
17
|
export declare function getTensorsForGroup(groupId: string): string[];
|
|
18
18
|
|
|
19
|
-
export declare function getShardsForExpert(
|
|
19
|
+
export declare function getShardsForExpert(
|
|
20
|
+
layerIdx: number,
|
|
21
|
+
expertIdx: number,
|
|
22
|
+
manifest?: RDRRManifest | null
|
|
23
|
+
): number[];
|
|
20
24
|
|
|
21
|
-
export declare function getTensorsForExpert(
|
|
25
|
+
export declare function getTensorsForExpert(
|
|
26
|
+
layerIdx: number,
|
|
27
|
+
expertIdx: number,
|
|
28
|
+
manifest?: RDRRManifest | null
|
|
29
|
+
): string[];
|
|
22
30
|
|
|
23
|
-
export declare function getExpertBytes(): number;
|
|
31
|
+
export declare function getExpertBytes(manifest?: RDRRManifest | null): number;
|
|
24
32
|
|
|
25
33
|
export declare function getLayerGroupIds(): string[];
|
|
26
34
|
|
|
@@ -19,8 +19,7 @@ export function getTensorsForGroup(groupId) {
|
|
|
19
19
|
return getManifest()?.groups?.[groupId]?.tensors ?? [];
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
export function getShardsForExpert(layerIdx, expertIdx) {
|
|
23
|
-
const manifest = getManifest();
|
|
22
|
+
export function getShardsForExpert(layerIdx, expertIdx, manifest = getManifest()) {
|
|
24
23
|
const groupId = `layer.${layerIdx}.expert.${expertIdx}`;
|
|
25
24
|
const group = manifest?.groups?.[groupId];
|
|
26
25
|
if (group) {
|
|
@@ -29,8 +28,7 @@ export function getShardsForExpert(layerIdx, expertIdx) {
|
|
|
29
28
|
throw new Error(`Missing expert group mapping: ${groupId}`);
|
|
30
29
|
}
|
|
31
30
|
|
|
32
|
-
export function getTensorsForExpert(layerIdx, expertIdx) {
|
|
33
|
-
const manifest = getManifest();
|
|
31
|
+
export function getTensorsForExpert(layerIdx, expertIdx, manifest = getManifest()) {
|
|
34
32
|
const groupId = `layer.${layerIdx}.expert.${expertIdx}`;
|
|
35
33
|
const group = manifest?.groups?.[groupId];
|
|
36
34
|
if (group) {
|
|
@@ -39,8 +37,7 @@ export function getTensorsForExpert(layerIdx, expertIdx) {
|
|
|
39
37
|
throw new Error(`Missing expert group mapping: ${groupId}`);
|
|
40
38
|
}
|
|
41
39
|
|
|
42
|
-
export function getExpertBytes() {
|
|
43
|
-
const manifest = getManifest();
|
|
40
|
+
export function getExpertBytes(manifest = getManifest()) {
|
|
44
41
|
const expertGroups = Object.entries(manifest?.groups || {})
|
|
45
42
|
.filter(([id]) => id.includes('.expert.'));
|
|
46
43
|
|
|
@@ -44,9 +44,13 @@ export function parseManifest(jsonString) {
|
|
|
44
44
|
export function parseTensorMap(jsonString) {
|
|
45
45
|
try {
|
|
46
46
|
const tensorMap = JSON.parse(jsonString);
|
|
47
|
+
const normalizedTensorMap = {};
|
|
47
48
|
|
|
48
49
|
for (const [name, loc] of Object.entries(tensorMap)) {
|
|
49
|
-
|
|
50
|
+
const shardIndex = typeof loc.shardIndex === 'number'
|
|
51
|
+
? loc.shardIndex
|
|
52
|
+
: loc.shard;
|
|
53
|
+
if (typeof shardIndex !== 'number') {
|
|
50
54
|
throw new Error(`Tensor '${name}' missing shard index`);
|
|
51
55
|
}
|
|
52
56
|
if (typeof loc.offset !== 'number') {
|
|
@@ -61,9 +65,42 @@ export function parseTensorMap(jsonString) {
|
|
|
61
65
|
if (typeof loc.role !== 'string') {
|
|
62
66
|
throw new Error(`Tensor '${name}' missing role`);
|
|
63
67
|
}
|
|
68
|
+
|
|
69
|
+
let spans = undefined;
|
|
70
|
+
if (loc.spans !== undefined) {
|
|
71
|
+
if (!Array.isArray(loc.spans)) {
|
|
72
|
+
throw new Error(`Tensor '${name}' has invalid spans array`);
|
|
73
|
+
}
|
|
74
|
+
spans = loc.spans.map((span, spanIndex) => {
|
|
75
|
+
const spanShardIndex = typeof span?.shardIndex === 'number'
|
|
76
|
+
? span.shardIndex
|
|
77
|
+
: span?.shard;
|
|
78
|
+
if (typeof spanShardIndex !== 'number') {
|
|
79
|
+
throw new Error(`Tensor '${name}' span[${spanIndex}] missing shard index`);
|
|
80
|
+
}
|
|
81
|
+
if (typeof span?.offset !== 'number') {
|
|
82
|
+
throw new Error(`Tensor '${name}' span[${spanIndex}] missing offset`);
|
|
83
|
+
}
|
|
84
|
+
if (typeof span?.size !== 'number') {
|
|
85
|
+
throw new Error(`Tensor '${name}' span[${spanIndex}] missing size`);
|
|
86
|
+
}
|
|
87
|
+
return {
|
|
88
|
+
shardIndex: spanShardIndex,
|
|
89
|
+
offset: span.offset,
|
|
90
|
+
size: span.size,
|
|
91
|
+
};
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
normalizedTensorMap[name] = {
|
|
96
|
+
...loc,
|
|
97
|
+
shard: shardIndex,
|
|
98
|
+
shardIndex,
|
|
99
|
+
spans,
|
|
100
|
+
};
|
|
64
101
|
}
|
|
65
102
|
|
|
66
|
-
return
|
|
103
|
+
return normalizedTensorMap;
|
|
67
104
|
} catch (e) {
|
|
68
105
|
if (e instanceof Error && e.message.includes('Tensor')) {
|
|
69
106
|
throw e;
|
|
@@ -75,13 +75,14 @@ export interface ComponentGroup extends ComponentGroupSchema {}
|
|
|
75
75
|
|
|
76
76
|
export interface TensorLocation {
|
|
77
77
|
shard: number;
|
|
78
|
+
shardIndex?: number;
|
|
78
79
|
offset: number;
|
|
79
80
|
size: number;
|
|
80
81
|
shape: number[];
|
|
81
82
|
dtype: string;
|
|
82
83
|
role: TensorRole;
|
|
83
84
|
group?: string;
|
|
84
|
-
spans?: Array<{ shardIndex
|
|
85
|
+
spans?: Array<{ shard?: number; shardIndex?: number; offset: number; size: number }>;
|
|
85
86
|
layout?: WeightLayout;
|
|
86
87
|
originalShape?: number[];
|
|
87
88
|
}
|