@simulatte/doppler 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +16 -23
- package/package.json +14 -1
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +7 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +12 -2
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +2 -1
- package/src/config/schema/manifest.schema.js +16 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +58 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +57 -41
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +62 -8
- package/src/inference/pipelines/text/attention/run.js +62 -8
- package/src/inference/pipelines/text/config.js +3 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +41 -19
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.js +78 -20
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +3 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +44 -25
|
@@ -229,9 +229,15 @@ export async function createRemoteTensorSource(url, options = {}) {
|
|
|
229
229
|
try {
|
|
230
230
|
const source = await createHttpTensorSource(url, options);
|
|
231
231
|
return { source, size: source.size, supportsRange: true };
|
|
232
|
-
} catch (
|
|
232
|
+
} catch (error) {
|
|
233
233
|
if (options.allowDownloadFallback === false) {
|
|
234
|
-
throw
|
|
234
|
+
throw error;
|
|
235
|
+
}
|
|
236
|
+
if (options.allowDownloadFallback !== true) {
|
|
237
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
238
|
+
throw new Error(
|
|
239
|
+
`HTTP tensor source failed for "${url}" and download fallback is not explicitly enabled: ${message}`
|
|
240
|
+
);
|
|
235
241
|
}
|
|
236
242
|
const downloaded = await createDownloadTensorSource(url, options);
|
|
237
243
|
return { ...downloaded, supportsRange: false };
|
|
@@ -61,7 +61,7 @@ export async function probeHttpRange(url, options = {}) {
|
|
|
61
61
|
acceptRanges,
|
|
62
62
|
contentEncoding,
|
|
63
63
|
};
|
|
64
|
-
} catch (
|
|
64
|
+
} catch (error) {
|
|
65
65
|
return {
|
|
66
66
|
ok: false,
|
|
67
67
|
status: 0,
|
|
@@ -69,6 +69,7 @@ export async function probeHttpRange(url, options = {}) {
|
|
|
69
69
|
size: null,
|
|
70
70
|
acceptRanges: null,
|
|
71
71
|
contentEncoding: null,
|
|
72
|
+
error: error instanceof Error ? error.message : String(error),
|
|
72
73
|
};
|
|
73
74
|
}
|
|
74
75
|
}
|
|
@@ -76,6 +77,9 @@ export async function probeHttpRange(url, options = {}) {
|
|
|
76
77
|
export async function createHttpTensorSource(url, options = {}) {
|
|
77
78
|
const { headers, signal, name: overrideName } = options;
|
|
78
79
|
const probe = await probeHttpRange(url, { headers, signal });
|
|
80
|
+
if (!probe.ok && probe.status === 0 && probe.error) {
|
|
81
|
+
throw new Error(`HTTP tensor source probe failed for "${url}": ${probe.error}`);
|
|
82
|
+
}
|
|
79
83
|
if (!probe.supportsRange || probe.size == null) {
|
|
80
84
|
throw new Error('HTTP range requests not supported for tensor source');
|
|
81
85
|
}
|
|
@@ -227,9 +227,7 @@ async function* dopplerGenerate(prompt, options = {}) {
|
|
|
227
227
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
228
228
|
throw new Error('doppler() requires options.model.');
|
|
229
229
|
}
|
|
230
|
-
|
|
231
|
-
throw new Error('doppler() does not accept load-affecting options. Use doppler.load(model, options) instead.');
|
|
232
|
-
}
|
|
230
|
+
assertNoLoadAffectingOptions('doppler()', options);
|
|
233
231
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
234
232
|
yield* model.generate(prompt, options);
|
|
235
233
|
}
|
|
@@ -240,10 +238,26 @@ export function doppler(prompt, options) {
|
|
|
240
238
|
|
|
241
239
|
doppler.load = load;
|
|
242
240
|
|
|
241
|
+
function assertNoLoadAffectingOptions(apiName, options) {
|
|
242
|
+
if (!options || typeof options !== 'object') {
|
|
243
|
+
return;
|
|
244
|
+
}
|
|
245
|
+
if (
|
|
246
|
+
options.runtimeConfig !== undefined
|
|
247
|
+
|| options.runtimePreset !== undefined
|
|
248
|
+
|| options.runtimeConfigUrl !== undefined
|
|
249
|
+
) {
|
|
250
|
+
throw new Error(
|
|
251
|
+
`${apiName} does not accept load-affecting options. Use doppler.load(model, options) instead.`
|
|
252
|
+
);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
243
256
|
doppler.text = async function text(prompt, options = {}) {
|
|
244
257
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
245
258
|
throw new Error('doppler.text() requires options.model.');
|
|
246
259
|
}
|
|
260
|
+
assertNoLoadAffectingOptions('doppler.text()', options);
|
|
247
261
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
248
262
|
return model.generateText(prompt, options);
|
|
249
263
|
};
|
|
@@ -252,6 +266,7 @@ doppler.chat = function chat(messages, options = {}) {
|
|
|
252
266
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
253
267
|
throw new Error('doppler.chat() requires options.model.');
|
|
254
268
|
}
|
|
269
|
+
assertNoLoadAffectingOptions('doppler.chat()', options);
|
|
255
270
|
return (async function* run() {
|
|
256
271
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
257
272
|
yield* model.chat(messages, options);
|
|
@@ -262,6 +277,7 @@ doppler.chatText = async function chatText(messages, options = {}) {
|
|
|
262
277
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
263
278
|
throw new Error('doppler.chatText() requires options.model.');
|
|
264
279
|
}
|
|
280
|
+
assertNoLoadAffectingOptions('doppler.chatText()', options);
|
|
265
281
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
266
282
|
return model.chatText(messages, options);
|
|
267
283
|
};
|
|
@@ -284,5 +300,5 @@ doppler.evictAll = async function evictAll() {
|
|
|
284
300
|
|
|
285
301
|
doppler.listModels = async function listModels() {
|
|
286
302
|
const models = await listQuickstartModels();
|
|
287
|
-
return models.map((entry) => entry.
|
|
303
|
+
return models.map((entry) => entry.modelId);
|
|
288
304
|
};
|
|
@@ -128,6 +128,21 @@ async function collectText(iterable) {
|
|
|
128
128
|
return output;
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
function assertNoLoadAffectingOptions(apiName, options) {
|
|
132
|
+
if (!options || typeof options !== 'object') {
|
|
133
|
+
return;
|
|
134
|
+
}
|
|
135
|
+
if (
|
|
136
|
+
options.runtimeConfig !== undefined
|
|
137
|
+
|| options.runtimePreset !== undefined
|
|
138
|
+
|| options.runtimeConfigUrl !== undefined
|
|
139
|
+
) {
|
|
140
|
+
throw new Error(
|
|
141
|
+
`${apiName} does not accept load-affecting options. Use doppler.load(model, options) instead.`
|
|
142
|
+
);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
131
146
|
function createModelHandle(pipeline, resolved) {
|
|
132
147
|
return {
|
|
133
148
|
generate(prompt, options = {}) {
|
|
@@ -246,9 +261,7 @@ async function* dopplerGenerate(prompt, options = {}) {
|
|
|
246
261
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
247
262
|
throw new Error('doppler() requires options.model.');
|
|
248
263
|
}
|
|
249
|
-
|
|
250
|
-
throw new Error('doppler() does not accept load-affecting options. Use doppler.load(model, options) instead.');
|
|
251
|
-
}
|
|
264
|
+
assertNoLoadAffectingOptions('doppler()', options);
|
|
252
265
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
253
266
|
yield* model.generate(prompt, options);
|
|
254
267
|
}
|
|
@@ -259,12 +272,14 @@ export function doppler(prompt, options) {
|
|
|
259
272
|
|
|
260
273
|
doppler.load = load;
|
|
261
274
|
doppler.text = async function text(prompt, options) {
|
|
275
|
+
assertNoLoadAffectingOptions('doppler.text()', options);
|
|
262
276
|
return collectText(doppler(prompt, options));
|
|
263
277
|
};
|
|
264
278
|
doppler.chat = function chat(messages, options = {}) {
|
|
265
279
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
266
280
|
throw new Error('doppler.chat() requires options.model.');
|
|
267
281
|
}
|
|
282
|
+
assertNoLoadAffectingOptions('doppler.chat()', options);
|
|
268
283
|
return (async function* () {
|
|
269
284
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
270
285
|
yield* model.chat(messages, options);
|
|
@@ -274,6 +289,7 @@ doppler.chatText = async function chatText(messages, options = {}) {
|
|
|
274
289
|
if (!options || typeof options !== 'object' || options.model == null) {
|
|
275
290
|
throw new Error('doppler.chatText() requires options.model.');
|
|
276
291
|
}
|
|
292
|
+
assertNoLoadAffectingOptions('doppler.chatText()', options);
|
|
277
293
|
const model = await getCachedModel(options.model, { onProgress: options.onProgress });
|
|
278
294
|
return model.chatText(messages, options);
|
|
279
295
|
};
|
|
@@ -11,6 +11,15 @@ import { getPipeline } from './model-manager.js';
|
|
|
11
11
|
|
|
12
12
|
export { formatGemmaChat, formatLlama3Chat, formatGptOssChat };
|
|
13
13
|
|
|
14
|
+
function assertSupportedGenerateOptions(options = {}) {
|
|
15
|
+
if (Array.isArray(options?.stopTokens) && options.stopTokens.length > 0) {
|
|
16
|
+
throw new Error(
|
|
17
|
+
'Doppler provider generate options do not support stopTokens on this surface. ' +
|
|
18
|
+
'Use stopSequences instead.'
|
|
19
|
+
);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
14
23
|
function resolveChatTemplate(pipeline, options) {
|
|
15
24
|
const override = options?.useChatTemplate;
|
|
16
25
|
const runtimeEnabled = pipeline?.runtimeConfig?.inference?.chatTemplate?.enabled;
|
|
@@ -21,6 +30,7 @@ function resolveChatTemplate(pipeline, options) {
|
|
|
21
30
|
}
|
|
22
31
|
|
|
23
32
|
export async function* generate(prompt, options = {}) {
|
|
33
|
+
assertSupportedGenerateOptions(options);
|
|
24
34
|
const pipeline = getPipeline();
|
|
25
35
|
if (!pipeline) {
|
|
26
36
|
throw new Error('No model loaded. Call loadModel() first.');
|
|
@@ -52,6 +62,7 @@ export async function* generate(prompt, options = {}) {
|
|
|
52
62
|
}
|
|
53
63
|
|
|
54
64
|
export async function prefillKV(prompt, options = {}) {
|
|
65
|
+
assertSupportedGenerateOptions(options);
|
|
55
66
|
const pipeline = getPipeline();
|
|
56
67
|
if (!pipeline) {
|
|
57
68
|
throw new Error('No model loaded. Call loadModel() first.');
|
|
@@ -61,6 +72,7 @@ export async function prefillKV(prompt, options = {}) {
|
|
|
61
72
|
}
|
|
62
73
|
|
|
63
74
|
export async function* generateWithPrefixKV(prefix, prompt, options = {}) {
|
|
75
|
+
assertSupportedGenerateOptions(options);
|
|
64
76
|
const pipeline = getPipeline();
|
|
65
77
|
if (!pipeline) {
|
|
66
78
|
throw new Error('No model loaded. Call loadModel() first.');
|
|
@@ -10,6 +10,16 @@ export declare function getPipeline(): InferencePipeline | null;
|
|
|
10
10
|
|
|
11
11
|
export declare function getCurrentModelId(): string | null;
|
|
12
12
|
|
|
13
|
+
export declare function verifyExplicitModelUrlMatch(
|
|
14
|
+
localManifest: RDRRManifest | Record<string, unknown> | null | undefined,
|
|
15
|
+
modelUrl: string | null | undefined,
|
|
16
|
+
fetchRemoteManifest?: (modelUrl: string) => Promise<RDRRManifest | Record<string, unknown> | null>
|
|
17
|
+
): Promise<void>;
|
|
18
|
+
|
|
19
|
+
export declare function shouldAutoTuneKernels(
|
|
20
|
+
runtimeConfig?: Record<string, unknown> | null
|
|
21
|
+
): boolean;
|
|
22
|
+
|
|
13
23
|
export declare function extractTextModelConfig(manifest: RDRRManifest): TextModelConfig;
|
|
14
24
|
|
|
15
25
|
export declare function readOPFSFile(path: string): Promise<ArrayBuffer>;
|
|
@@ -20,6 +20,12 @@ import { log } from '../../debug/index.js';
|
|
|
20
20
|
import { DopplerCapabilities } from './types.js';
|
|
21
21
|
import { GB, HEADER_READ_SIZE } from '../../config/schema/index.js';
|
|
22
22
|
import { resolveBridgeSourceRuntimeBundle } from './source-runtime.js';
|
|
23
|
+
import { getRuntimeConfig } from '../../config/runtime.js';
|
|
24
|
+
import {
|
|
25
|
+
buildSourceArtifactFingerprint,
|
|
26
|
+
createStoredSourceArtifactContext,
|
|
27
|
+
verifyStoredSourceArtifact,
|
|
28
|
+
} from '../../storage/source-artifact-store.js';
|
|
23
29
|
|
|
24
30
|
let pipeline = null;
|
|
25
31
|
let currentModelId = null;
|
|
@@ -34,6 +40,9 @@ function manifestsDiffer(localManifest, remoteManifest) {
|
|
|
34
40
|
const localShards = Array.isArray(localManifest.shards) ? localManifest.shards : [];
|
|
35
41
|
const remoteShards = Array.isArray(remoteManifest.shards) ? remoteManifest.shards : [];
|
|
36
42
|
if (localShards.length !== remoteShards.length) return true;
|
|
43
|
+
if (buildSourceArtifactFingerprint(localManifest) !== buildSourceArtifactFingerprint(remoteManifest)) {
|
|
44
|
+
return true;
|
|
45
|
+
}
|
|
37
46
|
|
|
38
47
|
for (let i = 0; i < localShards.length; i++) {
|
|
39
48
|
const local = localShards[i];
|
|
@@ -61,6 +70,34 @@ async function tryFetchRemoteManifest(modelUrl) {
|
|
|
61
70
|
return manifest;
|
|
62
71
|
}
|
|
63
72
|
|
|
73
|
+
export async function verifyExplicitModelUrlMatch(
|
|
74
|
+
localManifest,
|
|
75
|
+
modelUrl,
|
|
76
|
+
fetchRemoteManifest = tryFetchRemoteManifest
|
|
77
|
+
) {
|
|
78
|
+
if (!localManifest || !modelUrl) {
|
|
79
|
+
return;
|
|
80
|
+
}
|
|
81
|
+
let remoteManifest = null;
|
|
82
|
+
try {
|
|
83
|
+
remoteManifest = await fetchRemoteManifest(modelUrl);
|
|
84
|
+
} catch (error) {
|
|
85
|
+
throw new Error(
|
|
86
|
+
`Could not compare cached manifest with explicit modelUrl "${modelUrl}": ${error.message}`
|
|
87
|
+
);
|
|
88
|
+
}
|
|
89
|
+
if (remoteManifest && manifestsDiffer(localManifest, remoteManifest)) {
|
|
90
|
+
throw new Error(
|
|
91
|
+
`Explicit modelUrl "${modelUrl}" does not match the cached manifest for "${localManifest.modelId ?? 'unknown'}". ` +
|
|
92
|
+
'Clear the cache or load the matching source explicitly.'
|
|
93
|
+
);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
export function shouldAutoTuneKernels(runtimeConfig = getRuntimeConfig()) {
|
|
98
|
+
return runtimeConfig?.shared?.kernelWarmup?.autoTune === true;
|
|
99
|
+
}
|
|
100
|
+
|
|
64
101
|
export function getPipeline() {
|
|
65
102
|
return pipeline;
|
|
66
103
|
}
|
|
@@ -69,6 +106,14 @@ export function getCurrentModelId() {
|
|
|
69
106
|
return currentModelId;
|
|
70
107
|
}
|
|
71
108
|
|
|
109
|
+
function requireManifestQuantization(manifest) {
|
|
110
|
+
const quantization = String(manifest?.quantization ?? '').trim();
|
|
111
|
+
if (!quantization) {
|
|
112
|
+
throw new Error('Manifest is missing quantization; re-convert the model.');
|
|
113
|
+
}
|
|
114
|
+
return quantization.toUpperCase();
|
|
115
|
+
}
|
|
116
|
+
|
|
72
117
|
export function extractTextModelConfig(manifest) {
|
|
73
118
|
const arch = (manifest.architecture && typeof manifest.architecture === 'object')
|
|
74
119
|
? manifest.architecture
|
|
@@ -86,12 +131,12 @@ export function extractTextModelConfig(manifest) {
|
|
|
86
131
|
headDim: arch.headDim,
|
|
87
132
|
vocabSize: arch.vocabSize,
|
|
88
133
|
maxSeqLen: arch.maxSeqLen,
|
|
89
|
-
quantization: (manifest
|
|
134
|
+
quantization: requireManifestQuantization(manifest),
|
|
90
135
|
};
|
|
91
136
|
}
|
|
92
137
|
|
|
93
138
|
function estimateDequantizedWeightsBytes(manifest) {
|
|
94
|
-
const q = (manifest
|
|
139
|
+
const q = requireManifestQuantization(manifest);
|
|
95
140
|
const total = manifest?.totalSize || 0;
|
|
96
141
|
if (q.startsWith('Q4')) {
|
|
97
142
|
return total * 8;
|
|
@@ -243,6 +288,24 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
243
288
|
manifest = parseManifest(manifestJson);
|
|
244
289
|
log.info('DopplerProvider', `Loaded manifest via bridge: ${manifest.modelId}`);
|
|
245
290
|
if (onProgress) onProgress({ stage: 'manifest', message: 'Manifest loaded via bridge' });
|
|
291
|
+
const persistedSourceBundle = await resolveBridgeSourceRuntimeBundle({
|
|
292
|
+
bridgeClient,
|
|
293
|
+
localPath,
|
|
294
|
+
modelId,
|
|
295
|
+
manifest,
|
|
296
|
+
verifyHashes: true,
|
|
297
|
+
onProgress: (progress) => onProgress?.(progress),
|
|
298
|
+
});
|
|
299
|
+
if (persistedSourceBundle) {
|
|
300
|
+
bridgeStorageContext = persistedSourceBundle.storageContext;
|
|
301
|
+
bridgeSourceMode = true;
|
|
302
|
+
if (onProgress) {
|
|
303
|
+
onProgress({
|
|
304
|
+
stage: 'manifest',
|
|
305
|
+
message: `Direct-source manifest ready (${persistedSourceBundle.sourceKind} artifact mode)`,
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
}
|
|
246
309
|
} catch (manifestError) {
|
|
247
310
|
log.warn(
|
|
248
311
|
'DopplerProvider',
|
|
@@ -252,6 +315,7 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
252
315
|
bridgeClient,
|
|
253
316
|
localPath,
|
|
254
317
|
modelId,
|
|
318
|
+
verifyHashes: true,
|
|
255
319
|
onProgress: (progress) => onProgress?.(progress),
|
|
256
320
|
});
|
|
257
321
|
if (!sourceBundle) {
|
|
@@ -286,25 +350,26 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
286
350
|
|
|
287
351
|
let integrity = { valid: false, missingShards: [] };
|
|
288
352
|
if (manifest) {
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
353
|
+
const sourceArtifactFingerprint = buildSourceArtifactFingerprint(manifest);
|
|
354
|
+
if (sourceArtifactFingerprint) {
|
|
355
|
+
const sourceIntegrity = await verifyStoredSourceArtifact(manifest, { checkHashes: false }).catch(() => ({
|
|
356
|
+
valid: false,
|
|
357
|
+
missingFiles: [],
|
|
358
|
+
}));
|
|
359
|
+
integrity = {
|
|
360
|
+
valid: sourceIntegrity.valid,
|
|
361
|
+
missingShards: Array.isArray(sourceIntegrity.missingFiles) ? sourceIntegrity.missingFiles : [],
|
|
362
|
+
};
|
|
363
|
+
} else {
|
|
364
|
+
integrity = await verifyIntegrity({ checkHashes: false }).catch(() => ({
|
|
365
|
+
valid: false,
|
|
366
|
+
missingShards: [],
|
|
367
|
+
}));
|
|
368
|
+
}
|
|
293
369
|
}
|
|
294
370
|
|
|
295
371
|
if (integrity.valid && manifest && modelUrl) {
|
|
296
|
-
|
|
297
|
-
const remoteManifest = await tryFetchRemoteManifest(modelUrl);
|
|
298
|
-
if (remoteManifest && manifestsDiffer(manifest, remoteManifest)) {
|
|
299
|
-
log.info('DopplerProvider', 'Cached model differs from source URL manifest; refreshing cache');
|
|
300
|
-
integrity = { valid: false, missingShards: [] };
|
|
301
|
-
}
|
|
302
|
-
} catch (error) {
|
|
303
|
-
log.warn(
|
|
304
|
-
'DopplerProvider',
|
|
305
|
-
`Could not compare cached manifest with source URL (${error.message}); using cached model`
|
|
306
|
-
);
|
|
307
|
-
}
|
|
372
|
+
await verifyExplicitModelUrlMatch(manifest, modelUrl);
|
|
308
373
|
}
|
|
309
374
|
|
|
310
375
|
if (!integrity.valid && modelUrl) {
|
|
@@ -365,7 +430,11 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
365
430
|
DopplerCapabilities.kernelsWarmed = true;
|
|
366
431
|
}
|
|
367
432
|
|
|
368
|
-
if (
|
|
433
|
+
if (
|
|
434
|
+
!DopplerCapabilities.kernelsTuned
|
|
435
|
+
&& shouldAutoTuneKernels()
|
|
436
|
+
&& typeof setTimeout !== 'undefined'
|
|
437
|
+
) {
|
|
369
438
|
DopplerCapabilities.kernelsTuned = true;
|
|
370
439
|
const tuneConfig = extractTextModelConfig(manifest);
|
|
371
440
|
setTimeout(() => {
|
|
@@ -389,6 +458,9 @@ export async function loadModel(modelId, modelUrl = null, onProgress = null, loc
|
|
|
389
458
|
const memCaps = await getMemoryCapabilities();
|
|
390
459
|
|
|
391
460
|
let storageContext = bridgeStorageContext;
|
|
461
|
+
if (!storageContext && buildSourceArtifactFingerprint(manifest)) {
|
|
462
|
+
storageContext = createStoredSourceArtifactContext(manifest, { verifyHashes: true });
|
|
463
|
+
}
|
|
392
464
|
if (!storageContext && useBridge && DopplerCapabilities.bridgeClient && DopplerCapabilities.localPath) {
|
|
393
465
|
const bridgeClient = DopplerCapabilities.bridgeClient;
|
|
394
466
|
const basePath = DopplerCapabilities.localPath.endsWith('/')
|
|
@@ -6,7 +6,9 @@ export interface ResolveBridgeSourceRuntimeBundleOptions {
|
|
|
6
6
|
bridgeClient: ExtensionBridgeClient;
|
|
7
7
|
localPath: string;
|
|
8
8
|
modelId?: string | null;
|
|
9
|
+
manifest?: RDRRManifest | null;
|
|
9
10
|
onProgress?: (info: { stage: string; message: string }) => void;
|
|
11
|
+
verifyHashes?: boolean;
|
|
10
12
|
}
|
|
11
13
|
|
|
12
14
|
export interface BridgeSourceRuntimeBundle {
|
|
@@ -19,4 +21,3 @@ export interface BridgeSourceRuntimeBundle {
|
|
|
19
21
|
export declare function resolveBridgeSourceRuntimeBundle(
|
|
20
22
|
options: ResolveBridgeSourceRuntimeBundleOptions
|
|
21
23
|
): Promise<BridgeSourceRuntimeBundle | null>;
|
|
22
|
-
|
|
@@ -2,6 +2,7 @@ import {
|
|
|
2
2
|
createConverterConfig,
|
|
3
3
|
HEADER_READ_SIZE,
|
|
4
4
|
} from '../../config/schema/index.js';
|
|
5
|
+
import { DEFAULT_EXECUTION_V0_SESSION_DEFAULTS } from '../../config/schema/execution-v0.schema.js';
|
|
5
6
|
import { extractArchitecture } from '../../converter/core.js';
|
|
6
7
|
import {
|
|
7
8
|
inferSourceWeightQuantization,
|
|
@@ -13,9 +14,11 @@ import { parseTransformerModel } from '../../converter/parsers/transformer.js';
|
|
|
13
14
|
import { parseGGUFHeader } from '../../formats/gguf/types.js';
|
|
14
15
|
import { parseSafetensorsHeader } from '../../formats/safetensors/types.js';
|
|
15
16
|
import { log } from '../../debug/index.js';
|
|
17
|
+
import { computeHash } from '../../storage/shard-manager.js';
|
|
16
18
|
import {
|
|
17
19
|
buildSourceRuntimeBundle,
|
|
18
20
|
createSourceStorageContext,
|
|
21
|
+
getSourceRuntimeMetadata,
|
|
19
22
|
} from '../../tooling/source-runtime-bundle.js';
|
|
20
23
|
|
|
21
24
|
const SUPPORTED_SOURCE_DTYPES = new Set([
|
|
@@ -42,6 +45,15 @@ const SOURCE_RUNTIME_EXECUTION_OVERRIDE = {
|
|
|
42
45
|
],
|
|
43
46
|
};
|
|
44
47
|
|
|
48
|
+
const SOURCE_RUNTIME_SESSION_DEFAULTS = {
|
|
49
|
+
compute: {
|
|
50
|
+
defaults: { ...DEFAULT_EXECUTION_V0_SESSION_DEFAULTS.compute.defaults },
|
|
51
|
+
kernelProfiles: [],
|
|
52
|
+
},
|
|
53
|
+
kvcache: null,
|
|
54
|
+
decodeLoop: null,
|
|
55
|
+
};
|
|
56
|
+
|
|
45
57
|
function normalizeRelativePath(value) {
|
|
46
58
|
return String(value || '')
|
|
47
59
|
.replace(/\\/g, '/')
|
|
@@ -166,6 +178,14 @@ async function readBridgeRange(bridgeClient, fileEntry, offset, length) {
|
|
|
166
178
|
return bridgeClient.read(fileEntry.absolutePath, offset, length);
|
|
167
179
|
}
|
|
168
180
|
|
|
181
|
+
async function readBridgeAllBytes(bridgeClient, fileEntry, label) {
|
|
182
|
+
const size = Number(fileEntry?.size) || 0;
|
|
183
|
+
if (size < 0) {
|
|
184
|
+
throw new Error(`Invalid bridge file size for ${label}.`);
|
|
185
|
+
}
|
|
186
|
+
return readBridgeRange(bridgeClient, fileEntry, 0, size);
|
|
187
|
+
}
|
|
188
|
+
|
|
169
189
|
async function readBridgeTextFile(bridgeClient, fileEntry, label) {
|
|
170
190
|
const size = Number(fileEntry?.size) || 0;
|
|
171
191
|
if (size <= 0) {
|
|
@@ -274,7 +294,39 @@ async function parseBridgeSafetensorsModel(bridgeClient, fileIndex) {
|
|
|
274
294
|
}
|
|
275
295
|
return { path, size: entry.size };
|
|
276
296
|
}),
|
|
297
|
+
auxiliaryFiles: [
|
|
298
|
+
{ path: 'config.json', size: Number(fileIndex.get('config.json')?.size || 0), kind: 'config' },
|
|
299
|
+
...(fileIndex.has('model.safetensors.index.json')
|
|
300
|
+
? [{
|
|
301
|
+
path: 'model.safetensors.index.json',
|
|
302
|
+
size: Number(fileIndex.get('model.safetensors.index.json')?.size || 0),
|
|
303
|
+
kind: 'safetensors_index',
|
|
304
|
+
}]
|
|
305
|
+
: []),
|
|
306
|
+
...(fileIndex.has('tokenizer.json')
|
|
307
|
+
? [{
|
|
308
|
+
path: 'tokenizer.json',
|
|
309
|
+
size: Number(fileIndex.get('tokenizer.json')?.size || 0),
|
|
310
|
+
kind: 'tokenizer_json',
|
|
311
|
+
}]
|
|
312
|
+
: []),
|
|
313
|
+
...(fileIndex.has('tokenizer_config.json')
|
|
314
|
+
? [{
|
|
315
|
+
path: 'tokenizer_config.json',
|
|
316
|
+
size: Number(fileIndex.get('tokenizer_config.json')?.size || 0),
|
|
317
|
+
kind: 'tokenizer_config',
|
|
318
|
+
}]
|
|
319
|
+
: []),
|
|
320
|
+
...(fileIndex.has('tokenizer.model')
|
|
321
|
+
? [{
|
|
322
|
+
path: 'tokenizer.model',
|
|
323
|
+
size: Number(fileIndex.get('tokenizer.model')?.size || 0),
|
|
324
|
+
kind: 'tokenizer_model',
|
|
325
|
+
}]
|
|
326
|
+
: []),
|
|
327
|
+
],
|
|
277
328
|
tokenizerJsonPath: fileIndex.has('tokenizer.json') ? 'tokenizer.json' : null,
|
|
329
|
+
tokenizerConfigPath: fileIndex.has('tokenizer_config.json') ? 'tokenizer_config.json' : null,
|
|
278
330
|
tokenizerModelPath: fileIndex.has('tokenizer.model') ? 'tokenizer.model' : null,
|
|
279
331
|
};
|
|
280
332
|
}
|
|
@@ -339,7 +391,9 @@ async function parseBridgeGGUFModel(bridgeClient, fileIndex, ggufRelativePath) {
|
|
|
339
391
|
tokenizerConfig: null,
|
|
340
392
|
tokenizerModelName: null,
|
|
341
393
|
sourceFiles: [{ path: ggufRelativePath, size: ggufEntry.size }],
|
|
394
|
+
auxiliaryFiles: [],
|
|
342
395
|
tokenizerJsonPath: null,
|
|
396
|
+
tokenizerConfigPath: null,
|
|
343
397
|
tokenizerModelPath: null,
|
|
344
398
|
};
|
|
345
399
|
}
|
|
@@ -391,14 +445,7 @@ function createBridgeFileReaders(bridgeClient, fileMap, rootPath) {
|
|
|
391
445
|
return null;
|
|
392
446
|
}
|
|
393
447
|
const direct = map.get(hint);
|
|
394
|
-
|
|
395
|
-
return direct;
|
|
396
|
-
}
|
|
397
|
-
const basename = hint.split('/').pop();
|
|
398
|
-
if (basename && map.has(basename)) {
|
|
399
|
-
return map.get(basename);
|
|
400
|
-
}
|
|
401
|
-
return null;
|
|
448
|
+
return direct || null;
|
|
402
449
|
};
|
|
403
450
|
|
|
404
451
|
const readRange = async (relativePath, offset, length) => {
|
|
@@ -432,10 +479,53 @@ function createBridgeFileReaders(bridgeClient, fileMap, rootPath) {
|
|
|
432
479
|
};
|
|
433
480
|
}
|
|
434
481
|
|
|
482
|
+
async function addHashesToBridgeFiles(bridgeClient, fileIndex, entries, hashAlgorithm) {
|
|
483
|
+
const hashedEntries = [];
|
|
484
|
+
for (const entry of Array.isArray(entries) ? entries : []) {
|
|
485
|
+
const relativePath = normalizeRelativePath(entry?.path);
|
|
486
|
+
if (!relativePath) continue;
|
|
487
|
+
const fileEntry = fileIndex.get(relativePath);
|
|
488
|
+
if (!fileEntry) {
|
|
489
|
+
throw new Error(`Missing bridge file entry for "${relativePath}"`);
|
|
490
|
+
}
|
|
491
|
+
const bytes = await readBridgeAllBytes(bridgeClient, fileEntry, `bridge source asset (${relativePath})`);
|
|
492
|
+
hashedEntries.push({
|
|
493
|
+
...entry,
|
|
494
|
+
path: relativePath,
|
|
495
|
+
size: Number.isFinite(entry?.size) ? Math.max(0, Math.floor(Number(entry.size))) : fileEntry.size,
|
|
496
|
+
hash: await computeHash(toUint8Array(bytes), hashAlgorithm),
|
|
497
|
+
hashAlgorithm,
|
|
498
|
+
});
|
|
499
|
+
}
|
|
500
|
+
return hashedEntries;
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
async function resolveBridgeStorageContext(options = {}) {
|
|
504
|
+
const bridgeClient = options.bridgeClient;
|
|
505
|
+
const localPath = options.localPath;
|
|
506
|
+
const manifest = options.manifest;
|
|
507
|
+
const sourceRuntime = getSourceRuntimeMetadata(manifest);
|
|
508
|
+
if (!sourceRuntime) {
|
|
509
|
+
return null;
|
|
510
|
+
}
|
|
511
|
+
const files = await listBridgeFilesRecursive(bridgeClient, localPath);
|
|
512
|
+
const fileMap = indexBridgeFiles(files);
|
|
513
|
+
const readers = createBridgeFileReaders(bridgeClient, fileMap, localPath);
|
|
514
|
+
return createSourceStorageContext({
|
|
515
|
+
manifest,
|
|
516
|
+
readRange: readers.readRange,
|
|
517
|
+
readText: readers.readText,
|
|
518
|
+
readBinary: readers.readBinary,
|
|
519
|
+
verifyHashes: options.verifyHashes !== false,
|
|
520
|
+
});
|
|
521
|
+
}
|
|
522
|
+
|
|
435
523
|
export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
436
524
|
const bridgeClient = options.bridgeClient;
|
|
437
525
|
const localPath = options.localPath;
|
|
438
526
|
const requestedModelId = options.modelId || null;
|
|
527
|
+
const verifyHashes = options.verifyHashes !== false;
|
|
528
|
+
const existingManifest = options.manifest ?? null;
|
|
439
529
|
|
|
440
530
|
if (!bridgeClient || typeof bridgeClient.read !== 'function' || typeof bridgeClient.list !== 'function') {
|
|
441
531
|
throw new Error('Bridge source runtime requires a connected bridge client with read/list support.');
|
|
@@ -444,6 +534,21 @@ export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
|
444
534
|
throw new Error('Bridge source runtime requires localPath.');
|
|
445
535
|
}
|
|
446
536
|
|
|
537
|
+
if (existingManifest && getSourceRuntimeMetadata(existingManifest)) {
|
|
538
|
+
const storageContext = await resolveBridgeStorageContext({
|
|
539
|
+
bridgeClient,
|
|
540
|
+
localPath,
|
|
541
|
+
manifest: existingManifest,
|
|
542
|
+
verifyHashes,
|
|
543
|
+
});
|
|
544
|
+
return {
|
|
545
|
+
manifest: existingManifest,
|
|
546
|
+
storageContext,
|
|
547
|
+
sourceKind: getSourceRuntimeMetadata(existingManifest)?.sourceKind ?? 'safetensors',
|
|
548
|
+
sourceRoot: localPath,
|
|
549
|
+
};
|
|
550
|
+
}
|
|
551
|
+
|
|
447
552
|
options.onProgress?.({
|
|
448
553
|
stage: 'source-discovery',
|
|
449
554
|
message: 'Scanning source files via bridge...',
|
|
@@ -461,6 +566,7 @@ export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
|
461
566
|
modelBaseId: requestedModelId || null,
|
|
462
567
|
},
|
|
463
568
|
inference: {
|
|
569
|
+
sessionDefaults: SOURCE_RUNTIME_SESSION_DEFAULTS,
|
|
464
570
|
execution: SOURCE_RUNTIME_EXECUTION_OVERRIDE,
|
|
465
571
|
},
|
|
466
572
|
});
|
|
@@ -476,26 +582,39 @@ export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
|
476
582
|
});
|
|
477
583
|
|
|
478
584
|
const modelId = resolveModelIdHint(requestedModelId, plan, parsed.sourceKind);
|
|
585
|
+
const hashAlgorithm = converterConfig.manifest.hashAlgorithm;
|
|
586
|
+
const files = await listBridgeFilesRecursive(bridgeClient, localPath);
|
|
587
|
+
const fileMap = indexBridgeFiles(files);
|
|
588
|
+
const sourceFiles = await addHashesToBridgeFiles(bridgeClient, fileMap, parsed.sourceFiles, hashAlgorithm);
|
|
589
|
+
const auxiliaryFiles = await addHashesToBridgeFiles(
|
|
590
|
+
bridgeClient,
|
|
591
|
+
fileMap,
|
|
592
|
+
parsed.auxiliaryFiles,
|
|
593
|
+
hashAlgorithm
|
|
594
|
+
);
|
|
479
595
|
const { manifest, shardSources } = await buildSourceRuntimeBundle({
|
|
480
596
|
modelId,
|
|
481
597
|
modelName: modelId,
|
|
482
598
|
modelType: plan.modelType,
|
|
599
|
+
sourceKind: parsed.sourceKind,
|
|
483
600
|
architecture: parsed.architecture,
|
|
484
601
|
architectureHint: parsed.architectureHint,
|
|
485
602
|
rawConfig: parsed.config,
|
|
486
603
|
inference: plan.manifestInference,
|
|
487
604
|
tensors: parsed.tensors,
|
|
488
|
-
sourceFiles
|
|
605
|
+
sourceFiles,
|
|
606
|
+
auxiliaryFiles,
|
|
489
607
|
sourceQuantization: parsed.sourceQuantization,
|
|
490
608
|
quantizationInfo: plan.quantizationInfo,
|
|
491
|
-
hashAlgorithm
|
|
609
|
+
hashAlgorithm,
|
|
492
610
|
tokenizerJson: parsed.tokenizerJson,
|
|
493
611
|
tokenizerConfig: parsed.tokenizerConfig,
|
|
494
612
|
tokenizerModelName: parsed.tokenizerModelName,
|
|
613
|
+
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
614
|
+
tokenizerConfigPath: parsed.tokenizerConfigPath,
|
|
615
|
+
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
495
616
|
});
|
|
496
617
|
|
|
497
|
-
const files = await listBridgeFilesRecursive(bridgeClient, localPath);
|
|
498
|
-
const fileMap = indexBridgeFiles(files);
|
|
499
618
|
const readers = createBridgeFileReaders(bridgeClient, fileMap, localPath);
|
|
500
619
|
const storageContext = createSourceStorageContext({
|
|
501
620
|
manifest,
|
|
@@ -505,7 +624,7 @@ export async function resolveBridgeSourceRuntimeBundle(options = {}) {
|
|
|
505
624
|
readBinary: readers.readBinary,
|
|
506
625
|
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
507
626
|
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
508
|
-
verifyHashes
|
|
627
|
+
verifyHashes,
|
|
509
628
|
});
|
|
510
629
|
|
|
511
630
|
log.info(
|