@simulatte/doppler 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +16 -23
- package/package.json +30 -32
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +31 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +5 -20
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +18 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +81 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +15 -2
- package/src/config/merge-contract-check.js +66 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +43 -8
- package/src/config/presets/models/gemma2.json +3 -2
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +3 -2
- package/src/config/schema/manifest.schema.js +17 -4
- package/src/config/schema/storage.schema.js +1 -1
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +104 -11
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +16 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +50 -29
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +40 -16
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +83 -27
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +53 -3
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +59 -40
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +66 -43
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +8 -0
- package/src/inference/browser-harness.js +149 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +10 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +192 -112
- package/src/inference/pipelines/text/attention/record.js +77 -14
- package/src/inference/pipelines/text/attention/run.js +112 -14
- package/src/inference/pipelines/text/config.js +17 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +46 -23
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
- package/src/inference/pipelines/text/generator-steps.js +340 -221
- package/src/inference/pipelines/text/generator.js +56 -40
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +94 -25
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +4 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
- package/src/inference/pipelines/text/linear-attention.js +113 -9
- package/src/inference/pipelines/text/logits/gpu.js +12 -7
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +13 -12
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +282 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +17 -7
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +10 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +84 -14
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +214 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +27 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +365 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +55 -6
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +30 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +120 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/types/model.d.ts +5 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +50 -26
|
@@ -52,6 +52,7 @@ function normalizeExecutionConfig(value, defaults) {
|
|
|
52
52
|
const useGpuCast = value.useGpuCast == null
|
|
53
53
|
? defaults.useGpuCast === true
|
|
54
54
|
: value.useGpuCast === true;
|
|
55
|
+
const gpuCastRequestedExplicitly = value.useGpuCast === true;
|
|
55
56
|
if (value.useGpuCast != null && typeof value.useGpuCast !== 'boolean') {
|
|
56
57
|
throw new Error('node convert: execution.useGpuCast must be a boolean when provided.');
|
|
57
58
|
}
|
|
@@ -69,6 +70,7 @@ function normalizeExecutionConfig(value, defaults) {
|
|
|
69
70
|
rowChunkMinTensorBytes,
|
|
70
71
|
maxInFlightJobs,
|
|
71
72
|
useGpuCast,
|
|
73
|
+
gpuCastRequestedExplicitly,
|
|
72
74
|
gpuCastMinTensorBytes,
|
|
73
75
|
};
|
|
74
76
|
}
|
|
@@ -222,6 +224,7 @@ function createNodeGpuTensorTransformer(options) {
|
|
|
222
224
|
const {
|
|
223
225
|
runtime,
|
|
224
226
|
gpuCastMinTensorBytes,
|
|
227
|
+
requireGpuCast,
|
|
225
228
|
resolveTensorTargetQuant,
|
|
226
229
|
} = options;
|
|
227
230
|
const {
|
|
@@ -276,6 +279,11 @@ function createNodeGpuTensorTransformer(options) {
|
|
|
276
279
|
try {
|
|
277
280
|
const device = getDevice();
|
|
278
281
|
if (!device) {
|
|
282
|
+
if (requireGpuCast) {
|
|
283
|
+
throw new Error(
|
|
284
|
+
`node convert: execution.useGpuCast failed for tensor "${tensor.name}": GPU device is unavailable.`
|
|
285
|
+
);
|
|
286
|
+
}
|
|
279
287
|
return null;
|
|
280
288
|
}
|
|
281
289
|
inputBuffer = acquireBuffer(tensorData.byteLength, undefined, `convert_gpu_cast_in_${tensor.name}`);
|
|
@@ -292,6 +300,11 @@ function createNodeGpuTensorTransformer(options) {
|
|
|
292
300
|
|
|
293
301
|
const readback = await getBufferPool().readBuffer(outputBuffer, outputBytes);
|
|
294
302
|
if (!(readback instanceof ArrayBuffer) || readback.byteLength !== outputBytes) {
|
|
303
|
+
if (requireGpuCast) {
|
|
304
|
+
throw new Error(
|
|
305
|
+
`node convert: execution.useGpuCast failed for tensor "${tensor.name}": invalid GPU readback.`
|
|
306
|
+
);
|
|
307
|
+
}
|
|
295
308
|
return null;
|
|
296
309
|
}
|
|
297
310
|
reportProgress?.(tensorData.byteLength, tensorData.byteLength);
|
|
@@ -301,6 +314,10 @@ function createNodeGpuTensorTransformer(options) {
|
|
|
301
314
|
outLayout: null,
|
|
302
315
|
};
|
|
303
316
|
} catch (error) {
|
|
317
|
+
if (requireGpuCast) {
|
|
318
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
319
|
+
throw new Error(`node convert: execution.useGpuCast failed for tensor "${tensor.name}": ${message}`);
|
|
320
|
+
}
|
|
304
321
|
if (!warnedFallback) {
|
|
305
322
|
warnedFallback = true;
|
|
306
323
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -858,6 +875,7 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
858
875
|
let sourceQuantization = null;
|
|
859
876
|
let tokenizerJson = null;
|
|
860
877
|
let tokenizerConfig = null;
|
|
878
|
+
let generationConfig = null;
|
|
861
879
|
let hasTokenizerModel = false;
|
|
862
880
|
let tokenizerModelPath = null;
|
|
863
881
|
let diffusionAuxFiles = [];
|
|
@@ -1084,6 +1102,7 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
1084
1102
|
},
|
|
1085
1103
|
});
|
|
1086
1104
|
config = parsedTransformer.config;
|
|
1105
|
+
generationConfig = parsedTransformer.generationConfig ?? null;
|
|
1087
1106
|
tensors = parsedTransformer.tensors;
|
|
1088
1107
|
architectureHint = parsedTransformer.architectureHint;
|
|
1089
1108
|
architecture = extractArchitecture(config, null);
|
|
@@ -1152,6 +1171,7 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
1152
1171
|
quantization: targetQuantization,
|
|
1153
1172
|
tokenizerJson,
|
|
1154
1173
|
tokenizerConfig,
|
|
1174
|
+
generationConfig,
|
|
1155
1175
|
tokenizerModel: hasTokenizerModel ? 'tokenizer.model' : null,
|
|
1156
1176
|
};
|
|
1157
1177
|
|
|
@@ -1168,10 +1188,19 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
1168
1188
|
let result = null;
|
|
1169
1189
|
try {
|
|
1170
1190
|
if (executionPlan.useGpuCast) {
|
|
1171
|
-
|
|
1191
|
+
let gpuRuntime;
|
|
1192
|
+
try {
|
|
1193
|
+
gpuRuntime = await loadNodeGpuCastRuntime();
|
|
1194
|
+
} catch (error) {
|
|
1195
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1196
|
+
throw new Error(
|
|
1197
|
+
`node convert: execution.useGpuCast requires a WebGPU-capable Node runtime. ${message}`
|
|
1198
|
+
);
|
|
1199
|
+
}
|
|
1172
1200
|
gpuTensorTransformer = createNodeGpuTensorTransformer({
|
|
1173
1201
|
runtime: gpuRuntime,
|
|
1174
1202
|
gpuCastMinTensorBytes: executionPlan.gpuCastMinTensorBytes,
|
|
1203
|
+
requireGpuCast: executionPlan.gpuCastRequestedExplicitly === true,
|
|
1175
1204
|
resolveTensorTargetQuant,
|
|
1176
1205
|
});
|
|
1177
1206
|
}
|
|
@@ -4,6 +4,7 @@ import type { PipelineStorageContext } from '../inference/pipelines/text/init.js
|
|
|
4
4
|
export interface ResolveNodeSourceRuntimeBundleOptions {
|
|
5
5
|
inputPath: string;
|
|
6
6
|
modelId?: string | null;
|
|
7
|
+
verifyHashes?: boolean;
|
|
7
8
|
}
|
|
8
9
|
|
|
9
10
|
export interface NodeSourceRuntimeBundle {
|
|
@@ -16,4 +17,3 @@ export interface NodeSourceRuntimeBundle {
|
|
|
16
17
|
export declare function resolveNodeSourceRuntimeBundle(
|
|
17
18
|
options: ResolveNodeSourceRuntimeBundleOptions
|
|
18
19
|
): Promise<NodeSourceRuntimeBundle | null>;
|
|
19
|
-
|
|
@@ -3,6 +3,7 @@ import path from 'node:path';
|
|
|
3
3
|
import {
|
|
4
4
|
HEADER_READ_SIZE,
|
|
5
5
|
createConverterConfig,
|
|
6
|
+
DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
|
|
6
7
|
} from '../config/schema/index.js';
|
|
7
8
|
import { extractArchitecture } from '../converter/core.js';
|
|
8
9
|
import {
|
|
@@ -15,6 +16,7 @@ import { parseTransformerModel } from '../converter/parsers/transformer.js';
|
|
|
15
16
|
import { parseGGUFHeader } from '../formats/gguf/types.js';
|
|
16
17
|
import { parseSafetensorsHeader } from '../formats/safetensors/types.js';
|
|
17
18
|
import { log } from '../debug/index.js';
|
|
19
|
+
import { computeHash } from '../storage/shard-manager.js';
|
|
18
20
|
import {
|
|
19
21
|
buildSourceRuntimeBundle,
|
|
20
22
|
createSourceStorageContext,
|
|
@@ -33,6 +35,13 @@ const SOURCE_RUNTIME_EXECUTION_OVERRIDE = {
|
|
|
33
35
|
steps: [],
|
|
34
36
|
};
|
|
35
37
|
|
|
38
|
+
/**
 * Return an independent copy of DEFAULT_EXECUTION_V0_SESSION_DEFAULTS so callers
 * can hand it to a config builder without sharing the module-level value.
 *
 * Uses the global structuredClone when the runtime provides it; otherwise falls
 * back to a JSON serialize/parse round-trip.
 *
 * @returns {*} A copy of the session-defaults value.
 */
function cloneExecutionV0SessionDefaults() {
  const hasStructuredClone = typeof structuredClone === 'function';
  return hasStructuredClone
    ? structuredClone(DEFAULT_EXECUTION_V0_SESSION_DEFAULTS)
    : JSON.parse(JSON.stringify(DEFAULT_EXECUTION_V0_SESSION_DEFAULTS));
}
|
|
44
|
+
|
|
36
45
|
function toArrayBuffer(value, label) {
|
|
37
46
|
if (value instanceof ArrayBuffer) {
|
|
38
47
|
return value;
|
|
@@ -105,6 +114,16 @@ async function readJson(filePath, label) {
|
|
|
105
114
|
}
|
|
106
115
|
}
|
|
107
116
|
|
|
117
|
+
/**
 * Read a whole file and return its contents as a standalone ArrayBuffer.
 *
 * @param {string} filePath - Path handed to fs.readFile.
 * @param {string} label - Human-readable description of the file, used in the error message.
 * @returns {Promise<ArrayBuffer>} A buffer containing exactly the file's bytes.
 * @throws {Error} When the read fails. The message embeds `label`, `filePath` and the
 *   underlying message; the original failure is attached as `cause` so callers can
 *   still inspect fields such as `code`.
 */
async function readFileBytes(filePath, label) {
  try {
    const bytes = await fs.readFile(filePath);
    // A Buffer may be a view into a larger backing ArrayBuffer, so copy out only
    // the byte range this view covers.
    return bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the original error reachable instead of flattening it to a string.
    throw new Error(`Failed to read ${label} "${filePath}": ${message}`, { cause: error });
  }
}
|
|
126
|
+
|
|
108
127
|
async function readRange(filePath, offset, length) {
|
|
109
128
|
if (!Number.isFinite(offset) || !Number.isFinite(length) || length <= 0) {
|
|
110
129
|
return new ArrayBuffer(0);
|
|
@@ -206,6 +225,37 @@ async function parseSafetensorsInput(inputDir) {
|
|
|
206
225
|
const stats = await getPathStats(sourcePath, `source shard (${sourcePath})`);
|
|
207
226
|
sourceFiles.push({ path: sourcePath, size: Number(stats.size) });
|
|
208
227
|
}
|
|
228
|
+
const auxiliaryFiles = [
|
|
229
|
+
{ path: configPath, size: Number((await getPathStats(configPath, 'config.json')).size), kind: 'config' },
|
|
230
|
+
...(hasIndex
|
|
231
|
+
? [{
|
|
232
|
+
path: path.join(inputDir, 'model.safetensors.index.json'),
|
|
233
|
+
size: Number((await getPathStats(path.join(inputDir, 'model.safetensors.index.json'), 'model.safetensors.index.json')).size),
|
|
234
|
+
kind: 'safetensors_index',
|
|
235
|
+
}]
|
|
236
|
+
: []),
|
|
237
|
+
...(tokenizerJson
|
|
238
|
+
? [{
|
|
239
|
+
path: tokenizerJsonPath,
|
|
240
|
+
size: Number((await getPathStats(tokenizerJsonPath, 'tokenizer.json')).size),
|
|
241
|
+
kind: 'tokenizer_json',
|
|
242
|
+
}]
|
|
243
|
+
: []),
|
|
244
|
+
...(tokenizerConfig
|
|
245
|
+
? [{
|
|
246
|
+
path: tokenizerConfigPath,
|
|
247
|
+
size: Number((await getPathStats(tokenizerConfigPath, 'tokenizer_config.json')).size),
|
|
248
|
+
kind: 'tokenizer_config',
|
|
249
|
+
}]
|
|
250
|
+
: []),
|
|
251
|
+
...(hasTokenizerModel
|
|
252
|
+
? [{
|
|
253
|
+
path: tokenizerModelPath,
|
|
254
|
+
size: Number((await getPathStats(tokenizerModelPath, 'tokenizer.model')).size),
|
|
255
|
+
kind: 'tokenizer_model',
|
|
256
|
+
}]
|
|
257
|
+
: []),
|
|
258
|
+
];
|
|
209
259
|
|
|
210
260
|
return {
|
|
211
261
|
sourceKind: 'safetensors',
|
|
@@ -220,8 +270,10 @@ async function parseSafetensorsInput(inputDir) {
|
|
|
220
270
|
tokenizerConfig,
|
|
221
271
|
tokenizerModelName: hasTokenizerModel ? 'tokenizer.model' : null,
|
|
222
272
|
tokenizerJsonPath: tokenizerJsonPath,
|
|
273
|
+
tokenizerConfigPath: tokenizerConfigPath,
|
|
223
274
|
tokenizerModelPath: hasTokenizerModel ? tokenizerModelPath : null,
|
|
224
275
|
sourceFiles,
|
|
276
|
+
auxiliaryFiles,
|
|
225
277
|
};
|
|
226
278
|
}
|
|
227
279
|
|
|
@@ -283,8 +335,10 @@ async function parseGgufInput(ggufPath) {
|
|
|
283
335
|
tokenizerConfig: null,
|
|
284
336
|
tokenizerModelName: null,
|
|
285
337
|
tokenizerJsonPath: null,
|
|
338
|
+
tokenizerConfigPath: null,
|
|
286
339
|
tokenizerModelPath: null,
|
|
287
340
|
sourceFiles: [{ path: ggufPath, size: fileSize }],
|
|
341
|
+
auxiliaryFiles: [],
|
|
288
342
|
};
|
|
289
343
|
}
|
|
290
344
|
|
|
@@ -357,11 +411,62 @@ function buildNodeFileReaders() {
|
|
|
357
411
|
};
|
|
358
412
|
}
|
|
359
413
|
|
|
414
|
+
// Source dtype → compute precision mapping for source-runtime inference.
// BF16/F32 sources require f32 compute (BF16 has no native WebGPU support).
// Quantized formats require f32 compute for dequantization accuracy.
// F16 sources can use f16 compute directly.
// Frozen: this is a shared module-level lookup table and must not be mutated.
const SOURCE_QUANT_COMPUTE_MAP = Object.freeze({
  'F16': 'f16',
  'BF16': 'f32',
  'F32': 'f32',
  'Q4_K': 'f32',
  'Q4_K_M': 'f32',
  'Q6_K': 'f32',
});
// Precision used when neither the tensors nor the source quantization tag
// match an entry in the table above.
const SOURCE_COMPUTE_DEFAULT = 'f16';

/**
 * Choose the compute precision for running a model directly from its source files.
 *
 * @param {Array<{dtype?: string}>|null|undefined} tensors - Parsed tensor descriptors;
 *   any non-array value is treated as an empty list. Dtype names are trimmed and
 *   compared case-insensitively.
 * @param {string|null|undefined} sourceQuantization - Overall source quantization tag,
 *   consulted only when no tensor forces f32.
 * @returns {string} 'f32' if any tensor dtype maps to f32; otherwise the table entry for
 *   `sourceQuantization`, or SOURCE_COMPUTE_DEFAULT when the tag is not in the table.
 */
function resolveSourceRuntimeComputePrecision(tensors, sourceQuantization) {
  // If any tensor requires f32 compute, use f32 for all. One pass with an early
  // exit is enough; there is no need to collect the distinct dtypes first.
  for (const tensor of Array.isArray(tensors) ? tensors : []) {
    const dtype = String(tensor?.dtype || '').trim().toUpperCase();
    if (SOURCE_QUANT_COMPUTE_MAP[dtype] === 'f32') {
      return 'f32';
    }
  }

  const normalized = String(sourceQuantization || '').trim().toUpperCase();
  return SOURCE_QUANT_COMPUTE_MAP[normalized] ?? SOURCE_COMPUTE_DEFAULT;
}
|
|
446
|
+
|
|
447
|
+
/**
 * Build a new list of file entries, each annotated with a content hash.
 *
 * Entries are processed one at a time, in order. An entry whose `path` normalizes to a
 * falsy value is dropped. Each surviving entry keeps its own fields and gains/overrides
 * `path` (normalized), `size`, `hash` and `hashAlgorithm`.
 *
 * NOTE(review): every file is read fully into memory before hashing — confirm this is
 * acceptable for large weight shards.
 *
 * @param {Array<object>|null|undefined} entries - File descriptors with at least `path`
 *   and optionally `size`; any non-array value is treated as an empty list.
 * @param {string} hashAlgorithm - Algorithm identifier forwarded to computeHash and
 *   recorded on each result.
 * @returns {Promise<Array<object>>} The annotated entries.
 */
async function addHashesToFileEntries(entries, hashAlgorithm) {
  const hashedEntries = [];
  const candidates = Array.isArray(entries) ? entries : [];
  for (const candidate of candidates) {
    const resolvedPath = normalizePath(candidate?.path);
    if (resolvedPath) {
      const contents = await readFileBytes(resolvedPath, `source asset (${resolvedPath})`);
      // Trust a finite declared size (floored, clamped to >= 0); otherwise use
      // the number of bytes actually read.
      let size = contents.byteLength;
      if (Number.isFinite(candidate?.size)) {
        size = Math.max(0, Math.floor(Number(candidate.size)));
      }
      const hash = await computeHash(new Uint8Array(contents), hashAlgorithm);
      hashedEntries.push({
        ...candidate,
        path: resolvedPath,
        size,
        hash,
        hashAlgorithm,
      });
    }
  }
  return hashedEntries;
}
|
|
463
|
+
|
|
360
464
|
export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
361
465
|
const inputPath = normalizePath(options.inputPath);
|
|
362
466
|
if (!inputPath) {
|
|
363
467
|
throw new Error('node source runtime: inputPath is required.');
|
|
364
468
|
}
|
|
469
|
+
const verifyHashes = options.verifyHashes === true;
|
|
365
470
|
const resolvedInputPath = path.resolve(inputPath);
|
|
366
471
|
const stats = await getPathStats(resolvedInputPath, 'inputPath');
|
|
367
472
|
|
|
@@ -401,10 +506,14 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
401
506
|
assertSupportedSourceDtypes(parsed.tensors, parsed.sourceKind);
|
|
402
507
|
|
|
403
508
|
const converterConfig = createConverterConfig({
|
|
509
|
+
quantization: {
|
|
510
|
+
computePrecision: resolveSourceRuntimeComputePrecision(parsed.tensors, parsed.sourceQuantization),
|
|
511
|
+
},
|
|
404
512
|
output: {
|
|
405
513
|
modelBaseId: options.modelId || null,
|
|
406
514
|
},
|
|
407
515
|
inference: {
|
|
516
|
+
sessionDefaults: cloneExecutionV0SessionDefaults(),
|
|
408
517
|
execution: SOURCE_RUNTIME_EXECUTION_OVERRIDE,
|
|
409
518
|
},
|
|
410
519
|
});
|
|
@@ -425,22 +534,30 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
425
534
|
parsed.sourceKind,
|
|
426
535
|
parsed.sourcePathForModelId
|
|
427
536
|
);
|
|
537
|
+
const hashAlgorithm = converterConfig.manifest.hashAlgorithm;
|
|
538
|
+
const sourceFiles = await addHashesToFileEntries(parsed.sourceFiles, hashAlgorithm);
|
|
539
|
+
const auxiliaryFiles = await addHashesToFileEntries(parsed.auxiliaryFiles, hashAlgorithm);
|
|
428
540
|
const { manifest, shardSources } = await buildSourceRuntimeBundle({
|
|
429
541
|
modelId,
|
|
430
542
|
modelName: modelId,
|
|
431
543
|
modelType: plan.modelType,
|
|
544
|
+
sourceKind: parsed.sourceKind,
|
|
432
545
|
architecture: parsed.architecture,
|
|
433
546
|
architectureHint: parsed.architectureHint,
|
|
434
547
|
rawConfig: parsed.config,
|
|
435
548
|
inference: plan.manifestInference,
|
|
436
549
|
tensors: parsed.tensors,
|
|
437
|
-
sourceFiles
|
|
550
|
+
sourceFiles,
|
|
551
|
+
auxiliaryFiles,
|
|
438
552
|
sourceQuantization: parsed.sourceQuantization,
|
|
439
553
|
quantizationInfo: plan.quantizationInfo,
|
|
440
|
-
hashAlgorithm
|
|
554
|
+
hashAlgorithm,
|
|
441
555
|
tokenizerJson: parsed.tokenizerJson,
|
|
442
556
|
tokenizerConfig: parsed.tokenizerConfig,
|
|
443
557
|
tokenizerModelName: parsed.tokenizerModelName,
|
|
558
|
+
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
559
|
+
tokenizerConfigPath: parsed.tokenizerConfigPath,
|
|
560
|
+
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
444
561
|
});
|
|
445
562
|
|
|
446
563
|
const readers = buildNodeFileReaders();
|
|
@@ -452,7 +569,7 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
452
569
|
readBinary: readers.readBinary,
|
|
453
570
|
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
454
571
|
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
455
|
-
verifyHashes
|
|
572
|
+
verifyHashes,
|
|
456
573
|
});
|
|
457
574
|
|
|
458
575
|
log.info(
|
|
@@ -3,9 +3,12 @@ import { dirname, isAbsolute, resolve } from 'node:path';
|
|
|
3
3
|
import { fileURLToPath, pathToFileURL } from 'node:url';
|
|
4
4
|
|
|
5
5
|
// Value importWithProviderOverride() temporarily writes into the create-args
// environment variable when the caller has not supplied a non-blank one.
const DEFAULT_DOE_PROVIDER_CREATE_ARGS = 'enable-dawn-features=allow_unsafe_apis';
|
|
6
|
+
// Name of the process.env key that importWithProviderOverride() reads and
// temporarily sets around the provider import.
const DOE_PROVIDER_CREATE_ARGS_ENV = 'FAWN_WEBGPU_CREATE_ARGS';
|
|
6
7
|
|
|
7
8
|
/**
 * Reports whether a usable `navigator.gpu` is already installed on globalThis.
 *
 * "Usable" means `navigator` exists, `navigator.gpu` is truthy, and
 * `navigator.gpu.requestAdapter` is a function.
 *
 * @returns {boolean} True only when all three conditions hold.
 */
function hasNavigatorGpu() {
  if (typeof globalThis.navigator === 'undefined') {
    return false;
  }
  const gpu = globalThis.navigator?.gpu;
  if (!gpu) {
    return false;
  }
  return typeof gpu.requestAdapter === 'function';
}
|
|
10
13
|
|
|
11
14
|
function hasGpuEnums() {
|
|
@@ -51,18 +54,12 @@ function resolveDefaultWebgpuModuleSpecifiers() {
|
|
|
51
54
|
return ['@simulatte/webgpu', 'webgpu'];
|
|
52
55
|
}
|
|
53
56
|
|
|
54
|
-
function
|
|
57
|
+
/**
 * Resolves the WebGPU module explicitly requested through the
 * DOPPLER_NODE_WEBGPU_MODULE environment variable.
 *
 * @returns {*} The result of resolveCandidateModuleSpecifier() for the trimmed
 *   value, or null when the variable is unset or blank.
 */
function resolveExplicitWebgpuModuleSpecifier() {
  const configured = process.env.DOPPLER_NODE_WEBGPU_MODULE;
  const trimmed = typeof configured === 'string' ? configured.trim() : '';
  return trimmed.length > 0 ? resolveCandidateModuleSpecifier(trimmed) : null;
}
|
|
67
64
|
|
|
68
65
|
function isDoeWebgpuSpecifier(specifier) {
|
|
@@ -76,15 +73,15 @@ function isDoeWebgpuSpecifier(specifier) {
|
|
|
76
73
|
|
|
77
74
|
/**
 * Dynamically imports a WebGPU provider module, supplying default create-args
 * through the environment when the caller has not configured any.
 *
 * The default is applied only when isDoeWebgpuSpecifier(specifier) is truthy
 * and the DOE_PROVIDER_CREATE_ARGS_ENV variable is unset or blank. In that
 * case the variable is set to DEFAULT_DOE_PROVIDER_CREATE_ARGS for the
 * duration of the import and then put back exactly as it was found (including
 * an empty or whitespace-only value), whether the import succeeds or fails.
 *
 * @param {string} specifier - Module specifier passed to `import()`.
 * @returns {Promise<object>} The imported module namespace.
 * @throws Whatever `import(specifier)` rejects with.
 */
async function importWithProviderOverride(specifier) {
  const previousValue = process.env[DOE_PROVIDER_CREATE_ARGS_ENV];
  const callerSuppliedArgs = typeof previousValue === 'string' && previousValue.trim().length > 0;

  if (!isDoeWebgpuSpecifier(specifier) || callerSuppliedArgs) {
    return import(specifier);
  }

  process.env[DOE_PROVIDER_CREATE_ARGS_ENV] = DEFAULT_DOE_PROVIDER_CREATE_ARGS;
  try {
    return await import(specifier);
  } finally {
    // Restore rather than unconditionally delete: a blank value that was
    // present before the import must still be present afterwards. Assigning
    // `undefined` to process.env would store the string "undefined", so the
    // originally-unset case has to use `delete`.
    if (previousValue === undefined) {
      delete process.env[DOE_PROVIDER_CREATE_ARGS_ENV];
    } else {
      process.env[DOE_PROVIDER_CREATE_ARGS_ENV] = previousValue;
    }
  }
}
|
|
90
87
|
|
|
@@ -237,27 +234,33 @@ function installWebgpuFromModule(mod) {
|
|
|
237
234
|
}
|
|
238
235
|
|
|
239
236
|
export async function bootstrapNodeWebGPU() {
|
|
237
|
+
const explicitSpecifier = resolveExplicitWebgpuModuleSpecifier();
|
|
238
|
+
if (explicitSpecifier) {
|
|
239
|
+
try {
|
|
240
|
+
const mod = await importWithProviderOverride(explicitSpecifier);
|
|
241
|
+
if (installWebgpuFromModule(mod)) {
|
|
242
|
+
return { ok: true, provider: explicitSpecifier };
|
|
243
|
+
}
|
|
244
|
+
return { ok: false, provider: explicitSpecifier };
|
|
245
|
+
} catch {
|
|
246
|
+
return { ok: false, provider: explicitSpecifier };
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
240
250
|
if (hasNavigatorGpu() && hasGpuEnums()) {
|
|
241
251
|
return { ok: true, provider: 'pre-installed' };
|
|
242
252
|
}
|
|
243
253
|
|
|
244
|
-
const
|
|
245
|
-
for (const specifier of specifiers) {
|
|
254
|
+
for (const specifier of resolveDefaultWebgpuModuleSpecifiers()) {
|
|
246
255
|
let mod;
|
|
247
256
|
try {
|
|
248
257
|
mod = await importWithProviderOverride(specifier);
|
|
249
258
|
} catch {
|
|
250
|
-
if (explicit) {
|
|
251
|
-
return { ok: false, provider: null };
|
|
252
|
-
}
|
|
253
259
|
continue;
|
|
254
260
|
}
|
|
255
261
|
if (installWebgpuFromModule(mod)) {
|
|
256
262
|
return { ok: true, provider: specifier };
|
|
257
263
|
}
|
|
258
|
-
if (explicit) {
|
|
259
|
-
return { ok: false, provider: null };
|
|
260
|
-
}
|
|
261
264
|
}
|
|
262
265
|
|
|
263
266
|
return { ok: false, provider: null };
|
|
@@ -8,6 +8,10 @@ import { downloadModel } from '../storage/downloader.js';
|
|
|
8
8
|
import { isOPFSAvailable } from '../storage/quota.js';
|
|
9
9
|
import { parseManifest, getManifestUrl } from '../formats/rdrr/index.js';
|
|
10
10
|
import { log } from '../debug/index.js';
|
|
11
|
+
import {
|
|
12
|
+
resolveSourceArtifact,
|
|
13
|
+
verifyStoredSourceArtifact,
|
|
14
|
+
} from '../storage/source-artifact-store.js';
|
|
11
15
|
|
|
12
16
|
const MODULE = 'OPFSCache';
|
|
13
17
|
|
|
@@ -43,6 +47,7 @@ function hasSameShardSet(aManifest, bManifest) {
|
|
|
43
47
|
}
|
|
44
48
|
|
|
45
49
|
function buildManifestFingerprint(manifest) {
|
|
50
|
+
const sourceArtifactFingerprint = resolveSourceArtifact(manifest)?.fingerprint ?? null;
|
|
46
51
|
const inference = manifest?.inference ?? {};
|
|
47
52
|
const layerPattern = inference?.layerPattern ?? {};
|
|
48
53
|
const quantizationInfo = manifest?.quantizationInfo ?? {};
|
|
@@ -75,6 +80,7 @@ function buildManifestFingerprint(manifest) {
|
|
|
75
80
|
},
|
|
76
81
|
},
|
|
77
82
|
shards,
|
|
83
|
+
sourceArtifactFingerprint,
|
|
78
84
|
});
|
|
79
85
|
}
|
|
80
86
|
|
|
@@ -119,16 +125,27 @@ export async function ensureModelCached(modelId, modelBaseUrl) {
|
|
|
119
125
|
if (!cachedManifestText || !cachedManifest) {
|
|
120
126
|
log.warn(MODULE, `Cache miss: "${modelId}" has no readable manifest in OPFS; re-importing`);
|
|
121
127
|
} else {
|
|
128
|
+
const cachedSourceArtifact = resolveSourceArtifact(cachedManifest);
|
|
129
|
+
const sourceIntegrity = cachedSourceArtifact
|
|
130
|
+
? await verifyStoredSourceArtifact(cachedManifest, { checkHashes: false })
|
|
131
|
+
: null;
|
|
132
|
+
const sourceIntegrityValid = !sourceIntegrity || sourceIntegrity.valid;
|
|
133
|
+
if (sourceIntegrity && !sourceIntegrity.valid) {
|
|
134
|
+
log.warn(
|
|
135
|
+
MODULE,
|
|
136
|
+
`Cache stale: "${modelId}" direct-source assets are incomplete (${sourceIntegrity.missingFiles.join(', ')})`
|
|
137
|
+
);
|
|
138
|
+
}
|
|
122
139
|
const cachedFingerprint = buildManifestFingerprint(cachedManifest);
|
|
123
140
|
const remoteFingerprint = buildManifestFingerprint(remoteManifest);
|
|
124
|
-
if (cachedFingerprint === remoteFingerprint) {
|
|
141
|
+
if (sourceIntegrityValid && cachedFingerprint === remoteFingerprint) {
|
|
125
142
|
log.info(MODULE, `Cache hit: "${modelId}"`);
|
|
126
143
|
return { cached: true, fromCache: true, modelId, error: null };
|
|
127
144
|
}
|
|
128
145
|
|
|
129
146
|
const sameShards = hasSameShardSet(cachedManifest, remoteManifest);
|
|
130
147
|
const sameHashAlgorithm = (cachedManifest?.hashAlgorithm ?? null) === (remoteManifest?.hashAlgorithm ?? null);
|
|
131
|
-
if (sameShards && sameHashAlgorithm) {
|
|
148
|
+
if (sourceIntegrityValid && sameShards && sameHashAlgorithm) {
|
|
132
149
|
await openModelStore(modelId);
|
|
133
150
|
await saveManifest(remoteManifestText);
|
|
134
151
|
log.info(MODULE, `Cache manifest refreshed: "${modelId}" (shards unchanged)`);
|
|
@@ -138,8 +155,8 @@ export async function ensureModelCached(modelId, modelBaseUrl) {
|
|
|
138
155
|
}
|
|
139
156
|
} catch (error) {
|
|
140
157
|
const message = toErrorMessage(error);
|
|
141
|
-
log.warn(MODULE, `Cache validation
|
|
142
|
-
return { cached:
|
|
158
|
+
log.warn(MODULE, `Cache validation failed (${message}); refusing cached model "${modelId}"`);
|
|
159
|
+
return { cached: false, fromCache: false, modelId, error: message };
|
|
143
160
|
}
|
|
144
161
|
}
|
|
145
162
|
} catch (error) {
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
 * Accessor pair through which runtime-input composition reads the current
 * runtime config and writes the merged result back.
 */
export interface RuntimeCompositionBridge {
  /** Returns the current runtime config; the type allows null. */
  getRuntimeConfig: () => Record<string, unknown> | null;
  /** Replaces the current runtime config with the given value. */
  setRuntimeConfig: (runtimeConfig: Record<string, unknown> | null) => void;
}
|
|
5
|
+
|
|
6
|
+
/**
 * Optional surface-specific callbacks used by applyOrderedRuntimeInputs().
 * A handler is only required when the corresponding input is supplied;
 * supplying the input without its handler makes composition throw.
 */
export interface RuntimeInputCompositionHandlers {
  /** Loads one `configChain` entry; called once per ref with the shared options. */
  loadRuntimeConfigFromRef?: (
    ref: string,
    options?: Record<string, unknown>
  ) => Promise<Record<string, unknown> | null>;
  /** Applies the named runtime preset; called when `runtimePreset` is set. */
  applyRuntimePreset?: (
    runtimePreset: string,
    options?: Record<string, unknown>
  ) => Promise<void>;
  /** Applies a runtime config fetched from a URL; called when `runtimeConfigUrl` is set. */
  applyRuntimeConfigFromUrl?: (
    runtimeConfigUrl: string,
    options?: Record<string, unknown>
  ) => Promise<void>;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Runtime inputs consumed by applyOrderedRuntimeInputs(). They are applied in
 * the order the fields are declared here; every field is optional.
 */
export interface OrderedRuntimeInputs {
  /** Config refs loaded and merged one after another, in array order. */
  configChain?: string[] | null;
  /** Preset name handed to the `applyRuntimePreset` handler. */
  runtimePreset?: string | null;
  /** URL handed to the `applyRuntimeConfigFromUrl` handler. */
  runtimeConfigUrl?: string | null;
  /** Inline config; must resolve to runtime fields via resolveRuntimeFromConfig(). */
  runtimeConfig?: Record<string, unknown> | null;
  /** Final patch merged last; a function form is invoked after the other inputs are applied. */
  runtimeContractPatch?: Record<string, unknown> | null | (() => Record<string, unknown> | null);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Extracts the runtime section from a loaded config object.
 *
 * Returns `config.runtime` when that is an object, otherwise `config` itself
 * when it carries a truthy `shared`, `loading`, `inference`, or `emulation`
 * field, otherwise null.
 */
export declare function resolveRuntimeFromConfig(
  config: Record<string, unknown> | null | undefined
): Record<string, unknown> | null;
|
|
32
|
+
|
|
33
|
+
/**
 * Applies runtime inputs to the bridge in a fixed order: config chain,
 * runtime preset, runtime config URL, inline runtime config, and finally the
 * runtime contract patch.
 *
 * Rejects when the bridge lacks its accessors, when an input is supplied
 * without the handler it needs, or when a loaded or inline config has no
 * runtime fields.
 */
export declare function applyOrderedRuntimeInputs(
  runtimeBridge: RuntimeCompositionBridge,
  inputs?: OrderedRuntimeInputs,
  handlers?: RuntimeInputCompositionHandlers,
  options?: Record<string, unknown>
): Promise<void>;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { mergeRuntimeValues } from '../config/runtime-merge.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Extracts the runtime section from a loaded config object.
 *
 * Accepts either a wrapper of the form `{ runtime: {...} }` or a bare runtime
 * object, recognised by a truthy `shared`, `loading`, `inference`, or
 * `emulation` field.
 *
 * @param {object|null|undefined} config - Loaded config value.
 * @returns {object|null} `config.runtime`, `config` itself, or null when
 *   neither form matches.
 */
export function resolveRuntimeFromConfig(config) {
  const isObjectLike = (value) => Boolean(value) && typeof value === 'object';
  if (!isObjectLike(config)) {
    return null;
  }

  const nestedRuntime = config.runtime;
  if (isObjectLike(nestedRuntime)) {
    return nestedRuntime;
  }

  const runtimeSections = ['shared', 'loading', 'inference', 'emulation'];
  return runtimeSections.some((section) => config[section]) ? config : null;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Merges a patch into the bridge's current runtime config and stores the
 * result back on the bridge. A falsy patch is a no-op.
 *
 * @param {{getRuntimeConfig: Function, setRuntimeConfig: Function}} runtimeBridge - Accessors for the runtime config.
 * @param {object|null|undefined} patch - Runtime values to merge on top of the current config.
 * @returns {void}
 */
function mergeRuntimePatch(runtimeBridge, patch) {
  if (patch) {
    const currentRuntime = runtimeBridge.getRuntimeConfig();
    runtimeBridge.setRuntimeConfig(mergeRuntimeValues(currentRuntime, patch));
  }
}
|
|
15
|
+
|
|
16
|
+
/**
 * Validates that a runtime bridge exposes both accessors as functions.
 *
 * Both members are checked with `typeof ... === 'function'` so that a truthy
 * non-callable `setRuntimeConfig` is rejected here with a clear message rather
 * than failing later at the call site.
 *
 * @param {object|null|undefined} runtimeBridge - Candidate bridge object.
 * @returns {void}
 * @throws {Error} When `setRuntimeConfig` or `getRuntimeConfig` is missing or not a function.
 */
function requireRuntimeBridge(runtimeBridge) {
  if (typeof runtimeBridge?.setRuntimeConfig !== 'function') {
    throw new Error('runtime bridge must provide setRuntimeConfig().');
  }
  if (typeof runtimeBridge.getRuntimeConfig !== 'function') {
    throw new Error('runtime bridge must provide getRuntimeConfig().');
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Loads every ref of a config chain in order and merges each resolved runtime
 * section into the bridge.
 *
 * A missing or empty chain is a no-op and does not require a loader.
 *
 * @param {string[]|null|undefined} configChain - Config refs to load, first to last.
 * @param {object} runtimeBridge - Bridge that receives each merged runtime config.
 * @param {Function|undefined} loadRuntimeConfigFromRef - Async loader called as `(ref, options)`.
 * @param {object} options - Passed through unchanged to the loader.
 * @returns {Promise<void>}
 * @throws {Error} When refs are given without a loader, or a loaded config
 *   has no runtime fields.
 */
async function applyConfigChain(configChain, runtimeBridge, loadRuntimeConfigFromRef, options) {
  const hasRefs = Array.isArray(configChain) && configChain.length > 0;
  if (!hasRefs) {
    return;
  }

  const loaderAvailable = typeof loadRuntimeConfigFromRef === 'function';
  if (!loaderAvailable) {
    throw new Error('runtime input composition does not support configChain on this surface.');
  }

  // Strictly sequential: later refs are merged on top of earlier ones.
  for (let index = 0; index < configChain.length; index += 1) {
    const ref = configChain[index];
    const loadedConfig = await loadRuntimeConfigFromRef(ref, options);
    const runtimeSection = resolveRuntimeFromConfig(loadedConfig);
    if (runtimeSection) {
      mergeRuntimePatch(runtimeBridge, runtimeSection);
    } else {
      throw new Error(`Loaded runtime config "${ref}" is missing runtime fields.`);
    }
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Applies a runtime preset through the surface-provided handler.
 *
 * @param {string|null|undefined} runtimePreset - Preset name; a falsy value is a no-op.
 * @param {Function|undefined} applyPreset - Async handler called as `(runtimePreset, options)`.
 * @param {object} options - Passed through unchanged to the handler.
 * @returns {Promise<void>}
 * @throws {Error} When a preset is given but no handler function is available.
 */
async function applyRuntimePreset(runtimePreset, applyPreset, options) {
  if (runtimePreset) {
    const handlerAvailable = typeof applyPreset === 'function';
    if (!handlerAvailable) {
      throw new Error('runtime input composition does not support runtimePreset on this surface.');
    }
    await applyPreset(runtimePreset, options);
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Applies a runtime config referenced by URL through the surface-provided
 * handler.
 *
 * @param {string|null|undefined} runtimeConfigUrl - Config URL; a falsy value is a no-op.
 * @param {Function|undefined} applyConfigFromUrl - Async handler called as `(runtimeConfigUrl, options)`.
 * @param {object} options - Passed through unchanged to the handler.
 * @returns {Promise<void>}
 * @throws {Error} When a URL is given but no handler function is available.
 */
async function applyRuntimeConfigUrl(runtimeConfigUrl, applyConfigFromUrl, options) {
  if (runtimeConfigUrl) {
    const handlerAvailable = typeof applyConfigFromUrl === 'function';
    if (!handlerAvailable) {
      throw new Error('runtime input composition does not support runtimeConfigUrl on this surface.');
    }
    await applyConfigFromUrl(runtimeConfigUrl, options);
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Applies every supplied runtime input to the bridge in a fixed order:
 * config chain, runtime preset, runtime config URL, inline runtime config,
 * and finally the runtime contract patch.
 *
 * @param {object} runtimeBridge - Must expose getRuntimeConfig() and setRuntimeConfig().
 * @param {object} [inputs] - Optional `configChain`, `runtimePreset`,
 *   `runtimeConfigUrl`, `runtimeConfig`, and `runtimeContractPatch`.
 * @param {object} [handlers] - Optional `loadRuntimeConfigFromRef`,
 *   `applyRuntimePreset`, and `applyRuntimeConfigFromUrl` callbacks.
 * @param {object} [options] - Passed through unchanged to every handler.
 * @returns {Promise<void>}
 * @throws {Error} When the bridge is invalid, a needed handler is missing, or
 *   `runtimeConfig` resolves to no runtime fields.
 */
export async function applyOrderedRuntimeInputs(runtimeBridge, inputs = {}, handlers = {}, options = {}) {
  requireRuntimeBridge(runtimeBridge);

  // Each stage reads its input only when it runs, after the previous stage
  // has settled, so later stages build on earlier ones.
  await applyConfigChain(inputs.configChain, runtimeBridge, handlers.loadRuntimeConfigFromRef, options);
  await applyRuntimePreset(inputs.runtimePreset, handlers.applyRuntimePreset, options);
  await applyRuntimeConfigUrl(inputs.runtimeConfigUrl, handlers.applyRuntimeConfigFromUrl, options);

  const inlineConfig = inputs.runtimeConfig;
  if (inlineConfig) {
    const inlineRuntime = resolveRuntimeFromConfig(inlineConfig);
    if (inlineRuntime) {
      mergeRuntimePatch(runtimeBridge, inlineRuntime);
    } else {
      throw new Error('runtimeConfig is missing runtime fields');
    }
  }

  // A function-valued patch is invoked as a method of `inputs`, and only now,
  // after all other inputs have been applied.
  let contractPatch;
  if (typeof inputs.runtimeContractPatch === 'function') {
    contractPatch = inputs.runtimeContractPatch();
  } else {
    contractPatch = inputs.runtimeContractPatch ?? null;
  }
  mergeRuntimePatch(runtimeBridge, contractPatch);
}
|