@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
getActiveKernelPathSource,
|
|
19
19
|
setActiveKernelPath,
|
|
20
20
|
} from '../config/kernel-path-loader.js';
|
|
21
|
+
import { runTrainingOperatorCommand } from '../training/operator-command.js';
|
|
21
22
|
|
|
22
23
|
function asOptionalPlainObject(value, label) {
|
|
23
24
|
if (value == null) return null;
|
|
@@ -27,6 +28,28 @@ function asOptionalPlainObject(value, label) {
|
|
|
27
28
|
return value;
|
|
28
29
|
}
|
|
29
30
|
|
|
31
|
+
function assertNoUnsupportedRuntimeInputs(request) {
|
|
32
|
+
const runtimeFields = [];
|
|
33
|
+
if (Array.isArray(request?.configChain) && request.configChain.length > 0) {
|
|
34
|
+
runtimeFields.push('configChain');
|
|
35
|
+
}
|
|
36
|
+
if (typeof request?.runtimePreset === 'string' && request.runtimePreset.trim()) {
|
|
37
|
+
runtimeFields.push('runtimePreset');
|
|
38
|
+
}
|
|
39
|
+
if (typeof request?.runtimeConfigUrl === 'string' && request.runtimeConfigUrl.trim()) {
|
|
40
|
+
runtimeFields.push('runtimeConfigUrl');
|
|
41
|
+
}
|
|
42
|
+
if (request?.runtimeConfig != null) {
|
|
43
|
+
runtimeFields.push('runtimeConfig');
|
|
44
|
+
}
|
|
45
|
+
if (runtimeFields.length > 0) {
|
|
46
|
+
throw new Error(
|
|
47
|
+
`${request.command} does not support runtime input fields on the node operator surface: ` +
|
|
48
|
+
`${runtimeFields.join(', ')}. Put those settings into the workload/config asset instead.`
|
|
49
|
+
);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
30
53
|
let runtimeModulesPromise = null;
|
|
31
54
|
|
|
32
55
|
async function loadRuntimeModules() {
|
|
@@ -50,16 +73,19 @@ export function hasNodeWebGPUSupport() {
|
|
|
50
73
|
}
|
|
51
74
|
|
|
52
75
|
async function assertNodeWebGPUSupport() {
|
|
76
|
+
let bootstrapProvider = null;
|
|
53
77
|
if (!hasNodeWebGPUSupport()) {
|
|
54
78
|
const bootstrap = await bootstrapNodeWebGPU();
|
|
55
|
-
if (bootstrap.
|
|
56
|
-
|
|
79
|
+
if (bootstrap.provider) {
|
|
80
|
+
bootstrapProvider = bootstrap.provider;
|
|
57
81
|
}
|
|
58
82
|
}
|
|
59
83
|
|
|
60
84
|
if (hasNodeWebGPUSupport()) return;
|
|
61
85
|
throw new Error(
|
|
62
|
-
'node command: WebGPU runtime is incomplete in Node.
|
|
86
|
+
'node command: WebGPU runtime is incomplete in Node.' +
|
|
87
|
+
(bootstrapProvider ? ` Provider resolution stopped at "${bootstrapProvider}".` : '') +
|
|
88
|
+
' Run in browser relay, or run under a WebGPU-enabled Node build.'
|
|
63
89
|
);
|
|
64
90
|
}
|
|
65
91
|
|
|
@@ -90,6 +116,21 @@ export async function runNodeCommand(commandRequest, options = {}) {
|
|
|
90
116
|
});
|
|
91
117
|
}
|
|
92
118
|
|
|
119
|
+
if (request.command === 'lora' || request.command === 'distill') {
|
|
120
|
+
const gpuOptionalActions = new Set(['compare', 'quality-gate', 'subsets']);
|
|
121
|
+
installNodeFileFetchShim();
|
|
122
|
+
assertNoUnsupportedRuntimeInputs(request);
|
|
123
|
+
if (!gpuOptionalActions.has(request.action)) {
|
|
124
|
+
await assertNodeWebGPUSupport();
|
|
125
|
+
}
|
|
126
|
+
const result = await runTrainingOperatorCommand(request);
|
|
127
|
+
return createToolingSuccessEnvelope({
|
|
128
|
+
surface: 'node',
|
|
129
|
+
request,
|
|
130
|
+
result,
|
|
131
|
+
});
|
|
132
|
+
}
|
|
133
|
+
|
|
93
134
|
await assertNodeWebGPUSupport();
|
|
94
135
|
const modules = await loadRuntimeModules();
|
|
95
136
|
const runtimeBridge = {
|
|
@@ -52,6 +52,7 @@ function normalizeExecutionConfig(value, defaults) {
|
|
|
52
52
|
const useGpuCast = value.useGpuCast == null
|
|
53
53
|
? defaults.useGpuCast === true
|
|
54
54
|
: value.useGpuCast === true;
|
|
55
|
+
const gpuCastRequestedExplicitly = value.useGpuCast === true;
|
|
55
56
|
if (value.useGpuCast != null && typeof value.useGpuCast !== 'boolean') {
|
|
56
57
|
throw new Error('node convert: execution.useGpuCast must be a boolean when provided.');
|
|
57
58
|
}
|
|
@@ -69,6 +70,7 @@ function normalizeExecutionConfig(value, defaults) {
|
|
|
69
70
|
rowChunkMinTensorBytes,
|
|
70
71
|
maxInFlightJobs,
|
|
71
72
|
useGpuCast,
|
|
73
|
+
gpuCastRequestedExplicitly,
|
|
72
74
|
gpuCastMinTensorBytes,
|
|
73
75
|
};
|
|
74
76
|
}
|
|
@@ -222,6 +224,7 @@ function createNodeGpuTensorTransformer(options) {
|
|
|
222
224
|
const {
|
|
223
225
|
runtime,
|
|
224
226
|
gpuCastMinTensorBytes,
|
|
227
|
+
requireGpuCast,
|
|
225
228
|
resolveTensorTargetQuant,
|
|
226
229
|
} = options;
|
|
227
230
|
const {
|
|
@@ -276,6 +279,11 @@ function createNodeGpuTensorTransformer(options) {
|
|
|
276
279
|
try {
|
|
277
280
|
const device = getDevice();
|
|
278
281
|
if (!device) {
|
|
282
|
+
if (requireGpuCast) {
|
|
283
|
+
throw new Error(
|
|
284
|
+
`node convert: execution.useGpuCast failed for tensor "${tensor.name}": GPU device is unavailable.`
|
|
285
|
+
);
|
|
286
|
+
}
|
|
279
287
|
return null;
|
|
280
288
|
}
|
|
281
289
|
inputBuffer = acquireBuffer(tensorData.byteLength, undefined, `convert_gpu_cast_in_${tensor.name}`);
|
|
@@ -292,6 +300,11 @@ function createNodeGpuTensorTransformer(options) {
|
|
|
292
300
|
|
|
293
301
|
const readback = await getBufferPool().readBuffer(outputBuffer, outputBytes);
|
|
294
302
|
if (!(readback instanceof ArrayBuffer) || readback.byteLength !== outputBytes) {
|
|
303
|
+
if (requireGpuCast) {
|
|
304
|
+
throw new Error(
|
|
305
|
+
`node convert: execution.useGpuCast failed for tensor "${tensor.name}": invalid GPU readback.`
|
|
306
|
+
);
|
|
307
|
+
}
|
|
295
308
|
return null;
|
|
296
309
|
}
|
|
297
310
|
reportProgress?.(tensorData.byteLength, tensorData.byteLength);
|
|
@@ -301,6 +314,10 @@ function createNodeGpuTensorTransformer(options) {
|
|
|
301
314
|
outLayout: null,
|
|
302
315
|
};
|
|
303
316
|
} catch (error) {
|
|
317
|
+
if (requireGpuCast) {
|
|
318
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
319
|
+
throw new Error(`node convert: execution.useGpuCast failed for tensor "${tensor.name}": ${message}`);
|
|
320
|
+
}
|
|
304
321
|
if (!warnedFallback) {
|
|
305
322
|
warnedFallback = true;
|
|
306
323
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -1168,10 +1185,19 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
1168
1185
|
let result = null;
|
|
1169
1186
|
try {
|
|
1170
1187
|
if (executionPlan.useGpuCast) {
|
|
1171
|
-
|
|
1188
|
+
let gpuRuntime;
|
|
1189
|
+
try {
|
|
1190
|
+
gpuRuntime = await loadNodeGpuCastRuntime();
|
|
1191
|
+
} catch (error) {
|
|
1192
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1193
|
+
throw new Error(
|
|
1194
|
+
`node convert: execution.useGpuCast requires a WebGPU-capable Node runtime. ${message}`
|
|
1195
|
+
);
|
|
1196
|
+
}
|
|
1172
1197
|
gpuTensorTransformer = createNodeGpuTensorTransformer({
|
|
1173
1198
|
runtime: gpuRuntime,
|
|
1174
1199
|
gpuCastMinTensorBytes: executionPlan.gpuCastMinTensorBytes,
|
|
1200
|
+
requireGpuCast: executionPlan.gpuCastRequestedExplicitly === true,
|
|
1175
1201
|
resolveTensorTargetQuant,
|
|
1176
1202
|
});
|
|
1177
1203
|
}
|
|
@@ -4,6 +4,7 @@ import type { PipelineStorageContext } from '../inference/pipelines/text/init.js
|
|
|
4
4
|
export interface ResolveNodeSourceRuntimeBundleOptions {
|
|
5
5
|
inputPath: string;
|
|
6
6
|
modelId?: string | null;
|
|
7
|
+
verifyHashes?: boolean;
|
|
7
8
|
}
|
|
8
9
|
|
|
9
10
|
export interface NodeSourceRuntimeBundle {
|
|
@@ -16,4 +17,3 @@ export interface NodeSourceRuntimeBundle {
|
|
|
16
17
|
export declare function resolveNodeSourceRuntimeBundle(
|
|
17
18
|
options: ResolveNodeSourceRuntimeBundleOptions
|
|
18
19
|
): Promise<NodeSourceRuntimeBundle | null>;
|
|
19
|
-
|
|
@@ -3,6 +3,7 @@ import path from 'node:path';
|
|
|
3
3
|
import {
|
|
4
4
|
HEADER_READ_SIZE,
|
|
5
5
|
createConverterConfig,
|
|
6
|
+
DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
|
|
6
7
|
} from '../config/schema/index.js';
|
|
7
8
|
import { extractArchitecture } from '../converter/core.js';
|
|
8
9
|
import {
|
|
@@ -15,6 +16,7 @@ import { parseTransformerModel } from '../converter/parsers/transformer.js';
|
|
|
15
16
|
import { parseGGUFHeader } from '../formats/gguf/types.js';
|
|
16
17
|
import { parseSafetensorsHeader } from '../formats/safetensors/types.js';
|
|
17
18
|
import { log } from '../debug/index.js';
|
|
19
|
+
import { computeHash } from '../storage/shard-manager.js';
|
|
18
20
|
import {
|
|
19
21
|
buildSourceRuntimeBundle,
|
|
20
22
|
createSourceStorageContext,
|
|
@@ -33,6 +35,13 @@ const SOURCE_RUNTIME_EXECUTION_OVERRIDE = {
|
|
|
33
35
|
steps: [],
|
|
34
36
|
};
|
|
35
37
|
|
|
38
|
+
function cloneExecutionV0SessionDefaults() {
|
|
39
|
+
if (typeof structuredClone === 'function') {
|
|
40
|
+
return structuredClone(DEFAULT_EXECUTION_V0_SESSION_DEFAULTS);
|
|
41
|
+
}
|
|
42
|
+
return JSON.parse(JSON.stringify(DEFAULT_EXECUTION_V0_SESSION_DEFAULTS));
|
|
43
|
+
}
|
|
44
|
+
|
|
36
45
|
function toArrayBuffer(value, label) {
|
|
37
46
|
if (value instanceof ArrayBuffer) {
|
|
38
47
|
return value;
|
|
@@ -105,6 +114,16 @@ async function readJson(filePath, label) {
|
|
|
105
114
|
}
|
|
106
115
|
}
|
|
107
116
|
|
|
117
|
+
async function readFileBytes(filePath, label) {
|
|
118
|
+
try {
|
|
119
|
+
const bytes = await fs.readFile(filePath);
|
|
120
|
+
return bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
|
|
121
|
+
} catch (error) {
|
|
122
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
123
|
+
throw new Error(`Failed to read ${label} "${filePath}": ${message}`);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
108
127
|
async function readRange(filePath, offset, length) {
|
|
109
128
|
if (!Number.isFinite(offset) || !Number.isFinite(length) || length <= 0) {
|
|
110
129
|
return new ArrayBuffer(0);
|
|
@@ -206,6 +225,37 @@ async function parseSafetensorsInput(inputDir) {
|
|
|
206
225
|
const stats = await getPathStats(sourcePath, `source shard (${sourcePath})`);
|
|
207
226
|
sourceFiles.push({ path: sourcePath, size: Number(stats.size) });
|
|
208
227
|
}
|
|
228
|
+
const auxiliaryFiles = [
|
|
229
|
+
{ path: configPath, size: Number((await getPathStats(configPath, 'config.json')).size), kind: 'config' },
|
|
230
|
+
...(hasIndex
|
|
231
|
+
? [{
|
|
232
|
+
path: path.join(inputDir, 'model.safetensors.index.json'),
|
|
233
|
+
size: Number((await getPathStats(path.join(inputDir, 'model.safetensors.index.json'), 'model.safetensors.index.json')).size),
|
|
234
|
+
kind: 'safetensors_index',
|
|
235
|
+
}]
|
|
236
|
+
: []),
|
|
237
|
+
...(tokenizerJson
|
|
238
|
+
? [{
|
|
239
|
+
path: tokenizerJsonPath,
|
|
240
|
+
size: Number((await getPathStats(tokenizerJsonPath, 'tokenizer.json')).size),
|
|
241
|
+
kind: 'tokenizer_json',
|
|
242
|
+
}]
|
|
243
|
+
: []),
|
|
244
|
+
...(tokenizerConfig
|
|
245
|
+
? [{
|
|
246
|
+
path: tokenizerConfigPath,
|
|
247
|
+
size: Number((await getPathStats(tokenizerConfigPath, 'tokenizer_config.json')).size),
|
|
248
|
+
kind: 'tokenizer_config',
|
|
249
|
+
}]
|
|
250
|
+
: []),
|
|
251
|
+
...(hasTokenizerModel
|
|
252
|
+
? [{
|
|
253
|
+
path: tokenizerModelPath,
|
|
254
|
+
size: Number((await getPathStats(tokenizerModelPath, 'tokenizer.model')).size),
|
|
255
|
+
kind: 'tokenizer_model',
|
|
256
|
+
}]
|
|
257
|
+
: []),
|
|
258
|
+
];
|
|
209
259
|
|
|
210
260
|
return {
|
|
211
261
|
sourceKind: 'safetensors',
|
|
@@ -220,8 +270,10 @@ async function parseSafetensorsInput(inputDir) {
|
|
|
220
270
|
tokenizerConfig,
|
|
221
271
|
tokenizerModelName: hasTokenizerModel ? 'tokenizer.model' : null,
|
|
222
272
|
tokenizerJsonPath: tokenizerJsonPath,
|
|
273
|
+
tokenizerConfigPath: tokenizerConfigPath,
|
|
223
274
|
tokenizerModelPath: hasTokenizerModel ? tokenizerModelPath : null,
|
|
224
275
|
sourceFiles,
|
|
276
|
+
auxiliaryFiles,
|
|
225
277
|
};
|
|
226
278
|
}
|
|
227
279
|
|
|
@@ -283,8 +335,10 @@ async function parseGgufInput(ggufPath) {
|
|
|
283
335
|
tokenizerConfig: null,
|
|
284
336
|
tokenizerModelName: null,
|
|
285
337
|
tokenizerJsonPath: null,
|
|
338
|
+
tokenizerConfigPath: null,
|
|
286
339
|
tokenizerModelPath: null,
|
|
287
340
|
sourceFiles: [{ path: ggufPath, size: fileSize }],
|
|
341
|
+
auxiliaryFiles: [],
|
|
288
342
|
};
|
|
289
343
|
}
|
|
290
344
|
|
|
@@ -357,11 +411,29 @@ function buildNodeFileReaders() {
|
|
|
357
411
|
};
|
|
358
412
|
}
|
|
359
413
|
|
|
414
|
+
async function addHashesToFileEntries(entries, hashAlgorithm) {
|
|
415
|
+
const normalized = [];
|
|
416
|
+
for (const entry of Array.isArray(entries) ? entries : []) {
|
|
417
|
+
const filePath = normalizePath(entry?.path);
|
|
418
|
+
if (!filePath) continue;
|
|
419
|
+
const bytes = await readFileBytes(filePath, `source asset (${filePath})`);
|
|
420
|
+
normalized.push({
|
|
421
|
+
...entry,
|
|
422
|
+
path: filePath,
|
|
423
|
+
size: Number.isFinite(entry?.size) ? Math.max(0, Math.floor(Number(entry.size))) : bytes.byteLength,
|
|
424
|
+
hash: await computeHash(new Uint8Array(bytes), hashAlgorithm),
|
|
425
|
+
hashAlgorithm,
|
|
426
|
+
});
|
|
427
|
+
}
|
|
428
|
+
return normalized;
|
|
429
|
+
}
|
|
430
|
+
|
|
360
431
|
export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
361
432
|
const inputPath = normalizePath(options.inputPath);
|
|
362
433
|
if (!inputPath) {
|
|
363
434
|
throw new Error('node source runtime: inputPath is required.');
|
|
364
435
|
}
|
|
436
|
+
const verifyHashes = options.verifyHashes === true;
|
|
365
437
|
const resolvedInputPath = path.resolve(inputPath);
|
|
366
438
|
const stats = await getPathStats(resolvedInputPath, 'inputPath');
|
|
367
439
|
|
|
@@ -405,6 +477,7 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
405
477
|
modelBaseId: options.modelId || null,
|
|
406
478
|
},
|
|
407
479
|
inference: {
|
|
480
|
+
sessionDefaults: cloneExecutionV0SessionDefaults(),
|
|
408
481
|
execution: SOURCE_RUNTIME_EXECUTION_OVERRIDE,
|
|
409
482
|
},
|
|
410
483
|
});
|
|
@@ -425,22 +498,30 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
425
498
|
parsed.sourceKind,
|
|
426
499
|
parsed.sourcePathForModelId
|
|
427
500
|
);
|
|
501
|
+
const hashAlgorithm = converterConfig.manifest.hashAlgorithm;
|
|
502
|
+
const sourceFiles = await addHashesToFileEntries(parsed.sourceFiles, hashAlgorithm);
|
|
503
|
+
const auxiliaryFiles = await addHashesToFileEntries(parsed.auxiliaryFiles, hashAlgorithm);
|
|
428
504
|
const { manifest, shardSources } = await buildSourceRuntimeBundle({
|
|
429
505
|
modelId,
|
|
430
506
|
modelName: modelId,
|
|
431
507
|
modelType: plan.modelType,
|
|
508
|
+
sourceKind: parsed.sourceKind,
|
|
432
509
|
architecture: parsed.architecture,
|
|
433
510
|
architectureHint: parsed.architectureHint,
|
|
434
511
|
rawConfig: parsed.config,
|
|
435
512
|
inference: plan.manifestInference,
|
|
436
513
|
tensors: parsed.tensors,
|
|
437
|
-
sourceFiles
|
|
514
|
+
sourceFiles,
|
|
515
|
+
auxiliaryFiles,
|
|
438
516
|
sourceQuantization: parsed.sourceQuantization,
|
|
439
517
|
quantizationInfo: plan.quantizationInfo,
|
|
440
|
-
hashAlgorithm
|
|
518
|
+
hashAlgorithm,
|
|
441
519
|
tokenizerJson: parsed.tokenizerJson,
|
|
442
520
|
tokenizerConfig: parsed.tokenizerConfig,
|
|
443
521
|
tokenizerModelName: parsed.tokenizerModelName,
|
|
522
|
+
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
523
|
+
tokenizerConfigPath: parsed.tokenizerConfigPath,
|
|
524
|
+
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
444
525
|
});
|
|
445
526
|
|
|
446
527
|
const readers = buildNodeFileReaders();
|
|
@@ -452,7 +533,7 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
452
533
|
readBinary: readers.readBinary,
|
|
453
534
|
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
454
535
|
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
455
|
-
verifyHashes
|
|
536
|
+
verifyHashes,
|
|
456
537
|
});
|
|
457
538
|
|
|
458
539
|
log.info(
|
|
@@ -2,21 +2,13 @@ import { existsSync, readFileSync, statSync } from 'node:fs';
|
|
|
2
2
|
import { dirname, isAbsolute, resolve } from 'node:path';
|
|
3
3
|
import { fileURLToPath, pathToFileURL } from 'node:url';
|
|
4
4
|
|
|
5
|
-
const DOPPLER_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '..', '..');
|
|
6
|
-
|
|
7
|
-
const DEFAULT_LOCAL_DOE_PROVIDER_PATH = resolve(
|
|
8
|
-
dirname(fileURLToPath(import.meta.url)),
|
|
9
|
-
'..',
|
|
10
|
-
'..',
|
|
11
|
-
'..',
|
|
12
|
-
'fawn',
|
|
13
|
-
'nursery',
|
|
14
|
-
'webgpu-doe',
|
|
15
|
-
);
|
|
16
5
|
const DEFAULT_DOE_PROVIDER_CREATE_ARGS = 'enable-dawn-features=allow_unsafe_apis';
|
|
6
|
+
const DOE_PROVIDER_CREATE_ARGS_ENV = 'FAWN_WEBGPU_CREATE_ARGS';
|
|
17
7
|
|
|
18
8
|
function hasNavigatorGpu() {
|
|
19
|
-
return typeof globalThis.navigator !== 'undefined'
|
|
9
|
+
return typeof globalThis.navigator !== 'undefined'
|
|
10
|
+
&& !!globalThis.navigator?.gpu
|
|
11
|
+
&& typeof globalThis.navigator.gpu.requestAdapter === 'function';
|
|
20
12
|
}
|
|
21
13
|
|
|
22
14
|
function hasGpuEnums() {
|
|
@@ -59,110 +51,37 @@ function resolveCandidateModuleSpecifier(candidate) {
|
|
|
59
51
|
}
|
|
60
52
|
|
|
61
53
|
function resolveDefaultWebgpuModuleSpecifiers() {
|
|
62
|
-
|
|
63
|
-
const localCandidates = [
|
|
64
|
-
resolve(process.cwd(), '..', 'fawn', 'nursery', 'webgpu-doe'),
|
|
65
|
-
DEFAULT_LOCAL_DOE_PROVIDER_PATH,
|
|
66
|
-
];
|
|
67
|
-
for (const localCandidate of localCandidates) {
|
|
68
|
-
const resolvedPath = resolveNodeModuleFilePath(localCandidate);
|
|
69
|
-
if (resolvedPath) {
|
|
70
|
-
specifiers.push(pathToFileURL(resolvedPath).href);
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
specifiers.push('@simulatte/webgpu-doe');
|
|
74
|
-
specifiers.push('webgpu');
|
|
75
|
-
return [...new Set(specifiers)];
|
|
54
|
+
return ['@simulatte/webgpu', 'webgpu'];
|
|
76
55
|
}
|
|
77
56
|
|
|
78
|
-
function
|
|
57
|
+
function resolveExplicitWebgpuModuleSpecifier() {
|
|
79
58
|
const fromEnv = process.env.DOPPLER_NODE_WEBGPU_MODULE;
|
|
80
59
|
if (typeof fromEnv === 'string' && fromEnv.trim().length > 0) {
|
|
81
|
-
return
|
|
82
|
-
explicit: true,
|
|
83
|
-
specifiers: [resolveCandidateModuleSpecifier(fromEnv.trim())],
|
|
84
|
-
};
|
|
85
|
-
}
|
|
86
|
-
return {
|
|
87
|
-
explicit: false,
|
|
88
|
-
specifiers: resolveDefaultWebgpuModuleSpecifiers(),
|
|
89
|
-
};
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
function resolveWorkspaceWebgpuProviderPath() {
|
|
93
|
-
const candidates = [
|
|
94
|
-
resolve(process.cwd(), 'node_modules', 'webgpu'),
|
|
95
|
-
resolve(DOPPLER_ROOT, 'node_modules', 'webgpu'),
|
|
96
|
-
];
|
|
97
|
-
for (const candidate of candidates) {
|
|
98
|
-
const resolvedPath = resolveNodeModuleFilePath(candidate);
|
|
99
|
-
if (resolvedPath) {
|
|
100
|
-
return resolvedPath;
|
|
101
|
-
}
|
|
60
|
+
return resolveCandidateModuleSpecifier(fromEnv.trim());
|
|
102
61
|
}
|
|
103
62
|
return null;
|
|
104
63
|
}
|
|
105
64
|
|
|
106
65
|
function isDoeWebgpuSpecifier(specifier) {
|
|
107
|
-
if (specifier === '@simulatte/webgpu
|
|
66
|
+
if (specifier === '@simulatte/webgpu') {
|
|
108
67
|
return true;
|
|
109
68
|
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
if (specifier.includes('/webgpu-doe/')) {
|
|
114
|
-
return true;
|
|
115
|
-
}
|
|
116
|
-
return specifier.includes('webgpu-doe') && specifier.startsWith('file://');
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
function resolveDoeProviderOverride(specifier) {
|
|
120
|
-
const explicitProvider = process.env.FAWN_WEBGPU_NODE_PROVIDER_MODULE;
|
|
121
|
-
if (typeof explicitProvider === 'string' && explicitProvider.trim().length > 0) {
|
|
122
|
-
return null;
|
|
123
|
-
}
|
|
124
|
-
if (!isDoeWebgpuSpecifier(specifier)) {
|
|
125
|
-
return null;
|
|
126
|
-
}
|
|
127
|
-
return resolveWorkspaceWebgpuProviderPath();
|
|
69
|
+
return typeof specifier === 'string'
|
|
70
|
+
&& specifier.startsWith('file://')
|
|
71
|
+
&& specifier.includes('@simulatte/webgpu');
|
|
128
72
|
}
|
|
129
73
|
|
|
130
74
|
async function importWithProviderOverride(specifier) {
|
|
131
|
-
const providerOverride = resolveDoeProviderOverride(specifier);
|
|
132
75
|
const shouldApplyCreateArgsDefault = isDoeWebgpuSpecifier(specifier)
|
|
133
|
-
&& !(typeof process.env
|
|
134
|
-
if (!
|
|
135
|
-
|
|
136
|
-
return import(specifier);
|
|
137
|
-
}
|
|
138
|
-
process.env.FAWN_WEBGPU_CREATE_ARGS = DEFAULT_DOE_PROVIDER_CREATE_ARGS;
|
|
139
|
-
try {
|
|
140
|
-
return await import(specifier);
|
|
141
|
-
} finally {
|
|
142
|
-
delete process.env.FAWN_WEBGPU_CREATE_ARGS;
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
const hadProvider = Object.prototype.hasOwnProperty.call(process.env, 'FAWN_WEBGPU_NODE_PROVIDER_MODULE');
|
|
146
|
-
const previousProvider = process.env.FAWN_WEBGPU_NODE_PROVIDER_MODULE;
|
|
147
|
-
const hadCreateArgs = Object.prototype.hasOwnProperty.call(process.env, 'FAWN_WEBGPU_CREATE_ARGS');
|
|
148
|
-
const previousCreateArgs = process.env.FAWN_WEBGPU_CREATE_ARGS;
|
|
149
|
-
process.env.FAWN_WEBGPU_NODE_PROVIDER_MODULE = providerOverride;
|
|
150
|
-
if (!hadCreateArgs) {
|
|
151
|
-
process.env.FAWN_WEBGPU_CREATE_ARGS = DEFAULT_DOE_PROVIDER_CREATE_ARGS;
|
|
76
|
+
&& !(typeof process.env[DOE_PROVIDER_CREATE_ARGS_ENV] === 'string' && process.env[DOE_PROVIDER_CREATE_ARGS_ENV].trim().length > 0);
|
|
77
|
+
if (!shouldApplyCreateArgsDefault) {
|
|
78
|
+
return import(specifier);
|
|
152
79
|
}
|
|
80
|
+
process.env[DOE_PROVIDER_CREATE_ARGS_ENV] = DEFAULT_DOE_PROVIDER_CREATE_ARGS;
|
|
153
81
|
try {
|
|
154
82
|
return await import(specifier);
|
|
155
83
|
} finally {
|
|
156
|
-
|
|
157
|
-
process.env.FAWN_WEBGPU_NODE_PROVIDER_MODULE = previousProvider;
|
|
158
|
-
} else {
|
|
159
|
-
delete process.env.FAWN_WEBGPU_NODE_PROVIDER_MODULE;
|
|
160
|
-
}
|
|
161
|
-
if (hadCreateArgs) {
|
|
162
|
-
process.env.FAWN_WEBGPU_CREATE_ARGS = previousCreateArgs;
|
|
163
|
-
} else {
|
|
164
|
-
delete process.env.FAWN_WEBGPU_CREATE_ARGS;
|
|
165
|
-
}
|
|
84
|
+
delete process.env[DOE_PROVIDER_CREATE_ARGS_ENV];
|
|
166
85
|
}
|
|
167
86
|
}
|
|
168
87
|
|
|
@@ -315,27 +234,33 @@ function installWebgpuFromModule(mod) {
|
|
|
315
234
|
}
|
|
316
235
|
|
|
317
236
|
export async function bootstrapNodeWebGPU() {
|
|
237
|
+
const explicitSpecifier = resolveExplicitWebgpuModuleSpecifier();
|
|
238
|
+
if (explicitSpecifier) {
|
|
239
|
+
try {
|
|
240
|
+
const mod = await importWithProviderOverride(explicitSpecifier);
|
|
241
|
+
if (installWebgpuFromModule(mod)) {
|
|
242
|
+
return { ok: true, provider: explicitSpecifier };
|
|
243
|
+
}
|
|
244
|
+
return { ok: false, provider: explicitSpecifier };
|
|
245
|
+
} catch {
|
|
246
|
+
return { ok: false, provider: explicitSpecifier };
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
318
250
|
if (hasNavigatorGpu() && hasGpuEnums()) {
|
|
319
251
|
return { ok: true, provider: 'pre-installed' };
|
|
320
252
|
}
|
|
321
253
|
|
|
322
|
-
const
|
|
323
|
-
for (const specifier of specifiers) {
|
|
254
|
+
for (const specifier of resolveDefaultWebgpuModuleSpecifiers()) {
|
|
324
255
|
let mod;
|
|
325
256
|
try {
|
|
326
257
|
mod = await importWithProviderOverride(specifier);
|
|
327
258
|
} catch {
|
|
328
|
-
if (explicit) {
|
|
329
|
-
return { ok: false, provider: null };
|
|
330
|
-
}
|
|
331
259
|
continue;
|
|
332
260
|
}
|
|
333
261
|
if (installWebgpuFromModule(mod)) {
|
|
334
262
|
return { ok: true, provider: specifier };
|
|
335
263
|
}
|
|
336
|
-
if (explicit) {
|
|
337
|
-
return { ok: false, provider: null };
|
|
338
|
-
}
|
|
339
264
|
}
|
|
340
265
|
|
|
341
266
|
return { ok: false, provider: null };
|
|
@@ -8,6 +8,10 @@ import { downloadModel } from '../storage/downloader.js';
|
|
|
8
8
|
import { isOPFSAvailable } from '../storage/quota.js';
|
|
9
9
|
import { parseManifest, getManifestUrl } from '../formats/rdrr/index.js';
|
|
10
10
|
import { log } from '../debug/index.js';
|
|
11
|
+
import {
|
|
12
|
+
resolveSourceArtifact,
|
|
13
|
+
verifyStoredSourceArtifact,
|
|
14
|
+
} from '../storage/source-artifact-store.js';
|
|
11
15
|
|
|
12
16
|
const MODULE = 'OPFSCache';
|
|
13
17
|
|
|
@@ -43,6 +47,7 @@ function hasSameShardSet(aManifest, bManifest) {
|
|
|
43
47
|
}
|
|
44
48
|
|
|
45
49
|
function buildManifestFingerprint(manifest) {
|
|
50
|
+
const sourceArtifactFingerprint = resolveSourceArtifact(manifest)?.fingerprint ?? null;
|
|
46
51
|
const inference = manifest?.inference ?? {};
|
|
47
52
|
const layerPattern = inference?.layerPattern ?? {};
|
|
48
53
|
const quantizationInfo = manifest?.quantizationInfo ?? {};
|
|
@@ -75,6 +80,7 @@ function buildManifestFingerprint(manifest) {
|
|
|
75
80
|
},
|
|
76
81
|
},
|
|
77
82
|
shards,
|
|
83
|
+
sourceArtifactFingerprint,
|
|
78
84
|
});
|
|
79
85
|
}
|
|
80
86
|
|
|
@@ -119,16 +125,27 @@ export async function ensureModelCached(modelId, modelBaseUrl) {
|
|
|
119
125
|
if (!cachedManifestText || !cachedManifest) {
|
|
120
126
|
log.warn(MODULE, `Cache miss: "${modelId}" has no readable manifest in OPFS; re-importing`);
|
|
121
127
|
} else {
|
|
128
|
+
const cachedSourceArtifact = resolveSourceArtifact(cachedManifest);
|
|
129
|
+
const sourceIntegrity = cachedSourceArtifact
|
|
130
|
+
? await verifyStoredSourceArtifact(cachedManifest, { checkHashes: false })
|
|
131
|
+
: null;
|
|
132
|
+
const sourceIntegrityValid = !sourceIntegrity || sourceIntegrity.valid;
|
|
133
|
+
if (sourceIntegrity && !sourceIntegrity.valid) {
|
|
134
|
+
log.warn(
|
|
135
|
+
MODULE,
|
|
136
|
+
`Cache stale: "${modelId}" direct-source assets are incomplete (${sourceIntegrity.missingFiles.join(', ')})`
|
|
137
|
+
);
|
|
138
|
+
}
|
|
122
139
|
const cachedFingerprint = buildManifestFingerprint(cachedManifest);
|
|
123
140
|
const remoteFingerprint = buildManifestFingerprint(remoteManifest);
|
|
124
|
-
if (cachedFingerprint === remoteFingerprint) {
|
|
141
|
+
if (sourceIntegrityValid && cachedFingerprint === remoteFingerprint) {
|
|
125
142
|
log.info(MODULE, `Cache hit: "${modelId}"`);
|
|
126
143
|
return { cached: true, fromCache: true, modelId, error: null };
|
|
127
144
|
}
|
|
128
145
|
|
|
129
146
|
const sameShards = hasSameShardSet(cachedManifest, remoteManifest);
|
|
130
147
|
const sameHashAlgorithm = (cachedManifest?.hashAlgorithm ?? null) === (remoteManifest?.hashAlgorithm ?? null);
|
|
131
|
-
if (sameShards && sameHashAlgorithm) {
|
|
148
|
+
if (sourceIntegrityValid && sameShards && sameHashAlgorithm) {
|
|
132
149
|
await openModelStore(modelId);
|
|
133
150
|
await saveManifest(remoteManifestText);
|
|
134
151
|
log.info(MODULE, `Cache manifest refreshed: "${modelId}" (shards unchanged)`);
|
|
@@ -138,8 +155,8 @@ export async function ensureModelCached(modelId, modelBaseUrl) {
|
|
|
138
155
|
}
|
|
139
156
|
} catch (error) {
|
|
140
157
|
const message = toErrorMessage(error);
|
|
141
|
-
log.warn(MODULE, `Cache validation
|
|
142
|
-
return { cached:
|
|
158
|
+
log.warn(MODULE, `Cache validation failed (${message}); refusing cached model "${modelId}"`);
|
|
159
|
+
return { cached: false, fromCache: false, modelId, error: message };
|
|
143
160
|
}
|
|
144
161
|
}
|
|
145
162
|
} catch (error) {
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
export interface RuntimeCompositionBridge {
|
|
2
|
+
getRuntimeConfig: () => Record<string, unknown> | null;
|
|
3
|
+
setRuntimeConfig: (runtimeConfig: Record<string, unknown> | null) => void;
|
|
4
|
+
}
|
|
5
|
+
|
|
6
|
+
export interface RuntimeInputCompositionHandlers {
|
|
7
|
+
loadRuntimeConfigFromRef?: (
|
|
8
|
+
ref: string,
|
|
9
|
+
options?: Record<string, unknown>
|
|
10
|
+
) => Promise<Record<string, unknown> | null>;
|
|
11
|
+
applyRuntimePreset?: (
|
|
12
|
+
runtimePreset: string,
|
|
13
|
+
options?: Record<string, unknown>
|
|
14
|
+
) => Promise<void>;
|
|
15
|
+
applyRuntimeConfigFromUrl?: (
|
|
16
|
+
runtimeConfigUrl: string,
|
|
17
|
+
options?: Record<string, unknown>
|
|
18
|
+
) => Promise<void>;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export interface OrderedRuntimeInputs {
|
|
22
|
+
configChain?: string[] | null;
|
|
23
|
+
runtimePreset?: string | null;
|
|
24
|
+
runtimeConfigUrl?: string | null;
|
|
25
|
+
runtimeConfig?: Record<string, unknown> | null;
|
|
26
|
+
runtimeContractPatch?: Record<string, unknown> | null | (() => Record<string, unknown> | null);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export declare function resolveRuntimeFromConfig(
|
|
30
|
+
config: Record<string, unknown> | null | undefined
|
|
31
|
+
): Record<string, unknown> | null;
|
|
32
|
+
|
|
33
|
+
export declare function applyOrderedRuntimeInputs(
|
|
34
|
+
runtimeBridge: RuntimeCompositionBridge,
|
|
35
|
+
inputs?: OrderedRuntimeInputs,
|
|
36
|
+
handlers?: RuntimeInputCompositionHandlers,
|
|
37
|
+
options?: Record<string, unknown>
|
|
38
|
+
): Promise<void>;
|