@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
package/src/hotswap/runtime.js
CHANGED
|
@@ -6,18 +6,40 @@ function normalizeRolloutPolicy(policy) {
|
|
|
6
6
|
? policy.rollout
|
|
7
7
|
: {};
|
|
8
8
|
const rawMode = String(rollout.mode || 'shadow').trim().toLowerCase().replace(/_/g, '-');
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
9
|
+
if (rawMode !== 'default' && rawMode !== 'canary' && rawMode !== 'opt-in' && rawMode !== 'shadow') {
|
|
10
|
+
throw new Error(
|
|
11
|
+
`hotswap.rollout.mode must be one of default, canary, opt-in, shadow (received "${rollout.mode}")`
|
|
12
|
+
);
|
|
13
|
+
}
|
|
14
|
+
let canaryPercent = 0;
|
|
15
|
+
if (rollout.canaryPercent !== undefined && rollout.canaryPercent !== null) {
|
|
16
|
+
const parsedCanaryPercent = Number(rollout.canaryPercent);
|
|
17
|
+
if (!Number.isFinite(parsedCanaryPercent) || parsedCanaryPercent < 0 || parsedCanaryPercent > 100) {
|
|
18
|
+
throw new Error('hotswap.rollout.canaryPercent must be a number between 0 and 100 when provided.');
|
|
19
|
+
}
|
|
20
|
+
canaryPercent = parsedCanaryPercent;
|
|
21
|
+
}
|
|
22
|
+
if (rollout.cohortSalt !== undefined && rollout.cohortSalt !== null && typeof rollout.cohortSalt !== 'string') {
|
|
23
|
+
throw new Error('hotswap.rollout.cohortSalt must be a string when provided.');
|
|
24
|
+
}
|
|
15
25
|
const cohortSalt = String(rollout.cohortSalt || 'doppler-hotswap-v1').trim() || 'doppler-hotswap-v1';
|
|
26
|
+
if (rollout.optInAllowlist !== undefined && rollout.optInAllowlist !== null && !Array.isArray(rollout.optInAllowlist)) {
|
|
27
|
+
throw new Error('hotswap.rollout.optInAllowlist must be an array of strings when provided.');
|
|
28
|
+
}
|
|
16
29
|
const optInAllowlist = Array.isArray(rollout.optInAllowlist)
|
|
17
|
-
? rollout.optInAllowlist.map((entry) =>
|
|
30
|
+
? rollout.optInAllowlist.map((entry, index) => {
|
|
31
|
+
if (typeof entry !== 'string') {
|
|
32
|
+
throw new Error(`hotswap.rollout.optInAllowlist[${index}] must be a string.`);
|
|
33
|
+
}
|
|
34
|
+
const normalized = entry.trim();
|
|
35
|
+
if (!normalized) {
|
|
36
|
+
throw new Error(`hotswap.rollout.optInAllowlist[${index}] must not be empty.`);
|
|
37
|
+
}
|
|
38
|
+
return normalized;
|
|
39
|
+
})
|
|
18
40
|
: [];
|
|
19
41
|
return {
|
|
20
|
-
mode,
|
|
42
|
+
mode: rawMode,
|
|
21
43
|
canaryPercent,
|
|
22
44
|
cohortSalt,
|
|
23
45
|
optInAllowlist,
|
package/src/index-browser.d.ts
CHANGED
|
@@ -9,6 +9,20 @@ export {
|
|
|
9
9
|
export { MultiModelLoader } from './loader/multi-model-loader.js';
|
|
10
10
|
|
|
11
11
|
export { InferencePipeline, EmbeddingPipeline, createPipeline } from './generation/index.js';
|
|
12
|
+
export {
|
|
13
|
+
StructuredJsonHeadPipeline,
|
|
14
|
+
isStructuredJsonHeadModelType,
|
|
15
|
+
createStructuredJsonHeadPipeline,
|
|
16
|
+
DreamStructuredPipeline,
|
|
17
|
+
isDreamStructuredModelType,
|
|
18
|
+
createDreamStructuredPipeline,
|
|
19
|
+
} from './generation/index.js';
|
|
20
|
+
export {
|
|
21
|
+
EnergyRowHeadPipeline,
|
|
22
|
+
createEnergyRowHeadPipeline,
|
|
23
|
+
DreamEnergyHeadPipeline,
|
|
24
|
+
createDreamEnergyHeadPipeline,
|
|
25
|
+
} from './inference/pipelines/energy-head/row-head-pipeline.js';
|
|
12
26
|
export { KVCache } from './inference/kv-cache.js';
|
|
13
27
|
export { Tokenizer } from './inference/tokenizer.js';
|
|
14
28
|
export { SpeculativeDecoder } from './inference/speculative.js';
|
|
@@ -25,6 +39,22 @@ export {
|
|
|
25
39
|
mergeMultipleLogits,
|
|
26
40
|
} from './gpu/kernels/logit-merge.js';
|
|
27
41
|
|
|
42
|
+
export type { RDRRManifest, ShardInfo } from './formats/rdrr/index.js';
|
|
43
|
+
export type { TensorLocation, LoadProgress, LoadOptions, LoaderStats } from './loader/doppler-loader.js';
|
|
44
|
+
export type { AdapterSource } from './loader/multi-model-loader.js';
|
|
45
|
+
export type { ParsedModelConfig } from './generation/index.js';
|
|
46
|
+
export type { SamplingOptions } from './generation/index.js';
|
|
47
|
+
export type {
|
|
48
|
+
GenerateOptions,
|
|
49
|
+
GenerationResult,
|
|
50
|
+
KVCacheSnapshot,
|
|
51
|
+
LayerWeights,
|
|
52
|
+
ExpertWeights,
|
|
53
|
+
RouterWeights,
|
|
54
|
+
} from './generation/index.js';
|
|
55
|
+
export type { LoRAAdapter, LoRAModuleName } from './generation/index.js';
|
|
56
|
+
export type { ExpertNode, ExpertTask } from './inference/multi-model-network.js';
|
|
57
|
+
|
|
28
58
|
export {
|
|
29
59
|
ADAPTER_MANIFEST_SCHEMA,
|
|
30
60
|
validateManifest as validateAdapterManifest,
|
|
@@ -45,4 +75,18 @@ export {
|
|
|
45
75
|
createMemoryRegistry,
|
|
46
76
|
} from './adapters/index.js';
|
|
47
77
|
|
|
78
|
+
export type {
|
|
79
|
+
AdapterManifest,
|
|
80
|
+
AdapterMetadata,
|
|
81
|
+
AdapterTensorSpec,
|
|
82
|
+
LoRALoadOptions,
|
|
83
|
+
LoRAWeightsResult,
|
|
84
|
+
AdapterState,
|
|
85
|
+
EnableAdapterOptions,
|
|
86
|
+
AdapterStackOptions,
|
|
87
|
+
AdapterManagerEvents,
|
|
88
|
+
AdapterRegistryEntry,
|
|
89
|
+
AdapterQueryOptions,
|
|
90
|
+
} from './adapters/index.js';
|
|
91
|
+
|
|
48
92
|
export * from './tooling-exports.browser.js';
|
package/src/index-browser.js
CHANGED
|
@@ -11,6 +11,20 @@ export { MultiModelLoader } from './loader/multi-model-loader.js';
|
|
|
11
11
|
|
|
12
12
|
// Inference pipeline
|
|
13
13
|
export { InferencePipeline, EmbeddingPipeline, createPipeline } from './generation/index.js';
|
|
14
|
+
export {
|
|
15
|
+
StructuredJsonHeadPipeline,
|
|
16
|
+
isStructuredJsonHeadModelType,
|
|
17
|
+
createStructuredJsonHeadPipeline,
|
|
18
|
+
DreamStructuredPipeline,
|
|
19
|
+
isDreamStructuredModelType,
|
|
20
|
+
createDreamStructuredPipeline,
|
|
21
|
+
} from './generation/index.js';
|
|
22
|
+
export {
|
|
23
|
+
EnergyRowHeadPipeline,
|
|
24
|
+
createEnergyRowHeadPipeline,
|
|
25
|
+
DreamEnergyHeadPipeline,
|
|
26
|
+
createDreamEnergyHeadPipeline,
|
|
27
|
+
} from './inference/pipelines/energy-head/row-head-pipeline.js';
|
|
14
28
|
export { KVCache } from './inference/kv-cache.js';
|
|
15
29
|
export { Tokenizer } from './inference/tokenizer.js';
|
|
16
30
|
export { SpeculativeDecoder } from './inference/speculative.js';
|
|
package/src/inference/browser-harness-contract-helpers.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { getInferenceLayerPatternContractArtifact } from '../rules/rule-registry.js';
|
|
2
|
+
import { isPlainObject } from '../utils/plain-object.js';
|
|
3
|
+
import { validateBrowserSuiteMetrics } from '../config/schema/browser-suite-metrics.schema.js';
|
|
4
|
+
import { buildExecutionContractArtifact } from '../config/execution-contract-check.js';
|
|
5
|
+
import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
|
|
6
|
+
|
|
7
|
+
export function buildSuiteContractMetrics(suite, baseMetrics, manifest) {
|
|
8
|
+
const executionContractArtifact = buildExecutionContractArtifact(manifest);
|
|
9
|
+
const executionV0GraphContractArtifact = executionContractArtifact?.executionV0?.graph ?? null;
|
|
10
|
+
const layerPatternContractArtifact = getInferenceLayerPatternContractArtifact();
|
|
11
|
+
const requiredInferenceFieldsArtifact = manifest?.modelType === 'transformer'
|
|
12
|
+
&& isPlainObject(manifest?.inference?.attention)
|
|
13
|
+
? buildManifestRequiredInferenceFieldsArtifact(
|
|
14
|
+
manifest?.inference ?? null,
|
|
15
|
+
`${manifest?.modelId ?? 'unknown'}.inference`
|
|
16
|
+
)
|
|
17
|
+
: null;
|
|
18
|
+
return validateBrowserSuiteMetrics({
|
|
19
|
+
...baseMetrics,
|
|
20
|
+
schemaVersion: 1,
|
|
21
|
+
source: 'doppler',
|
|
22
|
+
suite,
|
|
23
|
+
...(executionContractArtifact ? { executionContractArtifact } : {}),
|
|
24
|
+
executionV0GraphContractArtifact,
|
|
25
|
+
layerPatternContractArtifact,
|
|
26
|
+
requiredInferenceFieldsArtifact,
|
|
27
|
+
});
|
|
28
|
+
}
|
|
package/src/inference/browser-harness-diffusion-energy-suites.js
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
import { getRuntimeConfig } from '../config/runtime.js';
|
|
2
|
+
import { computeSampleStats } from '../debug/stats.js';
|
|
3
|
+
import { initializeSuiteModel, resolveDeviceInfo } from './browser-harness-model-helpers.js';
|
|
4
|
+
import { buildSuiteContractMetrics } from './browser-harness-contract-helpers.js';
|
|
5
|
+
import { resolvePrompt } from './browser-harness-text-helpers.js';
|
|
6
|
+
import {
|
|
7
|
+
buildSuiteSummary,
|
|
8
|
+
normalizeCacheMode,
|
|
9
|
+
normalizeLoadMode,
|
|
10
|
+
safeStatsValue,
|
|
11
|
+
buildDiffusionPerformanceArtifact,
|
|
12
|
+
buildCanonicalTiming,
|
|
13
|
+
buildTimingDiagnostics,
|
|
14
|
+
} from './browser-harness-suite-helpers.js';
|
|
15
|
+
|
|
16
|
+
export async function runDiffusionSuite(options = {}) {
|
|
17
|
+
const startTime = performance.now();
|
|
18
|
+
const runtimeConfig = getRuntimeConfig();
|
|
19
|
+
const captureOutput = options.captureOutput === true;
|
|
20
|
+
const cacheMode = normalizeCacheMode(options.cacheMode);
|
|
21
|
+
const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
|
|
22
|
+
const benchConfig = runtimeConfig.shared?.benchmark?.run || {};
|
|
23
|
+
const warmupRuns = Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0));
|
|
24
|
+
const timedRuns = Math.max(1, Math.floor(benchConfig.timedRuns ?? 1));
|
|
25
|
+
|
|
26
|
+
const diffusionConfig = runtimeConfig.inference?.diffusion;
|
|
27
|
+
if (!diffusionConfig) {
|
|
28
|
+
throw new Error('runtime.inference.diffusion must be set for diffusion harness runs.');
|
|
29
|
+
}
|
|
30
|
+
const scheduler = diffusionConfig.scheduler;
|
|
31
|
+
const latent = diffusionConfig.latent;
|
|
32
|
+
const prompt = resolvePrompt(runtimeConfig);
|
|
33
|
+
const negativePrompt = diffusionConfig.negativePrompt ?? '';
|
|
34
|
+
|
|
35
|
+
const width = Math.floor(latent?.width);
|
|
36
|
+
const height = Math.floor(latent?.height);
|
|
37
|
+
const steps = Math.floor(scheduler?.numSteps);
|
|
38
|
+
const guidanceScale = scheduler?.guidanceScale;
|
|
39
|
+
|
|
40
|
+
if (!Number.isFinite(width) || width <= 0) {
|
|
41
|
+
throw new Error('runtime.inference.diffusion.latent.width must be set for diffusion harness runs.');
|
|
42
|
+
}
|
|
43
|
+
if (!Number.isFinite(height) || height <= 0) {
|
|
44
|
+
throw new Error('runtime.inference.diffusion.latent.height must be set for diffusion harness runs.');
|
|
45
|
+
}
|
|
46
|
+
if (!Number.isFinite(steps) || steps <= 0) {
|
|
47
|
+
throw new Error('runtime.inference.diffusion.scheduler.numSteps must be set for diffusion harness runs.');
|
|
48
|
+
}
|
|
49
|
+
if (!Number.isFinite(guidanceScale) || guidanceScale <= 0) {
|
|
50
|
+
throw new Error('runtime.inference.diffusion.scheduler.guidanceScale must be set for diffusion harness runs.');
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const harness = await initializeSuiteModel(options);
|
|
54
|
+
const totalMs = [];
|
|
55
|
+
const prefillMs = [];
|
|
56
|
+
const denoiseMs = [];
|
|
57
|
+
const vaeMs = [];
|
|
58
|
+
const prefillTokens = [];
|
|
59
|
+
const decodeTokens = [];
|
|
60
|
+
const gpuTotalMs = [];
|
|
61
|
+
const gpuPrefillMs = [];
|
|
62
|
+
const gpuDenoiseMs = [];
|
|
63
|
+
const gpuVaeMs = [];
|
|
64
|
+
let output = null;
|
|
65
|
+
|
|
66
|
+
for (let i = 0; i < warmupRuns + timedRuns; i++) {
|
|
67
|
+
harness.pipeline.reset?.();
|
|
68
|
+
const result = await harness.pipeline.generate({
|
|
69
|
+
prompt,
|
|
70
|
+
negativePrompt,
|
|
71
|
+
steps,
|
|
72
|
+
guidanceScale,
|
|
73
|
+
width,
|
|
74
|
+
height,
|
|
75
|
+
});
|
|
76
|
+
if (captureOutput && i === warmupRuns + timedRuns - 1) {
|
|
77
|
+
output = result;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if (i < warmupRuns) continue;
|
|
81
|
+
|
|
82
|
+
const stats = harness.pipeline.getStats?.() ?? {};
|
|
83
|
+
if (Number.isFinite(stats.totalTimeMs)) totalMs.push(stats.totalTimeMs);
|
|
84
|
+
if (Number.isFinite(stats.prefillTimeMs)) prefillMs.push(stats.prefillTimeMs);
|
|
85
|
+
if (Number.isFinite(stats.decodeTimeMs)) denoiseMs.push(stats.decodeTimeMs);
|
|
86
|
+
if (Number.isFinite(stats.vaeTimeMs)) vaeMs.push(stats.vaeTimeMs);
|
|
87
|
+
if (Number.isFinite(stats.prefillTokens)) prefillTokens.push(stats.prefillTokens);
|
|
88
|
+
if (Number.isFinite(stats.decodeTokens)) decodeTokens.push(stats.decodeTokens);
|
|
89
|
+
|
|
90
|
+
const gpu = stats.gpu ?? null;
|
|
91
|
+
if (gpu?.available) {
|
|
92
|
+
if (Number.isFinite(gpu.totalMs)) gpuTotalMs.push(gpu.totalMs);
|
|
93
|
+
if (Number.isFinite(gpu.prefillMs)) gpuPrefillMs.push(gpu.prefillMs);
|
|
94
|
+
if (Number.isFinite(gpu.denoiseMs)) gpuDenoiseMs.push(gpu.denoiseMs);
|
|
95
|
+
if (Number.isFinite(gpu.vaeMs)) gpuVaeMs.push(gpu.vaeMs);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
const memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
|
|
100
|
+
? harness.pipeline.getMemoryStats()
|
|
101
|
+
: null;
|
|
102
|
+
|
|
103
|
+
if (typeof harness.pipeline.unload === 'function' && !options.keepPipeline) {
|
|
104
|
+
await harness.pipeline.unload();
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
const results = [
|
|
108
|
+
{
|
|
109
|
+
name: 'diffusion',
|
|
110
|
+
passed: totalMs.length > 0,
|
|
111
|
+
duration: totalMs.reduce((sum, value) => sum + value, 0),
|
|
112
|
+
error: totalMs.length > 0 ? undefined : 'No diffusion runs completed',
|
|
113
|
+
},
|
|
114
|
+
];
|
|
115
|
+
|
|
116
|
+
const summary = buildSuiteSummary('diffusion', results, startTime);
|
|
117
|
+
const cpuStats = {
|
|
118
|
+
totalMs: computeSampleStats(totalMs),
|
|
119
|
+
prefillMs: computeSampleStats(prefillMs),
|
|
120
|
+
denoiseMs: computeSampleStats(denoiseMs),
|
|
121
|
+
vaeMs: computeSampleStats(vaeMs),
|
|
122
|
+
};
|
|
123
|
+
const gpuStats = gpuTotalMs.length > 0
|
|
124
|
+
? {
|
|
125
|
+
available: true,
|
|
126
|
+
totalMs: computeSampleStats(gpuTotalMs),
|
|
127
|
+
prefillMs: computeSampleStats(gpuPrefillMs),
|
|
128
|
+
denoiseMs: computeSampleStats(gpuDenoiseMs),
|
|
129
|
+
vaeMs: computeSampleStats(gpuVaeMs),
|
|
130
|
+
}
|
|
131
|
+
: { available: false };
|
|
132
|
+
|
|
133
|
+
const avgPrefillTokens = prefillTokens.length
|
|
134
|
+
? Math.round(prefillTokens.reduce((a, b) => a + b, 0) / prefillTokens.length)
|
|
135
|
+
: 0;
|
|
136
|
+
const avgDecodeTokens = decodeTokens.length
|
|
137
|
+
? Math.round(decodeTokens.reduce((a, b) => a + b, 0) / decodeTokens.length)
|
|
138
|
+
: 0;
|
|
139
|
+
const prefillMsMedian = safeStatsValue(cpuStats.prefillMs?.median);
|
|
140
|
+
const denoiseMsMedian = safeStatsValue(cpuStats.denoiseMs?.median);
|
|
141
|
+
const totalMsMedian = safeStatsValue(cpuStats.totalMs?.median);
|
|
142
|
+
const diffusionPerformanceArtifact = buildDiffusionPerformanceArtifact({
|
|
143
|
+
warmupRuns,
|
|
144
|
+
timedRuns,
|
|
145
|
+
width,
|
|
146
|
+
height,
|
|
147
|
+
steps,
|
|
148
|
+
guidanceScale,
|
|
149
|
+
avgPrefillTokens,
|
|
150
|
+
avgDecodeTokens,
|
|
151
|
+
cpuStats,
|
|
152
|
+
gpuStats,
|
|
153
|
+
});
|
|
154
|
+
const timing = buildCanonicalTiming({
|
|
155
|
+
modelLoadMs: 0,
|
|
156
|
+
firstTokenMs: null,
|
|
157
|
+
firstResponseMs: null,
|
|
158
|
+
prefillMs: prefillMsMedian,
|
|
159
|
+
decodeMs: denoiseMsMedian,
|
|
160
|
+
totalRunMs: totalMsMedian,
|
|
161
|
+
prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
|
|
162
|
+
decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
|
|
163
|
+
cacheMode,
|
|
164
|
+
loadMode,
|
|
165
|
+
});
|
|
166
|
+
const timingDiagnostics = buildTimingDiagnostics(timing, {
|
|
167
|
+
source: 'doppler',
|
|
168
|
+
prefillSemantics: 'internal_prefill_phase',
|
|
169
|
+
});
|
|
170
|
+
const metricsWithContracts = buildSuiteContractMetrics(
|
|
171
|
+
'diffusion',
|
|
172
|
+
{
|
|
173
|
+
warmupRuns,
|
|
174
|
+
timedRuns,
|
|
175
|
+
width,
|
|
176
|
+
height,
|
|
177
|
+
steps,
|
|
178
|
+
guidanceScale,
|
|
179
|
+
prompt,
|
|
180
|
+
avgPrefillTokens,
|
|
181
|
+
avgDecodeTokens,
|
|
182
|
+
latency: {
|
|
183
|
+
totalMs: cpuStats.totalMs,
|
|
184
|
+
prefillMs: cpuStats.prefillMs,
|
|
185
|
+
denoiseMs: cpuStats.denoiseMs,
|
|
186
|
+
vaeMs: cpuStats.vaeMs,
|
|
187
|
+
},
|
|
188
|
+
throughput: {
|
|
189
|
+
prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
|
|
190
|
+
decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
|
|
191
|
+
decodeStepsPerSec: diffusionPerformanceArtifact.throughput.decodeStepsPerSec,
|
|
192
|
+
},
|
|
193
|
+
cpu: cpuStats,
|
|
194
|
+
gpu: gpuStats,
|
|
195
|
+
performanceArtifact: diffusionPerformanceArtifact,
|
|
196
|
+
},
|
|
197
|
+
harness.manifest
|
|
198
|
+
);
|
|
199
|
+
|
|
200
|
+
return {
|
|
201
|
+
...summary,
|
|
202
|
+
modelId: options.modelId || harness.manifest?.modelId || 'unknown',
|
|
203
|
+
cacheMode,
|
|
204
|
+
loadMode,
|
|
205
|
+
env: {
|
|
206
|
+
library: 'doppler',
|
|
207
|
+
runtime: 'browser',
|
|
208
|
+
device: 'webgpu',
|
|
209
|
+
browserUserAgent: typeof navigator !== 'undefined' ? (navigator.userAgent || null) : null,
|
|
210
|
+
browserPlatform: typeof navigator !== 'undefined' ? (navigator.platform || null) : null,
|
|
211
|
+
browserLanguage: typeof navigator !== 'undefined' ? (navigator.language || null) : null,
|
|
212
|
+
browserVendor: typeof navigator !== 'undefined' ? (navigator.vendor || null) : null,
|
|
213
|
+
},
|
|
214
|
+
timing,
|
|
215
|
+
timingDiagnostics,
|
|
216
|
+
output,
|
|
217
|
+
metrics: metricsWithContracts,
|
|
218
|
+
memoryStats,
|
|
219
|
+
deviceInfo: resolveDeviceInfo(),
|
|
220
|
+
pipeline: options.keepPipeline ? harness.pipeline : null,
|
|
221
|
+
};
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
export async function runEnergySuite(options = {}) {
|
|
225
|
+
const startTime = performance.now();
|
|
226
|
+
const harness = await initializeSuiteModel(options);
|
|
227
|
+
if (harness.manifest?.modelType !== 'energy') {
|
|
228
|
+
throw new Error('Energy suite requires an energy model manifest.');
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
const result = await harness.pipeline.generate();
|
|
232
|
+
const stats = harness.pipeline.getStats?.() ?? {};
|
|
233
|
+
|
|
234
|
+
const memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
|
|
235
|
+
? harness.pipeline.getMemoryStats()
|
|
236
|
+
: null;
|
|
237
|
+
|
|
238
|
+
if (typeof harness.pipeline.unload === 'function' && !options.keepPipeline) {
|
|
239
|
+
await harness.pipeline.unload();
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
const results = [
|
|
243
|
+
{
|
|
244
|
+
name: 'energy',
|
|
245
|
+
passed: Number.isFinite(result.energy ?? NaN),
|
|
246
|
+
duration: result.totalTimeMs ?? Math.max(0, performance.now() - startTime),
|
|
247
|
+
error: Number.isFinite(result.energy ?? NaN) ? undefined : 'Energy did not converge',
|
|
248
|
+
},
|
|
249
|
+
];
|
|
250
|
+
|
|
251
|
+
const summary = buildSuiteSummary('energy', results, startTime);
|
|
252
|
+
return {
|
|
253
|
+
...summary,
|
|
254
|
+
modelId: options.modelId || harness.manifest?.modelId || 'unknown',
|
|
255
|
+
metrics: {
|
|
256
|
+
steps: result.steps,
|
|
257
|
+
energy: result.energy ?? null,
|
|
258
|
+
dtype: result.dtype,
|
|
259
|
+
shape: result.shape,
|
|
260
|
+
totalTimeMs: result.totalTimeMs ?? null,
|
|
261
|
+
energyHistory: result.energyHistory ?? [],
|
|
262
|
+
stateStats: result.stateStats ?? null,
|
|
263
|
+
readbackCount: stats.readbackCount ?? null,
|
|
264
|
+
},
|
|
265
|
+
memoryStats,
|
|
266
|
+
deviceInfo: resolveDeviceInfo(),
|
|
267
|
+
pipeline: options.keepPipeline ? harness.pipeline : null,
|
|
268
|
+
};
|
|
269
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Returns the kernel capabilities reported by the GPU layer, or `null` when
 * reading them throws.
 */
export declare function resolveDeviceInfo(): Record<string, unknown> | null;

/**
 * Resolves and activates the kernel path for the stored model named by
 * `options.modelId`.
 *
 * Resolves to `null` when no `modelId` is given or no manifest is found in the
 * model store.
 */
export declare function resolveKernelPathForModel(
  options?: Record<string, unknown>
): Promise<{
  modelId: string | null;
  kernelPath: unknown;
  source: string | null;
} | null>;

/**
 * Creates a pipeline from a model already present in storage.
 *
 * Resolves to an object holding `pipeline`, `manifest` and `capabilities`.
 * Rejects when `modelId` is empty or the manifest is missing from storage.
 */
export declare function initializeInferenceFromStorage(
  modelId: string,
  options?: Record<string, unknown>
): Promise<Record<string, unknown>>;

/**
 * Creates a pipeline from a local source-model path (`loadMode=memory`).
 *
 * Rejects when `sourcePath` is empty or not a string, when not running on Node,
 * when `sourcePath` looks like a URL, or when no source-runtime model is
 * detected at the path.
 */
export declare function initializeInferenceFromSourcePath(
  sourcePath: string,
  options?: Record<string, unknown>
): Promise<Record<string, unknown>>;

/**
 * Resolves `options.harnessOverride` (an object, or a function returning one)
 * into a harness with `manifest` and `modelLoadMs` filled in.
 *
 * Rejects when the override is not an object or lacks `pipeline.generate`.
 */
export declare function resolveHarnessOverride(
  options?: Record<string, unknown>
): Promise<Record<string, unknown>>;

/**
 * Loads the model for a suite run, choosing between a harness override, a local
 * source path, a stored model, or a model URL depending on `options`.
 */
export declare function initializeSuiteModel(
  options?: Record<string, unknown>
): Promise<Record<string, unknown>>;
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { initializeInference } from './test-harness.js';
|
|
2
|
+
import { setRuntimeConfig } from '../config/runtime.js';
|
|
3
|
+
import { initDevice, getKernelCapabilities, getDevice } from '../gpu/device.js';
|
|
4
|
+
import { createPipeline } from './pipelines/text.js';
|
|
5
|
+
import { parseModelConfigFromManifest } from './pipelines/text/config.js';
|
|
6
|
+
import { resolveKernelPathState, activateKernelPathState } from './pipelines/text/model-load.js';
|
|
7
|
+
import { openModelStore, loadManifestFromStore } from '../storage/shard-manager.js';
|
|
8
|
+
import { parseManifest } from '../formats/rdrr/index.js';
|
|
9
|
+
import { resolveRuntime } from './browser-harness-runtime-helpers.js';
|
|
10
|
+
import { normalizeLoadMode } from './browser-harness-suite-helpers.js';
|
|
11
|
+
import { buildSourceArtifactFingerprint, createStoredSourceArtifactContext } from '../storage/source-artifact-store.js';
|
|
12
|
+
|
|
13
|
+
// Specifier of the source-runtime module. It is loaded with a dynamic import()
// inside initializeInferenceFromSourcePath, only after the Node runtime check.
// NOTE(review): presumably kept out of the static imports so non-Node bundles
// never resolve it — confirm.
const NODE_SOURCE_RUNTIME_MODULE_PATH = '../tooling/node-source-runtime.js';
|
|
14
|
+
|
|
15
|
+
function isNodeRuntime() {
|
|
16
|
+
return typeof process !== 'undefined' && !!process.versions?.node;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function resolveSourceVerifyHashes(options = {}) {
|
|
20
|
+
const explicit = options?.runtime?.runtimeConfig?.loading?.shardCache?.verifyHashes;
|
|
21
|
+
if (explicit == null) {
|
|
22
|
+
return true;
|
|
23
|
+
}
|
|
24
|
+
return explicit === true;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export function resolveDeviceInfo() {
|
|
28
|
+
try {
|
|
29
|
+
return getKernelCapabilities();
|
|
30
|
+
} catch {
|
|
31
|
+
return null;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export async function resolveKernelPathForModel(options = {}) {
|
|
36
|
+
const runtimeConfig = options.runtime?.runtimeConfig ?? null;
|
|
37
|
+
let manifest = null;
|
|
38
|
+
let manifestModelId = options.modelId || null;
|
|
39
|
+
|
|
40
|
+
if (options.modelId) {
|
|
41
|
+
await openModelStore(options.modelId);
|
|
42
|
+
const manifestText = await loadManifestFromStore();
|
|
43
|
+
if (manifestText) {
|
|
44
|
+
manifest = parseManifest(manifestText);
|
|
45
|
+
manifestModelId = manifest.modelId ?? options.modelId;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if (!manifest) return null;
|
|
50
|
+
|
|
51
|
+
const modelConfig = parseModelConfigFromManifest(manifest, runtimeConfig);
|
|
52
|
+
const kernelPathState = resolveKernelPathState({
|
|
53
|
+
manifest,
|
|
54
|
+
runtimeConfig,
|
|
55
|
+
modelConfig,
|
|
56
|
+
});
|
|
57
|
+
activateKernelPathState(kernelPathState);
|
|
58
|
+
return {
|
|
59
|
+
modelId: manifestModelId,
|
|
60
|
+
kernelPath: kernelPathState.resolvedKernelPath,
|
|
61
|
+
source: kernelPathState.kernelPathSource,
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
export async function initializeInferenceFromStorage(modelId, options = {}) {
|
|
66
|
+
const { onProgress } = options;
|
|
67
|
+
if (!modelId) {
|
|
68
|
+
throw new Error('modelId is required');
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
if (options.runtime?.runtimeConfig) {
|
|
72
|
+
setRuntimeConfig(options.runtime.runtimeConfig);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
onProgress?.('storage', 0.05, 'Opening model store...');
|
|
76
|
+
await openModelStore(modelId);
|
|
77
|
+
|
|
78
|
+
onProgress?.('manifest', 0.1, 'Loading manifest...');
|
|
79
|
+
const manifestText = await loadManifestFromStore();
|
|
80
|
+
if (!manifestText) {
|
|
81
|
+
throw new Error('Manifest not found in storage');
|
|
82
|
+
}
|
|
83
|
+
const manifest = parseManifest(manifestText);
|
|
84
|
+
|
|
85
|
+
onProgress?.('gpu', 0.2, 'Initializing WebGPU...');
|
|
86
|
+
await initDevice();
|
|
87
|
+
const device = getDevice();
|
|
88
|
+
const capabilities = getKernelCapabilities();
|
|
89
|
+
|
|
90
|
+
onProgress?.('pipeline', 0.3, 'Creating pipeline...');
|
|
91
|
+
const storage = buildSourceArtifactFingerprint(manifest)
|
|
92
|
+
? createStoredSourceArtifactContext(manifest, { verifyHashes: true })
|
|
93
|
+
: null;
|
|
94
|
+
const pipeline = await createPipeline(manifest, {
|
|
95
|
+
gpu: { device },
|
|
96
|
+
runtime: options.runtime,
|
|
97
|
+
...(storage ? { storage } : {}),
|
|
98
|
+
onProgress,
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
return { pipeline, manifest, capabilities };
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
export async function initializeInferenceFromSourcePath(sourcePath, options = {}) {
|
|
105
|
+
const { onProgress } = options;
|
|
106
|
+
if (!sourcePath || typeof sourcePath !== 'string') {
|
|
107
|
+
throw new Error('modelUrl is required for loadMode=memory.');
|
|
108
|
+
}
|
|
109
|
+
if (!isNodeRuntime()) {
|
|
110
|
+
throw new Error('loadMode=memory source runtime is currently supported on Node only.');
|
|
111
|
+
}
|
|
112
|
+
if (/^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//.test(sourcePath)) {
|
|
113
|
+
throw new Error(
|
|
114
|
+
'loadMode=memory expects a local filesystem path (Safetensors directory or .gguf file), not an URL.'
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if (options.runtime?.runtimeConfig) {
|
|
119
|
+
setRuntimeConfig(options.runtime.runtimeConfig);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
onProgress?.('source', 0.05, 'Preparing source runtime bundle...');
|
|
123
|
+
const { resolveNodeSourceRuntimeBundle } = await import(NODE_SOURCE_RUNTIME_MODULE_PATH);
|
|
124
|
+
const sourceBundle = await resolveNodeSourceRuntimeBundle({
|
|
125
|
+
inputPath: sourcePath,
|
|
126
|
+
modelId: options.modelId || null,
|
|
127
|
+
verifyHashes: resolveSourceVerifyHashes(options),
|
|
128
|
+
});
|
|
129
|
+
if (!sourceBundle) {
|
|
130
|
+
throw new Error(
|
|
131
|
+
`No source-runtime model detected at "${sourcePath}". ` +
|
|
132
|
+
'Expected a Safetensors directory or a .gguf file path.'
|
|
133
|
+
);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
onProgress?.('gpu', 0.2, 'Initializing WebGPU...');
|
|
137
|
+
await initDevice();
|
|
138
|
+
const device = getDevice();
|
|
139
|
+
const capabilities = getKernelCapabilities();
|
|
140
|
+
|
|
141
|
+
onProgress?.('pipeline', 0.3, 'Creating pipeline...');
|
|
142
|
+
const pipeline = await createPipeline(sourceBundle.manifest, {
|
|
143
|
+
gpu: { device },
|
|
144
|
+
runtime: options.runtime,
|
|
145
|
+
storage: sourceBundle.storageContext,
|
|
146
|
+
onProgress,
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
return {
|
|
150
|
+
pipeline,
|
|
151
|
+
manifest: sourceBundle.manifest,
|
|
152
|
+
capabilities,
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
export async function resolveHarnessOverride(options = {}) {
|
|
157
|
+
const input = typeof options.harnessOverride === 'function'
|
|
158
|
+
? await options.harnessOverride(options)
|
|
159
|
+
: options.harnessOverride;
|
|
160
|
+
|
|
161
|
+
if (!input || typeof input !== 'object') {
|
|
162
|
+
throw new Error('harnessOverride must resolve to an object.');
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
if (!input.pipeline || typeof input.pipeline.generate !== 'function') {
|
|
166
|
+
throw new Error('harnessOverride.pipeline.generate(request) is required.');
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
const manifest = input.manifest && typeof input.manifest === 'object'
|
|
170
|
+
? input.manifest
|
|
171
|
+
: {
|
|
172
|
+
modelId: options.modelId || 'diffusion-harness-override',
|
|
173
|
+
modelType: 'diffusion',
|
|
174
|
+
};
|
|
175
|
+
|
|
176
|
+
const modelLoadMs = Number.isFinite(input.modelLoadMs)
|
|
177
|
+
? Math.max(0, input.modelLoadMs)
|
|
178
|
+
: 0;
|
|
179
|
+
|
|
180
|
+
return {
|
|
181
|
+
...input,
|
|
182
|
+
manifest,
|
|
183
|
+
modelLoadMs,
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
export async function initializeSuiteModel(options = {}) {
|
|
188
|
+
if (options.harnessOverride) {
|
|
189
|
+
if (options.runtime?.runtimeConfig) {
|
|
190
|
+
setRuntimeConfig(options.runtime.runtimeConfig);
|
|
191
|
+
}
|
|
192
|
+
return resolveHarnessOverride(options);
|
|
193
|
+
}
|
|
194
|
+
const loadStart = performance.now();
|
|
195
|
+
const runtime = resolveRuntime(options);
|
|
196
|
+
const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
|
|
197
|
+
let harness;
|
|
198
|
+
if (loadMode === 'memory') {
|
|
199
|
+
if (!options.modelUrl) {
|
|
200
|
+
throw new Error('loadMode=memory requires modelUrl to be a local model path.');
|
|
201
|
+
}
|
|
202
|
+
harness = await initializeInferenceFromSourcePath(options.modelUrl, { ...options, runtime });
|
|
203
|
+
} else if (options.modelId && !options.modelUrl) {
|
|
204
|
+
harness = await initializeInferenceFromStorage(options.modelId, { ...options, runtime });
|
|
205
|
+
} else {
|
|
206
|
+
if (!options.modelUrl) {
|
|
207
|
+
throw new Error('modelUrl is required for this suite');
|
|
208
|
+
}
|
|
209
|
+
harness = await initializeInference(options.modelUrl, {
|
|
210
|
+
runtime,
|
|
211
|
+
onProgress: options.onProgress,
|
|
212
|
+
log: options.log,
|
|
213
|
+
});
|
|
214
|
+
}
|
|
215
|
+
const modelLoadMs = Math.max(0, performance.now() - loadStart);
|
|
216
|
+
return { ...harness, modelLoadMs };
|
|
217
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
 * Collects training artifacts from a suite result.
 *
 * @param suiteResult The suite result to read from.
 * @returns Three lists of records: `ulArtifacts`, `distillArtifacts` and
 *   `checkpointResumeTimeline`.
 */
export declare function collectTrainingArtifactsFromSuiteResult(
  suiteResult: Record<string, unknown>
): {
  ulArtifacts: Record<string, unknown>[];
  distillArtifacts: Record<string, unknown>[];
  checkpointResumeTimeline: Record<string, unknown>[];
};
|