@simulatte/doppler 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BRANDING.md +14 -0
- package/LICENSE +201 -0
- package/NOTICE +5 -0
- package/README.md +85 -0
- package/SECURITY.md +19 -0
- package/package.json +144 -0
- package/src/adapters/adapter-manager.d.ts +200 -0
- package/src/adapters/adapter-manager.js +509 -0
- package/src/adapters/adapter-manifest.d.ts +290 -0
- package/src/adapters/adapter-manifest.js +320 -0
- package/src/adapters/adapter-registry.d.ts +192 -0
- package/src/adapters/adapter-registry.js +466 -0
- package/src/adapters/index.d.ts +89 -0
- package/src/adapters/index.js +42 -0
- package/src/adapters/lora-loader.d.ts +105 -0
- package/src/adapters/lora-loader.js +397 -0
- package/src/bootstrap.d.ts +1 -0
- package/src/bootstrap.js +30 -0
- package/src/bridge/extension/background.d.ts +14 -0
- package/src/bridge/extension/background.js +168 -0
- package/src/bridge/extension/manifest.json +34 -0
- package/src/bridge/extension-client.d.ts +109 -0
- package/src/bridge/extension-client.js +369 -0
- package/src/bridge/index.d.ts +68 -0
- package/src/bridge/index.js +51 -0
- package/src/bridge/protocol.d.ts +96 -0
- package/src/bridge/protocol.js +130 -0
- package/src/browser/browser-converter.d.ts +71 -0
- package/src/browser/browser-converter.js +947 -0
- package/src/browser/file-picker.d.ts +63 -0
- package/src/browser/file-picker.js +275 -0
- package/src/browser/gguf-importer.d.ts +136 -0
- package/src/browser/gguf-importer.js +532 -0
- package/src/browser/gguf-parser-browser.d.ts +14 -0
- package/src/browser/gguf-parser-browser.js +17 -0
- package/src/browser/quantization.d.ts +69 -0
- package/src/browser/quantization.js +328 -0
- package/src/browser/safetensors-parser-browser.d.ts +193 -0
- package/src/browser/safetensors-parser-browser.js +264 -0
- package/src/browser/shard-io-browser.d.ts +57 -0
- package/src/browser/shard-io-browser.js +89 -0
- package/src/browser/tensor-source-download.d.ts +27 -0
- package/src/browser/tensor-source-download.js +239 -0
- package/src/browser/tensor-source-file.d.ts +26 -0
- package/src/browser/tensor-source-file.js +53 -0
- package/src/browser/tensor-source-http.d.ts +28 -0
- package/src/browser/tensor-source-http.js +126 -0
- package/src/client/doppler-provider/generation.d.ts +25 -0
- package/src/client/doppler-provider/generation.js +114 -0
- package/src/client/doppler-provider/index.d.ts +2 -0
- package/src/client/doppler-provider/index.js +3 -0
- package/src/client/doppler-provider/model-manager.d.ts +61 -0
- package/src/client/doppler-provider/model-manager.js +667 -0
- package/src/client/doppler-provider/provider.d.ts +5 -0
- package/src/client/doppler-provider/provider.js +102 -0
- package/src/client/doppler-provider/source-runtime.d.ts +22 -0
- package/src/client/doppler-provider/source-runtime.js +522 -0
- package/src/client/doppler-provider/types.d.ts +127 -0
- package/src/client/doppler-provider/types.js +17 -0
- package/src/client/doppler-provider.d.ts +46 -0
- package/src/client/doppler-provider.js +36 -0
- package/src/config/README.md +69 -0
- package/src/config/backward-registry-loader.d.ts +3 -0
- package/src/config/backward-registry-loader.js +8 -0
- package/src/config/index.d.ts +63 -0
- package/src/config/index.js +31 -0
- package/src/config/kernel-path-loader.d.ts +149 -0
- package/src/config/kernel-path-loader.js +534 -0
- package/src/config/kernels/backward-registry.json +99 -0
- package/src/config/kernels/kernel-ref-digests.d.ts +1 -0
- package/src/config/kernels/kernel-ref-digests.js +214 -0
- package/src/config/kernels/kernel-ref.d.ts +17 -0
- package/src/config/kernels/kernel-ref.js +75 -0
- package/src/config/kernels/moe/gpt-oss.paths.json +49 -0
- package/src/config/kernels/registry.d.ts +86 -0
- package/src/config/kernels/registry.js +103 -0
- package/src/config/kernels/registry.json +6771 -0
- package/src/config/loader.d.ts +57 -0
- package/src/config/loader.js +513 -0
- package/src/config/merge.d.ts +142 -0
- package/src/config/merge.js +389 -0
- package/src/config/param-categories.d.ts +17 -0
- package/src/config/param-categories.js +72 -0
- package/src/config/param-validator.d.ts +26 -0
- package/src/config/param-validator.js +235 -0
- package/src/config/platforms/amd-rdna3.json +16 -0
- package/src/config/platforms/apple-m1.json +16 -0
- package/src/config/platforms/apple-m2.json +16 -0
- package/src/config/platforms/apple-m3.json +16 -0
- package/src/config/platforms/generic.json +14 -0
- package/src/config/platforms/loader.d.ts +65 -0
- package/src/config/platforms/loader.js +153 -0
- package/src/config/platforms/nvidia-rtx30.json +16 -0
- package/src/config/platforms/nvidia-rtx40.json +16 -0
- package/src/config/presets/kernel-paths/embeddinggemma-f16-f32a.json +60 -0
- package/src/config/presets/kernel-paths/embeddinggemma-f32-f32a.json +60 -0
- package/src/config/presets/kernel-paths/embeddinggemma-q4k-dequant-f32a.json +60 -0
- package/src/config/presets/kernel-paths/gemma2-f16-f16a.json +61 -0
- package/src/config/presets/kernel-paths/gemma2-f16-f32a.json +60 -0
- package/src/config/presets/kernel-paths/gemma2-q4k-dequant-f16a.json +61 -0
- package/src/config/presets/kernel-paths/gemma2-q4k-dequant-f32a.json +60 -0
- package/src/config/presets/kernel-paths/gemma2-q4k-fused-f32a.json +57 -0
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f16a-online.json +200 -0
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online.json +223 -0
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f16a-online.json +60 -0
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-online.json +61 -0
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a.json +61 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-online.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +103 -0
- package/src/config/presets/models/deepseek.json +20 -0
- package/src/config/presets/models/diffusion.json +10 -0
- package/src/config/presets/models/embeddinggemma.json +74 -0
- package/src/config/presets/models/functiongemma.json +31 -0
- package/src/config/presets/models/gemma2.json +59 -0
- package/src/config/presets/models/gemma3.json +75 -0
- package/src/config/presets/models/gpt-oss.json +68 -0
- package/src/config/presets/models/kimi-k2.json +25 -0
- package/src/config/presets/models/lfm2.json +83 -0
- package/src/config/presets/models/llama3.json +40 -0
- package/src/config/presets/models/mamba.json +34 -0
- package/src/config/presets/models/mixtral.json +37 -0
- package/src/config/presets/models/modernbert.json +32 -0
- package/src/config/presets/models/qwen3.json +41 -0
- package/src/config/presets/models/transformer.json +73 -0
- package/src/config/presets/models/translategemma.json +30 -0
- package/src/config/presets/platforms/nvidia-gb200-8gpu.json +45 -0
- package/src/config/presets/platforms/nvidia-gb200-nvl72.json +45 -0
- package/src/config/presets/platforms/nvidia-gh200-nvl2.json +44 -0
- package/src/config/presets/platforms/nvidia-gh200.json +44 -0
- package/src/config/presets/runtime/compute/f16-activations.json +30 -0
- package/src/config/presets/runtime/compute/f16-batched.json +32 -0
- package/src/config/presets/runtime/default.json +101 -0
- package/src/config/presets/runtime/diagnostics/debug-logits.json +53 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +53 -0
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +210 -0
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +39 -0
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +20 -0
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +20 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +20 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +20 -0
- package/src/config/presets/runtime/model/gemma2-debug.json +77 -0
- package/src/config/presets/runtime/model/gemma2-pipeline-debug.json +66 -0
- package/src/config/presets/runtime/model/gemma2-pipeline.json +75 -0
- package/src/config/presets/runtime/model/gemma3-layer-probe.json +85 -0
- package/src/config/presets/runtime/modes/bench.json +37 -0
- package/src/config/presets/runtime/modes/debug.json +39 -0
- package/src/config/presets/runtime/modes/default.json +10 -0
- package/src/config/presets/runtime/modes/embedding-bench.json +28 -0
- package/src/config/presets/runtime/modes/embedding.json +54 -0
- package/src/config/presets/runtime/modes/low-memory.json +40 -0
- package/src/config/presets/runtime/modes/production.json +48 -0
- package/src/config/presets/runtime/modes/simulation.json +30 -0
- package/src/config/presets/runtime/modes/trace-layers.json +126 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +11 -0
- package/src/config/runtime-merge.d.ts +5 -0
- package/src/config/runtime-merge.js +21 -0
- package/src/config/runtime.d.ts +28 -0
- package/src/config/runtime.js +56 -0
- package/src/config/schema/adapter.schema.d.ts +53 -0
- package/src/config/schema/adapter.schema.js +60 -0
- package/src/config/schema/backward-registry.schema.d.ts +14 -0
- package/src/config/schema/backward-registry.schema.js +46 -0
- package/src/config/schema/benchmark.schema.d.ts +54 -0
- package/src/config/schema/benchmark.schema.js +74 -0
- package/src/config/schema/bridge.schema.d.ts +25 -0
- package/src/config/schema/bridge.schema.js +22 -0
- package/src/config/schema/buffer-pool.schema.d.ts +92 -0
- package/src/config/schema/buffer-pool.schema.js +50 -0
- package/src/config/schema/conversion.schema.d.ts +183 -0
- package/src/config/schema/conversion.schema.js +13 -0
- package/src/config/schema/converter.schema.d.ts +123 -0
- package/src/config/schema/converter.schema.js +136 -0
- package/src/config/schema/debug.schema.d.ts +245 -0
- package/src/config/schema/debug.schema.js +106 -0
- package/src/config/schema/diffusion.schema.d.ts +88 -0
- package/src/config/schema/diffusion.schema.js +62 -0
- package/src/config/schema/distill-training.schema.d.ts +48 -0
- package/src/config/schema/distill-training.schema.js +139 -0
- package/src/config/schema/distribution.schema.d.ts +155 -0
- package/src/config/schema/distribution.schema.js +81 -0
- package/src/config/schema/doppler.schema.d.ts +75 -0
- package/src/config/schema/doppler.schema.js +352 -0
- package/src/config/schema/ecosystem.schema.d.ts +255 -0
- package/src/config/schema/ecosystem.schema.js +534 -0
- package/src/config/schema/emulation.schema.d.ts +351 -0
- package/src/config/schema/emulation.schema.js +299 -0
- package/src/config/schema/energy.schema.d.ts +102 -0
- package/src/config/schema/energy.schema.js +72 -0
- package/src/config/schema/execution-v0.schema.d.ts +187 -0
- package/src/config/schema/execution-v0.schema.js +55 -0
- package/src/config/schema/gpu-cache.schema.d.ts +26 -0
- package/src/config/schema/gpu-cache.schema.js +8 -0
- package/src/config/schema/harness.schema.d.ts +32 -0
- package/src/config/schema/harness.schema.js +20 -0
- package/src/config/schema/hotswap.schema.d.ts +55 -0
- package/src/config/schema/hotswap.schema.js +18 -0
- package/src/config/schema/index.d.ts +863 -0
- package/src/config/schema/index.js +471 -0
- package/src/config/schema/inference-defaults.schema.d.ts +276 -0
- package/src/config/schema/inference-defaults.schema.js +185 -0
- package/src/config/schema/inference.schema.d.ts +289 -0
- package/src/config/schema/inference.schema.js +39 -0
- package/src/config/schema/intent-bundle.schema.d.ts +28 -0
- package/src/config/schema/intent-bundle.schema.js +12 -0
- package/src/config/schema/kernel-path.schema.d.ts +173 -0
- package/src/config/schema/kernel-path.schema.js +9 -0
- package/src/config/schema/kernel-registry.schema.d.ts +199 -0
- package/src/config/schema/kernel-registry.schema.js +46 -0
- package/src/config/schema/kernel-thresholds.schema.d.ts +302 -0
- package/src/config/schema/kernel-thresholds.schema.js +187 -0
- package/src/config/schema/kernel-warmup.schema.d.ts +19 -0
- package/src/config/schema/kernel-warmup.schema.js +5 -0
- package/src/config/schema/kvcache.schema.d.ts +131 -0
- package/src/config/schema/kvcache.schema.js +31 -0
- package/src/config/schema/loading.schema.d.ts +153 -0
- package/src/config/schema/loading.schema.js +84 -0
- package/src/config/schema/lora.schema.d.ts +12 -0
- package/src/config/schema/lora.schema.js +12 -0
- package/src/config/schema/manifest.schema.d.ts +500 -0
- package/src/config/schema/manifest.schema.js +130 -0
- package/src/config/schema/memory-limits.schema.d.ts +107 -0
- package/src/config/schema/memory-limits.schema.js +57 -0
- package/src/config/schema/moe.schema.d.ts +78 -0
- package/src/config/schema/moe.schema.js +31 -0
- package/src/config/schema/platform.schema.d.ts +121 -0
- package/src/config/schema/platform.schema.js +1 -0
- package/src/config/schema/preset.schema.d.ts +124 -0
- package/src/config/schema/preset.schema.js +1 -0
- package/src/config/schema/quantization-defaults.schema.d.ts +34 -0
- package/src/config/schema/quantization-defaults.schema.js +5 -0
- package/src/config/schema/quantization.schema.d.ts +10 -0
- package/src/config/schema/quantization.schema.js +33 -0
- package/src/config/schema/shared-runtime.schema.d.ts +75 -0
- package/src/config/schema/shared-runtime.schema.js +45 -0
- package/src/config/schema/speculative.schema.d.ts +21 -0
- package/src/config/schema/speculative.schema.js +11 -0
- package/src/config/schema/storage.schema.d.ts +123 -0
- package/src/config/schema/storage.schema.js +66 -0
- package/src/config/schema/tooling.schema.d.ts +29 -0
- package/src/config/schema/tooling.schema.js +12 -0
- package/src/config/schema/training-metrics.schema.d.ts +89 -0
- package/src/config/schema/training-metrics.schema.js +374 -0
- package/src/config/schema/training.schema.d.ts +88 -0
- package/src/config/schema/training.schema.js +106 -0
- package/src/config/schema/tuner.schema.d.ts +39 -0
- package/src/config/schema/tuner.schema.js +13 -0
- package/src/config/schema/ul-training.schema.d.ts +61 -0
- package/src/config/schema/ul-training.schema.js +140 -0
- package/src/config/schema/units.schema.d.ts +27 -0
- package/src/config/schema/units.schema.js +26 -0
- package/src/config/training-defaults.d.ts +24 -0
- package/src/config/training-defaults.js +91 -0
- package/src/converter/conversion-plan.d.ts +64 -0
- package/src/converter/conversion-plan.js +472 -0
- package/src/converter/core.d.ts +247 -0
- package/src/converter/core.js +1329 -0
- package/src/converter/execution-v0-manifest.d.ts +15 -0
- package/src/converter/execution-v0-manifest.js +146 -0
- package/src/converter/index.d.ts +98 -0
- package/src/converter/index.js +59 -0
- package/src/converter/manifest-inference.d.ts +20 -0
- package/src/converter/manifest-inference.js +492 -0
- package/src/converter/parsers/diffusion.d.ts +50 -0
- package/src/converter/parsers/diffusion.js +270 -0
- package/src/converter/parsers/gguf.d.ts +22 -0
- package/src/converter/parsers/gguf.js +46 -0
- package/src/converter/parsers/index.d.ts +21 -0
- package/src/converter/parsers/index.js +12 -0
- package/src/converter/parsers/transformer.d.ts +16 -0
- package/src/converter/parsers/transformer.js +25 -0
- package/src/converter/quantization-info.d.ts +37 -0
- package/src/converter/quantization-info.js +398 -0
- package/src/converter/quantizer.d.ts +96 -0
- package/src/converter/quantizer.js +422 -0
- package/src/converter/rope-config.d.ts +15 -0
- package/src/converter/rope-config.js +218 -0
- package/src/converter/shard-packer.d.ts +138 -0
- package/src/converter/shard-packer.js +422 -0
- package/src/converter/tokenizer-utils.d.ts +11 -0
- package/src/converter/tokenizer-utils.js +87 -0
- package/src/debug/config.d.ts +78 -0
- package/src/debug/config.js +235 -0
- package/src/debug/history.d.ts +65 -0
- package/src/debug/history.js +71 -0
- package/src/debug/index.d.ts +268 -0
- package/src/debug/index.js +192 -0
- package/src/debug/log.d.ts +46 -0
- package/src/debug/log.js +132 -0
- package/src/debug/perf.d.ts +33 -0
- package/src/debug/perf.js +51 -0
- package/src/debug/reference/README.md +114 -0
- package/src/debug/reference/hf_attn_debug.py +114 -0
- package/src/debug/reference/hf_embed_check.py +89 -0
- package/src/debug/reference/hf_layer_out.py +100 -0
- package/src/debug/reference/hf_rope_check.py +116 -0
- package/src/debug/reference/hf_weights.py +75 -0
- package/src/debug/signals.d.ts +63 -0
- package/src/debug/signals.js +33 -0
- package/src/debug/stats.d.ts +47 -0
- package/src/debug/stats.js +160 -0
- package/src/debug/tensor.d.ts +123 -0
- package/src/debug/tensor.js +257 -0
- package/src/debug/trace.d.ts +17 -0
- package/src/debug/trace.js +167 -0
- package/src/diffusion/image-regression.d.ts +31 -0
- package/src/diffusion/image-regression.js +107 -0
- package/src/diffusion/index.d.ts +8 -0
- package/src/diffusion/index.js +8 -0
- package/src/distribution/p2p-control-plane.d.ts +52 -0
- package/src/distribution/p2p-control-plane.js +232 -0
- package/src/distribution/p2p-observability.d.ts +116 -0
- package/src/distribution/p2p-observability.js +267 -0
- package/src/distribution/p2p-transport-contract.d.ts +57 -0
- package/src/distribution/p2p-transport-contract.js +310 -0
- package/src/distribution/p2p-webrtc-browser.d.ts +37 -0
- package/src/distribution/p2p-webrtc-browser.js +434 -0
- package/src/distribution/shard-delivery.d.ts +251 -0
- package/src/distribution/shard-delivery.js +2096 -0
- package/src/energy/index.d.ts +2 -0
- package/src/energy/index.js +2 -0
- package/src/errors/doppler-error.d.ts +21 -0
- package/src/errors/doppler-error.js +25 -0
- package/src/errors/index.d.ts +1 -0
- package/src/errors/index.js +1 -0
- package/src/formats/gguf/index.d.ts +8 -0
- package/src/formats/gguf/index.js +4 -0
- package/src/formats/gguf/types.d.ts +137 -0
- package/src/formats/gguf/types.js +443 -0
- package/src/formats/index.d.ts +51 -0
- package/src/formats/index.js +13 -0
- package/src/formats/rdrr/classification.d.ts +39 -0
- package/src/formats/rdrr/classification.js +275 -0
- package/src/formats/rdrr/groups.d.ts +27 -0
- package/src/formats/rdrr/groups.js +76 -0
- package/src/formats/rdrr/index.d.ts +25 -0
- package/src/formats/rdrr/index.js +19 -0
- package/src/formats/rdrr/manifest.d.ts +32 -0
- package/src/formats/rdrr/manifest.js +108 -0
- package/src/formats/rdrr/parsing.d.ts +23 -0
- package/src/formats/rdrr/parsing.js +101 -0
- package/src/formats/rdrr/tensor-config-validator.d.ts +42 -0
- package/src/formats/rdrr/tensor-config-validator.js +156 -0
- package/src/formats/rdrr/types.d.ts +200 -0
- package/src/formats/rdrr/types.js +16 -0
- package/src/formats/rdrr/validation.d.ts +9 -0
- package/src/formats/rdrr/validation.js +200 -0
- package/src/formats/safetensors/index.d.ts +8 -0
- package/src/formats/safetensors/index.js +4 -0
- package/src/formats/safetensors/types.d.ts +67 -0
- package/src/formats/safetensors/types.js +102 -0
- package/src/formats/tokenizer/index.d.ts +5 -0
- package/src/formats/tokenizer/index.js +3 -0
- package/src/formats/tokenizer/types.d.ts +9 -0
- package/src/formats/tokenizer/types.js +22 -0
- package/src/generation/index.d.ts +18 -0
- package/src/generation/index.js +12 -0
- package/src/gpu/command-recorder.d.ts +175 -0
- package/src/gpu/command-recorder.js +473 -0
- package/src/gpu/device.d.ts +141 -0
- package/src/gpu/device.js +350 -0
- package/src/gpu/kernel-runtime.d.ts +20 -0
- package/src/gpu/kernel-runtime.js +37 -0
- package/src/gpu/kernel-selection-cache.d.ts +13 -0
- package/src/gpu/kernel-selection-cache.js +13 -0
- package/src/gpu/kernel-selection-log.d.ts +12 -0
- package/src/gpu/kernel-selection-log.js +28 -0
- package/src/gpu/kernel-selector.d.ts +11 -0
- package/src/gpu/kernel-selector.js +10 -0
- package/src/gpu/kernel-tuner/benchmarks.d.ts +144 -0
- package/src/gpu/kernel-tuner/benchmarks.js +892 -0
- package/src/gpu/kernel-tuner/cache.d.ts +55 -0
- package/src/gpu/kernel-tuner/cache.js +66 -0
- package/src/gpu/kernel-tuner/index.d.ts +59 -0
- package/src/gpu/kernel-tuner/index.js +38 -0
- package/src/gpu/kernel-tuner/tuner.d.ts +82 -0
- package/src/gpu/kernel-tuner/tuner.js +229 -0
- package/src/gpu/kernel-tuner/types.d.ts +101 -0
- package/src/gpu/kernel-tuner/types.js +4 -0
- package/src/gpu/kernel-tuner.d.ts +33 -0
- package/src/gpu/kernel-tuner.js +12 -0
- package/src/gpu/kernels/README.md +127 -0
- package/src/gpu/kernels/attention.d.ts +236 -0
- package/src/gpu/kernels/attention.js +1359 -0
- package/src/gpu/kernels/attention.wgsl +249 -0
- package/src/gpu/kernels/attention_bdpa_decode_f16.wgsl +246 -0
- package/src/gpu/kernels/attention_decode.wgsl +233 -0
- package/src/gpu/kernels/attention_decode_chunked_f16.wgsl +183 -0
- package/src/gpu/kernels/attention_decode_chunked_f16kv.wgsl +208 -0
- package/src/gpu/kernels/attention_decode_f16.wgsl +202 -0
- package/src/gpu/kernels/attention_decode_f16kv.wgsl +224 -0
- package/src/gpu/kernels/attention_decode_online_f16.wgsl +223 -0
- package/src/gpu/kernels/attention_decode_online_f16kv.wgsl +225 -0
- package/src/gpu/kernels/attention_decode_optimized.wgsl +445 -0
- package/src/gpu/kernels/attention_decode_paged_f16.wgsl +172 -0
- package/src/gpu/kernels/attention_decode_paged_f16kv.wgsl +174 -0
- package/src/gpu/kernels/attention_decode_subgroup.wgsl +233 -0
- package/src/gpu/kernels/attention_decode_tiered_f16.wgsl +218 -0
- package/src/gpu/kernels/attention_decode_tiered_f16kv.wgsl +220 -0
- package/src/gpu/kernels/attention_decode_tiered_int4_f16kv.wgsl +242 -0
- package/src/gpu/kernels/attention_decode_tiered_int8_f16kv.wgsl +242 -0
- package/src/gpu/kernels/attention_f16.wgsl +214 -0
- package/src/gpu/kernels/attention_f16kv.wgsl +242 -0
- package/src/gpu/kernels/attention_small.wgsl +260 -0
- package/src/gpu/kernels/attention_small_f16.wgsl +240 -0
- package/src/gpu/kernels/attention_small_f16kv.wgsl +266 -0
- package/src/gpu/kernels/attention_streaming.wgsl +149 -0
- package/src/gpu/kernels/attention_streaming_f16.wgsl +147 -0
- package/src/gpu/kernels/attention_streaming_f16kv.wgsl +151 -0
- package/src/gpu/kernels/backward/adam.d.ts +28 -0
- package/src/gpu/kernels/backward/adam.js +199 -0
- package/src/gpu/kernels/backward/adam.wgsl +50 -0
- package/src/gpu/kernels/backward/attention_backward.d.ts +22 -0
- package/src/gpu/kernels/backward/attention_backward.js +276 -0
- package/src/gpu/kernels/backward/attention_backward.wgsl +49 -0
- package/src/gpu/kernels/backward/bias_add_backward.d.ts +17 -0
- package/src/gpu/kernels/backward/bias_add_backward.js +24 -0
- package/src/gpu/kernels/backward/bias_add_backward.wgsl +33 -0
- package/src/gpu/kernels/backward/conv2d_backward.d.ts +31 -0
- package/src/gpu/kernels/backward/conv2d_backward.js +135 -0
- package/src/gpu/kernels/backward/conv2d_backward_input.wgsl +83 -0
- package/src/gpu/kernels/backward/conv2d_backward_weight.wgsl +70 -0
- package/src/gpu/kernels/backward/cross_entropy_backward.d.ts +23 -0
- package/src/gpu/kernels/backward/cross_entropy_backward.js +29 -0
- package/src/gpu/kernels/backward/cross_entropy_backward.wgsl +39 -0
- package/src/gpu/kernels/backward/embed_backward.d.ts +29 -0
- package/src/gpu/kernels/backward/embed_backward.js +118 -0
- package/src/gpu/kernels/backward/embed_backward.wgsl +73 -0
- package/src/gpu/kernels/backward/gelu_backward.d.ts +16 -0
- package/src/gpu/kernels/backward/gelu_backward.js +39 -0
- package/src/gpu/kernels/backward/gelu_backward.wgsl +38 -0
- package/src/gpu/kernels/backward/groupnorm_backward.d.ts +24 -0
- package/src/gpu/kernels/backward/groupnorm_backward.js +29 -0
- package/src/gpu/kernels/backward/groupnorm_backward.wgsl +143 -0
- package/src/gpu/kernels/backward/index.d.ts +17 -0
- package/src/gpu/kernels/backward/index.js +23 -0
- package/src/gpu/kernels/backward/layernorm_backward.d.ts +22 -0
- package/src/gpu/kernels/backward/layernorm_backward.js +135 -0
- package/src/gpu/kernels/backward/layernorm_backward.wgsl +194 -0
- package/src/gpu/kernels/backward/matmul_backward.d.ts +32 -0
- package/src/gpu/kernels/backward/matmul_backward.js +124 -0
- package/src/gpu/kernels/backward/matmul_backward.wgsl +90 -0
- package/src/gpu/kernels/backward/matmul_transpose_a.wgsl +84 -0
- package/src/gpu/kernels/backward/pixel_shuffle_backward.d.ts +22 -0
- package/src/gpu/kernels/backward/pixel_shuffle_backward.js +30 -0
- package/src/gpu/kernels/backward/pixel_shuffle_backward.wgsl +54 -0
- package/src/gpu/kernels/backward/rmsnorm_backward.d.ts +24 -0
- package/src/gpu/kernels/backward/rmsnorm_backward.js +101 -0
- package/src/gpu/kernels/backward/rmsnorm_backward.wgsl +78 -0
- package/src/gpu/kernels/backward/rope_backward.d.ts +25 -0
- package/src/gpu/kernels/backward/rope_backward.js +109 -0
- package/src/gpu/kernels/backward/rope_backward.wgsl +59 -0
- package/src/gpu/kernels/backward/scale_backward.d.ts +16 -0
- package/src/gpu/kernels/backward/scale_backward.js +84 -0
- package/src/gpu/kernels/backward/scale_backward.wgsl +27 -0
- package/src/gpu/kernels/backward/silu_backward.d.ts +16 -0
- package/src/gpu/kernels/backward/silu_backward.js +39 -0
- package/src/gpu/kernels/backward/silu_backward.wgsl +31 -0
- package/src/gpu/kernels/backward/softmax_backward.d.ts +16 -0
- package/src/gpu/kernels/backward/softmax_backward.js +43 -0
- package/src/gpu/kernels/backward/softmax_backward.wgsl +44 -0
- package/src/gpu/kernels/backward/upsample2d_backward.d.ts +21 -0
- package/src/gpu/kernels/backward/upsample2d_backward.js +30 -0
- package/src/gpu/kernels/backward/upsample2d_backward.wgsl +59 -0
- package/src/gpu/kernels/backward/utils.d.ts +45 -0
- package/src/gpu/kernels/backward/utils.js +371 -0
- package/src/gpu/kernels/bf16_to_f16.wgsl +54 -0
- package/src/gpu/kernels/bf16_to_f32.wgsl +70 -0
- package/src/gpu/kernels/bias_add.wgsl +40 -0
- package/src/gpu/kernels/bias_add_f16.wgsl +44 -0
- package/src/gpu/kernels/cast.d.ts +67 -0
- package/src/gpu/kernels/cast.js +422 -0
- package/src/gpu/kernels/cast_f16_to_f32.wgsl +31 -0
- package/src/gpu/kernels/cast_f32_to_f16.wgsl +36 -0
- package/src/gpu/kernels/check-finiteness.d.ts +15 -0
- package/src/gpu/kernels/check-finiteness.js +149 -0
- package/src/gpu/kernels/check-stop.d.ts +31 -0
- package/src/gpu/kernels/check-stop.js +181 -0
- package/src/gpu/kernels/clamp.d.ts +22 -0
- package/src/gpu/kernels/clamp.js +42 -0
- package/src/gpu/kernels/clamp.wgsl +24 -0
- package/src/gpu/kernels/constants.d.ts +168 -0
- package/src/gpu/kernels/constants.js +129 -0
- package/src/gpu/kernels/conv2d.d.ts +34 -0
- package/src/gpu/kernels/conv2d.js +81 -0
- package/src/gpu/kernels/conv2d.wgsl +71 -0
- package/src/gpu/kernels/conv2d_f16.wgsl +73 -0
- package/src/gpu/kernels/cross_entropy_loss.d.ts +21 -0
- package/src/gpu/kernels/cross_entropy_loss.js +54 -0
- package/src/gpu/kernels/cross_entropy_loss.wgsl +39 -0
- package/src/gpu/kernels/dequant.d.ts +108 -0
- package/src/gpu/kernels/dequant.js +524 -0
- package/src/gpu/kernels/dequant_f16_out.wgsl +151 -0
- package/src/gpu/kernels/dequant_f16_out_vec4.wgsl +149 -0
- package/src/gpu/kernels/dequant_f16_rowwise.wgsl +139 -0
- package/src/gpu/kernels/dequant_f32_rowwise.wgsl +133 -0
- package/src/gpu/kernels/dequant_mxfp4.wgsl +120 -0
- package/src/gpu/kernels/dequant_mxfp4_expert.wgsl +129 -0
- package/src/gpu/kernels/dequant_mxfp4_expert_f16.wgsl +105 -0
- package/src/gpu/kernels/dequant_mxfp4_vec4.wgsl +116 -0
- package/src/gpu/kernels/dequant_q6k.wgsl +140 -0
- package/src/gpu/kernels/dequant_q8_0.wgsl +98 -0
- package/src/gpu/kernels/dequant_shared.wgsl +202 -0
- package/src/gpu/kernels/dequant_shared_vec4.wgsl +153 -0
- package/src/gpu/kernels/dequant_subgroup.wgsl +202 -0
- package/src/gpu/kernels/dispatch.d.ts +157 -0
- package/src/gpu/kernels/dispatch.js +235 -0
- package/src/gpu/kernels/energy.d.ts +131 -0
- package/src/gpu/kernels/energy.js +425 -0
- package/src/gpu/kernels/energy_eval.wgsl +26 -0
- package/src/gpu/kernels/energy_eval_f16.wgsl +30 -0
- package/src/gpu/kernels/energy_quintel_grad.wgsl +92 -0
- package/src/gpu/kernels/energy_quintel_grad_f16.wgsl +96 -0
- package/src/gpu/kernels/energy_quintel_reduce.wgsl +112 -0
- package/src/gpu/kernels/energy_quintel_reduce_f16.wgsl +116 -0
- package/src/gpu/kernels/energy_quintel_update.wgsl +92 -0
- package/src/gpu/kernels/energy_quintel_update_f16.wgsl +96 -0
- package/src/gpu/kernels/energy_update.wgsl +25 -0
- package/src/gpu/kernels/energy_update_f16.wgsl +30 -0
- package/src/gpu/kernels/feature-check.d.ts +42 -0
- package/src/gpu/kernels/feature-check.js +70 -0
- package/src/gpu/kernels/fused_ffn.d.ts +65 -0
- package/src/gpu/kernels/fused_ffn.js +318 -0
- package/src/gpu/kernels/fused_ffn.wgsl +420 -0
- package/src/gpu/kernels/fused_ffn_f16.wgsl +213 -0
- package/src/gpu/kernels/fused_ffn_q4k.wgsl +375 -0
- package/src/gpu/kernels/fused_matmul_q4.wgsl +404 -0
- package/src/gpu/kernels/fused_matmul_q4_batched.wgsl +194 -0
- package/src/gpu/kernels/fused_matmul_q4_batched_f16.wgsl +170 -0
- package/src/gpu/kernels/fused_matmul_q4_batched_f16a.wgsl +154 -0
- package/src/gpu/kernels/fused_matmul_q4_f16a.wgsl +219 -0
- package/src/gpu/kernels/fused_matmul_q4_multicol_f16.wgsl +216 -0
- package/src/gpu/kernels/fused_matmul_q4_multicol_f16a.wgsl +204 -0
- package/src/gpu/kernels/fused_matmul_residual.d.ts +46 -0
- package/src/gpu/kernels/fused_matmul_residual.js +152 -0
- package/src/gpu/kernels/fused_matmul_rmsnorm.d.ts +64 -0
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +273 -0
- package/src/gpu/kernels/fused_matmul_rmsnorm.wgsl +324 -0
- package/src/gpu/kernels/fused_matmul_rmsnorm_f16.wgsl +303 -0
- package/src/gpu/kernels/fused_swiglu.wgsl +63 -0
- package/src/gpu/kernels/fused_swiglu_f16.wgsl +57 -0
- package/src/gpu/kernels/gather.d.ts +64 -0
- package/src/gpu/kernels/gather.js +119 -0
- package/src/gpu/kernels/gather.wgsl +61 -0
- package/src/gpu/kernels/gather_f16.wgsl +65 -0
- package/src/gpu/kernels/gather_f16_f16_out.wgsl +55 -0
- package/src/gpu/kernels/gather_f16_out.wgsl +55 -0
- package/src/gpu/kernels/gather_f16_vec4.wgsl +76 -0
- package/src/gpu/kernels/gather_f16_vec4_f16_out.wgsl +68 -0
- package/src/gpu/kernels/gather_vec4.wgsl +74 -0
- package/src/gpu/kernels/gather_vec4_f16_out.wgsl +68 -0
- package/src/gpu/kernels/gelu.d.ts +33 -0
- package/src/gpu/kernels/gelu.js +47 -0
- package/src/gpu/kernels/gelu.wgsl +64 -0
- package/src/gpu/kernels/gelu_f16.wgsl +66 -0
- package/src/gpu/kernels/gptoss_mxfp4_expert_fused.wgsl +127 -0
- package/src/gpu/kernels/gptoss_router_topk.wgsl +119 -0
- package/src/gpu/kernels/groupnorm.d.ts +31 -0
- package/src/gpu/kernels/groupnorm.js +91 -0
- package/src/gpu/kernels/groupnorm_apply.wgsl +41 -0
- package/src/gpu/kernels/groupnorm_apply_f16.wgsl +46 -0
- package/src/gpu/kernels/groupnorm_stats.wgsl +76 -0
- package/src/gpu/kernels/groupnorm_stats_f16.wgsl +79 -0
- package/src/gpu/kernels/index.d.ts +336 -0
- package/src/gpu/kernels/index.js +284 -0
- package/src/gpu/kernels/kernel-base.d.ts +33 -0
- package/src/gpu/kernels/kernel-base.js +46 -0
- package/src/gpu/kernels/kernel-configs.d.ts +65 -0
- package/src/gpu/kernels/kernel-configs.js +50 -0
- package/src/gpu/kernels/kernel-tuning.d.ts +42 -0
- package/src/gpu/kernels/kernel-tuning.js +149 -0
- package/src/gpu/kernels/kv-quantize.d.ts +37 -0
- package/src/gpu/kernels/kv-quantize.js +138 -0
- package/src/gpu/kernels/kv_quantize_int4.wgsl +119 -0
- package/src/gpu/kernels/kv_quantize_int8.wgsl +119 -0
- package/src/gpu/kernels/layernorm.d.ts +37 -0
- package/src/gpu/kernels/layernorm.js +80 -0
- package/src/gpu/kernels/layernorm.wgsl +121 -0
- package/src/gpu/kernels/layernorm_f16.wgsl +103 -0
- package/src/gpu/kernels/linear-attention-core.d.ts +39 -0
- package/src/gpu/kernels/linear-attention-core.js +535 -0
- package/src/gpu/kernels/logit-merge.d.ts +110 -0
- package/src/gpu/kernels/logit-merge.js +392 -0
- package/src/gpu/kernels/matmul-dispatch.d.ts +38 -0
- package/src/gpu/kernels/matmul-dispatch.js +155 -0
- package/src/gpu/kernels/matmul-selection.d.ts +87 -0
- package/src/gpu/kernels/matmul-selection.js +474 -0
- package/src/gpu/kernels/matmul.d.ts +109 -0
- package/src/gpu/kernels/matmul.js +271 -0
- package/src/gpu/kernels/matmul_f16.wgsl +170 -0
- package/src/gpu/kernels/matmul_f16_tiled.wgsl +165 -0
- package/src/gpu/kernels/matmul_f16w_f32a.wgsl +89 -0
- package/src/gpu/kernels/matmul_f16w_f32a_tiled.wgsl +154 -0
- package/src/gpu/kernels/matmul_f32.wgsl +100 -0
- package/src/gpu/kernels/matmul_gemv.wgsl +80 -0
- package/src/gpu/kernels/matmul_gemv_f16a.wgsl +81 -0
- package/src/gpu/kernels/matmul_gemv_residual.wgsl +119 -0
- package/src/gpu/kernels/matmul_gemv_residual_f16.wgsl +78 -0
- package/src/gpu/kernels/matmul_gemv_subgroup.wgsl +345 -0
- package/src/gpu/kernels/matmul_gemv_subgroup_f16a.wgsl +514 -0
- package/src/gpu/kernels/modulate.d.ts +29 -0
- package/src/gpu/kernels/modulate.js +49 -0
- package/src/gpu/kernels/modulate.wgsl +40 -0
- package/src/gpu/kernels/modulate_f16.wgsl +43 -0
- package/src/gpu/kernels/moe.d.ts +164 -0
- package/src/gpu/kernels/moe.js +496 -0
- package/src/gpu/kernels/moe_gather.wgsl +170 -0
- package/src/gpu/kernels/moe_gather_f16.wgsl +82 -0
- package/src/gpu/kernels/moe_gather_vec4.wgsl +74 -0
- package/src/gpu/kernels/moe_offsets.wgsl +48 -0
- package/src/gpu/kernels/pipeline-cache.d.ts +88 -0
- package/src/gpu/kernels/pipeline-cache.js +305 -0
- package/src/gpu/kernels/pixel_shuffle.d.ts +27 -0
- package/src/gpu/kernels/pixel_shuffle.js +49 -0
- package/src/gpu/kernels/pixel_shuffle.wgsl +44 -0
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +47 -0
- package/src/gpu/kernels/residual.d.ts +74 -0
- package/src/gpu/kernels/residual.js +127 -0
- package/src/gpu/kernels/residual.wgsl +53 -0
- package/src/gpu/kernels/residual_f16.wgsl +35 -0
- package/src/gpu/kernels/residual_f16_vec4.wgsl +47 -0
- package/src/gpu/kernels/residual_vec4.wgsl +46 -0
- package/src/gpu/kernels/rmsnorm.d.ts +53 -0
- package/src/gpu/kernels/rmsnorm.js +140 -0
- package/src/gpu/kernels/rmsnorm.wgsl +417 -0
- package/src/gpu/kernels/rmsnorm_f16.wgsl +164 -0
- package/src/gpu/kernels/rope.d.ts +48 -0
- package/src/gpu/kernels/rope.js +53 -0
- package/src/gpu/kernels/rope.wgsl +328 -0
- package/src/gpu/kernels/rope_f16.wgsl +271 -0
- package/src/gpu/kernels/rule-matcher.d.ts +30 -0
- package/src/gpu/kernels/rule-matcher.js +42 -0
- package/src/gpu/kernels/rule-registry.d.ts +7 -0
- package/src/gpu/kernels/rule-registry.js +41 -0
- package/src/gpu/kernels/sample.d.ts +75 -0
- package/src/gpu/kernels/sample.js +578 -0
- package/src/gpu/kernels/sample.wgsl +377 -0
- package/src/gpu/kernels/sample_f16.wgsl +331 -0
- package/src/gpu/kernels/scale.d.ts +35 -0
- package/src/gpu/kernels/scale.js +37 -0
- package/src/gpu/kernels/scale.wgsl +38 -0
- package/src/gpu/kernels/scatter_add.wgsl +88 -0
- package/src/gpu/kernels/scatter_add_dynamic.wgsl +59 -0
- package/src/gpu/kernels/scatter_add_dynamic_f16.wgsl +52 -0
- package/src/gpu/kernels/scatter_add_dynamic_f16_weights.wgsl +50 -0
- package/src/gpu/kernels/scatter_add_vec4.wgsl +70 -0
- package/src/gpu/kernels/shader-cache.d.ts +56 -0
- package/src/gpu/kernels/shader-cache.js +206 -0
- package/src/gpu/kernels/silu.d.ts +75 -0
- package/src/gpu/kernels/silu.js +340 -0
- package/src/gpu/kernels/silu.wgsl +99 -0
- package/src/gpu/kernels/silu_f16.wgsl +98 -0
- package/src/gpu/kernels/softmax.d.ts +57 -0
- package/src/gpu/kernels/softmax.js +106 -0
- package/src/gpu/kernels/softmax.wgsl +388 -0
- package/src/gpu/kernels/softmax_subgroup.wgsl +175 -0
- package/src/gpu/kernels/split_qkv.d.ts +51 -0
- package/src/gpu/kernels/split_qkv.js +41 -0
- package/src/gpu/kernels/split_qkv.wgsl +71 -0
- package/src/gpu/kernels/split_qkv_f16.wgsl +75 -0
- package/src/gpu/kernels/topk.wgsl +243 -0
- package/src/gpu/kernels/topk_f16.wgsl +108 -0
- package/src/gpu/kernels/topk_f16_weights.wgsl +101 -0
- package/src/gpu/kernels/transpose.d.ts +21 -0
- package/src/gpu/kernels/transpose.js +30 -0
- package/src/gpu/kernels/transpose.wgsl +32 -0
- package/src/gpu/kernels/types.d.ts +21 -0
- package/src/gpu/kernels/types.js +4 -0
- package/src/gpu/kernels/uniform-utils.d.ts +48 -0
- package/src/gpu/kernels/uniform-utils.js +94 -0
- package/src/gpu/kernels/upsample2d.d.ts +25 -0
- package/src/gpu/kernels/upsample2d.js +58 -0
- package/src/gpu/kernels/upsample2d.wgsl +37 -0
- package/src/gpu/kernels/upsample2d_f16.wgsl +41 -0
- package/src/gpu/kernels/utils.d.ts +106 -0
- package/src/gpu/kernels/utils.js +224 -0
- package/src/gpu/multi-model-recorder.d.ts +21 -0
- package/src/gpu/multi-model-recorder.js +31 -0
- package/src/gpu/partitioned-buffer-pool.d.ts +28 -0
- package/src/gpu/partitioned-buffer-pool.js +49 -0
- package/src/gpu/perf-guards.d.ts +25 -0
- package/src/gpu/perf-guards.js +140 -0
- package/src/gpu/profiler.d.ts +114 -0
- package/src/gpu/profiler.js +391 -0
- package/src/gpu/submit-tracker.d.ts +111 -0
- package/src/gpu/submit-tracker.js +229 -0
- package/src/gpu/tensor.d.ts +69 -0
- package/src/gpu/tensor.js +75 -0
- package/src/gpu/uniform-cache.d.ts +108 -0
- package/src/gpu/uniform-cache.js +242 -0
- package/src/gpu/weight-buffer.d.ts +115 -0
- package/src/gpu/weight-buffer.js +118 -0
- package/src/hotswap/intent-bundle.d.ts +37 -0
- package/src/hotswap/intent-bundle.js +123 -0
- package/src/hotswap/manifest.d.ts +33 -0
- package/src/hotswap/manifest.js +114 -0
- package/src/hotswap/runtime.d.ts +31 -0
- package/src/hotswap/runtime.js +128 -0
- package/src/index-browser.d.ts +47 -0
- package/src/index-browser.js +53 -0
- package/src/index-internal.d.ts +2 -0
- package/src/index-internal.js +2 -0
- package/src/index.d.ts +102 -0
- package/src/index.js +75 -0
- package/src/inference/README.md +593 -0
- package/src/inference/browser-harness.d.ts +234 -0
- package/src/inference/browser-harness.js +2665 -0
- package/src/inference/decode-buffers.d.ts +108 -0
- package/src/inference/decode-buffers.js +181 -0
- package/src/inference/decode-ring.d.ts +52 -0
- package/src/inference/decode-ring.js +273 -0
- package/src/inference/expert-router.d.ts +27 -0
- package/src/inference/expert-router.js +55 -0
- package/src/inference/functiongemma.d.ts +15 -0
- package/src/inference/functiongemma.js +1 -0
- package/src/inference/kv-cache/base.d.ts +150 -0
- package/src/inference/kv-cache/base.js +1037 -0
- package/src/inference/kv-cache/basis-decomposed-paged.d.ts +50 -0
- package/src/inference/kv-cache/basis-decomposed-paged.js +276 -0
- package/src/inference/kv-cache/index.d.ts +35 -0
- package/src/inference/kv-cache/index.js +20 -0
- package/src/inference/kv-cache/sliding-window.d.ts +72 -0
- package/src/inference/kv-cache/sliding-window.js +243 -0
- package/src/inference/kv-cache/tiered.d.ts +89 -0
- package/src/inference/kv-cache/tiered.js +574 -0
- package/src/inference/kv-cache/types.d.ts +188 -0
- package/src/inference/kv-cache/types.js +80 -0
- package/src/inference/kv-cache.d.ts +36 -0
- package/src/inference/kv-cache.js +18 -0
- package/src/inference/moe-router.d.ts +212 -0
- package/src/inference/moe-router.js +553 -0
- package/src/inference/multi-model-network.d.ts +139 -0
- package/src/inference/multi-model-network.js +769 -0
- package/src/inference/multi-pipeline-pool.d.ts +62 -0
- package/src/inference/multi-pipeline-pool.js +161 -0
- package/src/inference/network-evolution.d.ts +46 -0
- package/src/inference/network-evolution.js +80 -0
- package/src/inference/pipelines/context.d.ts +18 -0
- package/src/inference/pipelines/context.js +44 -0
- package/src/inference/pipelines/diffusion/helpers.d.ts +29 -0
- package/src/inference/pipelines/diffusion/helpers.js +112 -0
- package/src/inference/pipelines/diffusion/index.d.ts +3 -0
- package/src/inference/pipelines/diffusion/index.js +3 -0
- package/src/inference/pipelines/diffusion/init.d.ts +24 -0
- package/src/inference/pipelines/diffusion/init.js +124 -0
- package/src/inference/pipelines/diffusion/pipeline.d.ts +38 -0
- package/src/inference/pipelines/diffusion/pipeline.js +632 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +19 -0
- package/src/inference/pipelines/diffusion/scheduler.js +65 -0
- package/src/inference/pipelines/diffusion/sd3-transformer.d.ts +20 -0
- package/src/inference/pipelines/diffusion/sd3-transformer.js +1194 -0
- package/src/inference/pipelines/diffusion/sd3-weights.d.ts +21 -0
- package/src/inference/pipelines/diffusion/sd3-weights.js +287 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +80 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +935 -0
- package/src/inference/pipelines/diffusion/text-encoder.d.ts +29 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +178 -0
- package/src/inference/pipelines/diffusion/types.d.ts +112 -0
- package/src/inference/pipelines/diffusion/types.js +1 -0
- package/src/inference/pipelines/diffusion/vae.d.ts +20 -0
- package/src/inference/pipelines/diffusion/vae.js +675 -0
- package/src/inference/pipelines/diffusion/weights.d.ts +40 -0
- package/src/inference/pipelines/diffusion/weights.js +150 -0
- package/src/inference/pipelines/dream/energy-head-pipeline.d.ts +29 -0
- package/src/inference/pipelines/dream/energy-head-pipeline.js +6 -0
- package/src/inference/pipelines/dream/pipeline.d.ts +17 -0
- package/src/inference/pipelines/dream/pipeline.js +8 -0
- package/src/inference/pipelines/energy/index.d.ts +1 -0
- package/src/inference/pipelines/energy/index.js +1 -0
- package/src/inference/pipelines/energy/pipeline.d.ts +27 -0
- package/src/inference/pipelines/energy/pipeline.js +680 -0
- package/src/inference/pipelines/energy/quintel.d.ts +87 -0
- package/src/inference/pipelines/energy/quintel.js +207 -0
- package/src/inference/pipelines/energy/types.d.ts +63 -0
- package/src/inference/pipelines/energy/types.js +1 -0
- package/src/inference/pipelines/energy-head/index.d.ts +6 -0
- package/src/inference/pipelines/energy-head/index.js +6 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.d.ts +103 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +487 -0
- package/src/inference/pipelines/factory.d.ts +10 -0
- package/src/inference/pipelines/factory.js +6 -0
- package/src/inference/pipelines/index.d.ts +22 -0
- package/src/inference/pipelines/index.js +19 -0
- package/src/inference/pipelines/registry.d.ts +15 -0
- package/src/inference/pipelines/registry.js +23 -0
- package/src/inference/pipelines/rng.d.ts +2 -0
- package/src/inference/pipelines/rng.js +17 -0
- package/src/inference/pipelines/structured/index.d.ts +8 -0
- package/src/inference/pipelines/structured/index.js +8 -0
- package/src/inference/pipelines/structured/json-head-pipeline.d.ts +58 -0
- package/src/inference/pipelines/structured/json-head-pipeline.js +181 -0
- package/src/inference/pipelines/text/attention/index.d.ts +24 -0
- package/src/inference/pipelines/text/attention/index.js +17 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +101 -0
- package/src/inference/pipelines/text/attention/projections.js +435 -0
- package/src/inference/pipelines/text/attention/record.d.ts +36 -0
- package/src/inference/pipelines/text/attention/record.js +613 -0
- package/src/inference/pipelines/text/attention/run.d.ts +38 -0
- package/src/inference/pipelines/text/attention/run.js +826 -0
- package/src/inference/pipelines/text/attention/types.d.ts +98 -0
- package/src/inference/pipelines/text/attention/types.js +67 -0
- package/src/inference/pipelines/text/attention.d.ts +23 -0
- package/src/inference/pipelines/text/attention.js +12 -0
- package/src/inference/pipelines/text/bdpa-steamroller.d.ts +22 -0
- package/src/inference/pipelines/text/bdpa-steamroller.js +158 -0
- package/src/inference/pipelines/text/buffer-types.d.ts +7 -0
- package/src/inference/pipelines/text/buffer-types.js +4 -0
- package/src/inference/pipelines/text/chat-format.d.ts +46 -0
- package/src/inference/pipelines/text/chat-format.js +366 -0
- package/src/inference/pipelines/text/config.d.ts +235 -0
- package/src/inference/pipelines/text/config.js +623 -0
- package/src/inference/pipelines/text/debug-utils/config.d.ts +144 -0
- package/src/inference/pipelines/text/debug-utils/config.js +156 -0
- package/src/inference/pipelines/text/debug-utils/index.d.ts +53 -0
- package/src/inference/pipelines/text/debug-utils/index.js +44 -0
- package/src/inference/pipelines/text/debug-utils/logging.d.ts +106 -0
- package/src/inference/pipelines/text/debug-utils/logging.js +152 -0
- package/src/inference/pipelines/text/debug-utils/tensor.d.ts +119 -0
- package/src/inference/pipelines/text/debug-utils/tensor.js +268 -0
- package/src/inference/pipelines/text/debug-utils/utils.d.ts +77 -0
- package/src/inference/pipelines/text/debug-utils/utils.js +139 -0
- package/src/inference/pipelines/text/debug-utils.d.ts +42 -0
- package/src/inference/pipelines/text/debug-utils.js +34 -0
- package/src/inference/pipelines/text/embed.d.ts +67 -0
- package/src/inference/pipelines/text/embed.js +461 -0
- package/src/inference/pipelines/text/execution-plan.d.ts +116 -0
- package/src/inference/pipelines/text/execution-plan.js +314 -0
- package/src/inference/pipelines/text/execution-v0.d.ts +66 -0
- package/src/inference/pipelines/text/execution-v0.js +1139 -0
- package/src/inference/pipelines/text/ffn/dense.d.ts +40 -0
- package/src/inference/pipelines/text/ffn/dense.js +759 -0
- package/src/inference/pipelines/text/ffn/index.d.ts +23 -0
- package/src/inference/pipelines/text/ffn/index.js +16 -0
- package/src/inference/pipelines/text/ffn/moe.d.ts +21 -0
- package/src/inference/pipelines/text/ffn/moe.js +49 -0
- package/src/inference/pipelines/text/ffn/sandwich.d.ts +25 -0
- package/src/inference/pipelines/text/ffn/sandwich.js +196 -0
- package/src/inference/pipelines/text/ffn/standard.d.ts +23 -0
- package/src/inference/pipelines/text/ffn/standard.js +84 -0
- package/src/inference/pipelines/text/ffn/types.d.ts +30 -0
- package/src/inference/pipelines/text/ffn/types.js +25 -0
- package/src/inference/pipelines/text/ffn.d.ts +31 -0
- package/src/inference/pipelines/text/ffn.js +18 -0
- package/src/inference/pipelines/text/finiteness-guard-status.d.ts +11 -0
- package/src/inference/pipelines/text/finiteness-guard-status.js +21 -0
- package/src/inference/pipelines/text/finiteness-policy.d.ts +35 -0
- package/src/inference/pipelines/text/finiteness-policy.js +45 -0
- package/src/inference/pipelines/text/generator-helpers.d.ts +34 -0
- package/src/inference/pipelines/text/generator-helpers.js +175 -0
- package/src/inference/pipelines/text/generator-runtime.d.ts +93 -0
- package/src/inference/pipelines/text/generator-runtime.js +373 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +75 -0
- package/src/inference/pipelines/text/generator-steps.js +1078 -0
- package/src/inference/pipelines/text/generator.d.ts +41 -0
- package/src/inference/pipelines/text/generator.js +1345 -0
- package/src/inference/pipelines/text/index.d.ts +5 -0
- package/src/inference/pipelines/text/index.js +6 -0
- package/src/inference/pipelines/text/init.d.ts +295 -0
- package/src/inference/pipelines/text/init.js +965 -0
- package/src/inference/pipelines/text/kernel-path-auto-select.d.ts +12 -0
- package/src/inference/pipelines/text/kernel-path-auto-select.js +90 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +150 -0
- package/src/inference/pipelines/text/kernel-trace.js +324 -0
- package/src/inference/pipelines/text/layer-plan.d.ts +65 -0
- package/src/inference/pipelines/text/layer-plan.js +249 -0
- package/src/inference/pipelines/text/layer.d.ts +56 -0
- package/src/inference/pipelines/text/layer.js +916 -0
- package/src/inference/pipelines/text/linear-attention.d.ts +94 -0
- package/src/inference/pipelines/text/linear-attention.js +803 -0
- package/src/inference/pipelines/text/logits/cpu.d.ts +81 -0
- package/src/inference/pipelines/text/logits/cpu.js +91 -0
- package/src/inference/pipelines/text/logits/gpu.d.ts +113 -0
- package/src/inference/pipelines/text/logits/gpu.js +406 -0
- package/src/inference/pipelines/text/logits/index.d.ts +57 -0
- package/src/inference/pipelines/text/logits/index.js +305 -0
- package/src/inference/pipelines/text/logits/types.d.ts +46 -0
- package/src/inference/pipelines/text/logits/types.js +4 -0
- package/src/inference/pipelines/text/logits/utils.d.ts +49 -0
- package/src/inference/pipelines/text/logits/utils.js +59 -0
- package/src/inference/pipelines/text/logits.d.ts +27 -0
- package/src/inference/pipelines/text/logits.js +16 -0
- package/src/inference/pipelines/text/lora-apply.d.ts +28 -0
- package/src/inference/pipelines/text/lora-apply.js +58 -0
- package/src/inference/pipelines/text/lora-types.d.ts +39 -0
- package/src/inference/pipelines/text/lora-types.js +18 -0
- package/src/inference/pipelines/text/lora.d.ts +18 -0
- package/src/inference/pipelines/text/lora.js +12 -0
- package/src/inference/pipelines/text/model-load.d.ts +58 -0
- package/src/inference/pipelines/text/model-load.js +561 -0
- package/src/inference/pipelines/text/moe-cache.d.ts +32 -0
- package/src/inference/pipelines/text/moe-cache.js +107 -0
- package/src/inference/pipelines/text/moe-cpu-gptoss.d.ts +9 -0
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +110 -0
- package/src/inference/pipelines/text/moe-cpu.d.ts +13 -0
- package/src/inference/pipelines/text/moe-cpu.js +116 -0
- package/src/inference/pipelines/text/moe-gpu.d.ts +13 -0
- package/src/inference/pipelines/text/moe-gpu.js +611 -0
- package/src/inference/pipelines/text/moe-helpers.d.ts +12 -0
- package/src/inference/pipelines/text/moe-helpers.js +21 -0
- package/src/inference/pipelines/text/moe-impl.d.ts +117 -0
- package/src/inference/pipelines/text/moe-impl.js +9 -0
- package/src/inference/pipelines/text/moe-shape-validator.d.ts +31 -0
- package/src/inference/pipelines/text/moe-shape-validator.js +78 -0
- package/src/inference/pipelines/text/ops.d.ts +167 -0
- package/src/inference/pipelines/text/ops.js +367 -0
- package/src/inference/pipelines/text/probes.d.ts +31 -0
- package/src/inference/pipelines/text/probes.js +170 -0
- package/src/inference/pipelines/text/sampling.d.ts +54 -0
- package/src/inference/pipelines/text/sampling.js +203 -0
- package/src/inference/pipelines/text/state.d.ts +112 -0
- package/src/inference/pipelines/text/state.js +152 -0
- package/src/inference/pipelines/text/types.d.ts +627 -0
- package/src/inference/pipelines/text/types.js +4 -0
- package/src/inference/pipelines/text/weights.d.ts +110 -0
- package/src/inference/pipelines/text/weights.js +163 -0
- package/src/inference/pipelines/text.d.ts +157 -0
- package/src/inference/pipelines/text.js +586 -0
- package/src/inference/speculative.d.ts +239 -0
- package/src/inference/speculative.js +416 -0
- package/src/inference/test-harness.d.ts +178 -0
- package/src/inference/test-harness.js +349 -0
- package/src/inference/tokenizer.d.ts +77 -0
- package/src/inference/tokenizer.js +258 -0
- package/src/inference/tokenizers/base.d.ts +39 -0
- package/src/inference/tokenizers/base.js +69 -0
- package/src/inference/tokenizers/bpe.d.ts +27 -0
- package/src/inference/tokenizers/bpe.js +171 -0
- package/src/inference/tokenizers/bundled.d.ts +63 -0
- package/src/inference/tokenizers/bundled.js +866 -0
- package/src/inference/tokenizers/sentencepiece.d.ts +28 -0
- package/src/inference/tokenizers/sentencepiece.js +389 -0
- package/src/inference/tokenizers/types.d.ts +166 -0
- package/src/inference/tokenizers/types.js +7 -0
- package/src/loader/doppler-loader.d.ts +134 -0
- package/src/loader/doppler-loader.js +1036 -0
- package/src/loader/dtype-utils.d.ts +40 -0
- package/src/loader/dtype-utils.js +102 -0
- package/src/loader/embedding-loader.d.ts +56 -0
- package/src/loader/embedding-loader.js +207 -0
- package/src/loader/experts/expert-cache.d.ts +156 -0
- package/src/loader/experts/expert-cache.js +375 -0
- package/src/loader/experts/expert-loader.d.ts +108 -0
- package/src/loader/experts/expert-loader.js +384 -0
- package/src/loader/final-weights-loader.d.ts +68 -0
- package/src/loader/final-weights-loader.js +262 -0
- package/src/loader/index.d.ts +150 -0
- package/src/loader/index.js +124 -0
- package/src/loader/layer-loader.d.ts +63 -0
- package/src/loader/layer-loader.js +417 -0
- package/src/loader/loader-state.d.ts +51 -0
- package/src/loader/loader-state.js +142 -0
- package/src/loader/loader-types.d.ts +236 -0
- package/src/loader/loader-types.js +4 -0
- package/src/loader/manifest-config.d.ts +97 -0
- package/src/loader/manifest-config.js +132 -0
- package/src/loader/memory-monitor.d.ts +112 -0
- package/src/loader/memory-monitor.js +276 -0
- package/src/loader/multi-model-loader.d.ts +37 -0
- package/src/loader/multi-model-loader.js +87 -0
- package/src/loader/quantization-constants.d.ts +23 -0
- package/src/loader/quantization-constants.js +14 -0
- package/src/loader/shard-cache.d.ts +60 -0
- package/src/loader/shard-cache.js +568 -0
- package/src/loader/shard-resolver.d.ts +12 -0
- package/src/loader/shard-resolver.js +83 -0
- package/src/loader/tensors/tensor-loader.d.ts +154 -0
- package/src/loader/tensors/tensor-loader.js +427 -0
- package/src/loader/tensors/tensor-reader.d.ts +22 -0
- package/src/loader/tensors/tensor-reader.js +56 -0
- package/src/loader/tensors/tensor-role.d.ts +7 -0
- package/src/loader/tensors/tensor-role.js +12 -0
- package/src/loader/weight-downcast.d.ts +62 -0
- package/src/loader/weight-downcast.js +213 -0
- package/src/loader/weights.d.ts +22 -0
- package/src/loader/weights.js +4 -0
- package/src/memory/address-table.d.ts +104 -0
- package/src/memory/address-table.js +114 -0
- package/src/memory/buffer-pool.d.ts +196 -0
- package/src/memory/buffer-pool.js +756 -0
- package/src/memory/capability.d.ts +49 -0
- package/src/memory/capability.js +95 -0
- package/src/memory/heap-manager.d.ts +104 -0
- package/src/memory/heap-manager.js +264 -0
- package/src/memory/unified-detect.d.ts +59 -0
- package/src/memory/unified-detect.js +192 -0
- package/src/rules/converter/execution.rules.json +20 -0
- package/src/rules/converter/tensor-roles.rules.json +13 -0
- package/src/rules/converter/tokenizer.rules.json +7 -0
- package/src/rules/inference/attention.rules.json +54 -0
- package/src/rules/inference/config.rules.json +58 -0
- package/src/rules/inference/dtype.rules.json +94 -0
- package/src/rules/inference/execution.rules.json +45 -0
- package/src/rules/inference/ffn.rules.json +35 -0
- package/src/rules/inference/kernel-path.rules.json +76 -0
- package/src/rules/inference/layer-pattern.rules.json +16 -0
- package/src/rules/inference/layer.rules.json +7 -0
- package/src/rules/inference/moe.rules.json +48 -0
- package/src/rules/kernels/attention.rules.json +61 -0
- package/src/rules/kernels/conv2d.rules.json +6 -0
- package/src/rules/kernels/dequant.rules.json +58 -0
- package/src/rules/kernels/energy.rules.json +22 -0
- package/src/rules/kernels/fused-ffn.rules.json +13 -0
- package/src/rules/kernels/fused-matmul-residual.rules.json +6 -0
- package/src/rules/kernels/fused-matmul-rmsnorm.rules.json +8 -0
- package/src/rules/kernels/gather.rules.json +12 -0
- package/src/rules/kernels/gelu.rules.json +11 -0
- package/src/rules/kernels/groupnorm.rules.json +10 -0
- package/src/rules/kernels/kernel-validator.d.ts +24 -0
- package/src/rules/kernels/kernel-validator.js +160 -0
- package/src/rules/kernels/kv_quantize.rules.json +7 -0
- package/src/rules/kernels/layernorm.rules.json +6 -0
- package/src/rules/kernels/matmul.rules.json +60 -0
- package/src/rules/kernels/modulate.rules.json +6 -0
- package/src/rules/kernels/moe.rules.gptoss.json +105 -0
- package/src/rules/kernels/moe.rules.json +11 -0
- package/src/rules/kernels/pixel_shuffle.rules.json +6 -0
- package/src/rules/kernels/residual.rules.json +12 -0
- package/src/rules/kernels/rmsnorm.rules.json +11 -0
- package/src/rules/kernels/rope.rules.json +6 -0
- package/src/rules/kernels/sample.rules.json +6 -0
- package/src/rules/kernels/scale.rules.json +6 -0
- package/src/rules/kernels/silu.rules.json +21 -0
- package/src/rules/kernels/softmax.rules.json +23 -0
- package/src/rules/kernels/split-qkv.rules.json +6 -0
- package/src/rules/kernels/upsample2d.rules.json +6 -0
- package/src/rules/loader/tensor-loader.rules.json +15 -0
- package/src/rules/loader/weights.rules.json +41 -0
- package/src/rules/rule-registry.d.ts +48 -0
- package/src/rules/rule-registry.js +177 -0
- package/src/rules/tooling/command-runtime.rules.json +38 -0
- package/src/storage/backends/idb-store.d.ts +52 -0
- package/src/storage/backends/idb-store.js +590 -0
- package/src/storage/backends/memory-store.d.ts +36 -0
- package/src/storage/backends/memory-store.js +242 -0
- package/src/storage/backends/opfs-store.d.ts +41 -0
- package/src/storage/backends/opfs-store.js +429 -0
- package/src/storage/blake3.d.ts +17 -0
- package/src/storage/blake3.js +269 -0
- package/src/storage/download-types.d.ts +157 -0
- package/src/storage/download-types.js +48 -0
- package/src/storage/downloader.d.ts +103 -0
- package/src/storage/downloader.js +839 -0
- package/src/storage/emulated-vram.d.ts +264 -0
- package/src/storage/emulated-vram.js +576 -0
- package/src/storage/export.d.ts +20 -0
- package/src/storage/export.js +159 -0
- package/src/storage/index.d.ts +253 -0
- package/src/storage/index.js +185 -0
- package/src/storage/inventory.d.ts +26 -0
- package/src/storage/inventory.js +218 -0
- package/src/storage/preflight.d.ts +144 -0
- package/src/storage/preflight.js +294 -0
- package/src/storage/quickstart-downloader.d.ts +154 -0
- package/src/storage/quickstart-downloader.js +265 -0
- package/src/storage/quota.d.ts +150 -0
- package/src/storage/quota.js +304 -0
- package/src/storage/registry.d.ts +28 -0
- package/src/storage/registry.js +125 -0
- package/src/storage/reports.d.ts +20 -0
- package/src/storage/reports.js +94 -0
- package/src/storage/shard-manager.d.ts +137 -0
- package/src/storage/shard-manager.js +801 -0
- package/src/sw.d.ts +1 -0
- package/src/sw.js +187 -0
- package/src/tooling/browser-command-runner.d.ts +28 -0
- package/src/tooling/browser-command-runner.js +82 -0
- package/src/tooling/command-api.d.ts +147 -0
- package/src/tooling/command-api.js +523 -0
- package/src/tooling/command-envelope.d.ts +81 -0
- package/src/tooling/command-envelope.js +195 -0
- package/src/tooling/command-runner-shared.d.ts +73 -0
- package/src/tooling/command-runner-shared.js +146 -0
- package/src/tooling/command-runner.html +45 -0
- package/src/tooling/node-browser-command-runner.d.ts +30 -0
- package/src/tooling/node-browser-command-runner.js +868 -0
- package/src/tooling/node-command-runner.d.ts +36 -0
- package/src/tooling/node-command-runner.js +127 -0
- package/src/tooling/node-convert-worker-pool.d.ts +16 -0
- package/src/tooling/node-convert-worker-pool.js +186 -0
- package/src/tooling/node-convert-worker.d.ts +1 -0
- package/src/tooling/node-convert-worker.js +60 -0
- package/src/tooling/node-convert.d.ts +44 -0
- package/src/tooling/node-converter.d.ts +1 -0
- package/src/tooling/node-converter.js +1227 -0
- package/src/tooling/node-file-fetch.d.ts +1 -0
- package/src/tooling/node-file-fetch.js +38 -0
- package/src/tooling/node-source-runtime.d.ts +19 -0
- package/src/tooling/node-source-runtime.js +469 -0
- package/src/tooling/node-webgpu.d.ts +6 -0
- package/src/tooling/node-webgpu.js +321 -0
- package/src/tooling/opfs-cache.d.ts +11 -0
- package/src/tooling/opfs-cache.js +174 -0
- package/src/tooling/source-runtime-bundle.d.ts +102 -0
- package/src/tooling/source-runtime-bundle.js +484 -0
- package/src/tooling-exports.browser.d.ts +7 -0
- package/src/tooling-exports.browser.js +2 -0
- package/src/tooling-exports.d.ts +22 -0
- package/src/tooling-exports.js +7 -0
- package/src/tooling-exports.shared.d.ts +105 -0
- package/src/tooling-exports.shared.js +92 -0
- package/src/training/README.md +153 -0
- package/src/training/artifacts.d.ts +160 -0
- package/src/training/artifacts.js +896 -0
- package/src/training/attention-backward.d.ts +30 -0
- package/src/training/attention-backward.js +217 -0
- package/src/training/attention-forward.d.ts +22 -0
- package/src/training/attention-forward.js +82 -0
- package/src/training/autograd.d.ts +51 -0
- package/src/training/autograd.js +380 -0
- package/src/training/checkpoint.d.ts +31 -0
- package/src/training/checkpoint.js +238 -0
- package/src/training/clip.d.ts +9 -0
- package/src/training/clip.js +54 -0
- package/src/training/dataloader.d.ts +8 -0
- package/src/training/dataloader.js +44 -0
- package/src/training/datasets/index.d.ts +12 -0
- package/src/training/datasets/index.js +6 -0
- package/src/training/datasets/jsonl.d.ts +11 -0
- package/src/training/datasets/jsonl.js +50 -0
- package/src/training/datasets/reploid.d.ts +3 -0
- package/src/training/datasets/reploid.js +36 -0
- package/src/training/datasets/text-pairs.d.ts +21 -0
- package/src/training/datasets/text-pairs.js +42 -0
- package/src/training/datasets/token-batch.d.ts +21 -0
- package/src/training/datasets/token-batch.js +40 -0
- package/src/training/datasets/translation-pairs.d.ts +34 -0
- package/src/training/datasets/translation-pairs.js +49 -0
- package/src/training/export.d.ts +32 -0
- package/src/training/export.js +112 -0
- package/src/training/index.d.ts +52 -0
- package/src/training/index.js +41 -0
- package/src/training/lora.d.ts +19 -0
- package/src/training/lora.js +57 -0
- package/src/training/loss-scaling.d.ts +21 -0
- package/src/training/loss-scaling.js +80 -0
- package/src/training/loss.d.ts +10 -0
- package/src/training/loss.js +41 -0
- package/src/training/objectives/base.d.ts +58 -0
- package/src/training/objectives/base.js +38 -0
- package/src/training/objectives/cross_entropy.d.ts +18 -0
- package/src/training/objectives/cross_entropy.js +37 -0
- package/src/training/objectives/distill_kd.d.ts +16 -0
- package/src/training/objectives/distill_kd.js +369 -0
- package/src/training/objectives/distill_triplet.d.ts +16 -0
- package/src/training/objectives/distill_triplet.js +412 -0
- package/src/training/objectives/index.d.ts +12 -0
- package/src/training/objectives/index.js +6 -0
- package/src/training/objectives/ul_stage1_joint.d.ts +16 -0
- package/src/training/objectives/ul_stage1_joint.js +188 -0
- package/src/training/objectives/ul_stage2_base.d.ts +16 -0
- package/src/training/objectives/ul_stage2_base.js +222 -0
- package/src/training/optimizer.d.ts +22 -0
- package/src/training/optimizer.js +115 -0
- package/src/training/runner.d.ts +196 -0
- package/src/training/runner.js +1194 -0
- package/src/training/suite.d.ts +187 -0
- package/src/training/suite.js +3156 -0
- package/src/training/trainer.d.ts +89 -0
- package/src/training/trainer.js +301 -0
- package/src/training/ul_dataset.d.ts +47 -0
- package/src/training/ul_dataset.js +153 -0
- package/src/training/ul_schedule.d.ts +6 -0
- package/src/training/ul_schedule.js +29 -0
- package/src/types/chrome.d.ts +36 -0
- package/src/types/chrome.js +1 -0
- package/src/types/gpu.d.ts +185 -0
- package/src/types/gpu.js +5 -0
- package/src/types/index.d.ts +3 -0
- package/src/types/index.js +3 -0
- package/src/types/inference.d.ts +197 -0
- package/src/types/inference.js +5 -0
- package/src/types/model.d.ts +125 -0
- package/src/types/model.js +5 -0
- package/src/utils/index.d.ts +7 -0
- package/src/utils/index.js +7 -0
- package/src/utils/load-json.d.ts +5 -0
- package/src/utils/load-json.js +23 -0
- package/src/utils/plain-object.d.ts +1 -0
- package/src/utils/plain-object.js +3 -0
- package/src/utils/sha256.d.ts +4 -0
- package/src/utils/sha256.js +135 -0
- package/tools/convert-safetensors-node.js +180 -0
- package/tools/doppler-cli.js +1170 -0
|
@@ -0,0 +1,2665 @@
|
|
|
1
|
+
|
|
2
|
+
import { initializeInference, parseRuntimeOverridesFromURL } from './test-harness.js';
|
|
3
|
+
import { saveReport } from '../storage/reports.js';
|
|
4
|
+
import { getRuntimeConfig, setRuntimeConfig } from '../config/runtime.js';
|
|
5
|
+
import { initDevice, getKernelCapabilities, getDevice } from '../gpu/device.js';
|
|
6
|
+
import { createPipeline } from './pipelines/text.js';
|
|
7
|
+
import { parseModelConfigFromManifest } from './pipelines/text/config.js';
|
|
8
|
+
import { resolveKernelPathState, activateKernelPathState } from './pipelines/text/model-load.js';
|
|
9
|
+
import { openModelStore, loadManifestFromStore } from '../storage/shard-manager.js';
|
|
10
|
+
import { parseManifest } from '../formats/rdrr/index.js';
|
|
11
|
+
import { computeSampleStats } from '../debug/stats.js';
|
|
12
|
+
import {
|
|
13
|
+
setActiveKernelPath,
|
|
14
|
+
getActiveKernelPath,
|
|
15
|
+
getActiveKernelPathSource,
|
|
16
|
+
getActiveKernelPathPolicy,
|
|
17
|
+
} from '../config/kernel-path-loader.js';
|
|
18
|
+
import { selectRuleValue } from '../rules/rule-registry.js';
|
|
19
|
+
import { mergeRuntimeValues } from '../config/runtime-merge.js';
|
|
20
|
+
import { isPlainObject } from '../utils/plain-object.js';
|
|
21
|
+
import { validateTrainingMetricsReport } from '../config/schema/training-metrics.schema.js';
|
|
22
|
+
|
|
23
|
+
// Module specifier of the training suite, resolved relative to this file when
// it is passed to a dynamic import().
const TRAINING_SUITE_MODULE_PATH = '../training/suite.js';

// Module specifier of the Node source runtime helper.
// NOTE(review): presumably also loaded through a dynamic import() — confirm at the use site.
const NODE_SOURCE_RUNTIME_MODULE_PATH = '../tooling/node-source-runtime.js';

// Cache slot for the in-flight or settled training suite import; null until
// the module is first requested.
let trainingSuiteModulePromise = null;
|
|
26
|
+
|
|
27
|
+
/**
 * Lazily imports the training suite module, reusing one cached import promise
 * for every call after the first.
 *
 * @returns {Promise<object>} The module namespace of the training suite.
 */
async function loadTrainingSuiteModule() {
  if (trainingSuiteModulePromise) {
    return trainingSuiteModulePromise;
  }
  // First request: start the import and remember the promise, not the result,
  // so concurrent callers share the same load.
  trainingSuiteModulePromise = import(TRAINING_SUITE_MODULE_PATH);
  return trainingSuiteModulePromise;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Runs the training suite through the lazily loaded training suite module.
 *
 * @param {object} [options={}] Passed through unchanged to the module's `runTrainingSuite`.
 * @returns {Promise<*>} Whatever the module's `runTrainingSuite` resolves to.
 */
export async function runTrainingSuite(options = {}) {
  const trainingSuite = await loadTrainingSuiteModule();
  return trainingSuite.runTrainingSuite(options);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Runs the training benchmark suite through the lazily loaded training suite module.
 *
 * @param {object} [options={}] Passed through unchanged to the module's `runTrainingBenchSuite`.
 * @returns {Promise<*>} Whatever the module's `runTrainingBenchSuite` resolves to.
 */
async function runTrainingBenchSuite(options = {}) {
  const trainingSuite = await loadTrainingSuiteModule();
  return trainingSuite.runTrainingBenchSuite(options);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Normalizes a caller-supplied report timestamp to an ISO-8601 string.
 *
 * Accepted inputs:
 *  - `null` / `undefined` / blank string: returns `null` (no timestamp supplied)
 *  - `Date`: must hold a valid time value
 *  - finite number: interpreted as epoch milliseconds
 *  - string: a numeric string is treated as epoch milliseconds, anything else
 *    is handed to the `Date` constructor
 *
 * @param {*} rawTimestamp Value to normalize.
 * @param {string} [label='timestamp'] Name used in error messages.
 * @returns {string|null} ISO-8601 timestamp, or `null` when nothing was supplied.
 * @throws {Error} When the value has an unsupported type or cannot be
 *   represented as a valid date.
 */
function parseReportTimestamp(rawTimestamp, label = 'timestamp') {
  // A finite epoch can still fall outside the range Date can represent; in that
  // case toISOString() would throw a bare RangeError. Reject it with the same
  // labelled Error every other failure path uses.
  const epochToIso = (epochMs) => {
    const date = new Date(epochMs);
    if (Number.isNaN(date.getTime())) {
      throw new Error(`Invalid ${label}: epoch milliseconds are outside the supported Date range.`);
    }
    return date.toISOString();
  };

  if (rawTimestamp == null) {
    return null;
  }

  if (rawTimestamp instanceof Date) {
    if (!Number.isFinite(rawTimestamp.getTime())) {
      throw new Error(`Invalid ${label}: not a valid Date.`);
    }
    return rawTimestamp.toISOString();
  }

  if (typeof rawTimestamp === 'number') {
    if (!Number.isFinite(rawTimestamp)) {
      throw new Error(`Invalid ${label}: must be a finite epoch timestamp.`);
    }
    return epochToIso(rawTimestamp);
  }

  if (typeof rawTimestamp === 'string') {
    const trimmed = rawTimestamp.trim();
    if (trimmed.length === 0) {
      return null;
    }
    // Numeric strings take precedence over date parsing, so "1000" means
    // epoch millisecond 1000 rather than the year 1000.
    const numericCandidate = Number(trimmed);
    if (Number.isFinite(numericCandidate)) {
      return epochToIso(numericCandidate);
    }
    const parsed = new Date(trimmed);
    if (Number.isNaN(parsed.getTime())) {
      throw new Error(`Invalid ${label}: expected ISO-8601 string or epoch milliseconds.`);
    }
    return parsed.toISOString();
  }

  throw new Error(`Invalid ${label}: expected Date, ISO-8601 string, epoch milliseconds, or nullish.`);
}
|
|
82
|
+
|
|
83
|
+
/**
 * Resolves the timestamp to record on a report.
 *
 * Precedence: the parsed `rawTimestamp`, then `fallbackTimestamp` (stringified),
 * then the current time.
 *
 * @param {*} rawTimestamp Caller-supplied timestamp; see `parseReportTimestamp`.
 * @param {string} label Name used in parse error messages.
 * @param {*} [fallbackTimestamp=null] Used when `rawTimestamp` yields nothing.
 * @returns {string} The resolved timestamp string.
 */
function resolveReportTimestamp(rawTimestamp, label, fallbackTimestamp = null) {
  const explicit = parseReportTimestamp(rawTimestamp, label);
  if (explicit != null) {
    return explicit;
  }
  if (fallbackTimestamp == null) {
    return new Date().toISOString();
  }
  return String(fallbackTimestamp);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Picks the runtime overrides for a harness run.
 *
 * Precedence: an explicit `options.runtime`, then overrides parsed from
 * `options.searchParams`, then overrides parsed from the ambient URL. When no
 * `globalThis.location` exists, an empty parameter set is parsed instead.
 *
 * @param {object} options Harness options.
 * @returns {*} `options.runtime`, or the result of `parseRuntimeOverridesFromURL`.
 */
function resolveRuntime(options) {
  const { runtime, searchParams } = options;
  if (runtime) {
    return runtime;
  }
  if (searchParams) {
    return parseRuntimeOverridesFromURL(searchParams);
  }
  const hasLocation = typeof globalThis.location !== 'undefined';
  return hasLocation
    ? parseRuntimeOverridesFromURL()
    : parseRuntimeOverridesFromURL(new URLSearchParams());
}
|
|
94
|
+
|
|
95
|
+
/**
 * Turns a preset id into a relative `.json` file path.
 *
 * Any leading run of `.` and `/` characters is removed, and `.json` is
 * appended unless the id already ends with it.
 *
 * @param {*} value Preset id; falsy values are treated as empty.
 * @returns {string|null} The normalized path, or `null` when nothing remains.
 */
function normalizePresetPath(value) {
  const stripped = String(value || '').replace(/^[./]+/, '');
  if (stripped.length === 0) {
    return null;
  }
  if (stripped.endsWith('.json')) {
    return stripped;
  }
  return `${stripped}.json`;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Computes the base URL, without a trailing slash, under which runtime
 * presets are looked up.
 *
 * The URL is resolved relative to this module. If that fails, it is resolved
 * against the page URL when one exists, and otherwise the site-relative path
 * `/src/config/presets/runtime` is returned.
 *
 * @returns {string} Preset base URL or path.
 */
function resolvePresetBaseUrl() {
  const stripTrailingSlash = (href) => href.replace(/\/$/, '');
  try {
    const moduleRelative = new URL('../config/presets/runtime/', import.meta.url);
    return stripTrailingSlash(moduleRelative.toString());
  } catch {
    const pageHref = typeof globalThis.location !== 'undefined'
      ? globalThis.location?.href
      : undefined;
    if (!pageHref) {
      return '/src/config/presets/runtime';
    }
    const pageRelative = new URL('/src/config/presets/runtime/', pageHref);
    return stripTrailingSlash(pageRelative.toString());
  }
}
|
|
111
|
+
|
|
112
|
+
/**
 * Deep-copies a runtime config so later mutations do not affect the copy.
 *
 * Uses `structuredClone` when the host provides it and a JSON round-trip otherwise.
 *
 * @param {object|null|undefined} runtimeConfig Config to copy.
 * @returns {object|null} The copy, or `null` for a falsy input.
 */
function cloneRuntimeConfig(runtimeConfig) {
  if (!runtimeConfig) {
    return null;
  }
  const canStructuredClone = typeof structuredClone === 'function';
  return canStructuredClone
    ? structuredClone(runtimeConfig)
    : JSON.parse(JSON.stringify(runtimeConfig));
}
|
|
119
|
+
|
|
120
|
+
/**
 * Captures the current runtime config (as a deep copy) together with the
 * active kernel path, its source and its policy.
 *
 * @returns {{runtimeConfig: (object|null), activeKernelPath: *, activeKernelPathSource: *, activeKernelPathPolicy: *}}
 *   A snapshot that `restoreRuntimeState` accepts.
 */
function snapshotRuntimeState() {
  const runtimeConfig = cloneRuntimeConfig(getRuntimeConfig());
  const activeKernelPath = getActiveKernelPath();
  const activeKernelPathSource = getActiveKernelPathSource();
  const activeKernelPathPolicy = getActiveKernelPathPolicy();
  return {
    runtimeConfig,
    activeKernelPath,
    activeKernelPathSource,
    activeKernelPathPolicy,
  };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Re-applies a snapshot taken by `snapshotRuntimeState`.
 *
 * Does nothing for a falsy snapshot. A missing kernel-path source is restored
 * as `'none'` and a missing policy as `null`.
 *
 * @param {object|null|undefined} snapshot Snapshot to restore.
 * @returns {void}
 */
function restoreRuntimeState(snapshot) {
  if (snapshot) {
    const {
      runtimeConfig,
      activeKernelPath,
      activeKernelPathSource,
      activeKernelPathPolicy,
    } = snapshot;
    setRuntimeConfig(runtimeConfig);
    setActiveKernelPath(
      activeKernelPath,
      activeKernelPathSource || 'none',
      activeKernelPathPolicy ?? null
    );
  }
}
|
|
140
|
+
|
|
141
|
+
/**
 * Runs `run` and afterwards restores the runtime state that was in effect
 * before the call, whether `run` resolved or threw.
 *
 * @param {function(): (Promise<*>|*)} run Work to execute.
 * @returns {Promise<*>} The value `run` resolves to.
 */
async function runWithRuntimeIsolationForSuite(run) {
  const savedState = snapshotRuntimeState();
  try {
    const outcome = await run();
    return outcome;
  } finally {
    restoreRuntimeState(savedState);
  }
}
|
|
149
|
+
|
|
150
|
+
/**
 * Finds the runtime section of a loaded config document.
 *
 * Returns `config.runtime` when it is an object. Otherwise the config itself
 * is returned if it carries any of the top-level keys `shared`, `loading`,
 * `inference` or `emulation` with a truthy value.
 *
 * @param {*} config Parsed config document.
 * @returns {object|null} The runtime object, or `null` when none is found.
 */
function resolveRuntimeFromConfig(config) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === 'object';
  if (!isObjectLike(config)) {
    return null;
  }
  if (isObjectLike(config.runtime)) {
    return config.runtime;
  }
  const runtimeSectionKeys = ['shared', 'loading', 'inference', 'emulation'];
  const looksLikeRuntime = runtimeSectionKeys.some((key) => Boolean(config[key]));
  return looksLikeRuntime ? config : null;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Replaces bulky binary payloads in a suite output with a short descriptor.
 *
 * - nullish: `null`
 * - non-object: returned unchanged
 * - ArrayBuffer view: `{ type, length }`
 * - object with finite `width`/`height` and an ArrayBuffer-view `pixels`:
 *   a copy whose `pixels` is replaced by `{ type, length }`
 * - any other object: returned unchanged (same reference)
 *
 * @param {*} output Raw suite output.
 * @returns {*} The output with binary payloads summarized.
 */
function sanitizeReportOutput(output) {
  // Describes a view by constructor name and element count; `length` is null
  // when the view has no finite `length` property.
  const summarizeView = (view) => ({
    type: view.constructor?.name || 'TypedArray',
    length: Number.isFinite(view.length) ? view.length : null,
  });

  if (output == null) {
    return null;
  }
  if (typeof output !== 'object') {
    return output;
  }
  if (ArrayBuffer.isView(output)) {
    return summarizeView(output);
  }

  const looksLikeImage = Number.isFinite(output?.width)
    && Number.isFinite(output?.height)
    && ArrayBuffer.isView(output?.pixels);
  if (!looksLikeImage) {
    return output;
  }

  const { pixels, ...rest } = output;
  return {
    ...rest,
    width: output.width,
    height: output.height,
    pixels: summarizeView(pixels),
  };
}
|
|
184
|
+
|
|
185
|
+
/**
 * Normalizes the `extends` field of a runtime config to a list of references.
 *
 * @param {*} value A string, an array of entries, or anything else.
 * @returns {string[]} Trimmed, non-empty references; empty for unsupported input.
 */
function normalizeExtends(value) {
  if (typeof value === 'string') {
    const ref = value.trim();
    return ref ? [ref] : [];
  }
  if (!Array.isArray(value)) {
    return [];
  }
  const refs = [];
  for (const entry of value) {
    const ref = String(entry || '').trim();
    if (ref) {
      refs.push(ref);
    }
  }
  return refs;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Trims an `extends` reference and ensures it ends with `.json`.
 *
 * @param {*} value Reference; falsy values are treated as empty.
 * @returns {string|null} The normalized reference, or `null` when empty.
 */
function normalizeExtendsPath(value) {
  const ref = String(value || '').trim();
  if (ref.length === 0) {
    return null;
  }
  if (ref.endsWith('.json')) {
    return ref;
  }
  return `${ref}.json`;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Resolves `target` to an absolute URL string.
 *
 * The base is `base` when given, else the page URL when one exists, else this
 * module's URL. If URL construction fails, `target` is returned unchanged.
 *
 * @param {string} target URL or path to resolve.
 * @param {string} [base] Base URL to resolve against.
 * @returns {string} Absolute URL string, or `target` on failure.
 */
function resolveAbsoluteUrl(target, base) {
  try {
    let effectiveBase;
    if (base) {
      effectiveBase = base;
    } else if (typeof globalThis.location !== 'undefined' && globalThis.location?.href) {
      effectiveBase = globalThis.location.href;
    } else {
      effectiveBase = import.meta.url;
    }
    return new URL(target, effectiveBase).toString();
  } catch {
    // Best effort: an unresolvable target is handed back as-is.
    return target;
  }
}
|
|
215
|
+
|
|
216
|
+
/**
 * Tells whether a string starts with a URL scheme: a letter followed by
 * letters, digits, `+`, `.` or `-`, then a colon.
 *
 * @param {string} value Candidate URL.
 * @returns {boolean} `true` when the value begins with `<scheme>:`.
 */
function isAbsoluteUrl(value) {
  const schemePrefix = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
  return schemePrefix.test(value);
}
|
|
219
|
+
|
|
220
|
+
/**
 * Joins `path` onto `base`.
 *
 * - falsy base: `path` is returned unchanged
 * - absolute base: resolved with the URL constructor, treating the base as a directory
 * - otherwise: plain string join with exactly one `/` between the parts
 *
 * @param {string} base Base URL or path.
 * @param {string} path Path to append.
 * @returns {string} The joined URL or path.
 */
function joinUrl(base, path) {
  if (!base) {
    return path;
  }
  if (!isAbsoluteUrl(base)) {
    const trimmedBase = base.replace(/\/$/, '');
    const trimmedPath = path.replace(/^\//, '');
    return `${trimmedBase}/${trimmedPath}`;
  }
  // The trailing slash makes the URL constructor keep the last base segment.
  const directoryBase = base.endsWith('/') ? base : `${base}/`;
  return new URL(path, directoryBase).toString();
}
|
|
229
|
+
|
|
230
|
+
/**
 * Lists the URLs at which an `extends` reference may live, in lookup order.
 *
 * - absolute URL or `/`-rooted path: used as-is
 * - `./` or `../` reference: resolved against `context.sourceUrl`
 * - reference containing `/`: joined onto `context.presetBaseUrl`
 * - bare name: `<presetBase>/<name>`, `<presetBase>/modes/<name>`, then the
 *   directory of `context.sourceUrl`, with duplicates removed
 *
 * @param {*} ref Raw `extends` reference.
 * @param {{presetBaseUrl?: string, sourceUrl?: string}} context Resolution context.
 * @returns {string[]} Candidate URLs; empty when the reference is blank.
 */
function resolveExtendCandidates(ref, context) {
  const refPath = normalizeExtendsPath(ref);
  if (!refPath) {
    return [];
  }

  const isExplicitLocation = isAbsoluteUrl(refPath) || refPath.startsWith('/');
  if (isExplicitLocation) {
    return [refPath];
  }

  const isDotRelative = refPath.startsWith('./') || refPath.startsWith('../');
  if (isDotRelative) {
    return [resolveAbsoluteUrl(refPath, context.sourceUrl)];
  }

  if (refPath.includes('/')) {
    return [joinUrl(context.presetBaseUrl, refPath)];
  }

  // A Set keeps first-insertion order, so the lookup order is preserved while
  // duplicates are dropped.
  const unique = new Set();
  if (context.presetBaseUrl) {
    unique.add(joinUrl(context.presetBaseUrl, refPath));
    unique.add(joinUrl(context.presetBaseUrl, `modes/${refPath}`));
  }
  if (context.sourceUrl) {
    const sourceDir = resolveAbsoluteUrl('./', context.sourceUrl);
    unique.add(resolveAbsoluteUrl(refPath, sourceDir));
  }
  return [...unique];
}
|
|
253
|
+
|
|
254
|
+
/**
 * Fetches a runtime config document and parses it as JSON.
 *
 * @param {string} url Location of the config.
 * @param {{signal?: AbortSignal}} [options={}] `signal` is forwarded to `fetch`.
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} On a non-OK response; `error.code` is
 *   `'runtime_config_not_found'` for HTTP 404 and
 *   `'runtime_config_fetch_failed'` otherwise.
 */
async function fetchRuntimeConfig(url, options = {}) {
  const { signal } = options;
  const response = await fetch(url, { signal });
  if (response.ok) {
    return response.json();
  }
  const failure = new Error(`Failed to load runtime config: ${response.status}`);
  failure.code = response.status === 404
    ? 'runtime_config_not_found'
    : 'runtime_config_fetch_failed';
  throw failure;
}
|
|
263
|
+
|
|
264
|
+
/**
 * Resolves the `extends` chain of a runtime config and merges the results.
 *
 * Parents are loaded one after another in declaration order and merged left
 * to right; the config's own values are merged last. The `extends` key is
 * removed from the resolved config.
 *
 * @param {object} config Parsed config document.
 * @param {object} context Resolution context forwarded to `loadRuntimeConfigFromRef`.
 * @returns {Promise<{config: object, runtime: object}>} The merged config and its runtime section.
 * @throws {Error} When the config has no runtime section.
 */
async function resolveRuntimeConfigExtends(config, context) {
  const ownRuntime = resolveRuntimeFromConfig(config);
  if (!ownRuntime) {
    throw new Error('Runtime config is missing runtime fields');
  }

  let inheritedRuntime = null;
  let inheritedConfig = null;
  // Sequential on purpose: later parents override earlier ones.
  for (const ref of normalizeExtends(config.extends)) {
    const parent = await loadRuntimeConfigFromRef(ref, context);
    inheritedRuntime = inheritedRuntime
      ? mergeRuntimeValues(inheritedRuntime, parent.runtime)
      : parent.runtime;
    inheritedConfig = inheritedConfig
      ? mergeRuntimeValues(inheritedConfig, parent.config)
      : parent.config;
  }

  const runtime = inheritedRuntime
    ? mergeRuntimeValues(inheritedRuntime, ownRuntime)
    : ownRuntime;
  const mergedConfig = inheritedConfig
    ? mergeRuntimeValues(inheritedConfig, config)
    : { ...config };

  const resolved = { ...mergedConfig, runtime };
  if (typeof resolved.extends !== 'undefined') {
    delete resolved.extends;
  }
  return { config: resolved, runtime };
}
|
|
288
|
+
|
|
289
|
+
/**
 * Loads the runtime config at `url` and resolves its `extends` chain.
 *
 * @param {string} url Config location; made absolute before fetching.
 * @param {object} [options={}] Loader options. `presetBaseUrl` or `baseUrl`
 *   overrides the preset base; the whole object is forwarded to `fetchRuntimeConfig`.
 * @param {string[]} [stack=[]] Absolute URLs already being loaded further up the chain.
 * @returns {Promise<{config: object, runtime: object}>} The resolved config.
 * @throws {Error} When `url` is already on `stack` (an extends cycle).
 */
async function loadRuntimeConfigChain(url, options = {}, stack = []) {
  const presetBaseUrl = options.presetBaseUrl || options.baseUrl || resolvePresetBaseUrl();
  const resolvedUrl = resolveAbsoluteUrl(url);
  const visited = [...stack, resolvedUrl];
  if (stack.includes(resolvedUrl)) {
    throw new Error(`Runtime config extends cycle: ${visited.join(' -> ')}`);
  }

  const config = await fetchRuntimeConfig(resolvedUrl, options);
  const context = {
    ...options,
    sourceUrl: resolvedUrl,
    presetBaseUrl,
    stack: visited,
  };
  return resolveRuntimeConfigExtends(config, context);
}
|
|
303
|
+
|
|
304
|
+
/**
 * Loads the config named by an `extends` reference, trying each candidate URL
 * in order until one exists.
 *
 * Only "not found" failures move on to the next candidate; any other error is
 * rethrown immediately.
 *
 * @param {*} ref Raw `extends` reference.
 * @param {object} context Resolution context; `context.stack` carries the URLs
 *   already being loaded.
 * @returns {Promise<{config: object, runtime: object}>} The resolved parent config.
 * @throws {Error} When the reference yields no candidates or none can be loaded.
 */
async function loadRuntimeConfigFromRef(ref, context) {
  const candidates = resolveExtendCandidates(ref, context);
  if (candidates.length === 0) {
    throw new Error(`Runtime config extends is invalid: ${ref}`);
  }

  const stack = context.stack ?? [];
  let notFoundError = null;
  for (const candidateUrl of candidates) {
    try {
      return await loadRuntimeConfigChain(candidateUrl, context, stack);
    } catch (error) {
      if (error?.code !== 'runtime_config_not_found') {
        throw error;
      }
      notFoundError = error;
    }
  }

  if (notFoundError) {
    throw notFoundError;
  }
  throw new Error(`Runtime config extends not found: ${ref}`);
}
|
|
326
|
+
|
|
327
|
+
/**
 * Loads a runtime config from a URL, resolving any `extends` chain.
 *
 * @param {string} url Config location.
 * @param {object} [options={}] Forwarded to `loadRuntimeConfigChain`.
 * @returns {Promise<{config: object, runtime: object}>} The resolved config.
 * @throws {Error} When `url` is falsy.
 */
export async function loadRuntimeConfigFromUrl(url, options = {}) {
  if (url) {
    return loadRuntimeConfigChain(url, options);
  }
  throw new Error('runtime config url is required');
}
|
|
333
|
+
|
|
334
|
+
/**
 * Loads a runtime config from a URL and installs its runtime section as the
 * active runtime config.
 *
 * @param {string} url Config location.
 * @param {object} [options={}] Forwarded to `loadRuntimeConfigFromUrl`.
 * @returns {Promise<object>} The runtime section that was applied.
 */
export async function applyRuntimeConfigFromUrl(url, options = {}) {
  const loaded = await loadRuntimeConfigFromUrl(url, options);
  const appliedRuntime = loaded.runtime;
  setRuntimeConfig(appliedRuntime);
  return appliedRuntime;
}
|
|
339
|
+
|
|
340
|
+
/**
 * Loads a named runtime preset from the preset base URL.
 *
 * @param {string} presetId Preset id; `.json` is appended when missing.
 * @param {object} [options={}] Loader options; `baseUrl` overrides the default preset base.
 * @returns {Promise<{config: object, runtime: object}>} The resolved preset.
 * @throws {Error} When `presetId` normalizes to nothing.
 */
export async function loadRuntimePreset(presetId, options = {}) {
  const baseUrl = options.baseUrl || resolvePresetBaseUrl();
  const presetFile = normalizePresetPath(presetId);
  if (!presetFile) {
    throw new Error('runtime preset id is required');
  }
  const trimmedBase = baseUrl.replace(/\/$/, '');
  const presetUrl = `${trimmedBase}/${presetFile}`;
  return loadRuntimeConfigFromUrl(presetUrl, { ...options, presetBaseUrl: baseUrl });
}
|
|
349
|
+
|
|
350
|
+
/**
 * Loads a named runtime preset and installs its runtime section as the active
 * runtime config.
 *
 * @param {string} presetId Preset id.
 * @param {object} [options={}] Forwarded to `loadRuntimePreset`.
 * @returns {Promise<object>} The runtime section that was applied.
 */
export async function applyRuntimePreset(presetId, options = {}) {
  const loaded = await loadRuntimePreset(presetId, options);
  const appliedRuntime = loaded.runtime;
  setRuntimeConfig(appliedRuntime);
  return appliedRuntime;
}
|
|
355
|
+
|
|
356
|
+
/**
 * Initializes inference for a model URL using the resolved runtime overrides.
 *
 * @param {object} [options={}] Harness options. `modelUrl` is required;
 *   `onProgress` and `log` are forwarded to `initializeInference`; the runtime
 *   is chosen by `resolveRuntime(options)`.
 * @returns {Promise<object>} The `initializeInference` result plus the `runtime` used.
 * @throws {Error} When `modelUrl` is missing.
 */
export async function initializeBrowserHarness(options = {}) {
  if (!options.modelUrl) {
    throw new Error('modelUrl is required');
  }

  const runtime = resolveRuntime(options);
  const initialized = await initializeInference(options.modelUrl, {
    runtime,
    onProgress: options.onProgress,
    log: options.log,
  });

  return { ...initialized, runtime };
}
|
|
371
|
+
|
|
372
|
+
/**
 * Persists a report for a model by delegating to `saveReport`.
 *
 * @param {string} modelId Model the report belongs to.
 * @param {object} report Report payload.
 * @param {object} [options={}] Forwarded unchanged to `saveReport`.
 * @returns {Promise<*>} Whatever `saveReport` resolves to.
 */
export async function saveBrowserReport(modelId, report, options = {}) {
  return saveReport(modelId, report, options);
}
|
|
375
|
+
|
|
376
|
+
/**
 * Initializes the harness, obtains a report, and saves it.
 *
 * The report is `options.report` when truthy, else the result of
 * `options.buildReport(harness)` when that is a function, else a minimal
 * `{ modelId, timestamp }` object. A report without a timestamp is copied
 * with the resolved one added.
 *
 * @param {object} [options={}] Harness options; also accepts `modelId`,
 *   `timestamp`, `report` and `buildReport`.
 * @returns {Promise<object>} The harness result plus `report` and `reportInfo`.
 */
export async function runBrowserHarness(options = {}) {
  const harness = await initializeBrowserHarness(options);
  const fallbackTimestamp = resolveReportTimestamp(options.timestamp, 'runBrowserHarness timestamp');
  const modelId = options.modelId || harness.manifest?.modelId || 'unknown';

  const canBuildReport = typeof options.buildReport === 'function';
  const suppliedReport = options.report
    || (canBuildReport ? await options.buildReport(harness) : null);

  let report;
  if (!suppliedReport) {
    report = { modelId, timestamp: fallbackTimestamp };
  } else if (suppliedReport.timestamp) {
    report = suppliedReport;
  } else {
    report = { ...suppliedReport, timestamp: fallbackTimestamp };
  }

  const saveOptions = { timestamp: report.timestamp || fallbackTimestamp };
  const reportInfo = await saveReport(modelId, report, saveOptions);
  return { ...harness, report, reportInfo };
}
|
|
397
|
+
|
|
398
|
+
// Suite names accepted on the browser surface (frozen).
const BROWSER_SUITE_SET = Object.freeze(
  ['kernels', 'inference', 'training', 'bench', 'debug', 'diffusion', 'energy']
);

// Label of the runner each suite dispatches to (frozen). 'debug' maps to the
// inference runner label with a "(debug)" suffix.
const BROWSER_SUITE_DISPATCH_MAP = Object.freeze({
  kernels: 'runKernelSuite',
  inference: 'runInferenceSuite',
  training: 'runTrainingSuite',
  bench: 'runBenchSuite',
  debug: 'runInferenceSuite(debug)',
  diffusion: 'runDiffusionSuite',
  energy: 'runEnergySuite',
});
|
|
417
|
+
|
|
418
|
+
/**
 * Lists the suite names accepted on the browser surface.
 *
 * @returns {string[]} A fresh, mutable copy of the supported suite names.
 */
export function getBrowserSupportedSuites() {
  return Array.from(BROWSER_SUITE_SET);
}
|
|
421
|
+
|
|
422
|
+
/**
 * Returns the suite-name to runner-label mapping.
 *
 * @returns {Object<string, string>} A fresh, mutable copy of the dispatch map.
 */
export function getBrowserSuiteDispatchMap() {
  return Object.assign({}, BROWSER_SUITE_DISPATCH_MAP);
}
|
|
425
|
+
|
|
426
|
+
/**
 * Builds (without throwing) the error reported for an unknown suite name.
 *
 * The error carries `code: 'unsupported_suite'` plus `requestedSuite`,
 * `allowedSuites`, `command` and `surface`, both as top-level properties and
 * grouped under `details`.
 *
 * @param {string} requestedSuite Suite name that was rejected.
 * @param {{command?: string, surface?: string}} [context={}] Caller context;
 *   blank or non-string values fall back to `'run-browser-suite'` / `'browser'`.
 * @returns {Error} The populated error object.
 */
function createUnsupportedSuiteError(requestedSuite, context = {}) {
  const trimmedOr = (candidate, fallback) => (
    typeof candidate === 'string' && candidate.trim() ? candidate.trim() : fallback
  );
  const command = trimmedOr(context.command, 'run-browser-suite');
  const surface = trimmedOr(context.surface, 'browser');
  const allowedSuites = [...BROWSER_SUITE_SET];
  const details = { requestedSuite, allowedSuites, command, surface };

  const error = new Error(
    `Unsupported suite "${requestedSuite}". Allowed suites: ${allowedSuites.join(', ')}. ` +
    `command="${command}" surface="${surface}".`
  );
  return Object.assign(error, { code: 'unsupported_suite', ...details, details });
}
|
|
451
|
+
|
|
452
|
+
/**
 * Extracts the command/surface context used in suite error reporting.
 *
 * @param {object} [options={}] Suite options.
 * @returns {{command: string, surface: string}} String values from `options`,
 *   or `'run-browser-suite'` / `'browser'` when the option is not a string.
 */
function resolveSuiteContext(options = {}) {
  const stringOr = (candidate, fallback) => (typeof candidate === 'string' ? candidate : fallback);
  return {
    command: stringOr(options.command, 'run-browser-suite'),
    surface: stringOr(options.surface, 'browser'),
  };
}
|
|
460
|
+
|
|
461
|
+
/**
 * Validates and canonicalizes a suite name.
 *
 * The name is trimmed and lower-cased, and the alias `'benchmark'` maps to `'bench'`.
 *
 * @param {*} value Requested suite name.
 * @param {object} [context={}] Forwarded to `createUnsupportedSuiteError`.
 * @returns {string} The canonical suite name.
 * @throws {Error} An `unsupported_suite` error when the name is blank or unknown.
 */
function normalizeSuite(value, context = {}) {
  const requested = String(value || '').trim().toLowerCase();
  if (requested.length === 0) {
    throw createUnsupportedSuiteError(requested, context);
  }
  const canonical = requested === 'benchmark' ? 'bench' : requested;
  if (BROWSER_SUITE_SET.includes(canonical)) {
    return canonical;
  }
  throw createUnsupportedSuiteError(canonical, context);
}
|
|
472
|
+
|
|
473
|
+
/**
 * Tallies suite results into a summary.
 *
 * Each entry counts once: `skipped` wins over `passed`, and anything else
 * (including a null or undefined entry) counts as failed.
 *
 * @param {string} suiteName Name recorded as `suite`.
 * @param {Array<{skipped?: boolean, passed?: boolean}>} results Per-test
 *   results; a non-array is treated as empty.
 * @param {number} startTimeMs `performance.now()` reading taken at suite
 *   start; a non-finite value yields a duration of 0.
 * @returns {{suite: string, passed: number, failed: number, skipped: number, duration: number, results: Array}}
 *   The summary; `duration` is in milliseconds and never negative.
 */
export function buildSuiteSummary(suiteName, results, startTimeMs) {
  const safeResults = Array.isArray(results) ? results : [];
  let passed = 0;
  let failed = 0;
  let skipped = 0;
  for (const result of safeResults) {
    // Optional chaining: a malformed (nullish) entry is counted as a failure
    // instead of aborting the whole summary with a TypeError.
    if (result?.skipped) {
      skipped += 1;
    } else if (result?.passed) {
      passed += 1;
    } else {
      failed += 1;
    }
  }
  const now = performance.now();
  const startedAt = Number.isFinite(startTimeMs) ? startTimeMs : now;
  const duration = Math.max(0, now - startedAt);
  return { suite: suiteName, passed, failed, skipped, duration, results: safeResults };
}
|
|
490
|
+
|
|
491
|
+
/**
 * Normalizes a cache mode to `'cold'` or `'warm'`.
 *
 * @param {*} value Requested cache mode.
 * @returns {'cold'|'warm'} `'cold'` only for the exact string `'cold'`; `'warm'` otherwise.
 */
function normalizeCacheMode(value) {
  if (value === 'cold') {
    return 'cold';
  }
  return 'warm';
}
|
|
494
|
+
|
|
495
|
+
/**
 * Normalizes a load mode to `'opfs'`, `'http'` or `'memory'`.
 *
 * @param {*} value Requested load mode.
 * @param {*} hasModelUrl Whether a model URL was supplied; decides the default.
 * @returns {'opfs'|'http'|'memory'} The recognized mode, else `'http'` when a
 *   model URL is present and `'opfs'` when not.
 */
function normalizeLoadMode(value, hasModelUrl) {
  switch (value) {
    case 'opfs':
    case 'http':
    case 'memory':
      return value;
    default:
      return hasModelUrl ? 'http' : 'opfs';
  }
}
|
|
501
|
+
|
|
502
|
+
/**
 * Detects a Node.js host by checking for `process.versions.node`.
 *
 * @returns {boolean} `true` when a `process` global with a Node version exists.
 */
function isNodeRuntime() {
  if (typeof process === 'undefined') {
    return false;
  }
  return Boolean(process.versions?.node);
}
|
|
505
|
+
|
|
506
|
+
/**
 * Trims and lower-cases a workload type.
 *
 * @param {*} value Raw workload type; falsy values are treated as empty.
 * @returns {string|null} The normalized type, or `null` when empty.
 */
function normalizeWorkloadType(value) {
  const lowered = String(value || '').trim().toLowerCase();
  return lowered.length > 0 ? lowered : null;
}
|
|
510
|
+
|
|
511
|
+
/**
 * Coerces a statistic to a usable number.
 *
 * @param {*} value Candidate statistic.
 * @returns {number} `value` when it is a finite number, otherwise `0`.
 *   Numeric strings are not accepted.
 */
function safeStatsValue(value) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  return Number(value);
}
|
|
514
|
+
|
|
515
|
+
/**
 * Converts a count over a duration in milliseconds to a per-second rate.
 *
 * @param {*} count Number of items processed.
 * @param {*} durationMs Elapsed time in milliseconds.
 * @returns {number} Rate rounded to two decimals, or `0` when either input is
 *   not a positive finite number.
 */
function calculateRatePerSecond(count, durationMs) {
  const items = safeStatsValue(count);
  const elapsedMs = safeStatsValue(durationMs);
  const hasWork = items > 0 && elapsedMs > 0;
  if (!hasWork) {
    return 0;
  }
  const perSecond = (items * 1000) / elapsedMs;
  return Number(perSecond.toFixed(2));
}
|
|
521
|
+
|
|
522
|
+
/**
 * Assembles the schema-version-1 performance artifact for a diffusion run.
 *
 * Timings are the `median` of each phase in `cpuStats` / `gpuStats`, with
 * missing or non-finite medians reported as 0. GPU timings are `null` unless
 * `gpuStats.available === true`. All throughput figures are derived from the
 * CPU-side medians.
 *
 * @param {object} params
 * @param {number} params.warmupRuns Warm-up runs performed.
 * @param {number} params.timedRuns Timed runs performed.
 * @param {number} params.width Output width.
 * @param {number} params.height Output height.
 * @param {number} params.steps Scheduler steps.
 * @param {number} params.guidanceScale Guidance scale.
 * @param {number} params.avgPrefillTokens Average prefill token count.
 * @param {number} params.avgDecodeTokens Average decode token count.
 * @param {object} params.cpuStats Per-phase stats objects exposing `median`.
 * @param {object} params.gpuStats Same shape as `cpuStats`, plus `available`.
 * @returns {object} The performance artifact.
 */
function buildDiffusionPerformanceArtifact({
  warmupRuns,
  timedRuns,
  width,
  height,
  steps,
  guidanceScale,
  avgPrefillTokens,
  avgDecodeTokens,
  cpuStats,
  gpuStats,
}) {
  // Order fixes the key order of the emitted cpu/gpu objects.
  const phases = ['totalMs', 'prefillMs', 'denoiseMs', 'vaeMs'];
  const medianOf = (stats, phase) => safeStatsValue(stats?.[phase]?.median);

  const gpuAvailable = gpuStats?.available === true;
  const cpu = {};
  const gpu = { available: gpuAvailable };
  for (const phase of phases) {
    cpu[phase] = medianOf(cpuStats, phase);
    gpu[phase] = gpuAvailable ? medianOf(gpuStats, phase) : null;
  }

  const throughput = {
    prefillTokensPerSec: calculateRatePerSecond(avgPrefillTokens, cpu.prefillMs),
    decodeTokensPerSec: calculateRatePerSecond(avgDecodeTokens, cpu.denoiseMs),
    decodeStepsPerSec: calculateRatePerSecond(steps, cpu.denoiseMs),
  };

  return {
    schemaVersion: 1,
    warmupRuns,
    timedRuns,
    shape: { width, height },
    scheduler: { steps, guidanceScale },
    cpu,
    gpu,
    throughput,
    tokens: {
      avgPrefillTokens: safeStatsValue(avgPrefillTokens),
      avgDecodeTokens: safeStatsValue(avgDecodeTokens),
    },
  };
}
|
|
582
|
+
|
|
583
|
+
/**
 * Validates `metrics.performanceArtifact` and throws on the first violation.
 *
 * Checked in order: the artifact is an object, `schemaVersion === 1`,
 * `warmupRuns` is a non-negative integer, `timedRuns` is a positive integer,
 * the four CPU timings are finite, and `throughput.decodeStepsPerSec` is finite.
 *
 * @param {object} metrics Metrics object holding `performanceArtifact`.
 * @param {string} [contextLabel='diffusion'] Prefix for error messages.
 * @returns {void}
 * @throws {Error} Describing the first failed check.
 */
function assertDiffusionPerformanceArtifact(metrics, contextLabel = 'diffusion') {
  const artifact = metrics?.performanceArtifact;
  if (!artifact || typeof artifact !== 'object') {
    throw new Error(`${contextLabel}: metrics.performanceArtifact is required.`);
  }
  if (artifact.schemaVersion !== 1) {
    throw new Error(`${contextLabel}: metrics.performanceArtifact.schemaVersion must be 1.`);
  }

  const { warmupRuns, timedRuns } = artifact;
  const warmupOk = Number.isInteger(warmupRuns) && warmupRuns >= 0;
  if (!warmupOk) {
    throw new Error(`${contextLabel}: metrics.performanceArtifact.warmupRuns must be a non-negative integer.`);
  }
  const timedOk = Number.isInteger(timedRuns) && timedRuns >= 1;
  if (!timedOk) {
    throw new Error(`${contextLabel}: metrics.performanceArtifact.timedRuns must be a positive integer.`);
  }

  // Same order as the reported messages: the first non-finite field wins.
  for (const field of ['prefillMs', 'denoiseMs', 'vaeMs', 'totalMs']) {
    if (!Number.isFinite(artifact?.cpu?.[field])) {
      throw new Error(`${contextLabel}: metrics.performanceArtifact.cpu.${field} must be finite.`);
    }
  }

  if (!Number.isFinite(artifact?.throughput?.decodeStepsPerSec)) {
    throw new Error(`${contextLabel}: metrics.performanceArtifact.throughput.decodeStepsPerSec must be finite.`);
  }
}
|
|
613
|
+
|
|
614
|
+
/**
 * Rounds a timing value to two decimal places.
 *
 * @param {*} value Timing value.
 * @param {*} [fallback=0] Returned when `value` does not convert to a finite number.
 * @returns {*} The rounded number, or `fallback`.
 */
function toTimingNumber(value, fallback = 0) {
  const timingDigits = 2;
  return formatMetricNumber(value, fallback, timingDigits);
}
|
|
617
|
+
|
|
618
|
+
/**
 * Rounds a value to `digits` decimal places, tolerating non-numeric input.
 *
 * @param {*} value Value to round.
 * @param {*} [fallback=0] Returned when `value` does not convert to a finite number.
 * @param {number} [digits=2] Decimal places to keep.
 * @returns {*} The rounded number, or `fallback`.
 */
function safeToFixed(value, fallback = 0, digits = 2) {
  const rounded = formatMetricNumber(value, fallback, digits);
  return rounded;
}
|
|
621
|
+
|
|
622
|
+
/**
 * Reads one statistic from a stats object and rounds it to two decimals.
 *
 * @param {object|null|undefined} stats Stats object; may be nullish.
 * @param {string} key Property to read from `stats`.
 * @param {*} [fallback=0] Returned when the property does not convert to a finite number.
 * @returns {*} The rounded number, or `fallback`.
 */
function sampleTimingNumber(stats, key, fallback = 0) {
  const sample = stats?.[key];
  return formatMetricNumber(sample, fallback, 2);
}
|
|
625
|
+
|
|
626
|
+
/**
 * Converts a value with `Number()` and rounds it to `digits` decimal places.
 *
 * Because `Number()` does the conversion, numeric strings are accepted and
 * `null` converts to 0 rather than triggering the fallback.
 *
 * @param {*} value Value to convert.
 * @param {*} [fallback=0] Returned when the conversion is not finite.
 * @param {number} [digits=2] Decimal places to keep.
 * @returns {*} The rounded number, or `fallback`.
 */
function formatMetricNumber(value, fallback = 0, digits = 2) {
  const parsed = Number(value);
  return Number.isFinite(parsed) ? Number(parsed.toFixed(digits)) : fallback;
}
|
|
631
|
+
|
|
632
|
+
/**
 * Builds the canonical timing record from raw overrides.
 *
 * - `modelLoadMs`, `prefillMs`, `decodeMs` are rounded and fall back to 0.
 * - `totalRunMs` falls back to `prefillMs + decodeMs`.
 * - `firstResponseMs` falls back to `modelLoadMs + totalRunMs`.
 * - `firstTokenMs`, the per-token percentiles and the tokens-per-second rates
 *   are `null` unless a finite number is supplied.
 * - `cacheMode` is normalized; `loadMode` is passed through untouched.
 *
 * @param {object} [overrides={}] Raw timing values.
 * @returns {object} The canonical timing record.
 */
function buildCanonicalTiming(overrides = {}) {
  // Rounds finite numbers; everything else (including numeric strings) is null.
  const roundedOrNull = (candidate) => (
    Number.isFinite(candidate) ? toTimingNumber(candidate) : null
  );

  const modelLoadMs = toTimingNumber(overrides.modelLoadMs, 0);
  const prefillMs = toTimingNumber(overrides.prefillMs, 0);
  const decodeMs = toTimingNumber(overrides.decodeMs, 0);
  const derivedRunMs = toTimingNumber(prefillMs + decodeMs);
  const totalRunMs = toTimingNumber(overrides.totalRunMs, derivedRunMs);

  let firstResponseMs;
  if (Number.isFinite(overrides.firstResponseMs)) {
    firstResponseMs = toTimingNumber(overrides.firstResponseMs);
  } else {
    firstResponseMs = toTimingNumber(modelLoadMs + totalRunMs);
  }

  return {
    modelLoadMs,
    firstTokenMs: roundedOrNull(overrides.firstTokenMs),
    firstResponseMs,
    prefillMs,
    decodeMs,
    decodeMsPerTokenP50: roundedOrNull(overrides.decodeMsPerTokenP50),
    decodeMsPerTokenP95: roundedOrNull(overrides.decodeMsPerTokenP95),
    decodeMsPerTokenP99: roundedOrNull(overrides.decodeMsPerTokenP99),
    decodeTokensPerSec: roundedOrNull(overrides.decodeTokensPerSec),
    prefillTokensPerSec: roundedOrNull(overrides.prefillTokensPerSec),
    totalRunMs,
    cacheMode: normalizeCacheMode(overrides.cacheMode),
    loadMode: overrides.loadMode,
  };
}
|
|
679
|
+
|
|
680
|
+
/**
 * Produces a schema-version-1 diagnostics record that checks whether the
 * timing components add up.
 *
 * Two identities are checked, each with a tolerance of 2 ms:
 *  - `firstResponseMs` against `modelLoadMs + firstTokenMs`
 *  - `totalRunMs` against `prefillMs + decodeMs`
 * A sum, residual or consistency flag is `null` when any input it needs is
 * not a finite number.
 *
 * @param {object} [timing={}] Timing record, e.g. from `buildCanonicalTiming`.
 * @param {{prefillSemantics?: string, source?: string}} [options={}] Labels
 *   recorded in the output; default to `'internal_prefill_phase'` and `'doppler'`.
 * @returns {object} Diagnostics with `semantics`, `componentsMs`, `sumsMs`,
 *   `residualsMs` and `consistent` sections.
 */
function buildTimingDiagnostics(timing = {}, options = {}) {
  const roundedOrNull = (candidate) => (
    Number.isFinite(candidate) ? toTimingNumber(candidate) : null
  );
  // Applies `combine` only when both operands are finite numbers.
  const combineOrNull = (left, right, combine) => (
    Number.isFinite(left) && Number.isFinite(right)
      ? toTimingNumber(combine(left, right))
      : null
  );
  const add = (left, right) => left + right;
  const subtract = (left, right) => left - right;
  const withinTolerance = (residual) => (
    Number.isFinite(residual) ? Math.abs(residual) <= 2 : null
  );

  const prefillSemantics = String(options.prefillSemantics || 'internal_prefill_phase');
  const source = String(options.source || 'doppler');

  const componentsMs = {
    modelLoadMs: roundedOrNull(timing.modelLoadMs),
    firstTokenMs: roundedOrNull(timing.firstTokenMs),
    firstResponseMs: roundedOrNull(timing.firstResponseMs),
    prefillMs: roundedOrNull(timing.prefillMs),
    decodeMs: roundedOrNull(timing.decodeMs),
    totalRunMs: roundedOrNull(timing.totalRunMs),
  };

  const sumsMs = {
    firstResponseFromLoadAndFirstTokenMs: combineOrNull(
      componentsMs.modelLoadMs,
      componentsMs.firstTokenMs,
      add
    ),
    runFromPrefillAndDecodeMs: combineOrNull(
      componentsMs.prefillMs,
      componentsMs.decodeMs,
      add
    ),
  };

  const residualsMs = {
    firstResponseResidualMs: combineOrNull(
      componentsMs.firstResponseMs,
      sumsMs.firstResponseFromLoadAndFirstTokenMs,
      subtract
    ),
    runResidualMs: combineOrNull(
      componentsMs.totalRunMs,
      sumsMs.runFromPrefillAndDecodeMs,
      subtract
    ),
  };

  return {
    schemaVersion: 1,
    source,
    semantics: {
      modelLoadMs: 'model initialization/load before generation',
      firstTokenMs: 'ttft from generation start',
      firstResponseMs: 'modelLoadMs + firstTokenMs',
      prefillMs: prefillSemantics,
      decodeMs: 'time after first token',
      totalRunMs: 'prefillMs + decodeMs',
    },
    componentsMs,
    sumsMs,
    residualsMs,
    consistent: {
      firstResponse: withinTolerance(residualsMs.firstResponseResidualMs),
      totalRun: withinTolerance(residualsMs.runResidualMs),
    },
  };
}
|
|
745
|
+
|
|
746
|
+
/**
 * Returns the kernel capabilities reported by `getKernelCapabilities`.
 *
 * @returns {*} The capabilities, or `null` when the lookup throws.
 */
function resolveDeviceInfo() {
  let capabilities = null;
  try {
    capabilities = getKernelCapabilities();
  } catch {
    // Best effort: capability lookup failures are reported as "unknown".
    capabilities = null;
  }
  return capabilities;
}
|
|
753
|
+
|
|
754
|
+
/**
 * Loads the stored manifest for `options.modelId`, resolves its kernel path
 * state and activates it.
 *
 * @param {object} [options]
 * @param {string} [options.modelId] Model whose store is opened; without it
 *   nothing is loaded and null is returned.
 * @param {object} [options.runtime] When `runtime.runtimeConfig` is set it is
 *   used instead of getRuntimeConfig().
 * @returns {Promise<{modelId: *, kernelPath: *, source: *} | null>} The
 *   activated kernel path description, or null when no manifest is available.
 */
async function resolveKernelPathForModel(options = {}) {
  const runtimeConfig = options.runtime?.runtimeConfig ?? getRuntimeConfig();

  const requestedModelId = options.modelId;
  if (!requestedModelId) {
    return null;
  }

  await openModelStore(requestedModelId);
  const manifestText = await loadManifestFromStore();
  if (!manifestText) {
    return null;
  }

  const manifest = parseManifest(manifestText);
  // Prefer the id recorded in the manifest over the one the caller asked for.
  const manifestModelId = manifest.modelId ?? requestedModelId;
  if (!manifest) {
    return null;
  }

  const modelConfig = parseModelConfigFromManifest(manifest, runtimeConfig);
  const kernelPathState = resolveKernelPathState({ manifest, runtimeConfig, modelConfig });
  activateKernelPathState(kernelPathState);

  const { resolvedKernelPath, kernelPathSource } = kernelPathState;
  return {
    modelId: manifestModelId,
    kernelPath: resolvedKernelPath,
    source: kernelPathSource,
  };
}
|
|
783
|
+
|
|
784
|
+
/**
 * Builds an inference pipeline from a model already present in the model store.
 *
 * Steps, in order: apply the optional runtime config, open the store, load and
 * parse the manifest, initialise the GPU device, create the pipeline.
 *
 * @param {string} modelId Identifier passed to openModelStore().
 * @param {object} [options]
 * @param {Function} [options.onProgress] Called as (stage, fraction, message)
 *   before each step; also forwarded to createPipeline().
 * @param {object} [options.runtime] Forwarded to createPipeline(); its
 *   `runtimeConfig`, when truthy, is applied via setRuntimeConfig().
 * @returns {Promise<{pipeline: *, manifest: *, capabilities: *}>}
 * @throws {Error} When modelId is missing or the store holds no manifest.
 */
async function initializeInferenceFromStorage(modelId, options = {}) {
  const onProgress = options.onProgress;
  const report = (stage, fraction, message) => onProgress?.(stage, fraction, message);

  if (!modelId) {
    throw new Error('modelId is required');
  }

  const runtimeConfig = options.runtime?.runtimeConfig;
  if (runtimeConfig) {
    setRuntimeConfig(runtimeConfig);
  }

  report('storage', 0.05, 'Opening model store...');
  await openModelStore(modelId);

  report('manifest', 0.1, 'Loading manifest...');
  const manifestText = await loadManifestFromStore();
  if (!manifestText) {
    throw new Error('Manifest not found in storage');
  }
  const manifest = parseManifest(manifestText);

  report('gpu', 0.2, 'Initializing WebGPU...');
  await initDevice();
  const gpu = { device: getDevice() };
  const capabilities = getKernelCapabilities();

  report('pipeline', 0.3, 'Creating pipeline...');
  const pipeline = await createPipeline(manifest, {
    gpu,
    runtime: options.runtime,
    onProgress,
  });

  return { pipeline, manifest, capabilities };
}
|
|
818
|
+
|
|
819
|
+
/**
 * Builds an inference pipeline directly from a local model source path
 * (the loadMode=memory flow).
 *
 * @param {string} sourcePath Local filesystem path; anything that looks like
 *   `scheme://...` is rejected.
 * @param {object} [options]
 * @param {Function} [options.onProgress] Called as (stage, fraction, message);
 *   also forwarded to createPipeline().
 * @param {object} [options.runtime] Forwarded to createPipeline(); its
 *   `runtimeConfig`, when truthy, is applied via setRuntimeConfig().
 * @param {string} [options.modelId] Passed to the source bundle resolver
 *   (null when falsy).
 * @returns {Promise<{pipeline: *, manifest: *, capabilities: *}>}
 * @throws {Error} On a missing/non-string path, a non-Node runtime, a URL-like
 *   path, or when the resolver returns no bundle.
 */
async function initializeInferenceFromSourcePath(sourcePath, options = {}) {
  const onProgress = options.onProgress;
  const report = (stage, fraction, message) => onProgress?.(stage, fraction, message);

  const hasPath = Boolean(sourcePath) && typeof sourcePath === 'string';
  if (!hasPath) {
    throw new Error('modelUrl is required for loadMode=memory.');
  }
  if (!isNodeRuntime()) {
    throw new Error('loadMode=memory source runtime is currently supported on Node only.');
  }
  const urlSchemePattern = /^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//;
  if (urlSchemePattern.test(sourcePath)) {
    throw new Error(
      'loadMode=memory expects a local filesystem path (Safetensors directory or .gguf file), not an URL.'
    );
  }

  const runtimeConfig = options.runtime?.runtimeConfig;
  if (runtimeConfig) {
    setRuntimeConfig(runtimeConfig);
  }

  report('source', 0.05, 'Preparing source runtime bundle...');
  // Loaded lazily so the module is only imported on this code path.
  const sourceRuntime = await import(NODE_SOURCE_RUNTIME_MODULE_PATH);
  const { resolveNodeSourceRuntimeBundle } = sourceRuntime;
  const sourceBundle = await resolveNodeSourceRuntimeBundle({
    inputPath: sourcePath,
    modelId: options.modelId || null,
  });
  if (!sourceBundle) {
    throw new Error(
      `No source-runtime model detected at "${sourcePath}". Expected a Safetensors directory or a .gguf file path.`
    );
  }

  report('gpu', 0.2, 'Initializing WebGPU...');
  await initDevice();
  const gpu = { device: getDevice() };
  const capabilities = getKernelCapabilities();

  report('pipeline', 0.3, 'Creating pipeline...');
  const pipeline = await createPipeline(sourceBundle.manifest, {
    gpu,
    runtime: options.runtime,
    storage: sourceBundle.storageContext,
    onProgress,
  });

  return {
    pipeline,
    manifest: sourceBundle.manifest,
    capabilities,
  };
}
|
|
869
|
+
|
|
870
|
+
/**
 * Resolves and validates a caller-supplied harness override.
 *
 * `options.harnessOverride` may be the override object itself or a function
 * (sync or async) that is called with `options` and returns it.
 *
 * @param {object} [options]
 * @param {object|Function} [options.harnessOverride]
 * @param {string} [options.modelId] Used for the fallback manifest.
 * @returns {Promise<object>} The override's own properties plus a `manifest`
 *   (the supplied object, or a minimal diffusion placeholder) and a
 *   non-negative `modelLoadMs` (0 when not a finite number).
 * @throws {Error} When the override is not an object or has no
 *   `pipeline.generate` function.
 */
async function resolveHarnessOverride(options = {}) {
  let input;
  if (typeof options.harnessOverride === 'function') {
    input = await options.harnessOverride(options);
  } else {
    input = options.harnessOverride;
  }

  const isObject = Boolean(input) && typeof input === 'object';
  if (!isObject) {
    throw new Error('harnessOverride must resolve to an object.');
  }

  const canGenerate = Boolean(input.pipeline) && typeof input.pipeline.generate === 'function';
  if (!canGenerate) {
    throw new Error('harnessOverride.pipeline.generate(request) is required.');
  }

  let manifest = input.manifest;
  if (!manifest || typeof manifest !== 'object') {
    // No usable manifest supplied: synthesise a minimal one.
    manifest = {
      modelId: options.modelId || 'diffusion-harness-override',
      modelType: 'diffusion',
    };
  }

  let modelLoadMs = 0;
  if (Number.isFinite(input.modelLoadMs)) {
    modelLoadMs = Math.max(0, input.modelLoadMs);
  }

  return { ...input, manifest, modelLoadMs };
}
|
|
900
|
+
|
|
901
|
+
/**
 * Initialises the model harness for a suite run and records the load time.
 *
 * Selection order:
 *   1. `options.harnessOverride` present: delegate to resolveHarnessOverride().
 *   2. Normalised load mode is 'memory': load from the local path in `modelUrl`.
 *   3. `modelId` without `modelUrl`: load from the model store.
 *   4. Otherwise: load from `modelUrl` via initializeInference().
 *
 * @param {object} [options]
 * @returns {Promise<object>} The harness fields plus `modelLoadMs`.
 * @throws {Error} When the selected path needs `modelUrl` and it is missing.
 */
async function initializeSuiteModel(options = {}) {
  if (options.harnessOverride) {
    return resolveHarnessOverride(options);
  }

  const loadStart = performance.now();
  const runtime = resolveRuntime(options);
  const { modelUrl, modelId } = options;
  const loadMode = normalizeLoadMode(options.loadMode, !modelUrl);

  const loadHarness = async () => {
    if (loadMode === 'memory') {
      if (!modelUrl) {
        throw new Error('loadMode=memory requires modelUrl to be a local model path.');
      }
      return initializeInferenceFromSourcePath(modelUrl, { ...options, runtime });
    }
    if (modelId && !modelUrl) {
      return initializeInferenceFromStorage(modelId, { ...options, runtime });
    }
    if (!modelUrl) {
      throw new Error('modelUrl is required for this suite');
    }
    return initializeInference(modelUrl, {
      runtime,
      onProgress: options.onProgress,
      log: options.log,
    });
  };

  const harness = await loadHarness();
  const elapsedMs = performance.now() - loadStart;
  return { ...harness, modelLoadMs: Math.max(0, elapsedMs) };
}
|
|
929
|
+
|
|
930
|
+
/**
 * Runs the browser kernel test suite and returns its summary together with
 * device information.
 *
 * The active kernel path, its source and its policy are captured before the
 * run and restored afterwards, whether or not the tests throw. When
 * `options.modelId` is set, resolveKernelPathForModel() is called first.
 *
 * @param {object} [options]
 * @param {string} [options.modelId]
 * @returns {Promise<object>} buildSuiteSummary('kernels', ...) fields plus
 *   `deviceInfo`.
 */
async function runKernelSuite(options = {}) {
  const startTime = performance.now();

  const testPageModule = await import('../../tests/kernels/browser/test-page.js');
  const { testHarness, initGPU } = testPageModule;
  const kernelSuiteModule = await import('../../tests/kernels/browser/kernel-suite.js');
  const { runKernelSuite: runAllKernelTests } = kernelSuiteModule;
  await initGPU();

  // Snapshot the current kernel-path state so it can be restored afterwards.
  const previous = {
    path: getActiveKernelPath(),
    source: getActiveKernelPathSource(),
    policy: getActiveKernelPathPolicy(),
  };

  if (options.modelId) {
    await resolveKernelPathForModel(options);
  }

  let results;
  try {
    results = await runAllKernelTests(testHarness);
  } finally {
    setActiveKernelPath(previous.path, previous.source, previous.policy);
  }

  return {
    ...buildSuiteSummary('kernels', results, startTime),
    deviceInfo: resolveDeviceInfo(),
  };
}
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
|
|
958
|
+
|
|
959
|
+
|
|
960
|
+
// Prompt returned by resolvePrompt()/resolveGenerationPromptInput() when no
// usable prompt is configured.
const DEFAULT_HARNESS_PROMPT = 'Summarize this input in one sentence.';
// Token budget returned by resolveMaxTokens() when none is configured.
const DEFAULT_HARNESS_MAX_TOKENS = 32;
// Maximum number of leading embedding values kept in the summary preview.
const EMBEDDING_PREVIEW_LENGTH = 16;
// Default minimum retrieval top-1 accuracy for the embedding semantic check.
const EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1 = 0.67;
// Default minimum pair accuracy for the embedding semantic check.
const EMBEDDING_SEMANTIC_MIN_PAIR_ACC = 0.67;
// Default margin by which sim(anchor, positive) must exceed
// sim(anchor, negative) for a pair case to pass.
const EMBEDDING_SEMANTIC_PAIR_MARGIN = 0.01;
|
|
966
|
+
|
|
967
|
+
// Built-in retrieval fixtures: each case has one query, three candidate
// documents and the index (`expectedDoc`) of the document that should rank
// first. The array, every case and every `docs` list are frozen.
const EMBEDDING_SEMANTIC_RETRIEVAL_CASES = Object.freeze(
  [
    {
      id: 'library_search',
      query: 'Where can I borrow books and study quietly?',
      docs: [
        'The city library lends books, provides study rooms, and offers free Wi-Fi.',
        'The cafe serves coffee, pastries, and sandwiches all day.',
        'The bike repair shop fixes flat tires and broken chains.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'password_reset',
      query: 'How do I reset my account password?',
      docs: [
        'To reset your password, open account settings and choose the forgot-password flow.',
        'Our shipping policy explains delivery timelines and tracking updates.',
        'The recipe combines tomatoes, basil, and olive oil.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'damaged_package',
      query: 'What should I do if my package arrives damaged?',
      docs: [
        'Contact support within seven days with photos to request a replacement for damaged items.',
        'The concert starts at 8 PM at the downtown arena.',
        'Plant roses in spring and water them twice a week.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'flight_change_policy',
      query: 'Can I change my flight after booking?',
      docs: [
        'The museum opens daily at 10 AM and offers guided tours on weekends.',
        'You can change your flight in Manage Booking up to 24 hours before departure, with any fare difference applied.',
        'Our gym membership includes group classes and access to the pool.',
      ],
      expectedDoc: 1,
    },
    {
      id: 'wifi_troubleshoot',
      query: 'Why does my home Wi-Fi keep disconnecting?',
      docs: [
        'The dessert menu includes cheesecake, brownies, and fruit tart.',
        'You can review your recent orders in your account purchase history.',
        'Frequent Wi-Fi drops can be fixed by restarting the router, updating firmware, and changing the wireless channel.',
      ],
      expectedDoc: 2,
    },
    {
      id: 'refund_deadline',
      query: 'How long do I have to request a refund?',
      docs: [
        'Refund requests are accepted within 30 days of purchase when the item is in original condition.',
        'The conference keynote starts at 9 AM in the main hall.',
        'Use a medium grind when brewing coffee with a drip machine.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'passport_renewal_docs',
      query: 'What documents do I need to renew a passport?',
      docs: [
        'To care for houseplants, water only when the top soil is dry.',
        'Passport renewal usually requires the application form, current passport, compliant photo, and payment.',
        'The train to downtown runs every 20 minutes during peak hours.',
      ],
      expectedDoc: 1,
    },
  ].map((fixture) => Object.freeze({ ...fixture, docs: Object.freeze(fixture.docs) })),
);
|
|
1039
|
+
|
|
1040
|
+
// Built-in pair fixtures: each case has an anchor sentence, a paraphrase
// (`positive`) and an unrelated sentence (`negative`). The array and every
// case are frozen.
const EMBEDDING_SEMANTIC_PAIR_CASES = Object.freeze(
  [
    {
      id: 'bike_paraphrase',
      anchor: 'The child is riding a bicycle through the park.',
      positive: 'A kid bikes along a path in the park.',
      negative: 'The stock market closed lower after interest-rate news.',
    },
    {
      id: 'cancel_subscription',
      anchor: 'Please cancel my subscription before renewal.',
      positive: 'I want to stop the plan so it does not renew.',
      negative: 'The mountain trail is closed after heavy snow.',
    },
    {
      id: 'battery_drain',
      anchor: 'The laptop battery drains very quickly.',
      positive: 'My notebook loses charge fast.',
      negative: 'This pasta sauce tastes sweet and spicy.',
    },
    {
      id: 'order_tracking',
      anchor: 'I need to track where my order is.',
      positive: 'How can I check my package delivery status?',
      negative: 'The violin concerto was composed in the 1800s.',
    },
    {
      id: 'account_lockout',
      anchor: 'My account is locked after too many login attempts.',
      positive: 'I cannot sign in because the system temporarily blocked my account.',
      negative: 'Bake the cake at 350 degrees for thirty minutes.',
    },
    {
      id: 'invoice_request',
      anchor: 'Please send me the invoice for last month.',
      positive: 'Can you provide the billing statement for the previous month?',
      negative: 'The hiking trail follows the river for five miles.',
    },
    {
      id: 'slow_internet',
      anchor: 'The internet speed is much slower tonight.',
      positive: 'My connection is unusually slow this evening.',
      negative: 'The novel explores themes of memory and loss.',
    },
  ].map((fixture) => Object.freeze(fixture)),
);
|
|
1084
|
+
|
|
1085
|
+
/**
 * Returns the trimmed string, or null for non-strings and for strings that
 * are empty after trimming.
 *
 * @param {*} value
 * @returns {string|null}
 */
function asText(value) {
  return typeof value === 'string' ? (value.trim() || null) : null;
}
|
|
1090
|
+
|
|
1091
|
+
/**
 * Validates and normalises caller-supplied retrieval fixtures.
 *
 * Each usable case needs a non-blank `query`, at least one non-blank string in
 * `docs`, and a finite numeric `expectedDoc`. Blank or non-string docs are
 * dropped. `expectedDoc` is an index into the ORIGINAL `docs` array, so it is
 * remapped to the position of the same document after dropping; previously
 * the raw index was applied to the filtered list, which silently pointed the
 * expectation at a different document whenever an earlier doc was dropped.
 *
 * - A case whose expected document is itself blank/invalid is skipped, since
 *   there is no valid expected answer left.
 * - An index outside the original list is clamped into the filtered list
 *   (legacy behaviour, kept for backward compatibility).
 *
 * @param {*} cases Candidate array of `{ id?, query, docs, expectedDoc }`.
 * @returns {Array<{id: string, query: string, docs: string[], expectedDoc: number}>|null}
 *   Normalised cases, or null when `cases` is not an array or none is usable.
 */
function normalizeRetrievalFixtures(cases) {
  if (!Array.isArray(cases)) return null;
  const normalized = [];
  for (let i = 0; i < cases.length; i++) {
    const entry = cases[i];
    if (!entry || typeof entry !== 'object') continue;

    const query = asText(entry.query);
    if (!query || !Number.isFinite(entry.expectedDoc)) continue;

    const rawDocs = Array.isArray(entry.docs) ? entry.docs : [];
    const requestedDoc = Math.floor(entry.expectedDoc);

    // Keep valid docs and track where the requested document ends up.
    const docs = [];
    let expectedDoc = -1;
    for (let rawIndex = 0; rawIndex < rawDocs.length; rawIndex++) {
      const text = asText(rawDocs[rawIndex]);
      if (!text) continue;
      if (rawIndex === requestedDoc) expectedDoc = docs.length;
      docs.push(text);
    }
    if (docs.length === 0) continue;

    const outOfRange = requestedDoc < 0 || requestedDoc >= rawDocs.length;
    if (outOfRange) {
      expectedDoc = Math.max(0, Math.min(requestedDoc, docs.length - 1));
    } else if (expectedDoc < 0) {
      // The document the fixture points at was dropped as invalid.
      continue;
    }

    normalized.push({
      id: asText(entry.id) ?? `case-${i + 1}`,
      query,
      docs,
      expectedDoc,
    });
  }
  return normalized.length > 0 ? normalized : null;
}
|
|
1113
|
+
|
|
1114
|
+
/**
 * Validates and normalises caller-supplied pair fixtures.
 *
 * A case is kept only when `anchor`, `positive` and `negative` all normalise
 * to non-null text via asText(). A missing `id` becomes `pair-<position>`
 * (1-based position in the input array).
 *
 * @param {*} cases Candidate array of `{ id?, anchor, positive, negative }`.
 * @returns {Array<{id: string, anchor: string, positive: string, negative: string}>|null}
 *   Normalised cases, or null when `cases` is not an array or none is usable.
 */
function normalizePairFixtures(cases) {
  if (!Array.isArray(cases)) {
    return null;
  }

  const kept = [];
  for (const [index, entry] of cases.entries()) {
    if (!entry || typeof entry !== 'object') {
      continue;
    }

    const anchor = asText(entry.anchor);
    const positive = asText(entry.positive);
    const negative = asText(entry.negative);
    const complete = Boolean(anchor) && Boolean(positive) && Boolean(negative);
    if (complete) {
      const id = asText(entry.id) ?? `pair-${index + 1}`;
      kept.push({ id, anchor, positive, negative });
    }
  }

  return kept.length === 0 ? null : kept;
}
|
|
1136
|
+
|
|
1137
|
+
/**
 * Resolves the fixtures and thresholds used by the embedding semantic check.
 *
 * Overrides come from `options.embeddingSemantic` when it is a plain object,
 * otherwise from `runtimeConfig.shared.benchmark.run.embeddingSemantic` when
 * that is a plain object. The two sources are not merged. Any field the chosen
 * source does not supply validly falls back to the built-in default.
 *
 * @param {object} runtimeConfig
 * @param {object|null} [options]
 * @returns {{retrievalCases: *, pairCases: *, minRetrievalTop1Acc: number,
 *   minPairAcc: number, pairMargin: number}} Accuracy thresholds are clamped
 *   to [0, 1]; `pairMargin` is used as given when finite.
 */
function resolveEmbeddingSemanticFixtures(runtimeConfig, options = null) {
  const requested = options?.embeddingSemantic;
  const fromOptions = isPlainObject(requested) ? requested : null;
  const fromRuntime = runtimeConfig?.shared?.benchmark?.run?.embeddingSemantic;

  let source = fromOptions;
  if (source == null) {
    source = isPlainObject(fromRuntime) ? fromRuntime : null;
  }

  // Clamp a finite threshold into [0, 1]; anything else takes the default.
  const unitInterval = (value, fallback) => {
    if (!Number.isFinite(value)) return fallback;
    return Math.max(0, Math.min(1, Number(value)));
  };

  const retrievalCases = normalizeRetrievalFixtures(source?.retrievalCases)
    ?? EMBEDDING_SEMANTIC_RETRIEVAL_CASES;
  const pairCases = normalizePairFixtures(source?.pairCases)
    ?? EMBEDDING_SEMANTIC_PAIR_CASES;
  const minRetrievalTop1Acc = unitInterval(
    source?.minRetrievalTop1Acc,
    EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1,
  );
  const minPairAcc = unitInterval(source?.minPairAcc, EMBEDDING_SEMANTIC_MIN_PAIR_ACC);

  let pairMargin = EMBEDDING_SEMANTIC_PAIR_MARGIN;
  if (Number.isFinite(source?.pairMargin)) {
    pairMargin = Number(source.pairMargin);
  }

  return { retrievalCases, pairCases, minRetrievalTop1Acc, minPairAcc, pairMargin };
}
|
|
1166
|
+
|
|
1167
|
+
/**
 * Selects the text-formatting style for the embedding semantic check.
 *
 * Looks up the 'embeddingSemanticStyle' rule (group 'inference' / 'config')
 * with lower-cased model id, preset id and model type taken from
 * `pipeline.manifest`; missing values are passed as empty strings.
 *
 * @param {object} pipeline
 * @returns {string} The rule value when it is a non-empty string, else 'default'.
 */
function resolveEmbeddingSemanticStyle(pipeline) {
  const manifest = pipeline?.manifest ?? null;
  const lower = (value) => String(value ?? '').toLowerCase();

  const modelType = manifest?.config?.model_type
    ?? manifest?.config?.text_config?.model_type;
  const context = {
    modelId: lower(manifest?.modelId),
    presetId: lower(manifest?.inference?.presetId),
    manifestModelType: lower(modelType),
  };

  const style = selectRuleValue('inference', 'config', 'embeddingSemanticStyle', context);
  const usable = typeof style === 'string' && style.length > 0;
  return usable ? style : 'default';
}
|
|
1183
|
+
|
|
1184
|
+
/**
 * Wraps text in the prompt prefix required by the given embedding style.
 *
 * Only the 'embeddinggemma' style adds prefixes, and only for the 'query' and
 * 'document' kinds; every other combination returns the text unchanged.
 *
 * @param {string} text
 * @param {string} kind 'query' or 'document'.
 * @param {string} style
 * @returns {string}
 */
function formatEmbeddingSemanticText(text, kind, style) {
  if (style !== 'embeddinggemma') {
    return text;
  }
  switch (kind) {
    case 'query':
      return `task: search result | query: ${text}`;
    case 'document':
      return `title: None | text: ${text}`;
    default:
      return text;
  }
}
|
|
1195
|
+
|
|
1196
|
+
/**
 * Returns the trimmed `runtimeConfig.inference.prompt` when it is a non-blank
 * string, otherwise DEFAULT_HARNESS_PROMPT.
 *
 * @param {object} runtimeConfig
 * @returns {string}
 */
function resolvePrompt(runtimeConfig) {
  const configured = runtimeConfig?.inference?.prompt;
  const trimmed = typeof configured === 'string' ? configured.trim() : '';
  return trimmed ? trimmed : DEFAULT_HARNESS_PROMPT;
}
|
|
1203
|
+
|
|
1204
|
+
/**
 * True for any non-null object, arrays included; false for primitives, null
 * and undefined.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isStructuredPromptInput(value) {
  if (value === null || value === undefined) {
    return false;
  }
  return typeof value === 'object';
}
|
|
1207
|
+
|
|
1208
|
+
/**
 * Picks the prompt input for a generation run.
 *
 * Candidates are tried in order: `runOverrides.prompt`, then
 * `runtimeConfig.inference.prompt`. A candidate wins if it is a non-blank
 * string (returned trimmed) or a structured input per
 * isStructuredPromptInput() (returned as-is). If neither qualifies,
 * DEFAULT_HARNESS_PROMPT is returned.
 *
 * @param {object} runtimeConfig
 * @param {object|null} [runOverrides]
 * @returns {*}
 */
function resolveGenerationPromptInput(runtimeConfig, runOverrides = null) {
  // Read lazily so the runtime prompt is only consulted if the override loses.
  const readers = [
    () => runOverrides?.prompt,
    () => runtimeConfig?.inference?.prompt,
  ];

  for (const read of readers) {
    const candidate = read();
    const text = typeof candidate === 'string' ? candidate.trim() : '';
    if (text) {
      return text;
    }
    if (isStructuredPromptInput(candidate)) {
      return candidate;
    }
  }

  return DEFAULT_HARNESS_PROMPT;
}
|
|
1227
|
+
|
|
1228
|
+
/**
 * Returns the configured generation token budget.
 *
 * Uses `runtimeConfig.inference.batching.maxTokens` when it is a finite
 * number (floored, minimum 1); otherwise DEFAULT_HARNESS_MAX_TOKENS.
 *
 * @param {object} runtimeConfig
 * @returns {number}
 */
function resolveMaxTokens(runtimeConfig) {
  const configured = runtimeConfig?.inference?.batching?.maxTokens;
  return Number.isFinite(configured)
    ? Math.max(1, Math.floor(configured))
    : DEFAULT_HARNESS_MAX_TOKENS;
}
|
|
1235
|
+
|
|
1236
|
+
/**
 * Resolves the settings for a benchmark run from
 * `runtimeConfig.shared.benchmark.run`, falling back to the general inference
 * settings.
 *
 * - prompt: non-blank `customPrompt` (trimmed), else resolvePrompt().
 * - maxTokens: finite `maxNewTokens` (floored, min 1), else resolveMaxTokens().
 * - warmupRuns: floored, min 0, default 0.
 * - timedRuns: floored, min 1, default 1.
 * - sampling: inference sampling overlaid with benchmark sampling.
 *
 * Run counts that do not coerce to a finite number (e.g. 'abc', NaN,
 * Infinity) now fall back to their default. Previously they produced NaN or
 * Infinity because Math.max/Math.floor pass those values straight through.
 *
 * @param {object} runtimeConfig
 * @returns {{warmupRuns: number, timedRuns: number, prompt: string,
 *   maxTokens: number, sampling: object}}
 */
function resolveBenchmarkRunSettings(runtimeConfig) {
  const benchConfig = runtimeConfig?.shared?.benchmark?.run || {};
  const runtimeSampling = isPlainObject(runtimeConfig?.inference?.sampling)
    ? runtimeConfig.inference.sampling
    : {};
  const benchSampling = isPlainObject(benchConfig?.sampling)
    ? benchConfig.sampling
    : {};
  const prompt = typeof benchConfig.customPrompt === 'string' && benchConfig.customPrompt.trim()
    ? benchConfig.customPrompt.trim()
    : resolvePrompt(runtimeConfig);
  const maxTokens = Number.isFinite(benchConfig.maxNewTokens)
    ? Math.max(1, Math.floor(benchConfig.maxNewTokens))
    : resolveMaxTokens(runtimeConfig);

  // `minimum` doubles as the default: 0 warmup runs, 1 timed run.
  const toRunCount = (value, minimum) => {
    const count = Math.floor(Number(value ?? minimum));
    return Number.isFinite(count) ? Math.max(minimum, count) : minimum;
  };

  return {
    warmupRuns: toRunCount(benchConfig.warmupRuns, 0),
    timedRuns: toRunCount(benchConfig.timedRuns, 1),
    prompt,
    maxTokens,
    sampling: {
      ...runtimeSampling,
      ...benchSampling,
    },
  };
}
|
|
1262
|
+
|
|
1263
|
+
/**
 * Computes summary statistics over an embedding vector.
 *
 * Accepts an array or an ArrayBuffer view; anything else (or a view without a
 * finite `length`) is treated as an empty vector. Statistics are computed over
 * finite values only; non-finite entries are counted separately.
 *
 * @param {*} embedding
 * @returns {{embeddingDim: number, nonFiniteCount: number, finiteCount: number,
 *   finiteRatio: number, min: ?number, max: ?number, maxAbs: ?number,
 *   mean: ?number, stdDev: ?number, l2Norm: ?number, preview: Array<?number>}}
 *   `preview` holds up to EMBEDDING_PREVIEW_LENGTH leading values rounded to
 *   6 decimals (null for non-finite entries). Numeric stats are null when
 *   there are no finite values.
 */
function summarizeEmbeddingValues(embedding) {
  const isVector = ArrayBuffer.isView(embedding) || Array.isArray(embedding);
  const values = isVector ? embedding : null;
  const embeddingDim = Number.isFinite(values?.length) ? values.length : 0;

  const preview = [];
  const tally = {
    finite: 0,
    nonFinite: 0,
    min: Infinity,
    max: -Infinity,
    maxAbs: 0,
    sum: 0,
    sumSq: 0,
  };

  for (let idx = 0; idx < embeddingDim; idx += 1) {
    const value = Number(values[idx]);
    const finite = Number.isFinite(value);

    if (idx < EMBEDDING_PREVIEW_LENGTH) {
      preview.push(finite ? Number(value.toFixed(6)) : null);
    }
    if (!finite) {
      tally.nonFinite += 1;
      continue;
    }

    tally.finite += 1;
    if (value < tally.min) tally.min = value;
    if (value > tally.max) tally.max = value;
    const magnitude = Math.abs(value);
    if (magnitude > tally.maxAbs) tally.maxAbs = magnitude;
    tally.sum += value;
    tally.sumSq += value * value;
  }

  const hasFinite = tally.finite > 0;
  const mean = hasFinite ? (tally.sum / tally.finite) : null;
  const safeMean = mean || 0;
  // Clamp at zero so rounding error cannot produce a negative variance.
  const variance = hasFinite
    ? Math.max(0, (tally.sumSq / tally.finite) - (safeMean * safeMean))
    : null;

  return {
    embeddingDim,
    nonFiniteCount: tally.nonFinite,
    finiteCount: tally.finite,
    finiteRatio: embeddingDim > 0 ? tally.finite / embeddingDim : 0,
    min: hasFinite ? tally.min : null,
    max: hasFinite ? tally.max : null,
    maxAbs: hasFinite ? tally.maxAbs : null,
    mean,
    stdDev: variance == null ? null : Math.sqrt(variance),
    l2Norm: hasFinite ? Math.sqrt(tally.sumSq) : null,
    preview,
  };
}
|
|
1314
|
+
|
|
1315
|
+
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / sqrt(|a|^2 * |b|^2), or NaN when either input
 *   is missing, lengths are non-finite, different or zero, any element is
 *   non-finite, or either vector has zero norm.
 */
function cosineSimilarity(a, b) {
  const comparable = Boolean(a)
    && Boolean(b)
    && Number.isFinite(a.length)
    && Number.isFinite(b.length)
    && a.length === b.length
    && a.length !== 0;
  if (!comparable) {
    return NaN;
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let idx = 0; idx < a.length; idx += 1) {
    const left = Number(a[idx]);
    const right = Number(b[idx]);
    if (!(Number.isFinite(left) && Number.isFinite(right))) {
      return NaN;
    }
    dot += left * right;
    normA += left * left;
    normB += right * right;
  }

  return normA > 0 && normB > 0 ? dot / Math.sqrt(normA * normB) : NaN;
}
|
|
1332
|
+
|
|
1333
|
+
/**
 * Index of the largest finite value.
 *
 * @param {ArrayLike<*>} values Entries are coerced with Number(); non-finite
 *   results are ignored.
 * @returns {number} Index of the first maximum, or -1 when no entry is finite.
 */
function top1Index(values) {
  let winner = -1;
  let winnerValue = -Infinity;
  for (let idx = 0; idx < values.length; idx += 1) {
    const candidate = Number(values[idx]);
    // Strict ">" keeps the earliest index on ties.
    if (Number.isFinite(candidate) && candidate > winnerValue) {
      winnerValue = candidate;
      winner = idx;
    }
  }
  return winner;
}
|
|
1346
|
+
|
|
1347
|
+
/**
 * Embeds a single text with a freshly reset pipeline.
 *
 * Calls `pipeline.reset()` if present, then `pipeline.embed(text)`, and
 * returns the result's `embedding`.
 *
 * @param {object} pipeline
 * @param {string} text
 * @returns {Promise<ArrayLike<number>>}
 * @throws {Error} When the result has no embedding with a positive finite length.
 */
async function embedStandaloneText(pipeline, text) {
  pipeline.reset?.();
  const { embedding } = (await pipeline.embed(text)) ?? {};

  const usable = Boolean(embedding)
    && Number.isFinite(embedding.length)
    && embedding.length > 0;
  if (!usable) {
    throw new Error('Semantic check embedding is missing.');
  }
  return embedding;
}
|
|
1356
|
+
|
|
1357
|
+
/**
 * Runs the embedding semantic sanity check: a retrieval test and a
 * paraphrase-pair test.
 *
 * Retrieval: for each case the query and each document are embedded one at a
 * time; the case passes when the most similar document is `expectedDoc`.
 * Pairs: anchor, positive and negative are each embedded with the 'query'
 * format; the case passes when sim(anchor, positive) - sim(anchor, negative)
 * is finite and strictly greater than the configured margin.
 *
 * @param {object} pipeline Must support the calls made by embedStandaloneText().
 * @param {object|null} [options] Passed to resolveEmbeddingSemanticFixtures().
 * @returns {Promise<object>} Overall `passed` flag (both accuracies meet their
 *   minimums), accuracies, counts, thresholds, failed case ids, per-case
 *   details and `durationMs` (at least 1).
 */
async function runEmbeddingSemanticChecks(pipeline, options = null) {
  const config = resolveEmbeddingSemanticFixtures(pipeline?.runtimeConfig ?? {}, options);
  const start = performance.now();
  const semanticStyle = resolveEmbeddingSemanticStyle(pipeline);

  const embedAs = (text, kind) => embedStandaloneText(
    pipeline,
    formatEmbeddingSemanticText(text, kind, semanticStyle),
  );
  const roundOrNull = (value) => (Number.isFinite(value) ? Number(value.toFixed(6)) : null);

  // Retrieval cases: embeddings are requested sequentially, query first.
  const retrieval = [];
  for (const fixture of config.retrievalCases) {
    const queryEmbedding = await embedAs(fixture.query, 'query');
    const docEmbeddings = [];
    for (const doc of fixture.docs) {
      docEmbeddings.push(await embedAs(doc, 'document'));
    }
    const sims = docEmbeddings.map((docEmbedding) => cosineSimilarity(queryEmbedding, docEmbedding));
    const topDoc = top1Index(sims);
    retrieval.push({
      id: fixture.id,
      passed: topDoc === fixture.expectedDoc,
      expectedDoc: fixture.expectedDoc,
      topDoc,
      sims: sims.map((sim) => roundOrNull(sim)),
    });
  }

  // Pair cases: all three texts use the 'query' format.
  const pairs = [];
  for (const fixture of config.pairCases) {
    const anchor = await embedAs(fixture.anchor, 'query');
    const positive = await embedAs(fixture.positive, 'query');
    const negative = await embedAs(fixture.negative, 'query');
    const simPos = cosineSimilarity(anchor, positive);
    const simNeg = cosineSimilarity(anchor, negative);
    const margin = simPos - simNeg;
    pairs.push({
      id: fixture.id,
      passed: Number.isFinite(margin) && margin > config.pairMargin,
      simPos: roundOrNull(simPos),
      simNeg: roundOrNull(simNeg),
      margin: roundOrNull(margin),
    });
  }

  const retrievalPassed = retrieval.filter((item) => item.passed).length;
  const pairPassed = pairs.filter((item) => item.passed).length;
  const retrievalTop1Acc = retrieval.length > 0 ? retrievalPassed / retrieval.length : 0;
  const pairAcc = pairs.length > 0 ? pairPassed / pairs.length : 0;

  const failedCaseIds = [];
  for (const item of retrieval) {
    if (!item.passed) failedCaseIds.push(`retrieval:${item.id}`);
  }
  for (const item of pairs) {
    if (!item.passed) failedCaseIds.push(`pair:${item.id}`);
  }

  return {
    passed: retrievalTop1Acc >= config.minRetrievalTop1Acc && pairAcc >= config.minPairAcc,
    style: semanticStyle,
    retrievalTop1Acc,
    pairAcc,
    retrievalPassed,
    retrievalTotal: retrieval.length,
    pairPassed,
    pairTotal: pairs.length,
    minRetrievalTop1Acc: Number(config.minRetrievalTop1Acc.toFixed(4)),
    minPairAcc: Number(config.minPairAcc.toFixed(4)),
    pairMarginThreshold: Number(config.pairMargin.toFixed(4)),
    failedCaseIds,
    retrieval,
    pairs,
    durationMs: Math.max(1, performance.now() - start),
  };
}
|
|
1448
|
+
|
|
1449
|
+
// A token counts as "special" when, after trimming, it is exactly one of:
// <pad>, <unused> (optionally followed by digits), <eos>, <bos>, <s>, </s>,
// [PAD], [UNK], [SEP], [CLS], or any <...> token with 1-32 characters between
// the brackets. Matching is case-insensitive.
const SPECIAL_TOKEN_RE = /^(<pad>|<unused\d*>|<eos>|<bos>|<s>|<\/s>|\[PAD\]|\[UNK\]|\[SEP\]|\[CLS\]|<[^>]{1,32}>)$/i;
// Output is rejected when at least this fraction of tokens is special.
const PAD_DOMINANCE_THRESHOLD = 0.5;

/**
 * Heuristic check that generated output is not degenerate.
 *
 * @param {Array<*>} tokens Generated token texts.
 * @param {*} output Full decoded output.
 * @returns {boolean} False when there are no tokens, when special tokens make
 *   up at least PAD_DOMINANCE_THRESHOLD of them, or when nothing but
 *   whitespace remains after angle-bracket tags are removed from `output`.
 */
function isCoherentOutput(tokens, output) {
  const totalTokens = tokens.length;
  if (totalTokens === 0) {
    return false;
  }

  const specialTokens = tokens.reduce(
    (count, token) => (SPECIAL_TOKEN_RE.test(String(token).trim()) ? count + 1 : count),
    0,
  );
  if (specialTokens / totalTokens >= PAD_DOMINANCE_THRESHOLD) {
    return false;
  }

  const withoutTags = String(output || '').replace(/<[^>\n]{1,80}>/g, ' ');
  const collapsed = withoutTags.replace(/\s+/g, ' ').trim();
  return collapsed.length > 0;
}
|
|
1464
|
+
|
|
1465
|
+
/**
 * Runs a single text-generation pass through `pipeline.generate` and
 * collects the streamed tokens together with timing/throughput figures.
 *
 * Values in `runOverrides` win over `runtimeConfig`:
 * - `useChatTemplate`: override, then `inference.chatTemplate.enabled`, then
 *   `true` only when the resolved prompt is a structured input.
 * - `maxTokens`: a finite override is floored and clamped to at least 1,
 *   otherwise `resolveMaxTokens(runtimeConfig)` is used.
 * - `sampling`: a plain-object override replaces the configured sampling
 *   object wholesale (it is not merged).
 *
 * @param {object} pipeline - Object exposing an async-iterable `generate()`
 *   and, optionally, `getStats()`.
 * @param {object} runtimeConfig - Runtime configuration object.
 * @param {object|null} [runOverrides=null] - Optional per-run overrides.
 * @returns {Promise<object>} Prompt, tokens, token ids, joined output,
 *   wall-clock duration and a `phase` breakdown.
 */
async function runGeneration(pipeline, runtimeConfig, runOverrides = null) {
  const promptInput = resolveGenerationPromptInput(runtimeConfig, runOverrides);

  // Resolve the chat-template flag step by step; the structured-prompt probe
  // only runs when neither the override nor the config supplies a value.
  let useChatTemplate = runOverrides?.useChatTemplate;
  if (useChatTemplate == null) {
    useChatTemplate = runtimeConfig?.inference?.chatTemplate?.enabled;
  }
  if (useChatTemplate == null) {
    useChatTemplate = isStructuredPromptInput(promptInput) ? true : undefined;
  }

  let maxTokens;
  if (Number.isFinite(runOverrides?.maxTokens)) {
    maxTokens = Math.max(1, Math.floor(runOverrides.maxTokens));
  } else {
    maxTokens = resolveMaxTokens(runtimeConfig);
  }

  let sampling;
  if (isPlainObject(runOverrides?.sampling)) {
    sampling = runOverrides.sampling;
  } else {
    sampling = runtimeConfig.inference?.sampling || {};
  }

  const debugSettings = runtimeConfig.shared?.debug;
  const debugProbes = debugSettings?.probes || [];
  const profile = debugSettings?.profiler?.enabled === true;
  // Command batching is switched off whenever at least one debug probe is configured.
  const disableCommandBatching = Array.isArray(debugProbes) && debugProbes.length > 0;

  const tokens = [];
  const tokenIds = [];
  const generateOptions = {
    maxTokens,
    temperature: sampling.temperature,
    topP: sampling.topP,
    topK: sampling.topK,
    repetitionPenalty: sampling.repetitionPenalty,
    greedyThreshold: sampling.greedyThreshold,
    useChatTemplate,
    profile,
    disableCommandBatching,
    onToken: (tokenId) => {
      tokenIds.push(tokenId);
    },
  };

  const start = performance.now();
  for await (const piece of pipeline.generate(promptInput, generateOptions)) {
    // Only string chunks count as generated tokens.
    if (typeof piece !== 'string') {
      continue;
    }
    tokens.push(piece);
  }
  // Clamp to 1ms so the throughput division below never divides by zero.
  const durationMs = Math.max(1, performance.now() - start);
  const tokensPerSec = (tokens.length / durationMs) * 1000;

  const hasStats = typeof pipeline?.getStats === 'function';
  const stats = hasStats ? (pipeline.getStats() || {}) : {};

  const finiteOr = (value, fallback) => (Number.isFinite(value) ? value : fallback);
  const perSecond = (count, elapsedMs) => (elapsedMs > 0 ? (count / elapsedMs) * 1000 : 0);

  const prefillMs = finiteOr(stats.prefillTimeMs, 0);
  const ttftMs = finiteOr(stats.ttftMs, prefillMs);
  const decodeMs = finiteOr(stats.decodeTimeMs, 0);
  const prefillTokens = finiteOr(stats.prefillTokens, 0);
  // Without a reported decode count, use the streamed token count minus one.
  const decodeTokens = finiteOr(stats.decodeTokens, Math.max(0, tokens.length - 1));

  // Copy only the finite GPU timings; pairs are [output key, stats key].
  const gpuFields = [
    ['prefillMs', 'gpuTimePrefillMs'],
    ['decodeMs', 'gpuTimeDecodeMs'],
    ['decodeRecordMs', 'decodeRecordMs'],
    ['decodeSubmitWaitMs', 'decodeSubmitWaitMs'],
    ['decodeReadbackWaitMs', 'decodeReadbackWaitMs'],
  ];
  const gpu = {};
  for (const [targetKey, statsKey] of gpuFields) {
    if (Number.isFinite(stats[statsKey])) {
      gpu[targetKey] = stats[statsKey];
    }
  }

  return {
    prompt: promptInput,
    maxTokens,
    tokens,
    tokenIds,
    output: tokens.join(''),
    durationMs,
    tokensPerSec,
    phase: {
      totalMs: finiteOr(stats.totalTimeMs, durationMs),
      ttftMs,
      prefillMs,
      decodeMs,
      prefillTokens,
      decodeTokens,
      prefillTokensPerSec: perSecond(prefillTokens, prefillMs),
      prefillTokensPerSecTtft: perSecond(prefillTokens, ttftMs),
      decodeTokensPerSec: perSecond(decodeTokens, decodeMs),
      gpu: Object.keys(gpu).length > 0 ? gpu : null,
      decodeProfileSteps: Array.isArray(stats.decodeProfileSteps)
        ? stats.decodeProfileSteps
        : null,
    },
  };
}
|
|
1557
|
+
|
|
1558
|
+
/**
 * Embeds one prompt with `pipeline.embed` and reports wall-clock duration,
 * token count and the summary produced by `summarizeEmbeddingValues`.
 *
 * A non-blank string `runOverrides.prompt` (after trimming) takes precedence;
 * otherwise the prompt comes from `resolvePrompt(runtimeConfig)`.
 *
 * @param {object} pipeline - Object exposing an async `embed(prompt)`.
 * @param {object} runtimeConfig - Runtime configuration object.
 * @param {object|null} [runOverrides=null] - Optional overrides (`prompt`).
 * @returns {Promise<object>} `{ prompt, tokenCount, durationMs }` merged with
 *   the embedding summary fields (summary keys win on collision).
 */
async function runEmbedding(pipeline, runtimeConfig, runOverrides = null) {
  const overridePrompt = typeof runOverrides?.prompt === 'string'
    ? runOverrides.prompt.trim()
    : '';
  const prompt = overridePrompt.length > 0
    ? overridePrompt
    : resolvePrompt(runtimeConfig);

  const startedAt = performance.now();
  const result = await pipeline.embed(prompt);
  // Clamp to 1ms so downstream rate computations never see a zero duration.
  const durationMs = Math.max(1, performance.now() - startedAt);

  const reportedLength = result?.tokens?.length;
  const tokenCount = Number.isFinite(reportedLength) ? reportedLength : 0;

  const summary = summarizeEmbeddingValues(result?.embedding);
  return { prompt, tokenCount, durationMs, ...summary };
}
|
|
1574
|
+
|
|
1575
|
+
/**
 * Runs the single-shot inference suite against the model resolved by
 * `initializeSuiteModel(options)`.
 *
 * When the manifest's `modelType` is `'embedding'`, one embedding run plus the
 * semantic checks are executed and reported as two results (`embedding`,
 * `embedding-semantic`). Any other model type runs one text generation and
 * reports a single `generation` result.
 *
 * The pipeline is unloaded (unless `options.keepPipeline` is truthy) before
 * the summary is built, and also when the run itself throws, so a failing
 * run does not leave the pipeline loaded.
 *
 * @param {object} [options={}] - Suite options. Fields read here:
 *   `cacheMode`, `loadMode`, `modelUrl`, `modelId`, `suiteName`,
 *   `keepPipeline`; the whole object is also forwarded to
 *   `initializeSuiteModel` and `runEmbeddingSemanticChecks`.
 * @returns {Promise<object>} Suite summary merged with model id, cache/load
 *   mode, environment info, canonical timing, output, metrics, memory stats,
 *   device info and (only when `keepPipeline` is set) the live pipeline.
 * @throws Rethrows any error raised by the embedding/generation run. An
 *   unload error raised while cleaning up after such a failure is suppressed
 *   so the original error reaches the caller.
 */
async function runInferenceSuite(options = {}) {
  const startTime = performance.now();
  const harness = await initializeSuiteModel(options);
  const runtimeConfig = getRuntimeConfig();
  const modelType = harness.manifest?.modelType || 'transformer';
  const cacheMode = normalizeCacheMode(options.cacheMode);
  const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
  const safeModelLoadMs = toTimingNumber(harness.modelLoadMs, 0);

  // Rounding helpers shared by the embedding output and metrics objects.
  const round6OrNull = (value) => (value == null ? null : Number(value.toFixed(6)));
  const round4 = (value) => Number(value.toFixed(4));
  const asPercent = (ratio) => (ratio * 100).toFixed(1);

  let results;
  let output = null;
  let metrics;
  let memoryStats = null;
  let runFailed = false;

  try {
    if (modelType === 'embedding') {
      const run = await runEmbedding(harness.pipeline, runtimeConfig);
      const semantic = await runEmbeddingSemanticChecks(harness.pipeline, options);
      const isValidEmbedding = run.embeddingDim > 0 && run.nonFiniteCount === 0;
      const isSemanticValid = semantic.passed;
      const finiteRatio = Number((run.finiteRatio ?? 0).toFixed(6));

      output = {
        mode: 'embedding',
        tokens: run.tokenCount,
        embeddingDim: run.embeddingDim,
        finiteValues: run.finiteCount,
        nonFiniteValues: run.nonFiniteCount,
        finiteRatio,
        min: round6OrNull(run.min),
        max: round6OrNull(run.max),
        maxAbs: round6OrNull(run.maxAbs),
        mean: round6OrNull(run.mean),
        stdDev: round6OrNull(run.stdDev),
        l2Norm: round6OrNull(run.l2Norm),
        preview: run.preview,
        semantic: {
          passed: isSemanticValid,
          style: semantic.style,
          retrievalTop1Acc: round4(semantic.retrievalTop1Acc),
          pairAcc: round4(semantic.pairAcc),
          failedCaseIds: semantic.failedCaseIds,
        },
      };

      let embeddingError;
      if (!isValidEmbedding) {
        embeddingError = run.embeddingDim <= 0
          ? 'No embedding returned'
          : `Embedding contains non-finite values (${run.nonFiniteCount}/${run.embeddingDim})`;
      }

      let semanticError;
      if (!isSemanticValid) {
        semanticError = (
          `Semantic checks below threshold: retrieval=${asPercent(semantic.retrievalTop1Acc)}% `
          + `(min ${asPercent(semantic.minRetrievalTop1Acc)}%), `
          + `pairs=${asPercent(semantic.pairAcc)}% `
          + `(min ${asPercent(semantic.minPairAcc)}%). `
          + (semantic.failedCaseIds.length > 0 ? `Failed: ${semantic.failedCaseIds.join(', ')}` : '')
        );
      }

      results = [
        {
          name: 'embedding',
          passed: isValidEmbedding,
          duration: run.durationMs,
          error: embeddingError,
        },
        {
          name: 'embedding-semantic',
          passed: isSemanticValid,
          duration: semantic.durationMs,
          error: semanticError,
        },
      ];

      metrics = {
        prompt: run.prompt,
        embeddingTokens: run.tokenCount,
        embeddingDim: run.embeddingDim,
        finiteValues: run.finiteCount,
        finiteRatio,
        nonFiniteValues: run.nonFiniteCount,
        embeddingMin: round6OrNull(run.min),
        embeddingMax: round6OrNull(run.max),
        embeddingMaxAbs: round6OrNull(run.maxAbs),
        embeddingMean: round6OrNull(run.mean),
        embeddingStdDev: round6OrNull(run.stdDev),
        embeddingL2Norm: round6OrNull(run.l2Norm),
        embeddingMs: Number(run.durationMs.toFixed(2)),
        semanticPassed: isSemanticValid,
        semanticDurationMs: Number(semantic.durationMs.toFixed(2)),
        semanticRetrievalTop1Acc: round4(semantic.retrievalTop1Acc),
        semanticPairAcc: round4(semantic.pairAcc),
        semanticRetrievalPassed: semantic.retrievalPassed,
        semanticRetrievalTotal: semantic.retrievalTotal,
        semanticPairPassed: semantic.pairPassed,
        semanticPairTotal: semantic.pairTotal,
        semanticMinRetrievalTop1Acc: round4(semantic.minRetrievalTop1Acc),
        semanticMinPairAcc: round4(semantic.minPairAcc),
        semanticPairMarginThreshold: round4(semantic.pairMarginThreshold),
        semanticStyle: semantic.style,
        semanticFailedCases: semantic.failedCaseIds,
        semanticDetails: {
          retrieval: semantic.retrieval,
          pairs: semantic.pairs,
        },
        modelLoadMs: safeModelLoadMs,
        endToEndMs: safeToFixed(safeModelLoadMs + run.durationMs),
        embeddingPreview: run.preview,
      };
    } else {
      const run = await runGeneration(harness.pipeline, runtimeConfig);
      const coherent = isCoherentOutput(run.tokens, run.output);
      const producedTokens = run.tokens.length > 0;

      let generationError;
      if (!producedTokens) {
        generationError = 'No tokens generated';
      } else if (!coherent) {
        generationError = 'Output dominated by padding or special tokens';
      }

      results = [
        {
          name: 'generation',
          passed: producedTokens && coherent,
          duration: run.durationMs,
          error: generationError,
        },
      ];
      output = run.output;
      metrics = {
        prompt: run.prompt,
        maxTokens: run.maxTokens,
        tokensGenerated: run.tokens.length,
        tokensPerSec: safeToFixed(run.tokensPerSec),
        totalRunMs: safeToFixed(run.phase.totalMs),
        firstTokenMs: safeToFixed(run.phase.ttftMs),
        firstResponseMs: safeToFixed(safeModelLoadMs + run.phase.ttftMs),
        prefillMs: safeToFixed(run.phase.prefillMs),
        decodeMs: safeToFixed(run.phase.decodeMs),
        prefillTokens: Math.round(run.phase.prefillTokens),
        decodeTokens: Math.round(run.phase.decodeTokens),
        prefillTokensPerSec: safeToFixed(run.phase.prefillTokensPerSec),
        prefillTokensPerSecTtft: safeToFixed(run.phase.prefillTokensPerSecTtft),
        decodeTokensPerSec: safeToFixed(run.phase.decodeTokensPerSec),
        modelLoadMs: safeModelLoadMs,
        gpu: run.phase.gpu,
        decodeProfileSteps: run.phase.decodeProfileSteps,
      };
    }

    // Memory stats must be read while the pipeline is still loaded.
    memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
      ? harness.pipeline.getMemoryStats()
      : null;
  } catch (error) {
    runFailed = true;
    throw error;
  } finally {
    // Unload on both success and failure so a throwing run does not leak the
    // pipeline. On the success path an unload error still propagates.
    if (typeof harness.pipeline?.unload === 'function' && !options.keepPipeline) {
      try {
        await harness.pipeline.unload();
      } catch (unloadError) {
        if (!runFailed) {
          throw unloadError;
        }
      }
    }
  }

  const summary = buildSuiteSummary(options.suiteName || 'inference', results, startTime);
  const timing = buildCanonicalTiming({
    modelLoadMs: safeModelLoadMs,
    firstTokenMs: metrics.firstTokenMs ?? null,
    firstResponseMs: Number.isFinite(metrics.firstTokenMs)
      ? safeModelLoadMs + metrics.firstTokenMs
      : null,
    prefillMs: metrics.prefillMs ?? 0,
    decodeMs: metrics.decodeMs ?? 0,
    decodeMsPerTokenP50: metrics.decodeMsPerTokenP50 ?? null,
    decodeMsPerTokenP95: metrics.decodeMsPerTokenP95 ?? null,
    decodeMsPerTokenP99: metrics.decodeMsPerTokenP99 ?? null,
    totalRunMs: metrics.totalRunMs ?? metrics.decodeMs ?? 0,
    decodeTokensPerSec: metrics.decodeTokensPerSec,
    prefillTokensPerSec: metrics.prefillTokensPerSec,
    cacheMode,
    loadMode,
  });
  const timingDiagnostics = buildTimingDiagnostics(timing, {
    source: 'doppler',
    prefillSemantics: 'internal_prefill_phase',
  });

  const hasNavigator = typeof navigator !== 'undefined';
  return {
    ...summary,
    modelId: options.modelId || harness.manifest?.modelId || 'unknown',
    cacheMode,
    loadMode,
    env: {
      library: 'doppler',
      runtime: 'browser',
      device: 'webgpu',
      browserUserAgent: hasNavigator ? (navigator.userAgent || null) : null,
      browserPlatform: hasNavigator ? (navigator.platform || null) : null,
      browserLanguage: hasNavigator ? (navigator.language || null) : null,
      browserVendor: hasNavigator ? (navigator.vendor || null) : null,
    },
    timing,
    timingDiagnostics,
    output,
    metrics,
    memoryStats,
    deviceInfo: resolveDeviceInfo(),
    pipeline: options.keepPipeline ? harness.pipeline : null,
  };
}
|
|
1765
|
+
|
|
1766
|
+
/**
 * Runs the benchmark suite for the workload selected by
 * `options.workloadType`.
 *
 * - `'training'`: delegates to `runTrainingBenchSuite`, validates a non-empty
 *   training metrics report, and wraps the result with canonical timing.
 * - `'diffusion'`: delegates to `runDiffusionSuite` and re-labels the result
 *   as a `bench` suite.
 * - anything else: loads the model via `initializeSuiteModel`, then performs
 *   `warmupRuns + timedRuns` embedding or generation runs (depending on the
 *   manifest's `modelType`), collecting samples only from the timed runs.
 *
 * For the model-backed path the pipeline is unloaded (unless
 * `options.keepPipeline` is truthy) before the summary is built, and also
 * when a run throws, so a failing benchmark does not leave the pipeline
 * loaded.
 *
 * @param {object} [options={}] - Suite options. Fields read here:
 *   `cacheMode`, `loadMode`, `modelUrl`, `modelId`, `workloadType`,
 *   `captureOutput`, `keepPipeline`; the object is also forwarded to the
 *   delegated suites and to `initializeSuiteModel`.
 * @returns {Promise<object>} Benchmark report (summary, timing, metrics, env).
 * @throws Rethrows any error raised by a benchmark run. An unload error
 *   raised while cleaning up after such a failure is suppressed so the
 *   original error reaches the caller.
 */
async function runBenchSuite(options = {}) {
  const startTime = performance.now();
  const runtimeConfig = getRuntimeConfig();
  const benchRun = resolveBenchmarkRunSettings(runtimeConfig);
  const warmupRuns = benchRun.warmupRuns;
  const timedRuns = benchRun.timedRuns;
  const cacheMode = normalizeCacheMode(options.cacheMode);
  const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
  const workloadType = normalizeWorkloadType(options.workloadType);

  // Environment descriptor shared by the training and model-backed reports.
  const describeEnv = () => {
    const hasNavigator = typeof navigator !== 'undefined';
    return {
      library: 'doppler',
      runtime: 'browser',
      device: 'webgpu',
      browserUserAgent: hasNavigator ? (navigator.userAgent || null) : null,
      browserPlatform: hasNavigator ? (navigator.platform || null) : null,
      browserLanguage: hasNavigator ? (navigator.language || null) : null,
      browserVendor: hasNavigator ? (navigator.vendor || null) : null,
    };
  };

  if (workloadType === 'training') {
    const trainingBench = await runTrainingBenchSuite({
      ...options,
      benchRun,
      workloadType,
    });
    const trainingReport = trainingBench?.metrics?.trainingMetricsReport;
    if (Array.isArray(trainingReport) && trainingReport.length > 0) {
      validateTrainingMetricsReport(trainingReport);
    }
    const runStats = trainingBench?.metrics?.latency?.runMs || computeSampleStats([]);
    const stepStats = trainingBench?.metrics?.latency?.stepMs || computeSampleStats([]);
    const throughputStats = trainingBench?.metrics?.throughput?.stepsPerSec || computeSampleStats([]);
    // Training steps are reported through the decode-oriented timing fields.
    const timing = buildCanonicalTiming({
      modelLoadMs: 0,
      firstTokenMs: null,
      firstResponseMs: null,
      prefillMs: null,
      decodeMs: stepStats.median,
      totalRunMs: runStats.median,
      decodeTokensPerSec: throughputStats.median,
      prefillTokensPerSec: null,
      cacheMode,
      loadMode,
    });
    const timingDiagnostics = buildTimingDiagnostics(timing, {
      source: 'doppler',
      prefillSemantics: 'not_applicable_training_workload',
    });
    return {
      ...trainingBench,
      // Null-safe like the `trainingBench?.metrics` reads above.
      modelId: trainingBench?.modelId || options.modelId || options.modelUrl || 'training',
      cacheMode,
      loadMode,
      env: describeEnv(),
      timing,
      timingDiagnostics,
      output: null,
      memoryStats: null,
      deviceInfo: trainingBench?.deviceInfo ?? resolveDeviceInfo(),
      pipeline: null,
    };
  }

  if (workloadType === 'diffusion') {
    const diffusionBench = await runDiffusionSuite({
      ...options,
      command: 'bench',
      suite: 'diffusion',
      captureOutput: options.captureOutput === true,
      cacheMode,
      loadMode,
    });

    const benchResults = [
      {
        name: 'benchmark-diffusion',
        passed: diffusionBench.passed > 0 && diffusionBench.failed === 0,
        duration: diffusionBench.duration,
        error: diffusionBench.failed === 0 ? undefined : 'Diffusion benchmark run failed.',
      },
    ];
    const summary = buildSuiteSummary('bench', benchResults, startTime);

    return {
      ...diffusionBench,
      ...summary,
      suite: 'bench',
      results: benchResults,
      metrics: {
        ...(diffusionBench.metrics || {}),
        workloadType: 'diffusion',
      },
    };
  }

  const harness = await initializeSuiteModel(options);
  const modelType = harness.manifest?.modelType || 'transformer';
  const safeModelLoadMs = toTimingNumber(harness.modelLoadMs, 0);
  const totalRuns = warmupRuns + timedRuns;

  let results;
  let metrics;
  const output = null;
  let timing;
  let memoryStats = null;
  let runFailed = false;

  try {
    if (modelType === 'embedding') {
      const durations = [];
      const timedDurations = [];
      const embeddingDims = [];
      const embeddingTokenCounts = [];
      const embeddingNorms = [];
      let firstTimedEmbeddingMs = null;
      let invalidRuns = 0;
      let totalNonFiniteValues = 0;

      for (let i = 0; i < totalRuns; i++) {
        harness.pipeline.reset?.();
        const run = await runEmbedding(harness.pipeline, runtimeConfig, benchRun);
        if (i < warmupRuns) {
          continue; // warmup runs are executed but not sampled
        }
        timedDurations.push(run.durationMs);
        if (firstTimedEmbeddingMs == null) {
          firstTimedEmbeddingMs = run.durationMs;
        }
        totalNonFiniteValues += run.nonFiniteCount;
        if (Number.isFinite(run.tokenCount)) {
          embeddingTokenCounts.push(run.tokenCount);
        }
        if (Number.isFinite(run.l2Norm)) {
          embeddingNorms.push(run.l2Norm);
        }
        // Only runs with a non-empty, fully finite embedding feed `durations`.
        if (run.embeddingDim > 0 && run.nonFiniteCount === 0) {
          durations.push(run.durationMs);
          embeddingDims.push(run.embeddingDim);
        } else {
          invalidRuns++;
        }
      }

      const embeddingMsStats = computeSampleStats(durations);
      const timedEmbeddingMsStats = computeSampleStats(timedDurations);
      const embeddingDimStats = computeSampleStats(embeddingDims);
      const embeddingTokensStats = computeSampleStats(embeddingTokenCounts);
      const embeddingNormStats = computeSampleStats(embeddingNorms);
      const avgMs = embeddingMsStats.mean;

      let embeddingError;
      if (durations.length === 0) {
        embeddingError = 'No valid embedding benchmark runs completed';
      } else if (invalidRuns !== 0) {
        embeddingError = `Invalid embedding runs: ${invalidRuns} (non-finite values observed)`;
      }

      results = [
        {
          name: 'benchmark-embedding',
          passed: durations.length > 0 && invalidRuns === 0,
          duration: durations.reduce((sum, value) => sum + value, 0),
          error: embeddingError,
        },
      ];

      metrics = {
        warmupRuns,
        timedRuns,
        validRuns: durations.length,
        invalidRuns,
        invalidRatePct: Number((timedRuns > 0 ? (invalidRuns / timedRuns) * 100 : 0).toFixed(2)),
        prompt: benchRun.prompt,
        embeddingDim: Math.round(embeddingDims.reduce((a, b) => a + b, 0) / (embeddingDims.length || 1)),
        nonFiniteValues: totalNonFiniteValues,
        firstTimedEmbeddingMs: Number((firstTimedEmbeddingMs ?? 0).toFixed(2)),
        minEmbeddingMs: Number(embeddingMsStats.min.toFixed(2)),
        medianEmbeddingMs: Number(embeddingMsStats.median.toFixed(2)),
        p95EmbeddingMs: Number(embeddingMsStats.p95.toFixed(2)),
        p99EmbeddingMs: Number(embeddingMsStats.p99.toFixed(2)),
        maxEmbeddingMs: Number(embeddingMsStats.max.toFixed(2)),
        stdDevEmbeddingMs: Number(embeddingMsStats.stdDev.toFixed(2)),
        ci95EmbeddingMs: Number(embeddingMsStats.ci95.toFixed(2)),
        avgEmbeddingMs: Number(avgMs.toFixed(2)),
        avgEmbeddingsPerSec: Number((avgMs > 0 ? (1000 / avgMs) : 0).toFixed(2)),
        avgEmbeddingTokens: Number(embeddingTokensStats.mean.toFixed(2)),
        avgEmbeddingL2Norm: Number(embeddingNormStats.mean.toFixed(4)),
        modelLoadMs: safeModelLoadMs,
        latency: {
          timedEmbeddingMs: timedEmbeddingMsStats,
          embeddingMs: embeddingMsStats,
        },
        dimensions: {
          embedding: embeddingDimStats,
        },
        embedding: {
          tokens: embeddingTokensStats,
          l2Norm: embeddingNormStats,
        },
      };

      timing = buildCanonicalTiming({
        modelLoadMs: safeModelLoadMs,
        firstTokenMs: null,
        firstResponseMs: Number.isFinite(firstTimedEmbeddingMs)
          ? safeModelLoadMs + firstTimedEmbeddingMs
          : null,
        prefillMs: null,
        decodeMs: null,
        // Reuses the stats already computed over the valid-run durations.
        totalRunMs: embeddingMsStats.median,
        cacheMode,
        loadMode,
      });
    } else {
      const tokensPerSec = [];
      const durations = [];
      const tokensGenerated = [];
      const decodeMsPerToken = [];
      const ttftMs = [];
      const prefillMs = [];
      const decodeMs = [];
      const prefillTokens = [];
      const decodeTokens = [];
      const decodeTokensPerSec = [];
      const prefillTokensPerSec = [];
      const prefillTokensPerSecTtft = [];
      const gpuPrefillMs = [];
      const gpuDecodeMs = [];
      const gpuDecodeRecordMs = [];
      const gpuDecodeSubmitWaitMs = [];
      const gpuDecodeReadbackWaitMs = [];

      let generatedText = null;
      for (let i = 0; i < totalRuns; i++) {
        harness.pipeline.reset?.();
        const run = await runGeneration(harness.pipeline, runtimeConfig, benchRun);
        // Keep the text of the very last run as the representative sample.
        if (i === totalRuns - 1) {
          generatedText = run?.output ?? null;
        }
        if (i < warmupRuns) {
          continue; // warmup runs are executed but not sampled
        }
        const phase = run?.phase ?? {};
        const phaseTokens = Array.isArray(run?.tokens) ? run.tokens : [];
        const phaseGpu = phase.gpu;
        tokensPerSec.push(run?.tokensPerSec);
        durations.push(run?.durationMs);
        tokensGenerated.push(phaseTokens.length);
        ttftMs.push(phase.ttftMs);
        prefillMs.push(phase.prefillMs);
        decodeMs.push(phase.decodeMs);
        prefillTokens.push(phase.prefillTokens);
        decodeTokens.push(phase.decodeTokens);
        decodeTokensPerSec.push(phase.decodeTokensPerSec);
        prefillTokensPerSec.push(phase.prefillTokensPerSec);
        prefillTokensPerSecTtft.push(phase.prefillTokensPerSecTtft);
        if (phase.decodeMs > 0 && phase.decodeTokens > 0) {
          decodeMsPerToken.push(phase.decodeMs / phase.decodeTokens);
        }
        if (Number.isFinite(phaseGpu?.prefillMs)) gpuPrefillMs.push(phaseGpu.prefillMs);
        if (Number.isFinite(phaseGpu?.decodeMs)) gpuDecodeMs.push(phaseGpu.decodeMs);
        if (Number.isFinite(phaseGpu?.decodeRecordMs)) gpuDecodeRecordMs.push(phaseGpu.decodeRecordMs);
        if (Number.isFinite(phaseGpu?.decodeSubmitWaitMs)) gpuDecodeSubmitWaitMs.push(phaseGpu.decodeSubmitWaitMs);
        if (Number.isFinite(phaseGpu?.decodeReadbackWaitMs)) gpuDecodeReadbackWaitMs.push(phaseGpu.decodeReadbackWaitMs);
      }

      const totalMsStats = computeSampleStats(durations);
      const tokensPerSecStats = computeSampleStats(tokensPerSec);
      const decodeTokensPerSecStats = computeSampleStats(decodeTokensPerSec);
      const prefillTokensPerSecStats = computeSampleStats(prefillTokensPerSec);
      const prefillTokensPerSecTtftStats = computeSampleStats(prefillTokensPerSecTtft);
      const decodeMsPerTokenStats = computeSampleStats(decodeMsPerToken);
      const ttftMsStats = computeSampleStats(ttftMs);
      const prefillMsStats = computeSampleStats(prefillMs);
      const decodeMsStats = computeSampleStats(decodeMs);
      const tokensGeneratedStats = computeSampleStats(tokensGenerated);
      const prefillTokensStats = computeSampleStats(prefillTokens);
      const decodeTokensStats = computeSampleStats(decodeTokens);

      const hasGpuSamples = gpuPrefillMs.length > 0
        || gpuDecodeMs.length > 0
        || gpuDecodeRecordMs.length > 0
        || gpuDecodeSubmitWaitMs.length > 0
        || gpuDecodeReadbackWaitMs.length > 0;
      const gpuPhaseStats = hasGpuSamples
        ? {
          prefillMs: computeSampleStats(gpuPrefillMs),
          decodeMs: computeSampleStats(gpuDecodeMs),
          decodeRecordMs: computeSampleStats(gpuDecodeRecordMs),
          decodeSubmitWaitMs: computeSampleStats(gpuDecodeSubmitWaitMs),
          decodeReadbackWaitMs: computeSampleStats(gpuDecodeReadbackWaitMs),
        }
        : null;

      results = [
        {
          name: 'benchmark',
          passed: tokensPerSec.length > 0,
          duration: durations.reduce((sum, value) => sum + value, 0),
          error: tokensPerSec.length > 0 ? undefined : 'No benchmark runs completed',
        },
      ];

      const normalizedFirstTokenMs = sampleTimingNumber(ttftMsStats, 'median', null);

      metrics = {
        warmupRuns,
        timedRuns,
        prompt: benchRun.prompt,
        maxTokens: benchRun.maxTokens,
        decodeTokensPerSec: sampleTimingNumber(decodeTokensPerSecStats, 'median'),
        avgTokensGenerated: Math.round(tokensGeneratedStats.mean),
        avgPrefillTokens: Math.round(prefillTokensStats.mean),
        avgDecodeTokens: Math.round(decodeTokensStats.mean),
        medianPrefillTokensPerSec: sampleTimingNumber(prefillTokensPerSecStats, 'median'),
        avgPrefillTokensPerSec: sampleTimingNumber(prefillTokensPerSecStats, 'mean'),
        medianPrefillTokensPerSecTtft: sampleTimingNumber(prefillTokensPerSecTtftStats, 'median'),
        avgPrefillTokensPerSecTtft: sampleTimingNumber(prefillTokensPerSecTtftStats, 'mean'),
        avgDecodeTokensPerSec: sampleTimingNumber(decodeTokensPerSecStats, 'mean'),
        firstTokenMs: normalizedFirstTokenMs,
        // Guard before adding: `load + null` coerces to `load`, which would
        // report the bare model-load time as a first-response latency.
        firstResponseMs: Number.isFinite(normalizedFirstTokenMs)
          ? safeToFixed(safeModelLoadMs + normalizedFirstTokenMs, null)
          : null,
        prefillMs: sampleTimingNumber(prefillMsStats, 'median'),
        decodeMs: sampleTimingNumber(decodeMsStats, 'median'),
        totalRunMs: sampleTimingNumber(totalMsStats, 'median'),
        decodeMsPerTokenP50: sampleTimingNumber(decodeMsPerTokenStats, 'median'),
        decodeMsPerTokenP95: sampleTimingNumber(decodeMsPerTokenStats, 'p95'),
        decodeMsPerTokenP99: sampleTimingNumber(decodeMsPerTokenStats, 'p99'),
        avgPrefillMs: sampleTimingNumber(prefillMsStats, 'mean'),
        modelLoadMs: safeModelLoadMs,
        throughput: {
          tokensPerSec: tokensPerSecStats,
          prefillTokensPerSec: prefillTokensPerSecStats,
          prefillTokensPerSecTtft: prefillTokensPerSecTtftStats,
          decodeTokensPerSec: decodeTokensPerSecStats,
        },
        latency: {
          totalMs: totalMsStats,
          prefillMs: prefillMsStats,
          decodeMs: decodeMsStats,
          firstTokenMs: ttftMsStats,
        },
        tokens: {
          generated: tokensGeneratedStats,
          prefill: prefillTokensStats,
          decode: decodeTokensStats,
        },
        gpu: gpuPhaseStats,
        generatedText,
      };

      timing = buildCanonicalTiming({
        modelLoadMs: safeModelLoadMs,
        firstTokenMs: normalizedFirstTokenMs,
        firstResponseMs: Number.isFinite(normalizedFirstTokenMs)
          ? safeModelLoadMs + normalizedFirstTokenMs
          : null,
        prefillMs: prefillMsStats?.median ?? null,
        decodeMs: decodeMsStats?.median ?? null,
        decodeMsPerTokenP50: decodeMsPerTokenStats?.median ?? null,
        decodeMsPerTokenP95: decodeMsPerTokenStats?.p95 ?? null,
        decodeMsPerTokenP99: decodeMsPerTokenStats?.p99 ?? null,
        totalRunMs: totalMsStats.median,
        decodeTokensPerSec: decodeTokensPerSecStats?.median,
        prefillTokensPerSec: prefillTokensPerSecStats?.median,
        cacheMode,
        loadMode,
      });
    }

    // Memory stats must be read while the pipeline is still loaded.
    memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
      ? harness.pipeline.getMemoryStats()
      : null;
  } catch (error) {
    runFailed = true;
    throw error;
  } finally {
    // Unload on both success and failure so a throwing run does not leak the
    // pipeline. On the success path an unload error still propagates.
    if (typeof harness.pipeline?.unload === 'function' && !options.keepPipeline) {
      try {
        await harness.pipeline.unload();
      } catch (unloadError) {
        if (!runFailed) {
          throw unloadError;
        }
      }
    }
  }

  const summary = buildSuiteSummary('bench', results, startTime);
  const timingDiagnostics = buildTimingDiagnostics(timing, {
    source: 'doppler',
    prefillSemantics: 'internal_prefill_phase',
  });
  return {
    ...summary,
    modelId: options.modelId || harness.manifest?.modelId || 'unknown',
    cacheMode,
    loadMode,
    env: describeEnv(),
    timing,
    timingDiagnostics,
    output,
    metrics,
    memoryStats,
    deviceInfo: resolveDeviceInfo(),
    pipeline: options.keepPipeline ? harness.pipeline : null,
  };
}
|
|
2158
|
+
|
|
2159
|
+
/**
 * Runs the diffusion suite: validates the diffusion runtime config, executes
 * `warmupRuns + timedRuns` generations, and aggregates per-phase CPU/GPU
 * timings from `pipeline.getStats()` into a suite result.
 *
 * @param {object} [options] - Suite options. This function reads `captureOutput`,
 *   `cacheMode`, `loadMode`, `modelUrl`, `modelId` and `keepPipeline`; the whole
 *   object is also forwarded to `initializeSuiteModel`.
 * @returns {Promise<object>} The suite summary merged with model id, env,
 *   timing, output, metrics, memory stats, device info and (optionally) the pipeline.
 * @throws {Error} When `runtime.inference.diffusion` or any of its width,
 *   height, numSteps or guidanceScale values is missing or not a positive finite number.
 */
async function runDiffusionSuite(options = {}) {
  const startTime = performance.now();
  const runtimeConfig = getRuntimeConfig();
  const captureOutput = options.captureOutput === true;
  const cacheMode = normalizeCacheMode(options.cacheMode);
  const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
  const benchConfig = runtimeConfig.shared?.benchmark?.run || {};
  const warmupRuns = Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0));
  const timedRuns = Math.max(1, Math.floor(benchConfig.timedRuns ?? 1));

  const diffusionConfig = runtimeConfig.inference?.diffusion;
  if (!diffusionConfig) {
    throw new Error('runtime.inference.diffusion must be set for diffusion harness runs.');
  }
  const scheduler = diffusionConfig.scheduler;
  const latent = diffusionConfig.latent;
  const prompt = resolvePrompt(runtimeConfig);
  const negativePrompt = diffusionConfig.negativePrompt ?? '';

  // Math.floor(undefined) is NaN, so missing values are rejected by the checks below.
  const width = Math.floor(latent?.width);
  const height = Math.floor(latent?.height);
  const steps = Math.floor(scheduler?.numSteps);
  const guidanceScale = scheduler?.guidanceScale;

  if (!Number.isFinite(width) || width <= 0) {
    throw new Error('runtime.inference.diffusion.latent.width must be set for diffusion harness runs.');
  }
  if (!Number.isFinite(height) || height <= 0) {
    throw new Error('runtime.inference.diffusion.latent.height must be set for diffusion harness runs.');
  }
  if (!Number.isFinite(steps) || steps <= 0) {
    throw new Error('runtime.inference.diffusion.scheduler.numSteps must be set for diffusion harness runs.');
  }
  if (!Number.isFinite(guidanceScale) || guidanceScale <= 0) {
    throw new Error('runtime.inference.diffusion.scheduler.guidanceScale must be set for diffusion harness runs.');
  }

  const harness = await initializeSuiteModel(options);
  const totalMs = [];
  const prefillMs = [];
  const denoiseMs = [];
  const vaeMs = [];
  const prefillTokens = [];
  const decodeTokens = [];
  const gpuTotalMs = [];
  const gpuPrefillMs = [];
  const gpuDenoiseMs = [];
  const gpuVaeMs = [];
  let output = null;
  let memoryStats = null;

  try {
    for (let i = 0; i < warmupRuns + timedRuns; i++) {
      harness.pipeline.reset?.();
      const result = await harness.pipeline.generate({
        prompt,
        negativePrompt,
        steps,
        guidanceScale,
        width,
        height,
      });
      // Only the final run's result is retained as the captured output.
      if (captureOutput && i === warmupRuns + timedRuns - 1) {
        output = result;
      }

      // Warmup runs execute but contribute no samples.
      if (i < warmupRuns) continue;

      const stats = harness.pipeline.getStats?.() ?? {};
      if (Number.isFinite(stats.totalTimeMs)) totalMs.push(stats.totalTimeMs);
      if (Number.isFinite(stats.prefillTimeMs)) prefillMs.push(stats.prefillTimeMs);
      // The pipeline's decode time is recorded as the denoise phase.
      if (Number.isFinite(stats.decodeTimeMs)) denoiseMs.push(stats.decodeTimeMs);
      if (Number.isFinite(stats.vaeTimeMs)) vaeMs.push(stats.vaeTimeMs);
      if (Number.isFinite(stats.prefillTokens)) prefillTokens.push(stats.prefillTokens);
      if (Number.isFinite(stats.decodeTokens)) decodeTokens.push(stats.decodeTokens);

      const gpu = stats.gpu ?? null;
      if (gpu?.available) {
        if (Number.isFinite(gpu.totalMs)) gpuTotalMs.push(gpu.totalMs);
        if (Number.isFinite(gpu.prefillMs)) gpuPrefillMs.push(gpu.prefillMs);
        if (Number.isFinite(gpu.denoiseMs)) gpuDenoiseMs.push(gpu.denoiseMs);
        if (Number.isFinite(gpu.vaeMs)) gpuVaeMs.push(gpu.vaeMs);
      }
    }

    // Memory stats are read before the pipeline is unloaded.
    memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
      ? harness.pipeline.getMemoryStats()
      : null;
  } finally {
    // Release the pipeline on failure as well as success, unless the caller
    // asked to keep it; otherwise a throwing run would leave it loaded.
    if (typeof harness.pipeline?.unload === 'function' && !options.keepPipeline) {
      await harness.pipeline.unload();
    }
  }

  const results = [
    {
      name: 'diffusion',
      passed: totalMs.length > 0,
      duration: totalMs.reduce((sum, value) => sum + value, 0),
      error: totalMs.length > 0 ? undefined : 'No diffusion runs completed',
    },
  ];

  const summary = buildSuiteSummary('diffusion', results, startTime);
  const cpuStats = {
    totalMs: computeSampleStats(totalMs),
    prefillMs: computeSampleStats(prefillMs),
    denoiseMs: computeSampleStats(denoiseMs),
    vaeMs: computeSampleStats(vaeMs),
  };
  // GPU stats are reported only when at least one GPU total sample was collected.
  const gpuStats = gpuTotalMs.length > 0
    ? {
      available: true,
      totalMs: computeSampleStats(gpuTotalMs),
      prefillMs: computeSampleStats(gpuPrefillMs),
      denoiseMs: computeSampleStats(gpuDenoiseMs),
      vaeMs: computeSampleStats(gpuVaeMs),
    }
    : { available: false };

  const avgPrefillTokens = prefillTokens.length
    ? Math.round(prefillTokens.reduce((a, b) => a + b, 0) / prefillTokens.length)
    : 0;
  const avgDecodeTokens = decodeTokens.length
    ? Math.round(decodeTokens.reduce((a, b) => a + b, 0) / decodeTokens.length)
    : 0;
  const prefillMsMedian = safeStatsValue(cpuStats.prefillMs?.median);
  const denoiseMsMedian = safeStatsValue(cpuStats.denoiseMs?.median);
  const totalMsMedian = safeStatsValue(cpuStats.totalMs?.median);
  const diffusionPerformanceArtifact = buildDiffusionPerformanceArtifact({
    warmupRuns,
    timedRuns,
    width,
    height,
    steps,
    guidanceScale,
    avgPrefillTokens,
    avgDecodeTokens,
    cpuStats,
    gpuStats,
  });
  const timing = buildCanonicalTiming({
    modelLoadMs: 0,
    firstTokenMs: null,
    firstResponseMs: null,
    prefillMs: prefillMsMedian,
    decodeMs: denoiseMsMedian,
    totalRunMs: totalMsMedian,
    prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
    decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
    cacheMode,
    loadMode,
  });
  const timingDiagnostics = buildTimingDiagnostics(timing, {
    source: 'doppler',
    prefillSemantics: 'internal_prefill_phase',
  });

  return {
    ...summary,
    modelId: options.modelId || harness.manifest?.modelId || 'unknown',
    cacheMode,
    loadMode,
    env: {
      library: 'doppler',
      runtime: 'browser',
      device: 'webgpu',
      browserUserAgent: typeof navigator !== 'undefined' ? (navigator.userAgent || null) : null,
      browserPlatform: typeof navigator !== 'undefined' ? (navigator.platform || null) : null,
      browserLanguage: typeof navigator !== 'undefined' ? (navigator.language || null) : null,
      browserVendor: typeof navigator !== 'undefined' ? (navigator.vendor || null) : null,
    },
    timing,
    timingDiagnostics,
    output,
    metrics: {
      warmupRuns,
      timedRuns,
      width,
      height,
      steps,
      guidanceScale,
      prompt,
      avgPrefillTokens,
      avgDecodeTokens,
      latency: {
        totalMs: cpuStats.totalMs,
        prefillMs: cpuStats.prefillMs,
        denoiseMs: cpuStats.denoiseMs,
        vaeMs: cpuStats.vaeMs,
      },
      throughput: {
        prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
        decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
        decodeStepsPerSec: diffusionPerformanceArtifact.throughput.decodeStepsPerSec,
      },
      cpu: cpuStats,
      gpu: gpuStats,
      performanceArtifact: diffusionPerformanceArtifact,
    },
    memoryStats,
    deviceInfo: resolveDeviceInfo(),
    pipeline: options.keepPipeline ? harness.pipeline : null,
  };
}
|
|
2361
|
+
|
|
2362
|
+
/**
 * Runs the energy suite: loads the model, requires an energy manifest, performs
 * a single `pipeline.generate()` call and reports the resulting energy metrics.
 *
 * @param {object} [options] - Suite options. This function reads `modelId` and
 *   `keepPipeline`; the whole object is also forwarded to `initializeSuiteModel`.
 * @returns {Promise<object>} The suite summary merged with model id, metrics,
 *   memory stats, device info and (optionally) the pipeline.
 * @throws {Error} When the loaded manifest's `modelType` is not `'energy'`.
 */
async function runEnergySuite(options = {}) {
  const startTime = performance.now();
  const harness = await initializeSuiteModel(options);

  let result;
  let stats;
  let memoryStats;
  try {
    if (harness.manifest?.modelType !== 'energy') {
      throw new Error('Energy suite requires an energy model manifest.');
    }

    result = await harness.pipeline.generate();
    stats = harness.pipeline.getStats?.() ?? {};

    // Memory stats are read before the pipeline is unloaded.
    memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
      ? harness.pipeline.getMemoryStats()
      : null;
  } finally {
    // Release the pipeline on failure as well as success, unless the caller
    // asked to keep it; otherwise a wrong manifest type or a throwing
    // generate() would leave the freshly loaded pipeline behind.
    if (typeof harness.pipeline?.unload === 'function' && !options.keepPipeline) {
      await harness.pipeline.unload();
    }
  }

  // The run passes only when the pipeline reported a finite energy value.
  const hasFiniteEnergy = Number.isFinite(result.energy ?? NaN);
  const results = [
    {
      name: 'energy',
      passed: hasFiniteEnergy,
      // Falls back to wall-clock time when the pipeline reports no duration.
      duration: result.totalTimeMs ?? Math.max(0, performance.now() - startTime),
      error: hasFiniteEnergy ? undefined : 'Energy did not converge',
    },
  ];

  const summary = buildSuiteSummary('energy', results, startTime);
  return {
    ...summary,
    modelId: options.modelId || harness.manifest?.modelId || 'unknown',
    metrics: {
      steps: result.steps,
      energy: result.energy ?? null,
      dtype: result.dtype,
      shape: result.shape,
      totalTimeMs: result.totalTimeMs ?? null,
      energyHistory: result.energyHistory ?? [],
      stateStats: result.stateStats ?? null,
      readbackCount: stats.readbackCount ?? null,
    },
    memoryStats,
    deviceInfo: resolveDeviceInfo(),
    pipeline: options.keepPipeline ? harness.pipeline : null,
  };
}
|
|
2408
|
+
|
|
2409
|
+
/**
 * Routes a suite name to its runner.
 *
 * @param {string} suite - Suite identifier.
 * @param {object} options - Options handed to the selected runner; the
 *   `debug` and `inference` suites additionally receive a `suiteName` field.
 * @returns {Promise<object|null>} The runner's result, or `null` when the
 *   suite name matches none of the known suites.
 */
async function dispatchBrowserSuite(suite, options) {
  switch (suite) {
    case 'kernels':
      return runKernelSuite(options);
    case 'training':
      return runTrainingSuite(options);
    case 'bench':
      return runBenchSuite(options);
    case 'diffusion':
      return runDiffusionSuite(options);
    case 'energy':
      return runEnergySuite(options);
    case 'debug':
    case 'inference':
      // Both names share the inference runner and differ only in suiteName.
      return runInferenceSuite({ ...options, suiteName: suite });
    default:
      return null;
  }
}
|
|
2433
|
+
|
|
2434
|
+
/**
 * Gathers training artifacts from a suite result and splits them into UL and
 * distill buckets, alongside the checkpoint-resume timeline.
 *
 * Artifacts are read from `metrics.ulArtifacts`, `metrics.distillArtifacts`
 * and each `results[].artifact`. Entries that are not objects or lack a string
 * `manifestPath` are ignored. Classification order:
 *   1. explicit distill tag (`kind === 'distill'`, stage `stage_a`/`stage_b`);
 *   2. explicit UL tag (`kind === 'ul'`, stage `stage1_joint`/`stage2_base`);
 *   3. the list the artifact came from (`metrics.distillArtifacts` -> distill);
 *   4. otherwise UL.
 *
 * @param {object} [suiteResult] - Suite result; may be null/undefined.
 * @returns {{ulArtifacts: object[], distillArtifacts: object[], checkpointResumeTimeline: object[]}}
 */
function collectTrainingArtifactsFromSuiteResult(suiteResult) {
  const ulArtifacts = [];
  const distillArtifacts = [];
  // Keep only object entries from the timeline; anything else is dropped.
  const checkpointResumeTimeline = Array.isArray(suiteResult?.metrics?.checkpointResumeTimeline)
    ? suiteResult.metrics.checkpointResumeTimeline
      .filter((entry) => entry && typeof entry === 'object')
    : [];

  const addArtifact = (artifact, source = null) => {
    if (!artifact || typeof artifact !== 'object' || typeof artifact.manifestPath !== 'string') {
      return;
    }
    const stage = String(artifact.stage || '').trim();
    const kind = String(artifact.kind || '').trim();
    const taggedDistill = kind === 'distill' || stage === 'stage_a' || stage === 'stage_b';
    const taggedUl = kind === 'ul' || stage === 'stage1_joint' || stage === 'stage2_base';

    if (taggedDistill) {
      distillArtifacts.push(artifact);
      return;
    }
    // An untagged artifact taken from the distill list stays a distill
    // artifact; previously the 'distill' source hint was ignored and such
    // entries were filed under UL.
    if (!taggedUl && source === 'distill') {
      distillArtifacts.push(artifact);
      return;
    }
    // Explicit UL tags, the 'ul' source, and anything unclassified default to UL.
    ulArtifacts.push(artifact);
  };

  const metricUlArtifacts = Array.isArray(suiteResult?.metrics?.ulArtifacts)
    ? suiteResult.metrics.ulArtifacts
    : [];
  for (const artifact of metricUlArtifacts) {
    addArtifact(artifact, 'ul');
  }

  const metricDistillArtifacts = Array.isArray(suiteResult?.metrics?.distillArtifacts)
    ? suiteResult.metrics.distillArtifacts
    : [];
  for (const artifact of metricDistillArtifacts) {
    addArtifact(artifact, 'distill');
  }

  const resultEntries = Array.isArray(suiteResult?.results) ? suiteResult.results : [];
  for (const entry of resultEntries) {
    addArtifact(entry?.artifact, null);
  }

  return { ulArtifacts, distillArtifacts, checkpointResumeTimeline };
}
|
|
2476
|
+
|
|
2477
|
+
/**
 * Runs one browser suite inside `runWithRuntimeIsolationForSuite`, validates
 * suite-specific metrics, builds the report and saves it.
 *
 * @param {object} [options] - Suite options. This function reads `suite`,
 *   `timestamp`, `modelId`, `modelUrl`, `runtimePreset` and `report`; the whole
 *   object is also forwarded to the suite runner.
 * @returns {Promise<object>} The suite result extended with `report` and `reportInfo`.
 * @throws {Error} The error from `createUnsupportedSuiteError` when no runner
 *   produced a result for the suite.
 */
export async function runBrowserSuite(options = {}) {
  const executeSuite = async () => {
    const suiteTimestamp = resolveReportTimestamp(options.timestamp, 'runBrowserSuite timestamp');
    const suiteContext = resolveSuiteContext(options);
    const suite = normalizeSuite(options.suite, suiteContext);
    const suiteResult = await dispatchBrowserSuite(suite, options);
    if (!suiteResult) {
      throw createUnsupportedSuiteError(suite, suiteContext);
    }

    // Suite-specific validation of the produced metrics.
    const isBench = suite === 'bench';
    const workloadType = suiteResult?.metrics?.workloadType;
    if (isBench && workloadType === 'training') {
      const trainingReport = suiteResult?.metrics?.trainingMetricsReport;
      const hasTrainingReport = Array.isArray(trainingReport) && trainingReport.length > 0;
      if (hasTrainingReport) {
        validateTrainingMetricsReport(trainingReport);
      }
    }
    if (suite === 'diffusion') {
      assertDiffusionPerformanceArtifact(suiteResult?.metrics, 'diffusion verify');
    }
    if (isBench && workloadType === 'diffusion') {
      assertDiffusionPerformanceArtifact(suiteResult?.metrics, 'diffusion bench');
    }

    const modelId = suiteResult.modelId || options.modelId || options.modelUrl || suite;
    const reportOutput = sanitizeReportOutput(suiteResult.output);
    const {
      ulArtifacts,
      distillArtifacts,
      checkpointResumeTimeline,
    } = collectTrainingArtifactsFromSuiteResult(suiteResult);

    // Caller-supplied report fields are spread last, so they win on key clashes.
    const report = {
      suite,
      modelId,
      runtimePreset: options.runtimePreset ?? null,
      deviceInfo: suiteResult.deviceInfo ?? null,
      results: suiteResult.results,
      durationMs: suiteResult.duration,
      timestamp: suiteTimestamp,
      metrics: suiteResult.metrics ?? null,
      output: reportOutput,
      memory: suiteResult.memoryStats ?? null,
      ...options.report,
    };

    const hasUl = ulArtifacts.length > 0;
    const hasDistill = distillArtifacts.length > 0;
    const hasTimeline = checkpointResumeTimeline.length > 0;
    if (hasUl || hasDistill || hasTimeline) {
      // Merge into any lineage already present on the report instead of replacing it.
      const existingLineage = report.lineage && typeof report.lineage === 'object'
        ? report.lineage
        : {};
      const existingTraining = report.lineage?.training && typeof report.lineage.training === 'object'
        ? report.lineage.training
        : {};
      const training = { ...existingTraining };
      if (hasUl) {
        training.ulArtifacts = ulArtifacts;
      }
      if (hasDistill) {
        training.distillArtifacts = distillArtifacts;
      }
      if (hasTimeline) {
        training.checkpointResumeTimeline = checkpointResumeTimeline;
      }
      report.lineage = { ...existingLineage, training };
    }

    // options.report may have overwritten the timestamp with a falsy value.
    if (!report.timestamp) {
      report.timestamp = suiteTimestamp;
    }
    const reportInfo = await saveReport(modelId, report, { timestamp: report.timestamp });
    return { ...suiteResult, report, reportInfo };
  };

  return runWithRuntimeIsolationForSuite(executeSuite);
}
|
|
2541
|
+
|
|
2542
|
+
/**
 * Validates a harness manifest and fills in defaults.
 *
 * @param {object} manifest - Raw manifest; must be an object with a non-empty `runs` array.
 * @returns {{defaults: object, runs: Array, reportModelId: string, report: (object|null)}}
 * @throws {Error} When the manifest is not an object or has no runs.
 */
function normalizeManifest(manifest) {
  const isObject = Boolean(manifest) && typeof manifest === 'object';
  if (!isObject) {
    throw new Error('Harness manifest must be an object.');
  }

  const { runs: declaredRuns, defaults, reportModelId, id, report } = manifest;
  // A non-array `runs` is treated the same as an empty list.
  const runs = Array.isArray(declaredRuns) ? declaredRuns : [];
  if (runs.length === 0) {
    throw new Error('Harness manifest must include at least one run.');
  }

  return {
    defaults: defaults ?? {},
    runs,
    reportModelId: reportModelId ?? id ?? 'manifest',
    report: report ?? null,
  };
}
|
|
2557
|
+
|
|
2558
|
+
/**
 * Combines manifest defaults with a single run entry.
 *
 * Run fields override defaults. For the three runtime selectors and `suite`,
 * a nullish run value falls back to the default, then to `null`
 * (`'inference'` for `suite`).
 *
 * @param {object} defaults - Manifest-level defaults.
 * @param {object} run - Run-level settings.
 * @returns {object} The merged run configuration.
 */
function mergeRunDefaults(defaults, run) {
  const resolve = (key, fallback) => run[key] ?? defaults[key] ?? fallback;

  const merged = { ...defaults, ...run };
  merged.runtimePreset = resolve('runtimePreset', null);
  merged.runtimeConfigUrl = resolve('runtimeConfigUrl', null);
  merged.runtimeConfig = resolve('runtimeConfig', null);
  merged.suite = resolve('suite', 'inference');
  return merged;
}
|
|
2568
|
+
|
|
2569
|
+
/**
 * Applies the runtime configuration selected by a run. Exactly one source is
 * used, in priority order: inline `runtimeConfig`, then `runtimeConfigUrl`,
 * then `runtimePreset`. Does nothing when none is set.
 *
 * @param {object} run - Merged run settings.
 * @param {object} options - Forwarded to the URL/preset loaders.
 * @returns {Promise<void>}
 * @throws {Error} When an inline `runtimeConfig` resolves to no runtime.
 */
async function applyRuntimeForRun(run, options) {
  const { runtimeConfig, runtimeConfigUrl, runtimePreset } = run;

  if (runtimeConfig) {
    const resolvedRuntime = resolveRuntimeFromConfig(runtimeConfig);
    if (!resolvedRuntime) {
      throw new Error('runtimeConfig is missing runtime fields');
    }
    setRuntimeConfig(resolvedRuntime);
  } else if (runtimeConfigUrl) {
    await applyRuntimeConfigFromUrl(runtimeConfigUrl, options);
  } else if (runtimePreset) {
    await applyRuntimePreset(runtimePreset, options);
  }
}
|
|
2586
|
+
|
|
2587
|
+
/**
 * Tallies manifest run outcomes.
 *
 * A run counts as failed when any of its result entries is neither passed nor
 * skipped; otherwise it counts as passed. Durations are summed, treating a
 * missing or falsy `duration` as 0.
 *
 * @param {Array<object>} results - Per-run suite results.
 * @returns {{totalRuns: number, passedRuns: number, failedRuns: number, durationMs: number}}
 */
function summarizeManifestRuns(results) {
  const tally = {
    totalRuns: results.length,
    passedRuns: 0,
    failedRuns: 0,
    durationMs: 0,
  };

  for (const run of results) {
    const entries = run.results || [];
    const failing = entries.filter((entry) => !(entry.passed || entry.skipped));
    if (failing.length === 0) {
      tally.passedRuns += 1;
    } else {
      tally.failedRuns += 1;
    }
    tally.durationMs += run.duration || 0;
  }

  return tally;
}
|
|
2607
|
+
|
|
2608
|
+
/**
 * Runs every entry of a harness manifest sequentially and produces a combined report.
 *
 * The runtime config and active kernel path captured at entry are re-applied
 * before each run and restored after it, so runs do not see each other's
 * runtime changes.
 *
 * @param {object} manifest - Harness manifest; validated by `normalizeManifest`.
 * @param {object} [options] - This function reads `timestamp`, `onProgress`
 *   and `saveReport` (set to `false` to skip saving the combined report); the
 *   object is also forwarded to the runtime config loaders.
 * @returns {Promise<{results: Array<object>, summary: object, report: object, reportInfo: (object|null)}>}
 * @throws {Error} When the manifest is invalid. Errors thrown by a run
 *   propagate after the base runtime state has been restored.
 */
export async function runBrowserManifest(manifest, options = {}) {
  const normalized = normalizeManifest(manifest);
  const results = [];
  const manifestTimestamp = resolveReportTimestamp(options.timestamp, 'runBrowserManifest timestamp');
  // Snapshot the runtime state so each run starts from the same baseline.
  const baseRuntimeConfig = cloneRuntimeConfig(getRuntimeConfig());
  const baseKernelPath = getActiveKernelPath();
  const baseKernelPathSource = getActiveKernelPathSource();
  const baseKernelPathPolicy = getActiveKernelPathPolicy();

  for (let i = 0; i < normalized.runs.length; i++) {
    const run = mergeRunDefaults(normalized.defaults, normalized.runs[i] || {});
    try {
      setRuntimeConfig(baseRuntimeConfig);
      setActiveKernelPath(baseKernelPath, baseKernelPathSource, baseKernelPathPolicy);
      await applyRuntimeForRun(run, options);
      // The manifest timestamp is passed as the third argument for runs that set none.
      const runTimestamp = resolveReportTimestamp(
        run.timestamp,
        `runBrowserManifest run[${i}] timestamp`,
        manifestTimestamp
      );
      const result = await runBrowserSuite({ ...run, timestamp: runTimestamp });
      results.push({
        ...result,
        label: run.label ?? `${run.suite || 'inference'}:${result.modelId || 'unknown'}`,
      });
      options.onProgress?.({
        index: i + 1,
        total: normalized.runs.length,
        label: run.label ?? result.modelId ?? run.suite ?? 'run',
      });
    } finally {
      // Restore the baseline even when the run throws.
      setRuntimeConfig(baseRuntimeConfig);
      setActiveKernelPath(baseKernelPath, baseKernelPathSource, baseKernelPathPolicy);
    }
  }

  const summary = summarizeManifestRuns(results);
  const report = {
    timestamp: manifestTimestamp,
    summary,
    runs: results.map((result) => ({
      label: result.label,
      suite: result.suite,
      modelId: result.modelId,
      results: result.results,
      metrics: result.metrics ?? null,
      // Only string outputs are carried into the combined report.
      output: typeof result.output === 'string' ? result.output : null,
      reportInfo: result.reportInfo ?? null,
    })),
    manifest: normalized.report ?? null,
  };

  // Save under the same resolved timestamp that is written into the report,
  // rather than the raw (possibly undefined) options.timestamp.
  const reportInfo = options.saveReport === false
    ? null
    : await saveReport(normalized.reportModelId, report, { timestamp: manifestTimestamp });

  return { results, summary, report, reportInfo };
}
|