@simulatte/doppler 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +16 -23
  3. package/package.json +30 -32
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +31 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +5 -20
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.d.ts +5 -0
  29. package/src/config/kernel-path-loader.js +18 -36
  30. package/src/config/kernels/kernel-ref-digests.js +1 -1
  31. package/src/config/kernels/registry.js +14 -1
  32. package/src/config/kernels/registry.json +81 -5
  33. package/src/config/loader.d.ts +1 -1
  34. package/src/config/loader.js +15 -2
  35. package/src/config/merge-contract-check.js +66 -4
  36. package/src/config/merge-helpers.js +128 -7
  37. package/src/config/merge.d.ts +1 -0
  38. package/src/config/merge.js +10 -0
  39. package/src/config/param-validator.js +47 -2
  40. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  41. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  42. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  43. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
  44. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
  45. package/src/config/presets/kernel-paths/registry.json +43 -8
  46. package/src/config/presets/models/gemma2.json +3 -2
  47. package/src/config/presets/models/gemma3.json +2 -0
  48. package/src/config/presets/models/qwen3.json +4 -3
  49. package/src/config/presets/models/qwen3_5.json +16 -0
  50. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  51. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  52. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  53. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  54. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  55. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  56. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  57. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  58. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  59. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  60. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  61. package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
  62. package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
  63. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  64. package/src/config/runtime.js +6 -1
  65. package/src/config/schema/conversion.schema.d.ts +1 -0
  66. package/src/config/schema/debug.schema.d.ts +5 -0
  67. package/src/config/schema/doppler.schema.js +16 -21
  68. package/src/config/schema/inference-defaults.schema.js +3 -3
  69. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  70. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  71. package/src/config/schema/manifest.schema.d.ts +3 -2
  72. package/src/config/schema/manifest.schema.js +17 -4
  73. package/src/config/schema/storage.schema.js +1 -1
  74. package/src/config/training-defaults.js +30 -22
  75. package/src/converter/conversion-plan.js +104 -11
  76. package/src/converter/core.d.ts +7 -0
  77. package/src/converter/core.js +16 -9
  78. package/src/converter/execution-v0-manifest.js +4 -1
  79. package/src/converter/index.d.ts +1 -0
  80. package/src/converter/index.js +1 -0
  81. package/src/converter/manifest-inference.js +50 -29
  82. package/src/converter/parsers/diffusion.js +0 -3
  83. package/src/converter/parsers/transformer.js +4 -0
  84. package/src/converter/quantization-info.js +40 -16
  85. package/src/converter/quantizer.js +19 -12
  86. package/src/converter/rope-config.js +8 -6
  87. package/src/converter/shard-packer.d.ts +1 -1
  88. package/src/converter/shard-packer.js +4 -1
  89. package/src/converter/tokenizer-utils.d.ts +1 -0
  90. package/src/converter/tokenizer-utils.js +4 -1
  91. package/src/debug/config.js +123 -11
  92. package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
  93. package/src/debug/signals.js +7 -1
  94. package/src/debug/tensor.d.ts +2 -0
  95. package/src/debug/tensor.js +13 -2
  96. package/src/distribution/p2p-control-plane.js +52 -12
  97. package/src/distribution/p2p-observability.js +43 -7
  98. package/src/distribution/p2p-webrtc-browser.js +20 -0
  99. package/src/distribution/shard-delivery.js +83 -27
  100. package/src/formats/gguf/types.js +33 -16
  101. package/src/formats/rdrr/groups.d.ts +12 -4
  102. package/src/formats/rdrr/groups.js +3 -6
  103. package/src/formats/rdrr/parsing.d.ts +4 -0
  104. package/src/formats/rdrr/parsing.js +53 -3
  105. package/src/formats/rdrr/types.d.ts +2 -1
  106. package/src/gpu/command-recorder.js +86 -61
  107. package/src/gpu/device.d.ts +1 -0
  108. package/src/gpu/device.js +73 -19
  109. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  110. package/src/gpu/kernel-tuner/cache.js +71 -4
  111. package/src/gpu/kernel-tuner/tuner.js +22 -4
  112. package/src/gpu/kernels/attention.js +15 -34
  113. package/src/gpu/kernels/backward/adam.js +62 -58
  114. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  115. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  116. package/src/gpu/kernels/cast.js +191 -149
  117. package/src/gpu/kernels/check-stop.js +33 -44
  118. package/src/gpu/kernels/conv2d.js +27 -17
  119. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  120. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  121. package/src/gpu/kernels/dequant.js +178 -126
  122. package/src/gpu/kernels/energy.d.ts +3 -21
  123. package/src/gpu/kernels/energy.js +111 -88
  124. package/src/gpu/kernels/feature-check.js +1 -1
  125. package/src/gpu/kernels/fused_ffn.js +84 -65
  126. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  127. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  128. package/src/gpu/kernels/gather.js +33 -15
  129. package/src/gpu/kernels/gelu.js +19 -11
  130. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  131. package/src/gpu/kernels/groupnorm.js +34 -23
  132. package/src/gpu/kernels/index.d.ts +8 -0
  133. package/src/gpu/kernels/index.js +6 -0
  134. package/src/gpu/kernels/kv-quantize.js +5 -2
  135. package/src/gpu/kernels/layernorm.js +35 -19
  136. package/src/gpu/kernels/logit-merge.js +5 -3
  137. package/src/gpu/kernels/matmul-selection.js +47 -4
  138. package/src/gpu/kernels/matmul.d.ts +2 -0
  139. package/src/gpu/kernels/matmul.js +59 -40
  140. package/src/gpu/kernels/modulate.js +23 -15
  141. package/src/gpu/kernels/moe.js +221 -175
  142. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  143. package/src/gpu/kernels/relu.js +18 -10
  144. package/src/gpu/kernels/repeat_channels.js +25 -17
  145. package/src/gpu/kernels/residual.js +37 -27
  146. package/src/gpu/kernels/rmsnorm.js +66 -43
  147. package/src/gpu/kernels/rope.js +3 -0
  148. package/src/gpu/kernels/sample.js +27 -38
  149. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  150. package/src/gpu/kernels/scale.js +18 -11
  151. package/src/gpu/kernels/shader-cache.js +4 -2
  152. package/src/gpu/kernels/silu.js +120 -72
  153. package/src/gpu/kernels/softmax.js +44 -25
  154. package/src/gpu/kernels/split_qg.d.ts +50 -0
  155. package/src/gpu/kernels/split_qg.js +46 -0
  156. package/src/gpu/kernels/split_qg.wgsl +58 -0
  157. package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
  158. package/src/gpu/kernels/split_qkv.js +23 -13
  159. package/src/gpu/kernels/transpose.js +18 -10
  160. package/src/gpu/kernels/transpose.wgsl +5 -3
  161. package/src/gpu/kernels/upsample2d.js +21 -13
  162. package/src/gpu/kernels/utils.js +20 -13
  163. package/src/gpu/partitioned-buffer-pool.js +10 -2
  164. package/src/gpu/perf-guards.js +2 -9
  165. package/src/gpu/profiler.js +27 -22
  166. package/src/gpu/readback-utils.d.ts +16 -0
  167. package/src/gpu/readback-utils.js +41 -0
  168. package/src/gpu/submit-tracker.js +13 -0
  169. package/src/gpu/uniform-cache.d.ts +1 -0
  170. package/src/gpu/uniform-cache.js +30 -9
  171. package/src/gpu/weight-buffer.d.ts +1 -1
  172. package/src/gpu/weight-buffer.js +1 -1
  173. package/src/hotswap/intent-bundle.js +6 -0
  174. package/src/hotswap/manifest.d.ts +10 -1
  175. package/src/hotswap/manifest.js +12 -2
  176. package/src/hotswap/runtime.js +30 -8
  177. package/src/index-browser.d.ts +44 -0
  178. package/src/index-browser.js +14 -0
  179. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  180. package/src/inference/browser-harness-contract-helpers.js +28 -0
  181. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  182. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  183. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  184. package/src/inference/browser-harness-model-helpers.js +217 -0
  185. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  186. package/src/inference/browser-harness-report-helpers.js +42 -0
  187. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  188. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  189. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  190. package/src/inference/browser-harness-suite-helpers.js +268 -0
  191. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  192. package/src/inference/browser-harness-text-helpers.js +788 -0
  193. package/src/inference/browser-harness.d.ts +8 -0
  194. package/src/inference/browser-harness.js +149 -1996
  195. package/src/inference/kv-cache/base.js +140 -94
  196. package/src/inference/kv-cache/tiered.js +5 -3
  197. package/src/inference/moe-router.js +88 -56
  198. package/src/inference/multi-model-network.js +5 -3
  199. package/src/inference/network-evolution.d.ts +11 -2
  200. package/src/inference/network-evolution.js +20 -21
  201. package/src/inference/pipelines/context.d.ts +3 -0
  202. package/src/inference/pipelines/context.js +142 -2
  203. package/src/inference/pipelines/diffusion/helpers.js +10 -2
  204. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  205. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  206. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
  207. package/src/inference/pipelines/diffusion/vae.js +3 -7
  208. package/src/inference/pipelines/energy/pipeline.js +27 -21
  209. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  210. package/src/inference/pipelines/energy/quintel.js +11 -0
  211. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  212. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  213. package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
  214. package/src/inference/pipelines/text/attention/output-projection.js +8 -0
  215. package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
  216. package/src/inference/pipelines/text/attention/projections.js +192 -112
  217. package/src/inference/pipelines/text/attention/record.js +77 -14
  218. package/src/inference/pipelines/text/attention/run.js +112 -14
  219. package/src/inference/pipelines/text/config.js +17 -4
  220. package/src/inference/pipelines/text/embed.js +2 -8
  221. package/src/inference/pipelines/text/execution-plan.js +46 -23
  222. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  223. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  224. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  225. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  226. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  227. package/src/inference/pipelines/text/generator-runtime.js +5 -0
  228. package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
  229. package/src/inference/pipelines/text/generator-steps.js +340 -221
  230. package/src/inference/pipelines/text/generator.js +56 -40
  231. package/src/inference/pipelines/text/init.d.ts +13 -0
  232. package/src/inference/pipelines/text/init.js +94 -25
  233. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  234. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  235. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  236. package/src/inference/pipelines/text/layer.js +4 -9
  237. package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
  238. package/src/inference/pipelines/text/linear-attention.js +113 -9
  239. package/src/inference/pipelines/text/logits/gpu.js +12 -7
  240. package/src/inference/pipelines/text/logits/index.d.ts +6 -1
  241. package/src/inference/pipelines/text/logits/index.js +13 -12
  242. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  243. package/src/inference/pipelines/text/logits/utils.js +9 -0
  244. package/src/inference/pipelines/text/lora-apply.js +50 -32
  245. package/src/inference/pipelines/text/model-load.js +282 -104
  246. package/src/inference/pipelines/text/moe-cache.js +5 -4
  247. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  248. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  249. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  250. package/src/inference/pipelines/text/ops.js +90 -90
  251. package/src/inference/pipelines/text/probes.js +9 -9
  252. package/src/inference/pipelines/text/sampling.js +52 -6
  253. package/src/inference/pipelines/text/weights.js +17 -7
  254. package/src/inference/pipelines/text.js +13 -1
  255. package/src/inference/speculative.d.ts +2 -2
  256. package/src/inference/speculative.js +4 -18
  257. package/src/inference/test-harness.d.ts +1 -1
  258. package/src/inference/test-harness.js +17 -7
  259. package/src/inference/tokenizer.d.ts +0 -5
  260. package/src/inference/tokenizer.js +4 -23
  261. package/src/inference/tokenizers/bpe.js +9 -0
  262. package/src/inference/tokenizers/bundled.js +20 -0
  263. package/src/inference/tokenizers/sentencepiece.js +12 -0
  264. package/src/loader/doppler-loader.js +38 -22
  265. package/src/loader/dtype-utils.js +3 -44
  266. package/src/loader/embedding-loader.js +7 -3
  267. package/src/loader/experts/expert-cache.js +13 -6
  268. package/src/loader/experts/expert-loader.js +10 -6
  269. package/src/loader/final-weights-loader.js +10 -4
  270. package/src/loader/layer-loader.js +2 -1
  271. package/src/loader/loader-state.js +2 -2
  272. package/src/loader/memory-monitor.js +8 -0
  273. package/src/loader/multi-model-loader.d.ts +14 -0
  274. package/src/loader/multi-model-loader.js +70 -24
  275. package/src/loader/shard-cache.js +84 -14
  276. package/src/loader/shard-resolver.js +25 -3
  277. package/src/loader/tensors/tensor-loader.js +214 -144
  278. package/src/loader/tensors/tensor-reader.js +76 -19
  279. package/src/loader/weight-downcast.js +1 -1
  280. package/src/memory/buffer-pool.d.ts +9 -1
  281. package/src/memory/buffer-pool.js +109 -44
  282. package/src/memory/unified-detect.js +1 -1
  283. package/src/rules/inference/dtype.rules.json +5 -0
  284. package/src/rules/inference/kernel-path.rules.json +24 -8
  285. package/src/rules/kernels/split-qg.rules.json +6 -0
  286. package/src/rules/rule-registry.js +27 -1
  287. package/src/storage/backends/opfs-store.js +68 -24
  288. package/src/storage/downloader.js +365 -83
  289. package/src/storage/index.d.ts +3 -0
  290. package/src/storage/index.js +3 -0
  291. package/src/storage/preflight.d.ts +2 -2
  292. package/src/storage/preflight.js +24 -2
  293. package/src/storage/quickstart-downloader.js +11 -5
  294. package/src/storage/registry.js +10 -4
  295. package/src/storage/reports.js +1 -1
  296. package/src/storage/shard-manager.d.ts +15 -1
  297. package/src/storage/shard-manager.js +55 -6
  298. package/src/storage/source-artifact-store.d.ts +52 -0
  299. package/src/storage/source-artifact-store.js +234 -0
  300. package/src/tooling/command-api-constants.d.ts +9 -0
  301. package/src/tooling/command-api-constants.js +9 -0
  302. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  303. package/src/tooling/command-api-family-normalizers.js +343 -0
  304. package/src/tooling/command-api-helpers.d.ts +25 -0
  305. package/src/tooling/command-api-helpers.js +262 -0
  306. package/src/tooling/command-api.js +16 -602
  307. package/src/tooling/command-envelope.js +4 -1
  308. package/src/tooling/command-runner-shared.js +52 -18
  309. package/src/tooling/conversion-config-materializer.js +3 -5
  310. package/src/tooling/lean-execution-contract.js +150 -3
  311. package/src/tooling/node-browser-command-runner.js +161 -271
  312. package/src/tooling/node-command-runner.js +29 -3
  313. package/src/tooling/node-converter.js +30 -1
  314. package/src/tooling/node-source-runtime.d.ts +1 -1
  315. package/src/tooling/node-source-runtime.js +120 -3
  316. package/src/tooling/node-webgpu.js +24 -21
  317. package/src/tooling/opfs-cache.js +21 -4
  318. package/src/tooling/runtime-input-composition.d.ts +38 -0
  319. package/src/tooling/runtime-input-composition.js +86 -0
  320. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  321. package/src/tooling/source-runtime-bundle.js +261 -34
  322. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  323. package/src/tooling/source-runtime-materializer.js +93 -0
  324. package/src/training/attention-backward.js +32 -17
  325. package/src/training/autograd.js +80 -52
  326. package/src/training/checkpoint-watch.d.ts +2 -1
  327. package/src/training/checkpoint-watch.js +39 -6
  328. package/src/training/checkpoint.js +40 -11
  329. package/src/training/clip.js +2 -1
  330. package/src/training/datasets/token-batch.js +20 -8
  331. package/src/training/distillation/checkpoint-watch.js +1 -0
  332. package/src/training/distillation/student-fixture.d.ts +22 -0
  333. package/src/training/distillation/student-fixture.js +846 -0
  334. package/src/training/distillation/suite-data.d.ts +45 -0
  335. package/src/training/distillation/suite-data.js +189 -0
  336. package/src/training/lora-pipeline.js +4 -7
  337. package/src/training/lora.js +26 -12
  338. package/src/training/loss.js +5 -6
  339. package/src/training/objectives/cross_entropy.js +2 -5
  340. package/src/training/objectives/distill_kd.js +4 -8
  341. package/src/training/objectives/distill_triplet.js +4 -8
  342. package/src/training/objectives/ul_stage2_base.js +4 -8
  343. package/src/training/operator-command.js +2 -0
  344. package/src/training/optimizer.js +19 -7
  345. package/src/training/runner.js +2 -1
  346. package/src/training/suite.js +18 -978
  347. package/src/training/tensor-factory.d.ts +9 -0
  348. package/src/training/tensor-factory.js +13 -0
  349. package/src/training/trainer.js +3 -5
  350. package/src/training/ul_dataset.js +3 -5
  351. package/src/training/workloads.js +70 -79
  352. package/src/types/model.d.ts +5 -0
  353. package/src/version.js +1 -1
  354. package/tools/convert-safetensors-node.js +22 -16
  355. package/tools/doppler-cli.js +50 -26
@@ -1,13 +1,8 @@
1
1
 
2
- import { initializeInference, parseRuntimeOverridesFromURL } from './test-harness.js';
2
+ import { initializeInference } from './test-harness.js';
3
3
  import { saveReport } from '../storage/reports.js';
4
4
  import { getRuntimeConfig, setRuntimeConfig } from '../config/runtime.js';
5
- import { initDevice, getKernelCapabilities, getDevice } from '../gpu/device.js';
6
- import { createPipeline } from './pipelines/text.js';
7
- import { parseModelConfigFromManifest } from './pipelines/text/config.js';
8
- import { resolveKernelPathState, activateKernelPathState } from './pipelines/text/model-load.js';
9
- import { openModelStore, loadManifestFromStore } from '../storage/shard-manager.js';
10
- import { parseManifest } from '../formats/rdrr/index.js';
5
+ import { clearLogHistory, getDebugSnapshot } from '../debug/history.js';
11
6
  import { computeSampleStats } from '../debug/stats.js';
12
7
  import {
13
8
  setActiveKernelPath,
@@ -15,19 +10,54 @@ import {
15
10
  getActiveKernelPathSource,
16
11
  getActiveKernelPathPolicy,
17
12
  } from '../config/kernel-path-loader.js';
18
- import {
19
- getInferenceLayerPatternContractArtifact,
20
- selectRuleValue,
21
- } from '../rules/rule-registry.js';
22
- import { mergeRuntimeValues } from '../config/runtime-merge.js';
23
- import { isPlainObject } from '../utils/plain-object.js';
24
- import { validateBrowserSuiteMetrics } from '../config/schema/browser-suite-metrics.schema.js';
25
13
  import { validateTrainingMetricsReport } from '../config/schema/training-metrics.schema.js';
26
- import { buildExecutionContractArtifact } from '../config/execution-contract-check.js';
27
- import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
14
+ import {
15
+ resolveReportTimestamp,
16
+ resolveRuntime,
17
+ cloneRuntimeConfig,
18
+ runWithRuntimeIsolationForSuite,
19
+ sanitizeReportOutput,
20
+ loadRuntimeConfigFromUrl,
21
+ applyRuntimeConfigFromUrl,
22
+ loadRuntimePreset,
23
+ applyRuntimePreset,
24
+ applyRuntimeForRun,
25
+ normalizeManifest,
26
+ mergeRunDefaults,
27
+ summarizeManifestRuns,
28
+ } from './browser-harness-runtime-helpers.js';
29
+ import {
30
+ buildSuiteSummary,
31
+ normalizeCacheMode,
32
+ normalizeLoadMode,
33
+ normalizeWorkloadType,
34
+ assertDiffusionPerformanceArtifact,
35
+ toTimingNumber,
36
+ safeToFixed,
37
+ sampleTimingNumber,
38
+ buildCanonicalTiming,
39
+ buildTimingDiagnostics,
40
+ } from './browser-harness-suite-helpers.js';
41
+ import {
42
+ resolveDeviceInfo,
43
+ resolveKernelPathForModel,
44
+ initializeSuiteModel,
45
+ } from './browser-harness-model-helpers.js';
46
+ import {
47
+ resolveBenchmarkRunSettings,
48
+ runEmbeddingSemanticChecks,
49
+ isCoherentOutput,
50
+ runGeneration,
51
+ runEmbedding,
52
+ } from './browser-harness-text-helpers.js';
53
+ import { buildSuiteContractMetrics } from './browser-harness-contract-helpers.js';
54
+ import {
55
+ runDiffusionSuite,
56
+ runEnergySuite,
57
+ } from './browser-harness-diffusion-energy-suites.js';
58
+ import { collectTrainingArtifactsFromSuiteResult } from './browser-harness-report-helpers.js';
28
59
 
29
60
  const TRAINING_SUITE_MODULE_PATH = '../training/suite.js';
30
- const NODE_SOURCE_RUNTIME_MODULE_PATH = '../tooling/node-source-runtime.js';
31
61
  let trainingSuiteModulePromise = null;
32
62
 
33
63
  async function loadTrainingSuiteModule() {
@@ -42,346 +72,20 @@ export async function runTrainingSuite(options = {}) {
42
72
  return module.runTrainingSuite(options);
43
73
  }
44
74
 
75
+ export {
76
+ loadRuntimeConfigFromUrl,
77
+ applyRuntimeConfigFromUrl,
78
+ loadRuntimePreset,
79
+ applyRuntimePreset,
80
+ applyRuntimeForRun,
81
+ buildSuiteSummary,
82
+ };
83
+
45
84
  async function runTrainingBenchSuite(options = {}) {
46
85
  const module = await loadTrainingSuiteModule();
47
86
  return module.runTrainingBenchSuite(options);
48
87
  }
49
88
 
50
- function buildSuiteContractMetrics(suite, baseMetrics, manifest) {
51
- const executionContractArtifact = buildExecutionContractArtifact(manifest);
52
- const executionV0GraphContractArtifact = executionContractArtifact?.executionV0?.graph ?? null;
53
- const layerPatternContractArtifact = getInferenceLayerPatternContractArtifact();
54
- const requiredInferenceFieldsArtifact = manifest?.modelType === 'transformer'
55
- && isPlainObject(manifest?.inference?.attention)
56
- ? buildManifestRequiredInferenceFieldsArtifact(
57
- manifest?.inference ?? null,
58
- `${manifest?.modelId ?? 'unknown'}.inference`
59
- )
60
- : null;
61
- return validateBrowserSuiteMetrics({
62
- ...baseMetrics,
63
- schemaVersion: 1,
64
- source: 'doppler',
65
- suite,
66
- ...(executionContractArtifact ? { executionContractArtifact } : {}),
67
- executionV0GraphContractArtifact,
68
- layerPatternContractArtifact,
69
- requiredInferenceFieldsArtifact,
70
- });
71
- }
72
-
73
- function parseReportTimestamp(rawTimestamp, label = 'timestamp') {
74
- if (rawTimestamp == null) {
75
- return null;
76
- }
77
-
78
- if (rawTimestamp instanceof Date) {
79
- const timestamp = rawTimestamp.getTime();
80
- if (!Number.isFinite(timestamp)) {
81
- throw new Error(`Invalid ${label}: not a valid Date.`);
82
- }
83
- return rawTimestamp.toISOString();
84
- }
85
-
86
- if (typeof rawTimestamp === 'number') {
87
- if (!Number.isFinite(rawTimestamp)) {
88
- throw new Error(`Invalid ${label}: must be a finite epoch timestamp.`);
89
- }
90
- return new Date(rawTimestamp).toISOString();
91
- }
92
-
93
- if (typeof rawTimestamp === 'string') {
94
- const trimmed = rawTimestamp.trim();
95
- if (trimmed.length === 0) {
96
- return null;
97
- }
98
- const numericCandidate = Number(trimmed);
99
- if (Number.isFinite(numericCandidate)) {
100
- return new Date(numericCandidate).toISOString();
101
- }
102
- const parsed = new Date(trimmed);
103
- if (Number.isNaN(parsed.getTime())) {
104
- throw new Error(`Invalid ${label}: expected ISO-8601 string or epoch milliseconds.`);
105
- }
106
- return parsed.toISOString();
107
- }
108
-
109
- throw new Error(`Invalid ${label}: expected Date, ISO-8601 string, epoch milliseconds, or nullish.`);
110
- }
111
-
112
- function resolveReportTimestamp(rawTimestamp, label, fallbackTimestamp = null) {
113
- const parsed = parseReportTimestamp(rawTimestamp, label);
114
- return parsed ?? (fallbackTimestamp == null ? new Date().toISOString() : String(fallbackTimestamp));
115
- }
116
-
117
- function resolveRuntime(options) {
118
- if (options.runtime) return options.runtime;
119
- if (options.searchParams) return parseRuntimeOverridesFromURL(options.searchParams);
120
- if (typeof globalThis.location === 'undefined') return parseRuntimeOverridesFromURL(new URLSearchParams());
121
- return parseRuntimeOverridesFromURL();
122
- }
123
-
124
- function normalizePresetPath(value) {
125
- const trimmed = String(value || '').replace(/^[./]+/, '');
126
- if (!trimmed) return null;
127
- return trimmed.endsWith('.json') ? trimmed : `${trimmed}.json`;
128
- }
129
-
130
- function resolvePresetBaseUrl() {
131
- try {
132
- return new URL('../config/presets/runtime/', import.meta.url).toString().replace(/\/$/, '');
133
- } catch {
134
- if (typeof globalThis.location !== 'undefined' && globalThis.location?.href) {
135
- return new URL('/src/config/presets/runtime/', globalThis.location.href).toString().replace(/\/$/, '');
136
- }
137
- return '/src/config/presets/runtime';
138
- }
139
- }
140
-
141
- function cloneRuntimeConfig(runtimeConfig) {
142
- if (!runtimeConfig) return null;
143
- if (typeof structuredClone === 'function') {
144
- return structuredClone(runtimeConfig);
145
- }
146
- return JSON.parse(JSON.stringify(runtimeConfig));
147
- }
148
-
149
- function snapshotRuntimeState() {
150
- return {
151
- runtimeConfig: cloneRuntimeConfig(getRuntimeConfig()),
152
- activeKernelPath: getActiveKernelPath(),
153
- activeKernelPathSource: getActiveKernelPathSource(),
154
- activeKernelPathPolicy: getActiveKernelPathPolicy(),
155
- };
156
- }
157
-
158
- function restoreRuntimeState(snapshot) {
159
- if (!snapshot) {
160
- return;
161
- }
162
- setRuntimeConfig(snapshot.runtimeConfig);
163
- setActiveKernelPath(
164
- snapshot.activeKernelPath,
165
- snapshot.activeKernelPathSource || 'none',
166
- snapshot.activeKernelPathPolicy ?? null
167
- );
168
- }
169
-
170
- async function runWithRuntimeIsolationForSuite(run) {
171
- const snapshot = snapshotRuntimeState();
172
- try {
173
- return await run();
174
- } finally {
175
- restoreRuntimeState(snapshot);
176
- }
177
- }
178
-
179
- function resolveRuntimeFromConfig(config) {
180
- if (!config || typeof config !== 'object') return null;
181
- if (config.runtime && typeof config.runtime === 'object') return config.runtime;
182
- if (config.shared || config.loading || config.inference || config.emulation) return config;
183
- return null;
184
- }
185
-
186
- function sanitizeReportOutput(output) {
187
- if (output == null) return null;
188
- if (typeof output !== 'object') return output;
189
- if (ArrayBuffer.isView(output)) {
190
- return {
191
- type: output.constructor?.name || 'TypedArray',
192
- length: Number.isFinite(output.length) ? output.length : null,
193
- };
194
- }
195
- if (
196
- Number.isFinite(output?.width)
197
- && Number.isFinite(output?.height)
198
- && ArrayBuffer.isView(output?.pixels)
199
- ) {
200
- const { pixels, ...rest } = output;
201
- return {
202
- ...rest,
203
- width: output.width,
204
- height: output.height,
205
- pixels: {
206
- type: pixels.constructor?.name || 'TypedArray',
207
- length: Number.isFinite(pixels.length) ? pixels.length : null,
208
- },
209
- };
210
- }
211
- return output;
212
- }
213
-
214
- function normalizeExtends(value) {
215
- if (Array.isArray(value)) {
216
- return value.map((entry) => String(entry || '').trim()).filter(Boolean);
217
- }
218
- if (typeof value === 'string') {
219
- const trimmed = value.trim();
220
- return trimmed ? [trimmed] : [];
221
- }
222
- return [];
223
- }
224
-
225
- function normalizeExtendsPath(value) {
226
- const trimmed = String(value || '').trim();
227
- if (!trimmed) return null;
228
- return trimmed.endsWith('.json') ? trimmed : `${trimmed}.json`;
229
- }
230
-
231
- function resolveAbsoluteUrl(target, base) {
232
- try {
233
- if (base) {
234
- return new URL(target, base).toString();
235
- }
236
- if (typeof globalThis.location !== 'undefined' && globalThis.location?.href) {
237
- return new URL(target, globalThis.location.href).toString();
238
- }
239
- return new URL(target, import.meta.url).toString();
240
- } catch {
241
- return target;
242
- }
243
- }
244
-
245
- function isAbsoluteUrl(value) {
246
- return /^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(value);
247
- }
248
-
249
/**
 * Join `path` onto `base`.
 *
 * With an empty base the path is returned untouched. An absolute-URL base is
 * treated as a directory (a trailing slash is added if missing) and resolved
 * with `URL`. Any other base is joined textually with exactly one slash.
 *
 * @param {string} base
 * @param {string} path
 * @returns {string}
 */
function joinUrl(base, path) {
  if (!base) {
    return path;
  }
  if (!isAbsoluteUrl(base)) {
    const head = base.replace(/\/$/, '');
    const tail = path.replace(/^\//, '');
    return `${head}/${tail}`;
  }
  const directoryBase = base.endsWith('/') ? base : `${base}/`;
  return new URL(path, directoryBase).toString();
}
258
-
259
/**
 * List the URLs to try, in order, for one `extends` reference.
 *
 * - Absolute URLs and root-relative paths are used as-is.
 * - `./` and `../` references resolve against `context.sourceUrl`.
 * - Other references containing `/` are joined onto `context.presetBaseUrl`.
 * - Bare names are tried under the preset base, under its `modes/` folder,
 *   and next to the source file; duplicates are removed.
 *
 * @param {*} ref
 * @param {{ sourceUrl?: string, presetBaseUrl?: string }} context
 * @returns {string[]} Empty when the reference normalizes to nothing.
 */
function resolveExtendCandidates(ref, context) {
  const refPath = normalizeExtendsPath(ref);
  if (!refPath) {
    return [];
  }
  if (isAbsoluteUrl(refPath) || refPath.startsWith('/')) {
    return [refPath];
  }

  const isDotRelative = ['./', '../'].some((prefix) => refPath.startsWith(prefix));
  if (isDotRelative) {
    return [resolveAbsoluteUrl(refPath, context.sourceUrl)];
  }
  if (refPath.includes('/')) {
    return [joinUrl(context.presetBaseUrl, refPath)];
  }

  // A Set keeps first-seen order while dropping repeated URLs.
  const unique = new Set();
  if (context.presetBaseUrl) {
    unique.add(joinUrl(context.presetBaseUrl, refPath));
    unique.add(joinUrl(context.presetBaseUrl, `modes/${refPath}`));
  }
  if (context.sourceUrl) {
    const sourceDir = resolveAbsoluteUrl('./', context.sourceUrl);
    unique.add(resolveAbsoluteUrl(refPath, sourceDir));
  }
  return [...unique];
}
282
-
283
/**
 * Fetch a runtime config document and parse it as JSON.
 *
 * @param {string} url
 * @param {{ signal?: AbortSignal }} [options] Only `signal` is forwarded to `fetch`.
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} On a non-OK response; `error.code` is
 *   `runtime_config_not_found` for 404 and `runtime_config_fetch_failed` otherwise.
 */
async function fetchRuntimeConfig(url, options = {}) {
  const response = await fetch(url, { signal: options.signal });
  if (response.ok) {
    return response.json();
  }
  const { status } = response;
  const failure = new Error(`Failed to load runtime config: ${status}`);
  failure.code = status === 404 ? 'runtime_config_not_found' : 'runtime_config_fetch_failed';
  throw failure;
}
292
-
293
/**
 * Resolve a config's `extends` chain and merge it with the config itself.
 *
 * Each referenced base is loaded in declaration order and folded together
 * with `mergeRuntimeValues`; the config's own values are merged last. The
 * `extends` key is removed from the result when it is defined.
 *
 * @param {object} config
 * @param {object} context Passed through to `loadRuntimeConfigFromRef`.
 * @returns {Promise<{ config: object, runtime: object }>}
 * @throws {Error} When `resolveRuntimeFromConfig` finds no runtime in `config`.
 */
async function resolveRuntimeConfigExtends(config, context) {
  const ownRuntime = resolveRuntimeFromConfig(config);
  if (!ownRuntime) {
    throw new Error('Runtime config is missing runtime fields');
  }

  let inheritedRuntime = null;
  let inheritedConfig = null;
  // Bases are loaded one at a time so that later refs override earlier ones.
  for (const ref of normalizeExtends(config.extends)) {
    const parent = await loadRuntimeConfigFromRef(ref, context);
    inheritedRuntime = inheritedRuntime
      ? mergeRuntimeValues(inheritedRuntime, parent.runtime)
      : parent.runtime;
    inheritedConfig = inheritedConfig
      ? mergeRuntimeValues(inheritedConfig, parent.config)
      : parent.config;
  }

  const runtime = inheritedRuntime
    ? mergeRuntimeValues(inheritedRuntime, ownRuntime)
    : ownRuntime;
  const mergedConfig = inheritedConfig
    ? mergeRuntimeValues(inheritedConfig, config)
    : { ...config };

  const resolved = { ...mergedConfig, runtime };
  if (resolved.extends !== undefined) {
    delete resolved.extends;
  }
  return { config: resolved, runtime };
}
317
-
318
/**
 * Fetch one runtime config and resolve its `extends` chain.
 *
 * @param {string} url
 * @param {object} [options] `presetBaseUrl`/`baseUrl` select the preset root;
 *   the whole object is also forwarded to the fetch and extends steps.
 * @param {string[]} [stack] URLs already being resolved, used to detect cycles.
 * @returns {Promise<{ config: object, runtime: object }>}
 * @throws {Error} When `url` is already present in `stack`.
 */
async function loadRuntimeConfigChain(url, options = {}, stack = []) {
  const presetBaseUrl = options.presetBaseUrl || options.baseUrl || resolvePresetBaseUrl();
  const resolvedUrl = resolveAbsoluteUrl(url);

  if (stack.includes(resolvedUrl)) {
    const cyclePath = [...stack, resolvedUrl].join(' -> ');
    throw new Error(`Runtime config extends cycle: ${cyclePath}`);
  }

  const config = await fetchRuntimeConfig(resolvedUrl, options);
  const context = {
    ...options,
    sourceUrl: resolvedUrl,
    presetBaseUrl,
    stack: [...stack, resolvedUrl],
  };
  return resolveRuntimeConfigExtends(config, context);
}
332
-
333
/**
 * Load the config named by an `extends` reference, trying each candidate URL
 * in order.
 *
 * A candidate that fails with code `runtime_config_not_found` is skipped;
 * any other failure is rethrown immediately.
 *
 * @param {*} ref
 * @param {object} context
 * @returns {Promise<{ config: object, runtime: object }>}
 * @throws {Error} When the reference yields no candidates, or none of them exist.
 */
async function loadRuntimeConfigFromRef(ref, context) {
  const candidates = resolveExtendCandidates(ref, context);
  if (!candidates.length) {
    throw new Error(`Runtime config extends is invalid: ${ref}`);
  }

  let notFoundError = null;
  for (const candidate of candidates) {
    try {
      return await loadRuntimeConfigChain(candidate, context, context.stack ?? []);
    } catch (error) {
      if (error?.code !== 'runtime_config_not_found') {
        throw error;
      }
      // Remember the miss and fall through to the next candidate.
      notFoundError = error;
    }
  }

  if (notFoundError) {
    throw notFoundError;
  }
  throw new Error(`Runtime config extends not found: ${ref}`);
}
355
-
356
/**
 * Load a runtime config (with its `extends` chain resolved) from a URL.
 *
 * @param {string} url
 * @param {object} [options] Forwarded to `loadRuntimeConfigChain`.
 * @returns {Promise<{ config: object, runtime: object }>}
 * @throws {Error} When `url` is falsy.
 */
export async function loadRuntimeConfigFromUrl(url, options = {}) {
  const hasUrl = Boolean(url);
  if (hasUrl) {
    return loadRuntimeConfigChain(url, options);
  }
  throw new Error('runtime config url is required');
}
362
-
363
/**
 * Load a runtime config from a URL and install its runtime with `setRuntimeConfig`.
 *
 * @param {string} url
 * @param {object} [options] Forwarded to `loadRuntimeConfigFromUrl`.
 * @returns {Promise<object>} The runtime that was applied.
 */
export async function applyRuntimeConfigFromUrl(url, options = {}) {
  const loaded = await loadRuntimeConfigFromUrl(url, options);
  const appliedRuntime = loaded.runtime;
  setRuntimeConfig(appliedRuntime);
  return appliedRuntime;
}
368
-
369
/**
 * Load a runtime preset by id from the preset base URL.
 *
 * @param {string} presetId
 * @param {object} [options] `baseUrl` overrides the default preset root; the
 *   remaining options are forwarded to `loadRuntimeConfigFromUrl`.
 * @returns {Promise<{ config: object, runtime: object }>}
 * @throws {Error} When `normalizePresetPath` returns nothing for `presetId`.
 */
export async function loadRuntimePreset(presetId, options = {}) {
  const presetBaseUrl = options.baseUrl || resolvePresetBaseUrl();
  const presetPath = normalizePresetPath(presetId);
  if (!presetPath) {
    throw new Error('runtime preset id is required');
  }
  // Drop one trailing slash so the join never produces a double slash.
  const trimmedBase = presetBaseUrl.replace(/\/$/, '');
  const presetUrl = `${trimmedBase}/${presetPath}`;
  return loadRuntimeConfigFromUrl(presetUrl, { ...options, presetBaseUrl });
}
378
-
379
/**
 * Load a runtime preset and install its runtime with `setRuntimeConfig`.
 *
 * @param {string} presetId
 * @param {object} [options] Forwarded to `loadRuntimePreset`.
 * @returns {Promise<object>} The runtime that was applied.
 */
export async function applyRuntimePreset(presetId, options = {}) {
  const loaded = await loadRuntimePreset(presetId, options);
  const appliedRuntime = loaded.runtime;
  setRuntimeConfig(appliedRuntime);
  return appliedRuntime;
}
384
-
385
89
  export async function initializeBrowserHarness(options = {}) {
386
90
  const { modelUrl, onProgress, log } = options;
387
91
  if (!modelUrl) {
@@ -426,1309 +130,102 @@ export async function runBrowserHarness(options = {}) {
426
130
 
427
131
  const BROWSER_SUITE_SET = Object.freeze([
428
132
  'kernels',
429
- 'inference',
430
- 'training',
431
- 'bench',
432
- 'debug',
433
- 'diffusion',
434
- 'energy',
435
- ]);
436
-
437
// Human-readable label of the runner used for each browser suite name.
const BROWSER_SUITE_DISPATCH_MAP = Object.freeze(
  Object.fromEntries([
    ['kernels', 'runKernelSuite'],
    ['inference', 'runInferenceSuite'],
    ['training', 'runTrainingSuite'],
    ['bench', 'runBenchSuite'],
    ['debug', 'runInferenceSuite(debug)'],
    ['diffusion', 'runDiffusionSuite'],
    ['energy', 'runEnergySuite'],
  ])
);
446
-
447
/**
 * Return a fresh, mutable copy of the supported browser suite names.
 *
 * @returns {string[]}
 */
export function getBrowserSupportedSuites() {
  return Array.from(BROWSER_SUITE_SET);
}
450
-
451
/**
 * Return a fresh, mutable copy of the suite-name → runner-label map.
 *
 * @returns {Record<string, string>}
 */
export function getBrowserSuiteDispatchMap() {
  return Object.assign({}, BROWSER_SUITE_DISPATCH_MAP);
}
454
-
455
/**
 * Build (but do not throw) the error reported for an unknown suite name.
 *
 * The error carries `code: 'unsupported_suite'` plus the requested suite,
 * the allowed suites, and the command/surface labels, both as top-level
 * properties and grouped under `details`.
 *
 * @param {string} requestedSuite
 * @param {{ command?: string, surface?: string }} [context] Blank or
 *   non-string labels fall back to `run-browser-suite` / `browser`.
 * @returns {Error}
 */
function createUnsupportedSuiteError(requestedSuite, context = {}) {
  const labelOr = (candidate, fallback) => {
    const trimmed = typeof candidate === 'string' ? candidate.trim() : '';
    return trimmed || fallback;
  };
  const command = labelOr(context.command, 'run-browser-suite');
  const surface = labelOr(context.surface, 'browser');
  const allowedSuites = [...BROWSER_SUITE_SET];

  const error = new Error(
    `Unsupported suite "${requestedSuite}". Allowed suites: ${allowedSuites.join(', ')}. ` +
    `command="${command}" surface="${surface}".`
  );
  return Object.assign(error, {
    code: 'unsupported_suite',
    requestedSuite,
    allowedSuites,
    command,
    surface,
    details: {
      requestedSuite,
      allowedSuites,
      command,
      surface,
    },
  });
}
480
-
481
/**
 * Extract the `command`/`surface` labels from suite options.
 * Any string (including an empty one) is kept; non-strings get the defaults.
 *
 * @param {{ command?: *, surface?: * }} [options]
 * @returns {{ command: string, surface: string }}
 */
function resolveSuiteContext(options = {}) {
  const stringOr = (candidate, fallback) => (typeof candidate === 'string' ? candidate : fallback);
  return {
    command: stringOr(options.command, 'run-browser-suite'),
    surface: stringOr(options.surface, 'browser'),
  };
}
489
-
490
/**
 * Canonicalize a suite name: trim, lower-case, and map `benchmark` to `bench`.
 *
 * @param {*} value
 * @param {object} [context] Forwarded to `createUnsupportedSuiteError`.
 * @returns {string} A member of `BROWSER_SUITE_SET`.
 * @throws {Error} The unsupported-suite error when the name is empty or unknown.
 */
function normalizeSuite(value, context = {}) {
  const requested = String(value || '').trim().toLowerCase();
  if (requested === '') {
    throw createUnsupportedSuiteError(requested, context);
  }

  let canonical = requested;
  if (requested === 'benchmark') {
    canonical = 'bench';
  }

  if (BROWSER_SUITE_SET.includes(canonical)) {
    return canonical;
  }
  throw createUnsupportedSuiteError(canonical, context);
}
501
-
502
/**
 * Tally suite results into a summary record.
 *
 * A result counts as skipped when `skipped` is truthy, otherwise as passed
 * when `passed` is truthy, otherwise as failed.
 *
 * @param {string} suiteName
 * @param {Array<{ skipped?: boolean, passed?: boolean }>} results Non-arrays are treated as empty.
 * @param {number} startTimeMs A `performance.now()` timestamp; a non-finite
 *   value produces a duration of 0.
 * @returns {{ suite: string, passed: number, failed: number, skipped: number, duration: number, results: Array }}
 */
export function buildSuiteSummary(suiteName, results, startTimeMs) {
  const safeResults = Array.isArray(results) ? results : [];
  const tally = { passed: 0, failed: 0, skipped: 0 };
  for (const result of safeResults) {
    let bucket = 'failed';
    if (result.skipped) {
      bucket = 'skipped';
    } else if (result.passed) {
      bucket = 'passed';
    }
    tally[bucket] += 1;
  }

  const finishedAt = performance.now();
  const startedAt = Number.isFinite(startTimeMs) ? startTimeMs : performance.now();
  const duration = Math.max(0, finishedAt - startedAt);

  return {
    suite: suiteName,
    passed: tally.passed,
    failed: tally.failed,
    skipped: tally.skipped,
    duration,
    results: safeResults,
  };
}
519
-
520
/**
 * Accept `'cold'` or `'warm'`; anything else becomes `'warm'`.
 *
 * @param {*} value
 * @returns {'cold'|'warm'}
 */
function normalizeCacheMode(value) {
  if (value === 'cold') {
    return 'cold';
  }
  return 'warm';
}
523
-
524
/**
 * Accept an explicit load mode (`opfs`, `http`, `memory`); otherwise choose
 * `http` when a model URL is available and `opfs` when it is not.
 *
 * @param {*} value
 * @param {boolean} hasModelUrl
 * @returns {'opfs'|'http'|'memory'}
 */
function normalizeLoadMode(value, hasModelUrl) {
  switch (value) {
    case 'opfs':
    case 'http':
    case 'memory':
      return value;
    default:
      return hasModelUrl ? 'http' : 'opfs';
  }
}
530
-
531
/**
 * Detect Node.js by the presence of `process.versions.node`.
 *
 * @returns {boolean}
 */
function isNodeRuntime() {
  if (typeof process === 'undefined') {
    return false;
  }
  return Boolean(process.versions?.node);
}
534
-
535
/**
 * Trim and lower-case a workload type label.
 *
 * @param {*} value
 * @returns {string|null} `null` when the label is empty or falsy.
 */
function normalizeWorkloadType(value) {
  const label = String(value || '').trim().toLowerCase();
  return label === '' ? null : label;
}
539
-
540
/**
 * Return `value` when it is a finite number, otherwise 0.
 * Numeric strings are not coerced (`Number.isFinite` rejects them).
 *
 * @param {*} value
 * @returns {number}
 */
function safeStatsValue(value) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  return Number(value);
}
543
-
544
/**
 * Convert a count over a duration in milliseconds to a per-second rate,
 * rounded to two decimals.
 *
 * @param {*} count
 * @param {*} durationMs
 * @returns {number} 0 when either input is not a positive finite number.
 */
function calculateRatePerSecond(count, durationMs) {
  const units = safeStatsValue(count);
  const elapsedMs = safeStatsValue(durationMs);
  const isMeasurable = units > 0 && elapsedMs > 0;
  if (!isMeasurable) {
    return 0;
  }
  const perSecond = (units * 1000) / elapsedMs;
  return Number(perSecond.toFixed(2));
}
550
-
551
/**
 * Assemble the schema-version-1 performance artifact for a diffusion run.
 *
 * CPU timings are the `median` of each phase in `cpuStats` (0 when missing).
 * GPU timings are the corresponding medians only when
 * `gpuStats.available === true`; otherwise they are `null`. Throughput is
 * derived from the CPU prefill and denoise medians.
 *
 * @param {object} params
 * @param {number} params.warmupRuns
 * @param {number} params.timedRuns
 * @param {number} params.width
 * @param {number} params.height
 * @param {number} params.steps
 * @param {number} params.guidanceScale
 * @param {number} params.avgPrefillTokens
 * @param {number} params.avgDecodeTokens
 * @param {object} [params.cpuStats]
 * @param {object} [params.gpuStats]
 * @returns {object}
 */
function buildDiffusionPerformanceArtifact({
  warmupRuns,
  timedRuns,
  width,
  height,
  steps,
  guidanceScale,
  avgPrefillTokens,
  avgDecodeTokens,
  cpuStats,
  gpuStats,
}) {
  const phases = ['totalMs', 'prefillMs', 'denoiseMs', 'vaeMs'];
  const phaseMedians = (stats) => Object.fromEntries(
    phases.map((phase) => [phase, safeStatsValue(stats?.[phase]?.median)])
  );

  const cpu = phaseMedians(cpuStats);
  const gpuMedians = phaseMedians(gpuStats);
  const gpuAvailable = gpuStats?.available === true;
  const gpu = { available: gpuAvailable };
  for (const phase of phases) {
    gpu[phase] = gpuAvailable ? gpuMedians[phase] : null;
  }

  return {
    schemaVersion: 1,
    warmupRuns,
    timedRuns,
    shape: { width, height },
    scheduler: { steps, guidanceScale },
    cpu,
    gpu,
    throughput: {
      prefillTokensPerSec: calculateRatePerSecond(avgPrefillTokens, cpu.prefillMs),
      decodeTokensPerSec: calculateRatePerSecond(avgDecodeTokens, cpu.denoiseMs),
      decodeStepsPerSec: calculateRatePerSecond(steps, cpu.denoiseMs),
    },
    tokens: {
      avgPrefillTokens: safeStatsValue(avgPrefillTokens),
      avgDecodeTokens: safeStatsValue(avgDecodeTokens),
    },
  };
}
611
-
612
/**
 * Validate `metrics.performanceArtifact` against the schema-version-1 shape.
 *
 * Checks, in order: the artifact is an object, `schemaVersion` is 1,
 * `warmupRuns` is a non-negative integer, `timedRuns` is a positive integer,
 * the four CPU timings are finite, and `throughput.decodeStepsPerSec` is finite.
 *
 * @param {{ performanceArtifact?: object }} metrics
 * @param {string} [contextLabel] Prefix for the error message.
 * @returns {void}
 * @throws {Error} Describing the first failed check.
 */
function assertDiffusionPerformanceArtifact(metrics, contextLabel = 'diffusion') {
  const artifact = metrics?.performanceArtifact;
  const problem = (detail) => new Error(`${contextLabel}: metrics.performanceArtifact${detail}`);

  if (!artifact || typeof artifact !== 'object') {
    throw problem(' is required.');
  }
  if (artifact.schemaVersion !== 1) {
    throw problem('.schemaVersion must be 1.');
  }
  if (!Number.isInteger(artifact.warmupRuns) || artifact.warmupRuns < 0) {
    throw problem('.warmupRuns must be a non-negative integer.');
  }
  if (!Number.isInteger(artifact.timedRuns) || artifact.timedRuns < 1) {
    throw problem('.timedRuns must be a positive integer.');
  }

  const finiteFields = [
    ['cpu', 'prefillMs'],
    ['cpu', 'denoiseMs'],
    ['cpu', 'vaeMs'],
    ['cpu', 'totalMs'],
    ['throughput', 'decodeStepsPerSec'],
  ];
  for (const [group, field] of finiteFields) {
    if (!Number.isFinite(artifact?.[group]?.[field])) {
      throw problem(`.${group}.${field} must be finite.`);
    }
  }
}
642
-
643
/**
 * Format a timing value with two decimals via `formatMetricNumber`.
 *
 * @param {*} value
 * @param {*} [fallback] Passed through to `formatMetricNumber`.
 * @returns {*}
 */
function toTimingNumber(value, fallback = 0) {
  const timingDigits = 2;
  return formatMetricNumber(value, fallback, timingDigits);
}
646
-
647
/**
 * Round a value to `digits` decimals via `formatMetricNumber`.
 *
 * @param {*} value
 * @param {*} [fallback]
 * @param {number} [digits]
 * @returns {*}
 */
function safeToFixed(value, fallback = 0, digits = 2) {
  const rounded = formatMetricNumber(value, fallback, digits);
  return rounded;
}
650
-
651
/**
 * Read `stats[key]` (tolerating a missing `stats`) and format it with two
 * decimals via `formatMetricNumber`.
 *
 * @param {object|null|undefined} stats
 * @param {string} key
 * @param {*} [fallback]
 * @returns {*}
 */
function sampleTimingNumber(stats, key, fallback = 0) {
  const sample = stats?.[key];
  return formatMetricNumber(sample, fallback, 2);
}
654
-
655
/**
 * Round a metric to `digits` decimal places.
 *
 * Values that carry no measurement return `fallback`: `null`, `undefined`,
 * blank strings, and anything that does not convert to a finite number.
 * `null` and blank strings are rejected explicitly because `Number()`
 * coerces them to 0, which would otherwise report a missing sample as a
 * real zero and bypass the caller's fallback.
 *
 * @param {*} value
 * @param {*} [fallback] Returned when `value` is not a usable number.
 * @param {number} [digits] Decimal places to keep.
 * @returns {*} The rounded number, or `fallback`.
 */
function formatMetricNumber(value, fallback = 0, digits = 2) {
  const isMissing = value == null
    || (typeof value === 'string' && value.trim() === '');
  if (isMissing) {
    return fallback;
  }
  const numericValue = Number(value);
  if (!Number.isFinite(numericValue)) {
    return fallback;
  }
  return Number(numericValue.toFixed(digits));
}
660
-
661
/**
 * Normalize raw timing overrides into the canonical timing record.
 *
 * - `modelLoadMs`, `prefillMs`, `decodeMs` default to 0.
 * - `totalRunMs` defaults to `prefillMs + decodeMs`.
 * - `firstResponseMs` defaults to `modelLoadMs + totalRunMs`.
 * - `firstTokenMs`, the per-token percentiles, and the tokens-per-second
 *   fields are `null` unless the override is a finite number.
 * - `loadMode` is copied through unchanged.
 *
 * @param {object} [overrides]
 * @returns {object}
 */
function buildCanonicalTiming(overrides = {}) {
  const finiteOrNull = (candidate) => (
    Number.isFinite(candidate) ? toTimingNumber(candidate) : null
  );

  const cacheMode = normalizeCacheMode(overrides.cacheMode);
  const modelLoadMs = toTimingNumber(overrides.modelLoadMs, 0);
  const prefillMs = toTimingNumber(overrides.prefillMs, 0);
  const decodeMs = toTimingNumber(overrides.decodeMs, 0);

  const derivedRunMs = toTimingNumber(prefillMs + decodeMs);
  const totalRunMs = toTimingNumber(overrides.totalRunMs, derivedRunMs);

  let firstResponseMs;
  if (Number.isFinite(overrides.firstResponseMs)) {
    firstResponseMs = toTimingNumber(overrides.firstResponseMs);
  } else {
    firstResponseMs = toTimingNumber(modelLoadMs + totalRunMs);
  }

  return {
    modelLoadMs,
    firstTokenMs: finiteOrNull(overrides.firstTokenMs),
    firstResponseMs,
    prefillMs,
    decodeMs,
    decodeMsPerTokenP50: finiteOrNull(overrides.decodeMsPerTokenP50),
    decodeMsPerTokenP95: finiteOrNull(overrides.decodeMsPerTokenP95),
    decodeMsPerTokenP99: finiteOrNull(overrides.decodeMsPerTokenP99),
    decodeTokensPerSec: finiteOrNull(overrides.decodeTokensPerSec),
    prefillTokensPerSec: finiteOrNull(overrides.prefillTokensPerSec),
    totalRunMs,
    cacheMode,
    loadMode: overrides.loadMode,
  };
}
708
-
709
/**
 * Produce a diagnostics record that cross-checks timing components.
 *
 * Each component is rounded when finite and `null` otherwise. Two sums are
 * derived (`modelLoadMs + firstTokenMs` and `prefillMs + decodeMs`) and
 * compared with the reported `firstResponseMs` / `totalRunMs`; a residual
 * within 2 ms in absolute value counts as consistent. Sums, residuals and
 * consistency flags are `null` when an input is missing.
 *
 * @param {object} [timing]
 * @param {{ prefillSemantics?: string, source?: string }} [options]
 * @returns {object}
 */
function buildTimingDiagnostics(timing = {}, options = {}) {
  const roundIfFinite = (candidate) => (
    Number.isFinite(candidate) ? toTimingNumber(candidate) : null
  );
  const combineIfFinite = (left, right, combine) => (
    Number.isFinite(left) && Number.isFinite(right)
      ? toTimingNumber(combine(left, right))
      : null
  );
  const add = (a, b) => a + b;
  const subtract = (a, b) => a - b;
  const withinTolerance = (residual) => (
    Number.isFinite(residual) ? Math.abs(residual) <= 2 : null
  );

  const prefillSemantics = String(options.prefillSemantics || 'internal_prefill_phase');
  const source = String(options.source || 'doppler');

  const componentsMs = {
    modelLoadMs: roundIfFinite(timing.modelLoadMs),
    firstTokenMs: roundIfFinite(timing.firstTokenMs),
    firstResponseMs: roundIfFinite(timing.firstResponseMs),
    prefillMs: roundIfFinite(timing.prefillMs),
    decodeMs: roundIfFinite(timing.decodeMs),
    totalRunMs: roundIfFinite(timing.totalRunMs),
  };

  const sumsMs = {
    firstResponseFromLoadAndFirstTokenMs: combineIfFinite(
      componentsMs.modelLoadMs,
      componentsMs.firstTokenMs,
      add
    ),
    runFromPrefillAndDecodeMs: combineIfFinite(
      componentsMs.prefillMs,
      componentsMs.decodeMs,
      add
    ),
  };

  const residualsMs = {
    firstResponseResidualMs: combineIfFinite(
      componentsMs.firstResponseMs,
      sumsMs.firstResponseFromLoadAndFirstTokenMs,
      subtract
    ),
    runResidualMs: combineIfFinite(
      componentsMs.totalRunMs,
      sumsMs.runFromPrefillAndDecodeMs,
      subtract
    ),
  };

  return {
    schemaVersion: 1,
    source,
    semantics: {
      modelLoadMs: 'model initialization/load before generation',
      firstTokenMs: 'ttft from generation start',
      firstResponseMs: 'modelLoadMs + firstTokenMs',
      prefillMs: prefillSemantics,
      decodeMs: 'time after first token',
      totalRunMs: 'prefillMs + decodeMs',
    },
    componentsMs,
    sumsMs,
    residualsMs,
    consistent: {
      firstResponse: withinTolerance(residualsMs.firstResponseResidualMs),
      totalRun: withinTolerance(residualsMs.runResidualMs),
    },
  };
}
774
-
775
/**
 * Return the kernel capabilities, or `null` when `getKernelCapabilities` throws.
 *
 * @returns {*}
 */
function resolveDeviceInfo() {
  let deviceInfo = null;
  try {
    deviceInfo = getKernelCapabilities();
  } catch {
    // Deliberately best-effort: device info is optional in reports.
    deviceInfo = null;
  }
  return deviceInfo;
}
782
-
783
/**
 * Resolve and activate the kernel path for a model stored locally.
 *
 * Opens the model store for `options.modelId`, loads and parses its manifest,
 * resolves the kernel-path state against the runtime config, and activates it.
 *
 * @param {{ modelId?: string, runtime?: { runtimeConfig?: object } }} [options]
 * @returns {Promise<{ modelId: string, kernelPath: *, source: * }|null>}
 *   `null` when no model id is given or the store holds no manifest.
 */
async function resolveKernelPathForModel(options = {}) {
  // Read the runtime config first, matching the original evaluation order.
  const runtimeConfig = options.runtime?.runtimeConfig ?? getRuntimeConfig();

  const requestedModelId = options.modelId;
  if (!requestedModelId) {
    return null;
  }

  await openModelStore(requestedModelId);
  const manifestText = await loadManifestFromStore();
  if (!manifestText) {
    return null;
  }

  const manifest = parseManifest(manifestText);
  const modelId = manifest.modelId ?? requestedModelId;
  if (!manifest) {
    return null;
  }

  const modelConfig = parseModelConfigFromManifest(manifest, runtimeConfig);
  const kernelPathState = resolveKernelPathState({
    manifest,
    runtimeConfig,
    modelConfig,
  });
  activateKernelPathState(kernelPathState);

  const { resolvedKernelPath: kernelPath, kernelPathSource: source } = kernelPathState;
  return { modelId, kernelPath, source };
}
812
-
813
/**
 * Build an inference pipeline for a model already present in the model store.
 *
 * Steps: apply the runtime config override (if any), open the store, load and
 * parse the manifest, initialize the GPU device, then create the pipeline.
 * Progress is reported through `options.onProgress(stage, fraction, message)`.
 *
 * @param {string} modelId
 * @param {{ onProgress?: Function, runtime?: object }} [options]
 * @returns {Promise<{ pipeline: *, manifest: *, capabilities: * }>}
 * @throws {Error} When `modelId` is missing or the store has no manifest.
 */
async function initializeInferenceFromStorage(modelId, options = {}) {
  const { onProgress } = options;
  if (!modelId) {
    throw new Error('modelId is required');
  }

  const runtimeOverride = options.runtime?.runtimeConfig;
  if (runtimeOverride) {
    setRuntimeConfig(options.runtime.runtimeConfig);
  }

  onProgress?.('storage', 0.05, 'Opening model store...');
  await openModelStore(modelId);

  onProgress?.('manifest', 0.1, 'Loading manifest...');
  const serializedManifest = await loadManifestFromStore();
  if (!serializedManifest) {
    throw new Error('Manifest not found in storage');
  }
  const manifest = parseManifest(serializedManifest);

  onProgress?.('gpu', 0.2, 'Initializing WebGPU...');
  await initDevice();
  const gpuDevice = getDevice();
  const capabilities = getKernelCapabilities();

  onProgress?.('pipeline', 0.3, 'Creating pipeline...');
  const pipelineOptions = {
    gpu: { device: gpuDevice },
    runtime: options.runtime,
    onProgress,
  };
  const pipeline = await createPipeline(manifest, pipelineOptions);

  return { pipeline, manifest, capabilities };
}
847
-
848
/**
 * Build an inference pipeline directly from a local source model
 * (`loadMode=memory`).
 *
 * The source bundle is resolved through the Node source-runtime module, so
 * this only works when `isNodeRuntime()` is true and `sourcePath` is not a
 * `scheme://` URL.
 *
 * @param {string} sourcePath
 * @param {{ onProgress?: Function, runtime?: object, modelId?: string }} [options]
 * @returns {Promise<{ pipeline: *, manifest: *, capabilities: * }>}
 * @throws {Error} On a missing/non-string path, a non-Node runtime, a URL
 *   path, or when no source bundle is detected at the path.
 */
async function initializeInferenceFromSourcePath(sourcePath, options = {}) {
  const { onProgress } = options;

  const hasStringPath = Boolean(sourcePath) && typeof sourcePath === 'string';
  if (!hasStringPath) {
    throw new Error('modelUrl is required for loadMode=memory.');
  }
  if (!isNodeRuntime()) {
    throw new Error('loadMode=memory source runtime is currently supported on Node only.');
  }
  const urlWithAuthority = /^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//;
  if (urlWithAuthority.test(sourcePath)) {
    throw new Error(
      'loadMode=memory expects a local filesystem path (Safetensors directory or .gguf file), not an URL.'
    );
  }

  if (options.runtime?.runtimeConfig) {
    setRuntimeConfig(options.runtime.runtimeConfig);
  }

  onProgress?.('source', 0.05, 'Preparing source runtime bundle...');
  const sourceRuntimeModule = await import(NODE_SOURCE_RUNTIME_MODULE_PATH);
  const { resolveNodeSourceRuntimeBundle } = sourceRuntimeModule;
  const sourceBundle = await resolveNodeSourceRuntimeBundle({
    inputPath: sourcePath,
    modelId: options.modelId || null,
  });
  if (!sourceBundle) {
    throw new Error(
      `No source-runtime model detected at "${sourcePath}". ` +
      'Expected a Safetensors directory or a .gguf file path.'
    );
  }

  onProgress?.('gpu', 0.2, 'Initializing WebGPU...');
  await initDevice();
  const gpuDevice = getDevice();
  const capabilities = getKernelCapabilities();

  onProgress?.('pipeline', 0.3, 'Creating pipeline...');
  const pipelineOptions = {
    gpu: { device: gpuDevice },
    runtime: options.runtime,
    storage: sourceBundle.storageContext,
    onProgress,
  };
  const pipeline = await createPipeline(sourceBundle.manifest, pipelineOptions);

  return {
    pipeline,
    manifest: sourceBundle.manifest,
    capabilities,
  };
}
898
-
899
/**
 * Resolve a caller-supplied harness override into a harness record.
 *
 * `options.harnessOverride` may be the record itself or a function (sync or
 * async) that receives `options` and returns it. The record must expose
 * `pipeline.generate`. A missing manifest is replaced by a placeholder
 * diffusion manifest, and `modelLoadMs` is clamped to a non-negative number
 * (0 when absent or non-finite).
 *
 * @param {{ harnessOverride?: object|Function, modelId?: string }} [options]
 * @returns {Promise<object>}
 * @throws {Error} When the override is not an object or lacks `pipeline.generate`.
 */
async function resolveHarnessOverride(options = {}) {
  let input = options.harnessOverride;
  if (typeof input === 'function') {
    input = await options.harnessOverride(options);
  }

  if (!input || typeof input !== 'object') {
    throw new Error('harnessOverride must resolve to an object.');
  }
  const hasGenerate = Boolean(input.pipeline) && typeof input.pipeline.generate === 'function';
  if (!hasGenerate) {
    throw new Error('harnessOverride.pipeline.generate(request) is required.');
  }

  let manifest = input.manifest;
  if (!manifest || typeof manifest !== 'object') {
    manifest = {
      modelId: options.modelId || 'diffusion-harness-override',
      modelType: 'diffusion',
    };
  }

  let modelLoadMs = 0;
  if (Number.isFinite(input.modelLoadMs)) {
    modelLoadMs = Math.max(0, input.modelLoadMs);
  }

  return { ...input, manifest, modelLoadMs };
}
929
-
930
/**
 * Initialize the model harness for a suite run and record how long it took.
 *
 * Resolution order:
 *  1. `options.harnessOverride` — applied via `resolveHarnessOverride`
 *     (the override supplies its own `modelLoadMs`).
 *  2. `loadMode === 'memory'` — load from the local path in `options.modelUrl`.
 *  3. `options.modelId` without `options.modelUrl` — load from the model store.
 *  4. Otherwise — load from `options.modelUrl`.
 *
 * @param {object} [options]
 * @returns {Promise<object>} The harness, plus `modelLoadMs` (non-negative).
 * @throws {Error} When the selected path needs `modelUrl` and it is missing.
 */
async function initializeSuiteModel(options = {}) {
  if (options.harnessOverride) {
    if (options.runtime?.runtimeConfig) {
      setRuntimeConfig(options.runtime.runtimeConfig);
    }
    return resolveHarnessOverride(options);
  }

  const loadStart = performance.now();
  const runtime = resolveRuntime(options);
  // normalizeLoadMode expects "has a model URL"; the flag was previously
  // passed negated, which flipped the http/opfs default.
  const hasModelUrl = Boolean(options.modelUrl);
  const loadMode = normalizeLoadMode(options.loadMode, hasModelUrl);

  let harness;
  if (loadMode === 'memory') {
    if (!options.modelUrl) {
      throw new Error('loadMode=memory requires modelUrl to be a local model path.');
    }
    harness = await initializeInferenceFromSourcePath(options.modelUrl, { ...options, runtime });
  } else if (options.modelId && !options.modelUrl) {
    harness = await initializeInferenceFromStorage(options.modelId, { ...options, runtime });
  } else {
    if (!options.modelUrl) {
      throw new Error('modelUrl is required for this suite');
    }
    harness = await initializeInference(options.modelUrl, {
      runtime,
      onProgress: options.onProgress,
      log: options.log,
    });
  }

  const modelLoadMs = Math.max(0, performance.now() - loadStart);
  return { ...harness, modelLoadMs };
}
961
-
962
/**
 * Run the browser kernel test suite and summarize the results.
 *
 * The active kernel path, source and policy are captured beforehand and
 * restored afterwards, even if the tests throw. When `options.modelId` is
 * set, the model's kernel path is resolved and activated for the run.
 *
 * @param {{ modelId?: string }} [options]
 * @returns {Promise<object>} The `'kernels'` suite summary plus `deviceInfo`.
 */
async function runKernelSuite(options = {}) {
  const startedAt = performance.now();
  const testPage = await import('../../tests/kernels/browser/test-page.js');
  const kernelSuite = await import('../../tests/kernels/browser/kernel-suite.js');
  await testPage.initGPU();

  const savedPath = getActiveKernelPath();
  const savedSource = getActiveKernelPathSource();
  const savedPolicy = getActiveKernelPathPolicy();
  if (options.modelId) {
    await resolveKernelPathForModel(options);
  }

  let results;
  try {
    results = await kernelSuite.runKernelSuite(testPage.testHarness);
  } finally {
    setActiveKernelPath(savedPath, savedSource, savedPolicy);
  }

  return {
    ...buildSuiteSummary('kernels', results, startedAt),
    deviceInfo: resolveDeviceInfo(),
  };
}
987
-
988
-
989
-
990
-
991
-
992
// Default prompts used when a suite run supplies none.
const DEFAULT_HARNESS_PROMPT = 'Summarize this input in one sentence.';
const DEFAULT_RUNTIME_PLACEHOLDER_PROMPT = 'Hello from Doppler.';

// Chat-style default prompt; frozen at every level.
const DEFAULT_QWEN_PROMPT = (() => {
  const userTurn = Object.freeze({
    role: 'user',
    content: 'Answer in one short sentence: What color is the sky on a clear day?',
  });
  return Object.freeze({ messages: Object.freeze([userTurn]) });
})();

// Structured translation prompt (English → French); frozen at every level.
const DEFAULT_TRANSLATEGEMMA_PROMPT = (() => {
  const textPart = Object.freeze({
    type: 'text',
    source_lang_code: 'en',
    target_lang_code: 'fr',
    text: 'Hello world.',
  });
  const userTurn = Object.freeze({
    role: 'user',
    content: Object.freeze([textPart]),
  });
  return Object.freeze({ messages: Object.freeze([userTurn]) });
})();

const DEFAULT_HARNESS_MAX_TOKENS = 32;
const EMBEDDING_PREVIEW_LENGTH = 16;

// Defaults for the embedding semantic checks (accuracy floors and pair margin).
const EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1 = 0.67;
const EMBEDDING_SEMANTIC_MIN_PAIR_ACC = 0.67;
const EMBEDDING_SEMANTIC_PAIR_MARGIN = 0.01;
1022
-
1023
// Built-in retrieval fixtures: each case has a query, candidate documents,
// and the index of the document expected to match. Every case and its
// `docs` list is frozen.
const EMBEDDING_SEMANTIC_RETRIEVAL_CASES = Object.freeze(
  [
    {
      id: 'library_search',
      query: 'Where can I borrow books and study quietly?',
      docs: [
        'The city library lends books, provides study rooms, and offers free Wi-Fi.',
        'The cafe serves coffee, pastries, and sandwiches all day.',
        'The bike repair shop fixes flat tires and broken chains.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'password_reset',
      query: 'How do I reset my account password?',
      docs: [
        'To reset your password, open account settings and choose the forgot-password flow.',
        'Our shipping policy explains delivery timelines and tracking updates.',
        'The recipe combines tomatoes, basil, and olive oil.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'damaged_package',
      query: 'What should I do if my package arrives damaged?',
      docs: [
        'Contact support within seven days with photos to request a replacement for damaged items.',
        'The concert starts at 8 PM at the downtown arena.',
        'Plant roses in spring and water them twice a week.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'flight_change_policy',
      query: 'Can I change my flight after booking?',
      docs: [
        'The museum opens daily at 10 AM and offers guided tours on weekends.',
        'You can change your flight in Manage Booking up to 24 hours before departure, with any fare difference applied.',
        'Our gym membership includes group classes and access to the pool.',
      ],
      expectedDoc: 1,
    },
    {
      id: 'wifi_troubleshoot',
      query: 'Why does my home Wi-Fi keep disconnecting?',
      docs: [
        'The dessert menu includes cheesecake, brownies, and fruit tart.',
        'You can review your recent orders in your account purchase history.',
        'Frequent Wi-Fi drops can be fixed by restarting the router, updating firmware, and changing the wireless channel.',
      ],
      expectedDoc: 2,
    },
    {
      id: 'refund_deadline',
      query: 'How long do I have to request a refund?',
      docs: [
        'Refund requests are accepted within 30 days of purchase when the item is in original condition.',
        'The conference keynote starts at 9 AM in the main hall.',
        'Use a medium grind when brewing coffee with a drip machine.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'passport_renewal_docs',
      query: 'What documents do I need to renew a passport?',
      docs: [
        'To care for houseplants, water only when the top soil is dry.',
        'Passport renewal usually requires the application form, current passport, compliant photo, and payment.',
        'The train to downtown runs every 20 minutes during peak hours.',
      ],
      expectedDoc: 1,
    },
  ].map((fixture) => Object.freeze({ ...fixture, docs: Object.freeze(fixture.docs) }))
);
1095
-
1096
// Built-in pair fixtures: each case has an anchor sentence, a paraphrase
// (`positive`), and an unrelated sentence (`negative`). Every case is frozen.
const EMBEDDING_SEMANTIC_PAIR_CASES = Object.freeze(
  [
    {
      id: 'bike_paraphrase',
      anchor: 'The child is riding a bicycle through the park.',
      positive: 'A kid bikes along a path in the park.',
      negative: 'The stock market closed lower after interest-rate news.',
    },
    {
      id: 'cancel_subscription',
      anchor: 'Please cancel my subscription before renewal.',
      positive: 'I want to stop the plan so it does not renew.',
      negative: 'The mountain trail is closed after heavy snow.',
    },
    {
      id: 'battery_drain',
      anchor: 'The laptop battery drains very quickly.',
      positive: 'My notebook loses charge fast.',
      negative: 'This pasta sauce tastes sweet and spicy.',
    },
    {
      id: 'order_tracking',
      anchor: 'I need to track where my order is.',
      positive: 'How can I check my package delivery status?',
      negative: 'The violin concerto was composed in the 1800s.',
    },
    {
      id: 'account_lockout',
      anchor: 'My account is locked after too many login attempts.',
      positive: 'I cannot sign in because the system temporarily blocked my account.',
      negative: 'Bake the cake at 350 degrees for thirty minutes.',
    },
    {
      id: 'invoice_request',
      anchor: 'Please send me the invoice for last month.',
      positive: 'Can you provide the billing statement for the previous month?',
      negative: 'The hiking trail follows the river for five miles.',
    },
    {
      id: 'slow_internet',
      anchor: 'The internet speed is much slower tonight.',
      positive: 'My connection is unusually slow this evening.',
      negative: 'The novel explores themes of memory and loss.',
    },
  ].map((fixture) => Object.freeze(fixture))
);
1140
-
1141
/**
 * Return the trimmed string, or `null` for non-strings and blank strings.
 *
 * @param {*} value
 * @returns {string|null}
 */
function asText(value) {
  if (typeof value === 'string') {
    const text = value.trim();
    if (text) {
      return text;
    }
  }
  return null;
}
1146
-
1147
/**
 * Validate and normalize caller-supplied retrieval fixtures.
 *
 * An entry is kept only when it is an object with a non-blank `query`, at
 * least one non-blank doc, and a finite `expectedDoc`. `expectedDoc` is
 * floored and clamped into the range of the cleaned `docs` list. Entries
 * without a usable `id` get `case-<position>` (1-based position in the input).
 *
 * @param {*} cases
 * @returns {Array<{ id: string, query: string, docs: string[], expectedDoc: number }>|null}
 *   `null` when `cases` is not an array or no entry survives.
 */
function normalizeRetrievalFixtures(cases) {
  if (!Array.isArray(cases)) {
    return null;
  }

  const fixtures = [];
  for (const [position, entry] of cases.entries()) {
    if (!entry || typeof entry !== 'object') {
      continue;
    }

    const query = asText(entry.query);
    const docs = Array.isArray(entry.docs)
      ? entry.docs.map((doc) => asText(doc)).filter(Boolean)
      : [];
    const isUsable = Boolean(query) && docs.length > 0 && Number.isFinite(entry.expectedDoc);
    if (!isUsable) {
      continue;
    }

    const lastDocIndex = docs.length - 1;
    const requestedIndex = Math.floor(entry.expectedDoc);
    fixtures.push({
      id: asText(entry.id) ?? `case-${position + 1}`,
      query,
      docs,
      expectedDoc: Math.max(0, Math.min(requestedIndex, lastDocIndex)),
    });
  }

  return fixtures.length > 0 ? fixtures : null;
}
1169
-
1170
- function normalizePairFixtures(cases) {
1171
- if (!Array.isArray(cases)) return null;
1172
- const normalized = [];
1173
- for (let i = 0; i < cases.length; i++) {
1174
- const entry = cases[i];
1175
- if (!entry || typeof entry !== 'object') continue;
1176
-
1177
- const anchor = asText(entry.anchor);
1178
- const positive = asText(entry.positive);
1179
- const negative = asText(entry.negative);
1180
- if (!anchor || !positive || !negative) {
1181
- continue;
1182
- }
1183
- normalized.push({
1184
- id: asText(entry.id) ?? `pair-${i + 1}`,
1185
- anchor,
1186
- positive,
1187
- negative,
1188
- });
1189
- }
1190
- return normalized.length > 0 ? normalized : null;
1191
- }
1192
-
1193
- function resolveEmbeddingSemanticFixtures(runtimeConfig, options = null) {
1194
- const overrides = isPlainObject(options?.embeddingSemantic)
1195
- ? options.embeddingSemantic
1196
- : null;
1197
- const runtimeOverrides = runtimeConfig?.shared?.benchmark?.run?.embeddingSemantic;
1198
- const source = overrides ?? (isPlainObject(runtimeOverrides) ? runtimeOverrides : null);
1199
-
1200
- const retrievalCases = normalizeRetrievalFixtures(source?.retrievalCases)
1201
- ?? EMBEDDING_SEMANTIC_RETRIEVAL_CASES;
1202
- const pairCases = normalizePairFixtures(source?.pairCases)
1203
- ?? EMBEDDING_SEMANTIC_PAIR_CASES;
1204
- const minRetrievalTop1Acc = Number.isFinite(source?.minRetrievalTop1Acc)
1205
- ? Math.max(0, Math.min(1, Number(source.minRetrievalTop1Acc)))
1206
- : EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1;
1207
- const minPairAcc = Number.isFinite(source?.minPairAcc)
1208
- ? Math.max(0, Math.min(1, Number(source.minPairAcc)))
1209
- : EMBEDDING_SEMANTIC_MIN_PAIR_ACC;
1210
- const pairMargin = Number.isFinite(source?.pairMargin)
1211
- ? Number(source.pairMargin)
1212
- : EMBEDDING_SEMANTIC_PAIR_MARGIN;
1213
-
1214
- return {
1215
- retrievalCases,
1216
- pairCases,
1217
- minRetrievalTop1Acc,
1218
- minPairAcc,
1219
- pairMargin,
1220
- };
1221
- }
1222
-
1223
- function resolveEmbeddingSemanticStyle(pipeline) {
1224
- const manifest = pipeline?.manifest ?? null;
1225
- const style = selectRuleValue('inference', 'config', 'embeddingSemanticStyle', {
1226
- modelId: String(manifest?.modelId ?? '').toLowerCase(),
1227
- presetId: String(manifest?.inference?.presetId ?? '').toLowerCase(),
1228
- manifestModelType: String(
1229
- manifest?.config?.model_type
1230
- ?? manifest?.config?.text_config?.model_type
1231
- ?? ''
1232
- ).toLowerCase(),
1233
- });
1234
- if (typeof style === 'string' && style.length > 0) {
1235
- return style;
1236
- }
1237
- return 'default';
1238
- }
1239
-
1240
- function formatEmbeddingSemanticText(text, kind, style) {
1241
- if (style === 'embeddinggemma') {
1242
- if (kind === 'query') {
1243
- return `task: search result | query: ${text}`;
1244
- }
1245
- if (kind === 'document') {
1246
- return `title: None | text: ${text}`;
1247
- }
1248
- }
1249
- return text;
1250
- }
1251
-
1252
- function resolvePrompt(runtimeConfig) {
1253
- const runtimePrompt = runtimeConfig?.inference?.prompt;
1254
- if (typeof runtimePrompt === 'string' && runtimePrompt.trim()) {
1255
- return runtimePrompt.trim();
1256
- }
1257
- return DEFAULT_HARNESS_PROMPT;
1258
- }
1259
-
1260
- function isStructuredPromptInput(value) {
1261
- return Array.isArray(value) || (value != null && typeof value === 'object');
1262
- }
1263
-
1264
- function clonePromptInput(promptInput) {
1265
- if (!isStructuredPromptInput(promptInput)) {
1266
- return promptInput;
1267
- }
1268
- if (typeof structuredClone === 'function') {
1269
- return structuredClone(promptInput);
1270
- }
1271
- return JSON.parse(JSON.stringify(promptInput));
1272
- }
1273
-
1274
- function resolvePromptTemplateType(source) {
1275
- const sourceTemplateType = asText(source?.chatTemplateType);
1276
- if (sourceTemplateType) {
1277
- return sourceTemplateType;
1278
- }
1279
- const modelConfigTemplateType = asText(source?.modelConfig?.chatTemplateType);
1280
- if (modelConfigTemplateType) {
1281
- return modelConfigTemplateType;
1282
- }
1283
- return asText(source?.manifest?.inference?.chatTemplate?.type);
1284
- }
1285
-
1286
- function buildDefaultGenerationPrompt(templateType) {
1287
- if (templateType === 'qwen') {
1288
- return clonePromptInput(DEFAULT_QWEN_PROMPT);
1289
- }
1290
- if (templateType === 'translategemma') {
1291
- return clonePromptInput(DEFAULT_TRANSLATEGEMMA_PROMPT);
1292
- }
1293
- return DEFAULT_HARNESS_PROMPT;
1294
- }
1295
-
1296
- function shouldPreferModelDefaultPrompt(runtimePrompt, templateType) {
1297
- if (templateType !== 'translategemma' && templateType !== 'qwen') {
1298
- return false;
1299
- }
1300
- if (typeof runtimePrompt !== 'string') {
1301
- return false;
1302
- }
1303
- return runtimePrompt.trim() === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT;
1304
- }
1305
-
1306
- function assertPromptContract(runtimePrompt, templateType, source = 'runtime.inference.prompt') {
1307
- if (templateType !== 'translategemma') {
1308
- return;
1309
- }
1310
- if (runtimePrompt === undefined || runtimePrompt === null) {
1311
- return;
1312
- }
1313
- if (typeof runtimePrompt === 'string') {
1314
- const trimmed = runtimePrompt.trim();
1315
- if (!trimmed || trimmed === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT) {
1316
- return;
1317
- }
1318
- throw new Error(
1319
- `TranslateGemma harness prompt contract violation: ${source} must be ` +
1320
- '{ messages: [...] } with source_lang_code/target_lang_code blocks, not a plain string.'
1321
- );
1322
- }
1323
- if (!isStructuredPromptInput(runtimePrompt)) {
1324
- throw new Error(
1325
- `TranslateGemma harness prompt contract violation: ${source} must be ` +
1326
- '{ messages: [...] } with source_lang_code/target_lang_code blocks.'
1327
- );
1328
- }
1329
- }
1330
-
1331
- function describePromptInput(promptInput) {
1332
- if (typeof promptInput === 'string') {
1333
- return promptInput.trim() || DEFAULT_HARNESS_PROMPT;
1334
- }
1335
- const firstMessage = Array.isArray(promptInput?.messages)
1336
- ? promptInput.messages[0]
1337
- : null;
1338
- const firstContent = Array.isArray(firstMessage?.content)
1339
- ? firstMessage.content[0]
1340
- : null;
1341
- const sourceLang = asText(firstContent?.source_lang_code);
1342
- const targetLang = asText(firstContent?.target_lang_code);
1343
- const text = asText(firstContent?.text);
1344
- if (sourceLang && targetLang) {
1345
- return `${sourceLang} -> ${targetLang}: ${text || '[non-text request]'}`;
1346
- }
1347
- const stringContent = asText(firstMessage?.content);
1348
- if (stringContent) {
1349
- const role = asText(firstMessage?.role) || 'user';
1350
- return `${role}: ${stringContent}`;
1351
- }
1352
- try {
1353
- return JSON.stringify(promptInput);
1354
- } catch {
1355
- return '[structured prompt]';
1356
- }
1357
- }
1358
-
1359
- function resolveGenerationPromptInput(runtimeConfig, runOverrides = null, source = null) {
1360
- const templateType = resolvePromptTemplateType(source);
1361
- const overridePrompt = runOverrides?.prompt;
1362
- assertPromptContract(overridePrompt, templateType, 'runOverrides.prompt');
1363
- if (typeof overridePrompt === 'string' && overridePrompt.trim()) {
1364
- return overridePrompt.trim();
1365
- }
1366
- if (isStructuredPromptInput(overridePrompt)) {
1367
- return clonePromptInput(overridePrompt);
1368
- }
133
+ 'inference',
134
+ 'training',
135
+ 'bench',
136
+ 'debug',
137
+ 'diffusion',
138
+ 'energy',
139
+ ]);
1369
140
 
1370
- const runtimePrompt = runtimeConfig?.inference?.prompt;
1371
- assertPromptContract(runtimePrompt, templateType, 'runtimeConfig.inference.prompt');
1372
- if (shouldPreferModelDefaultPrompt(runtimePrompt, templateType)) {
1373
- return buildDefaultGenerationPrompt(templateType);
1374
- }
1375
- if (typeof runtimePrompt === 'string' && runtimePrompt.trim()) {
1376
- return runtimePrompt.trim();
1377
- }
1378
- if (isStructuredPromptInput(runtimePrompt)) {
1379
- return clonePromptInput(runtimePrompt);
1380
- }
141
+ const BROWSER_SUITE_DISPATCH_MAP = Object.freeze({
142
+ kernels: 'runKernelSuite',
143
+ inference: 'runInferenceSuite',
144
+ training: 'runTrainingSuite',
145
+ bench: 'runBenchSuite',
146
+ debug: 'runInferenceSuite(debug)',
147
+ diffusion: 'runDiffusionSuite',
148
+ energy: 'runEnergySuite',
149
+ });
1381
150
 
1382
- return buildDefaultGenerationPrompt(templateType);
151
+ export function getBrowserSupportedSuites() {
152
+ return [...BROWSER_SUITE_SET];
1383
153
  }
1384
154
 
1385
- function resolveMaxTokens(runtimeConfig) {
1386
- const runtimeMax = runtimeConfig?.inference?.batching?.maxTokens;
1387
- if (Number.isFinite(runtimeMax)) {
1388
- return Math.max(1, Math.floor(runtimeMax));
1389
- }
1390
- return DEFAULT_HARNESS_MAX_TOKENS;
155
+ export function getBrowserSuiteDispatchMap() {
156
+ return { ...BROWSER_SUITE_DISPATCH_MAP };
1391
157
  }
1392
158
 
1393
- function resolveBenchmarkRunSettings(runtimeConfig, source = null) {
1394
- const benchConfig = runtimeConfig?.shared?.benchmark?.run || {};
1395
- const runtimeSampling = isPlainObject(runtimeConfig?.inference?.sampling)
1396
- ? runtimeConfig.inference.sampling
1397
- : {};
1398
- const benchSampling = isPlainObject(benchConfig?.sampling)
1399
- ? benchConfig.sampling
1400
- : {};
1401
- const promptInput = typeof benchConfig.customPrompt === 'string' && benchConfig.customPrompt.trim()
1402
- ? benchConfig.customPrompt.trim()
1403
- : resolveGenerationPromptInput(runtimeConfig, null, source);
1404
- const maxTokens = Number.isFinite(benchConfig.maxNewTokens)
1405
- ? Math.max(1, Math.floor(benchConfig.maxNewTokens))
1406
- : resolveMaxTokens(runtimeConfig);
1407
-
1408
- return {
1409
- warmupRuns: Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0)),
1410
- timedRuns: Math.max(1, Math.floor(benchConfig.timedRuns ?? 1)),
1411
- prompt: promptInput,
1412
- promptLabel: describePromptInput(promptInput),
1413
- maxTokens,
1414
- sampling: {
1415
- ...runtimeSampling,
1416
- ...benchSampling,
1417
- },
159
+ function createUnsupportedSuiteError(requestedSuite, context = {}) {
160
+ const command = typeof context.command === 'string' && context.command.trim()
161
+ ? context.command.trim()
162
+ : 'run-browser-suite';
163
+ const surface = typeof context.surface === 'string' && context.surface.trim()
164
+ ? context.surface.trim()
165
+ : 'browser';
166
+ const allowedSuites = [...BROWSER_SUITE_SET];
167
+ const error = new Error(
168
+ `Unsupported suite "${requestedSuite}". Allowed suites: ${allowedSuites.join(', ')}. ` +
169
+ `command="${command}" surface="${surface}".`
170
+ );
171
+ error.code = 'unsupported_suite';
172
+ error.requestedSuite = requestedSuite;
173
+ error.allowedSuites = allowedSuites;
174
+ error.command = command;
175
+ error.surface = surface;
176
+ error.details = {
177
+ requestedSuite,
178
+ allowedSuites,
179
+ command,
180
+ surface,
1418
181
  };
182
+ return error;
1419
183
  }
1420
184
 
1421
- function summarizeEmbeddingValues(embedding) {
1422
- const values = ArrayBuffer.isView(embedding) || Array.isArray(embedding) ? embedding : null;
1423
- const embeddingDim = Number.isFinite(values?.length) ? values.length : 0;
1424
- const preview = [];
1425
-
1426
- let nonFiniteCount = 0;
1427
- let finiteCount = 0;
1428
- let min = Infinity;
1429
- let max = -Infinity;
1430
- let maxAbs = 0;
1431
- let sum = 0;
1432
- let sumSq = 0;
1433
-
1434
- for (let i = 0; i < embeddingDim; i++) {
1435
- const value = Number(values[i]);
1436
- if (preview.length < EMBEDDING_PREVIEW_LENGTH) {
1437
- preview.push(Number.isFinite(value) ? Number(value.toFixed(6)) : null);
1438
- }
1439
- if (!Number.isFinite(value)) {
1440
- nonFiniteCount++;
1441
- continue;
1442
- }
1443
- finiteCount++;
1444
- if (value < min) min = value;
1445
- if (value > max) max = value;
1446
- const abs = Math.abs(value);
1447
- if (abs > maxAbs) maxAbs = abs;
1448
- sum += value;
1449
- sumSq += value * value;
1450
- }
1451
-
1452
- const mean = finiteCount > 0 ? (sum / finiteCount) : null;
1453
- const variance = finiteCount > 0 ? Math.max(0, (sumSq / finiteCount) - ((mean || 0) * (mean || 0))) : null;
1454
- const stdDev = variance == null ? null : Math.sqrt(variance);
1455
- const l2Norm = finiteCount > 0 ? Math.sqrt(sumSq) : null;
1456
- const finiteRatio = embeddingDim > 0 ? finiteCount / embeddingDim : 0;
1457
-
185
+ function resolveSuiteContext(options = {}) {
186
+ const command = typeof options.command === 'string' ? options.command : null;
187
+ const surface = typeof options.surface === 'string' ? options.surface : null;
1458
188
  return {
1459
- embeddingDim,
1460
- nonFiniteCount,
1461
- finiteCount,
1462
- finiteRatio,
1463
- min: finiteCount > 0 ? min : null,
1464
- max: finiteCount > 0 ? max : null,
1465
- maxAbs: finiteCount > 0 ? maxAbs : null,
1466
- mean,
1467
- stdDev,
1468
- l2Norm,
1469
- preview,
189
+ command: command ?? 'run-browser-suite',
190
+ surface: surface ?? 'browser',
1470
191
  };
1471
192
  }
1472
193
 
1473
- function cosineSimilarity(a, b) {
1474
- if (!a || !b || !Number.isFinite(a.length) || !Number.isFinite(b.length)) return NaN;
1475
- if (a.length !== b.length || a.length === 0) return NaN;
1476
- let dot = 0;
1477
- let normA = 0;
1478
- let normB = 0;
1479
- for (let i = 0; i < a.length; i++) {
1480
- const av = Number(a[i]);
1481
- const bv = Number(b[i]);
1482
- if (!Number.isFinite(av) || !Number.isFinite(bv)) return NaN;
1483
- dot += av * bv;
1484
- normA += av * av;
1485
- normB += bv * bv;
1486
- }
1487
- if (normA <= 0 || normB <= 0) return NaN;
1488
- return dot / Math.sqrt(normA * normB);
1489
- }
1490
-
1491
- function top1Index(values) {
1492
- let best = -1;
1493
- let bestValue = -Infinity;
1494
- for (let i = 0; i < values.length; i++) {
1495
- const value = Number(values[i]);
1496
- if (!Number.isFinite(value)) continue;
1497
- if (value > bestValue) {
1498
- bestValue = value;
1499
- best = i;
1500
- }
194
+ function normalizeSuite(value, context = {}) {
195
+ const suite = String(value || '').trim().toLowerCase();
196
+ if (!suite) {
197
+ throw createUnsupportedSuiteError(suite, context);
1501
198
  }
1502
- return best;
1503
- }
1504
-
1505
- async function embedStandaloneText(pipeline, text) {
1506
- pipeline.reset?.();
1507
- const result = await pipeline.embed(text);
1508
- const embedding = result?.embedding;
1509
- if (!embedding || !Number.isFinite(embedding.length) || embedding.length <= 0) {
1510
- throw new Error('Semantic check embedding is missing.');
199
+ const normalized = suite === 'benchmark' ? 'bench' : suite;
200
+ if (!BROWSER_SUITE_SET.includes(normalized)) {
201
+ throw createUnsupportedSuiteError(normalized, context);
1511
202
  }
1512
- return embedding;
203
+ return normalized;
1513
204
  }
1514
205
 
1515
- async function runEmbeddingSemanticChecks(pipeline, options = null) {
1516
- const config = resolveEmbeddingSemanticFixtures(
1517
- pipeline?.runtimeConfig ?? {},
1518
- options
1519
- );
1520
- const start = performance.now();
1521
- const semanticStyle = resolveEmbeddingSemanticStyle(pipeline);
1522
- const retrieval = [];
1523
- let retrievalPassed = 0;
1524
-
1525
- for (const testCase of config.retrievalCases) {
1526
- const queryEmbedding = await embedStandaloneText(
1527
- pipeline,
1528
- formatEmbeddingSemanticText(testCase.query, 'query', semanticStyle)
1529
- );
1530
- const docEmbeddings = [];
1531
- for (const doc of testCase.docs) {
1532
- docEmbeddings.push(await embedStandaloneText(
1533
- pipeline,
1534
- formatEmbeddingSemanticText(doc, 'document', semanticStyle)
1535
- ));
1536
- }
1537
- const sims = docEmbeddings.map((docEmbedding) => cosineSimilarity(queryEmbedding, docEmbedding));
1538
- const topDoc = top1Index(sims);
1539
- const passed = topDoc === testCase.expectedDoc;
1540
- if (passed) retrievalPassed++;
1541
- retrieval.push({
1542
- id: testCase.id,
1543
- passed,
1544
- expectedDoc: testCase.expectedDoc,
1545
- topDoc,
1546
- sims: sims.map((v) => (Number.isFinite(v) ? Number(v.toFixed(6)) : null)),
1547
- });
1548
- }
206
+ async function runKernelSuite(options = {}) {
207
+ const startTime = performance.now();
208
+ const { testHarness, initGPU } = await import('../../tests/kernels/browser/test-page.js');
209
+ const { runKernelSuite: runAllKernelTests } = await import('../../tests/kernels/browser/kernel-suite.js');
210
+ await initGPU();
1549
211
 
1550
- const pairs = [];
1551
- let pairPassed = 0;
1552
- for (const testCase of config.pairCases) {
1553
- const anchor = await embedStandaloneText(
1554
- pipeline,
1555
- formatEmbeddingSemanticText(testCase.anchor, 'query', semanticStyle)
1556
- );
1557
- const positive = await embedStandaloneText(
1558
- pipeline,
1559
- formatEmbeddingSemanticText(testCase.positive, 'query', semanticStyle)
1560
- );
1561
- const negative = await embedStandaloneText(
1562
- pipeline,
1563
- formatEmbeddingSemanticText(testCase.negative, 'query', semanticStyle)
1564
- );
1565
- const simPos = cosineSimilarity(anchor, positive);
1566
- const simNeg = cosineSimilarity(anchor, negative);
1567
- const margin = simPos - simNeg;
1568
- const passed = Number.isFinite(margin) && margin > config.pairMargin;
1569
- if (passed) pairPassed++;
1570
- pairs.push({
1571
- id: testCase.id,
1572
- passed,
1573
- simPos: Number.isFinite(simPos) ? Number(simPos.toFixed(6)) : null,
1574
- simNeg: Number.isFinite(simNeg) ? Number(simNeg.toFixed(6)) : null,
1575
- margin: Number.isFinite(margin) ? Number(margin.toFixed(6)) : null,
1576
- });
212
+ const previousKernelPath = getActiveKernelPath();
213
+ const previousKernelSource = getActiveKernelPathSource();
214
+ const previousKernelPathPolicy = getActiveKernelPathPolicy();
215
+ if (options.modelId) {
216
+ await resolveKernelPathForModel(options);
1577
217
  }
1578
-
1579
- const retrievalTop1Acc = retrieval.length > 0 ? retrievalPassed / retrieval.length : 0;
1580
- const pairAcc = pairs.length > 0 ? pairPassed / pairs.length : 0;
1581
- const passed = retrievalTop1Acc >= config.minRetrievalTop1Acc
1582
- && pairAcc >= config.minPairAcc;
1583
- const failedCaseIds = [
1584
- ...retrieval.filter((item) => !item.passed).map((item) => `retrieval:${item.id}`),
1585
- ...pairs.filter((item) => !item.passed).map((item) => `pair:${item.id}`),
1586
- ];
1587
-
1588
- return {
1589
- passed,
1590
- style: semanticStyle,
1591
- retrievalTop1Acc,
1592
- pairAcc,
1593
- retrievalPassed,
1594
- retrievalTotal: retrieval.length,
1595
- pairPassed,
1596
- pairTotal: pairs.length,
1597
- minRetrievalTop1Acc: Number(config.minRetrievalTop1Acc.toFixed(4)),
1598
- minPairAcc: Number(config.minPairAcc.toFixed(4)),
1599
- pairMarginThreshold: Number(config.pairMargin.toFixed(4)),
1600
- failedCaseIds,
1601
- retrieval,
1602
- pairs,
1603
- durationMs: Math.max(1, performance.now() - start),
1604
- };
1605
- }
1606
-
1607
- // Matches pad/special tokens that indicate degenerate output: <pad>, <unused123>, <eos>,
1608
- // <bos>, <s>, </s>, [PAD], [UNK], [SEP], [CLS], and bare angle-bracket tokens.
1609
- const SPECIAL_TOKEN_RE = /^(<pad>|<unused\d*>|<eos>|<bos>|<s>|<\/s>|\[PAD\]|\[UNK\]|\[SEP\]|\[CLS\]|<[^>]{1,32}>)$/i;
1610
- const PAD_DOMINANCE_THRESHOLD = 0.5;
1611
-
1612
- function isCoherentOutput(tokens, output) {
1613
- if (tokens.length === 0) return false;
1614
- const specialTokenCount = tokens.filter((t) => SPECIAL_TOKEN_RE.test(String(t).trim())).length;
1615
- if (specialTokenCount / tokens.length >= PAD_DOMINANCE_THRESHOLD) return false;
1616
- const cleanedOutput = String(output || '')
1617
- .replace(/<[^>\n]{1,80}>/g, ' ')
1618
- .replace(/\s+/g, ' ')
1619
- .trim();
1620
- return cleanedOutput.length > 0;
1621
- }
1622
-
1623
- async function runGeneration(pipeline, runtimeConfig, runOverrides = null) {
1624
- const tokens = [];
1625
- const tokenIds = [];
1626
- const promptInput = resolveGenerationPromptInput(runtimeConfig, runOverrides, pipeline);
1627
- const promptLabel = describePromptInput(promptInput);
1628
- const useChatTemplate = runOverrides?.useChatTemplate
1629
- ?? runtimeConfig?.inference?.chatTemplate?.enabled
1630
- ?? (isStructuredPromptInput(promptInput) ? true : undefined);
1631
- const maxTokens = Number.isFinite(runOverrides?.maxTokens)
1632
- ? Math.max(1, Math.floor(runOverrides.maxTokens))
1633
- : resolveMaxTokens(runtimeConfig);
1634
- const sampling = isPlainObject(runOverrides?.sampling)
1635
- ? runOverrides.sampling
1636
- : (runtimeConfig.inference?.sampling || {});
1637
- const debugProbes = runtimeConfig.shared?.debug?.probes || [];
1638
- const profile = runtimeConfig.shared?.debug?.profiler?.enabled === true;
1639
- const disableCommandBatching = Array.isArray(debugProbes) && debugProbes.length > 0;
1640
- const start = performance.now();
1641
-
1642
- for await (const tokenText of pipeline.generate(promptInput, {
1643
- maxTokens,
1644
- temperature: sampling.temperature,
1645
- topP: sampling.topP,
1646
- topK: sampling.topK,
1647
- repetitionPenalty: sampling.repetitionPenalty,
1648
- greedyThreshold: sampling.greedyThreshold,
1649
- useChatTemplate,
1650
- profile,
1651
- disableCommandBatching,
1652
- onToken: (tokenId) => {
1653
- tokenIds.push(tokenId);
1654
- },
1655
- })) {
1656
- if (typeof tokenText === 'string') {
1657
- tokens.push(tokenText);
1658
- }
218
+ let results = [];
219
+ try {
220
+ results = await runAllKernelTests(testHarness);
221
+ } finally {
222
+ setActiveKernelPath(previousKernelPath, previousKernelSource, previousKernelPathPolicy);
1659
223
  }
1660
224
 
1661
- const durationMs = Math.max(1, performance.now() - start);
1662
- const tokensPerSec = (tokens.length / durationMs) * 1000;
1663
- const stats = typeof pipeline?.getStats === 'function'
1664
- ? (pipeline.getStats() || {})
1665
- : {};
1666
- const prefillMs = Number.isFinite(stats.prefillTimeMs) ? stats.prefillTimeMs : 0;
1667
- const ttftMs = Number.isFinite(stats.ttftMs) ? stats.ttftMs : prefillMs;
1668
- const decodeMs = Number.isFinite(stats.decodeTimeMs) ? stats.decodeTimeMs : 0;
1669
- const prefillTokens = Number.isFinite(stats.prefillTokens) ? stats.prefillTokens : 0;
1670
- const decodeTokens = Number.isFinite(stats.decodeTokens)
1671
- ? stats.decodeTokens
1672
- : Math.max(0, tokens.length - 1);
1673
- const decodeTokensPerSec = decodeMs > 0
1674
- ? (decodeTokens / decodeMs) * 1000
1675
- : 0;
1676
- const prefillTokensPerSec = prefillMs > 0
1677
- ? (prefillTokens / prefillMs) * 1000
1678
- : 0;
1679
- const prefillTokensPerSecTtft = ttftMs > 0
1680
- ? (prefillTokens / ttftMs) * 1000
1681
- : 0;
1682
- const gpu = {};
1683
- if (Number.isFinite(stats.gpuTimePrefillMs)) gpu.prefillMs = stats.gpuTimePrefillMs;
1684
- if (Number.isFinite(stats.gpuTimeDecodeMs)) gpu.decodeMs = stats.gpuTimeDecodeMs;
1685
- if (Number.isFinite(stats.decodeRecordMs)) gpu.decodeRecordMs = stats.decodeRecordMs;
1686
- if (Number.isFinite(stats.decodeSubmitWaitMs)) gpu.decodeSubmitWaitMs = stats.decodeSubmitWaitMs;
1687
- if (Number.isFinite(stats.decodeReadbackWaitMs)) gpu.decodeReadbackWaitMs = stats.decodeReadbackWaitMs;
1688
- const gpuPhase = Object.keys(gpu).length > 0 ? gpu : null;
1689
- const decodeProfileSteps = Array.isArray(stats.decodeProfileSteps)
1690
- ? stats.decodeProfileSteps
1691
- : null;
1692
-
1693
- return {
1694
- prompt: promptLabel,
1695
- promptInput,
1696
- maxTokens,
1697
- tokens,
1698
- tokenIds,
1699
- output: tokens.join(''),
1700
- durationMs,
1701
- tokensPerSec,
1702
- phase: {
1703
- totalMs: Number.isFinite(stats.totalTimeMs) ? stats.totalTimeMs : durationMs,
1704
- ttftMs,
1705
- prefillMs,
1706
- decodeMs,
1707
- prefillTokens,
1708
- decodeTokens,
1709
- prefillTokensPerSec,
1710
- prefillTokensPerSecTtft,
1711
- decodeTokensPerSec,
1712
- gpu: gpuPhase,
1713
- decodeProfileSteps,
1714
- },
1715
- };
1716
- }
1717
-
1718
- async function runEmbedding(pipeline, runtimeConfig, runOverrides = null) {
1719
- const prompt = typeof runOverrides?.prompt === 'string' && runOverrides.prompt.trim()
1720
- ? runOverrides.prompt.trim()
1721
- : resolvePrompt(runtimeConfig);
1722
- const start = performance.now();
1723
- const result = await pipeline.embed(prompt);
1724
- const durationMs = Math.max(1, performance.now() - start);
1725
- const tokenCount = Number.isFinite(result?.tokens?.length) ? result.tokens.length : 0;
1726
- const stats = summarizeEmbeddingValues(result?.embedding);
225
+ const summary = buildSuiteSummary('kernels', results, startTime);
1727
226
  return {
1728
- prompt,
1729
- tokenCount,
1730
- durationMs,
1731
- ...stats,
227
+ ...summary,
228
+ deviceInfo: resolveDeviceInfo(),
1732
229
  };
1733
230
  }
1734
231
 
@@ -1867,6 +364,7 @@ async function runInferenceSuite(options = {}) {
1867
364
  modelLoadMs: safeModelLoadMs,
1868
365
  gpu: run.phase.gpu,
1869
366
  decodeProfileSteps: run.phase.decodeProfileSteps,
367
+ generationDiagnostics: run.tokenDiagnostics,
1870
368
  };
1871
369
  }
1872
370
 
@@ -2280,6 +778,7 @@ async function runBenchSuite(options = {}) {
2280
778
  totalRunMs: totalMsStats.median,
2281
779
  decodeTokensPerSec: decodeTokensPerSecStats?.median,
2282
780
  prefillTokensPerSec: prefillTokensPerSecStats?.median,
781
+ prefillTokensPerSecTtft: prefillTokensPerSecTtftStats?.median,
2283
782
  cacheMode,
2284
783
  loadMode,
2285
784
  });
@@ -2323,261 +822,6 @@ async function runBenchSuite(options = {}) {
2323
822
  };
2324
823
  }
2325
824
 
2326
- async function runDiffusionSuite(options = {}) {
2327
- const startTime = performance.now();
2328
- const runtimeConfig = getRuntimeConfig();
2329
- const captureOutput = options.captureOutput === true;
2330
- const cacheMode = normalizeCacheMode(options.cacheMode);
2331
- const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
2332
- const benchConfig = runtimeConfig.shared?.benchmark?.run || {};
2333
- const warmupRuns = Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0));
2334
- const timedRuns = Math.max(1, Math.floor(benchConfig.timedRuns ?? 1));
2335
-
2336
- const diffusionConfig = runtimeConfig.inference?.diffusion;
2337
- if (!diffusionConfig) {
2338
- throw new Error('runtime.inference.diffusion must be set for diffusion harness runs.');
2339
- }
2340
- const scheduler = diffusionConfig.scheduler;
2341
- const latent = diffusionConfig.latent;
2342
- const prompt = resolvePrompt(runtimeConfig);
2343
- const negativePrompt = diffusionConfig.negativePrompt ?? '';
2344
-
2345
- const width = Math.floor(latent?.width);
2346
- const height = Math.floor(latent?.height);
2347
- const steps = Math.floor(scheduler?.numSteps);
2348
- const guidanceScale = scheduler?.guidanceScale;
2349
-
2350
- if (!Number.isFinite(width) || width <= 0) {
2351
- throw new Error('runtime.inference.diffusion.latent.width must be set for diffusion harness runs.');
2352
- }
2353
- if (!Number.isFinite(height) || height <= 0) {
2354
- throw new Error('runtime.inference.diffusion.latent.height must be set for diffusion harness runs.');
2355
- }
2356
- if (!Number.isFinite(steps) || steps <= 0) {
2357
- throw new Error('runtime.inference.diffusion.scheduler.numSteps must be set for diffusion harness runs.');
2358
- }
2359
- if (!Number.isFinite(guidanceScale) || guidanceScale <= 0) {
2360
- throw new Error('runtime.inference.diffusion.scheduler.guidanceScale must be set for diffusion harness runs.');
2361
- }
2362
-
2363
- const harness = await initializeSuiteModel(options);
2364
- const totalMs = [];
2365
- const prefillMs = [];
2366
- const denoiseMs = [];
2367
- const vaeMs = [];
2368
- const prefillTokens = [];
2369
- const decodeTokens = [];
2370
- const gpuTotalMs = [];
2371
- const gpuPrefillMs = [];
2372
- const gpuDenoiseMs = [];
2373
- const gpuVaeMs = [];
2374
- let output = null;
2375
-
2376
- for (let i = 0; i < warmupRuns + timedRuns; i++) {
2377
- harness.pipeline.reset?.();
2378
- const result = await harness.pipeline.generate({
2379
- prompt,
2380
- negativePrompt,
2381
- steps,
2382
- guidanceScale,
2383
- width,
2384
- height,
2385
- });
2386
- if (captureOutput && i === warmupRuns + timedRuns - 1) {
2387
- output = result;
2388
- }
2389
-
2390
- if (i < warmupRuns) continue;
2391
-
2392
- const stats = harness.pipeline.getStats?.() ?? {};
2393
- if (Number.isFinite(stats.totalTimeMs)) totalMs.push(stats.totalTimeMs);
2394
- if (Number.isFinite(stats.prefillTimeMs)) prefillMs.push(stats.prefillTimeMs);
2395
- if (Number.isFinite(stats.decodeTimeMs)) denoiseMs.push(stats.decodeTimeMs);
2396
- if (Number.isFinite(stats.vaeTimeMs)) vaeMs.push(stats.vaeTimeMs);
2397
- if (Number.isFinite(stats.prefillTokens)) prefillTokens.push(stats.prefillTokens);
2398
- if (Number.isFinite(stats.decodeTokens)) decodeTokens.push(stats.decodeTokens);
2399
-
2400
- const gpu = stats.gpu ?? null;
2401
- if (gpu?.available) {
2402
- if (Number.isFinite(gpu.totalMs)) gpuTotalMs.push(gpu.totalMs);
2403
- if (Number.isFinite(gpu.prefillMs)) gpuPrefillMs.push(gpu.prefillMs);
2404
- if (Number.isFinite(gpu.denoiseMs)) gpuDenoiseMs.push(gpu.denoiseMs);
2405
- if (Number.isFinite(gpu.vaeMs)) gpuVaeMs.push(gpu.vaeMs);
2406
- }
2407
- }
2408
-
2409
- const memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
2410
- ? harness.pipeline.getMemoryStats()
2411
- : null;
2412
-
2413
- if (typeof harness.pipeline.unload === 'function' && !options.keepPipeline) {
2414
- await harness.pipeline.unload();
2415
- }
2416
-
2417
- const results = [
2418
- {
2419
- name: 'diffusion',
2420
- passed: totalMs.length > 0,
2421
- duration: totalMs.reduce((sum, value) => sum + value, 0),
2422
- error: totalMs.length > 0 ? undefined : 'No diffusion runs completed',
2423
- },
2424
- ];
2425
-
2426
- const summary = buildSuiteSummary('diffusion', results, startTime);
2427
- const cpuStats = {
2428
- totalMs: computeSampleStats(totalMs),
2429
- prefillMs: computeSampleStats(prefillMs),
2430
- denoiseMs: computeSampleStats(denoiseMs),
2431
- vaeMs: computeSampleStats(vaeMs),
2432
- };
2433
- const gpuStats = gpuTotalMs.length > 0
2434
- ? {
2435
- available: true,
2436
- totalMs: computeSampleStats(gpuTotalMs),
2437
- prefillMs: computeSampleStats(gpuPrefillMs),
2438
- denoiseMs: computeSampleStats(gpuDenoiseMs),
2439
- vaeMs: computeSampleStats(gpuVaeMs),
2440
- }
2441
- : { available: false };
2442
-
2443
- const avgPrefillTokens = prefillTokens.length
2444
- ? Math.round(prefillTokens.reduce((a, b) => a + b, 0) / prefillTokens.length)
2445
- : 0;
2446
- const avgDecodeTokens = decodeTokens.length
2447
- ? Math.round(decodeTokens.reduce((a, b) => a + b, 0) / decodeTokens.length)
2448
- : 0;
2449
- const prefillMsMedian = safeStatsValue(cpuStats.prefillMs?.median);
2450
- const denoiseMsMedian = safeStatsValue(cpuStats.denoiseMs?.median);
2451
- const totalMsMedian = safeStatsValue(cpuStats.totalMs?.median);
2452
- const diffusionPerformanceArtifact = buildDiffusionPerformanceArtifact({
2453
- warmupRuns,
2454
- timedRuns,
2455
- width,
2456
- height,
2457
- steps,
2458
- guidanceScale,
2459
- avgPrefillTokens,
2460
- avgDecodeTokens,
2461
- cpuStats,
2462
- gpuStats,
2463
- });
2464
- const timing = buildCanonicalTiming({
2465
- modelLoadMs: 0,
2466
- firstTokenMs: null,
2467
- firstResponseMs: null,
2468
- prefillMs: prefillMsMedian,
2469
- decodeMs: denoiseMsMedian,
2470
- totalRunMs: totalMsMedian,
2471
- prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
2472
- decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
2473
- cacheMode,
2474
- loadMode,
2475
- });
2476
- const timingDiagnostics = buildTimingDiagnostics(timing, {
2477
- source: 'doppler',
2478
- prefillSemantics: 'internal_prefill_phase',
2479
- });
2480
- const metricsWithContracts = buildSuiteContractMetrics(
2481
- 'diffusion',
2482
- {
2483
- warmupRuns,
2484
- timedRuns,
2485
- width,
2486
- height,
2487
- steps,
2488
- guidanceScale,
2489
- prompt,
2490
- avgPrefillTokens,
2491
- avgDecodeTokens,
2492
- latency: {
2493
- totalMs: cpuStats.totalMs,
2494
- prefillMs: cpuStats.prefillMs,
2495
- denoiseMs: cpuStats.denoiseMs,
2496
- vaeMs: cpuStats.vaeMs,
2497
- },
2498
- throughput: {
2499
- prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
2500
- decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
2501
- decodeStepsPerSec: diffusionPerformanceArtifact.throughput.decodeStepsPerSec,
2502
- },
2503
- cpu: cpuStats,
2504
- gpu: gpuStats,
2505
- performanceArtifact: diffusionPerformanceArtifact,
2506
- },
2507
- harness.manifest
2508
- );
2509
-
2510
- return {
2511
- ...summary,
2512
- modelId: options.modelId || harness.manifest?.modelId || 'unknown',
2513
- cacheMode,
2514
- loadMode,
2515
- env: {
2516
- library: 'doppler',
2517
- runtime: 'browser',
2518
- device: 'webgpu',
2519
- browserUserAgent: typeof navigator !== 'undefined' ? (navigator.userAgent || null) : null,
2520
- browserPlatform: typeof navigator !== 'undefined' ? (navigator.platform || null) : null,
2521
- browserLanguage: typeof navigator !== 'undefined' ? (navigator.language || null) : null,
2522
- browserVendor: typeof navigator !== 'undefined' ? (navigator.vendor || null) : null,
2523
- },
2524
- timing,
2525
- timingDiagnostics,
2526
- output,
2527
- metrics: metricsWithContracts,
2528
- memoryStats,
2529
- deviceInfo: resolveDeviceInfo(),
2530
- pipeline: options.keepPipeline ? harness.pipeline : null,
2531
- };
2532
- }
2533
-
2534
- async function runEnergySuite(options = {}) {
2535
- const startTime = performance.now();
2536
- const harness = await initializeSuiteModel(options);
2537
- if (harness.manifest?.modelType !== 'energy') {
2538
- throw new Error('Energy suite requires an energy model manifest.');
2539
- }
2540
-
2541
- const result = await harness.pipeline.generate();
2542
- const stats = harness.pipeline.getStats?.() ?? {};
2543
-
2544
- const memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
2545
- ? harness.pipeline.getMemoryStats()
2546
- : null;
2547
-
2548
- if (typeof harness.pipeline.unload === 'function' && !options.keepPipeline) {
2549
- await harness.pipeline.unload();
2550
- }
2551
-
2552
- const results = [
2553
- {
2554
- name: 'energy',
2555
- passed: Number.isFinite(result.energy ?? NaN),
2556
- duration: result.totalTimeMs ?? Math.max(0, performance.now() - startTime),
2557
- error: Number.isFinite(result.energy ?? NaN) ? undefined : 'Energy did not converge',
2558
- },
2559
- ];
2560
-
2561
- const summary = buildSuiteSummary('energy', results, startTime);
2562
- return {
2563
- ...summary,
2564
- modelId: options.modelId || harness.manifest?.modelId || 'unknown',
2565
- metrics: {
2566
- steps: result.steps,
2567
- energy: result.energy ?? null,
2568
- dtype: result.dtype,
2569
- shape: result.shape,
2570
- totalTimeMs: result.totalTimeMs ?? null,
2571
- energyHistory: result.energyHistory ?? [],
2572
- stateStats: result.stateStats ?? null,
2573
- readbackCount: stats.readbackCount ?? null,
2574
- },
2575
- memoryStats,
2576
- deviceInfo: resolveDeviceInfo(),
2577
- pipeline: options.keepPipeline ? harness.pipeline : null,
2578
- };
2579
- }
2580
-
2581
825
  async function dispatchBrowserSuite(suite, options) {
2582
826
  if (suite === 'kernels') {
2583
827
  return runKernelSuite(options);
@@ -2603,47 +847,16 @@ async function dispatchBrowserSuite(suite, options) {
2603
847
  return null;
2604
848
  }
2605
849
 
2606
- function collectTrainingArtifactsFromSuiteResult(suiteResult) {
2607
- const ulArtifacts = [];
2608
- const distillArtifacts = [];
2609
- const checkpointResumeTimeline = Array.isArray(suiteResult?.metrics?.checkpointResumeTimeline)
2610
- ? suiteResult.metrics.checkpointResumeTimeline
2611
- .filter((entry) => entry && typeof entry === 'object')
2612
- : [];
2613
- const addArtifact = (artifact, source = null) => {
2614
- if (!artifact || typeof artifact !== 'object' || typeof artifact.manifestPath !== 'string') {
2615
- return;
2616
- }
2617
- const stage = String(artifact.stage || '').trim();
2618
- const kind = String(artifact.kind || '').trim();
2619
- if (kind === 'distill' || stage === 'stage_a' || stage === 'stage_b') {
2620
- distillArtifacts.push(artifact);
2621
- return;
2622
- }
2623
- if (kind === 'ul' || stage === 'stage1_joint' || stage === 'stage2_base' || source === 'ul') {
2624
- ulArtifacts.push(artifact);
2625
- return;
2626
- }
2627
- ulArtifacts.push(artifact);
2628
- };
2629
-
2630
- const metricUlArtifacts = Array.isArray(suiteResult?.metrics?.ulArtifacts)
2631
- ? suiteResult.metrics.ulArtifacts
2632
- : [];
2633
- for (const artifact of metricUlArtifacts) {
2634
- addArtifact(artifact, 'ul');
2635
- }
2636
- const metricDistillArtifacts = Array.isArray(suiteResult?.metrics?.distillArtifacts)
2637
- ? suiteResult.metrics.distillArtifacts
2638
- : [];
2639
- for (const artifact of metricDistillArtifacts) {
2640
- addArtifact(artifact, 'distill');
2641
- }
2642
- const resultEntries = Array.isArray(suiteResult?.results) ? suiteResult.results : [];
2643
- for (const entry of resultEntries) {
2644
- addArtifact(entry?.artifact, null);
2645
- }
2646
- return { ulArtifacts, distillArtifacts, checkpointResumeTimeline };
850
+ function shouldCaptureDebugSnapshot(suite, runtimeConfig) {
851
+ const debug = runtimeConfig?.shared?.debug ?? {};
852
+ const logLevel = String(debug.logLevel?.defaultLogLevel ?? '').toLowerCase();
853
+ return suite === 'debug'
854
+ || debug.trace?.enabled === true
855
+ || debug.pipeline?.enabled === true
856
+ || (Array.isArray(debug.probes) && debug.probes.length > 0)
857
+ || debug.profiler?.enabled === true
858
+ || logLevel === 'debug'
859
+ || logLevel === 'verbose';
2647
860
  }
2648
861
 
2649
862
  export async function runBrowserSuite(options = {}) {
@@ -2651,10 +864,15 @@ export async function runBrowserSuite(options = {}) {
2651
864
  const suiteTimestamp = resolveReportTimestamp(options.timestamp, 'runBrowserSuite timestamp');
2652
865
  const suiteContext = resolveSuiteContext(options);
2653
866
  const suite = normalizeSuite(options.suite, suiteContext);
867
+ const captureDebugSnapshot = shouldCaptureDebugSnapshot(suite, getRuntimeConfig());
868
+ if (captureDebugSnapshot) {
869
+ clearLogHistory();
870
+ }
2654
871
  const suiteResult = await dispatchBrowserSuite(suite, options);
2655
872
  if (!suiteResult) {
2656
873
  throw createUnsupportedSuiteError(suite, suiteContext);
2657
874
  }
875
+ const debugSnapshot = captureDebugSnapshot ? getDebugSnapshot() : null;
2658
876
 
2659
877
  if (suite === 'bench' && suiteResult?.metrics?.workloadType === 'training') {
2660
878
  const trainingReport = suiteResult?.metrics?.trainingMetricsReport;
@@ -2686,6 +904,7 @@ export async function runBrowserSuite(options = {}) {
2686
904
  metrics: suiteResult.metrics ?? null,
2687
905
  output: reportOutput,
2688
906
  memory: suiteResult.memoryStats ?? null,
907
+ debugSnapshot,
2689
908
  ...options.report,
2690
909
  };
2691
910
  if (ulArtifacts.length > 0 || distillArtifacts.length > 0 || checkpointResumeTimeline.length > 0) {
@@ -2707,76 +926,10 @@ export async function runBrowserSuite(options = {}) {
2707
926
  report.timestamp = suiteTimestamp;
2708
927
  }
2709
928
  const reportInfo = await saveReport(modelId, report, { timestamp: report.timestamp });
2710
- return { ...suiteResult, report, reportInfo };
929
+ return { ...suiteResult, debugSnapshot, report, reportInfo };
2711
930
  });
2712
931
  }
2713
932
 
2714
- function normalizeManifest(manifest) {
2715
- if (!manifest || typeof manifest !== 'object') {
2716
- throw new Error('Harness manifest must be an object.');
2717
- }
2718
- const runs = Array.isArray(manifest.runs) ? manifest.runs : [];
2719
- if (!runs.length) {
2720
- throw new Error('Harness manifest must include at least one run.');
2721
- }
2722
- return {
2723
- defaults: manifest.defaults ?? {},
2724
- runs,
2725
- reportModelId: manifest.reportModelId ?? manifest.id ?? 'manifest',
2726
- report: manifest.report ?? null,
2727
- };
2728
- }
2729
-
2730
- function mergeRunDefaults(defaults, run) {
2731
- return {
2732
- ...defaults,
2733
- ...run,
2734
- runtimePreset: run.runtimePreset ?? defaults.runtimePreset ?? null,
2735
- runtimeConfigUrl: run.runtimeConfigUrl ?? defaults.runtimeConfigUrl ?? null,
2736
- runtimeConfig: run.runtimeConfig ?? defaults.runtimeConfig ?? null,
2737
- suite: run.suite ?? defaults.suite ?? 'inference',
2738
- };
2739
- }
2740
-
2741
- async function applyRuntimeForRun(run, options) {
2742
- if (run.runtimeConfig) {
2743
- const runtime = resolveRuntimeFromConfig(run.runtimeConfig);
2744
- if (!runtime) {
2745
- throw new Error('runtimeConfig is missing runtime fields');
2746
- }
2747
- setRuntimeConfig(runtime);
2748
- return;
2749
- }
2750
- if (run.runtimeConfigUrl) {
2751
- await applyRuntimeConfigFromUrl(run.runtimeConfigUrl, options);
2752
- return;
2753
- }
2754
- if (run.runtimePreset) {
2755
- await applyRuntimePreset(run.runtimePreset, options);
2756
- }
2757
- }
2758
-
2759
- function summarizeManifestRuns(results) {
2760
- let passedRuns = 0;
2761
- let failedRuns = 0;
2762
- let durationMs = 0;
2763
- for (const result of results) {
2764
- const failures = (result.results || []).filter((entry) => !entry.passed && !entry.skipped);
2765
- if (failures.length > 0) {
2766
- failedRuns += 1;
2767
- } else {
2768
- passedRuns += 1;
2769
- }
2770
- durationMs += result.duration || 0;
2771
- }
2772
- return {
2773
- totalRuns: results.length,
2774
- passedRuns,
2775
- failedRuns,
2776
- durationMs,
2777
- };
2778
- }
2779
-
2780
933
  export async function runBrowserManifest(manifest, options = {}) {
2781
934
  const normalized = normalizeManifest(manifest);
2782
935
  const results = [];