@simulatte/doppler 0.1.6 → 0.1.7

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (316)
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +16 -23
  3. package/package.json +14 -1
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +1 -1
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +7 -5
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +12 -2
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +10 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  45. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  46. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  47. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  48. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  49. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  50. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  52. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  54. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  55. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  56. package/src/config/runtime.js +6 -1
  57. package/src/config/schema/debug.schema.d.ts +5 -0
  58. package/src/config/schema/doppler.schema.js +16 -21
  59. package/src/config/schema/inference-defaults.schema.js +3 -3
  60. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  61. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  62. package/src/config/schema/manifest.schema.d.ts +2 -1
  63. package/src/config/schema/manifest.schema.js +16 -3
  64. package/src/config/training-defaults.js +30 -22
  65. package/src/converter/conversion-plan.js +94 -9
  66. package/src/converter/core.d.ts +7 -0
  67. package/src/converter/core.js +14 -9
  68. package/src/converter/execution-v0-manifest.js +4 -1
  69. package/src/converter/index.d.ts +1 -0
  70. package/src/converter/index.js +1 -0
  71. package/src/converter/manifest-inference.js +43 -12
  72. package/src/converter/parsers/diffusion.js +0 -3
  73. package/src/converter/quantization-info.js +35 -15
  74. package/src/converter/shard-packer.d.ts +1 -1
  75. package/src/converter/shard-packer.js +4 -1
  76. package/src/debug/config.js +123 -11
  77. package/src/debug/signals.js +7 -1
  78. package/src/debug/tensor.d.ts +2 -0
  79. package/src/debug/tensor.js +13 -2
  80. package/src/distribution/p2p-control-plane.js +52 -12
  81. package/src/distribution/p2p-observability.js +43 -7
  82. package/src/distribution/p2p-webrtc-browser.js +20 -0
  83. package/src/distribution/shard-delivery.js +77 -26
  84. package/src/formats/gguf/types.js +33 -16
  85. package/src/formats/rdrr/groups.d.ts +12 -4
  86. package/src/formats/rdrr/groups.js +3 -6
  87. package/src/formats/rdrr/parsing.js +39 -2
  88. package/src/formats/rdrr/types.d.ts +2 -1
  89. package/src/gpu/command-recorder.js +86 -61
  90. package/src/gpu/device.d.ts +1 -0
  91. package/src/gpu/device.js +73 -19
  92. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  93. package/src/gpu/kernel-tuner/cache.js +71 -4
  94. package/src/gpu/kernel-tuner/tuner.js +22 -4
  95. package/src/gpu/kernels/attention.js +15 -34
  96. package/src/gpu/kernels/backward/adam.js +62 -58
  97. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  98. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  99. package/src/gpu/kernels/cast.js +191 -149
  100. package/src/gpu/kernels/check-stop.js +33 -44
  101. package/src/gpu/kernels/conv2d.js +27 -17
  102. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  103. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  104. package/src/gpu/kernels/dequant.js +178 -126
  105. package/src/gpu/kernels/energy.d.ts +3 -21
  106. package/src/gpu/kernels/energy.js +111 -88
  107. package/src/gpu/kernels/feature-check.js +1 -1
  108. package/src/gpu/kernels/fused_ffn.js +84 -65
  109. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  110. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  111. package/src/gpu/kernels/gather.js +33 -15
  112. package/src/gpu/kernels/gelu.js +19 -11
  113. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  114. package/src/gpu/kernels/groupnorm.js +34 -23
  115. package/src/gpu/kernels/kv-quantize.js +5 -2
  116. package/src/gpu/kernels/layernorm.js +35 -19
  117. package/src/gpu/kernels/logit-merge.js +5 -3
  118. package/src/gpu/kernels/matmul.js +58 -39
  119. package/src/gpu/kernels/modulate.js +23 -15
  120. package/src/gpu/kernels/moe.js +221 -175
  121. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  122. package/src/gpu/kernels/relu.js +18 -10
  123. package/src/gpu/kernels/repeat_channels.js +25 -17
  124. package/src/gpu/kernels/residual.js +37 -27
  125. package/src/gpu/kernels/rmsnorm.js +57 -41
  126. package/src/gpu/kernels/rope.js +3 -0
  127. package/src/gpu/kernels/sample.js +27 -38
  128. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  129. package/src/gpu/kernels/scale.js +18 -11
  130. package/src/gpu/kernels/shader-cache.js +4 -2
  131. package/src/gpu/kernels/silu.js +120 -72
  132. package/src/gpu/kernels/softmax.js +44 -25
  133. package/src/gpu/kernels/split_qkv.js +23 -13
  134. package/src/gpu/kernels/transpose.js +18 -10
  135. package/src/gpu/kernels/transpose.wgsl +5 -3
  136. package/src/gpu/kernels/upsample2d.js +21 -13
  137. package/src/gpu/kernels/utils.js +20 -13
  138. package/src/gpu/partitioned-buffer-pool.js +10 -2
  139. package/src/gpu/perf-guards.js +2 -9
  140. package/src/gpu/profiler.js +27 -22
  141. package/src/gpu/readback-utils.d.ts +16 -0
  142. package/src/gpu/readback-utils.js +41 -0
  143. package/src/gpu/submit-tracker.js +13 -0
  144. package/src/gpu/uniform-cache.d.ts +1 -0
  145. package/src/gpu/uniform-cache.js +30 -9
  146. package/src/hotswap/intent-bundle.js +6 -0
  147. package/src/hotswap/manifest.d.ts +10 -1
  148. package/src/hotswap/manifest.js +12 -2
  149. package/src/hotswap/runtime.js +30 -8
  150. package/src/index-browser.d.ts +44 -0
  151. package/src/index-browser.js +14 -0
  152. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  153. package/src/inference/browser-harness-contract-helpers.js +28 -0
  154. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  155. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  156. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  157. package/src/inference/browser-harness-model-helpers.js +217 -0
  158. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  159. package/src/inference/browser-harness-report-helpers.js +42 -0
  160. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  161. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  162. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  163. package/src/inference/browser-harness-suite-helpers.js +268 -0
  164. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  165. package/src/inference/browser-harness-text-helpers.js +788 -0
  166. package/src/inference/browser-harness.d.ts +6 -0
  167. package/src/inference/browser-harness.js +130 -1996
  168. package/src/inference/kv-cache/base.js +140 -94
  169. package/src/inference/kv-cache/tiered.js +5 -3
  170. package/src/inference/moe-router.js +88 -56
  171. package/src/inference/multi-model-network.js +5 -3
  172. package/src/inference/network-evolution.d.ts +11 -2
  173. package/src/inference/network-evolution.js +20 -21
  174. package/src/inference/pipelines/context.d.ts +3 -0
  175. package/src/inference/pipelines/context.js +142 -2
  176. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  177. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  178. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  179. package/src/inference/pipelines/diffusion/vae.js +3 -7
  180. package/src/inference/pipelines/energy/pipeline.js +27 -21
  181. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  182. package/src/inference/pipelines/energy/quintel.js +11 -0
  183. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  184. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  185. package/src/inference/pipelines/text/attention/projections.js +151 -101
  186. package/src/inference/pipelines/text/attention/record.js +62 -8
  187. package/src/inference/pipelines/text/attention/run.js +62 -8
  188. package/src/inference/pipelines/text/config.js +3 -4
  189. package/src/inference/pipelines/text/embed.js +2 -8
  190. package/src/inference/pipelines/text/execution-plan.js +41 -19
  191. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  192. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  193. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  194. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  195. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  196. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  197. package/src/inference/pipelines/text/generator-steps.js +298 -207
  198. package/src/inference/pipelines/text/generator.js +6 -23
  199. package/src/inference/pipelines/text/init.js +78 -20
  200. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  201. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  202. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  203. package/src/inference/pipelines/text/layer.js +3 -9
  204. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  205. package/src/inference/pipelines/text/linear-attention.js +80 -6
  206. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  207. package/src/inference/pipelines/text/logits/index.js +10 -11
  208. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  209. package/src/inference/pipelines/text/logits/utils.js +9 -0
  210. package/src/inference/pipelines/text/lora-apply.js +50 -32
  211. package/src/inference/pipelines/text/model-load.js +279 -104
  212. package/src/inference/pipelines/text/moe-cache.js +5 -4
  213. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  214. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  215. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  216. package/src/inference/pipelines/text/ops.js +90 -90
  217. package/src/inference/pipelines/text/probes.js +9 -9
  218. package/src/inference/pipelines/text/weights.js +17 -7
  219. package/src/inference/pipelines/text.js +13 -1
  220. package/src/inference/speculative.d.ts +2 -2
  221. package/src/inference/speculative.js +4 -18
  222. package/src/inference/test-harness.d.ts +1 -1
  223. package/src/inference/test-harness.js +15 -5
  224. package/src/inference/tokenizer.d.ts +0 -5
  225. package/src/inference/tokenizer.js +4 -23
  226. package/src/inference/tokenizers/bpe.js +9 -0
  227. package/src/inference/tokenizers/bundled.js +20 -0
  228. package/src/inference/tokenizers/sentencepiece.js +12 -0
  229. package/src/loader/doppler-loader.js +38 -22
  230. package/src/loader/dtype-utils.js +3 -44
  231. package/src/loader/embedding-loader.js +7 -3
  232. package/src/loader/experts/expert-cache.js +13 -6
  233. package/src/loader/experts/expert-loader.js +10 -6
  234. package/src/loader/final-weights-loader.js +8 -4
  235. package/src/loader/layer-loader.js +2 -1
  236. package/src/loader/loader-state.js +2 -2
  237. package/src/loader/memory-monitor.js +8 -0
  238. package/src/loader/multi-model-loader.d.ts +14 -0
  239. package/src/loader/multi-model-loader.js +70 -24
  240. package/src/loader/shard-cache.js +81 -12
  241. package/src/loader/shard-resolver.js +25 -3
  242. package/src/loader/tensors/tensor-loader.js +209 -144
  243. package/src/loader/tensors/tensor-reader.js +76 -19
  244. package/src/loader/weight-downcast.js +1 -1
  245. package/src/memory/buffer-pool.d.ts +9 -1
  246. package/src/memory/buffer-pool.js +109 -44
  247. package/src/memory/unified-detect.js +1 -1
  248. package/src/rules/inference/kernel-path.rules.json +24 -8
  249. package/src/rules/rule-registry.js +25 -1
  250. package/src/storage/backends/opfs-store.js +68 -24
  251. package/src/storage/downloader.js +364 -83
  252. package/src/storage/index.d.ts +3 -0
  253. package/src/storage/index.js +3 -0
  254. package/src/storage/preflight.d.ts +2 -2
  255. package/src/storage/preflight.js +24 -2
  256. package/src/storage/quickstart-downloader.js +11 -5
  257. package/src/storage/registry.js +10 -4
  258. package/src/storage/reports.js +1 -1
  259. package/src/storage/shard-manager.d.ts +15 -1
  260. package/src/storage/shard-manager.js +51 -3
  261. package/src/storage/source-artifact-store.d.ts +52 -0
  262. package/src/storage/source-artifact-store.js +234 -0
  263. package/src/tooling/command-api-constants.d.ts +9 -0
  264. package/src/tooling/command-api-constants.js +9 -0
  265. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  266. package/src/tooling/command-api-family-normalizers.js +343 -0
  267. package/src/tooling/command-api-helpers.d.ts +25 -0
  268. package/src/tooling/command-api-helpers.js +262 -0
  269. package/src/tooling/command-api.js +16 -602
  270. package/src/tooling/command-envelope.js +4 -1
  271. package/src/tooling/command-runner-shared.js +52 -18
  272. package/src/tooling/lean-execution-contract.js +150 -3
  273. package/src/tooling/node-browser-command-runner.js +161 -271
  274. package/src/tooling/node-command-runner.js +29 -3
  275. package/src/tooling/node-converter.js +27 -1
  276. package/src/tooling/node-source-runtime.d.ts +1 -1
  277. package/src/tooling/node-source-runtime.js +84 -3
  278. package/src/tooling/node-webgpu.js +24 -21
  279. package/src/tooling/opfs-cache.js +21 -4
  280. package/src/tooling/runtime-input-composition.d.ts +38 -0
  281. package/src/tooling/runtime-input-composition.js +86 -0
  282. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  283. package/src/tooling/source-runtime-bundle.js +261 -34
  284. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  285. package/src/tooling/source-runtime-materializer.js +93 -0
  286. package/src/training/attention-backward.js +32 -17
  287. package/src/training/autograd.js +80 -52
  288. package/src/training/checkpoint-watch.d.ts +2 -1
  289. package/src/training/checkpoint-watch.js +39 -6
  290. package/src/training/checkpoint.js +40 -11
  291. package/src/training/clip.js +2 -1
  292. package/src/training/datasets/token-batch.js +20 -8
  293. package/src/training/distillation/checkpoint-watch.js +1 -0
  294. package/src/training/distillation/student-fixture.d.ts +22 -0
  295. package/src/training/distillation/student-fixture.js +846 -0
  296. package/src/training/distillation/suite-data.d.ts +45 -0
  297. package/src/training/distillation/suite-data.js +189 -0
  298. package/src/training/lora-pipeline.js +4 -7
  299. package/src/training/lora.js +26 -12
  300. package/src/training/loss.js +5 -6
  301. package/src/training/objectives/cross_entropy.js +2 -5
  302. package/src/training/objectives/distill_kd.js +4 -8
  303. package/src/training/objectives/distill_triplet.js +4 -8
  304. package/src/training/objectives/ul_stage2_base.js +4 -8
  305. package/src/training/operator-command.js +2 -0
  306. package/src/training/optimizer.js +19 -7
  307. package/src/training/runner.js +2 -1
  308. package/src/training/suite.js +18 -978
  309. package/src/training/tensor-factory.d.ts +9 -0
  310. package/src/training/tensor-factory.js +13 -0
  311. package/src/training/trainer.js +3 -5
  312. package/src/training/ul_dataset.js +3 -5
  313. package/src/training/workloads.js +70 -79
  314. package/src/version.js +1 -1
  315. package/tools/convert-safetensors-node.js +22 -16
  316. package/tools/doppler-cli.js +44 -25
@@ -1,13 +1,7 @@
1
1
 
2
- import { initializeInference, parseRuntimeOverridesFromURL } from './test-harness.js';
2
+ import { initializeInference } from './test-harness.js';
3
3
  import { saveReport } from '../storage/reports.js';
4
4
  import { getRuntimeConfig, setRuntimeConfig } from '../config/runtime.js';
5
- import { initDevice, getKernelCapabilities, getDevice } from '../gpu/device.js';
6
- import { createPipeline } from './pipelines/text.js';
7
- import { parseModelConfigFromManifest } from './pipelines/text/config.js';
8
- import { resolveKernelPathState, activateKernelPathState } from './pipelines/text/model-load.js';
9
- import { openModelStore, loadManifestFromStore } from '../storage/shard-manager.js';
10
- import { parseManifest } from '../formats/rdrr/index.js';
11
5
  import { computeSampleStats } from '../debug/stats.js';
12
6
  import {
13
7
  setActiveKernelPath,
@@ -15,19 +9,54 @@ import {
15
9
  getActiveKernelPathSource,
16
10
  getActiveKernelPathPolicy,
17
11
  } from '../config/kernel-path-loader.js';
18
- import {
19
- getInferenceLayerPatternContractArtifact,
20
- selectRuleValue,
21
- } from '../rules/rule-registry.js';
22
- import { mergeRuntimeValues } from '../config/runtime-merge.js';
23
- import { isPlainObject } from '../utils/plain-object.js';
24
- import { validateBrowserSuiteMetrics } from '../config/schema/browser-suite-metrics.schema.js';
25
12
  import { validateTrainingMetricsReport } from '../config/schema/training-metrics.schema.js';
26
- import { buildExecutionContractArtifact } from '../config/execution-contract-check.js';
27
- import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
13
+ import {
14
+ resolveReportTimestamp,
15
+ resolveRuntime,
16
+ cloneRuntimeConfig,
17
+ runWithRuntimeIsolationForSuite,
18
+ sanitizeReportOutput,
19
+ loadRuntimeConfigFromUrl,
20
+ applyRuntimeConfigFromUrl,
21
+ loadRuntimePreset,
22
+ applyRuntimePreset,
23
+ applyRuntimeForRun,
24
+ normalizeManifest,
25
+ mergeRunDefaults,
26
+ summarizeManifestRuns,
27
+ } from './browser-harness-runtime-helpers.js';
28
+ import {
29
+ buildSuiteSummary,
30
+ normalizeCacheMode,
31
+ normalizeLoadMode,
32
+ normalizeWorkloadType,
33
+ assertDiffusionPerformanceArtifact,
34
+ toTimingNumber,
35
+ safeToFixed,
36
+ sampleTimingNumber,
37
+ buildCanonicalTiming,
38
+ buildTimingDiagnostics,
39
+ } from './browser-harness-suite-helpers.js';
40
+ import {
41
+ resolveDeviceInfo,
42
+ resolveKernelPathForModel,
43
+ initializeSuiteModel,
44
+ } from './browser-harness-model-helpers.js';
45
+ import {
46
+ resolveBenchmarkRunSettings,
47
+ runEmbeddingSemanticChecks,
48
+ isCoherentOutput,
49
+ runGeneration,
50
+ runEmbedding,
51
+ } from './browser-harness-text-helpers.js';
52
+ import { buildSuiteContractMetrics } from './browser-harness-contract-helpers.js';
53
+ import {
54
+ runDiffusionSuite,
55
+ runEnergySuite,
56
+ } from './browser-harness-diffusion-energy-suites.js';
57
+ import { collectTrainingArtifactsFromSuiteResult } from './browser-harness-report-helpers.js';
28
58
 
29
59
  const TRAINING_SUITE_MODULE_PATH = '../training/suite.js';
30
- const NODE_SOURCE_RUNTIME_MODULE_PATH = '../tooling/node-source-runtime.js';
31
60
  let trainingSuiteModulePromise = null;
32
61
 
33
62
  async function loadTrainingSuiteModule() {
@@ -42,346 +71,20 @@ export async function runTrainingSuite(options = {}) {
42
71
  return module.runTrainingSuite(options);
43
72
  }
44
73
 
74
+ export {
75
+ loadRuntimeConfigFromUrl,
76
+ applyRuntimeConfigFromUrl,
77
+ loadRuntimePreset,
78
+ applyRuntimePreset,
79
+ applyRuntimeForRun,
80
+ buildSuiteSummary,
81
+ };
82
+
45
83
  async function runTrainingBenchSuite(options = {}) {
46
84
  const module = await loadTrainingSuiteModule();
47
85
  return module.runTrainingBenchSuite(options);
48
86
  }
49
87
 
50
- function buildSuiteContractMetrics(suite, baseMetrics, manifest) {
51
- const executionContractArtifact = buildExecutionContractArtifact(manifest);
52
- const executionV0GraphContractArtifact = executionContractArtifact?.executionV0?.graph ?? null;
53
- const layerPatternContractArtifact = getInferenceLayerPatternContractArtifact();
54
- const requiredInferenceFieldsArtifact = manifest?.modelType === 'transformer'
55
- && isPlainObject(manifest?.inference?.attention)
56
- ? buildManifestRequiredInferenceFieldsArtifact(
57
- manifest?.inference ?? null,
58
- `${manifest?.modelId ?? 'unknown'}.inference`
59
- )
60
- : null;
61
- return validateBrowserSuiteMetrics({
62
- ...baseMetrics,
63
- schemaVersion: 1,
64
- source: 'doppler',
65
- suite,
66
- ...(executionContractArtifact ? { executionContractArtifact } : {}),
67
- executionV0GraphContractArtifact,
68
- layerPatternContractArtifact,
69
- requiredInferenceFieldsArtifact,
70
- });
71
- }
72
-
73
- function parseReportTimestamp(rawTimestamp, label = 'timestamp') {
74
- if (rawTimestamp == null) {
75
- return null;
76
- }
77
-
78
- if (rawTimestamp instanceof Date) {
79
- const timestamp = rawTimestamp.getTime();
80
- if (!Number.isFinite(timestamp)) {
81
- throw new Error(`Invalid ${label}: not a valid Date.`);
82
- }
83
- return rawTimestamp.toISOString();
84
- }
85
-
86
- if (typeof rawTimestamp === 'number') {
87
- if (!Number.isFinite(rawTimestamp)) {
88
- throw new Error(`Invalid ${label}: must be a finite epoch timestamp.`);
89
- }
90
- return new Date(rawTimestamp).toISOString();
91
- }
92
-
93
- if (typeof rawTimestamp === 'string') {
94
- const trimmed = rawTimestamp.trim();
95
- if (trimmed.length === 0) {
96
- return null;
97
- }
98
- const numericCandidate = Number(trimmed);
99
- if (Number.isFinite(numericCandidate)) {
100
- return new Date(numericCandidate).toISOString();
101
- }
102
- const parsed = new Date(trimmed);
103
- if (Number.isNaN(parsed.getTime())) {
104
- throw new Error(`Invalid ${label}: expected ISO-8601 string or epoch milliseconds.`);
105
- }
106
- return parsed.toISOString();
107
- }
108
-
109
- throw new Error(`Invalid ${label}: expected Date, ISO-8601 string, epoch milliseconds, or nullish.`);
110
- }
111
-
112
- function resolveReportTimestamp(rawTimestamp, label, fallbackTimestamp = null) {
113
- const parsed = parseReportTimestamp(rawTimestamp, label);
114
- return parsed ?? (fallbackTimestamp == null ? new Date().toISOString() : String(fallbackTimestamp));
115
- }
116
-
117
- function resolveRuntime(options) {
118
- if (options.runtime) return options.runtime;
119
- if (options.searchParams) return parseRuntimeOverridesFromURL(options.searchParams);
120
- if (typeof globalThis.location === 'undefined') return parseRuntimeOverridesFromURL(new URLSearchParams());
121
- return parseRuntimeOverridesFromURL();
122
- }
123
-
124
- function normalizePresetPath(value) {
125
- const trimmed = String(value || '').replace(/^[./]+/, '');
126
- if (!trimmed) return null;
127
- return trimmed.endsWith('.json') ? trimmed : `${trimmed}.json`;
128
- }
129
-
130
- function resolvePresetBaseUrl() {
131
- try {
132
- return new URL('../config/presets/runtime/', import.meta.url).toString().replace(/\/$/, '');
133
- } catch {
134
- if (typeof globalThis.location !== 'undefined' && globalThis.location?.href) {
135
- return new URL('/src/config/presets/runtime/', globalThis.location.href).toString().replace(/\/$/, '');
136
- }
137
- return '/src/config/presets/runtime';
138
- }
139
- }
140
-
141
- function cloneRuntimeConfig(runtimeConfig) {
142
- if (!runtimeConfig) return null;
143
- if (typeof structuredClone === 'function') {
144
- return structuredClone(runtimeConfig);
145
- }
146
- return JSON.parse(JSON.stringify(runtimeConfig));
147
- }
148
-
149
- function snapshotRuntimeState() {
150
- return {
151
- runtimeConfig: cloneRuntimeConfig(getRuntimeConfig()),
152
- activeKernelPath: getActiveKernelPath(),
153
- activeKernelPathSource: getActiveKernelPathSource(),
154
- activeKernelPathPolicy: getActiveKernelPathPolicy(),
155
- };
156
- }
157
-
158
- function restoreRuntimeState(snapshot) {
159
- if (!snapshot) {
160
- return;
161
- }
162
- setRuntimeConfig(snapshot.runtimeConfig);
163
- setActiveKernelPath(
164
- snapshot.activeKernelPath,
165
- snapshot.activeKernelPathSource || 'none',
166
- snapshot.activeKernelPathPolicy ?? null
167
- );
168
- }
169
-
170
- async function runWithRuntimeIsolationForSuite(run) {
171
- const snapshot = snapshotRuntimeState();
172
- try {
173
- return await run();
174
- } finally {
175
- restoreRuntimeState(snapshot);
176
- }
177
- }
178
-
179
- function resolveRuntimeFromConfig(config) {
180
- if (!config || typeof config !== 'object') return null;
181
- if (config.runtime && typeof config.runtime === 'object') return config.runtime;
182
- if (config.shared || config.loading || config.inference || config.emulation) return config;
183
- return null;
184
- }
185
-
186
- function sanitizeReportOutput(output) {
187
- if (output == null) return null;
188
- if (typeof output !== 'object') return output;
189
- if (ArrayBuffer.isView(output)) {
190
- return {
191
- type: output.constructor?.name || 'TypedArray',
192
- length: Number.isFinite(output.length) ? output.length : null,
193
- };
194
- }
195
- if (
196
- Number.isFinite(output?.width)
197
- && Number.isFinite(output?.height)
198
- && ArrayBuffer.isView(output?.pixels)
199
- ) {
200
- const { pixels, ...rest } = output;
201
- return {
202
- ...rest,
203
- width: output.width,
204
- height: output.height,
205
- pixels: {
206
- type: pixels.constructor?.name || 'TypedArray',
207
- length: Number.isFinite(pixels.length) ? pixels.length : null,
208
- },
209
- };
210
- }
211
- return output;
212
- }
213
-
214
- function normalizeExtends(value) {
215
- if (Array.isArray(value)) {
216
- return value.map((entry) => String(entry || '').trim()).filter(Boolean);
217
- }
218
- if (typeof value === 'string') {
219
- const trimmed = value.trim();
220
- return trimmed ? [trimmed] : [];
221
- }
222
- return [];
223
- }
224
-
225
- function normalizeExtendsPath(value) {
226
- const trimmed = String(value || '').trim();
227
- if (!trimmed) return null;
228
- return trimmed.endsWith('.json') ? trimmed : `${trimmed}.json`;
229
- }
230
-
231
- function resolveAbsoluteUrl(target, base) {
232
- try {
233
- if (base) {
234
- return new URL(target, base).toString();
235
- }
236
- if (typeof globalThis.location !== 'undefined' && globalThis.location?.href) {
237
- return new URL(target, globalThis.location.href).toString();
238
- }
239
- return new URL(target, import.meta.url).toString();
240
- } catch {
241
- return target;
242
- }
243
- }
244
-
245
- function isAbsoluteUrl(value) {
246
- return /^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(value);
247
- }
248
-
249
- function joinUrl(base, path) {
250
- if (!base) return path;
251
- if (isAbsoluteUrl(base)) {
252
- return new URL(path, base.endsWith('/') ? base : `${base}/`).toString();
253
- }
254
- const normalizedBase = base.replace(/\/$/, '');
255
- const normalizedPath = path.replace(/^\//, '');
256
- return `${normalizedBase}/${normalizedPath}`;
257
- }
258
-
259
- function resolveExtendCandidates(ref, context) {
260
- const normalized = normalizeExtendsPath(ref);
261
- if (!normalized) return [];
262
- if (isAbsoluteUrl(normalized) || normalized.startsWith('/')) {
263
- return [normalized];
264
- }
265
- if (normalized.startsWith('./') || normalized.startsWith('../')) {
266
- return [resolveAbsoluteUrl(normalized, context.sourceUrl)];
267
- }
268
- if (normalized.includes('/')) {
269
- return [joinUrl(context.presetBaseUrl, normalized)];
270
- }
271
- const candidates = [];
272
- if (context.presetBaseUrl) {
273
- candidates.push(joinUrl(context.presetBaseUrl, normalized));
274
- candidates.push(joinUrl(context.presetBaseUrl, `modes/${normalized}`));
275
- }
276
- if (context.sourceUrl) {
277
- const sourceDir = resolveAbsoluteUrl('./', context.sourceUrl);
278
- candidates.push(resolveAbsoluteUrl(normalized, sourceDir));
279
- }
280
- return [...new Set(candidates)];
281
- }
282
-
283
- async function fetchRuntimeConfig(url, options = {}) {
284
- const response = await fetch(url, { signal: options.signal });
285
- if (!response.ok) {
286
- const error = new Error(`Failed to load runtime config: ${response.status}`);
287
- error.code = response.status === 404 ? 'runtime_config_not_found' : 'runtime_config_fetch_failed';
288
- throw error;
289
- }
290
- return response.json();
291
- }
292
-
293
- async function resolveRuntimeConfigExtends(config, context) {
294
- const runtime = resolveRuntimeFromConfig(config);
295
- if (!runtime) {
296
- throw new Error('Runtime config is missing runtime fields');
297
- }
298
-
299
- const extendsRefs = normalizeExtends(config.extends);
300
- let mergedRuntime = null;
301
- let mergedConfig = null;
302
-
303
- for (const ref of extendsRefs) {
304
- const base = await loadRuntimeConfigFromRef(ref, context);
305
- mergedRuntime = mergedRuntime ? mergeRuntimeValues(mergedRuntime, base.runtime) : base.runtime;
306
- mergedConfig = mergedConfig ? mergeRuntimeValues(mergedConfig, base.config) : base.config;
307
- }
308
-
309
- const combinedRuntime = mergedRuntime ? mergeRuntimeValues(mergedRuntime, runtime) : runtime;
310
- const combinedConfig = mergedConfig ? mergeRuntimeValues(mergedConfig, config) : { ...config };
311
- const resolved = { ...combinedConfig, runtime: combinedRuntime };
312
- if (resolved.extends !== undefined) {
313
- delete resolved.extends;
314
- }
315
- return { config: resolved, runtime: combinedRuntime };
316
- }
317
-
318
- async function loadRuntimeConfigChain(url, options = {}, stack = []) {
319
- const presetBaseUrl = options.presetBaseUrl || options.baseUrl || resolvePresetBaseUrl();
320
- const resolvedUrl = resolveAbsoluteUrl(url);
321
- if (stack.includes(resolvedUrl)) {
322
- throw new Error(`Runtime config extends cycle: ${[...stack, resolvedUrl].join(' -> ')}`);
323
- }
324
- const config = await fetchRuntimeConfig(resolvedUrl, options);
325
- return resolveRuntimeConfigExtends(config, {
326
- ...options,
327
- sourceUrl: resolvedUrl,
328
- presetBaseUrl,
329
- stack: [...stack, resolvedUrl],
330
- });
331
- }
332
-
333
- async function loadRuntimeConfigFromRef(ref, context) {
334
- const candidates = resolveExtendCandidates(ref, context);
335
- if (!candidates.length) {
336
- throw new Error(`Runtime config extends is invalid: ${ref}`);
337
- }
338
- let lastError = null;
339
- for (const candidate of candidates) {
340
- try {
341
- return await loadRuntimeConfigChain(candidate, context, context.stack ?? []);
342
- } catch (error) {
343
- if (error?.code === 'runtime_config_not_found') {
344
- lastError = error;
345
- continue;
346
- }
347
- throw error;
348
- }
349
- }
350
- if (lastError) {
351
- throw lastError;
352
- }
353
- throw new Error(`Runtime config extends not found: ${ref}`);
354
- }
355
-
356
- export async function loadRuntimeConfigFromUrl(url, options = {}) {
357
- if (!url) {
358
- throw new Error('runtime config url is required');
359
- }
360
- return loadRuntimeConfigChain(url, options);
361
- }
362
-
363
- export async function applyRuntimeConfigFromUrl(url, options = {}) {
364
- const { runtime } = await loadRuntimeConfigFromUrl(url, options);
365
- setRuntimeConfig(runtime);
366
- return runtime;
367
- }
368
-
369
- export async function loadRuntimePreset(presetId, options = {}) {
370
- const baseUrl = options.baseUrl || resolvePresetBaseUrl();
371
- const normalized = normalizePresetPath(presetId);
372
- if (!normalized) {
373
- throw new Error('runtime preset id is required');
374
- }
375
- const url = `${baseUrl.replace(/\/$/, '')}/${normalized}`;
376
- return loadRuntimeConfigFromUrl(url, { ...options, presetBaseUrl: baseUrl });
377
- }
378
-
379
- export async function applyRuntimePreset(presetId, options = {}) {
380
- const { runtime } = await loadRuntimePreset(presetId, options);
381
- setRuntimeConfig(runtime);
382
- return runtime;
383
- }
384
-
385
88
  export async function initializeBrowserHarness(options = {}) {
386
89
  const { modelUrl, onProgress, log } = options;
387
90
  if (!modelUrl) {
@@ -427,1308 +130,101 @@ export async function runBrowserHarness(options = {}) {
427
130
  const BROWSER_SUITE_SET = Object.freeze([
428
131
  'kernels',
429
132
  'inference',
430
- 'training',
431
- 'bench',
432
- 'debug',
433
- 'diffusion',
434
- 'energy',
435
- ]);
436
-
437
- const BROWSER_SUITE_DISPATCH_MAP = Object.freeze({
438
- kernels: 'runKernelSuite',
439
- inference: 'runInferenceSuite',
440
- training: 'runTrainingSuite',
441
- bench: 'runBenchSuite',
442
- debug: 'runInferenceSuite(debug)',
443
- diffusion: 'runDiffusionSuite',
444
- energy: 'runEnergySuite',
445
- });
446
-
447
- export function getBrowserSupportedSuites() {
448
- return [...BROWSER_SUITE_SET];
449
- }
450
-
451
- export function getBrowserSuiteDispatchMap() {
452
- return { ...BROWSER_SUITE_DISPATCH_MAP };
453
- }
454
-
455
- function createUnsupportedSuiteError(requestedSuite, context = {}) {
456
- const command = typeof context.command === 'string' && context.command.trim()
457
- ? context.command.trim()
458
- : 'run-browser-suite';
459
- const surface = typeof context.surface === 'string' && context.surface.trim()
460
- ? context.surface.trim()
461
- : 'browser';
462
- const allowedSuites = [...BROWSER_SUITE_SET];
463
- const error = new Error(
464
- `Unsupported suite "${requestedSuite}". Allowed suites: ${allowedSuites.join(', ')}. ` +
465
- `command="${command}" surface="${surface}".`
466
- );
467
- error.code = 'unsupported_suite';
468
- error.requestedSuite = requestedSuite;
469
- error.allowedSuites = allowedSuites;
470
- error.command = command;
471
- error.surface = surface;
472
- error.details = {
473
- requestedSuite,
474
- allowedSuites,
475
- command,
476
- surface,
477
- };
478
- return error;
479
- }
480
-
481
- function resolveSuiteContext(options = {}) {
482
- const command = typeof options.command === 'string' ? options.command : null;
483
- const surface = typeof options.surface === 'string' ? options.surface : null;
484
- return {
485
- command: command ?? 'run-browser-suite',
486
- surface: surface ?? 'browser',
487
- };
488
- }
489
-
490
- function normalizeSuite(value, context = {}) {
491
- const suite = String(value || '').trim().toLowerCase();
492
- if (!suite) {
493
- throw createUnsupportedSuiteError(suite, context);
494
- }
495
- const normalized = suite === 'benchmark' ? 'bench' : suite;
496
- if (!BROWSER_SUITE_SET.includes(normalized)) {
497
- throw createUnsupportedSuiteError(normalized, context);
498
- }
499
- return normalized;
500
- }
501
-
502
- export function buildSuiteSummary(suiteName, results, startTimeMs) {
503
- let passed = 0;
504
- let failed = 0;
505
- let skipped = 0;
506
- const safeResults = Array.isArray(results) ? results : [];
507
- for (const result of safeResults) {
508
- if (result.skipped) {
509
- skipped++;
510
- } else if (result.passed) {
511
- passed++;
512
- } else {
513
- failed++;
514
- }
515
- }
516
- const duration = Math.max(0, performance.now() - (Number.isFinite(startTimeMs) ? startTimeMs : performance.now()));
517
- return { suite: suiteName, passed, failed, skipped, duration, results: safeResults };
518
- }
519
-
520
- function normalizeCacheMode(value) {
521
- return value === 'cold' || value === 'warm' ? value : 'warm';
522
- }
523
-
524
- function normalizeLoadMode(value, hasModelUrl) {
525
- if (value === 'opfs' || value === 'http' || value === 'memory') {
526
- return value;
527
- }
528
- return hasModelUrl ? 'http' : 'opfs';
529
- }
530
-
531
- function isNodeRuntime() {
532
- return typeof process !== 'undefined' && !!process.versions?.node;
533
- }
534
-
535
- function normalizeWorkloadType(value) {
536
- const normalized = String(value || '').trim().toLowerCase();
537
- return normalized || null;
538
- }
539
-
540
- function safeStatsValue(value) {
541
- return Number.isFinite(value) ? Number(value) : 0;
542
- }
543
-
544
- function calculateRatePerSecond(count, durationMs) {
545
- const safeCount = safeStatsValue(count);
546
- const safeDurationMs = safeStatsValue(durationMs);
547
- if (safeCount <= 0 || safeDurationMs <= 0) return 0;
548
- return Number(((safeCount * 1000) / safeDurationMs).toFixed(2));
549
- }
550
-
551
- function buildDiffusionPerformanceArtifact({
552
- warmupRuns,
553
- timedRuns,
554
- width,
555
- height,
556
- steps,
557
- guidanceScale,
558
- avgPrefillTokens,
559
- avgDecodeTokens,
560
- cpuStats,
561
- gpuStats,
562
- }) {
563
- const cpuPrefillMs = safeStatsValue(cpuStats?.prefillMs?.median);
564
- const cpuDenoiseMs = safeStatsValue(cpuStats?.denoiseMs?.median);
565
- const cpuVaeMs = safeStatsValue(cpuStats?.vaeMs?.median);
566
- const cpuTotalMs = safeStatsValue(cpuStats?.totalMs?.median);
567
- const gpuPrefillMs = safeStatsValue(gpuStats?.prefillMs?.median);
568
- const gpuDenoiseMs = safeStatsValue(gpuStats?.denoiseMs?.median);
569
- const gpuVaeMs = safeStatsValue(gpuStats?.vaeMs?.median);
570
- const gpuTotalMs = safeStatsValue(gpuStats?.totalMs?.median);
571
- const decodeStepsPerSec = calculateRatePerSecond(steps, cpuDenoiseMs);
572
- const decodeTokensPerSec = calculateRatePerSecond(avgDecodeTokens, cpuDenoiseMs);
573
- const prefillTokensPerSec = calculateRatePerSecond(avgPrefillTokens, cpuPrefillMs);
574
-
575
- return {
576
- schemaVersion: 1,
577
- warmupRuns,
578
- timedRuns,
579
- shape: {
580
- width,
581
- height,
582
- },
583
- scheduler: {
584
- steps,
585
- guidanceScale,
586
- },
587
- cpu: {
588
- totalMs: cpuTotalMs,
589
- prefillMs: cpuPrefillMs,
590
- denoiseMs: cpuDenoiseMs,
591
- vaeMs: cpuVaeMs,
592
- },
593
- gpu: {
594
- available: gpuStats?.available === true,
595
- totalMs: gpuStats?.available === true ? gpuTotalMs : null,
596
- prefillMs: gpuStats?.available === true ? gpuPrefillMs : null,
597
- denoiseMs: gpuStats?.available === true ? gpuDenoiseMs : null,
598
- vaeMs: gpuStats?.available === true ? gpuVaeMs : null,
599
- },
600
- throughput: {
601
- prefillTokensPerSec,
602
- decodeTokensPerSec,
603
- decodeStepsPerSec,
604
- },
605
- tokens: {
606
- avgPrefillTokens: safeStatsValue(avgPrefillTokens),
607
- avgDecodeTokens: safeStatsValue(avgDecodeTokens),
608
- },
609
- };
610
- }
611
-
612
- function assertDiffusionPerformanceArtifact(metrics, contextLabel = 'diffusion') {
613
- const artifact = metrics?.performanceArtifact;
614
- if (!artifact || typeof artifact !== 'object') {
615
- throw new Error(`${contextLabel}: metrics.performanceArtifact is required.`);
616
- }
617
- if (artifact.schemaVersion !== 1) {
618
- throw new Error(`${contextLabel}: metrics.performanceArtifact.schemaVersion must be 1.`);
619
- }
620
- if (!Number.isInteger(artifact.warmupRuns) || artifact.warmupRuns < 0) {
621
- throw new Error(`${contextLabel}: metrics.performanceArtifact.warmupRuns must be a non-negative integer.`);
622
- }
623
- if (!Number.isInteger(artifact.timedRuns) || artifact.timedRuns < 1) {
624
- throw new Error(`${contextLabel}: metrics.performanceArtifact.timedRuns must be a positive integer.`);
625
- }
626
- if (!Number.isFinite(artifact?.cpu?.prefillMs)) {
627
- throw new Error(`${contextLabel}: metrics.performanceArtifact.cpu.prefillMs must be finite.`);
628
- }
629
- if (!Number.isFinite(artifact?.cpu?.denoiseMs)) {
630
- throw new Error(`${contextLabel}: metrics.performanceArtifact.cpu.denoiseMs must be finite.`);
631
- }
632
- if (!Number.isFinite(artifact?.cpu?.vaeMs)) {
633
- throw new Error(`${contextLabel}: metrics.performanceArtifact.cpu.vaeMs must be finite.`);
634
- }
635
- if (!Number.isFinite(artifact?.cpu?.totalMs)) {
636
- throw new Error(`${contextLabel}: metrics.performanceArtifact.cpu.totalMs must be finite.`);
637
- }
638
- if (!Number.isFinite(artifact?.throughput?.decodeStepsPerSec)) {
639
- throw new Error(`${contextLabel}: metrics.performanceArtifact.throughput.decodeStepsPerSec must be finite.`);
640
- }
641
- }
642
-
643
- function toTimingNumber(value, fallback = 0) {
644
- return formatMetricNumber(value, fallback, 2);
645
- }
646
-
647
- function safeToFixed(value, fallback = 0, digits = 2) {
648
- return formatMetricNumber(value, fallback, digits);
649
- }
650
-
651
- function sampleTimingNumber(stats, key, fallback = 0) {
652
- return formatMetricNumber(stats?.[key], fallback, 2);
653
- }
654
-
655
- function formatMetricNumber(value, fallback = 0, digits = 2) {
656
- const numericValue = Number(value);
657
- if (!Number.isFinite(numericValue)) return fallback;
658
- return Number(numericValue.toFixed(digits));
659
- }
660
-
661
- function buildCanonicalTiming(overrides = {}) {
662
- const cacheMode = normalizeCacheMode(overrides.cacheMode);
663
- const modelLoadMs = toTimingNumber(overrides.modelLoadMs, 0);
664
- const prefillMs = toTimingNumber(overrides.prefillMs, 0);
665
- const decodeMs = toTimingNumber(overrides.decodeMs, 0);
666
- const decodeMsPerTokenP50 = Number.isFinite(overrides.decodeMsPerTokenP50)
667
- ? toTimingNumber(overrides.decodeMsPerTokenP50)
668
- : null;
669
- const decodeMsPerTokenP95 = Number.isFinite(overrides.decodeMsPerTokenP95)
670
- ? toTimingNumber(overrides.decodeMsPerTokenP95)
671
- : null;
672
- const decodeMsPerTokenP99 = Number.isFinite(overrides.decodeMsPerTokenP99)
673
- ? toTimingNumber(overrides.decodeMsPerTokenP99)
674
- : null;
675
- const decodeTokensPerSec = Number.isFinite(overrides.decodeTokensPerSec)
676
- ? toTimingNumber(overrides.decodeTokensPerSec)
677
- : null;
678
- const prefillTokensPerSec = Number.isFinite(overrides.prefillTokensPerSec)
679
- ? toTimingNumber(overrides.prefillTokensPerSec)
680
- : null;
681
- const totalRunMs = toTimingNumber(
682
- overrides.totalRunMs,
683
- toTimingNumber(prefillMs + decodeMs)
684
- );
685
- const firstTokenMs = Number.isFinite(overrides.firstTokenMs)
686
- ? toTimingNumber(overrides.firstTokenMs)
687
- : null;
688
- const firstResponseMs = Number.isFinite(overrides.firstResponseMs)
689
- ? toTimingNumber(overrides.firstResponseMs)
690
- : toTimingNumber(modelLoadMs + totalRunMs);
691
-
692
- return {
693
- modelLoadMs,
694
- firstTokenMs,
695
- firstResponseMs,
696
- prefillMs,
697
- decodeMs,
698
- decodeMsPerTokenP50,
699
- decodeMsPerTokenP95,
700
- decodeMsPerTokenP99,
701
- decodeTokensPerSec,
702
- prefillTokensPerSec,
703
- totalRunMs,
704
- cacheMode,
705
- loadMode: overrides.loadMode,
706
- };
707
- }
708
-
709
- function buildTimingDiagnostics(timing = {}, options = {}) {
710
- const prefillSemantics = String(options.prefillSemantics || 'internal_prefill_phase');
711
- const source = String(options.source || 'doppler');
712
- const modelLoadMs = Number.isFinite(timing.modelLoadMs) ? toTimingNumber(timing.modelLoadMs) : null;
713
- const firstTokenMs = Number.isFinite(timing.firstTokenMs) ? toTimingNumber(timing.firstTokenMs) : null;
714
- const firstResponseMs = Number.isFinite(timing.firstResponseMs) ? toTimingNumber(timing.firstResponseMs) : null;
715
- const prefillMs = Number.isFinite(timing.prefillMs) ? toTimingNumber(timing.prefillMs) : null;
716
- const decodeMs = Number.isFinite(timing.decodeMs) ? toTimingNumber(timing.decodeMs) : null;
717
- const totalRunMs = Number.isFinite(timing.totalRunMs) ? toTimingNumber(timing.totalRunMs) : null;
718
-
719
- const firstResponseFromLoadAndFirstTokenMs = (
720
- Number.isFinite(modelLoadMs) && Number.isFinite(firstTokenMs)
721
- )
722
- ? toTimingNumber(modelLoadMs + firstTokenMs)
723
- : null;
724
- const runFromPrefillAndDecodeMs = (
725
- Number.isFinite(prefillMs) && Number.isFinite(decodeMs)
726
- )
727
- ? toTimingNumber(prefillMs + decodeMs)
728
- : null;
729
-
730
- const firstResponseResidualMs = (
731
- Number.isFinite(firstResponseMs) && Number.isFinite(firstResponseFromLoadAndFirstTokenMs)
732
- )
733
- ? toTimingNumber(firstResponseMs - firstResponseFromLoadAndFirstTokenMs)
734
- : null;
735
- const runResidualMs = (
736
- Number.isFinite(totalRunMs) && Number.isFinite(runFromPrefillAndDecodeMs)
737
- )
738
- ? toTimingNumber(totalRunMs - runFromPrefillAndDecodeMs)
739
- : null;
740
-
741
- return {
742
- schemaVersion: 1,
743
- source,
744
- semantics: {
745
- modelLoadMs: 'model initialization/load before generation',
746
- firstTokenMs: 'ttft from generation start',
747
- firstResponseMs: 'modelLoadMs + firstTokenMs',
748
- prefillMs: prefillSemantics,
749
- decodeMs: 'time after first token',
750
- totalRunMs: 'prefillMs + decodeMs',
751
- },
752
- componentsMs: {
753
- modelLoadMs,
754
- firstTokenMs,
755
- firstResponseMs,
756
- prefillMs,
757
- decodeMs,
758
- totalRunMs,
759
- },
760
- sumsMs: {
761
- firstResponseFromLoadAndFirstTokenMs,
762
- runFromPrefillAndDecodeMs,
763
- },
764
- residualsMs: {
765
- firstResponseResidualMs,
766
- runResidualMs,
767
- },
768
- consistent: {
769
- firstResponse: Number.isFinite(firstResponseResidualMs) ? Math.abs(firstResponseResidualMs) <= 2 : null,
770
- totalRun: Number.isFinite(runResidualMs) ? Math.abs(runResidualMs) <= 2 : null,
771
- },
772
- };
773
- }
774
-
775
- function resolveDeviceInfo() {
776
- try {
777
- return getKernelCapabilities();
778
- } catch {
779
- return null;
780
- }
781
- }
782
-
783
- async function resolveKernelPathForModel(options = {}) {
784
- const runtimeConfig = options.runtime?.runtimeConfig ?? getRuntimeConfig();
785
- let manifest = null;
786
- let manifestModelId = options.modelId || null;
787
-
788
- if (options.modelId) {
789
- await openModelStore(options.modelId);
790
- const manifestText = await loadManifestFromStore();
791
- if (manifestText) {
792
- manifest = parseManifest(manifestText);
793
- manifestModelId = manifest.modelId ?? options.modelId;
794
- }
795
- }
796
-
797
- if (!manifest) return null;
798
-
799
- const modelConfig = parseModelConfigFromManifest(manifest, runtimeConfig);
800
- const kernelPathState = resolveKernelPathState({
801
- manifest,
802
- runtimeConfig,
803
- modelConfig,
804
- });
805
- activateKernelPathState(kernelPathState);
806
- return {
807
- modelId: manifestModelId,
808
- kernelPath: kernelPathState.resolvedKernelPath,
809
- source: kernelPathState.kernelPathSource,
810
- };
811
- }
812
-
813
- async function initializeInferenceFromStorage(modelId, options = {}) {
814
- const { onProgress } = options;
815
- if (!modelId) {
816
- throw new Error('modelId is required');
817
- }
818
-
819
- if (options.runtime?.runtimeConfig) {
820
- setRuntimeConfig(options.runtime.runtimeConfig);
821
- }
822
-
823
- onProgress?.('storage', 0.05, 'Opening model store...');
824
- await openModelStore(modelId);
825
-
826
- onProgress?.('manifest', 0.1, 'Loading manifest...');
827
- const manifestText = await loadManifestFromStore();
828
- if (!manifestText) {
829
- throw new Error('Manifest not found in storage');
830
- }
831
- const manifest = parseManifest(manifestText);
832
-
833
- onProgress?.('gpu', 0.2, 'Initializing WebGPU...');
834
- await initDevice();
835
- const device = getDevice();
836
- const capabilities = getKernelCapabilities();
837
-
838
- onProgress?.('pipeline', 0.3, 'Creating pipeline...');
839
- const pipeline = await createPipeline(manifest, {
840
- gpu: { device },
841
- runtime: options.runtime,
842
- onProgress,
843
- });
844
-
845
- return { pipeline, manifest, capabilities };
846
- }
847
-
848
- async function initializeInferenceFromSourcePath(sourcePath, options = {}) {
849
- const { onProgress } = options;
850
- if (!sourcePath || typeof sourcePath !== 'string') {
851
- throw new Error('modelUrl is required for loadMode=memory.');
852
- }
853
- if (!isNodeRuntime()) {
854
- throw new Error('loadMode=memory source runtime is currently supported on Node only.');
855
- }
856
- if (/^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//.test(sourcePath)) {
857
- throw new Error(
858
- 'loadMode=memory expects a local filesystem path (Safetensors directory or .gguf file), not an URL.'
859
- );
860
- }
861
-
862
- if (options.runtime?.runtimeConfig) {
863
- setRuntimeConfig(options.runtime.runtimeConfig);
864
- }
865
-
866
- onProgress?.('source', 0.05, 'Preparing source runtime bundle...');
867
- const { resolveNodeSourceRuntimeBundle } = await import(NODE_SOURCE_RUNTIME_MODULE_PATH);
868
- const sourceBundle = await resolveNodeSourceRuntimeBundle({
869
- inputPath: sourcePath,
870
- modelId: options.modelId || null,
871
- });
872
- if (!sourceBundle) {
873
- throw new Error(
874
- `No source-runtime model detected at "${sourcePath}". ` +
875
- 'Expected a Safetensors directory or a .gguf file path.'
876
- );
877
- }
878
-
879
- onProgress?.('gpu', 0.2, 'Initializing WebGPU...');
880
- await initDevice();
881
- const device = getDevice();
882
- const capabilities = getKernelCapabilities();
883
-
884
- onProgress?.('pipeline', 0.3, 'Creating pipeline...');
885
- const pipeline = await createPipeline(sourceBundle.manifest, {
886
- gpu: { device },
887
- runtime: options.runtime,
888
- storage: sourceBundle.storageContext,
889
- onProgress,
890
- });
891
-
892
- return {
893
- pipeline,
894
- manifest: sourceBundle.manifest,
895
- capabilities,
896
- };
897
- }
898
-
899
- async function resolveHarnessOverride(options = {}) {
900
- const input = typeof options.harnessOverride === 'function'
901
- ? await options.harnessOverride(options)
902
- : options.harnessOverride;
903
-
904
- if (!input || typeof input !== 'object') {
905
- throw new Error('harnessOverride must resolve to an object.');
906
- }
907
-
908
- if (!input.pipeline || typeof input.pipeline.generate !== 'function') {
909
- throw new Error('harnessOverride.pipeline.generate(request) is required.');
910
- }
911
-
912
- const manifest = input.manifest && typeof input.manifest === 'object'
913
- ? input.manifest
914
- : {
915
- modelId: options.modelId || 'diffusion-harness-override',
916
- modelType: 'diffusion',
917
- };
918
-
919
- const modelLoadMs = Number.isFinite(input.modelLoadMs)
920
- ? Math.max(0, input.modelLoadMs)
921
- : 0;
922
-
923
- return {
924
- ...input,
925
- manifest,
926
- modelLoadMs,
927
- };
928
- }
929
-
930
- async function initializeSuiteModel(options = {}) {
931
- if (options.harnessOverride) {
932
- if (options.runtime?.runtimeConfig) {
933
- setRuntimeConfig(options.runtime.runtimeConfig);
934
- }
935
- return resolveHarnessOverride(options);
936
- }
937
- const loadStart = performance.now();
938
- const runtime = resolveRuntime(options);
939
- const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
940
- let harness;
941
- if (loadMode === 'memory') {
942
- if (!options.modelUrl) {
943
- throw new Error('loadMode=memory requires modelUrl to be a local model path.');
944
- }
945
- harness = await initializeInferenceFromSourcePath(options.modelUrl, { ...options, runtime });
946
- } else if (options.modelId && !options.modelUrl) {
947
- harness = await initializeInferenceFromStorage(options.modelId, { ...options, runtime });
948
- } else {
949
- if (!options.modelUrl) {
950
- throw new Error('modelUrl is required for this suite');
951
- }
952
- harness = await initializeInference(options.modelUrl, {
953
- runtime,
954
- onProgress: options.onProgress,
955
- log: options.log,
956
- });
957
- }
958
- const modelLoadMs = Math.max(0, performance.now() - loadStart);
959
- return { ...harness, modelLoadMs };
960
- }
961
-
962
- async function runKernelSuite(options = {}) {
963
- const startTime = performance.now();
964
- const { testHarness, initGPU } = await import('../../tests/kernels/browser/test-page.js');
965
- const { runKernelSuite: runAllKernelTests } = await import('../../tests/kernels/browser/kernel-suite.js');
966
- await initGPU();
967
-
968
- const previousKernelPath = getActiveKernelPath();
969
- const previousKernelSource = getActiveKernelPathSource();
970
- const previousKernelPathPolicy = getActiveKernelPathPolicy();
971
- if (options.modelId) {
972
- await resolveKernelPathForModel(options);
973
- }
974
- let results = [];
975
- try {
976
- results = await runAllKernelTests(testHarness);
977
- } finally {
978
- setActiveKernelPath(previousKernelPath, previousKernelSource, previousKernelPathPolicy);
979
- }
980
-
981
- const summary = buildSuiteSummary('kernels', results, startTime);
982
- return {
983
- ...summary,
984
- deviceInfo: resolveDeviceInfo(),
985
- };
986
- }
987
-
988
-
989
-
990
-
991
-
992
- const DEFAULT_HARNESS_PROMPT = 'Summarize this input in one sentence.';
993
- const DEFAULT_RUNTIME_PLACEHOLDER_PROMPT = 'Hello from Doppler.';
994
- const DEFAULT_QWEN_PROMPT = Object.freeze({
995
- messages: Object.freeze([
996
- Object.freeze({
997
- role: 'user',
998
- content: 'Answer in one short sentence: What color is the sky on a clear day?',
999
- }),
1000
- ]),
1001
- });
1002
- const DEFAULT_TRANSLATEGEMMA_PROMPT = Object.freeze({
1003
- messages: Object.freeze([
1004
- Object.freeze({
1005
- role: 'user',
1006
- content: Object.freeze([
1007
- Object.freeze({
1008
- type: 'text',
1009
- source_lang_code: 'en',
1010
- target_lang_code: 'fr',
1011
- text: 'Hello world.',
1012
- }),
1013
- ]),
1014
- }),
1015
- ]),
1016
- });
1017
- const DEFAULT_HARNESS_MAX_TOKENS = 32;
1018
- const EMBEDDING_PREVIEW_LENGTH = 16;
1019
- const EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1 = 0.67;
1020
- const EMBEDDING_SEMANTIC_MIN_PAIR_ACC = 0.67;
1021
- const EMBEDDING_SEMANTIC_PAIR_MARGIN = 0.01;
1022
-
1023
- const EMBEDDING_SEMANTIC_RETRIEVAL_CASES = Object.freeze([
1024
- Object.freeze({
1025
- id: 'library_search',
1026
- query: 'Where can I borrow books and study quietly?',
1027
- docs: Object.freeze([
1028
- 'The city library lends books, provides study rooms, and offers free Wi-Fi.',
1029
- 'The cafe serves coffee, pastries, and sandwiches all day.',
1030
- 'The bike repair shop fixes flat tires and broken chains.',
1031
- ]),
1032
- expectedDoc: 0,
1033
- }),
1034
- Object.freeze({
1035
- id: 'password_reset',
1036
- query: 'How do I reset my account password?',
1037
- docs: Object.freeze([
1038
- 'To reset your password, open account settings and choose the forgot-password flow.',
1039
- 'Our shipping policy explains delivery timelines and tracking updates.',
1040
- 'The recipe combines tomatoes, basil, and olive oil.',
1041
- ]),
1042
- expectedDoc: 0,
1043
- }),
1044
- Object.freeze({
1045
- id: 'damaged_package',
1046
- query: 'What should I do if my package arrives damaged?',
1047
- docs: Object.freeze([
1048
- 'Contact support within seven days with photos to request a replacement for damaged items.',
1049
- 'The concert starts at 8 PM at the downtown arena.',
1050
- 'Plant roses in spring and water them twice a week.',
1051
- ]),
1052
- expectedDoc: 0,
1053
- }),
1054
- Object.freeze({
1055
- id: 'flight_change_policy',
1056
- query: 'Can I change my flight after booking?',
1057
- docs: Object.freeze([
1058
- 'The museum opens daily at 10 AM and offers guided tours on weekends.',
1059
- 'You can change your flight in Manage Booking up to 24 hours before departure, with any fare difference applied.',
1060
- 'Our gym membership includes group classes and access to the pool.',
1061
- ]),
1062
- expectedDoc: 1,
1063
- }),
1064
- Object.freeze({
1065
- id: 'wifi_troubleshoot',
1066
- query: 'Why does my home Wi-Fi keep disconnecting?',
1067
- docs: Object.freeze([
1068
- 'The dessert menu includes cheesecake, brownies, and fruit tart.',
1069
- 'You can review your recent orders in your account purchase history.',
1070
- 'Frequent Wi-Fi drops can be fixed by restarting the router, updating firmware, and changing the wireless channel.',
1071
- ]),
1072
- expectedDoc: 2,
1073
- }),
1074
- Object.freeze({
1075
- id: 'refund_deadline',
1076
- query: 'How long do I have to request a refund?',
1077
- docs: Object.freeze([
1078
- 'Refund requests are accepted within 30 days of purchase when the item is in original condition.',
1079
- 'The conference keynote starts at 9 AM in the main hall.',
1080
- 'Use a medium grind when brewing coffee with a drip machine.',
1081
- ]),
1082
- expectedDoc: 0,
1083
- }),
1084
- Object.freeze({
1085
- id: 'passport_renewal_docs',
1086
- query: 'What documents do I need to renew a passport?',
1087
- docs: Object.freeze([
1088
- 'To care for houseplants, water only when the top soil is dry.',
1089
- 'Passport renewal usually requires the application form, current passport, compliant photo, and payment.',
1090
- 'The train to downtown runs every 20 minutes during peak hours.',
1091
- ]),
1092
- expectedDoc: 1,
1093
- }),
1094
- ]);
1095
-
1096
- const EMBEDDING_SEMANTIC_PAIR_CASES = Object.freeze([
1097
- Object.freeze({
1098
- id: 'bike_paraphrase',
1099
- anchor: 'The child is riding a bicycle through the park.',
1100
- positive: 'A kid bikes along a path in the park.',
1101
- negative: 'The stock market closed lower after interest-rate news.',
1102
- }),
1103
- Object.freeze({
1104
- id: 'cancel_subscription',
1105
- anchor: 'Please cancel my subscription before renewal.',
1106
- positive: 'I want to stop the plan so it does not renew.',
1107
- negative: 'The mountain trail is closed after heavy snow.',
1108
- }),
1109
- Object.freeze({
1110
- id: 'battery_drain',
1111
- anchor: 'The laptop battery drains very quickly.',
1112
- positive: 'My notebook loses charge fast.',
1113
- negative: 'This pasta sauce tastes sweet and spicy.',
1114
- }),
1115
- Object.freeze({
1116
- id: 'order_tracking',
1117
- anchor: 'I need to track where my order is.',
1118
- positive: 'How can I check my package delivery status?',
1119
- negative: 'The violin concerto was composed in the 1800s.',
1120
- }),
1121
- Object.freeze({
1122
- id: 'account_lockout',
1123
- anchor: 'My account is locked after too many login attempts.',
1124
- positive: 'I cannot sign in because the system temporarily blocked my account.',
1125
- negative: 'Bake the cake at 350 degrees for thirty minutes.',
1126
- }),
1127
- Object.freeze({
1128
- id: 'invoice_request',
1129
- anchor: 'Please send me the invoice for last month.',
1130
- positive: 'Can you provide the billing statement for the previous month?',
1131
- negative: 'The hiking trail follows the river for five miles.',
1132
- }),
1133
- Object.freeze({
1134
- id: 'slow_internet',
1135
- anchor: 'The internet speed is much slower tonight.',
1136
- positive: 'My connection is unusually slow this evening.',
1137
- negative: 'The novel explores themes of memory and loss.',
1138
- }),
1139
- ]);
1140
-
1141
- function asText(value) {
1142
- if (typeof value !== 'string') return null;
1143
- const trimmed = value.trim();
1144
- return trimmed || null;
1145
- }
1146
-
1147
- function normalizeRetrievalFixtures(cases) {
1148
- if (!Array.isArray(cases)) return null;
1149
- const normalized = [];
1150
- for (let i = 0; i < cases.length; i++) {
1151
- const entry = cases[i];
1152
- if (!entry || typeof entry !== 'object') continue;
1153
-
1154
- const query = asText(entry.query);
1155
- const docs = Array.isArray(entry.docs) ? entry.docs.map(asText).filter(Boolean) : [];
1156
- if (!query || docs.length === 0 || !Number.isFinite(entry.expectedDoc)) {
1157
- continue;
1158
- }
1159
- const expectedDoc = Math.floor(entry.expectedDoc);
1160
- normalized.push({
1161
- id: asText(entry.id) ?? `case-${i + 1}`,
1162
- query,
1163
- docs,
1164
- expectedDoc: Math.max(0, Math.min(expectedDoc, docs.length - 1)),
1165
- });
1166
- }
1167
- return normalized.length > 0 ? normalized : null;
1168
- }
1169
-
1170
- function normalizePairFixtures(cases) {
1171
- if (!Array.isArray(cases)) return null;
1172
- const normalized = [];
1173
- for (let i = 0; i < cases.length; i++) {
1174
- const entry = cases[i];
1175
- if (!entry || typeof entry !== 'object') continue;
1176
-
1177
- const anchor = asText(entry.anchor);
1178
- const positive = asText(entry.positive);
1179
- const negative = asText(entry.negative);
1180
- if (!anchor || !positive || !negative) {
1181
- continue;
1182
- }
1183
- normalized.push({
1184
- id: asText(entry.id) ?? `pair-${i + 1}`,
1185
- anchor,
1186
- positive,
1187
- negative,
1188
- });
1189
- }
1190
- return normalized.length > 0 ? normalized : null;
1191
- }
1192
-
1193
- function resolveEmbeddingSemanticFixtures(runtimeConfig, options = null) {
1194
- const overrides = isPlainObject(options?.embeddingSemantic)
1195
- ? options.embeddingSemantic
1196
- : null;
1197
- const runtimeOverrides = runtimeConfig?.shared?.benchmark?.run?.embeddingSemantic;
1198
- const source = overrides ?? (isPlainObject(runtimeOverrides) ? runtimeOverrides : null);
1199
-
1200
- const retrievalCases = normalizeRetrievalFixtures(source?.retrievalCases)
1201
- ?? EMBEDDING_SEMANTIC_RETRIEVAL_CASES;
1202
- const pairCases = normalizePairFixtures(source?.pairCases)
1203
- ?? EMBEDDING_SEMANTIC_PAIR_CASES;
1204
- const minRetrievalTop1Acc = Number.isFinite(source?.minRetrievalTop1Acc)
1205
- ? Math.max(0, Math.min(1, Number(source.minRetrievalTop1Acc)))
1206
- : EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1;
1207
- const minPairAcc = Number.isFinite(source?.minPairAcc)
1208
- ? Math.max(0, Math.min(1, Number(source.minPairAcc)))
1209
- : EMBEDDING_SEMANTIC_MIN_PAIR_ACC;
1210
- const pairMargin = Number.isFinite(source?.pairMargin)
1211
- ? Number(source.pairMargin)
1212
- : EMBEDDING_SEMANTIC_PAIR_MARGIN;
1213
-
1214
- return {
1215
- retrievalCases,
1216
- pairCases,
1217
- minRetrievalTop1Acc,
1218
- minPairAcc,
1219
- pairMargin,
1220
- };
1221
- }
1222
-
1223
- function resolveEmbeddingSemanticStyle(pipeline) {
1224
- const manifest = pipeline?.manifest ?? null;
1225
- const style = selectRuleValue('inference', 'config', 'embeddingSemanticStyle', {
1226
- modelId: String(manifest?.modelId ?? '').toLowerCase(),
1227
- presetId: String(manifest?.inference?.presetId ?? '').toLowerCase(),
1228
- manifestModelType: String(
1229
- manifest?.config?.model_type
1230
- ?? manifest?.config?.text_config?.model_type
1231
- ?? ''
1232
- ).toLowerCase(),
1233
- });
1234
- if (typeof style === 'string' && style.length > 0) {
1235
- return style;
1236
- }
1237
- return 'default';
1238
- }
1239
-
1240
- function formatEmbeddingSemanticText(text, kind, style) {
1241
- if (style === 'embeddinggemma') {
1242
- if (kind === 'query') {
1243
- return `task: search result | query: ${text}`;
1244
- }
1245
- if (kind === 'document') {
1246
- return `title: None | text: ${text}`;
1247
- }
1248
- }
1249
- return text;
1250
- }
1251
-
1252
- function resolvePrompt(runtimeConfig) {
1253
- const runtimePrompt = runtimeConfig?.inference?.prompt;
1254
- if (typeof runtimePrompt === 'string' && runtimePrompt.trim()) {
1255
- return runtimePrompt.trim();
1256
- }
1257
- return DEFAULT_HARNESS_PROMPT;
1258
- }
1259
-
1260
- function isStructuredPromptInput(value) {
1261
- return Array.isArray(value) || (value != null && typeof value === 'object');
1262
- }
1263
-
1264
- function clonePromptInput(promptInput) {
1265
- if (!isStructuredPromptInput(promptInput)) {
1266
- return promptInput;
1267
- }
1268
- if (typeof structuredClone === 'function') {
1269
- return structuredClone(promptInput);
1270
- }
1271
- return JSON.parse(JSON.stringify(promptInput));
1272
- }
1273
-
1274
- function resolvePromptTemplateType(source) {
1275
- const sourceTemplateType = asText(source?.chatTemplateType);
1276
- if (sourceTemplateType) {
1277
- return sourceTemplateType;
1278
- }
1279
- const modelConfigTemplateType = asText(source?.modelConfig?.chatTemplateType);
1280
- if (modelConfigTemplateType) {
1281
- return modelConfigTemplateType;
1282
- }
1283
- return asText(source?.manifest?.inference?.chatTemplate?.type);
1284
- }
1285
-
1286
- function buildDefaultGenerationPrompt(templateType) {
1287
- if (templateType === 'qwen') {
1288
- return clonePromptInput(DEFAULT_QWEN_PROMPT);
1289
- }
1290
- if (templateType === 'translategemma') {
1291
- return clonePromptInput(DEFAULT_TRANSLATEGEMMA_PROMPT);
1292
- }
1293
- return DEFAULT_HARNESS_PROMPT;
1294
- }
1295
-
1296
- function shouldPreferModelDefaultPrompt(runtimePrompt, templateType) {
1297
- if (templateType !== 'translategemma' && templateType !== 'qwen') {
1298
- return false;
1299
- }
1300
- if (typeof runtimePrompt !== 'string') {
1301
- return false;
1302
- }
1303
- return runtimePrompt.trim() === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT;
1304
- }
1305
-
1306
- function assertPromptContract(runtimePrompt, templateType, source = 'runtime.inference.prompt') {
1307
- if (templateType !== 'translategemma') {
1308
- return;
1309
- }
1310
- if (runtimePrompt === undefined || runtimePrompt === null) {
1311
- return;
1312
- }
1313
- if (typeof runtimePrompt === 'string') {
1314
- const trimmed = runtimePrompt.trim();
1315
- if (!trimmed || trimmed === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT) {
1316
- return;
1317
- }
1318
- throw new Error(
1319
- `TranslateGemma harness prompt contract violation: ${source} must be ` +
1320
- '{ messages: [...] } with source_lang_code/target_lang_code blocks, not a plain string.'
1321
- );
1322
- }
1323
- if (!isStructuredPromptInput(runtimePrompt)) {
1324
- throw new Error(
1325
- `TranslateGemma harness prompt contract violation: ${source} must be ` +
1326
- '{ messages: [...] } with source_lang_code/target_lang_code blocks.'
1327
- );
1328
- }
1329
- }
1330
-
1331
- function describePromptInput(promptInput) {
1332
- if (typeof promptInput === 'string') {
1333
- return promptInput.trim() || DEFAULT_HARNESS_PROMPT;
1334
- }
1335
- const firstMessage = Array.isArray(promptInput?.messages)
1336
- ? promptInput.messages[0]
1337
- : null;
1338
- const firstContent = Array.isArray(firstMessage?.content)
1339
- ? firstMessage.content[0]
1340
- : null;
1341
- const sourceLang = asText(firstContent?.source_lang_code);
1342
- const targetLang = asText(firstContent?.target_lang_code);
1343
- const text = asText(firstContent?.text);
1344
- if (sourceLang && targetLang) {
1345
- return `${sourceLang} -> ${targetLang}: ${text || '[non-text request]'}`;
1346
- }
1347
- const stringContent = asText(firstMessage?.content);
1348
- if (stringContent) {
1349
- const role = asText(firstMessage?.role) || 'user';
1350
- return `${role}: ${stringContent}`;
1351
- }
1352
- try {
1353
- return JSON.stringify(promptInput);
1354
- } catch {
1355
- return '[structured prompt]';
1356
- }
1357
- }
1358
-
1359
- function resolveGenerationPromptInput(runtimeConfig, runOverrides = null, source = null) {
1360
- const templateType = resolvePromptTemplateType(source);
1361
- const overridePrompt = runOverrides?.prompt;
1362
- assertPromptContract(overridePrompt, templateType, 'runOverrides.prompt');
1363
- if (typeof overridePrompt === 'string' && overridePrompt.trim()) {
1364
- return overridePrompt.trim();
1365
- }
1366
- if (isStructuredPromptInput(overridePrompt)) {
1367
- return clonePromptInput(overridePrompt);
1368
- }
133
+ 'training',
134
+ 'bench',
135
+ 'debug',
136
+ 'diffusion',
137
+ 'energy',
138
+ ]);
1369
139
 
1370
- const runtimePrompt = runtimeConfig?.inference?.prompt;
1371
- assertPromptContract(runtimePrompt, templateType, 'runtimeConfig.inference.prompt');
1372
- if (shouldPreferModelDefaultPrompt(runtimePrompt, templateType)) {
1373
- return buildDefaultGenerationPrompt(templateType);
1374
- }
1375
- if (typeof runtimePrompt === 'string' && runtimePrompt.trim()) {
1376
- return runtimePrompt.trim();
1377
- }
1378
- if (isStructuredPromptInput(runtimePrompt)) {
1379
- return clonePromptInput(runtimePrompt);
1380
- }
140
+ const BROWSER_SUITE_DISPATCH_MAP = Object.freeze({
141
+ kernels: 'runKernelSuite',
142
+ inference: 'runInferenceSuite',
143
+ training: 'runTrainingSuite',
144
+ bench: 'runBenchSuite',
145
+ debug: 'runInferenceSuite(debug)',
146
+ diffusion: 'runDiffusionSuite',
147
+ energy: 'runEnergySuite',
148
+ });
1381
149
 
1382
- return buildDefaultGenerationPrompt(templateType);
150
+ export function getBrowserSupportedSuites() {
151
+ return [...BROWSER_SUITE_SET];
1383
152
  }
1384
153
 
1385
- function resolveMaxTokens(runtimeConfig) {
1386
- const runtimeMax = runtimeConfig?.inference?.batching?.maxTokens;
1387
- if (Number.isFinite(runtimeMax)) {
1388
- return Math.max(1, Math.floor(runtimeMax));
1389
- }
1390
- return DEFAULT_HARNESS_MAX_TOKENS;
154
+ export function getBrowserSuiteDispatchMap() {
155
+ return { ...BROWSER_SUITE_DISPATCH_MAP };
1391
156
  }
1392
157
 
1393
- function resolveBenchmarkRunSettings(runtimeConfig, source = null) {
1394
- const benchConfig = runtimeConfig?.shared?.benchmark?.run || {};
1395
- const runtimeSampling = isPlainObject(runtimeConfig?.inference?.sampling)
1396
- ? runtimeConfig.inference.sampling
1397
- : {};
1398
- const benchSampling = isPlainObject(benchConfig?.sampling)
1399
- ? benchConfig.sampling
1400
- : {};
1401
- const promptInput = typeof benchConfig.customPrompt === 'string' && benchConfig.customPrompt.trim()
1402
- ? benchConfig.customPrompt.trim()
1403
- : resolveGenerationPromptInput(runtimeConfig, null, source);
1404
- const maxTokens = Number.isFinite(benchConfig.maxNewTokens)
1405
- ? Math.max(1, Math.floor(benchConfig.maxNewTokens))
1406
- : resolveMaxTokens(runtimeConfig);
1407
-
1408
- return {
1409
- warmupRuns: Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0)),
1410
- timedRuns: Math.max(1, Math.floor(benchConfig.timedRuns ?? 1)),
1411
- prompt: promptInput,
1412
- promptLabel: describePromptInput(promptInput),
1413
- maxTokens,
1414
- sampling: {
1415
- ...runtimeSampling,
1416
- ...benchSampling,
1417
- },
158
+ function createUnsupportedSuiteError(requestedSuite, context = {}) {
159
+ const command = typeof context.command === 'string' && context.command.trim()
160
+ ? context.command.trim()
161
+ : 'run-browser-suite';
162
+ const surface = typeof context.surface === 'string' && context.surface.trim()
163
+ ? context.surface.trim()
164
+ : 'browser';
165
+ const allowedSuites = [...BROWSER_SUITE_SET];
166
+ const error = new Error(
167
+ `Unsupported suite "${requestedSuite}". Allowed suites: ${allowedSuites.join(', ')}. ` +
168
+ `command="${command}" surface="${surface}".`
169
+ );
170
+ error.code = 'unsupported_suite';
171
+ error.requestedSuite = requestedSuite;
172
+ error.allowedSuites = allowedSuites;
173
+ error.command = command;
174
+ error.surface = surface;
175
+ error.details = {
176
+ requestedSuite,
177
+ allowedSuites,
178
+ command,
179
+ surface,
1418
180
  };
181
+ return error;
1419
182
  }
1420
183
 
1421
- function summarizeEmbeddingValues(embedding) {
1422
- const values = ArrayBuffer.isView(embedding) || Array.isArray(embedding) ? embedding : null;
1423
- const embeddingDim = Number.isFinite(values?.length) ? values.length : 0;
1424
- const preview = [];
1425
-
1426
- let nonFiniteCount = 0;
1427
- let finiteCount = 0;
1428
- let min = Infinity;
1429
- let max = -Infinity;
1430
- let maxAbs = 0;
1431
- let sum = 0;
1432
- let sumSq = 0;
1433
-
1434
- for (let i = 0; i < embeddingDim; i++) {
1435
- const value = Number(values[i]);
1436
- if (preview.length < EMBEDDING_PREVIEW_LENGTH) {
1437
- preview.push(Number.isFinite(value) ? Number(value.toFixed(6)) : null);
1438
- }
1439
- if (!Number.isFinite(value)) {
1440
- nonFiniteCount++;
1441
- continue;
1442
- }
1443
- finiteCount++;
1444
- if (value < min) min = value;
1445
- if (value > max) max = value;
1446
- const abs = Math.abs(value);
1447
- if (abs > maxAbs) maxAbs = abs;
1448
- sum += value;
1449
- sumSq += value * value;
1450
- }
1451
-
1452
- const mean = finiteCount > 0 ? (sum / finiteCount) : null;
1453
- const variance = finiteCount > 0 ? Math.max(0, (sumSq / finiteCount) - ((mean || 0) * (mean || 0))) : null;
1454
- const stdDev = variance == null ? null : Math.sqrt(variance);
1455
- const l2Norm = finiteCount > 0 ? Math.sqrt(sumSq) : null;
1456
- const finiteRatio = embeddingDim > 0 ? finiteCount / embeddingDim : 0;
1457
-
184
+ function resolveSuiteContext(options = {}) {
185
+ const command = typeof options.command === 'string' ? options.command : null;
186
+ const surface = typeof options.surface === 'string' ? options.surface : null;
1458
187
  return {
1459
- embeddingDim,
1460
- nonFiniteCount,
1461
- finiteCount,
1462
- finiteRatio,
1463
- min: finiteCount > 0 ? min : null,
1464
- max: finiteCount > 0 ? max : null,
1465
- maxAbs: finiteCount > 0 ? maxAbs : null,
1466
- mean,
1467
- stdDev,
1468
- l2Norm,
1469
- preview,
188
+ command: command ?? 'run-browser-suite',
189
+ surface: surface ?? 'browser',
1470
190
  };
1471
191
  }
1472
192
 
1473
- function cosineSimilarity(a, b) {
1474
- if (!a || !b || !Number.isFinite(a.length) || !Number.isFinite(b.length)) return NaN;
1475
- if (a.length !== b.length || a.length === 0) return NaN;
1476
- let dot = 0;
1477
- let normA = 0;
1478
- let normB = 0;
1479
- for (let i = 0; i < a.length; i++) {
1480
- const av = Number(a[i]);
1481
- const bv = Number(b[i]);
1482
- if (!Number.isFinite(av) || !Number.isFinite(bv)) return NaN;
1483
- dot += av * bv;
1484
- normA += av * av;
1485
- normB += bv * bv;
1486
- }
1487
- if (normA <= 0 || normB <= 0) return NaN;
1488
- return dot / Math.sqrt(normA * normB);
1489
- }
1490
-
1491
- function top1Index(values) {
1492
- let best = -1;
1493
- let bestValue = -Infinity;
1494
- for (let i = 0; i < values.length; i++) {
1495
- const value = Number(values[i]);
1496
- if (!Number.isFinite(value)) continue;
1497
- if (value > bestValue) {
1498
- bestValue = value;
1499
- best = i;
1500
- }
193
+ function normalizeSuite(value, context = {}) {
194
+ const suite = String(value || '').trim().toLowerCase();
195
+ if (!suite) {
196
+ throw createUnsupportedSuiteError(suite, context);
1501
197
  }
1502
- return best;
1503
- }
1504
-
1505
- async function embedStandaloneText(pipeline, text) {
1506
- pipeline.reset?.();
1507
- const result = await pipeline.embed(text);
1508
- const embedding = result?.embedding;
1509
- if (!embedding || !Number.isFinite(embedding.length) || embedding.length <= 0) {
1510
- throw new Error('Semantic check embedding is missing.');
198
+ const normalized = suite === 'benchmark' ? 'bench' : suite;
199
+ if (!BROWSER_SUITE_SET.includes(normalized)) {
200
+ throw createUnsupportedSuiteError(normalized, context);
1511
201
  }
1512
- return embedding;
202
+ return normalized;
1513
203
  }
1514
204
 
1515
- async function runEmbeddingSemanticChecks(pipeline, options = null) {
1516
- const config = resolveEmbeddingSemanticFixtures(
1517
- pipeline?.runtimeConfig ?? {},
1518
- options
1519
- );
1520
- const start = performance.now();
1521
- const semanticStyle = resolveEmbeddingSemanticStyle(pipeline);
1522
- const retrieval = [];
1523
- let retrievalPassed = 0;
1524
-
1525
- for (const testCase of config.retrievalCases) {
1526
- const queryEmbedding = await embedStandaloneText(
1527
- pipeline,
1528
- formatEmbeddingSemanticText(testCase.query, 'query', semanticStyle)
1529
- );
1530
- const docEmbeddings = [];
1531
- for (const doc of testCase.docs) {
1532
- docEmbeddings.push(await embedStandaloneText(
1533
- pipeline,
1534
- formatEmbeddingSemanticText(doc, 'document', semanticStyle)
1535
- ));
1536
- }
1537
- const sims = docEmbeddings.map((docEmbedding) => cosineSimilarity(queryEmbedding, docEmbedding));
1538
- const topDoc = top1Index(sims);
1539
- const passed = topDoc === testCase.expectedDoc;
1540
- if (passed) retrievalPassed++;
1541
- retrieval.push({
1542
- id: testCase.id,
1543
- passed,
1544
- expectedDoc: testCase.expectedDoc,
1545
- topDoc,
1546
- sims: sims.map((v) => (Number.isFinite(v) ? Number(v.toFixed(6)) : null)),
1547
- });
1548
- }
205
+ async function runKernelSuite(options = {}) {
206
+ const startTime = performance.now();
207
+ const { testHarness, initGPU } = await import('../../tests/kernels/browser/test-page.js');
208
+ const { runKernelSuite: runAllKernelTests } = await import('../../tests/kernels/browser/kernel-suite.js');
209
+ await initGPU();
1549
210
 
1550
- const pairs = [];
1551
- let pairPassed = 0;
1552
- for (const testCase of config.pairCases) {
1553
- const anchor = await embedStandaloneText(
1554
- pipeline,
1555
- formatEmbeddingSemanticText(testCase.anchor, 'query', semanticStyle)
1556
- );
1557
- const positive = await embedStandaloneText(
1558
- pipeline,
1559
- formatEmbeddingSemanticText(testCase.positive, 'query', semanticStyle)
1560
- );
1561
- const negative = await embedStandaloneText(
1562
- pipeline,
1563
- formatEmbeddingSemanticText(testCase.negative, 'query', semanticStyle)
1564
- );
1565
- const simPos = cosineSimilarity(anchor, positive);
1566
- const simNeg = cosineSimilarity(anchor, negative);
1567
- const margin = simPos - simNeg;
1568
- const passed = Number.isFinite(margin) && margin > config.pairMargin;
1569
- if (passed) pairPassed++;
1570
- pairs.push({
1571
- id: testCase.id,
1572
- passed,
1573
- simPos: Number.isFinite(simPos) ? Number(simPos.toFixed(6)) : null,
1574
- simNeg: Number.isFinite(simNeg) ? Number(simNeg.toFixed(6)) : null,
1575
- margin: Number.isFinite(margin) ? Number(margin.toFixed(6)) : null,
1576
- });
211
+ const previousKernelPath = getActiveKernelPath();
212
+ const previousKernelSource = getActiveKernelPathSource();
213
+ const previousKernelPathPolicy = getActiveKernelPathPolicy();
214
+ if (options.modelId) {
215
+ await resolveKernelPathForModel(options);
1577
216
  }
1578
-
1579
- const retrievalTop1Acc = retrieval.length > 0 ? retrievalPassed / retrieval.length : 0;
1580
- const pairAcc = pairs.length > 0 ? pairPassed / pairs.length : 0;
1581
- const passed = retrievalTop1Acc >= config.minRetrievalTop1Acc
1582
- && pairAcc >= config.minPairAcc;
1583
- const failedCaseIds = [
1584
- ...retrieval.filter((item) => !item.passed).map((item) => `retrieval:${item.id}`),
1585
- ...pairs.filter((item) => !item.passed).map((item) => `pair:${item.id}`),
1586
- ];
1587
-
1588
- return {
1589
- passed,
1590
- style: semanticStyle,
1591
- retrievalTop1Acc,
1592
- pairAcc,
1593
- retrievalPassed,
1594
- retrievalTotal: retrieval.length,
1595
- pairPassed,
1596
- pairTotal: pairs.length,
1597
- minRetrievalTop1Acc: Number(config.minRetrievalTop1Acc.toFixed(4)),
1598
- minPairAcc: Number(config.minPairAcc.toFixed(4)),
1599
- pairMarginThreshold: Number(config.pairMargin.toFixed(4)),
1600
- failedCaseIds,
1601
- retrieval,
1602
- pairs,
1603
- durationMs: Math.max(1, performance.now() - start),
1604
- };
1605
- }
1606
-
1607
- // Matches pad/special tokens that indicate degenerate output: <pad>, <unused123>, <eos>,
1608
- // <bos>, <s>, </s>, [PAD], [UNK], [SEP], [CLS], and bare angle-bracket tokens.
1609
- const SPECIAL_TOKEN_RE = /^(<pad>|<unused\d*>|<eos>|<bos>|<s>|<\/s>|\[PAD\]|\[UNK\]|\[SEP\]|\[CLS\]|<[^>]{1,32}>)$/i;
1610
- const PAD_DOMINANCE_THRESHOLD = 0.5;
1611
-
1612
- function isCoherentOutput(tokens, output) {
1613
- if (tokens.length === 0) return false;
1614
- const specialTokenCount = tokens.filter((t) => SPECIAL_TOKEN_RE.test(String(t).trim())).length;
1615
- if (specialTokenCount / tokens.length >= PAD_DOMINANCE_THRESHOLD) return false;
1616
- const cleanedOutput = String(output || '')
1617
- .replace(/<[^>\n]{1,80}>/g, ' ')
1618
- .replace(/\s+/g, ' ')
1619
- .trim();
1620
- return cleanedOutput.length > 0;
1621
- }
1622
-
1623
- async function runGeneration(pipeline, runtimeConfig, runOverrides = null) {
1624
- const tokens = [];
1625
- const tokenIds = [];
1626
- const promptInput = resolveGenerationPromptInput(runtimeConfig, runOverrides, pipeline);
1627
- const promptLabel = describePromptInput(promptInput);
1628
- const useChatTemplate = runOverrides?.useChatTemplate
1629
- ?? runtimeConfig?.inference?.chatTemplate?.enabled
1630
- ?? (isStructuredPromptInput(promptInput) ? true : undefined);
1631
- const maxTokens = Number.isFinite(runOverrides?.maxTokens)
1632
- ? Math.max(1, Math.floor(runOverrides.maxTokens))
1633
- : resolveMaxTokens(runtimeConfig);
1634
- const sampling = isPlainObject(runOverrides?.sampling)
1635
- ? runOverrides.sampling
1636
- : (runtimeConfig.inference?.sampling || {});
1637
- const debugProbes = runtimeConfig.shared?.debug?.probes || [];
1638
- const profile = runtimeConfig.shared?.debug?.profiler?.enabled === true;
1639
- const disableCommandBatching = Array.isArray(debugProbes) && debugProbes.length > 0;
1640
- const start = performance.now();
1641
-
1642
- for await (const tokenText of pipeline.generate(promptInput, {
1643
- maxTokens,
1644
- temperature: sampling.temperature,
1645
- topP: sampling.topP,
1646
- topK: sampling.topK,
1647
- repetitionPenalty: sampling.repetitionPenalty,
1648
- greedyThreshold: sampling.greedyThreshold,
1649
- useChatTemplate,
1650
- profile,
1651
- disableCommandBatching,
1652
- onToken: (tokenId) => {
1653
- tokenIds.push(tokenId);
1654
- },
1655
- })) {
1656
- if (typeof tokenText === 'string') {
1657
- tokens.push(tokenText);
1658
- }
217
+ let results = [];
218
+ try {
219
+ results = await runAllKernelTests(testHarness);
220
+ } finally {
221
+ setActiveKernelPath(previousKernelPath, previousKernelSource, previousKernelPathPolicy);
1659
222
  }
1660
223
 
1661
- const durationMs = Math.max(1, performance.now() - start);
1662
- const tokensPerSec = (tokens.length / durationMs) * 1000;
1663
- const stats = typeof pipeline?.getStats === 'function'
1664
- ? (pipeline.getStats() || {})
1665
- : {};
1666
- const prefillMs = Number.isFinite(stats.prefillTimeMs) ? stats.prefillTimeMs : 0;
1667
- const ttftMs = Number.isFinite(stats.ttftMs) ? stats.ttftMs : prefillMs;
1668
- const decodeMs = Number.isFinite(stats.decodeTimeMs) ? stats.decodeTimeMs : 0;
1669
- const prefillTokens = Number.isFinite(stats.prefillTokens) ? stats.prefillTokens : 0;
1670
- const decodeTokens = Number.isFinite(stats.decodeTokens)
1671
- ? stats.decodeTokens
1672
- : Math.max(0, tokens.length - 1);
1673
- const decodeTokensPerSec = decodeMs > 0
1674
- ? (decodeTokens / decodeMs) * 1000
1675
- : 0;
1676
- const prefillTokensPerSec = prefillMs > 0
1677
- ? (prefillTokens / prefillMs) * 1000
1678
- : 0;
1679
- const prefillTokensPerSecTtft = ttftMs > 0
1680
- ? (prefillTokens / ttftMs) * 1000
1681
- : 0;
1682
- const gpu = {};
1683
- if (Number.isFinite(stats.gpuTimePrefillMs)) gpu.prefillMs = stats.gpuTimePrefillMs;
1684
- if (Number.isFinite(stats.gpuTimeDecodeMs)) gpu.decodeMs = stats.gpuTimeDecodeMs;
1685
- if (Number.isFinite(stats.decodeRecordMs)) gpu.decodeRecordMs = stats.decodeRecordMs;
1686
- if (Number.isFinite(stats.decodeSubmitWaitMs)) gpu.decodeSubmitWaitMs = stats.decodeSubmitWaitMs;
1687
- if (Number.isFinite(stats.decodeReadbackWaitMs)) gpu.decodeReadbackWaitMs = stats.decodeReadbackWaitMs;
1688
- const gpuPhase = Object.keys(gpu).length > 0 ? gpu : null;
1689
- const decodeProfileSteps = Array.isArray(stats.decodeProfileSteps)
1690
- ? stats.decodeProfileSteps
1691
- : null;
1692
-
1693
- return {
1694
- prompt: promptLabel,
1695
- promptInput,
1696
- maxTokens,
1697
- tokens,
1698
- tokenIds,
1699
- output: tokens.join(''),
1700
- durationMs,
1701
- tokensPerSec,
1702
- phase: {
1703
- totalMs: Number.isFinite(stats.totalTimeMs) ? stats.totalTimeMs : durationMs,
1704
- ttftMs,
1705
- prefillMs,
1706
- decodeMs,
1707
- prefillTokens,
1708
- decodeTokens,
1709
- prefillTokensPerSec,
1710
- prefillTokensPerSecTtft,
1711
- decodeTokensPerSec,
1712
- gpu: gpuPhase,
1713
- decodeProfileSteps,
1714
- },
1715
- };
1716
- }
1717
-
1718
- async function runEmbedding(pipeline, runtimeConfig, runOverrides = null) {
1719
- const prompt = typeof runOverrides?.prompt === 'string' && runOverrides.prompt.trim()
1720
- ? runOverrides.prompt.trim()
1721
- : resolvePrompt(runtimeConfig);
1722
- const start = performance.now();
1723
- const result = await pipeline.embed(prompt);
1724
- const durationMs = Math.max(1, performance.now() - start);
1725
- const tokenCount = Number.isFinite(result?.tokens?.length) ? result.tokens.length : 0;
1726
- const stats = summarizeEmbeddingValues(result?.embedding);
224
+ const summary = buildSuiteSummary('kernels', results, startTime);
1727
225
  return {
1728
- prompt,
1729
- tokenCount,
1730
- durationMs,
1731
- ...stats,
226
+ ...summary,
227
+ deviceInfo: resolveDeviceInfo(),
1732
228
  };
1733
229
  }
1734
230
 
@@ -1867,6 +363,7 @@ async function runInferenceSuite(options = {}) {
1867
363
  modelLoadMs: safeModelLoadMs,
1868
364
  gpu: run.phase.gpu,
1869
365
  decodeProfileSteps: run.phase.decodeProfileSteps,
366
+ generationDiagnostics: run.tokenDiagnostics,
1870
367
  };
1871
368
  }
1872
369
 
@@ -2280,6 +777,7 @@ async function runBenchSuite(options = {}) {
2280
777
  totalRunMs: totalMsStats.median,
2281
778
  decodeTokensPerSec: decodeTokensPerSecStats?.median,
2282
779
  prefillTokensPerSec: prefillTokensPerSecStats?.median,
780
+ prefillTokensPerSecTtft: prefillTokensPerSecTtftStats?.median,
2283
781
  cacheMode,
2284
782
  loadMode,
2285
783
  });
@@ -2323,261 +821,6 @@ async function runBenchSuite(options = {}) {
2323
821
  };
2324
822
  }
2325
823
 
2326
- async function runDiffusionSuite(options = {}) {
2327
- const startTime = performance.now();
2328
- const runtimeConfig = getRuntimeConfig();
2329
- const captureOutput = options.captureOutput === true;
2330
- const cacheMode = normalizeCacheMode(options.cacheMode);
2331
- const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
2332
- const benchConfig = runtimeConfig.shared?.benchmark?.run || {};
2333
- const warmupRuns = Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0));
2334
- const timedRuns = Math.max(1, Math.floor(benchConfig.timedRuns ?? 1));
2335
-
2336
- const diffusionConfig = runtimeConfig.inference?.diffusion;
2337
- if (!diffusionConfig) {
2338
- throw new Error('runtime.inference.diffusion must be set for diffusion harness runs.');
2339
- }
2340
- const scheduler = diffusionConfig.scheduler;
2341
- const latent = diffusionConfig.latent;
2342
- const prompt = resolvePrompt(runtimeConfig);
2343
- const negativePrompt = diffusionConfig.negativePrompt ?? '';
2344
-
2345
- const width = Math.floor(latent?.width);
2346
- const height = Math.floor(latent?.height);
2347
- const steps = Math.floor(scheduler?.numSteps);
2348
- const guidanceScale = scheduler?.guidanceScale;
2349
-
2350
- if (!Number.isFinite(width) || width <= 0) {
2351
- throw new Error('runtime.inference.diffusion.latent.width must be set for diffusion harness runs.');
2352
- }
2353
- if (!Number.isFinite(height) || height <= 0) {
2354
- throw new Error('runtime.inference.diffusion.latent.height must be set for diffusion harness runs.');
2355
- }
2356
- if (!Number.isFinite(steps) || steps <= 0) {
2357
- throw new Error('runtime.inference.diffusion.scheduler.numSteps must be set for diffusion harness runs.');
2358
- }
2359
- if (!Number.isFinite(guidanceScale) || guidanceScale <= 0) {
2360
- throw new Error('runtime.inference.diffusion.scheduler.guidanceScale must be set for diffusion harness runs.');
2361
- }
2362
-
2363
- const harness = await initializeSuiteModel(options);
2364
- const totalMs = [];
2365
- const prefillMs = [];
2366
- const denoiseMs = [];
2367
- const vaeMs = [];
2368
- const prefillTokens = [];
2369
- const decodeTokens = [];
2370
- const gpuTotalMs = [];
2371
- const gpuPrefillMs = [];
2372
- const gpuDenoiseMs = [];
2373
- const gpuVaeMs = [];
2374
- let output = null;
2375
-
2376
- for (let i = 0; i < warmupRuns + timedRuns; i++) {
2377
- harness.pipeline.reset?.();
2378
- const result = await harness.pipeline.generate({
2379
- prompt,
2380
- negativePrompt,
2381
- steps,
2382
- guidanceScale,
2383
- width,
2384
- height,
2385
- });
2386
- if (captureOutput && i === warmupRuns + timedRuns - 1) {
2387
- output = result;
2388
- }
2389
-
2390
- if (i < warmupRuns) continue;
2391
-
2392
- const stats = harness.pipeline.getStats?.() ?? {};
2393
- if (Number.isFinite(stats.totalTimeMs)) totalMs.push(stats.totalTimeMs);
2394
- if (Number.isFinite(stats.prefillTimeMs)) prefillMs.push(stats.prefillTimeMs);
2395
- if (Number.isFinite(stats.decodeTimeMs)) denoiseMs.push(stats.decodeTimeMs);
2396
- if (Number.isFinite(stats.vaeTimeMs)) vaeMs.push(stats.vaeTimeMs);
2397
- if (Number.isFinite(stats.prefillTokens)) prefillTokens.push(stats.prefillTokens);
2398
- if (Number.isFinite(stats.decodeTokens)) decodeTokens.push(stats.decodeTokens);
2399
-
2400
- const gpu = stats.gpu ?? null;
2401
- if (gpu?.available) {
2402
- if (Number.isFinite(gpu.totalMs)) gpuTotalMs.push(gpu.totalMs);
2403
- if (Number.isFinite(gpu.prefillMs)) gpuPrefillMs.push(gpu.prefillMs);
2404
- if (Number.isFinite(gpu.denoiseMs)) gpuDenoiseMs.push(gpu.denoiseMs);
2405
- if (Number.isFinite(gpu.vaeMs)) gpuVaeMs.push(gpu.vaeMs);
2406
- }
2407
- }
2408
-
2409
- const memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
2410
- ? harness.pipeline.getMemoryStats()
2411
- : null;
2412
-
2413
- if (typeof harness.pipeline.unload === 'function' && !options.keepPipeline) {
2414
- await harness.pipeline.unload();
2415
- }
2416
-
2417
- const results = [
2418
- {
2419
- name: 'diffusion',
2420
- passed: totalMs.length > 0,
2421
- duration: totalMs.reduce((sum, value) => sum + value, 0),
2422
- error: totalMs.length > 0 ? undefined : 'No diffusion runs completed',
2423
- },
2424
- ];
2425
-
2426
- const summary = buildSuiteSummary('diffusion', results, startTime);
2427
- const cpuStats = {
2428
- totalMs: computeSampleStats(totalMs),
2429
- prefillMs: computeSampleStats(prefillMs),
2430
- denoiseMs: computeSampleStats(denoiseMs),
2431
- vaeMs: computeSampleStats(vaeMs),
2432
- };
2433
- const gpuStats = gpuTotalMs.length > 0
2434
- ? {
2435
- available: true,
2436
- totalMs: computeSampleStats(gpuTotalMs),
2437
- prefillMs: computeSampleStats(gpuPrefillMs),
2438
- denoiseMs: computeSampleStats(gpuDenoiseMs),
2439
- vaeMs: computeSampleStats(gpuVaeMs),
2440
- }
2441
- : { available: false };
2442
-
2443
- const avgPrefillTokens = prefillTokens.length
2444
- ? Math.round(prefillTokens.reduce((a, b) => a + b, 0) / prefillTokens.length)
2445
- : 0;
2446
- const avgDecodeTokens = decodeTokens.length
2447
- ? Math.round(decodeTokens.reduce((a, b) => a + b, 0) / decodeTokens.length)
2448
- : 0;
2449
- const prefillMsMedian = safeStatsValue(cpuStats.prefillMs?.median);
2450
- const denoiseMsMedian = safeStatsValue(cpuStats.denoiseMs?.median);
2451
- const totalMsMedian = safeStatsValue(cpuStats.totalMs?.median);
2452
- const diffusionPerformanceArtifact = buildDiffusionPerformanceArtifact({
2453
- warmupRuns,
2454
- timedRuns,
2455
- width,
2456
- height,
2457
- steps,
2458
- guidanceScale,
2459
- avgPrefillTokens,
2460
- avgDecodeTokens,
2461
- cpuStats,
2462
- gpuStats,
2463
- });
2464
- const timing = buildCanonicalTiming({
2465
- modelLoadMs: 0,
2466
- firstTokenMs: null,
2467
- firstResponseMs: null,
2468
- prefillMs: prefillMsMedian,
2469
- decodeMs: denoiseMsMedian,
2470
- totalRunMs: totalMsMedian,
2471
- prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
2472
- decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
2473
- cacheMode,
2474
- loadMode,
2475
- });
2476
- const timingDiagnostics = buildTimingDiagnostics(timing, {
2477
- source: 'doppler',
2478
- prefillSemantics: 'internal_prefill_phase',
2479
- });
2480
- const metricsWithContracts = buildSuiteContractMetrics(
2481
- 'diffusion',
2482
- {
2483
- warmupRuns,
2484
- timedRuns,
2485
- width,
2486
- height,
2487
- steps,
2488
- guidanceScale,
2489
- prompt,
2490
- avgPrefillTokens,
2491
- avgDecodeTokens,
2492
- latency: {
2493
- totalMs: cpuStats.totalMs,
2494
- prefillMs: cpuStats.prefillMs,
2495
- denoiseMs: cpuStats.denoiseMs,
2496
- vaeMs: cpuStats.vaeMs,
2497
- },
2498
- throughput: {
2499
- prefillTokensPerSec: diffusionPerformanceArtifact.throughput.prefillTokensPerSec,
2500
- decodeTokensPerSec: diffusionPerformanceArtifact.throughput.decodeTokensPerSec,
2501
- decodeStepsPerSec: diffusionPerformanceArtifact.throughput.decodeStepsPerSec,
2502
- },
2503
- cpu: cpuStats,
2504
- gpu: gpuStats,
2505
- performanceArtifact: diffusionPerformanceArtifact,
2506
- },
2507
- harness.manifest
2508
- );
2509
-
2510
- return {
2511
- ...summary,
2512
- modelId: options.modelId || harness.manifest?.modelId || 'unknown',
2513
- cacheMode,
2514
- loadMode,
2515
- env: {
2516
- library: 'doppler',
2517
- runtime: 'browser',
2518
- device: 'webgpu',
2519
- browserUserAgent: typeof navigator !== 'undefined' ? (navigator.userAgent || null) : null,
2520
- browserPlatform: typeof navigator !== 'undefined' ? (navigator.platform || null) : null,
2521
- browserLanguage: typeof navigator !== 'undefined' ? (navigator.language || null) : null,
2522
- browserVendor: typeof navigator !== 'undefined' ? (navigator.vendor || null) : null,
2523
- },
2524
- timing,
2525
- timingDiagnostics,
2526
- output,
2527
- metrics: metricsWithContracts,
2528
- memoryStats,
2529
- deviceInfo: resolveDeviceInfo(),
2530
- pipeline: options.keepPipeline ? harness.pipeline : null,
2531
- };
2532
- }
2533
-
2534
- async function runEnergySuite(options = {}) {
2535
- const startTime = performance.now();
2536
- const harness = await initializeSuiteModel(options);
2537
- if (harness.manifest?.modelType !== 'energy') {
2538
- throw new Error('Energy suite requires an energy model manifest.');
2539
- }
2540
-
2541
- const result = await harness.pipeline.generate();
2542
- const stats = harness.pipeline.getStats?.() ?? {};
2543
-
2544
- const memoryStats = typeof harness.pipeline?.getMemoryStats === 'function'
2545
- ? harness.pipeline.getMemoryStats()
2546
- : null;
2547
-
2548
- if (typeof harness.pipeline.unload === 'function' && !options.keepPipeline) {
2549
- await harness.pipeline.unload();
2550
- }
2551
-
2552
- const results = [
2553
- {
2554
- name: 'energy',
2555
- passed: Number.isFinite(result.energy ?? NaN),
2556
- duration: result.totalTimeMs ?? Math.max(0, performance.now() - startTime),
2557
- error: Number.isFinite(result.energy ?? NaN) ? undefined : 'Energy did not converge',
2558
- },
2559
- ];
2560
-
2561
- const summary = buildSuiteSummary('energy', results, startTime);
2562
- return {
2563
- ...summary,
2564
- modelId: options.modelId || harness.manifest?.modelId || 'unknown',
2565
- metrics: {
2566
- steps: result.steps,
2567
- energy: result.energy ?? null,
2568
- dtype: result.dtype,
2569
- shape: result.shape,
2570
- totalTimeMs: result.totalTimeMs ?? null,
2571
- energyHistory: result.energyHistory ?? [],
2572
- stateStats: result.stateStats ?? null,
2573
- readbackCount: stats.readbackCount ?? null,
2574
- },
2575
- memoryStats,
2576
- deviceInfo: resolveDeviceInfo(),
2577
- pipeline: options.keepPipeline ? harness.pipeline : null,
2578
- };
2579
- }
2580
-
2581
824
  async function dispatchBrowserSuite(suite, options) {
2582
825
  if (suite === 'kernels') {
2583
826
  return runKernelSuite(options);
@@ -2603,49 +846,6 @@ async function dispatchBrowserSuite(suite, options) {
2603
846
  return null;
2604
847
  }
2605
848
 
2606
- function collectTrainingArtifactsFromSuiteResult(suiteResult) {
2607
- const ulArtifacts = [];
2608
- const distillArtifacts = [];
2609
- const checkpointResumeTimeline = Array.isArray(suiteResult?.metrics?.checkpointResumeTimeline)
2610
- ? suiteResult.metrics.checkpointResumeTimeline
2611
- .filter((entry) => entry && typeof entry === 'object')
2612
- : [];
2613
- const addArtifact = (artifact, source = null) => {
2614
- if (!artifact || typeof artifact !== 'object' || typeof artifact.manifestPath !== 'string') {
2615
- return;
2616
- }
2617
- const stage = String(artifact.stage || '').trim();
2618
- const kind = String(artifact.kind || '').trim();
2619
- if (kind === 'distill' || stage === 'stage_a' || stage === 'stage_b') {
2620
- distillArtifacts.push(artifact);
2621
- return;
2622
- }
2623
- if (kind === 'ul' || stage === 'stage1_joint' || stage === 'stage2_base' || source === 'ul') {
2624
- ulArtifacts.push(artifact);
2625
- return;
2626
- }
2627
- ulArtifacts.push(artifact);
2628
- };
2629
-
2630
- const metricUlArtifacts = Array.isArray(suiteResult?.metrics?.ulArtifacts)
2631
- ? suiteResult.metrics.ulArtifacts
2632
- : [];
2633
- for (const artifact of metricUlArtifacts) {
2634
- addArtifact(artifact, 'ul');
2635
- }
2636
- const metricDistillArtifacts = Array.isArray(suiteResult?.metrics?.distillArtifacts)
2637
- ? suiteResult.metrics.distillArtifacts
2638
- : [];
2639
- for (const artifact of metricDistillArtifacts) {
2640
- addArtifact(artifact, 'distill');
2641
- }
2642
- const resultEntries = Array.isArray(suiteResult?.results) ? suiteResult.results : [];
2643
- for (const entry of resultEntries) {
2644
- addArtifact(entry?.artifact, null);
2645
- }
2646
- return { ulArtifacts, distillArtifacts, checkpointResumeTimeline };
2647
- }
2648
-
2649
849
  export async function runBrowserSuite(options = {}) {
2650
850
  return runWithRuntimeIsolationForSuite(async () => {
2651
851
  const suiteTimestamp = resolveReportTimestamp(options.timestamp, 'runBrowserSuite timestamp');
@@ -2711,72 +911,6 @@ export async function runBrowserSuite(options = {}) {
2711
911
  });
2712
912
  }
2713
913
 
2714
- function normalizeManifest(manifest) {
2715
- if (!manifest || typeof manifest !== 'object') {
2716
- throw new Error('Harness manifest must be an object.');
2717
- }
2718
- const runs = Array.isArray(manifest.runs) ? manifest.runs : [];
2719
- if (!runs.length) {
2720
- throw new Error('Harness manifest must include at least one run.');
2721
- }
2722
- return {
2723
- defaults: manifest.defaults ?? {},
2724
- runs,
2725
- reportModelId: manifest.reportModelId ?? manifest.id ?? 'manifest',
2726
- report: manifest.report ?? null,
2727
- };
2728
- }
2729
-
2730
- function mergeRunDefaults(defaults, run) {
2731
- return {
2732
- ...defaults,
2733
- ...run,
2734
- runtimePreset: run.runtimePreset ?? defaults.runtimePreset ?? null,
2735
- runtimeConfigUrl: run.runtimeConfigUrl ?? defaults.runtimeConfigUrl ?? null,
2736
- runtimeConfig: run.runtimeConfig ?? defaults.runtimeConfig ?? null,
2737
- suite: run.suite ?? defaults.suite ?? 'inference',
2738
- };
2739
- }
2740
-
2741
- async function applyRuntimeForRun(run, options) {
2742
- if (run.runtimeConfig) {
2743
- const runtime = resolveRuntimeFromConfig(run.runtimeConfig);
2744
- if (!runtime) {
2745
- throw new Error('runtimeConfig is missing runtime fields');
2746
- }
2747
- setRuntimeConfig(runtime);
2748
- return;
2749
- }
2750
- if (run.runtimeConfigUrl) {
2751
- await applyRuntimeConfigFromUrl(run.runtimeConfigUrl, options);
2752
- return;
2753
- }
2754
- if (run.runtimePreset) {
2755
- await applyRuntimePreset(run.runtimePreset, options);
2756
- }
2757
- }
2758
-
2759
- function summarizeManifestRuns(results) {
2760
- let passedRuns = 0;
2761
- let failedRuns = 0;
2762
- let durationMs = 0;
2763
- for (const result of results) {
2764
- const failures = (result.results || []).filter((entry) => !entry.passed && !entry.skipped);
2765
- if (failures.length > 0) {
2766
- failedRuns += 1;
2767
- } else {
2768
- passedRuns += 1;
2769
- }
2770
- durationMs += result.duration || 0;
2771
- }
2772
- return {
2773
- totalRuns: results.length,
2774
- passedRuns,
2775
- failedRuns,
2776
- durationMs,
2777
- };
2778
- }
2779
-
2780
914
  export async function runBrowserManifest(manifest, options = {}) {
2781
915
  const normalized = normalizeManifest(manifest);
2782
916
  const results = [];