@simulatte/doppler 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +16 -23
  3. package/package.json +14 -1
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +1 -1
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +7 -5
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +12 -2
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +10 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  45. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  46. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  47. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  48. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  49. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  50. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  52. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  54. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  55. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  56. package/src/config/runtime.js +6 -1
  57. package/src/config/schema/debug.schema.d.ts +5 -0
  58. package/src/config/schema/doppler.schema.js +16 -21
  59. package/src/config/schema/inference-defaults.schema.js +3 -3
  60. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  61. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  62. package/src/config/schema/manifest.schema.d.ts +2 -1
  63. package/src/config/schema/manifest.schema.js +16 -3
  64. package/src/config/training-defaults.js +30 -22
  65. package/src/converter/conversion-plan.js +94 -9
  66. package/src/converter/core.d.ts +7 -0
  67. package/src/converter/core.js +14 -9
  68. package/src/converter/execution-v0-manifest.js +4 -1
  69. package/src/converter/index.d.ts +1 -0
  70. package/src/converter/index.js +1 -0
  71. package/src/converter/manifest-inference.js +43 -12
  72. package/src/converter/parsers/diffusion.js +0 -3
  73. package/src/converter/quantization-info.js +35 -15
  74. package/src/converter/shard-packer.d.ts +1 -1
  75. package/src/converter/shard-packer.js +4 -1
  76. package/src/debug/config.js +123 -11
  77. package/src/debug/signals.js +7 -1
  78. package/src/debug/tensor.d.ts +2 -0
  79. package/src/debug/tensor.js +13 -2
  80. package/src/distribution/p2p-control-plane.js +52 -12
  81. package/src/distribution/p2p-observability.js +43 -7
  82. package/src/distribution/p2p-webrtc-browser.js +20 -0
  83. package/src/distribution/shard-delivery.js +77 -26
  84. package/src/formats/gguf/types.js +33 -16
  85. package/src/formats/rdrr/groups.d.ts +12 -4
  86. package/src/formats/rdrr/groups.js +3 -6
  87. package/src/formats/rdrr/parsing.js +39 -2
  88. package/src/formats/rdrr/types.d.ts +2 -1
  89. package/src/gpu/command-recorder.js +86 -61
  90. package/src/gpu/device.d.ts +1 -0
  91. package/src/gpu/device.js +73 -19
  92. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  93. package/src/gpu/kernel-tuner/cache.js +71 -4
  94. package/src/gpu/kernel-tuner/tuner.js +22 -4
  95. package/src/gpu/kernels/attention.js +15 -34
  96. package/src/gpu/kernels/backward/adam.js +62 -58
  97. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  98. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  99. package/src/gpu/kernels/cast.js +191 -149
  100. package/src/gpu/kernels/check-stop.js +33 -44
  101. package/src/gpu/kernels/conv2d.js +27 -17
  102. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  103. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  104. package/src/gpu/kernels/dequant.js +178 -126
  105. package/src/gpu/kernels/energy.d.ts +3 -21
  106. package/src/gpu/kernels/energy.js +111 -88
  107. package/src/gpu/kernels/feature-check.js +1 -1
  108. package/src/gpu/kernels/fused_ffn.js +84 -65
  109. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  110. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  111. package/src/gpu/kernels/gather.js +33 -15
  112. package/src/gpu/kernels/gelu.js +19 -11
  113. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  114. package/src/gpu/kernels/groupnorm.js +34 -23
  115. package/src/gpu/kernels/kv-quantize.js +5 -2
  116. package/src/gpu/kernels/layernorm.js +35 -19
  117. package/src/gpu/kernels/logit-merge.js +5 -3
  118. package/src/gpu/kernels/matmul.js +58 -39
  119. package/src/gpu/kernels/modulate.js +23 -15
  120. package/src/gpu/kernels/moe.js +221 -175
  121. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  122. package/src/gpu/kernels/relu.js +18 -10
  123. package/src/gpu/kernels/repeat_channels.js +25 -17
  124. package/src/gpu/kernels/residual.js +37 -27
  125. package/src/gpu/kernels/rmsnorm.js +57 -41
  126. package/src/gpu/kernels/rope.js +3 -0
  127. package/src/gpu/kernels/sample.js +27 -38
  128. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  129. package/src/gpu/kernels/scale.js +18 -11
  130. package/src/gpu/kernels/shader-cache.js +4 -2
  131. package/src/gpu/kernels/silu.js +120 -72
  132. package/src/gpu/kernels/softmax.js +44 -25
  133. package/src/gpu/kernels/split_qkv.js +23 -13
  134. package/src/gpu/kernels/transpose.js +18 -10
  135. package/src/gpu/kernels/transpose.wgsl +5 -3
  136. package/src/gpu/kernels/upsample2d.js +21 -13
  137. package/src/gpu/kernels/utils.js +20 -13
  138. package/src/gpu/partitioned-buffer-pool.js +10 -2
  139. package/src/gpu/perf-guards.js +2 -9
  140. package/src/gpu/profiler.js +27 -22
  141. package/src/gpu/readback-utils.d.ts +16 -0
  142. package/src/gpu/readback-utils.js +41 -0
  143. package/src/gpu/submit-tracker.js +13 -0
  144. package/src/gpu/uniform-cache.d.ts +1 -0
  145. package/src/gpu/uniform-cache.js +30 -9
  146. package/src/hotswap/intent-bundle.js +6 -0
  147. package/src/hotswap/manifest.d.ts +10 -1
  148. package/src/hotswap/manifest.js +12 -2
  149. package/src/hotswap/runtime.js +30 -8
  150. package/src/index-browser.d.ts +44 -0
  151. package/src/index-browser.js +14 -0
  152. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  153. package/src/inference/browser-harness-contract-helpers.js +28 -0
  154. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  155. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  156. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  157. package/src/inference/browser-harness-model-helpers.js +217 -0
  158. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  159. package/src/inference/browser-harness-report-helpers.js +42 -0
  160. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  161. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  162. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  163. package/src/inference/browser-harness-suite-helpers.js +268 -0
  164. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  165. package/src/inference/browser-harness-text-helpers.js +788 -0
  166. package/src/inference/browser-harness.d.ts +6 -0
  167. package/src/inference/browser-harness.js +130 -1996
  168. package/src/inference/kv-cache/base.js +140 -94
  169. package/src/inference/kv-cache/tiered.js +5 -3
  170. package/src/inference/moe-router.js +88 -56
  171. package/src/inference/multi-model-network.js +5 -3
  172. package/src/inference/network-evolution.d.ts +11 -2
  173. package/src/inference/network-evolution.js +20 -21
  174. package/src/inference/pipelines/context.d.ts +3 -0
  175. package/src/inference/pipelines/context.js +142 -2
  176. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  177. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  178. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  179. package/src/inference/pipelines/diffusion/vae.js +3 -7
  180. package/src/inference/pipelines/energy/pipeline.js +27 -21
  181. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  182. package/src/inference/pipelines/energy/quintel.js +11 -0
  183. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  184. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  185. package/src/inference/pipelines/text/attention/projections.js +151 -101
  186. package/src/inference/pipelines/text/attention/record.js +62 -8
  187. package/src/inference/pipelines/text/attention/run.js +62 -8
  188. package/src/inference/pipelines/text/config.js +3 -4
  189. package/src/inference/pipelines/text/embed.js +2 -8
  190. package/src/inference/pipelines/text/execution-plan.js +41 -19
  191. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  192. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  193. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  194. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  195. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  196. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  197. package/src/inference/pipelines/text/generator-steps.js +298 -207
  198. package/src/inference/pipelines/text/generator.js +6 -23
  199. package/src/inference/pipelines/text/init.js +78 -20
  200. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  201. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  202. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  203. package/src/inference/pipelines/text/layer.js +3 -9
  204. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  205. package/src/inference/pipelines/text/linear-attention.js +80 -6
  206. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  207. package/src/inference/pipelines/text/logits/index.js +10 -11
  208. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  209. package/src/inference/pipelines/text/logits/utils.js +9 -0
  210. package/src/inference/pipelines/text/lora-apply.js +50 -32
  211. package/src/inference/pipelines/text/model-load.js +279 -104
  212. package/src/inference/pipelines/text/moe-cache.js +5 -4
  213. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  214. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  215. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  216. package/src/inference/pipelines/text/ops.js +90 -90
  217. package/src/inference/pipelines/text/probes.js +9 -9
  218. package/src/inference/pipelines/text/weights.js +17 -7
  219. package/src/inference/pipelines/text.js +13 -1
  220. package/src/inference/speculative.d.ts +2 -2
  221. package/src/inference/speculative.js +4 -18
  222. package/src/inference/test-harness.d.ts +1 -1
  223. package/src/inference/test-harness.js +15 -5
  224. package/src/inference/tokenizer.d.ts +0 -5
  225. package/src/inference/tokenizer.js +4 -23
  226. package/src/inference/tokenizers/bpe.js +9 -0
  227. package/src/inference/tokenizers/bundled.js +20 -0
  228. package/src/inference/tokenizers/sentencepiece.js +12 -0
  229. package/src/loader/doppler-loader.js +38 -22
  230. package/src/loader/dtype-utils.js +3 -44
  231. package/src/loader/embedding-loader.js +7 -3
  232. package/src/loader/experts/expert-cache.js +13 -6
  233. package/src/loader/experts/expert-loader.js +10 -6
  234. package/src/loader/final-weights-loader.js +8 -4
  235. package/src/loader/layer-loader.js +2 -1
  236. package/src/loader/loader-state.js +2 -2
  237. package/src/loader/memory-monitor.js +8 -0
  238. package/src/loader/multi-model-loader.d.ts +14 -0
  239. package/src/loader/multi-model-loader.js +70 -24
  240. package/src/loader/shard-cache.js +81 -12
  241. package/src/loader/shard-resolver.js +25 -3
  242. package/src/loader/tensors/tensor-loader.js +209 -144
  243. package/src/loader/tensors/tensor-reader.js +76 -19
  244. package/src/loader/weight-downcast.js +1 -1
  245. package/src/memory/buffer-pool.d.ts +9 -1
  246. package/src/memory/buffer-pool.js +109 -44
  247. package/src/memory/unified-detect.js +1 -1
  248. package/src/rules/inference/kernel-path.rules.json +24 -8
  249. package/src/rules/rule-registry.js +25 -1
  250. package/src/storage/backends/opfs-store.js +68 -24
  251. package/src/storage/downloader.js +364 -83
  252. package/src/storage/index.d.ts +3 -0
  253. package/src/storage/index.js +3 -0
  254. package/src/storage/preflight.d.ts +2 -2
  255. package/src/storage/preflight.js +24 -2
  256. package/src/storage/quickstart-downloader.js +11 -5
  257. package/src/storage/registry.js +10 -4
  258. package/src/storage/reports.js +1 -1
  259. package/src/storage/shard-manager.d.ts +15 -1
  260. package/src/storage/shard-manager.js +51 -3
  261. package/src/storage/source-artifact-store.d.ts +52 -0
  262. package/src/storage/source-artifact-store.js +234 -0
  263. package/src/tooling/command-api-constants.d.ts +9 -0
  264. package/src/tooling/command-api-constants.js +9 -0
  265. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  266. package/src/tooling/command-api-family-normalizers.js +343 -0
  267. package/src/tooling/command-api-helpers.d.ts +25 -0
  268. package/src/tooling/command-api-helpers.js +262 -0
  269. package/src/tooling/command-api.js +16 -602
  270. package/src/tooling/command-envelope.js +4 -1
  271. package/src/tooling/command-runner-shared.js +52 -18
  272. package/src/tooling/lean-execution-contract.js +150 -3
  273. package/src/tooling/node-browser-command-runner.js +161 -271
  274. package/src/tooling/node-command-runner.js +29 -3
  275. package/src/tooling/node-converter.js +27 -1
  276. package/src/tooling/node-source-runtime.d.ts +1 -1
  277. package/src/tooling/node-source-runtime.js +84 -3
  278. package/src/tooling/node-webgpu.js +24 -21
  279. package/src/tooling/opfs-cache.js +21 -4
  280. package/src/tooling/runtime-input-composition.d.ts +38 -0
  281. package/src/tooling/runtime-input-composition.js +86 -0
  282. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  283. package/src/tooling/source-runtime-bundle.js +261 -34
  284. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  285. package/src/tooling/source-runtime-materializer.js +93 -0
  286. package/src/training/attention-backward.js +32 -17
  287. package/src/training/autograd.js +80 -52
  288. package/src/training/checkpoint-watch.d.ts +2 -1
  289. package/src/training/checkpoint-watch.js +39 -6
  290. package/src/training/checkpoint.js +40 -11
  291. package/src/training/clip.js +2 -1
  292. package/src/training/datasets/token-batch.js +20 -8
  293. package/src/training/distillation/checkpoint-watch.js +1 -0
  294. package/src/training/distillation/student-fixture.d.ts +22 -0
  295. package/src/training/distillation/student-fixture.js +846 -0
  296. package/src/training/distillation/suite-data.d.ts +45 -0
  297. package/src/training/distillation/suite-data.js +189 -0
  298. package/src/training/lora-pipeline.js +4 -7
  299. package/src/training/lora.js +26 -12
  300. package/src/training/loss.js +5 -6
  301. package/src/training/objectives/cross_entropy.js +2 -5
  302. package/src/training/objectives/distill_kd.js +4 -8
  303. package/src/training/objectives/distill_triplet.js +4 -8
  304. package/src/training/objectives/ul_stage2_base.js +4 -8
  305. package/src/training/operator-command.js +2 -0
  306. package/src/training/optimizer.js +19 -7
  307. package/src/training/runner.js +2 -1
  308. package/src/training/suite.js +18 -978
  309. package/src/training/tensor-factory.d.ts +9 -0
  310. package/src/training/tensor-factory.js +13 -0
  311. package/src/training/trainer.js +3 -5
  312. package/src/training/ul_dataset.js +3 -5
  313. package/src/training/workloads.js +70 -79
  314. package/src/version.js +1 -1
  315. package/tools/convert-safetensors-node.js +22 -16
  316. package/tools/doppler-cli.js +44 -25
@@ -0,0 +1,788 @@
1
+ import { selectRuleValue } from '../rules/rule-registry.js';
2
+ import { isPlainObject } from '../utils/plain-object.js';
3
+
4
// Plain-text prompt used whenever no runtime or model-specific prompt applies.
const DEFAULT_HARNESS_PROMPT = 'Summarize this input in one sentence.';

// Placeholder prompt text. When a qwen/translategemma run still carries this
// exact string, the harness substitutes the model-specific default prompt.
const DEFAULT_RUNTIME_PLACEHOLDER_PROMPT = 'Hello from Doppler.';

// Default chat-style prompt for the 'qwen' template type (deeply frozen).
const DEFAULT_QWEN_PROMPT = (() => {
  const userTurn = Object.freeze({
    role: 'user',
    content: 'Answer in one short sentence: What color is the sky on a clear day?',
  });
  return Object.freeze({ messages: Object.freeze([userTurn]) });
})();

// Default structured prompt for the 'translategemma' template type: a single
// user turn holding one text block tagged with source/target language codes.
const DEFAULT_TRANSLATEGEMMA_PROMPT = (() => {
  const textBlock = Object.freeze({
    type: 'text',
    source_lang_code: 'en',
    target_lang_code: 'fr',
    text: 'Hello world.',
  });
  const userTurn = Object.freeze({
    role: 'user',
    content: Object.freeze([textBlock]),
  });
  return Object.freeze({ messages: Object.freeze([userTurn]) });
})();

// Token budget used when the runtime config does not provide a finite maxTokens.
const DEFAULT_HARNESS_MAX_TOKENS = 32;

// NOTE(review): presumably the number of embedding values / generated tokens
// kept in reports; the consumers are outside this part of the file.
const EMBEDDING_PREVIEW_LENGTH = 16;
const GENERATION_TOKEN_DIAGNOSTIC_LIMIT = 32;

// Default thresholds for the embedding semantic checks; each can be overridden
// through the `embeddingSemantic` options/runtime block.
const EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1 = 0.67;
const EMBEDDING_SEMANTIC_MIN_PAIR_ACC = 0.67;
const EMBEDDING_SEMANTIC_PAIR_MARGIN = 0.01;
35
+
36
// Built-in retrieval fixtures: one query plus candidate documents per case.
// `expectedDoc` is treated as an index into `docs` (normalizeRetrievalFixtures
// clamps it to that range). Every level of the structure is frozen.
const EMBEDDING_SEMANTIC_RETRIEVAL_CASES = Object.freeze(
  [
    {
      id: 'library_search',
      query: 'Where can I borrow books and study quietly?',
      docs: [
        'The city library lends books, provides study rooms, and offers free Wi-Fi.',
        'The cafe serves coffee, pastries, and sandwiches all day.',
        'The bike repair shop fixes flat tires and broken chains.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'password_reset',
      query: 'How do I reset my account password?',
      docs: [
        'To reset your password, open account settings and choose the forgot-password flow.',
        'Our shipping policy explains delivery timelines and tracking updates.',
        'The recipe combines tomatoes, basil, and olive oil.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'damaged_package',
      query: 'What should I do if my package arrives damaged?',
      docs: [
        'Contact support within seven days with photos to request a replacement for damaged items.',
        'The concert starts at 8 PM at the downtown arena.',
        'Plant roses in spring and water them twice a week.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'flight_change_policy',
      query: 'Can I change my flight after booking?',
      docs: [
        'The museum opens daily at 10 AM and offers guided tours on weekends.',
        'You can change your flight in Manage Booking up to 24 hours before departure, with any fare difference applied.',
        'Our gym membership includes group classes and access to the pool.',
      ],
      expectedDoc: 1,
    },
    {
      id: 'wifi_troubleshoot',
      query: 'Why does my home Wi-Fi keep disconnecting?',
      docs: [
        'The dessert menu includes cheesecake, brownies, and fruit tart.',
        'You can review your recent orders in your account purchase history.',
        'Frequent Wi-Fi drops can be fixed by restarting the router, updating firmware, and changing the wireless channel.',
      ],
      expectedDoc: 2,
    },
    {
      id: 'refund_deadline',
      query: 'How long do I have to request a refund?',
      docs: [
        'Refund requests are accepted within 30 days of purchase when the item is in original condition.',
        'The conference keynote starts at 9 AM in the main hall.',
        'Use a medium grind when brewing coffee with a drip machine.',
      ],
      expectedDoc: 0,
    },
    {
      id: 'passport_renewal_docs',
      query: 'What documents do I need to renew a passport?',
      docs: [
        'To care for houseplants, water only when the top soil is dry.',
        'Passport renewal usually requires the application form, current passport, compliant photo, and payment.',
        'The train to downtown runs every 20 minutes during peak hours.',
      ],
      expectedDoc: 1,
    },
  ].map(({ id, query, docs, expectedDoc }) =>
    Object.freeze({ id, query, docs: Object.freeze(docs), expectedDoc })
  )
);
108
+
109
// Built-in sentence-pair fixtures: each case has an anchor sentence, a
// paraphrase (`positive`) and an unrelated sentence (`negative`).
// The list and every entry are frozen.
const EMBEDDING_SEMANTIC_PAIR_CASES = Object.freeze(
  [
    {
      id: 'bike_paraphrase',
      anchor: 'The child is riding a bicycle through the park.',
      positive: 'A kid bikes along a path in the park.',
      negative: 'The stock market closed lower after interest-rate news.',
    },
    {
      id: 'cancel_subscription',
      anchor: 'Please cancel my subscription before renewal.',
      positive: 'I want to stop the plan so it does not renew.',
      negative: 'The mountain trail is closed after heavy snow.',
    },
    {
      id: 'battery_drain',
      anchor: 'The laptop battery drains very quickly.',
      positive: 'My notebook loses charge fast.',
      negative: 'This pasta sauce tastes sweet and spicy.',
    },
    {
      id: 'order_tracking',
      anchor: 'I need to track where my order is.',
      positive: 'How can I check my package delivery status?',
      negative: 'The violin concerto was composed in the 1800s.',
    },
    {
      id: 'account_lockout',
      anchor: 'My account is locked after too many login attempts.',
      positive: 'I cannot sign in because the system temporarily blocked my account.',
      negative: 'Bake the cake at 350 degrees for thirty minutes.',
    },
    {
      id: 'invoice_request',
      anchor: 'Please send me the invoice for last month.',
      positive: 'Can you provide the billing statement for the previous month?',
      negative: 'The hiking trail follows the river for five miles.',
    },
    {
      id: 'slow_internet',
      anchor: 'The internet speed is much slower tonight.',
      positive: 'My connection is unusually slow this evening.',
      negative: 'The novel explores themes of memory and loss.',
    },
  ].map((pairCase) => Object.freeze(pairCase))
);
153
+
154
/**
 * Normalizes a value to trimmed, non-empty text.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|null} The trimmed string, or null when `value` is not a
 *   string or is empty/whitespace-only.
 */
function asText(value) {
  const cleaned = typeof value === 'string' ? value.trim() : '';
  return cleaned.length > 0 ? cleaned : null;
}
159
+
160
/**
 * Validates and normalizes user-supplied retrieval fixtures.
 *
 * Entries that are not objects, lack a usable query, have no docs array
 * content, or have a non-finite `expectedDoc` are skipped. Doc strings are
 * trimmed and empty/non-string docs are dropped.
 *
 * `expectedDoc` is floored and clamped to the bounds of the ORIGINAL docs
 * array, then remapped to the position of that same document in the filtered
 * list. Previously the index was applied to the filtered list directly, so a
 * dropped doc ahead of the expected one silently shifted the answer onto a
 * different document. If the expected document itself is unusable the case is
 * skipped, because scoring against a substitute document would be meaningless.
 *
 * @param {unknown} cases - Candidate fixture list.
 * @returns {Array<{id: string, query: string, docs: string[], expectedDoc: number}>|null}
 *   Normalized cases, or null when `cases` is not an array or no case survives.
 */
function normalizeRetrievalFixtures(cases) {
  if (!Array.isArray(cases)) return null;
  const normalized = [];
  for (const [caseIndex, entry] of cases.entries()) {
    if (!entry || typeof entry !== 'object') continue;

    const query = asText(entry.query);
    const rawDocs = Array.isArray(entry.docs) ? entry.docs : [];
    if (!query || rawDocs.length === 0 || !Number.isFinite(entry.expectedDoc)) {
      continue;
    }

    // Clamp against the array the caller actually wrote the index for.
    const rawExpected = Math.max(
      0,
      Math.min(Math.floor(entry.expectedDoc), rawDocs.length - 1)
    );

    const docs = [];
    let expectedDoc = -1;
    for (const [rawIndex, rawDoc] of rawDocs.entries()) {
      const doc = asText(rawDoc);
      if (!doc) continue;
      if (rawIndex === rawExpected) {
        // Position of the expected document after invalid docs were removed.
        expectedDoc = docs.length;
      }
      docs.push(doc);
    }
    // Covers both "no valid docs at all" and "the expected doc was invalid".
    if (expectedDoc < 0) continue;

    normalized.push({
      id: asText(entry.id) ?? `case-${caseIndex + 1}`,
      query,
      docs,
      expectedDoc,
    });
  }
  return normalized.length > 0 ? normalized : null;
}
182
+
183
/**
 * Validates and normalizes user-supplied anchor/positive/negative fixtures.
 *
 * Non-object entries and entries missing any of the three texts are skipped.
 * Texts are trimmed; a missing id defaults to `pair-<1-based position>`.
 *
 * @param {unknown} cases - Candidate fixture list.
 * @returns {Array<{id: string, anchor: string, positive: string, negative: string}>|null}
 *   Normalized pairs, or null when `cases` is not an array or nothing survives.
 */
function normalizePairFixtures(cases) {
  if (!Array.isArray(cases)) return null;

  const triples = [];
  for (const [position, entry] of cases.entries()) {
    if (entry === null || typeof entry !== 'object') continue;

    const anchor = asText(entry.anchor);
    const positive = asText(entry.positive);
    const negative = asText(entry.negative);
    const isComplete = Boolean(anchor && positive && negative);
    if (isComplete) {
      triples.push({
        id: asText(entry.id) ?? `pair-${position + 1}`,
        anchor,
        positive,
        negative,
      });
    }
  }
  return triples.length === 0 ? null : triples;
}
205
+
206
/**
 * Resolves the fixtures and thresholds for the embedding semantic checks.
 *
 * The override source is `options.embeddingSemantic` when it is a plain
 * object, otherwise `runtimeConfig.shared.benchmark.run.embeddingSemantic`
 * when that is a plain object, otherwise nothing. Each field falls back to
 * the built-in default individually when missing or invalid.
 *
 * @param {object} [runtimeConfig] - Runtime configuration.
 * @param {object|null} [options=null] - Per-run options.
 * @returns {{retrievalCases: Array, pairCases: Array, minRetrievalTop1Acc: number,
 *   minPairAcc: number, pairMargin: number}}
 */
function resolveEmbeddingSemanticFixtures(runtimeConfig, options = null) {
  let source = null;
  if (isPlainObject(options?.embeddingSemantic)) {
    source = options.embeddingSemantic;
  } else {
    const fromRuntime = runtimeConfig?.shared?.benchmark?.run?.embeddingSemantic;
    if (isPlainObject(fromRuntime)) {
      source = fromRuntime;
    }
  }

  // Accuracy thresholds are fractions, so finite overrides are clamped to [0, 1].
  const unitIntervalOr = (candidate, fallback) => (
    Number.isFinite(candidate)
      ? Math.max(0, Math.min(1, Number(candidate)))
      : fallback
  );

  const customRetrieval = normalizeRetrievalFixtures(source?.retrievalCases);
  const customPairs = normalizePairFixtures(source?.pairCases);
  const customMargin = source?.pairMargin;

  return {
    retrievalCases: customRetrieval ?? EMBEDDING_SEMANTIC_RETRIEVAL_CASES,
    pairCases: customPairs ?? EMBEDDING_SEMANTIC_PAIR_CASES,
    minRetrievalTop1Acc: unitIntervalOr(
      source?.minRetrievalTop1Acc,
      EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1
    ),
    minPairAcc: unitIntervalOr(source?.minPairAcc, EMBEDDING_SEMANTIC_MIN_PAIR_ACC),
    // The margin is deliberately not clamped; any finite number is accepted.
    pairMargin: Number.isFinite(customMargin)
      ? Number(customMargin)
      : EMBEDDING_SEMANTIC_PAIR_MARGIN,
  };
}
235
+
236
/**
 * Looks up the embedding text-formatting style for a pipeline's model via the
 * `inference` / `config` / `embeddingSemanticStyle` rule.
 *
 * The rule context carries the lower-cased model id, preset id and manifest
 * model type (top-level `model_type`, else `text_config.model_type`); missing
 * values become empty strings.
 *
 * @param {object} [pipeline] - Pipeline exposing an optional `manifest`.
 * @returns {string} The rule's non-empty string result, otherwise 'default'.
 */
function resolveEmbeddingSemanticStyle(pipeline) {
  const manifest = pipeline?.manifest ?? null;
  const lowered = (raw) => String(raw ?? '').toLowerCase();

  const ruleContext = {
    modelId: lowered(manifest?.modelId),
    presetId: lowered(manifest?.inference?.presetId),
    manifestModelType: lowered(
      manifest?.config?.model_type ?? manifest?.config?.text_config?.model_type
    ),
  };
  const resolved = selectRuleValue('inference', 'config', 'embeddingSemanticStyle', ruleContext);

  const isUsable = typeof resolved === 'string' && resolved.length > 0;
  return isUsable ? resolved : 'default';
}
252
+
253
/**
 * Wraps text in the prompt prefix required by the given embedding style.
 *
 * Only the 'embeddinggemma' style adds prefixes, and only for the 'query'
 * and 'document' kinds; everything else is returned untouched.
 *
 * @param {string} text - Text to embed.
 * @param {string} kind - 'query' or 'document'.
 * @param {string} style - Embedding style identifier.
 * @returns {string} The text, possibly prefixed.
 */
function formatEmbeddingSemanticText(text, kind, style) {
  if (style !== 'embeddinggemma') {
    return text;
  }
  switch (kind) {
    case 'query':
      return `task: search result | query: ${text}`;
    case 'document':
      return `title: None | text: ${text}`;
    default:
      return text;
  }
}
264
+
265
/**
 * Resolves the plain-text harness prompt.
 *
 * @param {object} [runtimeConfig] - Runtime configuration.
 * @returns {string} The trimmed `runtimeConfig.inference.prompt` when it is a
 *   non-blank string, otherwise the default harness prompt.
 */
export function resolvePrompt(runtimeConfig) {
  const candidate = runtimeConfig?.inference?.prompt;
  const trimmed = typeof candidate === 'string' ? candidate.trim() : '';
  return trimmed !== '' ? trimmed : DEFAULT_HARNESS_PROMPT;
}
272
+
273
/**
 * Tells whether a prompt value is structured (an array or any other non-null
 * object) rather than a primitive such as a string.
 *
 * @param {unknown} value - Candidate prompt input.
 * @returns {boolean} True for arrays and non-null objects.
 */
function isStructuredPromptInput(value) {
  // Arrays report typeof 'object', so one check covers both shapes.
  return value !== null && typeof value === 'object';
}
276
+
277
/**
 * Returns an independent deep copy of a structured prompt so callers cannot
 * mutate the original (e.g. the frozen default prompts).
 *
 * @param {unknown} promptInput - Prompt value.
 * @returns {unknown} Primitives are returned as-is; structured inputs are
 *   copied with `structuredClone` when available, else via a JSON round-trip.
 */
function clonePromptInput(promptInput) {
  if (!isStructuredPromptInput(promptInput)) {
    return promptInput;
  }
  const hasStructuredClone = typeof structuredClone === 'function';
  return hasStructuredClone
    ? structuredClone(promptInput)
    : JSON.parse(JSON.stringify(promptInput));
}
286
+
287
/**
 * Finds the chat template type for a prompt source.
 *
 * Lookup order: `source.chatTemplateType`, then
 * `source.modelConfig.chatTemplateType`, then
 * `source.manifest.inference.chatTemplate.type`. Blank or non-string values
 * are ignored at every step.
 *
 * @param {object|null} [source] - Object that may describe the model.
 * @returns {string|null} The first usable template type, or null.
 */
function resolvePromptTemplateType(source) {
  // asText yields either a non-empty string or null, so `??` picks the first hit.
  return (
    asText(source?.chatTemplateType)
    ?? asText(source?.modelConfig?.chatTemplateType)
    ?? asText(source?.manifest?.inference?.chatTemplate?.type)
  );
}
298
+
299
/**
 * Produces the default generation prompt for a chat template type.
 *
 * @param {string|null} templateType - Resolved chat template type.
 * @returns {string|object} A fresh copy of the structured default prompt for
 *   'qwen' and 'translategemma', otherwise the default harness prompt string.
 */
function buildDefaultGenerationPrompt(templateType) {
  switch (templateType) {
    case 'qwen':
      // Cloned so callers never receive (or try to mutate) the frozen constant.
      return clonePromptInput(DEFAULT_QWEN_PROMPT);
    case 'translategemma':
      return clonePromptInput(DEFAULT_TRANSLATEGEMMA_PROMPT);
    default:
      return DEFAULT_HARNESS_PROMPT;
  }
}
308
+
309
/**
 * Decides whether the runtime prompt should be replaced by the model-specific
 * default prompt.
 *
 * That is the case only for the 'translategemma' and 'qwen' template types,
 * and only when the runtime prompt is a string that trims to the generic
 * runtime placeholder prompt.
 *
 * @param {unknown} runtimePrompt - Prompt from the runtime config.
 * @param {string|null} templateType - Resolved chat template type.
 * @returns {boolean}
 */
function shouldPreferModelDefaultPrompt(runtimePrompt, templateType) {
  const hasModelDefault = templateType === 'translategemma' || templateType === 'qwen';
  return (
    hasModelDefault
    && typeof runtimePrompt === 'string'
    && runtimePrompt.trim() === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT
  );
}
318
+
319
/**
 * Enforces the TranslateGemma prompt contract for a configured prompt.
 *
 * Nothing is checked for other template types. For 'translategemma', a
 * missing prompt, a blank string, or the runtime placeholder string is
 * accepted; any other string and any non-structured value is rejected.
 * Structured values (arrays/objects) are accepted without inspecting their
 * contents.
 *
 * @param {unknown} runtimePrompt - Prompt value to validate.
 * @param {string|null} templateType - Resolved chat template type.
 * @param {string} [source='runtime.inference.prompt'] - Config path named in
 *   the error message.
 * @returns {void}
 * @throws {Error} When the prompt violates the TranslateGemma contract.
 */
function assertPromptContract(runtimePrompt, templateType, source = 'runtime.inference.prompt') {
  if (templateType !== 'translategemma' || runtimePrompt == null) {
    return;
  }

  // Built lazily so `source` is only stringified when an error is raised.
  const contractViolation = (ending) => new Error(
    `TranslateGemma harness prompt contract violation: ${source} must be ` +
    '{ messages: [...] } with source_lang_code/target_lang_code blocks' +
    ending
  );

  if (typeof runtimePrompt === 'string') {
    const trimmed = runtimePrompt.trim();
    const isBlankOrPlaceholder = trimmed === '' || trimmed === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT;
    if (isBlankOrPlaceholder) {
      return;
    }
    throw contractViolation(', not a plain string.');
  }

  if (!isStructuredPromptInput(runtimePrompt)) {
    throw contractViolation('.');
  }
}
343
+
344
/**
 * Builds a short human-readable description of a prompt input.
 *
 * - Strings: the trimmed text, or the default harness prompt when blank.
 * - Structured prompts whose first message starts with a content block that
 *   has both language codes: `<source> -> <target>: <text>`.
 * - Structured prompts whose first message has string content:
 *   `<role>: <content>` (role defaults to 'user').
 * - Anything else: its JSON serialization, or '[structured prompt]' when
 *   serialization throws.
 *
 * @param {unknown} promptInput - Prompt value.
 * @returns {string|undefined} Description; undefined only when JSON.stringify
 *   itself yields undefined (e.g. for an undefined input).
 */
function describePromptInput(promptInput) {
  if (typeof promptInput === 'string') {
    return promptInput.trim() || DEFAULT_HARNESS_PROMPT;
  }

  const messages = promptInput?.messages;
  const firstMessage = Array.isArray(messages) ? messages[0] : null;
  const messageContent = firstMessage?.content;
  const firstBlock = Array.isArray(messageContent) ? messageContent[0] : null;

  // Translation-style request: summarize as a language pair.
  const sourceLang = asText(firstBlock?.source_lang_code);
  const targetLang = asText(firstBlock?.target_lang_code);
  const blockText = asText(firstBlock?.text);
  if (sourceLang && targetLang) {
    return `${sourceLang} -> ${targetLang}: ${blockText || '[non-text request]'}`;
  }

  // Plain chat message with string content.
  const plainContent = asText(messageContent);
  if (plainContent) {
    const role = asText(firstMessage?.role) || 'user';
    return `${role}: ${plainContent}`;
  }

  try {
    return JSON.stringify(promptInput);
  } catch {
    return '[structured prompt]';
  }
}
371
+
372
/**
 * Pick the prompt input for a generation run.
 *
 * Order of preference:
 *   1. `runOverrides.prompt` (non-blank string, trimmed; or structured, cloned)
 *   2. the template default, when shouldPreferModelDefaultPrompt says the
 *      configured runtime prompt should yield to it
 *   3. `runtimeConfig.inference.prompt` (non-blank string, trimmed; or
 *      structured, cloned)
 *   4. the template default from buildDefaultGenerationPrompt
 *
 * Both the override and the runtime prompt are passed through
 * assertPromptContract before being considered.
 *
 * @param {object|null|undefined} runtimeConfig - Runtime configuration.
 * @param {object|null} [runOverrides] - Per-run overrides.
 * @param {*} [source] - Value handed to resolvePromptTemplateType.
 * @returns {*} A prompt string or structured prompt input.
 */
function resolveGenerationPromptInput(runtimeConfig, runOverrides = null, source = null) {
  const templateType = resolvePromptTemplateType(source);

  const requestedPrompt = runOverrides?.prompt;
  assertPromptContract(requestedPrompt, templateType, 'runOverrides.prompt');
  if (typeof requestedPrompt === 'string') {
    const requestedText = requestedPrompt.trim();
    if (requestedText) {
      return requestedText;
    }
  }
  if (isStructuredPromptInput(requestedPrompt)) {
    return clonePromptInput(requestedPrompt);
  }

  const configuredPrompt = runtimeConfig?.inference?.prompt;
  assertPromptContract(configuredPrompt, templateType, 'runtimeConfig.inference.prompt');
  if (!shouldPreferModelDefaultPrompt(configuredPrompt, templateType)) {
    if (typeof configuredPrompt === 'string') {
      const configuredText = configuredPrompt.trim();
      if (configuredText) {
        return configuredText;
      }
    }
    if (isStructuredPromptInput(configuredPrompt)) {
      return clonePromptInput(configuredPrompt);
    }
  }

  return buildDefaultGenerationPrompt(templateType);
}
397
+
398
/**
 * Read the generation token budget from `inference.batching.maxTokens`.
 *
 * A finite number is floored and raised to at least 1; anything else falls
 * back to DEFAULT_HARNESS_MAX_TOKENS.
 *
 * @param {object|null|undefined} runtimeConfig - Runtime configuration.
 * @returns {number}
 */
function resolveMaxTokens(runtimeConfig) {
  const configuredLimit = runtimeConfig?.inference?.batching?.maxTokens;
  return Number.isFinite(configuredLimit)
    ? Math.max(1, Math.floor(configuredLimit))
    : DEFAULT_HARNESS_MAX_TOKENS;
}
405
+
406
/**
 * Resolve the settings for a benchmark run from the runtime configuration.
 *
 * Reads `runtimeConfig.shared.benchmark.run` and combines it with the
 * inference settings:
 *   - prompt: a non-blank `customPrompt` string wins; otherwise the prompt is
 *     resolved through resolveGenerationPromptInput.
 *   - maxTokens: a finite `maxNewTokens` (floored, at least 1) wins; otherwise
 *     resolveMaxTokens decides.
 *   - sampling: `inference.sampling` overlaid with the benchmark `sampling`.
 *   - warmupRuns / timedRuns: floored integers with minimums of 0 and 1.
 *
 * @param {object|null|undefined} runtimeConfig - Runtime configuration.
 * @param {*} [source] - Forwarded to resolveGenerationPromptInput.
 * @returns {{warmupRuns: number, timedRuns: number, prompt: *, promptLabel: *,
 *   maxTokens: number, sampling: object}}
 */
export function resolveBenchmarkRunSettings(runtimeConfig, source = null) {
  const benchConfig = runtimeConfig?.shared?.benchmark?.run || {};
  const runtimeSampling = isPlainObject(runtimeConfig?.inference?.sampling)
    ? runtimeConfig.inference.sampling
    : {};
  const benchSampling = isPlainObject(benchConfig?.sampling)
    ? benchConfig.sampling
    : {};
  const promptInput = typeof benchConfig.customPrompt === 'string' && benchConfig.customPrompt.trim()
    ? benchConfig.customPrompt.trim()
    : resolveGenerationPromptInput(runtimeConfig, null, source);
  const maxTokens = Number.isFinite(benchConfig.maxNewTokens)
    ? Math.max(1, Math.floor(benchConfig.maxNewTokens))
    : resolveMaxTokens(runtimeConfig);

  // Non-finite counts (NaN from a malformed value, or Infinity) used to leak
  // straight through Math.floor/Math.max into the returned settings. Coerce
  // with Number() so values that previously worked (e.g. numeric strings)
  // keep working, and fall back to the minimum when the result is not finite.
  const resolveRunCount = (rawCount, minimum) => {
    const count = Number(rawCount ?? minimum);
    return Number.isFinite(count) ? Math.max(minimum, Math.floor(count)) : minimum;
  };

  return {
    warmupRuns: resolveRunCount(benchConfig.warmupRuns, 0),
    timedRuns: resolveRunCount(benchConfig.timedRuns, 1),
    prompt: promptInput,
    promptLabel: describePromptInput(promptInput),
    maxTokens,
    sampling: {
      ...runtimeSampling,
      ...benchSampling,
    },
  };
}
433
+
434
/**
 * Compute summary statistics over an embedding vector.
 *
 * Accepts an array or any ArrayBuffer view; other inputs are treated as an
 * empty vector. Each element is coerced with Number(). Non-finite elements
 * are counted but excluded from min/max/mean/stdDev/l2Norm. The preview holds
 * the first EMBEDDING_PREVIEW_LENGTH elements rounded to six decimals, with
 * null standing in for non-finite entries.
 *
 * @param {*} embedding - Candidate embedding vector.
 * @returns {{embeddingDim: number, nonFiniteCount: number, finiteCount: number,
 *   finiteRatio: number, min: (number|null), max: (number|null),
 *   maxAbs: (number|null), mean: (number|null), stdDev: (number|null),
 *   l2Norm: (number|null), preview: Array<number|null>}}
 */
function summarizeEmbeddingValues(embedding) {
  const isVectorLike = ArrayBuffer.isView(embedding) || Array.isArray(embedding);
  const vector = isVectorLike ? embedding : null;
  const embeddingDim = Number.isFinite(vector?.length) ? vector.length : 0;

  const preview = [];
  let finiteCount = 0;
  let nonFiniteCount = 0;
  let lowest = Infinity;
  let highest = -Infinity;
  let largestMagnitude = 0;
  let total = 0;
  let totalOfSquares = 0;

  for (let idx = 0; idx < embeddingDim; idx += 1) {
    const sample = Number(vector[idx]);
    const sampleIsFinite = Number.isFinite(sample);

    if (preview.length < EMBEDDING_PREVIEW_LENGTH) {
      preview.push(sampleIsFinite ? Number(sample.toFixed(6)) : null);
    }

    if (sampleIsFinite) {
      finiteCount += 1;
      if (sample < lowest) lowest = sample;
      if (sample > highest) highest = sample;
      const magnitude = Math.abs(sample);
      if (magnitude > largestMagnitude) largestMagnitude = magnitude;
      total += sample;
      totalOfSquares += sample * sample;
    } else {
      nonFiniteCount += 1;
    }
  }

  const hasFinite = finiteCount > 0;

  let mean = null;
  let variance = null;
  if (hasFinite) {
    mean = total / finiteCount;
    const center = mean || 0;
    // Clamp at zero: rounding can push E[x^2] - E[x]^2 slightly negative.
    variance = Math.max(0, (totalOfSquares / finiteCount) - (center * center));
  }
  const stdDev = variance == null ? null : Math.sqrt(variance);

  return {
    embeddingDim,
    nonFiniteCount,
    finiteCount,
    finiteRatio: embeddingDim > 0 ? finiteCount / embeddingDim : 0,
    min: hasFinite ? lowest : null,
    max: hasFinite ? highest : null,
    maxAbs: hasFinite ? largestMagnitude : null,
    mean,
    stdDev,
    l2Norm: hasFinite ? Math.sqrt(totalOfSquares) : null,
    preview,
  };
}
485
+
486
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * Returns NaN when either input is missing, lengths are not finite, lengths
 * differ, the vectors are empty, any element is non-finite after Number()
 * coercion, or either vector has zero norm.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Similarity in [-1, 1], or NaN when undefined.
 */
function cosineSimilarity(a, b) {
  if (!a || !b || !Number.isFinite(a.length) || !Number.isFinite(b.length)) return NaN;
  if (a.length !== b.length || a.length === 0) return NaN;
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    const av = Number(a[i]);
    const bv = Number(b[i]);
    if (!Number.isFinite(av) || !Number.isFinite(bv)) return NaN;
    dot += av * bv;
    normA += av * av;
    normB += bv * bv;
  }
  if (normA <= 0 || normB <= 0) return NaN;
  // Take the square roots separately: the product normA * normB can overflow
  // to Infinity or underflow to 0 even when each squared norm is itself
  // representable, which used to yield 0 or Infinity for parallel vectors.
  const similarity = dot / (Math.sqrt(normA) * Math.sqrt(normB));
  // Rounding can nudge the ratio just past the mathematical bounds; NaN
  // passes through Math.min/Math.max unchanged.
  return Math.max(-1, Math.min(1, similarity));
}
503
+
504
/**
 * Index of the largest finite value in a list.
 *
 * Elements are coerced with Number(); non-finite results are ignored. The
 * earliest index wins on ties, and -1 is returned when nothing is finite.
 *
 * @param {ArrayLike<*>} values - Scores to scan.
 * @returns {number} Winning index, or -1.
 */
function top1Index(values) {
  let winner = -1;
  let winnerScore = -Infinity;
  for (let idx = 0; idx < values.length; idx += 1) {
    const score = Number(values[idx]);
    if (Number.isFinite(score) && score > winnerScore) {
      winner = idx;
      winnerScore = score;
    }
  }
  return winner;
}
517
+
518
/**
 * Embed one text in isolation and return the raw embedding.
 *
 * Calls `pipeline.reset()` first when the pipeline exposes it, then
 * `pipeline.embed(text)`.
 *
 * @param {object} pipeline - Object exposing `embed` and optionally `reset`.
 * @param {*} text - Input forwarded to `pipeline.embed`.
 * @returns {Promise<*>} The `embedding` field of the embed result.
 * @throws {Error} When the result has no embedding with a positive finite length.
 */
async function embedStandaloneText(pipeline, text) {
  pipeline.reset?.();
  const response = await pipeline.embed(text);
  const vector = response?.embedding;
  const isUsable = Boolean(vector) && Number.isFinite(vector.length) && vector.length > 0;
  if (isUsable) {
    return vector;
  }
  throw new Error('Semantic check embedding is missing.');
}
527
+
528
/**
 * Run the embedding semantic checks (retrieval cases and pair cases) against
 * a pipeline and report per-case results plus aggregate accuracy.
 *
 * Fixtures and thresholds come from resolveEmbeddingSemanticFixtures, and
 * every text is formatted with formatEmbeddingSemanticText before being
 * embedded through embedStandaloneText, one at a time.
 *
 * - A retrieval case passes when the document with the highest cosine
 *   similarity to the query is `expectedDoc`.
 * - A pair case passes when sim(anchor, positive) - sim(anchor, negative)
 *   is finite and strictly greater than `config.pairMargin`.
 * - The overall check passes when both accuracies reach their configured
 *   minimums.
 *
 * @param {object} pipeline - Embedding pipeline under test.
 * @param {*} [options] - Forwarded to resolveEmbeddingSemanticFixtures.
 * @returns {Promise<object>} Aggregate report with `retrieval` and `pairs` details.
 */
export async function runEmbeddingSemanticChecks(pipeline, options = null) {
  const config = resolveEmbeddingSemanticFixtures(
    pipeline?.runtimeConfig ?? {},
    options
  );
  const start = performance.now();
  const semanticStyle = resolveEmbeddingSemanticStyle(pipeline);

  // Six-decimal rounding for reported similarities; non-finite becomes null.
  const roundOrNull = (value) => (Number.isFinite(value) ? Number(value.toFixed(6)) : null);
  const embedAs = (text, role) => embedStandaloneText(
    pipeline,
    formatEmbeddingSemanticText(text, role, semanticStyle)
  );

  const retrieval = [];
  for (const testCase of config.retrievalCases) {
    const queryEmbedding = await embedAs(testCase.query, 'query');
    const docEmbeddings = [];
    for (const doc of testCase.docs) {
      docEmbeddings.push(await embedAs(doc, 'document'));
    }
    const sims = docEmbeddings.map((docEmbedding) => cosineSimilarity(queryEmbedding, docEmbedding));
    const topDoc = top1Index(sims);
    retrieval.push({
      id: testCase.id,
      passed: topDoc === testCase.expectedDoc,
      expectedDoc: testCase.expectedDoc,
      topDoc,
      sims: sims.map((sim) => roundOrNull(sim)),
    });
  }

  const pairs = [];
  for (const testCase of config.pairCases) {
    const anchor = await embedAs(testCase.anchor, 'query');
    const positive = await embedAs(testCase.positive, 'query');
    const negative = await embedAs(testCase.negative, 'query');
    const simPos = cosineSimilarity(anchor, positive);
    const simNeg = cosineSimilarity(anchor, negative);
    const margin = simPos - simNeg;
    pairs.push({
      id: testCase.id,
      passed: Number.isFinite(margin) && margin > config.pairMargin,
      simPos: roundOrNull(simPos),
      simNeg: roundOrNull(simNeg),
      margin: roundOrNull(margin),
    });
  }

  const retrievalPassed = retrieval.filter((item) => item.passed).length;
  const pairPassed = pairs.filter((item) => item.passed).length;
  const retrievalTop1Acc = retrieval.length > 0 ? retrievalPassed / retrieval.length : 0;
  const pairAcc = pairs.length > 0 ? pairPassed / pairs.length : 0;

  const failedRetrievalIds = retrieval
    .filter((item) => !item.passed)
    .map((item) => `retrieval:${item.id}`);
  const failedPairIds = pairs
    .filter((item) => !item.passed)
    .map((item) => `pair:${item.id}`);

  return {
    passed: retrievalTop1Acc >= config.minRetrievalTop1Acc && pairAcc >= config.minPairAcc,
    style: semanticStyle,
    retrievalTop1Acc,
    pairAcc,
    retrievalPassed,
    retrievalTotal: retrieval.length,
    pairPassed,
    pairTotal: pairs.length,
    minRetrievalTop1Acc: Number(config.minRetrievalTop1Acc.toFixed(4)),
    minPairAcc: Number(config.minPairAcc.toFixed(4)),
    pairMarginThreshold: Number(config.pairMargin.toFixed(4)),
    failedCaseIds: [...failedRetrievalIds, ...failedPairIds],
    retrieval,
    pairs,
    durationMs: Math.max(1, performance.now() - start),
  };
}
619
+
620
// Matches a string that is, in its entirety, a special-looking token: the
// listed pad/eos/bos/unused/BERT-style markers, or any <...> tag with 1-32
// characters between the angle brackets. Case-insensitive.
const SPECIAL_TOKEN_RE = /^(<pad>|<unused\d*>|<eos>|<bos>|<s>|<\/s>|\[PAD\]|\[UNK\]|\[SEP\]|\[CLS\]|<[^>]{1,32}>)$/i;
// Share of special-like tokens at or above which isCoherentOutput rejects
// the generated output.
const PAD_DOMINANCE_THRESHOLD = 0.5;

/**
 * Tell whether a value is a string that, once trimmed, matches
 * SPECIAL_TOKEN_RE.
 *
 * @param {*} value - Candidate token text.
 * @returns {boolean} False for non-strings.
 */
function isSpecialLikeTokenText(value) {
  return typeof value === 'string' && SPECIAL_TOKEN_RE.test(value.trim());
}
627
+
628
/**
 * Summarize per-token generation records for diagnostics.
 *
 * Returns a preview of the first GENERATION_TOKEN_DIAGNOSTIC_LIMIT records
 * (id, text, fallbackText) together with counts taken over all records:
 * how many have an empty `text`, and how many have a `text` / `fallbackText`
 * that isSpecialLikeTokenText accepts. Non-array input is treated as empty.
 *
 * @param {*} tokenRecords - Array of `{ id, text, fallbackText }` records.
 * @returns {{preview: Array<object>, total: number, omitted: number,
 *   emptyTextCount: number, specialLikeTextCount: number,
 *   specialLikeFallbackCount: number}}
 */
function summarizeGenerationTokens(tokenRecords) {
  const records = Array.isArray(tokenRecords) ? tokenRecords : [];
  // Optional chaining keeps the preview in step with the counting loop below,
  // which already tolerates null/undefined entries; a plain `record.id` threw
  // on such an entry before any count could be produced.
  const preview = records.slice(0, GENERATION_TOKEN_DIAGNOSTIC_LIMIT).map((record) => ({
    id: record?.id,
    text: record?.text,
    fallbackText: record?.fallbackText,
  }));
  let emptyTextCount = 0;
  let specialLikeTextCount = 0;
  let specialLikeFallbackCount = 0;
  for (const record of records) {
    if (!record || typeof record !== 'object') continue;
    if (typeof record.text === 'string' && record.text.length === 0) {
      emptyTextCount += 1;
    }
    if (isSpecialLikeTokenText(record.text)) {
      specialLikeTextCount += 1;
    }
    if (isSpecialLikeTokenText(record.fallbackText)) {
      specialLikeFallbackCount += 1;
    }
  }
  return {
    preview,
    total: records.length,
    // Records beyond the preview window.
    omitted: Math.max(0, records.length - preview.length),
    emptyTextCount,
    specialLikeTextCount,
    specialLikeFallbackCount,
  };
}
659
+
660
/**
 * Heuristic check that a generation produced meaningful text.
 *
 * The output is rejected when there are no tokens, when the share of tokens
 * matching SPECIAL_TOKEN_RE reaches PAD_DOMINANCE_THRESHOLD, or when nothing
 * is left of the output text after removing <...> tags and whitespace.
 *
 * @param {Array<*>} tokens - Generated token texts.
 * @param {*} output - Full generated text.
 * @returns {boolean}
 */
export function isCoherentOutput(tokens, output) {
  const tokenCount = tokens.length;
  if (tokenCount === 0) {
    return false;
  }

  const specialCount = tokens.reduce(
    (count, token) => (SPECIAL_TOKEN_RE.test(String(token).trim()) ? count + 1 : count),
    0
  );
  if (specialCount / tokenCount >= PAD_DOMINANCE_THRESHOLD) {
    return false;
  }

  const withoutTags = String(output || '').replace(/<[^>\n]{1,80}>/g, ' ');
  const visibleText = withoutTags.replace(/\s+/g, ' ').trim();
  return visibleText.length > 0;
}
670
+
671
/**
 * Run a single generation pass on a pipeline and collect the output, token
 * diagnostics and timing figures.
 *
 * Settings are resolved with `runOverrides` taking precedence over
 * `runtimeConfig`:
 *   - prompt: resolveGenerationPromptInput
 *   - useChatTemplate: override, then `inference.chatTemplate.enabled`, then
 *     `true` for structured prompts (otherwise left undefined)
 *   - maxTokens: finite override (floored, at least 1), else resolveMaxTokens
 *   - sampling: a plain-object override replaces `inference.sampling` wholesale
 *
 * Phase timings come from `pipeline.getStats()` when available; missing
 * values fall back to zero or to figures measured here.
 *
 * @param {object} pipeline - Generation pipeline (`generate`, optional `getStats`/`tokenizer`).
 * @param {object|null|undefined} runtimeConfig - Runtime configuration.
 * @param {object|null} [runOverrides] - Per-run overrides.
 * @returns {Promise<object>} Output text, tokens, diagnostics and phase metrics.
 */
export async function runGeneration(pipeline, runtimeConfig, runOverrides = null) {
  const tokens = [];
  const tokenIds = [];
  const tokenRecords = [];
  const promptInput = resolveGenerationPromptInput(runtimeConfig, runOverrides, pipeline);
  const promptLabel = describePromptInput(promptInput);
  const useChatTemplate = runOverrides?.useChatTemplate
    ?? runtimeConfig?.inference?.chatTemplate?.enabled
    ?? (isStructuredPromptInput(promptInput) ? true : undefined);
  const maxTokens = Number.isFinite(runOverrides?.maxTokens)
    ? Math.max(1, Math.floor(runOverrides.maxTokens))
    : resolveMaxTokens(runtimeConfig);
  // runtimeConfig is read with `?.` throughout so that a missing config
  // degrades to defaults here, matching the chat-template lookup above;
  // previously these three reads threw on a null/undefined config.
  const sampling = isPlainObject(runOverrides?.sampling)
    ? runOverrides.sampling
    : (runtimeConfig?.inference?.sampling || {});
  const debugProbes = runtimeConfig?.shared?.debug?.probes || [];
  const profile = runtimeConfig?.shared?.debug?.profiler?.enabled === true;
  // Any configured debug probe turns command batching off for this run.
  const disableCommandBatching = Array.isArray(debugProbes) && debugProbes.length > 0;
  const start = performance.now();

  for await (const tokenText of pipeline.generate(promptInput, {
    maxTokens,
    temperature: sampling.temperature,
    topP: sampling.topP,
    topK: sampling.topK,
    repetitionPenalty: sampling.repetitionPenalty,
    greedyThreshold: sampling.greedyThreshold,
    useChatTemplate,
    profile,
    disableCommandBatching,
    // Parameter named distinctly from the loop variable it used to shadow.
    onToken: (tokenId, emittedText) => {
      tokenIds.push(tokenId);
      tokenRecords.push({
        id: tokenId,
        text: typeof emittedText === 'string' ? emittedText : '',
        fallbackText: pipeline?.tokenizer?.decode?.([tokenId], false, false) ?? '',
      });
    },
  })) {
    if (typeof tokenText === 'string') {
      tokens.push(tokenText);
    }
  }

  // Clamped to 1 ms so the throughput division below never divides by zero.
  const durationMs = Math.max(1, performance.now() - start);
  const tokensPerSec = (tokens.length / durationMs) * 1000;
  const stats = typeof pipeline?.getStats === 'function'
    ? (pipeline.getStats() || {})
    : {};
  const prefillMs = Number.isFinite(stats.prefillTimeMs) ? stats.prefillTimeMs : 0;
  const ttftMs = Number.isFinite(stats.ttftMs) ? stats.ttftMs : prefillMs;
  const decodeMs = Number.isFinite(stats.decodeTimeMs) ? stats.decodeTimeMs : 0;
  const prefillTokens = Number.isFinite(stats.prefillTokens) ? stats.prefillTokens : 0;
  const decodeTokens = Number.isFinite(stats.decodeTokens)
    ? stats.decodeTokens
    : Math.max(0, tokens.length - 1);
  const decodeTokensPerSec = decodeMs > 0
    ? (decodeTokens / decodeMs) * 1000
    : 0;
  const prefillTokensPerSec = prefillMs > 0
    ? (prefillTokens / prefillMs) * 1000
    : 0;
  const prefillTokensPerSecTtft = ttftMs > 0
    ? (prefillTokens / ttftMs) * 1000
    : 0;
  // Only finite GPU timings are reported; the whole section is null when none exist.
  const gpu = {};
  if (Number.isFinite(stats.gpuTimePrefillMs)) gpu.prefillMs = stats.gpuTimePrefillMs;
  if (Number.isFinite(stats.gpuTimeDecodeMs)) gpu.decodeMs = stats.gpuTimeDecodeMs;
  if (Number.isFinite(stats.decodeRecordMs)) gpu.decodeRecordMs = stats.decodeRecordMs;
  if (Number.isFinite(stats.decodeSubmitWaitMs)) gpu.decodeSubmitWaitMs = stats.decodeSubmitWaitMs;
  if (Number.isFinite(stats.decodeReadbackWaitMs)) gpu.decodeReadbackWaitMs = stats.decodeReadbackWaitMs;
  const gpuPhase = Object.keys(gpu).length > 0 ? gpu : null;
  const decodeProfileSteps = Array.isArray(stats.decodeProfileSteps)
    ? stats.decodeProfileSteps
    : null;

  return {
    prompt: promptLabel,
    promptInput,
    maxTokens,
    tokens,
    tokenIds,
    tokenDiagnostics: summarizeGenerationTokens(tokenRecords),
    output: tokens.join(''),
    durationMs,
    tokensPerSec,
    phase: {
      totalMs: Number.isFinite(stats.totalTimeMs) ? stats.totalTimeMs : durationMs,
      ttftMs,
      prefillMs,
      decodeMs,
      prefillTokens,
      decodeTokens,
      prefillTokensPerSec,
      prefillTokensPerSecTtft,
      decodeTokensPerSec,
      gpu: gpuPhase,
      decodeProfileSteps,
    },
  };
}
772
+
773
/**
 * Embed a single prompt and report timing plus embedding statistics.
 *
 * The prompt is the trimmed `runOverrides.prompt` when that is a non-blank
 * string, otherwise whatever resolvePrompt returns for the runtime config.
 *
 * @param {object} pipeline - Pipeline exposing `embed`.
 * @param {object|null|undefined} runtimeConfig - Runtime configuration.
 * @param {object|null} [runOverrides] - Per-run overrides.
 * @returns {Promise<object>} `{ prompt, tokenCount, durationMs }` merged with
 *   the fields produced by summarizeEmbeddingValues.
 */
export async function runEmbedding(pipeline, runtimeConfig, runOverrides = null) {
  const overrideText = typeof runOverrides?.prompt === 'string'
    ? runOverrides.prompt.trim()
    : '';
  const prompt = overrideText || resolvePrompt(runtimeConfig);

  const start = performance.now();
  const result = await pipeline.embed(prompt);
  // Reported duration is never below 1 ms.
  const durationMs = Math.max(1, performance.now() - start);

  const producedTokens = result?.tokens?.length;
  const tokenCount = Number.isFinite(producedTokens) ? producedTokens : 0;
  const stats = summarizeEmbeddingValues(result?.embedding);

  return {
    prompt,
    tokenCount,
    durationMs,
    ...stats,
  };
}