@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,790 @@
1
+ // =============================================================================
2
+ // INFERENCE MAX REFERENCE ENVELOPES
3
+ // =============================================================================
4
+ //
5
+ // Performance benchmarks from InferenceMax testing.
6
+ // These represent achievable performance under optimal conditions.
7
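+ // Key: model name (e.g. 'gpt-4o'), Hugging Face-style repo id (e.g. 'meta-llama/Llama-3-70b-chat-hf'), or model:runtime combination (e.g. 'llama-3-70b:vllm')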
8
+ //
9
+ // TTFT = Time To First Token, in milliseconds (ttft_p50_ms / ttft_p95_ms)
10
+ // TPS = Tokens Per Second (output generation speed)
11
+ // =============================================================================
12
+ export const ENVELOPES = {
13
+ // ==========================================================================
14
+ // OpenAI Models
15
+ // ==========================================================================
16
+ 'gpt-4o': {
17
+ ttft_p50_ms: 200,
18
+ ttft_p95_ms: 500,
19
+ tps_median: 80,
20
+ tps_peak: 120,
21
+ },
22
+ 'gpt-4o-2024-11-20': {
23
+ ttft_p50_ms: 200,
24
+ ttft_p95_ms: 500,
25
+ tps_median: 80,
26
+ tps_peak: 120,
27
+ },
28
+ 'gpt-4o-mini': {
29
+ ttft_p50_ms: 150,
30
+ ttft_p95_ms: 350,
31
+ tps_median: 100,
32
+ tps_peak: 150,
33
+ },
34
+ 'gpt-4o-mini-2024-07-18': {
35
+ ttft_p50_ms: 150,
36
+ ttft_p95_ms: 350,
37
+ tps_median: 100,
38
+ tps_peak: 150,
39
+ },
40
+ 'gpt-4-turbo': {
41
+ ttft_p50_ms: 300,
42
+ ttft_p95_ms: 800,
43
+ tps_median: 60,
44
+ tps_peak: 90,
45
+ },
46
+ 'gpt-4-turbo-2024-04-09': {
47
+ ttft_p50_ms: 300,
48
+ ttft_p95_ms: 800,
49
+ tps_median: 60,
50
+ tps_peak: 90,
51
+ },
52
+ 'gpt-4.1': {
53
+ ttft_p50_ms: 180,
54
+ ttft_p95_ms: 450,
55
+ tps_median: 90,
56
+ tps_peak: 130,
57
+ },
58
+ 'gpt-4.1-2025-04-14': {
59
+ ttft_p50_ms: 180,
60
+ ttft_p95_ms: 450,
61
+ tps_median: 90,
62
+ tps_peak: 130,
63
+ },
64
+ 'gpt-4.1-mini': {
65
+ ttft_p50_ms: 120,
66
+ ttft_p95_ms: 300,
67
+ tps_median: 110,
68
+ tps_peak: 160,
69
+ },
70
+ 'gpt-4.1-nano': {
71
+ ttft_p50_ms: 80,
72
+ ttft_p95_ms: 200,
73
+ tps_median: 140,
74
+ tps_peak: 200,
75
+ },
76
+ 'gpt-3.5-turbo': {
77
+ ttft_p50_ms: 100,
78
+ ttft_p95_ms: 250,
79
+ tps_median: 120,
80
+ tps_peak: 180,
81
+ },
82
+ 'o1': {
83
+ ttft_p50_ms: 2000,
84
+ ttft_p95_ms: 8000,
85
+ tps_median: 30,
86
+ tps_peak: 50,
87
+ },
88
+ 'o1-preview': {
89
+ ttft_p50_ms: 2000,
90
+ ttft_p95_ms: 8000,
91
+ tps_median: 30,
92
+ tps_peak: 50,
93
+ },
94
+ 'o1-mini': {
95
+ ttft_p50_ms: 800,
96
+ ttft_p95_ms: 3000,
97
+ tps_median: 50,
98
+ tps_peak: 80,
99
+ },
100
+ 'o3': {
101
+ ttft_p50_ms: 1500,
102
+ ttft_p95_ms: 6000,
103
+ tps_median: 40,
104
+ tps_peak: 60,
105
+ },
106
+ 'o3-mini': {
107
+ ttft_p50_ms: 600,
108
+ ttft_p95_ms: 2000,
109
+ tps_median: 60,
110
+ tps_peak: 90,
111
+ },
112
+ // ==========================================================================
113
+ // Anthropic Models
114
+ // ==========================================================================
115
+ 'claude-3-opus': {
116
+ ttft_p50_ms: 400,
117
+ ttft_p95_ms: 1200,
118
+ tps_median: 40,
119
+ tps_peak: 60,
120
+ },
121
+ 'claude-3-opus-20240229': {
122
+ ttft_p50_ms: 400,
123
+ ttft_p95_ms: 1200,
124
+ tps_median: 40,
125
+ tps_peak: 60,
126
+ },
127
+ 'claude-3-sonnet': {
128
+ ttft_p50_ms: 250,
129
+ ttft_p95_ms: 600,
130
+ tps_median: 70,
131
+ tps_peak: 100,
132
+ },
133
+ 'claude-3-sonnet-20240229': {
134
+ ttft_p50_ms: 250,
135
+ ttft_p95_ms: 600,
136
+ tps_median: 70,
137
+ tps_peak: 100,
138
+ },
139
+ 'claude-3-haiku': {
140
+ ttft_p50_ms: 150,
141
+ ttft_p95_ms: 350,
142
+ tps_median: 100,
143
+ tps_peak: 150,
144
+ },
145
+ 'claude-3-haiku-20240307': {
146
+ ttft_p50_ms: 150,
147
+ ttft_p95_ms: 350,
148
+ tps_median: 100,
149
+ tps_peak: 150,
150
+ },
151
+ 'claude-3-5-sonnet': {
152
+ ttft_p50_ms: 200,
153
+ ttft_p95_ms: 500,
154
+ tps_median: 80,
155
+ tps_peak: 120,
156
+ },
157
+ 'claude-3-5-sonnet-20241022': {
158
+ ttft_p50_ms: 200,
159
+ ttft_p95_ms: 500,
160
+ tps_median: 80,
161
+ tps_peak: 120,
162
+ },
163
+ 'claude-3-5-haiku': {
164
+ ttft_p50_ms: 120,
165
+ ttft_p95_ms: 300,
166
+ tps_median: 110,
167
+ tps_peak: 160,
168
+ },
169
+ 'claude-3-5-haiku-20241022': {
170
+ ttft_p50_ms: 120,
171
+ ttft_p95_ms: 300,
172
+ tps_median: 110,
173
+ tps_peak: 160,
174
+ },
175
+ // Claude 4 models
176
+ 'claude-sonnet-4-20250514': {
177
+ ttft_p50_ms: 180,
178
+ ttft_p95_ms: 450,
179
+ tps_median: 90,
180
+ tps_peak: 130,
181
+ },
182
+ 'claude-4-sonnet': {
183
+ ttft_p50_ms: 180,
184
+ ttft_p95_ms: 450,
185
+ tps_median: 90,
186
+ tps_peak: 130,
187
+ },
188
+ 'claude-opus-4-20250514': {
189
+ ttft_p50_ms: 350,
190
+ ttft_p95_ms: 1000,
191
+ tps_median: 50,
192
+ tps_peak: 75,
193
+ },
194
+ 'claude-4-opus': {
195
+ ttft_p50_ms: 350,
196
+ ttft_p95_ms: 1000,
197
+ tps_median: 50,
198
+ tps_peak: 75,
199
+ },
200
+ // ==========================================================================
201
+ // Google Models
202
+ // ==========================================================================
203
+ 'gemini-pro': {
204
+ ttft_p50_ms: 250,
205
+ ttft_p95_ms: 600,
206
+ tps_median: 60,
207
+ tps_peak: 100,
208
+ },
209
+ 'gemini-1.5-pro': {
210
+ ttft_p50_ms: 300,
211
+ ttft_p95_ms: 800,
212
+ tps_median: 50,
213
+ tps_peak: 80,
214
+ },
215
+ 'gemini-1.5-pro-latest': {
216
+ ttft_p50_ms: 300,
217
+ ttft_p95_ms: 800,
218
+ tps_median: 50,
219
+ tps_peak: 80,
220
+ },
221
+ 'gemini-1.5-flash': {
222
+ ttft_p50_ms: 150,
223
+ ttft_p95_ms: 400,
224
+ tps_median: 100,
225
+ tps_peak: 150,
226
+ },
227
+ 'gemini-1.5-flash-latest': {
228
+ ttft_p50_ms: 150,
229
+ ttft_p95_ms: 400,
230
+ tps_median: 100,
231
+ tps_peak: 150,
232
+ },
233
+ 'gemini-2.0-flash': {
234
+ ttft_p50_ms: 120,
235
+ ttft_p95_ms: 300,
236
+ tps_median: 120,
237
+ tps_peak: 180,
238
+ },
239
+ 'gemini-2.0-flash-exp': {
240
+ ttft_p50_ms: 120,
241
+ ttft_p95_ms: 300,
242
+ tps_median: 120,
243
+ tps_peak: 180,
244
+ },
245
+ 'gemini-2.5-pro': {
246
+ ttft_p50_ms: 250,
247
+ ttft_p95_ms: 700,
248
+ tps_median: 60,
249
+ tps_peak: 90,
250
+ },
251
+ 'gemini-2.5-flash': {
252
+ ttft_p50_ms: 100,
253
+ ttft_p95_ms: 250,
254
+ tps_median: 130,
255
+ tps_peak: 200,
256
+ },
257
+ // ==========================================================================
258
+ // Mistral Models
259
+ // ==========================================================================
260
+ 'mistral-large': {
261
+ ttft_p50_ms: 200,
262
+ ttft_p95_ms: 500,
263
+ tps_median: 70,
264
+ tps_peak: 110,
265
+ },
266
+ 'mistral-large-latest': {
267
+ ttft_p50_ms: 200,
268
+ ttft_p95_ms: 500,
269
+ tps_median: 70,
270
+ tps_peak: 110,
271
+ },
272
+ 'mistral-medium': {
273
+ ttft_p50_ms: 150,
274
+ ttft_p95_ms: 350,
275
+ tps_median: 90,
276
+ tps_peak: 140,
277
+ },
278
+ 'mistral-small': {
279
+ ttft_p50_ms: 100,
280
+ ttft_p95_ms: 250,
281
+ tps_median: 110,
282
+ tps_peak: 160,
283
+ },
284
+ 'mistral-small-latest': {
285
+ ttft_p50_ms: 100,
286
+ ttft_p95_ms: 250,
287
+ tps_median: 110,
288
+ tps_peak: 160,
289
+ },
290
+ 'codestral': {
291
+ ttft_p50_ms: 120,
292
+ ttft_p95_ms: 300,
293
+ tps_median: 100,
294
+ tps_peak: 150,
295
+ },
296
+ 'codestral-latest': {
297
+ ttft_p50_ms: 120,
298
+ ttft_p95_ms: 300,
299
+ tps_median: 100,
300
+ tps_peak: 150,
301
+ },
302
+ // ==========================================================================
303
+ // Together AI / Fireworks / Groq (Cloud-hosted open models)
304
+ // ==========================================================================
305
+ 'mixtral-8x7b': {
306
+ ttft_p50_ms: 100,
307
+ ttft_p95_ms: 250,
308
+ tps_median: 60,
309
+ tps_peak: 100,
310
+ },
311
+ 'mixtral-8x7b-instruct': {
312
+ ttft_p50_ms: 100,
313
+ ttft_p95_ms: 250,
314
+ tps_median: 60,
315
+ tps_peak: 100,
316
+ },
317
+ 'mixtral-8x22b': {
318
+ ttft_p50_ms: 150,
319
+ ttft_p95_ms: 400,
320
+ tps_median: 40,
321
+ tps_peak: 70,
322
+ },
323
+ 'mixtral-8x22b-instruct': {
324
+ ttft_p50_ms: 150,
325
+ ttft_p95_ms: 400,
326
+ tps_median: 40,
327
+ tps_peak: 70,
328
+ },
329
+ // ==========================================================================
330
+ // Llama 3 Models (Meta)
331
+ // ==========================================================================
332
+ 'llama-3-70b': {
333
+ ttft_p50_ms: 200,
334
+ ttft_p95_ms: 600,
335
+ tps_median: 35,
336
+ tps_peak: 60,
337
+ },
338
+ 'llama-3-70b:vllm': {
339
+ ttft_p50_ms: 150,
340
+ ttft_p95_ms: 400,
341
+ tps_median: 45,
342
+ tps_peak: 90,
343
+ },
344
+ 'llama-3-70b:sglang': {
345
+ ttft_p50_ms: 120,
346
+ ttft_p95_ms: 350,
347
+ tps_median: 55,
348
+ tps_peak: 100,
349
+ },
350
+ 'llama-3-70b:tgi': {
351
+ ttft_p50_ms: 180,
352
+ ttft_p95_ms: 500,
353
+ tps_median: 35,
354
+ tps_peak: 70,
355
+ },
356
+ 'meta-llama/Llama-3-70b-chat-hf': {
357
+ ttft_p50_ms: 150,
358
+ ttft_p95_ms: 400,
359
+ tps_median: 45,
360
+ tps_peak: 90,
361
+ },
362
+ 'llama-3-8b': {
363
+ ttft_p50_ms: 80,
364
+ ttft_p95_ms: 200,
365
+ tps_median: 80,
366
+ tps_peak: 150,
367
+ },
368
+ 'llama-3-8b:vllm': {
369
+ ttft_p50_ms: 60,
370
+ ttft_p95_ms: 150,
371
+ tps_median: 100,
372
+ tps_peak: 180,
373
+ },
374
+ 'meta-llama/Llama-3-8b-chat-hf': {
375
+ ttft_p50_ms: 60,
376
+ ttft_p95_ms: 150,
377
+ tps_median: 100,
378
+ tps_peak: 180,
379
+ },
380
+ // ==========================================================================
381
+ // Llama 3.1 Models
382
+ // ==========================================================================
383
+ 'llama-3.1-405b': {
384
+ ttft_p50_ms: 400,
385
+ ttft_p95_ms: 1200,
386
+ tps_median: 20,
387
+ tps_peak: 35,
388
+ },
389
+ 'llama-3.1-405b:vllm': {
390
+ ttft_p50_ms: 300,
391
+ ttft_p95_ms: 900,
392
+ tps_median: 25,
393
+ tps_peak: 45,
394
+ },
395
+ 'llama-3.1-405b:sglang': {
396
+ ttft_p50_ms: 250,
397
+ ttft_p95_ms: 800,
398
+ tps_median: 30,
399
+ tps_peak: 50,
400
+ },
401
+ 'meta-llama/Llama-3.1-405B-Instruct': {
402
+ ttft_p50_ms: 300,
403
+ ttft_p95_ms: 900,
404
+ tps_median: 25,
405
+ tps_peak: 45,
406
+ },
407
+ 'llama-3.1-70b': {
408
+ ttft_p50_ms: 180,
409
+ ttft_p95_ms: 500,
410
+ tps_median: 40,
411
+ tps_peak: 70,
412
+ },
413
+ 'llama-3.1-70b:vllm': {
414
+ ttft_p50_ms: 140,
415
+ ttft_p95_ms: 380,
416
+ tps_median: 50,
417
+ tps_peak: 95,
418
+ },
419
+ 'llama-3.1-70b:sglang': {
420
+ ttft_p50_ms: 110,
421
+ ttft_p95_ms: 320,
422
+ tps_median: 60,
423
+ tps_peak: 110,
424
+ },
425
+ 'meta-llama/Llama-3.1-70B-Instruct': {
426
+ ttft_p50_ms: 140,
427
+ ttft_p95_ms: 380,
428
+ tps_median: 50,
429
+ tps_peak: 95,
430
+ },
431
+ 'llama-3.1-8b': {
432
+ ttft_p50_ms: 70,
433
+ ttft_p95_ms: 180,
434
+ tps_median: 90,
435
+ tps_peak: 160,
436
+ },
437
+ 'llama-3.1-8b:vllm': {
438
+ ttft_p50_ms: 50,
439
+ ttft_p95_ms: 130,
440
+ tps_median: 110,
441
+ tps_peak: 200,
442
+ },
443
+ 'meta-llama/Llama-3.1-8B-Instruct': {
444
+ ttft_p50_ms: 50,
445
+ ttft_p95_ms: 130,
446
+ tps_median: 110,
447
+ tps_peak: 200,
448
+ },
449
+ // ==========================================================================
450
+ // Llama 3.2 Models (Smaller/Edge)
451
+ // ==========================================================================
452
+ 'llama-3.2-90b': {
453
+ ttft_p50_ms: 250,
454
+ ttft_p95_ms: 700,
455
+ tps_median: 30,
456
+ tps_peak: 55,
457
+ },
458
+ 'llama-3.2-90b:vllm': {
459
+ ttft_p50_ms: 200,
460
+ ttft_p95_ms: 550,
461
+ tps_median: 38,
462
+ tps_peak: 70,
463
+ },
464
+ 'meta-llama/Llama-3.2-90B-Vision-Instruct': {
465
+ ttft_p50_ms: 200,
466
+ ttft_p95_ms: 550,
467
+ tps_median: 38,
468
+ tps_peak: 70,
469
+ },
470
+ 'llama-3.2-11b': {
471
+ ttft_p50_ms: 80,
472
+ ttft_p95_ms: 200,
473
+ tps_median: 75,
474
+ tps_peak: 140,
475
+ },
476
+ 'llama-3.2-11b:vllm': {
477
+ ttft_p50_ms: 60,
478
+ ttft_p95_ms: 150,
479
+ tps_median: 95,
480
+ tps_peak: 170,
481
+ },
482
+ 'meta-llama/Llama-3.2-11B-Vision-Instruct': {
483
+ ttft_p50_ms: 60,
484
+ ttft_p95_ms: 150,
485
+ tps_median: 95,
486
+ tps_peak: 170,
487
+ },
488
+ 'llama-3.2-3b': {
489
+ ttft_p50_ms: 40,
490
+ ttft_p95_ms: 100,
491
+ tps_median: 140,
492
+ tps_peak: 250,
493
+ },
494
+ 'llama-3.2-3b:vllm': {
495
+ ttft_p50_ms: 30,
496
+ ttft_p95_ms: 80,
497
+ tps_median: 170,
498
+ tps_peak: 300,
499
+ },
500
+ 'llama-3.2-3b:ollama': {
501
+ ttft_p50_ms: 50,
502
+ ttft_p95_ms: 120,
503
+ tps_median: 120,
504
+ tps_peak: 200,
505
+ },
506
+ 'meta-llama/Llama-3.2-3B-Instruct': {
507
+ ttft_p50_ms: 30,
508
+ ttft_p95_ms: 80,
509
+ tps_median: 170,
510
+ tps_peak: 300,
511
+ },
512
+ 'llama-3.2-1b': {
513
+ ttft_p50_ms: 25,
514
+ ttft_p95_ms: 60,
515
+ tps_median: 200,
516
+ tps_peak: 350,
517
+ },
518
+ 'llama-3.2-1b:ollama': {
519
+ ttft_p50_ms: 30,
520
+ ttft_p95_ms: 80,
521
+ tps_median: 180,
522
+ tps_peak: 300,
523
+ },
524
+ 'meta-llama/Llama-3.2-1B-Instruct': {
525
+ ttft_p50_ms: 25,
526
+ ttft_p95_ms: 60,
527
+ tps_median: 200,
528
+ tps_peak: 350,
529
+ },
530
+ // ==========================================================================
531
+ // Llama 3.3 Models
532
+ // ==========================================================================
533
+ 'llama-3.3-70b': {
534
+ ttft_p50_ms: 160,
535
+ ttft_p95_ms: 450,
536
+ tps_median: 45,
537
+ tps_peak: 80,
538
+ },
539
+ 'llama-3.3-70b:vllm': {
540
+ ttft_p50_ms: 120,
541
+ ttft_p95_ms: 350,
542
+ tps_median: 55,
543
+ tps_peak: 100,
544
+ },
545
+ 'llama-3.3-70b:sglang': {
546
+ ttft_p50_ms: 100,
547
+ ttft_p95_ms: 300,
548
+ tps_median: 65,
549
+ tps_peak: 115,
550
+ },
551
+ 'meta-llama/Llama-3.3-70B-Instruct': {
552
+ ttft_p50_ms: 120,
553
+ ttft_p95_ms: 350,
554
+ tps_median: 55,
555
+ tps_peak: 100,
556
+ },
557
+ // ==========================================================================
558
+ // Qwen Models (Alibaba)
559
+ // ==========================================================================
560
+ 'qwen-2.5-72b': {
561
+ ttft_p50_ms: 200,
562
+ ttft_p95_ms: 550,
563
+ tps_median: 40,
564
+ tps_peak: 70,
565
+ },
566
+ 'qwen-2.5-72b:vllm': {
567
+ ttft_p50_ms: 150,
568
+ ttft_p95_ms: 420,
569
+ tps_median: 50,
570
+ tps_peak: 90,
571
+ },
572
+ 'qwen-2.5-72b:sglang': {
573
+ ttft_p50_ms: 130,
574
+ ttft_p95_ms: 380,
575
+ tps_median: 55,
576
+ tps_peak: 100,
577
+ },
578
+ 'Qwen/Qwen2.5-72B-Instruct': {
579
+ ttft_p50_ms: 150,
580
+ ttft_p95_ms: 420,
581
+ tps_median: 50,
582
+ tps_peak: 90,
583
+ },
584
+ 'qwen-2.5-32b': {
585
+ ttft_p50_ms: 120,
586
+ ttft_p95_ms: 320,
587
+ tps_median: 60,
588
+ tps_peak: 100,
589
+ },
590
+ 'qwen-2.5-32b:vllm': {
591
+ ttft_p50_ms: 90,
592
+ ttft_p95_ms: 250,
593
+ tps_median: 75,
594
+ tps_peak: 130,
595
+ },
596
+ 'Qwen/Qwen2.5-32B-Instruct': {
597
+ ttft_p50_ms: 90,
598
+ ttft_p95_ms: 250,
599
+ tps_median: 75,
600
+ tps_peak: 130,
601
+ },
602
+ 'qwen-2.5-14b': {
603
+ ttft_p50_ms: 80,
604
+ ttft_p95_ms: 220,
605
+ tps_median: 80,
606
+ tps_peak: 140,
607
+ },
608
+ 'qwen-2.5-7b': {
609
+ ttft_p50_ms: 50,
610
+ ttft_p95_ms: 140,
611
+ tps_median: 110,
612
+ tps_peak: 190,
613
+ },
614
+ 'qwen-2.5-3b': {
615
+ ttft_p50_ms: 35,
616
+ ttft_p95_ms: 90,
617
+ tps_median: 150,
618
+ tps_peak: 260,
619
+ },
620
+ 'qwen-2.5-coder-32b': {
621
+ ttft_p50_ms: 120,
622
+ ttft_p95_ms: 320,
623
+ tps_median: 60,
624
+ tps_peak: 100,
625
+ },
626
+ 'qwen-2.5-coder-7b': {
627
+ ttft_p50_ms: 50,
628
+ ttft_p95_ms: 140,
629
+ tps_median: 110,
630
+ tps_peak: 190,
631
+ },
632
+ // ==========================================================================
633
+ // DeepSeek Models
634
+ // ==========================================================================
635
+ 'deepseek-v3': {
636
+ ttft_p50_ms: 300,
637
+ ttft_p95_ms: 800,
638
+ tps_median: 35,
639
+ tps_peak: 60,
640
+ },
641
+ 'deepseek-v3:vllm': {
642
+ ttft_p50_ms: 220,
643
+ ttft_p95_ms: 600,
644
+ tps_median: 45,
645
+ tps_peak: 80,
646
+ },
647
+ 'deepseek-chat': {
648
+ ttft_p50_ms: 300,
649
+ ttft_p95_ms: 800,
650
+ tps_median: 35,
651
+ tps_peak: 60,
652
+ },
653
+ 'deepseek-coder': {
654
+ ttft_p50_ms: 200,
655
+ ttft_p95_ms: 550,
656
+ tps_median: 50,
657
+ tps_peak: 85,
658
+ },
659
+ 'deepseek-coder-v2': {
660
+ ttft_p50_ms: 180,
661
+ ttft_p95_ms: 500,
662
+ tps_median: 55,
663
+ tps_peak: 95,
664
+ },
665
+ 'deepseek-r1': {
666
+ ttft_p50_ms: 1500,
667
+ ttft_p95_ms: 5000,
668
+ tps_median: 25,
669
+ tps_peak: 45,
670
+ },
671
+ 'deepseek-r1-lite': {
672
+ ttft_p50_ms: 800,
673
+ ttft_p95_ms: 2500,
674
+ tps_median: 40,
675
+ tps_peak: 70,
676
+ },
677
+ // ==========================================================================
678
+ // Cohere Models
679
+ // ==========================================================================
680
+ 'command-r': {
681
+ ttft_p50_ms: 200,
682
+ ttft_p95_ms: 500,
683
+ tps_median: 60,
684
+ tps_peak: 100,
685
+ },
686
+ 'command-r-plus': {
687
+ ttft_p50_ms: 300,
688
+ ttft_p95_ms: 800,
689
+ tps_median: 45,
690
+ tps_peak: 75,
691
+ },
692
+ 'command-light': {
693
+ ttft_p50_ms: 100,
694
+ ttft_p95_ms: 250,
695
+ tps_median: 100,
696
+ tps_peak: 150,
697
+ },
698
+ // ==========================================================================
699
+ // Local/Edge Models (Ollama, llama.cpp)
700
+ // ==========================================================================
701
+ 'phi-3': {
702
+ ttft_p50_ms: 50,
703
+ ttft_p95_ms: 130,
704
+ tps_median: 120,
705
+ tps_peak: 200,
706
+ },
707
+ 'phi-3:ollama': {
708
+ ttft_p50_ms: 60,
709
+ ttft_p95_ms: 150,
710
+ tps_median: 100,
711
+ tps_peak: 170,
712
+ },
713
+ 'phi-3-mini': {
714
+ ttft_p50_ms: 30,
715
+ ttft_p95_ms: 80,
716
+ tps_median: 160,
717
+ tps_peak: 280,
718
+ },
719
+ 'gemma-2-9b': {
720
+ ttft_p50_ms: 70,
721
+ ttft_p95_ms: 180,
722
+ tps_median: 85,
723
+ tps_peak: 150,
724
+ },
725
+ 'gemma-2-9b:ollama': {
726
+ ttft_p50_ms: 80,
727
+ ttft_p95_ms: 200,
728
+ tps_median: 75,
729
+ tps_peak: 130,
730
+ },
731
+ 'gemma-2-27b': {
732
+ ttft_p50_ms: 150,
733
+ ttft_p95_ms: 400,
734
+ tps_median: 45,
735
+ tps_peak: 80,
736
+ },
737
+ };
738
+ // =============================================================================
739
+ // PUBLIC API
740
+ // =============================================================================
741
/**
 * Get performance envelope for a model.
 *
 * Lookup order (first hit wins):
 *   1. `model:runtime` key (exact, then case-insensitive) when a runtime is given
 *   2. `model` key (exact, then case-insensitive)
 *   3. Partial match: the key whose base name (text before ':') is the longest
 *      one contained in the model name; among equally long bases a key with the
 *      requested runtime wins, then a runtime-less key
 *   4. Partial match: the first key that contains the model name
 *
 * @param model - Model name (e.g., "gpt-4o", "llama-3-70b")
 * @param runtime - Optional runtime (e.g., "vllm", "sglang")
 * @returns Performance envelope or null if not found (also null for a
 *          non-string or blank model name)
 */
export function getEnvelope(model, runtime) {
    // A blank name would otherwise "partially match" every key, and a
    // non-string would throw on toLowerCase().
    if (typeof model !== 'string' || model.trim() === '') {
        return null;
    }
    const hasRuntime = typeof runtime === 'string' && runtime !== '';
    const lowerModel = model.toLowerCase();
    const lowerRuntime = hasRuntime ? runtime.toLowerCase() : '';
    const entries = Object.entries(ENVELOPES);
    // Runtime-specific key is tried before the generic one.
    const candidates = hasRuntime ? [`${model}:${runtime}`, model] : [model];
    for (const candidate of candidates) {
        // Own-property check so names such as "constructor" or "toString"
        // do not resolve to members inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(ENVELOPES, candidate)) {
            return ENVELOPES[candidate];
        }
        const lowerCandidate = candidate.toLowerCase();
        const hit = entries.find(([key]) => key.toLowerCase() === lowerCandidate);
        if (hit) {
            return hit[1];
        }
    }
    // Partial matching. Prefer the most specific (longest) base contained in
    // the model name so a short family key cannot shadow a longer one.
    let best = null;
    let bestLength = 0;
    let bestRank = -1;
    let firstContaining = null;
    for (const [key, envelope] of entries) {
        const lowerKey = key.toLowerCase();
        const separator = lowerKey.indexOf(':');
        const base = separator === -1 ? lowerKey : lowerKey.slice(0, separator);
        const keyRuntime = separator === -1 ? '' : lowerKey.slice(separator + 1);
        if (base !== '' && lowerModel.includes(base)) {
            // Rank: requested runtime (2) > runtime-less key (1) > other runtime (0).
            let rank = 0;
            if (hasRuntime && keyRuntime === lowerRuntime) {
                rank = 2;
            }
            else if (keyRuntime === '') {
                rank = 1;
            }
            if (base.length > bestLength || (base.length === bestLength && rank > bestRank)) {
                best = envelope;
                bestLength = base.length;
                bestRank = rank;
            }
        }
        else if (firstContaining === null && lowerKey.includes(lowerModel)) {
            // Model name is an abbreviation of the key; keep table order.
            firstContaining = envelope;
        }
    }
    return best !== null ? best : firstContaining;
}
776
/**
 * Calculate throughput percentage against reference envelope.
 *
 * The observed rate is compared with the envelope's `tps_median`, so 100
 * means "equal to the reference median" and values above 100 are possible.
 *
 * @param model - Model name
 * @param actualTps - Actual tokens per second observed
 * @param runtime - Optional runtime
 * @returns Percentage (0-100+) rounded to the nearest integer, or null if no
 *          envelope is found or the ratio is not a finite number (missing or
 *          non-numeric `actualTps`, zero reference median)
 */
export function getThroughputPercent(model, actualTps, runtime) {
    const envelope = getEnvelope(model, runtime);
    if (!envelope) {
        return null;
    }
    const percent = Math.round((actualTps / envelope.tps_median) * 100);
    // Never leak NaN/Infinity to callers; the contract is "number or null".
    return Number.isFinite(percent) ? percent : null;
}
790
+ //# sourceMappingURL=envelopes.js.map