@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,814 @@
1
+ import type { PerformanceEnvelope } from './types.js';
2
+
3
+ // =============================================================================
4
+ // INFERENCE MAX REFERENCE ENVELOPES
5
+ // =============================================================================
6
+ //
7
+ // Performance benchmarks from InferenceMax testing.
8
+ // These represent achievable performance under optimal conditions.
9
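+ // Key: model name (e.g. 'gpt-4o'), provider-prefixed model id (e.g. 'meta-llama/Llama-3.1-8B-Instruct'), or model:runtime combination (e.g. 'llama-3-70b:vllm')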
10
+ //
11
+ // TTFT = Time To First Token
12
+ // TPS = Tokens Per Second (output generation speed)
13
+ // =============================================================================
14
+
15
+ export const ENVELOPES: Record<string, PerformanceEnvelope> = {
16
+ // ==========================================================================
17
+ // OpenAI Models
18
+ // ==========================================================================
19
+ 'gpt-4o': {
20
+ ttft_p50_ms: 200,
21
+ ttft_p95_ms: 500,
22
+ tps_median: 80,
23
+ tps_peak: 120,
24
+ },
25
+ 'gpt-4o-2024-11-20': {
26
+ ttft_p50_ms: 200,
27
+ ttft_p95_ms: 500,
28
+ tps_median: 80,
29
+ tps_peak: 120,
30
+ },
31
+ 'gpt-4o-mini': {
32
+ ttft_p50_ms: 150,
33
+ ttft_p95_ms: 350,
34
+ tps_median: 100,
35
+ tps_peak: 150,
36
+ },
37
+ 'gpt-4o-mini-2024-07-18': {
38
+ ttft_p50_ms: 150,
39
+ ttft_p95_ms: 350,
40
+ tps_median: 100,
41
+ tps_peak: 150,
42
+ },
43
+ 'gpt-4-turbo': {
44
+ ttft_p50_ms: 300,
45
+ ttft_p95_ms: 800,
46
+ tps_median: 60,
47
+ tps_peak: 90,
48
+ },
49
+ 'gpt-4-turbo-2024-04-09': {
50
+ ttft_p50_ms: 300,
51
+ ttft_p95_ms: 800,
52
+ tps_median: 60,
53
+ tps_peak: 90,
54
+ },
55
+ 'gpt-4.1': {
56
+ ttft_p50_ms: 180,
57
+ ttft_p95_ms: 450,
58
+ tps_median: 90,
59
+ tps_peak: 130,
60
+ },
61
+ 'gpt-4.1-2025-04-14': {
62
+ ttft_p50_ms: 180,
63
+ ttft_p95_ms: 450,
64
+ tps_median: 90,
65
+ tps_peak: 130,
66
+ },
67
+ 'gpt-4.1-mini': {
68
+ ttft_p50_ms: 120,
69
+ ttft_p95_ms: 300,
70
+ tps_median: 110,
71
+ tps_peak: 160,
72
+ },
73
+ 'gpt-4.1-nano': {
74
+ ttft_p50_ms: 80,
75
+ ttft_p95_ms: 200,
76
+ tps_median: 140,
77
+ tps_peak: 200,
78
+ },
79
+ 'gpt-3.5-turbo': {
80
+ ttft_p50_ms: 100,
81
+ ttft_p95_ms: 250,
82
+ tps_median: 120,
83
+ tps_peak: 180,
84
+ },
85
+ 'o1': {
86
+ ttft_p50_ms: 2000,
87
+ ttft_p95_ms: 8000,
88
+ tps_median: 30,
89
+ tps_peak: 50,
90
+ },
91
+ 'o1-preview': {
92
+ ttft_p50_ms: 2000,
93
+ ttft_p95_ms: 8000,
94
+ tps_median: 30,
95
+ tps_peak: 50,
96
+ },
97
+ 'o1-mini': {
98
+ ttft_p50_ms: 800,
99
+ ttft_p95_ms: 3000,
100
+ tps_median: 50,
101
+ tps_peak: 80,
102
+ },
103
+ 'o3': {
104
+ ttft_p50_ms: 1500,
105
+ ttft_p95_ms: 6000,
106
+ tps_median: 40,
107
+ tps_peak: 60,
108
+ },
109
+ 'o3-mini': {
110
+ ttft_p50_ms: 600,
111
+ ttft_p95_ms: 2000,
112
+ tps_median: 60,
113
+ tps_peak: 90,
114
+ },
115
+
116
+ // ==========================================================================
117
+ // Anthropic Models
118
+ // ==========================================================================
119
+ 'claude-3-opus': {
120
+ ttft_p50_ms: 400,
121
+ ttft_p95_ms: 1200,
122
+ tps_median: 40,
123
+ tps_peak: 60,
124
+ },
125
+ 'claude-3-opus-20240229': {
126
+ ttft_p50_ms: 400,
127
+ ttft_p95_ms: 1200,
128
+ tps_median: 40,
129
+ tps_peak: 60,
130
+ },
131
+ 'claude-3-sonnet': {
132
+ ttft_p50_ms: 250,
133
+ ttft_p95_ms: 600,
134
+ tps_median: 70,
135
+ tps_peak: 100,
136
+ },
137
+ 'claude-3-sonnet-20240229': {
138
+ ttft_p50_ms: 250,
139
+ ttft_p95_ms: 600,
140
+ tps_median: 70,
141
+ tps_peak: 100,
142
+ },
143
+ 'claude-3-haiku': {
144
+ ttft_p50_ms: 150,
145
+ ttft_p95_ms: 350,
146
+ tps_median: 100,
147
+ tps_peak: 150,
148
+ },
149
+ 'claude-3-haiku-20240307': {
150
+ ttft_p50_ms: 150,
151
+ ttft_p95_ms: 350,
152
+ tps_median: 100,
153
+ tps_peak: 150,
154
+ },
155
+ 'claude-3-5-sonnet': {
156
+ ttft_p50_ms: 200,
157
+ ttft_p95_ms: 500,
158
+ tps_median: 80,
159
+ tps_peak: 120,
160
+ },
161
+ 'claude-3-5-sonnet-20241022': {
162
+ ttft_p50_ms: 200,
163
+ ttft_p95_ms: 500,
164
+ tps_median: 80,
165
+ tps_peak: 120,
166
+ },
167
+ 'claude-3-5-haiku': {
168
+ ttft_p50_ms: 120,
169
+ ttft_p95_ms: 300,
170
+ tps_median: 110,
171
+ tps_peak: 160,
172
+ },
173
+ 'claude-3-5-haiku-20241022': {
174
+ ttft_p50_ms: 120,
175
+ ttft_p95_ms: 300,
176
+ tps_median: 110,
177
+ tps_peak: 160,
178
+ },
179
+ // Claude 4 models
180
+ 'claude-sonnet-4-20250514': {
181
+ ttft_p50_ms: 180,
182
+ ttft_p95_ms: 450,
183
+ tps_median: 90,
184
+ tps_peak: 130,
185
+ },
186
+ 'claude-4-sonnet': {
187
+ ttft_p50_ms: 180,
188
+ ttft_p95_ms: 450,
189
+ tps_median: 90,
190
+ tps_peak: 130,
191
+ },
192
+ 'claude-opus-4-20250514': {
193
+ ttft_p50_ms: 350,
194
+ ttft_p95_ms: 1000,
195
+ tps_median: 50,
196
+ tps_peak: 75,
197
+ },
198
+ 'claude-4-opus': {
199
+ ttft_p50_ms: 350,
200
+ ttft_p95_ms: 1000,
201
+ tps_median: 50,
202
+ tps_peak: 75,
203
+ },
204
+
205
+ // ==========================================================================
206
+ // Google Models
207
+ // ==========================================================================
208
+ 'gemini-pro': {
209
+ ttft_p50_ms: 250,
210
+ ttft_p95_ms: 600,
211
+ tps_median: 60,
212
+ tps_peak: 100,
213
+ },
214
+ 'gemini-1.5-pro': {
215
+ ttft_p50_ms: 300,
216
+ ttft_p95_ms: 800,
217
+ tps_median: 50,
218
+ tps_peak: 80,
219
+ },
220
+ 'gemini-1.5-pro-latest': {
221
+ ttft_p50_ms: 300,
222
+ ttft_p95_ms: 800,
223
+ tps_median: 50,
224
+ tps_peak: 80,
225
+ },
226
+ 'gemini-1.5-flash': {
227
+ ttft_p50_ms: 150,
228
+ ttft_p95_ms: 400,
229
+ tps_median: 100,
230
+ tps_peak: 150,
231
+ },
232
+ 'gemini-1.5-flash-latest': {
233
+ ttft_p50_ms: 150,
234
+ ttft_p95_ms: 400,
235
+ tps_median: 100,
236
+ tps_peak: 150,
237
+ },
238
+ 'gemini-2.0-flash': {
239
+ ttft_p50_ms: 120,
240
+ ttft_p95_ms: 300,
241
+ tps_median: 120,
242
+ tps_peak: 180,
243
+ },
244
+ 'gemini-2.0-flash-exp': {
245
+ ttft_p50_ms: 120,
246
+ ttft_p95_ms: 300,
247
+ tps_median: 120,
248
+ tps_peak: 180,
249
+ },
250
+ 'gemini-2.5-pro': {
251
+ ttft_p50_ms: 250,
252
+ ttft_p95_ms: 700,
253
+ tps_median: 60,
254
+ tps_peak: 90,
255
+ },
256
+ 'gemini-2.5-flash': {
257
+ ttft_p50_ms: 100,
258
+ ttft_p95_ms: 250,
259
+ tps_median: 130,
260
+ tps_peak: 200,
261
+ },
262
+
263
+ // ==========================================================================
264
+ // Mistral Models
265
+ // ==========================================================================
266
+ 'mistral-large': {
267
+ ttft_p50_ms: 200,
268
+ ttft_p95_ms: 500,
269
+ tps_median: 70,
270
+ tps_peak: 110,
271
+ },
272
+ 'mistral-large-latest': {
273
+ ttft_p50_ms: 200,
274
+ ttft_p95_ms: 500,
275
+ tps_median: 70,
276
+ tps_peak: 110,
277
+ },
278
+ 'mistral-medium': {
279
+ ttft_p50_ms: 150,
280
+ ttft_p95_ms: 350,
281
+ tps_median: 90,
282
+ tps_peak: 140,
283
+ },
284
+ 'mistral-small': {
285
+ ttft_p50_ms: 100,
286
+ ttft_p95_ms: 250,
287
+ tps_median: 110,
288
+ tps_peak: 160,
289
+ },
290
+ 'mistral-small-latest': {
291
+ ttft_p50_ms: 100,
292
+ ttft_p95_ms: 250,
293
+ tps_median: 110,
294
+ tps_peak: 160,
295
+ },
296
+ 'codestral': {
297
+ ttft_p50_ms: 120,
298
+ ttft_p95_ms: 300,
299
+ tps_median: 100,
300
+ tps_peak: 150,
301
+ },
302
+ 'codestral-latest': {
303
+ ttft_p50_ms: 120,
304
+ ttft_p95_ms: 300,
305
+ tps_median: 100,
306
+ tps_peak: 150,
307
+ },
308
+
309
+ // ==========================================================================
310
+ // Together AI / Fireworks / Groq (Cloud-hosted open models)
311
+ // ==========================================================================
312
+ 'mixtral-8x7b': {
313
+ ttft_p50_ms: 100,
314
+ ttft_p95_ms: 250,
315
+ tps_median: 60,
316
+ tps_peak: 100,
317
+ },
318
+ 'mixtral-8x7b-instruct': {
319
+ ttft_p50_ms: 100,
320
+ ttft_p95_ms: 250,
321
+ tps_median: 60,
322
+ tps_peak: 100,
323
+ },
324
+ 'mixtral-8x22b': {
325
+ ttft_p50_ms: 150,
326
+ ttft_p95_ms: 400,
327
+ tps_median: 40,
328
+ tps_peak: 70,
329
+ },
330
+ 'mixtral-8x22b-instruct': {
331
+ ttft_p50_ms: 150,
332
+ ttft_p95_ms: 400,
333
+ tps_median: 40,
334
+ tps_peak: 70,
335
+ },
336
+
337
+ // ==========================================================================
338
+ // Llama 3 Models (Meta)
339
+ // ==========================================================================
340
+ 'llama-3-70b': {
341
+ ttft_p50_ms: 200,
342
+ ttft_p95_ms: 600,
343
+ tps_median: 35,
344
+ tps_peak: 60,
345
+ },
346
+ 'llama-3-70b:vllm': {
347
+ ttft_p50_ms: 150,
348
+ ttft_p95_ms: 400,
349
+ tps_median: 45,
350
+ tps_peak: 90,
351
+ },
352
+ 'llama-3-70b:sglang': {
353
+ ttft_p50_ms: 120,
354
+ ttft_p95_ms: 350,
355
+ tps_median: 55,
356
+ tps_peak: 100,
357
+ },
358
+ 'llama-3-70b:tgi': {
359
+ ttft_p50_ms: 180,
360
+ ttft_p95_ms: 500,
361
+ tps_median: 35,
362
+ tps_peak: 70,
363
+ },
364
+ 'meta-llama/Llama-3-70b-chat-hf': {
365
+ ttft_p50_ms: 150,
366
+ ttft_p95_ms: 400,
367
+ tps_median: 45,
368
+ tps_peak: 90,
369
+ },
370
+ 'llama-3-8b': {
371
+ ttft_p50_ms: 80,
372
+ ttft_p95_ms: 200,
373
+ tps_median: 80,
374
+ tps_peak: 150,
375
+ },
376
+ 'llama-3-8b:vllm': {
377
+ ttft_p50_ms: 60,
378
+ ttft_p95_ms: 150,
379
+ tps_median: 100,
380
+ tps_peak: 180,
381
+ },
382
+ 'meta-llama/Llama-3-8b-chat-hf': {
383
+ ttft_p50_ms: 60,
384
+ ttft_p95_ms: 150,
385
+ tps_median: 100,
386
+ tps_peak: 180,
387
+ },
388
+
389
+ // ==========================================================================
390
+ // Llama 3.1 Models
391
+ // ==========================================================================
392
+ 'llama-3.1-405b': {
393
+ ttft_p50_ms: 400,
394
+ ttft_p95_ms: 1200,
395
+ tps_median: 20,
396
+ tps_peak: 35,
397
+ },
398
+ 'llama-3.1-405b:vllm': {
399
+ ttft_p50_ms: 300,
400
+ ttft_p95_ms: 900,
401
+ tps_median: 25,
402
+ tps_peak: 45,
403
+ },
404
+ 'llama-3.1-405b:sglang': {
405
+ ttft_p50_ms: 250,
406
+ ttft_p95_ms: 800,
407
+ tps_median: 30,
408
+ tps_peak: 50,
409
+ },
410
+ 'meta-llama/Llama-3.1-405B-Instruct': {
411
+ ttft_p50_ms: 300,
412
+ ttft_p95_ms: 900,
413
+ tps_median: 25,
414
+ tps_peak: 45,
415
+ },
416
+ 'llama-3.1-70b': {
417
+ ttft_p50_ms: 180,
418
+ ttft_p95_ms: 500,
419
+ tps_median: 40,
420
+ tps_peak: 70,
421
+ },
422
+ 'llama-3.1-70b:vllm': {
423
+ ttft_p50_ms: 140,
424
+ ttft_p95_ms: 380,
425
+ tps_median: 50,
426
+ tps_peak: 95,
427
+ },
428
+ 'llama-3.1-70b:sglang': {
429
+ ttft_p50_ms: 110,
430
+ ttft_p95_ms: 320,
431
+ tps_median: 60,
432
+ tps_peak: 110,
433
+ },
434
+ 'meta-llama/Llama-3.1-70B-Instruct': {
435
+ ttft_p50_ms: 140,
436
+ ttft_p95_ms: 380,
437
+ tps_median: 50,
438
+ tps_peak: 95,
439
+ },
440
+ 'llama-3.1-8b': {
441
+ ttft_p50_ms: 70,
442
+ ttft_p95_ms: 180,
443
+ tps_median: 90,
444
+ tps_peak: 160,
445
+ },
446
+ 'llama-3.1-8b:vllm': {
447
+ ttft_p50_ms: 50,
448
+ ttft_p95_ms: 130,
449
+ tps_median: 110,
450
+ tps_peak: 200,
451
+ },
452
+ 'meta-llama/Llama-3.1-8B-Instruct': {
453
+ ttft_p50_ms: 50,
454
+ ttft_p95_ms: 130,
455
+ tps_median: 110,
456
+ tps_peak: 200,
457
+ },
458
+
459
+ // ==========================================================================
460
+ // Llama 3.2 Models (Smaller/Edge)
461
+ // ==========================================================================
462
+ 'llama-3.2-90b': {
463
+ ttft_p50_ms: 250,
464
+ ttft_p95_ms: 700,
465
+ tps_median: 30,
466
+ tps_peak: 55,
467
+ },
468
+ 'llama-3.2-90b:vllm': {
469
+ ttft_p50_ms: 200,
470
+ ttft_p95_ms: 550,
471
+ tps_median: 38,
472
+ tps_peak: 70,
473
+ },
474
+ 'meta-llama/Llama-3.2-90B-Vision-Instruct': {
475
+ ttft_p50_ms: 200,
476
+ ttft_p95_ms: 550,
477
+ tps_median: 38,
478
+ tps_peak: 70,
479
+ },
480
+ 'llama-3.2-11b': {
481
+ ttft_p50_ms: 80,
482
+ ttft_p95_ms: 200,
483
+ tps_median: 75,
484
+ tps_peak: 140,
485
+ },
486
+ 'llama-3.2-11b:vllm': {
487
+ ttft_p50_ms: 60,
488
+ ttft_p95_ms: 150,
489
+ tps_median: 95,
490
+ tps_peak: 170,
491
+ },
492
+ 'meta-llama/Llama-3.2-11B-Vision-Instruct': {
493
+ ttft_p50_ms: 60,
494
+ ttft_p95_ms: 150,
495
+ tps_median: 95,
496
+ tps_peak: 170,
497
+ },
498
+ 'llama-3.2-3b': {
499
+ ttft_p50_ms: 40,
500
+ ttft_p95_ms: 100,
501
+ tps_median: 140,
502
+ tps_peak: 250,
503
+ },
504
+ 'llama-3.2-3b:vllm': {
505
+ ttft_p50_ms: 30,
506
+ ttft_p95_ms: 80,
507
+ tps_median: 170,
508
+ tps_peak: 300,
509
+ },
510
+ 'llama-3.2-3b:ollama': {
511
+ ttft_p50_ms: 50,
512
+ ttft_p95_ms: 120,
513
+ tps_median: 120,
514
+ tps_peak: 200,
515
+ },
516
+ 'meta-llama/Llama-3.2-3B-Instruct': {
517
+ ttft_p50_ms: 30,
518
+ ttft_p95_ms: 80,
519
+ tps_median: 170,
520
+ tps_peak: 300,
521
+ },
522
+ 'llama-3.2-1b': {
523
+ ttft_p50_ms: 25,
524
+ ttft_p95_ms: 60,
525
+ tps_median: 200,
526
+ tps_peak: 350,
527
+ },
528
+ 'llama-3.2-1b:ollama': {
529
+ ttft_p50_ms: 30,
530
+ ttft_p95_ms: 80,
531
+ tps_median: 180,
532
+ tps_peak: 300,
533
+ },
534
+ 'meta-llama/Llama-3.2-1B-Instruct': {
535
+ ttft_p50_ms: 25,
536
+ ttft_p95_ms: 60,
537
+ tps_median: 200,
538
+ tps_peak: 350,
539
+ },
540
+
541
+ // ==========================================================================
542
+ // Llama 3.3 Models
543
+ // ==========================================================================
544
+ 'llama-3.3-70b': {
545
+ ttft_p50_ms: 160,
546
+ ttft_p95_ms: 450,
547
+ tps_median: 45,
548
+ tps_peak: 80,
549
+ },
550
+ 'llama-3.3-70b:vllm': {
551
+ ttft_p50_ms: 120,
552
+ ttft_p95_ms: 350,
553
+ tps_median: 55,
554
+ tps_peak: 100,
555
+ },
556
+ 'llama-3.3-70b:sglang': {
557
+ ttft_p50_ms: 100,
558
+ ttft_p95_ms: 300,
559
+ tps_median: 65,
560
+ tps_peak: 115,
561
+ },
562
+ 'meta-llama/Llama-3.3-70B-Instruct': {
563
+ ttft_p50_ms: 120,
564
+ ttft_p95_ms: 350,
565
+ tps_median: 55,
566
+ tps_peak: 100,
567
+ },
568
+
569
+ // ==========================================================================
570
+ // Qwen Models (Alibaba)
571
+ // ==========================================================================
572
+ 'qwen-2.5-72b': {
573
+ ttft_p50_ms: 200,
574
+ ttft_p95_ms: 550,
575
+ tps_median: 40,
576
+ tps_peak: 70,
577
+ },
578
+ 'qwen-2.5-72b:vllm': {
579
+ ttft_p50_ms: 150,
580
+ ttft_p95_ms: 420,
581
+ tps_median: 50,
582
+ tps_peak: 90,
583
+ },
584
+ 'qwen-2.5-72b:sglang': {
585
+ ttft_p50_ms: 130,
586
+ ttft_p95_ms: 380,
587
+ tps_median: 55,
588
+ tps_peak: 100,
589
+ },
590
+ 'Qwen/Qwen2.5-72B-Instruct': {
591
+ ttft_p50_ms: 150,
592
+ ttft_p95_ms: 420,
593
+ tps_median: 50,
594
+ tps_peak: 90,
595
+ },
596
+ 'qwen-2.5-32b': {
597
+ ttft_p50_ms: 120,
598
+ ttft_p95_ms: 320,
599
+ tps_median: 60,
600
+ tps_peak: 100,
601
+ },
602
+ 'qwen-2.5-32b:vllm': {
603
+ ttft_p50_ms: 90,
604
+ ttft_p95_ms: 250,
605
+ tps_median: 75,
606
+ tps_peak: 130,
607
+ },
608
+ 'Qwen/Qwen2.5-32B-Instruct': {
609
+ ttft_p50_ms: 90,
610
+ ttft_p95_ms: 250,
611
+ tps_median: 75,
612
+ tps_peak: 130,
613
+ },
614
+ 'qwen-2.5-14b': {
615
+ ttft_p50_ms: 80,
616
+ ttft_p95_ms: 220,
617
+ tps_median: 80,
618
+ tps_peak: 140,
619
+ },
620
+ 'qwen-2.5-7b': {
621
+ ttft_p50_ms: 50,
622
+ ttft_p95_ms: 140,
623
+ tps_median: 110,
624
+ tps_peak: 190,
625
+ },
626
+ 'qwen-2.5-3b': {
627
+ ttft_p50_ms: 35,
628
+ ttft_p95_ms: 90,
629
+ tps_median: 150,
630
+ tps_peak: 260,
631
+ },
632
+ 'qwen-2.5-coder-32b': {
633
+ ttft_p50_ms: 120,
634
+ ttft_p95_ms: 320,
635
+ tps_median: 60,
636
+ tps_peak: 100,
637
+ },
638
+ 'qwen-2.5-coder-7b': {
639
+ ttft_p50_ms: 50,
640
+ ttft_p95_ms: 140,
641
+ tps_median: 110,
642
+ tps_peak: 190,
643
+ },
644
+
645
+ // ==========================================================================
646
+ // DeepSeek Models
647
+ // ==========================================================================
648
+ 'deepseek-v3': {
649
+ ttft_p50_ms: 300,
650
+ ttft_p95_ms: 800,
651
+ tps_median: 35,
652
+ tps_peak: 60,
653
+ },
654
+ 'deepseek-v3:vllm': {
655
+ ttft_p50_ms: 220,
656
+ ttft_p95_ms: 600,
657
+ tps_median: 45,
658
+ tps_peak: 80,
659
+ },
660
+ 'deepseek-chat': {
661
+ ttft_p50_ms: 300,
662
+ ttft_p95_ms: 800,
663
+ tps_median: 35,
664
+ tps_peak: 60,
665
+ },
666
+ 'deepseek-coder': {
667
+ ttft_p50_ms: 200,
668
+ ttft_p95_ms: 550,
669
+ tps_median: 50,
670
+ tps_peak: 85,
671
+ },
672
+ 'deepseek-coder-v2': {
673
+ ttft_p50_ms: 180,
674
+ ttft_p95_ms: 500,
675
+ tps_median: 55,
676
+ tps_peak: 95,
677
+ },
678
+ 'deepseek-r1': {
679
+ ttft_p50_ms: 1500,
680
+ ttft_p95_ms: 5000,
681
+ tps_median: 25,
682
+ tps_peak: 45,
683
+ },
684
+ 'deepseek-r1-lite': {
685
+ ttft_p50_ms: 800,
686
+ ttft_p95_ms: 2500,
687
+ tps_median: 40,
688
+ tps_peak: 70,
689
+ },
690
+
691
+ // ==========================================================================
692
+ // Cohere Models
693
+ // ==========================================================================
694
+ 'command-r': {
695
+ ttft_p50_ms: 200,
696
+ ttft_p95_ms: 500,
697
+ tps_median: 60,
698
+ tps_peak: 100,
699
+ },
700
+ 'command-r-plus': {
701
+ ttft_p50_ms: 300,
702
+ ttft_p95_ms: 800,
703
+ tps_median: 45,
704
+ tps_peak: 75,
705
+ },
706
+ 'command-light': {
707
+ ttft_p50_ms: 100,
708
+ ttft_p95_ms: 250,
709
+ tps_median: 100,
710
+ tps_peak: 150,
711
+ },
712
+
713
+ // ==========================================================================
714
+ // Local/Edge Models (Ollama, llama.cpp)
715
+ // ==========================================================================
716
+ 'phi-3': {
717
+ ttft_p50_ms: 50,
718
+ ttft_p95_ms: 130,
719
+ tps_median: 120,
720
+ tps_peak: 200,
721
+ },
722
+ 'phi-3:ollama': {
723
+ ttft_p50_ms: 60,
724
+ ttft_p95_ms: 150,
725
+ tps_median: 100,
726
+ tps_peak: 170,
727
+ },
728
+ 'phi-3-mini': {
729
+ ttft_p50_ms: 30,
730
+ ttft_p95_ms: 80,
731
+ tps_median: 160,
732
+ tps_peak: 280,
733
+ },
734
+ 'gemma-2-9b': {
735
+ ttft_p50_ms: 70,
736
+ ttft_p95_ms: 180,
737
+ tps_median: 85,
738
+ tps_peak: 150,
739
+ },
740
+ 'gemma-2-9b:ollama': {
741
+ ttft_p50_ms: 80,
742
+ ttft_p95_ms: 200,
743
+ tps_median: 75,
744
+ tps_peak: 130,
745
+ },
746
+ 'gemma-2-27b': {
747
+ ttft_p50_ms: 150,
748
+ ttft_p95_ms: 400,
749
+ tps_median: 45,
750
+ tps_peak: 80,
751
+ },
752
+ };
753
+
754
+ // =============================================================================
755
+ // PUBLIC API
756
+ // =============================================================================
757
+
758
/**
 * Get performance envelope for a model.
 *
 * Resolution order (first hit wins):
 *  1. Exact key `model:runtime` (only when `runtime` is given), then exact key `model`.
 *  2. Case-insensitive `model:runtime`, then case-insensitive `model`.
 *  3. Partial match where the model name contains a table key's base name
 *     (the part before `:`). The longest base wins, so a specific entry such as
 *     "phi-3-mini" is preferred over the family entry "phi-3". Among entries that
 *     share the winning base, the one for `runtime` is preferred, then the
 *     runtime-less entry, then the first one in table order.
 *  4. Partial match where a table key contains the model name (first in table order).
 *
 * @param model - Model name (e.g., "llama-3.3-70b", "Qwen/Qwen2.5-72B-Instruct")
 * @param runtime - Optional runtime (e.g., "vllm", "sglang")
 * @returns Performance envelope or null if not found (always null for an empty model name)
 */
export function getEnvelope(model: string, runtime?: string): PerformanceEnvelope | null {
  // Own-property lookup only: a plain `ENVELOPES[key]` also resolves inherited
  // members such as "constructor" or "toString" and would return a non-envelope.
  const lookup = (key: string): PerformanceEnvelope | null => {
    if (!Object.prototype.hasOwnProperty.call(ENVELOPES, key)) return null;
    return ENVELOPES[key] || null;
  };

  // 1. Exact match, runtime-specific entry first.
  const exact = (runtime ? lookup(`${model}:${runtime}`) : null) || lookup(model);
  if (exact) return exact;

  const lowerModel = model.toLowerCase();
  // Every string contains the empty string, so an empty model would otherwise
  // "partially match" the first entry of the table.
  if (lowerModel === '') return null;
  const lowerRuntime = runtime ? runtime.toLowerCase() : '';

  // Pre-split each key into its lower-cased base name and runtime suffix.
  const entries = Object.entries(ENVELOPES).map(([key, envelope]) => {
    const lowerKey = key.toLowerCase();
    const sep = lowerKey.indexOf(':');
    return {
      envelope,
      lowerKey,
      base: sep === -1 ? lowerKey : lowerKey.slice(0, sep),
      keyRuntime: sep === -1 ? '' : lowerKey.slice(sep + 1),
    };
  });

  // 2. Case-insensitive match, still honouring the requested runtime.
  let insensitive: PerformanceEnvelope | null = null;
  for (const entry of entries) {
    if (lowerRuntime !== '' && entry.lowerKey === `${lowerModel}:${lowerRuntime}`) {
      return entry.envelope;
    }
    if (insensitive === null && entry.lowerKey === lowerModel) {
      insensitive = entry.envelope;
    }
  }
  if (insensitive) return insensitive;

  // 3 + 4. Partial matches, collected in a single pass over the table.
  let bestBase = '';
  let bestFirst: PerformanceEnvelope | null = null; // first entry seen for bestBase
  let bestPlain: PerformanceEnvelope | null = null; // runtime-less entry for bestBase
  let bestForRuntime: PerformanceEnvelope | null = null; // entry for bestBase + requested runtime
  let firstContaining: PerformanceEnvelope | null = null; // first key that contains the model name

  for (const entry of entries) {
    if (entry.base !== '' && lowerModel.includes(entry.base)) {
      if (entry.base.length > bestBase.length) {
        // A longer base is a more specific match; discard what was collected so far.
        bestBase = entry.base;
        bestFirst = entry.envelope;
        bestPlain = null;
        bestForRuntime = null;
      }
      if (entry.base === bestBase) {
        if (bestPlain === null && entry.keyRuntime === '') {
          bestPlain = entry.envelope;
        }
        if (bestForRuntime === null && lowerRuntime !== '' && entry.keyRuntime === lowerRuntime) {
          bestForRuntime = entry.envelope;
        }
      }
    } else if (firstContaining === null && entry.lowerKey.includes(lowerModel)) {
      firstContaining = entry.envelope;
    }
  }

  return bestForRuntime || bestPlain || bestFirst || firstContaining || null;
}
797
+
798
/**
 * Express an observed throughput as a percentage of the reference median
 * throughput (`tps_median`) of the envelope resolved for the model.
 *
 * @param model - Model name
 * @param actualTps - Actual tokens per second observed
 * @param runtime - Optional runtime
 * @returns Rounded percentage (100 means "equal to the reference median"),
 *          or null if no envelope found
 */
export function getThroughputPercent(
  model: string,
  actualTps: number,
  runtime?: string
): number | null {
  const reference = getEnvelope(model, runtime);
  if (reference) {
    const ratio = actualTps / reference.tps_median;
    return Math.round(ratio * 100);
  }
  return null;
}