@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
package/src/impact.ts ADDED
@@ -0,0 +1,522 @@
1
+ import type { Insight, ImpactEstimate, StackLayer, ImpactType, EffortLevel } from './types.js';
2
+
3
+ // =============================================================================
4
+ // CONSTANTS - Default Impact Estimates
5
+ // =============================================================================
6
+
7
/**
 * Price per 1M tokens for each known model, expressed as the average of the
 * input and output rates. Keys are lowercase model ids; lookups elsewhere in
 * this module lowercase the model name before indexing.
 */
const MODEL_PRICING: Record<string, number> = {
  // GPT family
  'gpt-4': 45,
  'gpt-4o': 7.5,
  'gpt-4-turbo': 20,
  'gpt-3.5-turbo': 1,
  'gpt-4o-mini': 0.3,

  // Claude family
  'claude-3-opus': 37.5,
  'claude-3-sonnet': 9,
  'claude-3-haiku': 0.625,
  'claude-3.5-sonnet': 9,

  // Gemini family
  'gemini-pro': 0.5,
  'gemini-1.5-pro': 3.5,
  'gemini-1.5-flash': 0.35,
};
22
+
23
/** Default impact profile attached to one recognised optimization pattern. */
interface PatternImpact {
  /** Stack layer the optimization applies to. */
  layer: StackLayer;
  /** Which metric the optimization moves. */
  impactType: ImpactType;
  /** Default size of the improvement, in percent (throughput entries exceed 100). */
  impactPercent: number;
  /** Implementation effort bucket. */
  effort: EffortLevel;
  /** Short human-readable summary of the optimization. */
  description: string;
}

/**
 * Build a slice of the pattern table for a single layer, stamping `layer`
 * onto every entry so each group states its layer exactly once.
 */
function patternsForLayer(
  layer: StackLayer,
  entries: Record<string, Omit<PatternImpact, 'layer'>>
): Record<string, PatternImpact> {
  const stamped: Record<string, PatternImpact> = {};
  for (const [patternId, rest] of Object.entries(entries)) {
    stamped[patternId] = { layer, ...rest };
  }
  return stamped;
}

/** Default impact estimates, keyed by optimization pattern id. */
const PATTERN_IMPACTS: Record<string, PatternImpact> = {
  // Application Layer - Code Patterns
  ...patternsForLayer('application', {
    'model-downgrade': { impactType: 'cost', impactPercent: 70, effort: 'low', description: 'Use cheaper model for simple tasks' },
    'add-caching': { impactType: 'cost', impactPercent: 40, effort: 'medium', description: 'Cache repeated similar prompts' },
    'add-batching': { impactType: 'throughput', impactPercent: 60, effort: 'medium', description: 'Batch multiple requests together' },
    'enable-streaming': { impactType: 'latency', impactPercent: 70, effort: 'low', description: 'Enable streaming for better TTFT' },
    'add-retry': { impactType: 'throughput', impactPercent: 15, effort: 'low', description: 'Add retry with exponential backoff' },
    'connection-pooling': { impactType: 'latency', impactPercent: 20, effort: 'low', description: 'Reuse HTTP connections' },
  }),

  // Model Layer - Model Selection
  ...patternsForLayer('model', {
    'gpt4-to-gpt35': { impactType: 'cost', impactPercent: 97, effort: 'low', description: 'GPT-4 → GPT-3.5-turbo (45x cheaper)' },
    'gpt4-to-gpt4o-mini': { impactType: 'cost', impactPercent: 99, effort: 'low', description: 'GPT-4 → GPT-4o-mini (150x cheaper)' },
    'opus-to-haiku': { impactType: 'cost', impactPercent: 98, effort: 'low', description: 'Claude Opus → Haiku (60x cheaper)' },
    'sonnet-to-haiku': { impactType: 'cost', impactPercent: 93, effort: 'low', description: 'Claude Sonnet → Haiku (14x cheaper)' },
  }),

  // Runtime Layer - Inference Engines
  ...patternsForLayer('runtime', {
    'use-vllm': { impactType: 'throughput', impactPercent: 300, effort: 'high', description: 'Deploy with vLLM for 3-4x throughput' },
    'use-sglang': { impactType: 'latency', impactPercent: 50, effort: 'high', description: 'Use SGLang for optimized batching' },
    'use-tgi': { impactType: 'throughput', impactPercent: 200, effort: 'high', description: 'Deploy with TGI for 2-3x throughput' },
    'continuous-batching': { impactType: 'throughput', impactPercent: 150, effort: 'medium', description: 'Enable continuous batching' },
    'speculative-decoding': { impactType: 'latency', impactPercent: 40, effort: 'high', description: 'Use speculative decoding for faster generation' },
  }),

  // Hardware Layer - GPU/Hosting (v1.8: renamed from infrastructure)
  ...patternsForLayer('hardware', {
    'dedicated-gpu': { impactType: 'cost', impactPercent: 60, effort: 'high', description: 'Self-host on dedicated GPUs vs API' },
    'spot-instances': { impactType: 'cost', impactPercent: 70, effort: 'medium', description: 'Use spot/preemptible instances' },
    'regional-deployment': { impactType: 'latency', impactPercent: 30, effort: 'low', description: 'Deploy closer to users' },
    'autoscaling': { impactType: 'cost', impactPercent: 40, effort: 'medium', description: 'Implement autoscaling for variable load' },
  }),
};
174
+
175
+ // =============================================================================
176
+ // IMPACT DETECTION
177
+ // =============================================================================
178
+
179
/**
 * Detect which optimization pattern an insight matches.
 *
 * Matching is keyword-based over the lowercased `headline` and `evidence`
 * text. Rules are evaluated top to bottom and the first hit wins, so more
 * specific rules must stay ahead of (or be excluded from) generic ones.
 *
 * @param insight - Insight whose `headline` and `evidence` are scanned; a
 *   missing field is treated as an empty string.
 * @returns The id of the matching entry in `PATTERN_IMPACTS`, or `null` when
 *   no rule matches.
 */
function detectPattern(insight: Insight): string | null {
  const headline = (insight.headline ?? '').toLowerCase();
  const evidence = (insight.evidence ?? '').toLowerCase();
  const combined = `${headline} ${evidence}`;

  const has = (needle: string): boolean => combined.includes(needle);
  const hasAny = (...needles: string[]): boolean => needles.some((needle) => has(needle));

  // Model downgrade patterns
  if (has('gpt-4') && hasAny('gpt-3.5', 'cheaper')) {
    // Whole-word match: a bare substring test would also fire on "minimize",
    // "minimal" or "gemini" and misreport the target model.
    return /\bmini\b/.test(combined) ? 'gpt4-to-gpt4o-mini' : 'gpt4-to-gpt35';
  }
  if (has('opus') && has('haiku')) {
    return 'opus-to-haiku';
  }
  if (has('sonnet') && has('haiku')) {
    return 'sonnet-to-haiku';
  }
  if (hasAny('expensive model', 'overkill', 'over-specification', 'simple task')) {
    return 'model-downgrade';
  }

  // Application patterns
  if (has('streaming') && hasAny('enable', 'missing', 'no streaming')) {
    return 'enable-streaming';
  }
  // "continuous batching" is a runtime-layer pattern handled further down;
  // exclude it here so the generic rule does not shadow it.
  if (has('batch') && !has('continuous batch') && hasAny('missing', 'opportunity', 'no batch')) {
    return 'add-batching';
  }
  if (has('cach') && hasAny('missing', 'opportunity', 'no cach')) {
    return 'add-caching';
  }
  if (has('retry') && hasAny('missing', 'no retry')) {
    return 'add-retry';
  }
  if (has('connection') && has('pool')) {
    return 'connection-pooling';
  }

  // Runtime patterns
  if (has('vllm')) {
    return 'use-vllm';
  }
  if (has('sglang')) {
    return 'use-sglang';
  }
  // Whole-word match so the three letters inside an unrelated word do not count.
  if (/\btgi\b/.test(combined) || has('text generation inference')) {
    return 'use-tgi';
  }
  if (has('continuous batch')) {
    return 'continuous-batching';
  }
  if (has('speculative')) {
    return 'speculative-decoding';
  }

  // Hardware patterns
  if (has('dedicated') && has('gpu')) {
    return 'dedicated-gpu';
  }
  // Whole-word match: "hotspot" / "spotted" must not be read as spot instances.
  if (/\bspot\b/.test(combined) || has('preemptible')) {
    return 'spot-instances';
  }
  if (has('region') && has('deploy')) {
    return 'regional-deployment';
  }
  if (has('autoscal')) {
    return 'autoscaling';
  }

  return null;
}
255
+
256
/**
 * Project the yearly saving from switching one model for another.
 *
 * Prices come from `MODEL_PRICING` (per 1M tokens). A current model that is
 * not in the table is priced as 'gpt-4'; an unlisted recommended model is
 * priced as 'gpt-3.5-turbo'. The result is negative when the recommended
 * model is the more expensive of the two.
 *
 * @param currentModel - Model in use today; falsy yields `undefined`.
 * @param recommendedModel - Proposed replacement; falsy yields `undefined`.
 * @param dailyTokensMillions - Daily volume in millions of tokens (baseline: 1).
 * @returns Price difference over 365 days, rounded to the nearest integer, or
 *   `undefined` when either model is not given.
 */
function estimateAnnualSavings(
  currentModel: string | null,
  recommendedModel: string | null,
  dailyTokensMillions: number = 1
): number | undefined {
  if (!currentModel || !recommendedModel) {
    return undefined;
  }

  // Look a model up by lowercase id, falling back to a reference model's price.
  const priceOf = (model: string, fallbackModel: string): number =>
    MODEL_PRICING[model.toLowerCase()] || MODEL_PRICING[fallbackModel];

  const perMillionDelta = priceOf(currentModel, 'gpt-4') - priceOf(recommendedModel, 'gpt-3.5-turbo');
  const perDay = perMillionDelta * dailyTokensMillions;
  return Math.round(perDay * 365);
}
273
+
274
+ // =============================================================================
275
+ // PUBLIC API
276
+ // =============================================================================
277
+
278
/**
 * Estimate impact for a single insight.
 *
 * When the insight matches a known optimization pattern, that pattern's
 * defaults are returned with a confidence of 0.7 and the pattern description
 * as the stated assumption. Otherwise the category-based default estimate is
 * used instead.
 *
 * @param insight - Insight to estimate.
 * @returns The estimate, or `null` when neither a pattern nor a category
 *   default applies.
 */
export function estimateImpact(insight: Insight): ImpactEstimate | null {
  const patternId = detectPattern(insight);
  const known = patternId ? PATTERN_IMPACTS[patternId] : undefined;

  if (!known) {
    // Default estimate based on category
    return getDefaultEstimate(insight);
  }

  const { layer, impactType, impactPercent, effort, description } = known;
  return {
    layer,
    impactType,
    estimatedImpactPercent: impactPercent,
    effort,
    confidence: 0.7,
    assumptions: description,
  };
}
300
+
301
/**
 * Get the default impact estimate for an insight based on its category.
 *
 * Used when no specific optimization pattern was recognised, so the returned
 * estimate carries a lower confidence (0.5) and no `assumptions` text.
 *
 * @param insight - Insight whose `category` selects the default.
 * @returns The category default, or `null` when the category has no entry.
 */
function getDefaultEstimate(insight: Insight): ImpactEstimate | null {
  const categoryDefaults: Record<
    string,
    Pick<ImpactEstimate, 'layer' | 'impactType' | 'estimatedImpactPercent' | 'effort'>
  > = {
    'cost': { layer: 'application', impactType: 'cost', estimatedImpactPercent: 20, effort: 'medium' },
    'latency': { layer: 'application', impactType: 'latency', estimatedImpactPercent: 30, effort: 'medium' },
    'throughput': { layer: 'application', impactType: 'throughput', estimatedImpactPercent: 25, effort: 'medium' },
    'reliability': { layer: 'application', impactType: 'throughput', estimatedImpactPercent: 15, effort: 'low' },
    'waste': { layer: 'application', impactType: 'cost', estimatedImpactPercent: 25, effort: 'low' },
  };

  // Own-property check: a plain index lookup would also resolve inherited keys
  // such as 'constructor' or '__proto__' and produce an estimate with an
  // undefined layer and impact type.
  const category = insight.category;
  if (!Object.prototype.hasOwnProperty.call(categoryDefaults, category)) {
    return null;
  }

  return { ...categoryDefaults[category], confidence: 0.5 };
}
324
+
325
/**
 * Add impact estimates to all insights.
 *
 * Insights that already carry an `impact` are passed through untouched, as
 * are insights for which no estimate can be produced. Enriched insights are
 * shallow copies; the input objects are never modified.
 *
 * @param insights - Insights to enrich.
 * @returns A new array of the same length and order.
 */
export function enrichInsightsWithImpact(insights: Insight[]): Insight[] {
  return insights.map((insight) => {
    // Only compute an estimate when the insight does not have one yet.
    const estimated = insight.impact ? null : estimateImpact(insight);
    if (!estimated) {
      return insight;
    }
    return { ...insight, impact: estimated };
  });
}
336
+
337
+ /**
338
+ * Stack ranking summary: one entry per stack layer that has at least one
+ * insight, as built by generateImpactSummary.
339
+ */
340
+ export interface StackRanking {
341
+ layer: StackLayer; // Layer this entry aggregates
342
+ totalImpactPercent: number; // Sum of estimatedImpactPercent over the layer's insights (not capped at 100)
343
+ insightCount: number; // Number of insights attributed to the layer
344
+ avgEffort: EffortLevel; // Mean effort score (low=1, medium=2, high=3): <1.5 low, <2.5 medium, else high
345
+ topInsights: Insight[]; // Up to three insights with the highest estimatedImpactPercent
346
+ }
347
+
348
+ export interface ImpactSummary { // Result shape of generateImpactSummary
349
+ totalPotentialImpact: { // NOTE: despite the name, the visible fields are rounded averages, not sums
350
+ costReductionPercent: number; // Rounded mean estimatedImpactPercent of insights with impactType 'cost' (0 if none)
351
+ latencyReductionPercent: number; // Rounded mean estimatedImpactPercent of insights with impactType 'latency' (0 if none)
352
+ throughputGainPercent: number; // Presumably the same mean for impactType 'throughput' - confirm in generateImpactSummary
353
+ };
354
+ stackRanking: StackRanking[]; // Per-layer entries; generateImpactSummary sorts them by totalImpactPercent, descending
355
+ quickWins: Insight[]; // High impact, low effort
356
+ strategicChanges: Insight[]; // High impact, high effort
357
+ prioritizedList: Insight[]; // Insights that have an impact, ordered by impact weighted by effort (low effort ranks higher)
358
+ }
359
+
360
/**
 * Build an ImpactSummary for a set of insights.
 *
 * The insights are first passed through enrichInsightsWithImpact; everything
 * below is derived from that enriched list:
 *  - totalPotentialImpact: rounded mean estimatedImpactPercent of the insights
 *    whose impactType is 'cost', 'latency' and 'throughput' respectively
 *    (0 when no insight has that type).
 *  - stackRanking: one entry per layer that has at least one insight, sorted
 *    by summed impact, highest first. Insights without a layer are counted
 *    under 'application'.
 *  - quickWins: impact >= 40 with 'low' effort, highest impact first.
 *  - strategicChanges: impact >= 50 with 'high' effort, highest impact first.
 *  - prioritizedList: every insight that has an impact, ordered by
 *    impact percent weighted by effort (low x3, medium x2, otherwise x1).
 *
 * @param insights - Insights to summarize.
 * @returns The aggregated impact summary.
 */
export function generateImpactSummary(insights: Insight[]): ImpactSummary {
  const withImpact = enrichInsightsWithImpact(insights);

  // Accessors shared by every aggregation below; a missing impact counts as 0.
  const impactPercentOf = (item: Insight): number =>
    item.impact?.estimatedImpactPercent || 0;
  const byImpactDescending = (left: Insight, right: Insight): number =>
    impactPercentOf(right) - impactPercentOf(left);
  const sumImpact = (items: Insight[]): number =>
    items.reduce((running, item) => running + impactPercentOf(item), 0);

  // Rounded mean impact of all insights of one impact type.
  const meanImpactFor = (kind: string): number => {
    const matching = withImpact.filter(item => item.impact?.impactType === kind);
    return matching.length === 0
      ? 0
      : Math.round(sumImpact(matching) / matching.length);
  };

  // Bucket the insights per stack layer.
  const insightsByLayer = new Map<StackLayer, Insight[]>();
  for (const item of withImpact) {
    const layerKey = item.impact?.layer || 'application';
    const bucket = insightsByLayer.get(layerKey);
    if (bucket) {
      bucket.push(item);
    } else {
      insightsByLayer.set(layerKey, [item]);
    }
  }

  // Numeric effort scale used only to average effort within a layer.
  const effortScore = (level: EffortLevel): number => {
    switch (level) {
      case 'low':
        return 1;
      case 'medium':
        return 2;
      default:
        return 3;
    }
  };

  // Only layers named here can appear in the ranking; this order is also the
  // tie-break order, because the sort below is stable.
  const canonicalLayers: StackLayer[] = [
    'application',
    'api',
    'gateway',
    'runtime',
    'model',
    'hardware',
  ];

  const stackRanking: StackRanking[] = [];
  for (const layer of canonicalLayers) {
    const members = insightsByLayer.get(layer);
    if (!members || members.length === 0) {
      continue;
    }

    let effortTotal = 0;
    for (const member of members) {
      effortTotal += effortScore(member.impact?.effort || 'medium');
    }
    const meanEffort = effortTotal / members.length;

    let avgEffort: EffortLevel;
    if (meanEffort < 1.5) {
      avgEffort = 'low';
    } else if (meanEffort < 2.5) {
      avgEffort = 'medium';
    } else {
      avgEffort = 'high';
    }

    stackRanking.push({
      layer,
      totalImpactPercent: sumImpact(members),
      insightCount: members.length,
      avgEffort,
      topInsights: [...members].sort(byImpactDescending).slice(0, 3),
    });
  }
  stackRanking.sort(
    (left, right) => right.totalImpactPercent - left.totalImpactPercent,
  );

  // Insights at or above an impact threshold with a given effort level.
  const pickByEffort = (minPercent: number, effort: EffortLevel): Insight[] =>
    withImpact
      .filter(
        item =>
          item.impact &&
          item.impact.estimatedImpactPercent >= minPercent &&
          item.impact.effort === effort,
      )
      .sort(byImpactDescending);

  // Impact weighted by how cheap the change is to make.
  const priorityScore = (item: Insight): number => {
    const effort = item.impact?.effort;
    const weight = effort === 'low' ? 3 : effort === 'medium' ? 2 : 1;
    return impactPercentOf(item) * weight;
  };

  const prioritizedList = withImpact
    .filter(item => item.impact)
    .sort((left, right) => priorityScore(right) - priorityScore(left));

  return {
    totalPotentialImpact: {
      costReductionPercent: meanImpactFor('cost'),
      latencyReductionPercent: meanImpactFor('latency'),
      throughputGainPercent: meanImpactFor('throughput'),
    },
    stackRanking,
    quickWins: pickByEffort(40, 'low'),
    strategicChanges: pickByEffort(50, 'high'),
    prioritizedList,
  };
}
453
+
454
/**
 * Human-readable label for an impact type: 'cost' and 'latency' get a
 * " reduction" suffix, any other value is shown as-is.
 */
function describeImpactType(type: string): string {
  if (type === 'cost') return 'cost reduction';
  if (type === 'latency') return 'latency reduction';
  return type;
}

/**
 * Render one titled section of insights for formatImpactSummary.
 *
 * Insights are deduplicated on templateId + headline (first occurrence wins)
 * and at most `limit` of them are listed. Returns no lines at all when there
 * is nothing to list, so the caller never prints an empty section.
 *
 * @param insights - Candidate insights, already in display order.
 * @param title - Section heading, printed dimmed.
 * @param marker - Bullet marker placed in front of each entry.
 * @param limit - Maximum number of entries to print.
 * @returns The section's lines, starting with a blank separator line.
 */
function renderInsightSection(
  insights: Insight[],
  title: string,
  marker: string,
  limit: number,
): string[] {
  const seen = new Set<string>();
  const unique: Insight[] = [];
  for (const insight of insights) {
    if (unique.length >= limit) break;
    const key = `${insight.templateId || ''}:${insight.headline}`;
    if (seen.has(key)) continue;
    seen.add(key);
    unique.push(insight);
  }

  if (unique.length === 0) return [];

  const section: string[] = ['', `\x1b[2m${title}\x1b[0m`];
  for (const insight of unique) {
    const pct = insight.impact?.estimatedImpactPercent || 0;
    const typeLabel = describeImpactType(insight.impact?.impactType || 'improvement');
    // Prefer the impact's assumptions text when present, otherwise the headline.
    const recommendation = insight.impact?.assumptions || insight.headline;
    section.push(` ${marker} ${recommendation} (${pct}% ${typeLabel})`);
  }
  return section;
}

/**
 * Format an impact summary as text for CLI output.
 *
 * Output, in order:
 *  1. A dimmed "By Layer" heading followed by one numbered line per
 *     stackRanking entry showing the average impact per insight in that layer.
 *  2. "Quick Wins": up to 3 deduplicated entries from summary.quickWins.
 *  3. "Strategic": up to 2 deduplicated entries from summary.strategicChanges.
 * Sections 2 and 3 are omitted when they have no entries.
 *
 * A layer whose insightCount is 0 is reported as ~0% rather than dividing by
 * zero and printing "NaN".
 *
 * @param summary - Summary to render (e.g. from generateImpactSummary).
 * @returns The formatted text, lines joined with "\n". Contains ANSI dim
 *          escape sequences around section headings.
 */
export function formatImpactSummary(summary: ImpactSummary): string {
  const lines: string[] = [];

  // Stack ranking by layer - show avg improvement per layer
  lines.push('\x1b[2mBy Layer\x1b[0m');
  summary.stackRanking.forEach((rank, index) => {
    const layerName = rank.layer.charAt(0).toUpperCase() + rank.layer.slice(1);
    const avgImpact = rank.insightCount > 0
      ? Math.round(rank.totalImpactPercent / rank.insightCount)
      : 0;
    lines.push(` ${index + 1}. ${layerName.padEnd(14)} ~${avgImpact}% avg (${rank.insightCount} items)`);
  });

  // Quick wins - high value, low effort (actionable now)
  lines.push(...renderInsightSection(summary.quickWins, 'Quick Wins', '[!]', 3));

  // Strategic changes - high value, high effort
  lines.push(...renderInsightSection(summary.strategicChanges, 'Strategic', '[+]', 2));

  return lines.join('\n');
}
package/src/index.ts ADDED
@@ -0,0 +1,83 @@
1
+ /**
2
+ * PeakInfer - LLM Inference Performance Analysis
3
+ *
4
+ * @packageDocumentation
5
+ */
6
+
7
+ // Version
8
+ export { VERSION, VERSION_DISPLAY } from './version.js';
9
+
10
+ // Core types
11
+ export type {
12
+ Provider,
13
+ Callsite,
14
+ Patterns,
15
+ CallsitePatterns,
16
+ InferenceEvent,
17
+ RuntimeSummary,
18
+ JoinedOutput,
19
+ InferenceMap,
20
+ Insight,
21
+ InsightTemplate,
22
+ PerformanceEnvelope,
23
+ ScanResult,
24
+ ScannedFile,
25
+ ScanCandidate,
26
+ } from './types.js';
27
+
28
+ // Agent
29
+ export { Agent, plan } from './agent.js';
30
+ export type { AgentOptions, AgentCallbacks, AgentResults, PlanResult, ProgressPhase, ProgressData } from './agent.js';
31
+
32
+ // Scanner
33
+ export { scan } from './scanner.js';
34
+
35
+ // Analyzer
36
+ export { analyze, analyzeFile } from './analyzer.js';
37
+
38
+ // Runtime parser
39
+ export { parseEvents, aggregate } from './runtime.js';
40
+
41
+ // Joiner
42
+ export { join } from './joiner.js';
43
+
44
+ // Templates
45
+ export { loadTemplates } from './templates.js';
46
+
47
+ // Insights
48
+ export { evaluate } from './insights.js';
49
+
50
+ // Costs
51
+ export { loadPricing, getModelCost, calculateCost } from './costs.js';
52
+
53
+ // Envelopes
54
+ export { ENVELOPES, getEnvelope, getThroughputPercent } from './envelopes.js';
55
+
56
+ // Renderer
57
+ export { createRenderer } from './renderer.js';
58
+ export type { Renderer, RendererOptions } from './renderer.js';
59
+
60
+ // HTML
61
+ export { generateHTML } from './html.js';
62
+ export type { HTMLData } from './html.js';
63
+
64
+ // Artifacts
65
+ export {
66
+ saveArtifacts,
67
+ getOutputDir,
68
+ artifactsExist,
69
+ checkResumable,
70
+ loadArtifacts,
71
+ generateRunId,
72
+ } from './artifacts.js';
73
+ export type { ArtifactData, SaveOptions } from './artifacts.js';
74
+
75
+ // Run Identity
76
+ export {
77
+ getRunDir,
78
+ createManifest,
79
+ canResume,
80
+ loadManifest,
81
+ loadCachedArtifacts,
82
+ } from './runid.js';
83
+ export type { RunInputs, RunManifest, CachedArtifacts } from './runid.js';