@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
package/src/pdf.ts ADDED
@@ -0,0 +1,110 @@
1
+ /**
2
+ * PDF Generation Module
3
+ * Converts HTML reports to well-formatted PDFs using Puppeteer
4
+ *
5
+ * Julie Zhuo Design Principles:
6
+ * - "Reports exist to enable sharing, not exploration"
7
+ * - PDFs should be print-ready and professional
8
+ */
9
+
10
+ import puppeteer from 'puppeteer';
11
+
12
+ // =============================================================================
13
+ // TYPES
14
+ // =============================================================================
15
+
16
/**
 * Formatting settings accepted by `generatePDF`. Every field is optional.
 */
export interface PDFOptions {
  /** Paper size; `generatePDF` uses 'A4' when this is not set. */
  format?: 'A4' | 'Letter';
  /** Page margins per side, as length strings (the built-in defaults use millimetres, e.g. '20mm'). */
  margin?: Partial<Record<'top' | 'right' | 'bottom' | 'left', string>>;
  /** Render background graphics; `generatePDF` only disables this on an explicit `false`. */
  printBackground?: boolean;
}
26
+
27
+ // =============================================================================
28
+ // PUBLIC API
29
+ // =============================================================================
30
+
31
/**
 * Render an HTML string to a PDF file using a headless Puppeteer browser.
 *
 * Steps: launch the browser, load the HTML, force every `<details>` element
 * open, inject print-only CSS, write the PDF with a fixed header and footer,
 * and close the browser whether or not an earlier step failed.
 *
 * @param htmlContent - The HTML string to convert
 * @param outputPath - Where to save the PDF
 * @param options - PDF formatting options. Margin sides that are omitted fall
 *   back to the defaults (20mm top/bottom, 15mm left/right) individually, so a
 *   partial margin no longer discards the defaults for the other sides.
 * @returns A promise that resolves once the PDF has been written and the
 *   browser has been closed.
 * @throws Any error raised by Puppeteer while launching, loading or printing;
 *   the browser is still closed before the error propagates.
 */
export async function generatePDF(
  htmlContent: string,
  outputPath: string,
  options: PDFOptions = {}
): Promise<void> {
  // NOTE(review): these flags disable Chromium's sandbox. Confirm that
  // htmlContent is always generated locally and never attacker-controlled.
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // Wait for the network to go idle so linked styles have loaded.
    await page.setContent(htmlContent, {
      waitUntil: 'networkidle0',
    });

    // PDFs are static, so collapsible sections must be expanded before printing
    // ("Reports exist to enable sharing, not exploration").
    await page.evaluate(`
      document.querySelectorAll('details').forEach(d => d.setAttribute('open', ''));
    `);

    // Print-only rules: keep colours exact and avoid page breaks inside
    // findings, details blocks and sections.
    await page.addStyleTag({
      content: `
        @media print {
          body {
            -webkit-print-color-adjust: exact !important;
            print-color-adjust: exact !important;
          }
          .finding {
            break-inside: avoid;
          }
          details {
            break-inside: avoid;
          }
          section {
            break-inside: avoid-page;
          }
        }
      `,
    });

    // Resolve each margin side on its own so a caller who overrides only one
    // side keeps the defaults (and room for the header/footer) on the others.
    const requestedMargin = options.margin;
    const margin = {
      top: requestedMargin?.top ?? '20mm',
      right: requestedMargin?.right ?? '15mm',
      bottom: requestedMargin?.bottom ?? '20mm',
      left: requestedMargin?.left ?? '15mm',
    };

    await page.pdf({
      path: outputPath,
      format: options.format || 'A4',
      margin,
      // Backgrounds are printed unless the caller passes an explicit `false`.
      printBackground: options.printBackground !== false,
      displayHeaderFooter: true,
      headerTemplate: `
        <div style="font-size: 9px; color: #6b7280; width: 100%; text-align: center; padding: 5px 0;">
          PeakInfer Report
        </div>
      `,
      // The 15mm horizontal padding matches the default left/right page margin.
      footerTemplate: `
        <div style="font-size: 9px; color: #6b7280; width: 100%; display: flex; justify-content: space-between; padding: 5px 15mm;">
          <span>Generated by PeakInfer</span>
          <span>Page <span class="pageNumber"></span> of <span class="totalPages"></span></span>
        </div>
      `,
    });
  } finally {
    await browser.close();
  }
}
110
+
@@ -0,0 +1,392 @@
1
+ /**
2
+ * Deploy-Time Prediction Module (v1.5)
3
+ *
4
+ * Generates latency predictions for inference points based on:
5
+ * - Model characteristics (from pricing/envelopes data)
6
+ * - Historical runtime data (if available)
7
+ * - Pattern detection (streaming, batching, caching)
8
+ *
9
+ * Surfaces potential performance risks before deployment
10
+ * to enable informed deployment decisions.
11
+ */
12
+
13
+ import type {
14
+ Callsite,
15
+ InferenceMap,
16
+ RuntimeSummary,
17
+ PredictionResult,
18
+ InferencePointPrediction,
19
+ PredictionSummary,
20
+ PredictionFactor,
21
+ RiskLevel,
22
+ LatencyPercentiles,
23
+ } from './types.js';
24
+ import { listRuns, loadRun } from './history.js';
25
+
26
+ // =============================================================================
27
+ // CONSTANTS
28
+ // =============================================================================
29
+
30
/**
 * Heuristic default latency estimates per model, in milliseconds.
 *
 * Insertion order is significant: lookups iterate this table in order.
 * The final 'unknown' row is the default for unrecognised models.
 */
const MODEL_LATENCY_ESTIMATES: Record<string, LatencyPercentiles> = (() => {
  // Columns: model key, p50, p95, p99
  const rows: ReadonlyArray<readonly [string, number, number, number]> = [
    // OpenAI models
    ['gpt-4', 2000, 5000, 8000],
    ['gpt-4-turbo', 1500, 4000, 6000],
    ['gpt-4o', 1000, 2500, 4000],
    ['gpt-4o-mini', 500, 1500, 2500],
    ['gpt-3.5-turbo', 500, 1500, 2500],
    ['o1-preview', 5000, 15000, 30000],
    ['o1-mini', 2000, 6000, 10000],
    // Anthropic models
    ['claude-3-opus', 3000, 8000, 15000],
    ['claude-3-sonnet', 1500, 4000, 7000],
    ['claude-3-haiku', 500, 1500, 2500],
    ['claude-3.5-sonnet', 1200, 3500, 6000],
    // Google models
    ['gemini-pro', 1000, 3000, 5000],
    ['gemini-1.5-pro', 1500, 4000, 7000],
    ['gemini-1.5-flash', 300, 800, 1500],
    // Default for unknown models
    ['unknown', 1000, 3000, 5000],
  ];

  const table: Record<string, LatencyPercentiles> = {};
  for (const [modelKey, p50, p95, p99] of rows) {
    table[modelKey] = { p50, p95, p99 };
  }
  return table;
})();
55
+
56
/**
 * p95 latency cut-offs, in milliseconds, used to bucket predictions by risk.
 * A prediction falls into the first tier whose value it strictly exceeds.
 */
const RISK_THRESHOLDS = {
  /** p95 above 5s is high risk. */
  high: 5000,
  /** p95 above 2s is medium risk. */
  medium: 2000,
  /** p95 above 500ms is low risk. */
  low: 500,
};
62
+
63
+ // =============================================================================
64
+ // TYPES
65
+ // =============================================================================
66
+
67
/** Caller-supplied settings for prediction generation. */
export interface PredictOptions {
  /** User-specified target p95 latency, in milliseconds. */
  targetP95?: number;
  /** Include historical data if available. */
  includeHistorical?: boolean;
}
71
+
72
+ // =============================================================================
73
+ // HELPERS
74
+ // =============================================================================
75
+
76
/**
 * Look up the heuristic latency estimate for a model name.
 *
 * Resolution order (case-insensitive):
 *  1. an exact match on a table key;
 *  2. otherwise the LONGEST table key contained in the name, so versioned ids
 *     such as "gpt-4o-mini-2024-07-18" resolve to 'gpt-4o-mini' rather than to
 *     the shorter 'gpt-4' key that happens to come first in the table;
 *  3. otherwise the 'unknown' default.
 *
 * @param model - Model identifier, or null when the model is not known.
 * @returns The matching latency percentiles, or the 'unknown' defaults.
 */
function getModelLatencyEstimate(model: string | null): LatencyPercentiles {
  const fallback = MODEL_LATENCY_ESTIMATES['unknown'];
  if (!model) return fallback;

  const normalized = model.toLowerCase();

  let bestEstimate: LatencyPercentiles | undefined;
  let bestKeyLength = 0;

  for (const [key, estimate] of Object.entries(MODEL_LATENCY_ESTIMATES)) {
    const candidate = key.toLowerCase();

    // Exact match wins immediately.
    if (candidate === normalized) {
      return estimate;
    }

    // Prefer the most specific (longest) key contained in the model name.
    if (candidate.length > bestKeyLength && normalized.includes(candidate)) {
      bestEstimate = estimate;
      bestKeyLength = candidate.length;
    }
  }

  return bestEstimate ?? fallback;
}
92
+
93
/**
 * Bucket a p95 latency into a risk level.
 *
 * Tiers are checked from most to least severe; a value must be strictly
 * greater than a threshold to land in that tier. Anything at or below the
 * lowest threshold is 'neutral'.
 *
 * @param p95 - Predicted p95 latency in milliseconds.
 * @returns The first tier whose threshold is exceeded, else 'neutral'.
 */
function calculateRiskLevel(p95: number): RiskLevel {
  const tiers: Array<[number, RiskLevel]> = [
    [RISK_THRESHOLDS.high, 'high'],
    [RISK_THRESHOLDS.medium, 'medium'],
    [RISK_THRESHOLDS.low, 'low'],
  ];

  const exceeded = tiers.find(([limit]) => p95 > limit);
  return exceeded ? exceeded[1] : 'neutral';
}
102
+
103
/**
 * Convert a p95 latency into a risk score capped at 100.
 *
 * The scale is linear: 0ms maps to 0 and 10000ms maps to 100; anything slower
 * is clamped to 100.
 *
 * @param p95 - Predicted p95 latency in milliseconds.
 * @returns The rounded score, never greater than 100.
 */
function calculateRiskScore(p95: number): number {
  const rounded = Math.round((p95 / 10000) * 100);
  return rounded > 100 ? 100 : rounded;
}
110
+
111
/**
 * Build the list of factors that explain a callsite's latency prediction.
 *
 * Factors are emitted in a fixed order: model complexity (only when a model is
 * set), streaming (always one of "enabled" / "no streaming"), batching,
 * caching, retries, then provider (only for recognised providers).
 *
 * @param callsite - The inference point to describe.
 * @returns The applicable factors, in the order above.
 */
function generateFactors(callsite: Callsite): PredictionFactor[] {
  const { model, provider, patterns } = callsite;
  const collected: PredictionFactor[] = [];

  // Model complexity: a name containing any of these markers counts as heavy.
  if (model) {
    const loweredModel = model.toLowerCase();
    const heavy = ['opus', 'gpt-4', 'o1'].some(marker => loweredModel.includes(marker));
    collected.push({
      name: 'Model complexity',
      impact: heavy ? 'negative' : 'positive',
      description: heavy
        ? `${model} is a high-capability model with longer inference times`
        : `${model} is optimized for speed`,
      weight: 0.4,
    });
  }

  // Streaming always contributes exactly one factor.
  collected.push(
    patterns?.streaming
      ? {
          name: 'Streaming enabled',
          impact: 'positive',
          description: 'Streaming reduces perceived latency with incremental responses',
          weight: 0.2,
        }
      : {
          name: 'No streaming',
          impact: 'negative',
          description: 'Synchronous requests block until complete response',
          weight: 0.1,
        }
  );

  // Optional patterns: each adds its factor only when detected.
  const optionalFactors: Array<[unknown, PredictionFactor]> = [
    [
      patterns?.batching,
      {
        name: 'Batching enabled',
        impact: 'positive',
        description: 'Batching improves throughput and reduces per-request overhead',
        weight: 0.2,
      },
    ],
    [
      patterns?.caching,
      {
        name: 'Caching enabled',
        impact: 'positive',
        description: 'Cache hits bypass LLM entirely for near-zero latency',
        weight: 0.3,
      },
    ],
    [
      patterns?.retries,
      {
        name: 'Retry logic',
        impact: 'neutral',
        description: 'Retries improve reliability but may increase tail latency',
        weight: 0.1,
      },
    ],
  ];
  for (const [detected, factor] of optionalFactors) {
    if (detected) {
      collected.push(factor);
    }
  }

  // Provider: hosted APIs vs. self-hosted runtimes; other providers add nothing.
  if (provider) {
    const loweredProvider = provider.toLowerCase();
    if (['openai', 'anthropic'].includes(loweredProvider)) {
      collected.push({
        name: 'Cloud provider',
        impact: 'neutral',
        description: `${provider} hosted service with variable latency`,
        weight: 0.1,
      });
    } else if (['vllm', 'sglang', 'tgi', 'ollama'].includes(loweredProvider)) {
      collected.push({
        name: 'Self-hosted runtime',
        impact: 'positive',
        description: 'Self-hosted inference offers consistent, controllable latency',
        weight: 0.2,
      });
    }
  }

  return collected;
}
201
+
202
/**
 * Scale a base latency estimate according to detected patterns.
 *
 * - Batching applies a 0.8 factor.
 * - Caching applies a 0.6 factor (the original note attributes this to an
 *   assumed 50% cache hit rate).
 * - Streaming is deliberately ignored: it improves perceived latency, not the
 *   total latency reported here.
 *
 * Both factors compound when both patterns are present. Each percentile is
 * rounded to the nearest millisecond.
 *
 * @param base - Unadjusted percentiles for the model.
 * @param patterns - Detected patterns for the callsite; may be absent.
 * @returns A new percentile object; `base` is not modified.
 */
function adjustLatencyForPatterns(
  base: LatencyPercentiles,
  patterns: Callsite['patterns']
): LatencyPercentiles {
  const batchingFactor = patterns?.batching ? 0.8 : 1.0;
  const cachingFactor = patterns?.caching ? 0.6 : 1.0;
  const combined = batchingFactor * cachingFactor;

  const scale = (ms: number): number => Math.round(ms * combined);

  return {
    p50: scale(base.p50),
    p95: scale(base.p95),
    p99: scale(base.p99),
  };
}
230
+
231
+ // =============================================================================
232
+ // PUBLIC API
233
+ // =============================================================================
234
+
235
/**
 * Produce a latency prediction for every callsite in an inference map.
 *
 * For each callsite: take the model's base estimate, adjust it for detected
 * patterns, derive the risk level and score from the adjusted p95, and attach
 * the explanatory factors plus a confidence rating. Confidence is 'high' when
 * both a model and at least one truthy pattern are present, 'medium' when only
 * one of them is, and 'low' otherwise.
 *
 * @param inferenceMap - Map whose `callsites` are analysed.
 * @param historicalRuns - Number of prior runs; recorded as `basedOnRuns`.
 * @param options - Prediction options; `targetP95` feeds the summary.
 * @returns Per-callsite predictions, the summary and generation metadata.
 */
export function generatePredictions(
  inferenceMap: InferenceMap,
  historicalRuns: number = 0,
  options: PredictOptions = {}
): PredictionResult {
  const reasonByConfidence = {
    low: 'Limited information available for accurate prediction',
    medium: 'Based on model characteristics',
    high: 'Based on model and pattern analysis',
  };

  const predictions: InferencePointPrediction[] = inferenceMap.callsites.map(
    (callsite): InferencePointPrediction => {
      const predictedLatency = adjustLatencyForPatterns(
        getModelLatencyEstimate(callsite.model),
        callsite.patterns
      );

      // Confidence depends on how much the scan learned about the callsite.
      const modelKnown = Boolean(callsite.model);
      const anyPatternDetected = Object.values(callsite.patterns || {}).some(v => v);
      let confidence: 'high' | 'medium' | 'low';
      if (modelKnown && anyPatternDetected) {
        confidence = 'high';
      } else if (modelKnown || anyPatternDetected) {
        confidence = 'medium';
      } else {
        confidence = 'low';
      }

      return {
        inferencePointId: callsite.id,
        location: `${callsite.file}:${callsite.line}`,
        provider: callsite.provider || undefined,
        model: callsite.model || undefined,
        predictedLatency,
        risk: calculateRiskLevel(predictedLatency.p95),
        riskScore: calculateRiskScore(predictedLatency.p95),
        factors: generateFactors(callsite),
        confidence,
        confidenceReason: reasonByConfidence[confidence],
      };
    }
  );

  return {
    predictions,
    summary: calculateSummary(predictions, options.targetP95),
    targetP95: options.targetP95,
    generatedAt: new Date().toISOString(),
    basedOnRuns: historicalRuns,
  };
}
294
+
295
/**
 * Aggregate per-callsite predictions into a summary.
 *
 * All statistics are gathered in a single pass with a running maximum. This
 * avoids `Math.max(...values)`, which spreads the whole array into call
 * arguments and can throw a RangeError on very large inputs.
 *
 * @param predictions - Predictions to summarise.
 * @param targetP95 - Optional p95 budget in milliseconds. When truthy,
 *   `budgetExceeded` reports whether the worst p95 is above it; otherwise
 *   `budgetExceeded` is undefined.
 * @returns Counts per risk level (neutral predictions are counted only in
 *   `totalPoints`), the rounded mean p95 and the worst p95. An empty input
 *   yields all zeros and no `budgetExceeded` field.
 */
function calculateSummary(
  predictions: InferencePointPrediction[],
  targetP95?: number
): PredictionSummary {
  if (predictions.length === 0) {
    return {
      totalPoints: 0,
      highRiskCount: 0,
      mediumRiskCount: 0,
      lowRiskCount: 0,
      averageP95: 0,
      worstP95: 0,
    };
  }

  let highRiskCount = 0;
  let mediumRiskCount = 0;
  let lowRiskCount = 0;
  let totalP95 = 0;
  let worstP95 = -Infinity;

  for (const prediction of predictions) {
    const p95 = prediction.predictedLatency.p95;
    totalP95 += p95;
    // Math.max (not `>`) so a NaN value still propagates as before.
    worstP95 = Math.max(worstP95, p95);

    if (prediction.risk === 'high') {
      highRiskCount += 1;
    } else if (prediction.risk === 'medium') {
      mediumRiskCount += 1;
    } else if (prediction.risk === 'low') {
      lowRiskCount += 1;
    }
  }

  return {
    totalPoints: predictions.length,
    highRiskCount,
    mediumRiskCount,
    lowRiskCount,
    averageP95: Math.round(totalP95 / predictions.length),
    worstP95,
    budgetExceeded: targetP95 ? worstP95 > targetP95 : undefined,
  };
}
327
+
328
/**
 * Generate predictions and record how many historical runs exist for `path`.
 *
 * The historical runs currently contribute only their count (reported as
 * `basedOnRuns`); the predictions themselves remain heuristic. Using actual
 * runtime data from those runs is left as future work.
 *
 * @param inferenceMap - Map whose callsites are analysed.
 * @param path - Location passed to `listRuns` to find historical runs.
 * @param options - Prediction options forwarded to `generatePredictions`.
 * @returns The prediction result.
 */
export async function generatePredictionsWithHistory(
  inferenceMap: InferenceMap,
  path: string,
  options: PredictOptions = {}
): Promise<PredictionResult> {
  const priorRunCount = listRuns(path).length;
  return generatePredictions(inferenceMap, priorRunCount, options);
}
349
+
350
/**
 * Render a prediction result as a multi-line, human-readable summary.
 *
 * Layout: headline, blank line, one line per non-empty risk tier, blank line,
 * average and worst p95, and - only when a target p95 is set - a budget
 * verdict line.
 *
 * @param result - The prediction result to describe.
 * @returns The summary lines joined with newlines.
 */
export function formatPredictionSummary(result: PredictionResult): string {
  const { summary, targetP95 } = result;

  // Risk tiers with a zero count are omitted.
  const riskLines: string[] = [];
  if (summary.highRiskCount > 0) {
    riskLines.push(` [!] ${summary.highRiskCount} high-risk (p95 > ${RISK_THRESHOLDS.high}ms)`);
  }
  if (summary.mediumRiskCount > 0) {
    riskLines.push(` [*] ${summary.mediumRiskCount} medium-risk (p95 > ${RISK_THRESHOLDS.medium}ms)`);
  }
  if (summary.lowRiskCount > 0) {
    riskLines.push(` [-] ${summary.lowRiskCount} low-risk`);
  }

  const output: string[] = [
    `Deploy-time prediction for ${summary.totalPoints} inference points`,
    '',
    ...riskLines,
    '',
    ` Average p95: ${summary.averageP95}ms`,
    ` Worst p95: ${summary.worstP95}ms`,
  ];

  // The budget verdict is shown only when a target was supplied.
  if (targetP95) {
    output.push(
      summary.budgetExceeded
        ? ` [!] Budget exceeded: worst p95 ${summary.worstP95}ms > target ${targetP95}ms`
        : ` [OK] Within budget: worst p95 ${summary.worstP95}ms <= target ${targetP95}ms`
    );
  }

  return output.join('\n');
}
386
+
387
/**
 * Report whether a prediction result contains at least one high-risk item.
 *
 * @param result - The prediction result to inspect.
 * @returns True when the summary's high-risk count is greater than zero.
 */
export function hasHighRiskPredictions(result: PredictionResult): boolean {
  const { highRiskCount } = result.summary;
  return highRiskCount > 0;
}
@@ -0,0 +1,88 @@
1
+ /**
2
+ * =============================================================================
3
+ * SYNC NOTE: This file is copied from peakinfer-site (SOURCE OF TRUTH)
4
+ * Source: peakinfer-site/lib/prompts/loader.ts
5
+ *
6
+ * DO NOT MODIFY THIS FILE DIRECTLY IN THE CLI REPO.
7
+ * All changes must be made in peakinfer-site first, then synced here.
8
+ * =============================================================================
9
+ */
10
+
11
+ /**
12
+ * Prompt Loader
13
+ * Loads prompts from YAML config files for consistency with CLI
14
+ */
15
+
16
+ import * as fs from 'fs';
17
+ import * as path from 'path';
18
+ import * as yaml from 'yaml';
19
+ import { fileURLToPath } from 'url';
20
+
21
/**
 * Shape of a prompt definition as read from a YAML file by `loadPrompt`.
 *
 * The parsed YAML is not schema-validated: at load time only `id` and
 * `system` are checked for presence, so the remaining fields are trusted
 * to match this declaration.
 */
export interface PromptConfig {
  // --- Identification / metadata ---

  /** Prompt identifier; must be present (and non-empty) for the config to load. */
  id: string;
  name: string;
  version: string;
  description: string;

  // --- Prompt text ---

  /** System prompt text; must be present (and non-empty) for the config to load. */
  system: string;
  /** Optional user-message template; presumably uses `{{key}}` placeholders. */
  user_template?: string;

  // --- Optional descriptive sections (not interpreted in this module) ---

  input_schema?: Record<string, string>;
  output_format?: unknown;
  constraints?: string[];
  defaults?: Record<string, unknown>;
}
33
+
34
/**
 * Load a prompt config from `prompts/<promptId>.yaml`, resolved two
 * directories above this module.
 *
 * Note: No caching - the file is read and parsed on every call so the latest
 * prompt text is always used.
 *
 * @param promptId - Base name of the YAML file, without the `.yaml` extension.
 * @returns The parsed prompt config.
 * @throws Error when the file does not exist, when the YAML document is not a
 *   mapping (for example an empty file, which parses to `null`), or when the
 *   required `id` / `system` fields are missing or empty. Errors raised by the
 *   file read or the YAML parser itself propagate unchanged.
 */
export function loadPrompt(promptId: string): PromptConfig {
  // Determine prompts directory path - `__dirname` is not available in ESM,
  // so derive it from this module's URL.
  const moduleFile = fileURLToPath(import.meta.url);
  const moduleDir = path.dirname(moduleFile);

  // Go up two levels from this module's directory, then into prompts/
  const promptsDir = path.resolve(moduleDir, '../../prompts');
  const promptPath = path.join(promptsDir, `${promptId}.yaml`);

  if (!fs.existsSync(promptPath)) {
    throw new Error(`Prompt config not found: ${promptPath}`);
  }

  const content = fs.readFileSync(promptPath, 'utf-8');
  const parsed: unknown = yaml.parse(content);

  // An empty document parses to null and a bare scalar to a primitive; reject
  // both explicitly instead of failing with a TypeError on property access.
  if (parsed === null || typeof parsed !== 'object') {
    throw new Error(`Invalid prompt config: ${promptId} - expected a YAML mapping`);
  }

  // Validate required fields
  const config = parsed as Partial<PromptConfig>;
  if (!config.id || !config.system) {
    throw new Error(`Invalid prompt config: ${promptId} - missing required fields`);
  }

  return config as PromptConfig;
}
61
+
62
/**
 * Fill `{{key}}` placeholders in a template with the supplied values.
 *
 * Substitution is literal and single-pass:
 * - values are inserted verbatim, so sequences such as `$&`, `$1` or `$$`
 *   inside a value (common in source code) are not treated as replacement
 *   patterns;
 * - keys are matched literally, so regex metacharacters in a key have no
 *   special meaning;
 * - text inserted from one value is never re-scanned, so a value that happens
 *   to contain `{{otherKey}}` is left as-is.
 *
 * Placeholders with no matching key are left untouched.
 *
 * @param template - Text containing zero or more `{{key}}` placeholders.
 * @param variables - Map from placeholder name to replacement text.
 * @returns The template with every known placeholder replaced.
 */
export function formatUserMessage(
  template: string,
  variables: Record<string, string>
): string {
  const lookup = new Map(Object.entries(variables));
  if (lookup.size === 0) {
    // Nothing to substitute; also avoids building an empty alternation.
    return template;
  }

  // Escape regex metacharacters so each key only matches itself.
  const alternation = [...lookup.keys()]
    .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  const placeholder = new RegExp(`\\{\\{(${alternation})\\}\\}`, 'g');

  // A replacer function (rather than a replacement string) keeps `$`
  // sequences in the value from being interpreted.
  return template.replace(
    placeholder,
    (match: string, key: string) => lookup.get(key) ?? match
  );
}
75
+
76
/**
 * Load the `unified-analyzer` prompt config and expose its two prompt texts.
 *
 * @returns The config's `system` text, and its `user_template` as
 *   `userTemplate` (an empty string when the template is absent or falsy).
 * @throws Whatever `loadPrompt('unified-analyzer')` throws.
 */
export function getUnifiedAnalyzerPrompt(): {
  system: string;
  userTemplate: string;
} {
  const { system, user_template: template } = loadPrompt('unified-analyzer');
  const userTemplate = template || '';
  return { system, userTemplate };
}