@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
package/fixtures/demo-project/src/llm-client.ts ADDED
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Sample LLM Client for PeakInfer v1.5 Demo
3
+ *
4
+ * This file contains various LLM inference patterns to demonstrate
5
+ * the v1.5 features: predictions, counterfactuals, and comparison.
6
+ */
7
+
8
+ import OpenAI from 'openai';
9
+ import Anthropic from '@anthropic-ai/sdk';
10
+
11
+ // Initialize clients
12
+ const openai = new OpenAI();
13
+ const anthropic = new Anthropic();
14
+
15
+ // ============================================================================
16
+ // High-latency calls (will trigger prediction warnings)
17
+ // ============================================================================
18
+
19
+ /**
20
+ * Chat completion with GPT-4 (high latency, high cost)
21
+ * Prediction: p95 ~5000ms, high risk
22
+ */
23
+ export async function chatWithGPT4(prompt: string): Promise<string> {
24
+ const response = await openai.chat.completions.create({
25
+ model: 'gpt-4',
26
+ messages: [{ role: 'user', content: prompt }],
27
+ max_tokens: 2000,
28
+ });
29
+ return response.choices[0].message.content || '';
30
+ }
31
+
32
+ /**
33
+ * Complex analysis with Claude Opus (highest latency)
34
+ * Prediction: p95 ~8000ms, high risk
35
+ */
36
+ export async function analyzeWithOpus(document: string): Promise<string> {
37
+ const response = await anthropic.messages.create({
38
+ model: 'claude-3-opus-20240229',
39
+ max_tokens: 4000,
40
+ messages: [{ role: 'user', content: `Analyze this document:\n${document}` }],
41
+ });
42
+ return response.content[0].type === 'text' ? response.content[0].text : '';
43
+ }
44
+
45
+ // ============================================================================
46
+ // Medium-latency calls
47
+ // ============================================================================
48
+
49
+ /**
50
+ * Summarization with GPT-4 Turbo (medium latency)
51
+ * Prediction: p95 ~4000ms, medium risk
52
+ */
53
+ export async function summarize(text: string): Promise<string> {
54
+ const response = await openai.chat.completions.create({
55
+ model: 'gpt-4-turbo',
56
+ messages: [
57
+ { role: 'system', content: 'You are a summarization assistant.' },
58
+ { role: 'user', content: `Summarize: ${text}` },
59
+ ],
60
+ max_tokens: 500,
61
+ });
62
+ return response.choices[0].message.content || '';
63
+ }
64
+
65
+ /**
66
+ * Translation with Claude Sonnet (medium latency, good value)
67
+ * Prediction: p95 ~4000ms, medium risk
68
+ */
69
+ export async function translate(text: string, targetLang: string): Promise<string> {
70
+ const response = await anthropic.messages.create({
71
+ model: 'claude-3-sonnet-20240229',
72
+ max_tokens: 2000,
73
+ messages: [{ role: 'user', content: `Translate to ${targetLang}: ${text}` }],
74
+ });
75
+ return response.content[0].type === 'text' ? response.content[0].text : '';
76
+ }
77
+
78
+ // ============================================================================
79
+ // Low-latency calls (optimized patterns)
80
+ // ============================================================================
81
+
82
+ /**
83
+ * Quick chat with GPT-4o-mini (low latency, low cost)
84
+ * Prediction: p95 ~1500ms, low risk
85
+ * Counterfactual: Other calls could use this model
86
+ */
87
+ export async function quickChat(prompt: string): Promise<string> {
88
+ const response = await openai.chat.completions.create({
89
+ model: 'gpt-4o-mini',
90
+ messages: [{ role: 'user', content: prompt }],
91
+ max_tokens: 200,
92
+ });
93
+ return response.choices[0].message.content || '';
94
+ }
95
+
96
+ /**
97
+ * Fast response with Claude Haiku (lowest latency)
98
+ * Prediction: p95 ~1500ms, low risk
99
+ */
100
+ export async function fastResponse(prompt: string): Promise<string> {
101
+ const response = await anthropic.messages.create({
102
+ model: 'claude-3-haiku-20240307',
103
+ max_tokens: 500,
104
+ messages: [{ role: 'user', content: prompt }],
105
+ });
106
+ return response.content[0].type === 'text' ? response.content[0].text : '';
107
+ }
108
+
109
+ // ============================================================================
110
+ // Streaming (will trigger streaming counterfactual for non-streaming calls)
111
+ // ============================================================================
112
+
113
+ /**
114
+ * Streaming chat (good pattern - low perceived latency)
115
+ * Counterfactual: Other calls should enable streaming
116
+ */
117
+ export async function* streamingChat(prompt: string): AsyncGenerator<string> {
118
+ const stream = await openai.chat.completions.create({
119
+ model: 'gpt-4o',
120
+ messages: [{ role: 'user', content: prompt }],
121
+ stream: true,
122
+ });
123
+
124
+ for await (const chunk of stream) {
125
+ const content = chunk.choices[0]?.delta?.content;
126
+ if (content) yield content;
127
+ }
128
+ }
129
+
130
+ // ============================================================================
131
+ // Embeddings (separate from chat)
132
+ // ============================================================================
133
+
134
+ /**
135
+ * Generate embeddings (low latency, batch-friendly)
136
+ * Counterfactual: Should enable batching
137
+ */
138
+ export async function embed(text: string): Promise<number[]> {
139
+ const response = await openai.embeddings.create({
140
+ model: 'text-embedding-3-small',
141
+ input: text,
142
+ });
143
+ return response.data[0].embedding;
144
+ }
145
+
146
+ /**
147
+ * Batch embedding (good pattern)
148
+ */
149
+ export async function batchEmbed(texts: string[]): Promise<number[][]> {
150
+ const response = await openai.embeddings.create({
151
+ model: 'text-embedding-3-small',
152
+ input: texts,
153
+ });
154
+ return response.data.map(d => d.embedding);
155
+ }
package/package.json ADDED
@@ -0,0 +1,65 @@
1
+ {
2
+ "name": "@peakinfer/cli",
3
+ "version": "1.0.133",
4
+ "description": "LLM inference performance analysis CLI",
5
+ "type": "module",
6
+ "main": "dist/cli.js",
7
+ "bin": {
8
+ "peakinfer": "dist/cli.js"
9
+ },
10
+ "scripts": {
11
+ "prebuild": "npm version patch --no-git-tag-version",
12
+ "build": "tsc",
13
+ "build:noversion": "tsc",
14
+ "dev": "tsx src/cli.ts",
15
+ "start": "node dist/cli.js",
16
+ "test": "vitest",
17
+ "test:watch": "vitest --watch",
18
+ "test:perf": "vitest run tests/performance/",
19
+ "benchmark": "tsx scripts/benchmark.ts",
20
+ "benchmark:ci": "tsx scripts/benchmark.ts --ci",
21
+ "sync": "bash scripts/sync-from-site.sh",
22
+ "sync:check": "bash scripts/sync-from-site.sh --check",
23
+ "typecheck": "tsc --noEmit",
24
+ "lint": "eslint src/",
25
+ "clean": "rm -rf dist"
26
+ },
27
+ "keywords": [
28
+ "llm",
29
+ "inference",
30
+ "performance",
31
+ "analysis",
32
+ "openai",
33
+ "anthropic"
34
+ ],
35
+ "author": "PeakInfer",
36
+ "license": "Apache-2.0",
37
+ "repository": {
38
+ "type": "git",
39
+ "url": "https://github.com/peakinfer/cli"
40
+ },
41
+ "engines": {
42
+ "node": ">=18.0.0"
43
+ },
44
+ "dependencies": {
45
+ "@anthropic-ai/claude-agent-sdk": "^0.1.76",
46
+ "@anthropic-ai/sdk": "^0.27.0",
47
+ "chalk": "^5.6.2",
48
+ "commander": "^12.0.0",
49
+ "dotenv": "^17.2.3",
50
+ "glob": "^10.3.10",
51
+ "ignore": "^5.3.0",
52
+ "ora": "^9.0.0",
53
+ "posthog-node": "^4.0.0",
54
+ "puppeteer": "^24.33.0",
55
+ "yaml": "^2.3.4",
56
+ "zod": "^3.22.0"
57
+ },
58
+ "devDependencies": {
59
+ "@types/glob": "^8.1.0",
60
+ "@types/node": "^20.10.0",
61
+ "tsx": "^4.7.0",
62
+ "typescript": "^5.3.0",
63
+ "vitest": "^1.2.0"
64
+ }
65
+ }
package/prompts/agent-analyzer.yaml ADDED
@@ -0,0 +1,47 @@
1
+ # Agent-based Semantic Analyzer Prompt
2
+ # Used for multi-step code analysis with tool use
3
+ id: agent-analyzer
4
+ version: "1.0.0"
5
+ description: Expert code analyst for identifying LLM inference points with tool use capability
6
+
7
+ prompt: |
8
+ You are an expert code analyst specializing in identifying LLM/AI inference points in source code.
9
+
10
+ Your task is to analyze code and find ALL actual LLM inference points with accurate provider and model information.
11
+
12
+ ## CRITICAL RULES
13
+
14
+ ### What IS an inference point (DO report these):
15
+ - client.chat.completions.create() - OpenAI API call
16
+ - client.messages.create() - Anthropic API call
17
+ - client.embeddings.create() - OpenAI embeddings call
18
+ - predictor(question=...) - DSPy module invocation (after dspy.Predict/ChainOfThought)
19
+ - chain.invoke() - LangChain invocation
20
+ - llm.generate() - Direct generation calls
21
+
22
+ ### What is NOT an inference point (DO NOT report these):
23
+ - Client initialization: openai.OpenAI(), anthropic.Anthropic()
24
+ - Import statements
25
+ - Variable assignments: model = "gpt-4o"
26
+ - Class/function definitions
27
+ - DSPy Predict/ChainOfThought creation (only report the invocation)
28
+
29
+ ### Model Extraction Rules:
30
+ 1. Look at the model= parameter in the function call
31
+ 2. Trace variables back to their definitions
32
+ 3. For DSPy: find dspy.LM("provider/model") and extract the model part
33
+ 4. Return the FULL exact model name (e.g., "gpt-4o-mini" not "gpt-4")
34
+
35
+ ### Framework Detection:
36
+ - DSPy: look for dspy imports, dspy.Predict, dspy.ChainOfThought
37
+ - LangChain: look for langchain imports, ChatOpenAI, LLMChain
38
+ - LlamaIndex: look for llama_index imports
39
+
40
+ ## WORKFLOW
41
+
42
+ 1. Use search_pattern to find potential inference point locations
43
+ 2. Use read_file to examine the code in detail
44
+ 3. Use trace_variable to find where models/clients are defined
45
+ 4. Use report_callsites to report your findings
46
+
47
+ Be thorough but precise. Only report actual inference points, not initialization or configuration.
package/prompts/ci-gate.yaml ADDED
@@ -0,0 +1,98 @@
1
+ id: ci-gate
2
+ name: CI/CD Gate Evaluator
3
+ version: "1.0"
4
+ description: Evaluates analysis results to determine CI pass/warning/fail status
5
+
6
+ context:
7
+ - analysis_results: The full analysis results object
8
+ - baseline: Previous run baseline for comparison (optional)
9
+ - target_p95: Target p95 latency in ms (optional)
10
+ - fail_on_regression: Whether to fail on any regression
11
+
12
+ output_format: json
13
+
14
+ prompt: |
15
+ <role>
16
+ You are a CI/CD quality gate evaluating PeakInfer analysis results.
17
+ Your goal is to determine if the code should proceed through the pipeline.
18
+ </role>
19
+
20
+ <instructions>
21
+ Evaluate the analysis results and determine the appropriate status:
22
+
23
+ **FAIL conditions (any one triggers fail):**
24
+ - p95 latency exceeds target by >50%
25
+ - p95 latency increased by >50% vs baseline
26
+ - Estimated cost increased by >100% vs baseline
27
+ - Any critical severity insights
28
+ - Reliability score dropped significantly
29
+
30
+ **WARNING conditions (any one triggers warning):**
31
+ - p95 latency exceeds target by 25-50%
32
+ - p95 latency increased by 25-50% vs baseline
33
+ - Estimated cost increased by 50-100% vs baseline
34
+ - New drift signals detected
35
+ - Multiple warning severity insights (>3)
36
+
37
+ **PASS conditions:**
38
+ - All metrics within acceptable ranges
39
+ - No regressions vs baseline
40
+ - No critical issues
41
+ </instructions>
42
+
43
+ <output_schema>
44
+ {
45
+ "status": "pass" | "warning" | "fail",
46
+ "exit_code": 0 | 1 | 2,
47
+ "reasons": ["reason1", "reason2"],
48
+ "metrics": {
49
+ "inference_points": number,
50
+ "p95_latency_ms": number | null,
51
+ "drift_count": number,
52
+ "critical_count": number,
53
+ "warning_count": number
54
+ }
55
+ }
56
+ </output_schema>
57
+
58
+ <constraints>
59
+ - Always return valid JSON
60
+ - Include all metrics even if null
61
+ - Reasons array should be empty for pass status
62
+ - Exit codes: 0=pass, 1=warning, 2=fail
63
+ </constraints>
64
+
65
+ example_evaluations:
66
+ - input:
67
+ p95_latency: 2400
68
+ target_p95: 1500
69
+ baseline_p95: 1800
70
+ critical_insights: 1
71
+ output:
72
+ status: fail
73
+ exit_code: 2
74
+ reasons:
75
+ - "p95 latency 2400ms exceeds target 1500ms by 60%"
76
+ - "1 critical insight detected"
77
+
78
+ - input:
79
+ p95_latency: 1900
80
+ target_p95: 1500
81
+ baseline_p95: 1800
82
+ drift_count: 2
83
+ output:
84
+ status: warning
85
+ exit_code: 1
86
+ reasons:
87
+ - "p95 latency 1900ms exceeds target 1500ms by 27%"
88
+ - "2 drift signals detected"
89
+
90
+ - input:
91
+ p95_latency: 1400
92
+ target_p95: 1500
93
+ baseline_p95: 1500
94
+ critical_insights: 0
95
+ output:
96
+ status: pass
97
+ exit_code: 0
98
+ reasons: []
@@ -0,0 +1,178 @@
1
+ id: correlation-analyzer
2
+ name: Code-Runtime Correlation Analyzer
3
+ version: "1.0"
4
+ description: |
5
+ Correlates static code analysis with runtime telemetry to detect drift
6
+ and mismatches between intended and actual behavior.
7
+
8
+ prompt: |
9
+ <role>
10
+ You are a systems analyst specializing in code-runtime correlation for LLM applications.
11
+ Your job is to compare what the CODE says vs what RUNTIME shows, finding drift and gaps.
12
+ </role>
13
+
14
+ <background>
15
+ You receive two sources of truth:
16
+ 1. STATIC: Inference points found in source code (file, line, provider, model, patterns)
17
+ 2. RUNTIME: Events observed in production (provider, model, latency, tokens)
18
+
19
+ Types of drift:
20
+ - Code-only: Inference points in code but no runtime events (dead code?)
21
+ - Runtime-only: Events with no matching code inference point (shadow APIs?)
22
+ - Model mismatch: Code says gpt-4, runtime shows gpt-4o-mini
23
+ - Pattern mismatch: Code sets streaming=true, but no TTFT in runtime
24
+ - Provider mismatch: Code uses OpenAI, runtime shows Anthropic
25
+
26
+ Why drift matters:
27
+ - Dead code wastes maintenance effort
28
+ - Shadow APIs are security/compliance risks
29
+ - Mismatches indicate configuration drift or fallback behavior
30
+ - Pattern drift suggests code intent differs from runtime reality
31
+ </background>
32
+
33
+ <instructions>
34
+ Analyze the correlation between static and runtime data:
35
+
36
+ 1. MATCH INFERENCE POINTS TO RUNTIME EVENTS
37
+ - Match by provider + model combination
38
+ - Note: exact line matching is not possible; use semantic matching instead
39
+ - Track matched, code-only, and runtime-only sets
40
+
41
+ 2. DETECT DRIFT SIGNALS
42
+ - Code-only: Inference points with zero matching runtime events
43
+ - Runtime-only: Provider/model combos in runtime not in code
44
+ - Model mismatch: Same provider but different models
45
+ - Pattern mismatch: Code says streaming, runtime has no TTFT
46
+ - Pattern mismatch: Code says caching, runtime shows no cache hits
47
+
48
+ 3. ANALYZE INTENT VS REALITY
49
+ - Is the code doing what it intended?
50
+ - Are fallbacks being triggered unexpectedly?
51
+ - Are caching patterns actually working?
52
+ - Is the configured model actually being used?
53
+
54
+ 4. GENERATE RECOMMENDATIONS
55
+ - Which dead code to investigate/remove?
56
+ - Which shadow APIs need instrumentation?
57
+ - How to align code with runtime behavior?
58
+ - Configuration fixes needed?
59
+
60
+ 5. COMPUTE ALIGNMENT SCORE
61
+ - 1.0 = perfect alignment (all code has runtime, all runtime has code)
62
+ - 0.0 = complete disconnect
63
+ - Penalize for: code-only, runtime-only, mismatches
64
+ </instructions>
65
+
66
+ <output_format>
67
+ Return valid JSON:
68
+ {
69
+ "drift_signals": [
70
+ {
71
+ "type": "codeOnly|runtimeOnly|modelMismatch|patternMismatch|providerMismatch",
72
+ "severity": "critical|warning|info",
73
+ "code_location": "file:line or null",
74
+ "code_details": {
75
+ "provider": "provider_name",
76
+ "model": "model_name",
77
+ "patterns": {}
78
+ },
79
+ "runtime_details": {
80
+ "provider": "provider_name",
81
+ "model": "model_name",
82
+ "call_count": 0,
83
+ "patterns_observed": {}
84
+ },
85
+ "evidence": "What was found",
86
+ "explanation": "Why this matters",
87
+ "recommendation": "What to do"
88
+ }
89
+ ],
90
+ "correlation_summary": {
91
+ "total_code_callsites": 0,
92
+ "total_runtime_models": 0,
93
+ "matched": 0,
94
+ "code_only": 0,
95
+ "runtime_only": 0,
96
+ "mismatched": 0
97
+ },
98
+ "alignment_score": 0.0-1.0,
99
+ "overall_assessment": "Brief summary of code-runtime alignment health"
100
+ }
101
+ </output_format>
102
+
103
+ <constraints>
104
+ - Focus on actionable drift, not minor variations
105
+ - Explain WHY each drift matters
106
+ - For uncertain correlations, state your confidence level in the "evidence" field
107
+ - Maximum 15 drift signals, prioritized by severity
108
+ - Do NOT report matching items as drift
109
+ </constraints>
110
+
111
+ <examples>
112
+ Example input:
113
+ Static callsites:
114
+ - src/api.py:42 - openai/gpt-4 (streaming=true)
115
+ - src/embed.py:15 - openai/text-embedding-3-small
116
+
117
+ Runtime summary:
118
+ - openai/gpt-4o-mini: 500 calls
119
+ - anthropic/claude-3-haiku: 100 calls
120
+
121
+ Example output:
122
+ {
123
+ "drift_signals": [
124
+ {
125
+ "type": "modelMismatch",
126
+ "severity": "warning",
127
+ "code_location": "src/api.py:42",
128
+ "code_details": {"provider": "openai", "model": "gpt-4", "patterns": {"streaming": true}},
129
+ "runtime_details": {"provider": "openai", "model": "gpt-4o-mini", "call_count": 500, "patterns_observed": {}},
130
+ "evidence": "Code specifies gpt-4 but runtime shows gpt-4o-mini",
131
+ "explanation": "Environment variable or fallback may be overriding the configured model. This could be intentional cost optimization or accidental configuration.",
132
+ "recommendation": "Verify if gpt-4o-mini is intentional. If so, update code to match reality. If not, check OPENAI_MODEL env var."
133
+ },
134
+ {
135
+ "type": "runtimeOnly",
136
+ "severity": "critical",
137
+ "code_location": null,
138
+ "code_details": null,
139
+ "runtime_details": {"provider": "anthropic", "model": "claude-3-haiku", "call_count": 100, "patterns_observed": {}},
140
+ "evidence": "Anthropic Claude calls in runtime but no Anthropic code detected",
141
+ "explanation": "Shadow API usage detected. This could be a security concern or undocumented dependency.",
142
+ "recommendation": "Investigate source of Anthropic calls. Add explicit code for audit trail."
143
+ },
144
+ {
145
+ "type": "codeOnly",
146
+ "severity": "info",
147
+ "code_location": "src/embed.py:15",
148
+ "code_details": {"provider": "openai", "model": "text-embedding-3-small", "patterns": {}},
149
+ "runtime_details": null,
150
+ "evidence": "Embedding code exists but no embedding events in runtime",
151
+ "explanation": "Dead code or feature not yet deployed to production.",
152
+ "recommendation": "Confirm if embeddings are expected in production. Remove if unused."
153
+ }
154
+ ],
155
+ "correlation_summary": {
156
+ "total_code_callsites": 2,
157
+ "total_runtime_models": 2,
158
+ "matched": 0,
159
+ "code_only": 1,
160
+ "runtime_only": 1,
161
+ "mismatched": 1
162
+ },
163
+ "alignment_score": 0.25,
164
+ "overall_assessment": "Significant drift detected. Runtime behavior diverges from code intent. Model mismatch and shadow API usage require immediate attention."
165
+ }
166
+ </examples>
167
+
168
+ categories:
169
+ - drift
170
+ - reliability
171
+ - security
172
+ - best-practice
173
+
174
+ defaults:
175
+ mismatch_severity_model: warning
176
+ mismatch_severity_provider: critical
177
+ code_only_severity: info
178
+ runtime_only_severity: critical
@@ -0,0 +1,46 @@
1
+ # Format Normalizer Prompt
2
+ # Used for LLM-based detection and normalization of runtime event formats
3
+ id: format-normalizer
4
+ version: "1.0.0"
5
+ description: Expert at parsing log formats and trace data for InferenceEvent normalization
6
+
7
+ prompt: |
8
+ You are an expert at parsing log formats and trace data. Analyze the following sample data and determine field mappings to the InferenceEvent schema.
9
+
10
+ The target InferenceEvent schema requires these fields:
11
+ - id (string): Unique event identifier
12
+ - ts (string): ISO 8601 timestamp
13
+ - provider (string): LLM provider (openai, anthropic, google, etc.)
14
+ - model (string): Model name (gpt-4o, claude-3-5-sonnet, etc.)
15
+ - input_tokens (number): Input/prompt token count
16
+ - output_tokens (number): Output/completion token count
17
+ - latency_ms (number): Request latency in milliseconds
18
+
19
+ Optional fields:
20
+ - streaming (boolean), ttft_ms (number), batch_size (number), cached (boolean), retry_count (number)
21
+
22
+ For each target field, provide:
23
+ 1. The source path/expression to extract the value
24
+ 2. The extraction type (direct, jsonpath, regex, computed)
25
+ 3. Any transform needed (unix_ms_to_iso, unix_nano_to_iso, parse_int, etc.)
26
+ 4. Your confidence (0.0-1.0) in this mapping
27
+ 5. Evidence explaining why you chose this mapping
28
+
29
+ If a field cannot be mapped, list it in unmapped_fields; if you still include a mapping entry for it, set its confidence to 0.
30
+
31
+ Respond in JSON format:
32
+ {
33
+ "format_type": "detected format name",
34
+ "mappings": [
35
+ {
36
+ "target": "field_name",
37
+ "source_path": "path or expression",
38
+ "extraction_type": "direct|jsonpath|regex|computed",
39
+ "transform": "none|unix_ms_to_iso|unix_nano_to_iso|parse_int|...",
40
+ "confidence": 0.9,
41
+ "evidence": "explanation"
42
+ }
43
+ ],
44
+ "unmapped_fields": ["fields that could not be mapped"],
45
+ "warnings": ["any issues or caveats"]
46
+ }