@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,241 @@
+ # =============================================================================
+ # SYNC NOTE: This file is copied from peakinfer-site (SOURCE OF TRUTH)
+ # Source: peakinfer-site/prompts/unified-analyzer.yaml
+ #
+ # DO NOT MODIFY THIS FILE DIRECTLY IN THE CLI REPO.
+ # All changes must be made in peakinfer-site first, then synced here.
+ # =============================================================================
+
+ id: unified-analyzer
+ name: Unified Static Analyzer
+ version: "1.7"
+ description: |
+   Unified multi-dimensional LLM inference analyzer.
+   Single-call approach: one LLM call per file analyzing all 4 dimensions
+   (cost, latency, throughput, reliability) with actionable fixes.
+
+ system: |
+   You are an expert LLM inference performance analyzer. Analyze the provided code file and return a comprehensive performance analysis with actionable fixes.
+
+   Your task:
+   1. Find all LLM inference points including:
+      - OpenAI SDK (client.chat.completions.create, openai.ChatCompletion.create, client.completions.create)
+      - Anthropic SDK (client.messages.create, anthropic.messages)
+      - Google AI / Vertex AI (genai.GenerativeModel, generate_content)
+      - Azure OpenAI (AzureOpenAI)
+      - AWS Bedrock (invoke_model, bedrock.converse, BedrockRuntime)
+      - Mistral, Cohere, Groq, Together AI, Fireworks, Replicate, Perplexity
+      - Vercel AI SDK (@ai-sdk/openai, @ai-sdk/anthropic, streamText, generateText, streamObject)
+      - LangChain (ChatOpenAI, ChatAnthropic, LLMChain, ConversationChain)
+      - LlamaIndex (llama_index.llms)
+      - Self-hosted (vLLM, TGI, TensorRT-LLM, Triton, Ollama, llama.cpp, sglang)
+      - HTTP calls to /v1/chat/completions or similar LLM API endpoints
+      - Claude Agent SDK usage
+   2. For each inference point, analyze across ALL 4 dimensions: cost, latency, throughput, and reliability
+   3. For each issue found, provide the EXACT original code line and a suggested fix
+
+   Return JSON in this exact format:
+   {
+     "inference_points": [
+       {
+         "id": "unique_id",
+         "line": <line_number>,
+         "provider": "<provider_name>",
+         "model": "<model_name_or_null>",
+         "call_type": "direct|wrapper|framework|http",
+         "original_code": "<exact code line from source>",
+         "cost_profile": {
+           "tier": "premium|standard|budget|unknown",
+           "estimated_cost_per_call": <number>,
+           "optimizations": [{"type": "string", "description": "string", "savings_percent": <number>}]
+         },
+         "latency_profile": {
+           "estimated_p95_ms": <number>,
+           "is_blocking": <boolean>,
+           "has_streaming": <boolean>,
+           "optimizations": [{"type": "string", "description": "string", "improvement_percent": <number>}]
+         },
+         "throughput_profile": {
+           "has_rate_limiting": <boolean>,
+           "has_batching": <boolean>,
+           "bottlenecks": [{"type": "string", "description": "string"}],
+           "optimizations": [{"type": "string", "description": "string", "improvement": "string"}]
+         },
+         "reliability_profile": {
+           "has_error_handling": <boolean>,
+           "has_retry": <boolean>,
+           "has_timeout": <boolean>,
+           "has_fallback": <boolean>,
+           "anti_patterns": [{"type": "string", "description": "string"}],
+           "optimizations": [{"type": "string", "description": "string", "priority": "high|medium|low"}]
+         },
+         "issues": [
+           {
+             "type": "model_downgrade|add_streaming|add_error_handling|add_retry|add_timeout|add_fallback",
+             "severity": "critical|warning|info",
+             "headline": "<short description>",
+             "evidence": "<why this is a problem>",
+             "original_code": "<exact line(s) that need to change>",
+             "suggested_fix": "<replacement code with correct indentation>",
+             "ai_agent_prompt": "<detailed instructions for AI agents like Claude to fix this>"
+           }
+         ]
+       }
+     ],
+     "imports": {
+       "llm_providers": ["provider1", "provider2"],
+       "frameworks": ["framework1"]
+     },
+     "insights": [
+       {
+         "severity": "critical|warning|info",
+         "category": "cost|latency|throughput|reliability",
+         "headline": "string",
+         "evidence": "string",
+         "recommendation": "string"
+       }
+     ]
+   }
+
+   ============================================================
+   CRITICAL: CODE FIX REQUIREMENTS (READ CAREFULLY)
+   ============================================================
+
+   ABSOLUTELY FORBIDDEN in suggested_fix - these will break the tool:
+   - "// ... existing code ..."
+   - "// existing logic"
+   - "// handle error"
+   - "// retry logic here"
+   - "// add your code here"
+   - Any placeholder or ellipsis comment
+
+   REQUIRED in suggested_fix:
+   - The COMPLETE, COMPILABLE replacement code
+   - Copy ALL the original code lines and ADD the fix around them
+   - Real error class names (Anthropic.RateLimitError, not generic Error)
+   - Actual implementation, not descriptions
+
+   For original_code:
+   - Include the ENTIRE function body that needs to change
+   - This will be shown in GitHub's "Suggested change" UI
+
+   ============================================================
+   EXAMPLE: Adding error handling to a function
+   ============================================================
+
+   If the source code is:
+   ```
+   export async function chat(prompt: string): Promise<string> {
+     const response = await client.messages.create({
+       model: 'claude-sonnet-4-20250514',
+       max_tokens: 2000,
+       messages: [{ role: 'user', content: prompt }],
+     });
+     return response.content[0].type === 'text' ? response.content[0].text : '';
+   }
+   ```
+
+   Then the issue should be:
+   {
+     "type": "add_error_handling",
+     "severity": "critical",
+     "headline": "No error handling",
+     "evidence": "LLM calls can fail unexpectedly.",
+     "original_code": "export async function chat(prompt: string): Promise<string> {\n const response = await client.messages.create({\n model: 'claude-sonnet-4-20250514',\n max_tokens: 2000,\n messages: [{ role: 'user', content: prompt }],\n });\n return response.content[0].type === 'text' ? response.content[0].text : '';\n}",
+     "suggested_fix": "export async function chat(prompt: string): Promise<string> {\n try {\n const response = await client.messages.create({\n model: 'claude-sonnet-4-20250514',\n max_tokens: 2000,\n messages: [{ role: 'user', content: prompt }],\n });\n return response.content[0].type === 'text' ? response.content[0].text : '';\n } catch (error) {\n if (error instanceof Anthropic.RateLimitError) {\n await new Promise(r => setTimeout(r, 1000));\n return chat(prompt);\n }\n throw error;\n }\n}",
+     "ai_agent_prompt": "Wrap the Anthropic API call in try-catch with retry on rate limit."
+   }
+
+   Notice: suggested_fix contains the COMPLETE function with REAL code, not placeholders.
+
+   Be thorough but concise. Focus on actionable insights across all 4 performance dimensions.
+
+   IMPORTANT: You MUST detect ALL of these LLM inference patterns:
+   - OpenAI SDK: client.chat.completions.create, openai.ChatCompletion.create, client.completions.create, client.embeddings.create
+   - Anthropic SDK: client.messages.create, anthropic.messages, create_message
+   - Google AI: genai.GenerativeModel, generate_content, vertexai.generative_models
+   - Azure OpenAI: AzureOpenAI client calls
+   - AWS Bedrock: invoke_model, bedrock.converse, BedrockRuntime
+   - Mistral: MistralClient, mistral.chat
+   - Cohere: cohere.chat, cohere.generate, CohereClient
+   - Groq: groq.chat, Groq()
+   - Together AI: together.chat, Together()
+   - Fireworks: fireworks.chat, Fireworks()
+   - Replicate: replicate.run, replicate.predictions.create
+   - Perplexity: perplexity.chat
+   - Vercel AI SDK: streamText(), generateText(), streamObject(), anthropic(), openai(), google() from @ai-sdk/*
+   - LangChain: ChatOpenAI, ChatAnthropic, ChatGoogleGenerativeAI, LLMChain, ConversationChain, .invoke(), .ainvoke()
+   - LlamaIndex: llama_index.llms, OpenAILike, query_engine
+   - Self-hosted: vLLM, TGI (InferenceClient), TensorRT-LLM, Triton, Ollama, llama.cpp, sglang
+   - HTTP calls: fetch/axios/requests to /v1/chat/completions, /v1/completions, or similar LLM endpoints
+   - Claude Agent SDK: query() function usage
+
+ user_template: |
+   Analyze this {{language}} file for LLM inference points and their performance characteristics:
+
+   File: {{file_path}}
+
+   ```{{language}}
+   {{content}}
+   ```
+
+   Find ALL LLM inference calls in this file. Look for:
+   - OpenAI (client.chat.completions.create, etc.)
+   - Anthropic (client.messages.create, etc.)
+   - Google AI, Azure OpenAI, AWS Bedrock
+   - Mistral, Cohere, Groq, Together, Fireworks, Replicate, Perplexity
+   - Vercel AI SDK (streamText, generateText, @ai-sdk/*)
+   - LangChain, LlamaIndex
+   - Self-hosted (vLLM, TGI, Ollama, llama.cpp)
+   - HTTP calls to LLM endpoints
+
+   Return the JSON analysis for each inference point found.
+
+ input_schema:
+   file_path: string
+   content: string
+   language: string
+
+ output_format:
+   inference_points:
+     - id: string
+       line: number
+       provider: string
+       model: string|null
+       call_type: "direct|wrapper|framework|http"
+       original_code: string
+       cost_profile: object
+       latency_profile: object
+       throughput_profile: object
+       reliability_profile: object
+       issues:
+         - type: string
+           severity: "critical|warning|info"
+           headline: string
+           evidence: string
+           original_code: string
+           suggested_fix: string|null
+           ai_agent_prompt: string
+   imports:
+     llm_providers: string[]
+     frameworks: string[]
+   insights:
+     - severity: string
+       category: string
+       headline: string
+       evidence: string
+       recommendation: string
+
+ constraints:
+   - Return valid JSON only
+   - original_code must be EXACT match from source (include full function if fixing function-level issue)
+   - suggested_fix must be COMPLETE, WORKING code - NO placeholders like "// existing code" or "// handle error"
+   - suggested_fix must be one-click usable - user clicks "Commit suggestion" and code works immediately
+   - ai_agent_prompt must be actionable instruction with specific file path and line number
+   - All line numbers must match actual source
+   - For error handling fixes, include REAL error types (Anthropic.RateLimitError, etc.) not generic "handle error"
+   - For model downgrade fixes, show the ACTUAL model string replacement
+
+ defaults:
+   max_inference_points_per_file: 20
+   min_confidence: 0.5
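
The unified-analyzer.yaml prompt above is used once per scanned file: the `system` block becomes the system message, and `user_template` is filled in from the fields declared in `input_schema`. As a rough illustration only (the actual logic lives in package/src/prompts/loader.ts, which is not shown in this diff; `loadPromptSpec`, `renderTemplate`, and the js-yaml dependency are assumptions, not the package's real API), a loader along these lines would work:

```ts
// Hypothetical sketch of loading a prompt spec and rendering its user_template.
// Not the actual exports of src/prompts/loader.ts.
import { readFileSync } from "node:fs";
import * as yaml from "js-yaml"; // assumed YAML parser

interface PromptSpec {
  id: string;
  version: string;
  system: string;
  user_template: string;
  defaults?: Record<string, unknown>;
}

// Parse the YAML spec from disk.
function loadPromptSpec(path: string): PromptSpec {
  return yaml.load(readFileSync(path, "utf8")) as PromptSpec;
}

// Replace {{placeholders}} in user_template with values from input_schema.
function renderTemplate(template: string, vars: Record<string, string>): string {
  return template.replace(/\{\{(\w+)\}\}/g, (_m: string, key: string) => vars[key] ?? "");
}

const spec = loadPromptSpec("prompts/unified-analyzer.yaml");
const source = readFileSync("src/ai-service.ts", "utf8"); // example file path
const userMessage = renderTemplate(spec.user_template, {
  file_path: "src/ai-service.ts",
  language: "typescript",
  content: source,
});
// spec.system and userMessage are then sent as one LLM call per file, and the
// JSON reply is parsed against the output_format / constraints sections above.
```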
@@ -0,0 +1,215 @@
+ {
+   "$schema": "http://json-schema.org/draft-07/schema#",
+   "$id": "https://peakinfer.dev/schemas/inference-map.v0.1.json",
+   "title": "InferenceMap",
+   "description": "PeakInfer InferenceMap v0.1 - Schema for LLM inference point analysis results",
+   "type": "object",
+   "required": ["version", "root", "generatedAt", "summary", "callsites"],
+   "properties": {
+     "version": {
+       "type": "string",
+       "description": "Schema version",
+       "const": "0.1"
+     },
+     "root": {
+       "type": "string",
+       "description": "Root directory path that was analyzed"
+     },
+     "generatedAt": {
+       "type": "string",
+       "format": "date-time",
+       "description": "ISO 8601 timestamp when the analysis was generated"
+     },
+     "metadata": {
+       "type": "object",
+       "description": "Optional metadata about the analysis run",
+       "properties": {
+         "absolutePath": {
+           "type": "string",
+           "description": "Full absolute path that was analyzed"
+         },
+         "promptId": {
+           "type": "string",
+           "description": "ID of the analysis prompt used"
+         },
+         "promptVersion": {
+           "type": "string",
+           "description": "Version of the analysis prompt"
+         },
+         "templatesVersion": {
+           "type": "string",
+           "description": "Version of peakinfer-templates used"
+         },
+         "llmProvider": {
+           "type": "string",
+           "description": "LLM provider used for analysis (e.g., 'anthropic')"
+         },
+         "llmModel": {
+           "type": "string",
+           "description": "LLM model used for analysis"
+         }
+       }
+     },
+     "summary": {
+       "type": "object",
+       "description": "Summary statistics for the analyzed codebase",
+       "required": ["totalCallsites", "providers", "models", "patterns"],
+       "properties": {
+         "totalCallsites": {
+           "type": "integer",
+           "minimum": 0,
+           "description": "Total number of inference points found"
+         },
+         "providers": {
+           "type": "array",
+           "items": { "type": "string" },
+           "description": "List of unique LLM providers detected"
+         },
+         "models": {
+           "type": "array",
+           "items": { "type": "string" },
+           "description": "List of unique models detected"
+         },
+         "patterns": {
+           "type": "object",
+           "description": "Count of detected patterns (streaming, batching, etc.)",
+           "additionalProperties": { "type": "integer" }
+         }
+       }
+     },
+     "callsites": {
+       "type": "array",
+       "description": "List of inference points (callsites) found in the codebase",
+       "items": {
+         "$ref": "#/definitions/Callsite"
+       }
+     }
+   },
+   "definitions": {
+     "Provider": {
+       "type": "string",
+       "enum": [
+         "openai", "anthropic", "google", "cohere", "mistral",
+         "bedrock", "azure_openai", "together", "fireworks",
+         "groq", "replicate", "perplexity",
+         "vllm", "sglang", "tgi", "ollama", "llamacpp",
+         "unknown"
+       ],
+       "description": "LLM provider identifier"
+     },
+     "Patterns": {
+       "type": "object",
+       "description": "Detected code patterns at an inference point",
+       "properties": {
+         "streaming": {
+           "type": "boolean",
+           "description": "Whether streaming is enabled"
+         },
+         "batching": {
+           "type": "boolean",
+           "description": "Whether batching is used"
+         },
+         "retries": {
+           "type": "boolean",
+           "description": "Whether retry logic is present"
+         },
+         "caching": {
+           "type": "boolean",
+           "description": "Whether caching is implemented"
+         },
+         "fallback": {
+           "type": "boolean",
+           "description": "Whether fallback logic exists"
+         }
+       }
+     },
+     "Callsite": {
+       "type": "object",
+       "description": "An LLM inference point in the codebase",
+       "required": ["id", "file", "line", "patterns", "confidence"],
+       "properties": {
+         "id": {
+           "type": "string",
+           "description": "Unique identifier for this inference point"
+         },
+         "file": {
+           "type": "string",
+           "description": "Relative file path from root"
+         },
+         "line": {
+           "type": "integer",
+           "minimum": 1,
+           "description": "Line number in the file"
+         },
+         "provider": {
+           "oneOf": [
+             { "$ref": "#/definitions/Provider" },
+             { "type": "null" }
+           ],
+           "description": "Detected LLM provider"
+         },
+         "model": {
+           "type": ["string", "null"],
+           "description": "Detected model name"
+         },
+         "framework": {
+           "type": ["string", "null"],
+           "description": "Framework used (e.g., 'langchain', 'llamaindex')"
+         },
+         "runtime": {
+           "type": ["string", "null"],
+           "description": "Runtime environment (e.g., 'vllm', 'tgi')"
+         },
+         "patterns": {
+           "$ref": "#/definitions/Patterns"
+         },
+         "confidence": {
+           "type": "number",
+           "minimum": 0,
+           "maximum": 1,
+           "description": "Confidence score of the detection (0-1)"
+         }
+       }
+     }
+   },
+   "examples": [
+     {
+       "version": "0.1",
+       "root": "./src",
+       "generatedAt": "2025-12-24T12:00:00.000Z",
+       "metadata": {
+         "absolutePath": "/Users/dev/myproject/src",
+         "promptId": "unified-analyzer",
+         "promptVersion": "1.6",
+         "llmProvider": "anthropic",
+         "llmModel": "claude-sonnet-4-20250514"
+       },
+       "summary": {
+         "totalCallsites": 3,
+         "providers": ["anthropic", "openai"],
+         "models": ["claude-sonnet-4-20250514", "gpt-4o"],
+         "patterns": {
+           "streaming": 2,
+           "retries": 1,
+           "caching": 0
+         }
+       },
+       "callsites": [
+         {
+           "id": "cs-001",
+           "file": "services/chat.ts",
+           "line": 42,
+           "provider": "anthropic",
+           "model": "claude-sonnet-4-20250514",
+           "framework": null,
+           "runtime": null,
+           "patterns": {
+             "streaming": true,
+             "retries": true
+           },
+           "confidence": 0.95
+         }
+       ]
+     }
+   ]
+ }
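
The inference-map.v0.1.json schema above describes the document the scanner emits: run metadata, a summary, and one Callsite entry per detected inference point. The package ships its own validator (see src/commands/validate-map.ts in the file list), but any JSON Schema draft-07 validator can check a generated map. The sketch below uses Ajv as an illustration; Ajv/ajv-formats are not necessarily dependencies of this package, and the output filename inference-map.json is an assumption.

```ts
// Illustrative consumer-side validation of an InferenceMap against the shipped schema.
import Ajv from "ajv";
import addFormats from "ajv-formats";
import { readFileSync } from "node:fs";

// Schema path inside the installed package (per the file listing above).
const schema = JSON.parse(
  readFileSync("node_modules/@peakinfer/cli/schemas/inference-map.v0.1.json", "utf8"),
);

const ajv = new Ajv({ allErrors: true });
addFormats(ajv); // provides the "date-time" format used by generatedAt
const validate = ajv.compile(schema);

// Assumed output file produced by a scan; the real filename may differ.
const map = JSON.parse(readFileSync("inference-map.json", "utf8"));

if (!validate(map)) {
  console.error("InferenceMap failed schema validation:", validate.errors);
  process.exit(1);
}
console.log(`Valid map: ${map.summary.totalCallsites} callsites found under ${map.root}`);
```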