@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,295 @@
1
+ # Runtime Events Format
2
+
3
+ PeakInfer correlates your code with runtime behavior. This document describes how to format runtime event data for drift detection.
4
+
5
+ ## Quick Start
6
+
7
+ Export your LLM inference events as JSONL (newline-delimited JSON):
8
+
9
+ ```jsonl
10
+ {"id":"evt_1","ts":"2024-12-21T10:00:00Z","provider":"openai","model":"gpt-4","input_tokens":150,"output_tokens":50,"latency_ms":1200}
11
+ {"id":"evt_2","ts":"2024-12-21T10:00:01Z","provider":"anthropic","model":"claude-3-opus","input_tokens":200,"output_tokens":100,"latency_ms":2500}
12
+ ```
13
+
14
+ Save to a file (e.g., `events.jsonl`) and pass to PeakInfer:
15
+
16
+ ```bash
17
+ # CLI
18
+ peakinfer analyze ./src --events events.jsonl
19
+
20
+ # GitHub Action
21
+ - uses: kalmantic/peakinfer@v1
22
+ with:
23
+ path: ./src
24
+ events: ./events.jsonl
25
+ ```
26
+
27
+ ---
28
+
29
+ ## InferenceEvent Schema
30
+
31
+ Each event represents one LLM inference call:
32
+
33
+ | Field | Type | Required | Description |
34
+ |-------|------|----------|-------------|
35
+ | `id` | string | Yes | Unique event identifier |
36
+ | `ts` | string | Yes | ISO 8601 timestamp |
37
+ | `provider` | string | Yes | Provider name (see below) |
38
+ | `model` | string | Yes | Model name |
39
+ | `input_tokens` | number | Yes | Input token count |
40
+ | `output_tokens` | number | Yes | Output token count |
41
+ | `latency_ms` | number | Yes | Total latency in milliseconds |
42
+ | `intent` | string | No | Business intent (e.g., "summarize", "translate") |
43
+ | `callsite_id` | string | No | Link to code location (improves correlation) |
44
+ | `streaming` | boolean | No | Was this a streaming request? |
45
+ | `ttft_ms` | number | No | Time to first token in milliseconds (streaming only) |
46
+ | `batch_size` | number | No | Batch size if batched |
47
+ | `batch_id` | string | No | Batch group identifier |
48
+ | `cached` | boolean | No | Was response cached? |
49
+ | `retry_count` | number | No | Number of retries |
50
+ | `fallback_used` | boolean | No | Was fallback triggered? |
51
+ | `original_model` | string | No | Model originally requested, if a fallback was used |
52
+
53
+ ---
54
+
55
+ ## Provider Names
56
+
57
+ Use lowercase provider names:
58
+
59
+ | Provider | Value |
60
+ |----------|-------|
61
+ | OpenAI | `openai` |
62
+ | Anthropic | `anthropic` |
63
+ | Google | `google` |
64
+ | Azure OpenAI | `azure_openai` |
65
+ | AWS Bedrock | `bedrock` |
66
+ | Cohere | `cohere` |
67
+ | Mistral | `mistral` |
68
+ | Together AI | `together` |
69
+ | Fireworks | `fireworks` |
70
+ | Groq | `groq` |
71
+ | Replicate | `replicate` |
72
+ | Perplexity | `perplexity` |
73
+ | vLLM | `vllm` |
74
+ | SGLang | `sglang` |
75
+ | TGI | `tgi` |
76
+ | Ollama | `ollama` |
77
+ | llama.cpp | `llamacpp` |
78
+
79
+ ---
80
+
81
+ ## Supported Formats
82
+
83
+ PeakInfer auto-detects the following formats:
84
+
85
+ ### Direct Parse (No LLM Needed)
86
+
87
+ | Format | Extension | Description |
88
+ |--------|-----------|-------------|
89
+ | JSONL | `.jsonl` | Newline-delimited JSON (preferred) |
90
+ | JSON Array | `.json` | Array of event objects |
91
+ | CSV | `.csv` | Comma-separated values |
92
+ | TSV | `.tsv` | Tab-separated values |
93
+
94
+ ### Agent-Normalized (Requires API Key)
95
+
96
+ PeakInfer can parse exports from observability platforms:
97
+
98
+ | Platform | Notes |
99
+ |----------|-------|
100
+ | OpenTelemetry | OTLP traces/spans |
101
+ | Jaeger | Distributed tracing format |
102
+ | Zipkin | Tracing format |
103
+ | LangSmith | Trace exports |
104
+ | Helicone | Proxy logs |
105
+ | LiteLLM | Proxy event logs |
106
+ | Portkey | Gateway logs |
107
+
108
+ For these formats, provide your Anthropic API key:
109
+
110
+ ```bash
111
+ export ANTHROPIC_API_KEY=sk-ant-...
112
+ peakinfer analyze ./src --events otel-traces.json
113
+ ```
114
+
115
+ ---
116
+
117
+ ## CSV/TSV Column Names
118
+
119
+ For CSV/TSV files, use these column names:
120
+
121
+ ```csv
122
+ id,ts,provider,model,input_tokens,output_tokens,latency_ms,streaming
123
+ evt_1,2024-12-21T10:00:00Z,openai,gpt-4,150,50,1200,false
124
+ evt_2,2024-12-21T10:00:01Z,anthropic,claude-3-opus,200,100,2500,true
125
+ ```
126
+
127
+ Alternative column names are supported:
128
+ - `timestamp` → `ts`
129
+ - `provider_name` → `provider`
130
+ - `model_name` → `model`
131
+ - `tokens_in` → `input_tokens`
132
+ - `tokens_out` → `output_tokens`
133
+ - `latency`, `duration_ms` → `latency_ms`
134
+
135
+ ---
136
+
137
+ ## Linking Events to Code
138
+
139
+ For better correlation, add a `callsite_id` to each event that identifies the code location (for example, `file:line`) that made the call:
140
+
141
+ ```jsonl
142
+ {"id":"evt_1","ts":"2024-12-21T10:00:00Z","provider":"openai","model":"gpt-4","input_tokens":150,"output_tokens":50,"latency_ms":1200,"callsite_id":"src/services/chat.ts:42"}
143
+ ```
144
+
145
+ PeakInfer will match this to the inference point at `src/services/chat.ts:42`.
146
+
147
+ ---
148
+
149
+ ## Drift Detection
150
+
151
+ When runtime data is provided, PeakInfer detects drift between code and runtime:
152
+
153
+ | Drift Type | Description |
154
+ |------------|-------------|
155
+ | `codeOnly` | Inference point found in code but never observed in the runtime events |
156
+ | `runtimeOnly` | Runtime calls with no matching code location |
157
+ | `mismatch` | Model/provider differs between code and runtime |
158
+ | `patternDrift` | Pattern mismatch (e.g., streaming in code, blocking in runtime) |
159
+
160
+ Example PR comment:
161
+
162
+ ```
163
+ 🔒 RUNTIME CORRELATION
164
+
165
+ | Location | Code | Runtime | Drift |
166
+ |----------|------|---------|-------|
167
+ | chat.ts:42 | streaming: true | streaming: 0% | patternDrift |
168
+ | api.ts:15 | gpt-4 | gpt-4-turbo | mismatch |
169
+ ```
170
+
171
+ ---
172
+
173
+ ## GitHub Action Integration
174
+
175
+ ### From File in Repository
176
+
177
+ ```yaml
178
+ - uses: kalmantic/peakinfer@v1
179
+ with:
180
+ path: ./src
181
+ events: ./traces/events.jsonl
182
+ ```
183
+
184
+ ### From URL
185
+
186
+ Fetch events from your observability platform:
187
+
188
+ ```yaml
189
+ - uses: kalmantic/peakinfer@v1
190
+ with:
191
+ path: ./src
192
+ events-url: ${{ secrets.OBSERVABILITY_URL }}
193
+ ```
194
+
195
+ ### From GitHub Artifact
196
+
197
+ If events are produced by a previous job:
198
+
199
+ ```yaml
200
+ - uses: actions/download-artifact@v4
201
+ with:
202
+ name: inference-events
203
+
204
+ - uses: kalmantic/peakinfer@v1
205
+ with:
206
+ path: ./src
207
+ events: ./inference-events/events.jsonl
208
+ ```
209
+
210
+ ---
211
+
212
+ ## Generating Events
213
+
214
+ ### OpenAI SDK (Python)
215
+
216
+ ```python
217
+ import json
218
+ import time
219
+ from openai import OpenAI
220
+
221
+ client = OpenAI()
222
+ events = []
223
+
224
+ start = time.time()
225
+ response = client.chat.completions.create(
226
+ model="gpt-4",
227
+ messages=[{"role": "user", "content": "Hello"}]
228
+ )
229
+ latency = (time.time() - start) * 1000
230
+
231
+ events.append({
232
+ "id": response.id,
233
+ "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
234
+ "provider": "openai",
235
+ "model": response.model,
236
+ "input_tokens": response.usage.prompt_tokens,
237
+ "output_tokens": response.usage.completion_tokens,
238
+ "latency_ms": int(latency),
239
+ "streaming": False,
240
+ })
241
+
242
+ # Write to JSONL
243
+ with open("events.jsonl", "a") as f:
244
+ f.write(json.dumps(events[-1]) + "\n")
245
+ ```
246
+
247
+ ### Anthropic SDK (Python)
248
+
249
+ ```python
250
+ import json
251
+ import time
252
+ import anthropic
253
+
254
+ client = anthropic.Anthropic()
255
+ events = []
256
+
257
+ start = time.time()
258
+ response = client.messages.create(
259
+ model="claude-3-opus-20240229",
260
+ max_tokens=1024,
261
+ messages=[{"role": "user", "content": "Hello"}]
262
+ )
263
+ latency = (time.time() - start) * 1000
264
+
265
+ events.append({
266
+ "id": response.id,
267
+ "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
268
+ "provider": "anthropic",
269
+ "model": response.model,
270
+ "input_tokens": response.usage.input_tokens,
271
+ "output_tokens": response.usage.output_tokens,
272
+ "latency_ms": int(latency),
273
+ "streaming": False,
274
+ })
275
+
276
+ with open("events.jsonl", "a") as f:
277
+ f.write(json.dumps(events[-1]) + "\n")
278
+ ```
279
+
280
+ ---
281
+
282
+ ## Best Practices
283
+
284
+ 1. **Export regularly**: Run event exports as part of your CI/CD pipeline
285
+ 2. **Include callsite_id**: Improves code-to-runtime correlation accuracy
286
+ 3. **Keep events recent**: Use the last 24-48 hours of data for meaningful drift detection
287
+ 4. **Use the `streaming` field**: Critical for detecting streaming drift
288
+ 5. **Track retries**: Helps identify reliability issues
289
+
290
+ ---
291
+
292
+ ## Related
293
+
294
+ - [InferenceMap Spec](inferencemap-spec.md) — Output schema for analysis results
295
+ - [README](../README.md) — Quick start guide
@@ -0,0 +1,344 @@
1
+ # InferenceMap v0.1 Specification
2
+
3
+ The InferenceMap is PeakInfer's structured output format for static analysis results. It provides a machine-readable representation of all LLM inference points in a codebase.
4
+
5
+ ## Overview
6
+
7
+ ```json
8
+ {
9
+ "version": "0.1",
10
+ "root": "./src",
11
+ "generatedAt": "2024-12-21T10:00:00Z",
12
+ "metadata": { ... },
13
+ "summary": { ... },
14
+ "callsites": [ ... ]
15
+ }
16
+ ```
17
+
18
+ ---
19
+
20
+ ## Top-Level Fields
21
+
22
+ | Field | Type | Description |
23
+ |-------|------|-------------|
24
+ | `version` | string | Schema version (currently `"0.1"`) |
25
+ | `root` | string | Analyzed path (relative or absolute) |
26
+ | `generatedAt` | string | ISO 8601 timestamp |
27
+ | `metadata` | object | Analysis context (optional) |
28
+ | `summary` | object | Aggregate statistics |
29
+ | `callsites` | array | List of inference points |
30
+
31
+ ---
32
+
33
+ ## Metadata Object
34
+
35
+ Optional context about how the analysis was performed:
36
+
37
+ ```json
38
+ {
39
+ "metadata": {
40
+ "absolutePath": "/Users/dev/project/src",
41
+ "promptId": "unified-analyzer",
42
+ "promptVersion": "1.6.0",
43
+ "templatesVersion": "1.0.0",
44
+ "llmProvider": "anthropic",
45
+ "llmModel": "claude-sonnet-4-20250514"
46
+ }
47
+ }
48
+ ```
49
+
50
+ | Field | Type | Description |
51
+ |-------|------|-------------|
52
+ | `absolutePath` | string | Full absolute path analyzed |
53
+ | `promptId` | string | Analysis prompt identifier |
54
+ | `promptVersion` | string | Analysis prompt version |
55
+ | `templatesVersion` | string | peakinfer-templates version |
56
+ | `llmProvider` | string | LLM provider used (`anthropic`, `none`) |
57
+ | `llmModel` | string | LLM model used for analysis |
58
+
59
+ ---
60
+
61
+ ## Summary Object
62
+
63
+ Aggregate statistics for quick overview:
64
+
65
+ ```json
66
+ {
67
+ "summary": {
68
+ "totalCallsites": 7,
69
+ "providers": ["openai", "anthropic"],
70
+ "models": ["gpt-4", "claude-3-opus"],
71
+ "patterns": {
72
+ "streaming": 3,
73
+ "batching": 0,
74
+ "retries": 5,
75
+ "caching": 1,
76
+ "fallback": 2
77
+ }
78
+ }
79
+ }
80
+ ```
81
+
82
+ | Field | Type | Description |
83
+ |-------|------|-------------|
84
+ | `totalCallsites` | number | Total inference points detected |
85
+ | `providers` | string[] | Unique providers found |
86
+ | `models` | string[] | Unique models found |
87
+ | `patterns` | object | Count of each pattern detected |
88
+
89
+ ---
90
+
91
+ ## Callsite Object
92
+
93
+ Each inference point (callsite) has this structure:
94
+
95
+ ```json
96
+ {
97
+ "id": "src/services/chat.ts:42",
98
+ "file": "src/services/chat.ts",
99
+ "line": 42,
100
+ "provider": "openai",
101
+ "model": "gpt-4",
102
+ "framework": "langchain",
103
+ "runtime": null,
104
+ "patterns": {
105
+ "streaming": true,
106
+ "batching": false,
107
+ "retries": true,
108
+ "caching": false,
109
+ "fallback": true
110
+ },
111
+ "confidence": 0.95
112
+ }
113
+ ```
114
+
115
+ ### Required Fields
116
+
117
+ | Field | Type | Description |
118
+ |-------|------|-------------|
119
+ | `id` | string | Unique identifier (typically `file:line`) |
120
+ | `file` | string | File path (relative to root) |
121
+ | `line` | number | Line number |
122
+ | `provider` | string \| null | Provider name |
123
+ | `model` | string \| null | Model name |
124
+ | `framework` | string \| null | Framework (langchain, llamaindex, etc.) |
125
+ | `runtime` | string \| null | Runtime (vllm, tgi, etc.) |
126
+ | `patterns` | object | Detected code patterns |
127
+ | `confidence` | number | Detection confidence (0-1) |
128
+
129
+ ### Patterns Object
130
+
131
+ | Pattern | Type | Description |
132
+ |---------|------|-------------|
133
+ | `streaming` | boolean | Streaming enabled in code |
134
+ | `batching` | boolean | Batch processing detected |
135
+ | `retries` | boolean | Retry logic present |
136
+ | `caching` | boolean | Caching implemented |
137
+ | `fallback` | boolean | Fallback logic present |
138
+
139
+ ---
140
+
141
+ ## Provider Values
142
+
143
+ Valid provider values:
144
+
145
+ ```typescript
146
+ type Provider =
147
+ | 'openai'
148
+ | 'anthropic'
149
+ | 'google'
150
+ | 'cohere'
151
+ | 'mistral'
152
+ | 'bedrock'
153
+ | 'azure_openai'
154
+ | 'together'
155
+ | 'fireworks'
156
+ | 'groq'
157
+ | 'replicate'
158
+ | 'perplexity'
159
+ | 'vllm'
160
+ | 'sglang'
161
+ | 'tgi'
162
+ | 'ollama'
163
+ | 'llamacpp'
164
+ | 'unknown';
165
+ ```
166
+
167
+ ---
168
+
169
+ ## Complete Example
170
+
171
+ ```json
172
+ {
173
+ "version": "0.1",
174
+ "root": "./src",
175
+ "generatedAt": "2024-12-21T10:00:00.000Z",
176
+ "metadata": {
177
+ "absolutePath": "/Users/dev/project/src",
178
+ "llmProvider": "anthropic",
179
+ "llmModel": "claude-sonnet-4-20250514"
180
+ },
181
+ "summary": {
182
+ "totalCallsites": 3,
183
+ "providers": ["openai", "anthropic"],
184
+ "models": ["gpt-4", "gpt-4-turbo", "claude-3-opus"],
185
+ "patterns": {
186
+ "streaming": 2,
187
+ "batching": 0,
188
+ "retries": 2,
189
+ "caching": 1,
190
+ "fallback": 1
191
+ }
192
+ },
193
+ "callsites": [
194
+ {
195
+ "id": "src/services/chat.ts:42",
196
+ "file": "src/services/chat.ts",
197
+ "line": 42,
198
+ "provider": "openai",
199
+ "model": "gpt-4",
200
+ "framework": null,
201
+ "runtime": null,
202
+ "patterns": {
203
+ "streaming": true,
204
+ "batching": false,
205
+ "retries": true,
206
+ "caching": false,
207
+ "fallback": false
208
+ },
209
+ "confidence": 0.95
210
+ },
211
+ {
212
+ "id": "src/services/summarize.ts:15",
213
+ "file": "src/services/summarize.ts",
214
+ "line": 15,
215
+ "provider": "openai",
216
+ "model": "gpt-4-turbo",
217
+ "framework": "langchain",
218
+ "runtime": null,
219
+ "patterns": {
220
+ "streaming": false,
221
+ "batching": false,
222
+ "retries": true,
223
+ "caching": true,
224
+ "fallback": true
225
+ },
226
+ "confidence": 0.88
227
+ },
228
+ {
229
+ "id": "src/api/translate.ts:78",
230
+ "file": "src/api/translate.ts",
231
+ "line": 78,
232
+ "provider": "anthropic",
233
+ "model": "claude-3-opus",
234
+ "framework": null,
235
+ "runtime": null,
236
+ "patterns": {
237
+ "streaming": true,
238
+ "batching": false,
239
+ "retries": false,
240
+ "caching": false,
241
+ "fallback": false
242
+ },
243
+ "confidence": 0.92
244
+ }
245
+ ]
246
+ }
247
+ ```
248
+
249
+ ---
250
+
251
+ ## Usage
252
+
253
+ ### CLI Output
254
+
255
+ Generate an InferenceMap with the CLI:
256
+
257
+ ```bash
258
+ # Save to file
259
+ peakinfer analyze ./src --json > inference-map.json
260
+
261
+ # Or use built-in artifact saving
262
+ peakinfer analyze ./src --save
263
+ # Creates .peakinfer/inference-map.json
264
+ ```
265
+
266
+ ### Programmatic Access (TypeScript)
267
+
268
+ ```typescript
269
+ import { InferenceMap } from '@kalmantic/peakinfer';
270
+ import fs from 'fs';
271
+
272
+ const map: InferenceMap = JSON.parse(
273
+ fs.readFileSync('.peakinfer/inference-map.json', 'utf-8')
274
+ );
275
+
276
+ console.log(`Found ${map.summary.totalCallsites} inference points`);
277
+
278
+ for (const callsite of map.callsites) {
279
+ if (!callsite.patterns.retries) {
280
+ console.log(`Missing retries: ${callsite.id}`);
281
+ }
282
+ }
283
+ ```
284
+
285
+ ### Zod Schema Validation
286
+
287
+ ```typescript
288
+ import { InferenceMap as InferenceMapSchema } from '@kalmantic/peakinfer';
289
+
290
+ // Validate parsed JSON (`jsonData`) against the schema
291
+ const result = InferenceMapSchema.safeParse(jsonData);
292
+
293
+ if (!result.success) {
294
+ console.error('Invalid InferenceMap:', result.error);
295
+ }
296
+ ```
297
+
298
+ ---
299
+
300
+ ## Versioning
301
+
302
+ The `version` field indicates the schema version:
303
+
304
+ | Version | Description |
305
+ |---------|-------------|
306
+ | `0.1` | Initial specification (current) |
307
+
308
+ Future versions will maintain backward compatibility where possible.
309
+
310
+ ---
311
+
312
+ ## TypeScript Definition
313
+
314
+ The full Zod schema is available in `src/types.ts` (`Callsite` is the schema for each entry of `callsites`):
315
+
316
+ ```typescript
317
+ export const InferenceMap = z.object({
318
+ version: z.string(),
319
+ root: z.string(),
320
+ generatedAt: z.string(),
321
+ metadata: z.object({
322
+ absolutePath: z.string(),
323
+ promptId: z.string().optional(),
324
+ promptVersion: z.string().optional(),
325
+ templatesVersion: z.string().optional(),
326
+ llmProvider: z.string().optional(),
327
+ llmModel: z.string().optional(),
328
+ }).optional(),
329
+ summary: z.object({
330
+ totalCallsites: z.number(),
331
+ providers: z.array(z.string()),
332
+ models: z.array(z.string()),
333
+ patterns: z.record(z.number()),
334
+ }),
335
+ callsites: z.array(Callsite),
336
+ });
337
+ ```
338
+
339
+ ---
340
+
341
+ ## Related
342
+
343
+ - [Runtime Events Format](events-format.md) — Input schema for runtime correlation
344
+ - [README](../README.md) — Quick start guide