@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,293 @@
1
+ # Migration Guide: PeakInfer v1.8 → v2.0
2
+
3
+ This guide helps you migrate from PeakInfer v1.8 to v2.0.
4
+
5
+ ## Overview of Changes
6
+
7
+ PeakInfer v2.0 is a major architectural upgrade that improves accuracy, speed, and maintainability.
8
+
9
+ | Aspect | v1.8 | v2.0 |
10
+ |--------|------|------|
11
+ | **Analysis Engine** | TypeScript + Regex | Claude Code Agent SDK |
12
+ | **Callsite Discovery** | Regex patterns | Semantic code understanding |
13
+ | **Architecture** | Multi-phase agents | Unified single-call |
14
+ | **Templates** | 27 templates | 43 templates (12 insight + 31 optimization) |
15
+ | **Speed** | ~70s per file | ~30s per file (~57% faster) |
16
+ | **Cost** | ~$0.05 per file | ~$0.02 per file (60% cheaper) |
17
+ | **Pricing** | 300 credits/10 days free | 50 credits one-time (6-month expiry) |
18
+
19
+ ## Breaking Changes
20
+
21
+ ### 1. CLI Command Changes
22
+
23
+ ```bash
24
+ # v1.8: Separate commands for different analyses
25
+ peakinfer scan ./src # Discovery only
26
+ peakinfer profile ./src # Cost/latency profiling
27
+ peakinfer drift ./src --events file.jsonl
28
+
29
+ # v2.0: Unified analyze command
30
+ peakinfer analyze ./src # Full analysis
31
+ peakinfer analyze ./src --events production.jsonl # With runtime correlation
32
+ ```
33
+
34
+ ### 2. Output Format Changes
35
+
36
+ The InferenceMap schema has been updated to v0.1:
37
+
38
+ ```diff
39
+ {
40
+ - "version": "0.0.1",
41
+ + "version": "0.1",
42
+ "root": "./src",
43
+ "generatedAt": "2024-12-21T10:00:00Z",
44
+ + "metadata": {
45
+ + "promptId": "unified-analyzer",
46
+ + "promptVersion": "1.6.0",
47
+ + "llmProvider": "anthropic",
48
+ + "llmModel": "claude-sonnet-4-20250514"
49
+ + },
50
+ "summary": { ... },
51
+ "callsites": [
52
+ {
53
+ "id": "src/chat.ts:42",
54
+ "file": "src/chat.ts",
55
+ "line": 42,
56
+ "provider": "openai",
57
+ "model": "gpt-4o",
58
+ + "framework": "langchain", // NEW: Framework detection
59
+ + "runtime": null, // NEW: Runtime detection (vllm, tgi, etc.)
60
+ "patterns": {
61
+ "streaming": true,
62
+ - "retry": true, // Renamed
63
+ + "retries": true, // Renamed for consistency
64
+ "caching": false,
65
+ - "error_handling": true // Removed
66
+ + "fallback": true // NEW: Fallback pattern detection
67
+ },
68
+ "confidence": 0.95
69
+ }
70
+ ]
71
+ }
72
+ ```
73
+
74
+ ### 3. Configuration File Changes
75
+
76
+ ```yaml
77
+ # v1.8: .peakinferrc.yaml
78
+ scan:
79
+ extensions: [.ts, .js, .py]
80
+ ignore: [node_modules, dist]
81
+ profile:
82
+ include_cost: true
83
+ include_latency: true
84
+
85
+ # v2.0: .peakinfer.yaml (new name)
86
+ analyze:
87
+ extensions: [.ts, .tsx, .js, .jsx, .py] # More extensions supported
88
+ ignore: [node_modules, dist, .git, __pycache__]
89
+ prompt: unified-analyzer # Configurable prompt pack
90
+ output:
91
+ format: text # text, json
92
+ save: true # Auto-save to .peakinfer/
93
+ ```
94
+
95
+ ### 4. API Key Changes
96
+
97
+ ```bash
98
+ # v1.8: Used PEAKINFER_API_KEY for managed mode
99
+ export PEAKINFER_API_KEY=pk_xxx
100
+
101
+ # v2.0: BYOK mode uses your Anthropic key directly
102
+ export ANTHROPIC_API_KEY=sk-ant-xxx
103
+
104
+ # For managed mode (GitHub Action), use PEAKINFER_TOKEN
105
+ export PEAKINFER_TOKEN=pt_xxx
106
+ ```
107
+
108
+ ### 5. Credit System Changes
109
+
110
+ | v1.8 | v2.0 |
111
+ |------|------|
112
+ | 300 credits free (10-day refresh) | 50 credits free (one-time, 6-month expiry) |
113
+ | Pro: $20/500 credits | Starter: $19/200, Growth: $49/600, Scale: $149/2000 |
114
+ | Monthly subscription | Credit packs (no subscription) |
115
+
116
+ ## Migration Steps
117
+
118
+ ### Step 1: Update CLI
119
+
120
+ ```bash
121
+ # Uninstall v1.8
122
+ npm uninstall -g @kalmantic/peakinfer
123
+
124
+ # Install v2.0
125
+ npm install -g @kalmantic/peakinfer
126
+ ```
127
+
128
+ ### Step 2: Update Configuration
129
+
130
+ ```bash
131
+ # Rename config file
132
+ mv .peakinferrc.yaml .peakinfer.yaml
133
+
134
+ # Update config format (see example above)
135
+ ```
136
+
137
+ ### Step 3: Update API Key
138
+
139
+ ```bash
140
+ # Add to your shell profile (.bashrc, .zshrc, etc.)
141
+ export ANTHROPIC_API_KEY="your-key-here"
142
+
143
+ # Or create .env file
144
+ echo "ANTHROPIC_API_KEY=your-key-here" > .env
145
+ ```
146
+
147
+ ### Step 4: Update Scripts
148
+
149
+ ```diff
150
+ # package.json scripts
151
+ {
152
+ "scripts": {
153
+ - "peakinfer": "peakinfer scan ./src && peakinfer profile ./src"
154
+ + "peakinfer": "peakinfer analyze ./src"
155
+ }
156
+ }
157
+ ```
158
+
159
+ ### Step 5: Update GitHub Action
160
+
161
+ ```diff
162
+ # .github/workflows/peakinfer.yml
163
+ - uses: kalmantic/peakinfer-action@v1
164
+ + uses: kalmantic/peakinfer-action@v2
165
+ with:
166
+ path: ./src
167
+ - mode: scan-and-profile
168
+ + # mode is now automatic - unified analysis
169
+ events: ./events.jsonl
170
+ + events-map: timestamp=time,model=model_name # NEW: Field mapping
171
+ ```
172
+
173
+ ### Step 6: Update CI/CD Integration
174
+
175
+ ```diff
176
+ # Check output format changes in your CI scripts
177
+ - if jq -e '.callsites[] | select(.patterns.retry == false)' output.json; then
178
+ + if jq -e '.callsites[] | select(.patterns.retries == false)' output.json; then
179
+ echo "Missing retry handling detected"
180
+ exit 1
181
+ fi
182
+ ```
183
+
184
+ ## New Features in v2.0
185
+
186
+ ### 1. Framework Detection
187
+
188
+ v2.0 automatically detects LLM frameworks:
189
+
190
+ ```json
191
+ {
192
+ "id": "src/rag.ts:25",
193
+ "framework": "langchain", // langchain, llamaindex, haystack, etc.
194
+ "runtime": null
195
+ }
196
+ ```
197
+
198
+ ### 2. Self-Hosted Runtime Detection
199
+
200
+ ```json
201
+ {
202
+ "id": "src/inference.py:42",
203
+ "provider": null,
204
+ "runtime": "vllm" // vllm, tgi, ollama, sglang, etc.
205
+ }
206
+ ```
207
+
208
+ ### 3. Field Mapping for Runtime Events
209
+
210
+ Handle non-standard event formats:
211
+
212
+ ```bash
213
+ # v2.0: Map custom field names
214
+ peakinfer analyze ./src \
215
+ --events logs.jsonl \
216
+ --events-map latency_ms=duration,model=model_name,input_tokens=prompt_tokens
217
+ ```
218
+
219
+ ### 4. What-If Analysis
220
+
221
+ ```bash
222
+ # Predict impact of model changes
223
+ peakinfer whatif --model gpt-4o-mini
224
+
225
+ # Output: "Switching 5 inference points from gpt-4o to gpt-4o-mini
226
+ # would reduce monthly cost by $2,340 (67% reduction)"
227
+ ```
228
+
229
+ ### 5. Historical Comparison
230
+
231
+ ```bash
232
+ # Compare current run with baseline
233
+ peakinfer analyze ./src --compare-baseline
234
+
235
+ # Compare two specific runs
236
+ peakinfer history compare run_abc123 run_def456
237
+ ```
238
+
239
+ ### 6. Latency Prediction
240
+
241
+ ```bash
242
+ # Predict p95 latency based on InferenceMAX envelope data
243
+ peakinfer analyze ./src --predict --target-p95 2000
244
+ ```
245
+
246
+ ## Template Migration
247
+
248
+ If you have custom templates, update them to v2.0 schema:
249
+
250
+ ```diff
251
+ # Template changes
252
+ {
253
+ "id": "my-custom-template",
254
+ - "type": "insight",
255
+ + "category": "cost", # cost, drift, performance, waste
256
+ + "severity": "warning", # critical, warning, info
257
+ "match": {
258
+ "scope": "callsite",
259
+ "conditions": [
260
+ - { "field": "retry", "equals": false }
261
+ + { "field": "patterns.retries", "op": "eq", "value": false }
262
+ ]
263
+ }
264
+ }
265
+ ```
266
+
267
+ ## FAQ
268
+
269
+ ### Q: Will my v1.8 InferenceMap files still work?
270
+
271
+ A: v2.0 can read v1.8 files but will convert them on save. Consider regenerating for full v2.0 benefits.
272
+
273
+ ### Q: Do I need to change my Anthropic API key?
274
+
275
+ A: No — your Anthropic key itself is unchanged. If you were using the old `PEAKINFER_API_KEY` (v1.8 managed mode), replace it with `ANTHROPIC_API_KEY` for BYOK mode or `PEAKINFER_TOKEN` for managed mode (see "API Key Changes" above).
276
+
277
+ ### Q: Are v1.8 templates compatible?
278
+
279
+ A: Mostly yes. Check the template schema changes above and update `match.conditions` syntax.
280
+
281
+ ### Q: What happened to the `scan` command?
282
+
283
+ A: It's now integrated into `analyze`. Use `peakinfer analyze ./src` for all analysis types.
284
+
285
+ ### Q: How do I get my free credits?
286
+
287
+ A: Sign in at peakinfer.com with GitHub. First-time users get 50 credits automatically.
288
+
289
+ ## Getting Help
290
+
291
+ - **Documentation:** https://peakinfer.com/docs
292
+ - **GitHub Issues:** https://github.com/Kalmantic/peakinfer/issues
293
+ - **Discord:** https://discord.gg/kalmantic
@@ -0,0 +1,142 @@
1
+ {
2
+ "version": "1.0.0",
3
+ "generated": "2025-12-24T00:00:00.000Z",
4
+ "description": "Pre-computed demo analysis for offline `peakinfer demo` command",
5
+ "inferencePoints": [
6
+ {
7
+ "id": "demo-ai-service-chat-1",
8
+ "file": "ai-service.ts",
9
+ "line": 13,
10
+ "function": "chat",
11
+ "provider": "anthropic",
12
+ "model": "claude-sonnet-4-20250514",
13
+ "streaming": true,
14
+ "costProfile": {
15
+ "estimatedCostPer1K": 0.015,
16
+ "inputTokens": 500,
17
+ "outputTokens": 2000
18
+ }
19
+ },
20
+ {
21
+ "id": "demo-ai-service-classify-2",
22
+ "file": "ai-service.ts",
23
+ "line": 29,
24
+ "function": "classifyIntent",
25
+ "provider": "openai",
26
+ "model": "gpt-4",
27
+ "streaming": false,
28
+ "costProfile": {
29
+ "estimatedCostPer1K": 0.03,
30
+ "inputTokens": 100,
31
+ "outputTokens": 50
32
+ }
33
+ },
34
+ {
35
+ "id": "demo-ai-service-summarize-3",
36
+ "file": "ai-service.ts",
37
+ "line": 42,
38
+ "function": "summarize",
39
+ "provider": "anthropic",
40
+ "model": "claude-sonnet-4-20250514",
41
+ "streaming": false,
42
+ "costProfile": {
43
+ "estimatedCostPer1K": 0.015,
44
+ "inputTokens": 2000,
45
+ "outputTokens": 500
46
+ }
47
+ },
48
+ {
49
+ "id": "demo-ai-service-batch-4",
50
+ "file": "ai-service.ts",
51
+ "line": 52,
52
+ "function": "batchAnalyze",
53
+ "provider": "anthropic",
54
+ "model": "claude-sonnet-4-20250514",
55
+ "streaming": false,
56
+ "costProfile": {
57
+ "estimatedCostPer1K": 0.015,
58
+ "inputTokens": 200,
59
+ "outputTokens": 200
60
+ }
61
+ }
62
+ ],
63
+ "drift": {
64
+ "detected": true,
65
+ "type": "streaming",
66
+ "description": "Code declares streaming but runtime shows 0% actual streams",
67
+ "evidence": {
68
+ "code": "stream: true",
69
+ "runtime": "0% streaming responses observed",
70
+ "impact": "p95 latency 2.4s instead of 400ms",
71
+ "duration": "23 days"
72
+ }
73
+ },
74
+ "issues": [
75
+ {
76
+ "id": "issue-1",
77
+ "severity": "critical",
78
+ "category": "drift",
79
+ "title": "Streaming Drift Detected",
80
+ "description": "Code says `stream: true` but runtime shows 0% streaming responses",
81
+ "impact": "6x slower response times, poor user experience",
82
+ "file": "ai-service.ts",
83
+ "line": 15,
84
+ "fix": {
85
+ "description": "Verify streaming is working end-to-end",
86
+ "effort": "1 hour"
87
+ }
88
+ },
89
+ {
90
+ "id": "issue-2",
91
+ "severity": "high",
92
+ "category": "cost",
93
+ "title": "Overpowered Model for Classification",
94
+ "description": "GPT-4 ($0.03/1K) used for simple intent classification that GPT-4o-mini ($0.00015/1K) handles equally well",
95
+ "impact": "200x cost waste on this endpoint",
96
+ "savings": "$4,200/month",
97
+ "file": "ai-service.ts",
98
+ "line": 30,
99
+ "fix": {
100
+ "description": "Switch to gpt-4o-mini for classification",
101
+ "effort": "5 minutes"
102
+ }
103
+ },
104
+ {
105
+ "id": "issue-3",
106
+ "severity": "high",
107
+ "category": "reliability",
108
+ "title": "No Error Handling",
109
+ "description": "LLM call has no try/catch, no retry logic, no timeout",
110
+ "impact": "Single API failure crashes the application",
111
+ "file": "ai-service.ts",
112
+ "line": 42,
113
+ "fix": {
114
+ "description": "Add retry with exponential backoff and timeout",
115
+ "effort": "30 minutes"
116
+ }
117
+ },
118
+ {
119
+ "id": "issue-4",
120
+ "severity": "medium",
121
+ "category": "throughput",
122
+ "title": "Sequential Batch Processing",
123
+ "description": "Items processed one at a time in a loop instead of parallel",
124
+ "impact": "50x throughput loss for batch operations",
125
+ "file": "ai-service.ts",
126
+ "line": 51,
127
+ "fix": {
128
+ "description": "Use Promise.all() for parallel processing",
129
+ "effort": "15 minutes"
130
+ }
131
+ }
132
+ ],
133
+ "summary": {
134
+ "totalInferencePoints": 4,
135
+ "providers": ["anthropic", "openai"],
136
+ "criticalIssues": 1,
137
+ "highIssues": 2,
138
+ "mediumIssues": 1,
139
+ "estimatedMonthlySavings": "$4,200",
140
+ "estimatedLatencyImprovement": "6x faster"
141
+ }
142
+ }
@@ -0,0 +1,52 @@
1
+ # PeakInfer v2.0 Demo Project
2
+
3
+ This is a sample project for demonstrating PeakInfer v2.0 features.
4
+
5
+ ## Files
6
+
7
+ - `src/llm-client.ts` - Sample LLM client with various inference patterns
8
+ - `ai-service.ts` - Demo AI service showing common LLM inference issues
+ - `sample-events.jsonl` - Sample runtime events for combined analysis
9
+
10
+ ## Quick Demo
11
+
12
+ ### 1. Basic Analysis
13
+ ```bash
14
+ peakinfer analyze .
15
+ ```
16
+
17
+ ### 2. With Comparison
18
+ ```bash
19
+ # Run again to see comparison
20
+ peakinfer analyze . --compare
21
+ ```
22
+
23
+ ### 3. With Prediction
24
+ ```bash
25
+ peakinfer analyze . --predict --target-p95 3000
26
+ ```
27
+
28
+ ### 4. Combined Analysis (Static + Runtime)
29
+ ```bash
30
+ peakinfer analyze . --events sample-events.jsonl --compare --predict --html --open
31
+ ```
32
+
33
+ ## Expected Results
34
+
35
+ ### Predictions
36
+ - `chatWithGPT4`: HIGH risk (p95 ~5000ms)
37
+ - `analyzeWithOpus`: HIGH risk (p95 ~8000ms)
38
+ - `summarize`: MEDIUM risk (p95 ~4000ms)
39
+ - `quickChat`: LOW risk (p95 ~1500ms)
40
+ - `fastResponse`: LOW risk (p95 ~1500ms)
41
+
42
+ ### Counterfactuals
43
+ - Model swap: gpt-4 → gpt-4o-mini (-75% latency, -90% cost)
44
+ - Model swap: claude-3-opus → claude-3.5-sonnet (-60% latency)
45
+ - Enable streaming for non-streaming calls
46
+ - Add batching for embedding calls
47
+ - Add caching layer
48
+
49
+ ### Drift (Combined Mode)
50
+ Shows mismatches between code and runtime:
51
+ - Inference points in code but not in runtime (dead code?)
52
+ - Runtime events not mapped to code (dynamic calls?)
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Demo AI Service - Shows common LLM inference issues
3
+ * This file is used by `peakinfer demo` to demonstrate drift detection
4
+ */
5
+ import Anthropic from '@anthropic-ai/sdk';
6
+ import OpenAI from 'openai';
7
+
// Module-level SDK clients shared by every function below.
// NOTE(review): constructed with no options — presumably each SDK reads its
// API key from the environment (ANTHROPIC_API_KEY / OPENAI_API_KEY); confirm.
const anthropic = new Anthropic();
const openai = new OpenAI();
10
+
// Issue 1: Streaming configured but may not be working in production
// NOTE: this defect is an intentional fixture — `peakinfer demo` uses it to
// demonstrate static-vs-runtime drift detection. Do not "fix" it here.

/**
 * Requests a streamed completion from Claude, but accumulates all deltas
 * into a single string before returning (the caller never observes streaming).
 *
 * @param prompt - User message forwarded verbatim to the model.
 * @returns The concatenated text of every streamed text delta.
 */
export async function chat(prompt: string): Promise<string> {
  const response = await anthropic.messages.create({
    model: 'claude-sonnet-4-20250514',
    max_tokens: 2000,
    stream: true, // <-- Code says streaming
    messages: [{ role: 'user', content: prompt }],
  });

  let result = '';
  // With `stream: true`, create() yields an async iterable of events;
  // only text deltas contribute to the accumulated result.
  for await (const event of response) {
    if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
      result += event.delta.text;
    }
  }
  return result;
}
28
+
// Issue 2: GPT-4 used for simple classification (overpowered model)
// NOTE: intentional fixture defect — kept so `peakinfer demo` can flag it.

/**
 * Classifies a user message as one of: question, complaint, feedback, other.
 *
 * @param message - Raw user message to classify.
 * @returns The model's label text, or 'other' when the response is empty.
 */
export async function classifyIntent(message: string): Promise<string> {
  const response = await openai.chat.completions.create({
    model: 'gpt-4', // <-- Expensive model for simple task
    messages: [
      { role: 'system', content: 'Classify the user intent as: question, complaint, feedback, or other' },
      { role: 'user', content: message },
    ],
    max_tokens: 50,
  });
  // Fall back to 'other' when the model returned no content (null/empty).
  return response.choices[0].message.content || 'other';
}
41
+
// Issue 3: No error handling, no retry logic
// NOTE: intentional fixture defect — API/network errors propagate uncaught
// so `peakinfer demo` can flag the missing resilience.

/**
 * Summarizes arbitrary text with Claude.
 *
 * @param text - Text to summarize; interpolated directly into the prompt.
 * @returns The summary, or '' when the first content block is not text.
 */
export async function summarize(text: string): Promise<string> {
  // No try/catch, no retry, no timeout
  const response = await anthropic.messages.create({
    model: 'claude-sonnet-4-20250514',
    max_tokens: 500,
    messages: [{ role: 'user', content: `Summarize: ${text}` }],
  });
  return response.content[0].type === 'text' ? response.content[0].text : '';
}
52
+
// Issue 4: Sequential processing (throughput bottleneck)
// NOTE: intentional fixture defect — the Promise.all() fix is the tool's
// suggested remediation, so the sequential loop must stay as-is here.

/**
 * Analyzes each item with Claude one request at a time, preserving order.
 *
 * @param items - Strings to analyze; one API call is issued per item.
 * @returns One analysis per item ('' when a response block is not text).
 */
export async function batchAnalyze(items: string[]): Promise<string[]> {
  const results: string[] = [];
  for (const item of items) { // <-- Sequential, should be parallel
    const response = await anthropic.messages.create({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 200,
      messages: [{ role: 'user', content: `Analyze: ${item}` }],
    });
    results.push(response.content[0].type === 'text' ? response.content[0].text : '');
  }
  return results;
}
@@ -0,0 +1,15 @@
1
+ {"id":"evt_001","ts":"2024-12-15T10:00:00Z","provider":"openai","model":"gpt-4","input_tokens":150,"output_tokens":500,"latency_ms":4500}
2
+ {"id":"evt_002","ts":"2024-12-15T10:00:05Z","provider":"openai","model":"gpt-4","input_tokens":200,"output_tokens":800,"latency_ms":5200}
3
+ {"id":"evt_003","ts":"2024-12-15T10:00:10Z","provider":"anthropic","model":"claude-3-opus-20240229","input_tokens":1000,"output_tokens":2000,"latency_ms":8500}
4
+ {"id":"evt_004","ts":"2024-12-15T10:00:15Z","provider":"openai","model":"gpt-4-turbo","input_tokens":300,"output_tokens":400,"latency_ms":3800}
5
+ {"id":"evt_005","ts":"2024-12-15T10:00:20Z","provider":"openai","model":"gpt-4o-mini","input_tokens":100,"output_tokens":150,"latency_ms":1200}
6
+ {"id":"evt_006","ts":"2024-12-15T10:00:25Z","provider":"openai","model":"gpt-4o-mini","input_tokens":80,"output_tokens":120,"latency_ms":1100}
7
+ {"id":"evt_007","ts":"2024-12-15T10:00:30Z","provider":"anthropic","model":"claude-3-haiku-20240307","input_tokens":50,"output_tokens":200,"latency_ms":900}
8
+ {"id":"evt_008","ts":"2024-12-15T10:00:35Z","provider":"openai","model":"gpt-4o","input_tokens":200,"output_tokens":600,"latency_ms":2100,"streaming":true}
9
+ {"id":"evt_009","ts":"2024-12-15T10:00:40Z","provider":"openai","model":"text-embedding-3-small","input_tokens":100,"output_tokens":0,"latency_ms":150}
10
+ {"id":"evt_010","ts":"2024-12-15T10:00:45Z","provider":"openai","model":"gpt-4","input_tokens":180,"output_tokens":550,"latency_ms":4800}
11
+ {"id":"evt_011","ts":"2024-12-15T10:01:00Z","provider":"anthropic","model":"claude-3-sonnet-20240229","input_tokens":400,"output_tokens":800,"latency_ms":3500}
12
+ {"id":"evt_012","ts":"2024-12-15T10:01:05Z","provider":"openai","model":"gpt-4o-mini","input_tokens":90,"output_tokens":180,"latency_ms":1300}
13
+ {"id":"evt_013","ts":"2024-12-15T10:01:10Z","provider":"openai","model":"gpt-4","input_tokens":220,"output_tokens":700,"latency_ms":5500}
14
+ {"id":"evt_014","ts":"2024-12-15T10:01:15Z","provider":"anthropic","model":"claude-3-opus-20240229","input_tokens":1200,"output_tokens":2500,"latency_ms":9200}
15
+ {"id":"evt_015","ts":"2024-12-15T10:01:20Z","provider":"openai","model":"text-embedding-3-small","input_tokens":150,"output_tokens":0,"latency_ms":180}
@@ -0,0 +1,128 @@
1
+ /**
2
+ * AI Service for PeakInfer Demo
3
+ * Anthropic Claude only - demonstrates various inference patterns
4
+ */
5
+
6
+ import Anthropic from '@anthropic-ai/sdk';
7
+
// Single shared Anthropic client reused by every function in this demo file.
// NOTE(review): no explicit apiKey — presumably read from ANTHROPIC_API_KEY; confirm.
const client = new Anthropic();
9
+
10
+ // ============================================================================
11
+ // HIGH LATENCY / HIGH COST (will trigger issues)
12
+ // ============================================================================
13
+
/**
 * Complex analysis with Claude Opus (highest cost, no error handling).
 *
 * Issues expected (intentional — this file is an analyzer fixture,
 * so the defects below must not be "fixed"):
 * - Critical: No error handling
 * - Warning: Expensive model for simple task
 *
 * @param document - Document text; interpolated directly into the prompt.
 * @returns Analysis text, or '' when the first content block is not text.
 */
export async function analyzeDocument(document: string): Promise<string> {
  const response = await client.messages.create({
    model: 'claude-opus-4-20250514',
    max_tokens: 4000,
    messages: [{ role: 'user', content: `Analyze: ${document}` }],
  });
  return response.content[0].type === 'text' ? response.content[0].text : '';
}
28
+
/**
 * Chat completion without streaming (latency issue).
 *
 * Issues expected (intentional — this file is an analyzer fixture,
 * so the defects below must not be "fixed"):
 * - Warning: No streaming enabled
 * - Critical: No error handling
 *
 * @param prompt - User message sent verbatim to the model.
 * @returns Response text, or '' when the first content block is not text.
 */
export async function chat(prompt: string): Promise<string> {
  const response = await client.messages.create({
    model: 'claude-sonnet-4-20250514',
    max_tokens: 2000,
    messages: [{ role: 'user', content: prompt }],
  });
  return response.content[0].type === 'text' ? response.content[0].text : '';
}
43
+
44
+ // ============================================================================
45
+ // MEDIUM PATTERNS (some issues)
46
+ // ============================================================================
47
+
/**
 * Translation service (no timeout configured).
 *
 * Issues expected (intentional — this file is an analyzer fixture):
 * - Warning: No timeout configured
 *
 * @param text - Text to translate; interpolated directly into the prompt.
 * @param targetLang - Target language name; interpolated into the prompt.
 * @returns Translated text, or '' when the first content block is not text.
 */
export async function translate(text: string, targetLang: string): Promise<string> {
  const response = await client.messages.create({
    model: 'claude-3-5-sonnet-20241022',
    max_tokens: 2000,
    messages: [{ role: 'user', content: `Translate to ${targetLang}: ${text}` }],
  });
  return response.content[0].type === 'text' ? response.content[0].text : '';
}
61
+
62
+ // ============================================================================
63
+ // GOOD PATTERNS (minimal issues)
64
+ // ============================================================================
65
+
66
+ /**
67
+ * Fast response with Claude Haiku (optimized)
68
+ * - Uses cheap model
69
+ * - Has error handling
70
+ * - Has streaming
71
+ */
72
+ export async function* streamChat(prompt: string): AsyncGenerator<string> {
73
+ try {
74
+ const stream = await client.messages.stream({
75
+ model: 'claude-3-5-haiku-20241022',
76
+ max_tokens: 500,
77
+ messages: [{ role: 'user', content: prompt }],
78
+ });
79
+
80
+ for await (const event of stream) {
81
+ if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
82
+ yield event.delta.text;
83
+ }
84
+ }
85
+ } catch (error) {
86
+ console.error('Stream error:', error);
87
+ throw error;
88
+ }
89
+ }
90
+
91
+ /**
92
+ * Robust API call with retry and fallback
93
+ * - Has error handling
94
+ * - Has retry logic
95
+ * - Has fallback model
96
+ */
97
+ export async function robustChat(prompt: string): Promise<string> {
98
+ const maxRetries = 3;
99
+ let lastError: Error | null = null;
100
+
101
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
102
+ try {
103
+ const response = await client.messages.create({
104
+ model: 'claude-3-5-sonnet-20241022',
105
+ max_tokens: 1000,
106
+ messages: [{ role: 'user', content: prompt }],
107
+ });
108
+ return response.content[0].type === 'text' ? response.content[0].text : '';
109
+ } catch (error) {
110
+ lastError = error as Error;
111
+ if (attempt < maxRetries - 1) {
112
+ await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
113
+ }
114
+ }
115
+ }
116
+
117
+ // Fallback to cheaper model
118
+ try {
119
+ const response = await client.messages.create({
120
+ model: 'claude-3-5-haiku-20241022',
121
+ max_tokens: 1000,
122
+ messages: [{ role: 'user', content: prompt }],
123
+ });
124
+ return response.content[0].type === 'text' ? response.content[0].text : '';
125
+ } catch {
126
+ throw lastError;
127
+ }
128
+ }