@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,343 @@
1
+ /**
2
+ * RuntimeAnalyzerAgent - LLM-based semantic analysis of runtime telemetry
3
+ *
4
+ * From Autonomous Agent Architecture Patterns v0.2:
5
+ * - Subagent with fresh context window
6
+ * - XML-structured prompt from prompts/runtime-analyzer.yaml
7
+ * - Returns condensed summary (insights, not raw data)
8
+ * - Uses dynamic pricing from LiteLLM API
9
+ *
10
+ * Uses Claude Agent SDK (per TDD v1.9.3)
11
+ */
12
+
13
+ import { query } from '@anthropic-ai/claude-agent-sdk';
14
+ import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk';
15
+ import { loadPrompt, type AnalysisPrompt } from '../templates.js';
16
+ import { getPricingContext, calculateTotalCost, type PricingContext } from '../costs.js';
17
+ import type { RuntimeSummary, InferenceEvent, Insight, ImpactEstimate } from '../types.js';
18
+ import { createConstrainedRegistry, type ToolRegistry } from '../tools/index.js';
19
+ import type { BaseAgent, AgentOutput } from './index.js';
20
+
21
+ // =============================================================================
22
+ // TYPES
23
+ // =============================================================================
24
+
/**
 * Input accepted by `RuntimeAnalyzerAgent.execute`.
 */
export interface RuntimeAnalyzerInput {
  /** Inference events; scanned for streaming/batching/caching/retry/fallback flags and passed to cost calculation. */
  events: InferenceEvent[];

  /** Aggregated statistics (totals, per-provider, per-model, global latency) rendered into the LLM context. */
  runtimeSummary: RuntimeSummary;

  /**
   * Optional pricing data (dynamic pricing from LiteLLM).
   * When omitted, the agent obtains one via `getPricingContext` for the models in `runtimeSummary.byModel`.
   */
  pricingContext?: PricingContext;
}
30
+
/**
 * Result produced by the runtime analyzer, whether it came from the LLM or
 * from the local fallback.
 */
export interface RuntimeAnalyzerOutput {
  /** Findings about the telemetry; empty when the fallback path is taken. */
  insights: Insight[];

  /** Coarse classification of how the application uses inference. */
  detectedPatterns: {
    /** Kind of application; `'unknown'` when it could not be determined. */
    applicationType: 'rag' | 'agent' | 'batch' | 'chat' | 'pipeline' | 'unknown';
    /** Whether more than one model takes part in the workload. */
    multiModelPipeline: boolean;
    /** Whether streaming responses were observed. */
    streamingDetected: boolean;
    /** Whether batched requests were observed. */
    batchingDetected: boolean;
    /** Whether cached responses were observed. */
    cachingDetected: boolean;
  };

  /** Headline numbers for the analyzed telemetry. */
  summary: {
    /** Number of inference calls. */
    totalCalls: number;
    /** Input plus output tokens. */
    totalTokens: number;
    /** Provider with the most calls. */
    dominantProvider: string;
    /** Model with the most calls. */
    dominantModel: string;
    /** Estimated daily spend in USD; 0 when no estimate is available. */
    estimatedDailyCostUSD: number;
  };
}
48
+
/**
 * Shape of the JSON object the LLM is asked to return (matches the prompt's
 * output_format). Top-level sections use snake_case keys; the agent converts
 * them to the camelCase fields of `RuntimeAnalyzerOutput`.
 */
interface LLMRuntimeAnalysisResult {
  /** Individual findings reported by the model. */
  insights: {
    severity: 'critical' | 'warning' | 'info';
    category: 'cost' | 'latency' | 'reliability' | 'throughput' | 'waste';
    /** Short title of the finding. */
    headline: string;
    /** Supporting observation for the finding. */
    evidence: string;
    /** Suggested action, when the model provides one. */
    recommendation?: string;
    /** Optional estimate of what acting on the finding would change. */
    impact?: {
      layer: 'application' | 'api' | 'gateway' | 'runtime' | 'model' | 'hardware';
      impactType: 'cost' | 'latency' | 'throughput';
      estimatedImpactPercent: number;
      effort: 'low' | 'medium' | 'high';
    };
  }[];

  /** Usage patterns the model believes it recognised. */
  detected_patterns: {
    application_type: string;
    multi_model_pipeline: boolean;
    streaming_detected: boolean;
    batching_detected: boolean;
    caching_detected: boolean;
  };

  /** Aggregate figures as restated by the model. */
  summary: {
    total_calls: number;
    total_tokens: number;
    dominant_provider: string;
    dominant_model: string;
    estimated_daily_cost_usd: number;
  };
}
79
+
80
+ // =============================================================================
81
+ // HELPERS
82
+ // =============================================================================
83
+
/**
 * Concatenate every text block found in the assistant messages of an SDK
 * message stream, in order. Non-assistant messages, assistant messages
 * without content, and non-text blocks are skipped.
 *
 * @param messages Messages collected from the Claude Agent SDK query.
 * @returns The joined text, or an empty string when there is none.
 */
function extractTextFromMessages(messages: SDKMessage[]): string {
  let collected = '';

  for (const entry of messages) {
    if (entry.type !== 'assistant') {
      continue;
    }

    const blocks = entry.message?.content;
    if (!blocks) {
      continue;
    }

    for (const piece of blocks) {
      if (piece.type !== 'text') {
        continue;
      }
      collected += piece.text;
    }
  }

  return collected;
}
100
+
/**
 * Render aggregated telemetry as the plain-text context appended to the LLM
 * prompt. Only summary statistics are emitted, never the raw events.
 *
 * Sections, in order: totals, per-provider stats, per-model stats, global
 * latency, pricing (only when `pricingContext` lists at least one model,
 * followed by the estimated total cost), and boolean runtime-pattern flags
 * derived from the events.
 *
 * @param events Inference events, used for cost estimation and pattern flags.
 * @param runtimeSummary Pre-aggregated statistics to print.
 * @param pricingContext Optional per-model pricing and tier thresholds.
 * @returns The sections joined with newline characters.
 */
function buildRuntimeContext(
  events: InferenceEvent[],
  runtimeSummary: RuntimeSummary,
  pricingContext?: PricingContext
): string {
  const out: string[] = [
    '=== RUNTIME TELEMETRY SUMMARY ===',
    `Total events: ${runtimeSummary.totalEvents}`,
    '',
  ];

  // Per-provider breakdown
  out.push('--- BY PROVIDER ---');
  Object.entries(runtimeSummary.byProvider).forEach(([provider, stats]) => {
    out.push(
      `${provider}:`,
      ` calls: ${stats.calls}`,
      ` tokens_in: ${stats.tokens_in}, tokens_out: ${stats.tokens_out}`,
      ` latency: p50=${stats.latency_p50}ms, p95=${stats.latency_p95}ms, p99=${stats.latency_p99}ms`
    );
  });
  out.push('');

  // Per-model breakdown
  out.push('--- BY MODEL ---');
  Object.entries(runtimeSummary.byModel).forEach(([model, stats]) => {
    out.push(
      `${model}:`,
      ` calls: ${stats.calls}`,
      ` tokens_in: ${stats.tokens_in}, tokens_out: ${stats.tokens_out}`,
      ` latency: p50=${stats.latency_p50}ms, p95=${stats.latency_p95}ms, p99=${stats.latency_p99}ms`
    );
  });
  out.push('');

  // Latency percentiles across all events
  out.push(
    '--- GLOBAL LATENCY ---',
    `p50: ${runtimeSummary.global.p50}ms`,
    `p95: ${runtimeSummary.global.p95}ms`,
    `p99: ${runtimeSummary.global.p99}ms`,
    ''
  );

  // Pricing is printed only when at least one model has pricing data
  if (pricingContext && Object.keys(pricingContext.models).length > 0) {
    out.push('--- PRICING CONTEXT ($/1M tokens) ---');
    out.push(`Thresholds: expensive>${pricingContext.thresholds.expensive}, moderate>${pricingContext.thresholds.moderate}`);
    Object.entries(pricingContext.models).forEach(([model, pricing]) => {
      out.push(`${model}: input=$${pricing.input.toFixed(2)}, output=$${pricing.output.toFixed(2)} [${pricing.tier}]`);
    });
    out.push('');

    // The cost estimate is part of the pricing section
    const estimatedCost = calculateTotalCost(events);
    out.push(`Estimated total cost: $${estimatedCost.toFixed(4)}`);
  }

  // Flags observed on individual events
  out.push(
    '--- DETECTED RUNTIME PATTERNS ---',
    `streaming: ${events.some(ev => ev.streaming === true)}`,
    `batching: ${events.some(ev => ev.batch_id !== undefined)}`,
    `caching: ${events.some(ev => ev.cached === true)}`,
    `retries: ${events.some(ev => (ev.retry_count || 0) > 0)}`,
    `fallback: ${events.some(ev => ev.fallback_used === true)}`
  );

  return out.join('\n');
}
170
+
171
+ // =============================================================================
172
+ // AGENT IMPLEMENTATION
173
+ // =============================================================================
174
+
/**
 * Agent that sends aggregated runtime telemetry to an LLM (via the Claude
 * Agent SDK `query` function) and converts the JSON reply into insights,
 * detected patterns and a summary.
 *
 * Outcomes of `execute`:
 * - throws when the `runtime-analyzer` prompt cannot be loaded;
 * - returns the locally computed fallback, with no tools recorded, when
 *   `ANTHROPIC_API_KEY` is not set;
 * - returns the locally computed fallback when the query fails, the reply
 *   contains no JSON object, or the JSON cannot be parsed;
 * - otherwise returns the converted LLM analysis. Sections the LLM left out
 *   are filled from local data (or left empty, for insights).
 */
export const RuntimeAnalyzerAgent: BaseAgent<RuntimeAnalyzerInput, RuntimeAnalyzerOutput> = {
  name: 'runtime-analyzer',
  description: 'Analyze runtime telemetry for patterns, anomalies, and optimization opportunities',
  tools: createConstrainedRegistry(),

  /**
   * Run the analysis.
   *
   * @param input Events, their aggregated summary and optional pricing data.
   * @returns The analysis plus the list of tools used (`['llm']` whenever the LLM was invoked).
   * @throws Error when `prompts/runtime-analyzer.yaml` cannot be loaded.
   */
  async execute(input: RuntimeAnalyzerInput): Promise<AgentOutput<RuntimeAnalyzerOutput>> {
    const toolsUsed: string[] = ['llm'];

    // Load prompt from YAML
    const promptConfig = loadPrompt('runtime-analyzer');
    if (!promptConfig) {
      throw new Error('[runtime-analyzer] Prompt not found: prompts/runtime-analyzer.yaml');
    }

    // Get pricing context for models in the data
    const models = Object.keys(input.runtimeSummary.byModel);
    const pricingContext = input.pricingContext || getPricingContext(models);

    // Build runtime context
    const runtimeContext = buildRuntimeContext(input.events, input.runtimeSummary, pricingContext);

    // Without an API key the LLM cannot be called: return the basic analysis
    if (!process.env.ANTHROPIC_API_KEY) {
      console.warn('[runtime-analyzer] No ANTHROPIC_API_KEY, returning basic analysis');
      return {
        result: buildFallbackResult(input.events, input.runtimeSummary),
        toolsUsed: [],
      };
    }

    try {
      // Use Claude Agent SDK query() function
      const agentQuery = query({
        prompt: `${promptConfig.prompt}\n\n${runtimeContext}`,
        options: {
          model: 'claude-sonnet-4-20250514',
          tools: [],
          permissionMode: 'plan',
          cwd: process.cwd(),
        },
      });

      // Collect all messages from the async generator
      const messages: SDKMessage[] = [];
      for await (const message of agentQuery) {
        messages.push(message);
      }

      // Take everything from the first '{' to the last '}' as the JSON payload
      const text = extractTextFromMessages(messages);
      const jsonMatch = text.match(/\{[\s\S]*\}/);

      if (!jsonMatch) {
        console.warn('[runtime-analyzer] No JSON in LLM response, using fallback');
        return {
          result: buildFallbackResult(input.events, input.runtimeSummary),
          toolsUsed,
        };
      }

      const parsed = JSON.parse(jsonMatch[0]) as LLMRuntimeAnalysisResult;

      // The reply is unvalidated LLM output. Treat a missing or malformed
      // `insights` section like the other optional sections instead of letting
      // `.map` throw and discard the patterns/summary that did arrive.
      const rawInsights = Array.isArray(parsed.insights) ? parsed.insights : [];

      // One timestamp for the whole batch; the index keeps ids unique.
      const batchTimestamp = Date.now();

      // Convert to output format
      const insights: Insight[] = rawInsights.map((i, idx) => ({
        id: `runtime_${batchTimestamp}_${idx}`,
        severity: i.severity,
        category: i.category as Insight['category'],
        headline: i.headline,
        evidence: i.evidence,
        recommendation: i.recommendation,
        source: 'llm' as const,
        impact: i.impact ? {
          layer: i.impact.layer,
          impactType: i.impact.impactType,
          estimatedImpactPercent: i.impact.estimatedImpactPercent,
          effort: i.impact.effort,
          // The LLM does not report a confidence; a fixed value is attached.
          confidence: 0.8,
        } as ImpactEstimate : undefined,
      }));

      // Falsy values from the LLM (including 0 and '') fall back to locally
      // derived figures or to false/0/'unknown'.
      return {
        result: {
          insights,
          detectedPatterns: {
            applicationType: (parsed.detected_patterns?.application_type || 'unknown') as RuntimeAnalyzerOutput['detectedPatterns']['applicationType'],
            multiModelPipeline: parsed.detected_patterns?.multi_model_pipeline || false,
            streamingDetected: parsed.detected_patterns?.streaming_detected || false,
            batchingDetected: parsed.detected_patterns?.batching_detected || false,
            cachingDetected: parsed.detected_patterns?.caching_detected || false,
          },
          summary: {
            totalCalls: parsed.summary?.total_calls || input.runtimeSummary.totalEvents,
            totalTokens: parsed.summary?.total_tokens || calculateTotalTokens(input.runtimeSummary),
            dominantProvider: parsed.summary?.dominant_provider || getDominantProvider(input.runtimeSummary),
            dominantModel: parsed.summary?.dominant_model || getDominantModel(input.runtimeSummary),
            estimatedDailyCostUSD: parsed.summary?.estimated_daily_cost_usd || 0,
          },
        },
        toolsUsed,
      };
    } catch (error) {
      // Query failures and unparsable JSON both degrade to the basic analysis
      console.warn('[runtime-analyzer] LLM analysis failed:', error);
      return {
        result: buildFallbackResult(input.events, input.runtimeSummary),
        toolsUsed,
      };
    }
  },
};
285
+
286
+ // =============================================================================
287
+ // FALLBACK HELPERS
288
+ // =============================================================================
289
+
/**
 * Build the analysis returned when the LLM is unavailable or its reply is
 * unusable. Everything is derived locally: no insights, an `'unknown'`
 * application type, pattern flags read from the events, and totals read from
 * the summary. No daily cost is estimated (reported as 0).
 *
 * @param events Inference events inspected for streaming/batching/caching flags.
 * @param runtimeSummary Aggregated statistics used for counts and dominant names.
 * @returns A complete `RuntimeAnalyzerOutput` with an empty insight list.
 */
function buildFallbackResult(
  events: InferenceEvent[],
  runtimeSummary: RuntimeSummary
): RuntimeAnalyzerOutput {
  const distinctModels = Object.keys(runtimeSummary.byModel).length;

  const detectedPatterns: RuntimeAnalyzerOutput['detectedPatterns'] = {
    applicationType: 'unknown',
    // More than one model in the summary is taken as a multi-model pipeline
    multiModelPipeline: distinctModels > 1,
    streamingDetected: events.some(({ streaming }) => streaming === true),
    // Any defined batch id counts, whatever its value
    batchingDetected: events.some(({ batch_id }) => batch_id !== undefined),
    cachingDetected: events.some(({ cached }) => cached === true),
  };

  const summary: RuntimeAnalyzerOutput['summary'] = {
    totalCalls: runtimeSummary.totalEvents,
    totalTokens: calculateTotalTokens(runtimeSummary),
    dominantProvider: getDominantProvider(runtimeSummary),
    dominantModel: getDominantModel(runtimeSummary),
    estimatedDailyCostUSD: 0,
  };

  return { insights: [], detectedPatterns, summary };
}
312
+
/**
 * Sum input and output tokens over every model in the summary.
 *
 * @param summary Aggregated runtime statistics.
 * @returns Total of `tokens_in + tokens_out` across `summary.byModel`; 0 when there are no models.
 */
function calculateTotalTokens(summary: RuntimeSummary): number {
  return Object.values(summary.byModel).reduce<number>(
    (runningTotal, { tokens_in, tokens_out }) => runningTotal + (tokens_in + tokens_out),
    0
  );
}
320
+
/**
 * Find the provider with the highest call count.
 *
 * @param summary Aggregated runtime statistics.
 * @returns The provider name, or `'unknown'` when no provider has more than
 *   zero calls. On a tie the provider listed first wins.
 */
function getDominantProvider(summary: RuntimeSummary): string {
  const [topProvider] = Object.entries(summary.byProvider).reduce<[string, number]>(
    // Strictly greater: later entries must beat, not match, the current leader
    (leader, [candidate, stats]) => (stats.calls > leader[1] ? [candidate, stats.calls] : leader),
    ['unknown', 0]
  );
  return topProvider;
}
332
+
/**
 * Find the model with the highest call count.
 *
 * @param summary Aggregated runtime statistics.
 * @returns The model name, or `'unknown'` when no model has more than zero
 *   calls. On a tie the model listed first wins.
 */
function getDominantModel(summary: RuntimeSummary): string {
  const [topModel] = Object.entries(summary.byModel).reduce<[string, number]>(
    // Strictly greater: later entries must beat, not match, the current leader
    (leader, [candidate, stats]) => (stats.calls > leader[1] ? [candidate, stats.calls] : leader),
    ['unknown', 0]
  );
  return topModel;
}