@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,534 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { CorrelationAnalyzerAgent, type CorrelationAnalyzerInput } from '../src/agents/correlation-analyzer.js';
3
+ import type { Callsite, InferenceEvent, RuntimeSummary, DriftSignal } from '../src/types.js';
4
+
5
+ // =============================================================================
6
+ // TEST FIXTURES
7
+ // =============================================================================
8
+
9
/**
 * Builds a `Callsite` fixture for the tests in this file.
 *
 * The defaults describe an OpenAI `gpt-4o` call at `src/api/chat.ts:42`.
 * The id is a fresh random `cs_`-prefixed string on every call unless the
 * caller provides one.
 *
 * @param overrides - Fields that replace the defaults (applied last).
 * @returns The default callsite merged with `overrides`.
 */
function makeCallsite(overrides: Partial<Callsite> = {}): Callsite {
  const randomSuffix = Math.random().toString(36).slice(2, 9);
  const defaults: Callsite = {
    id: `cs_${randomSuffix}`,
    file: 'src/api/chat.ts',
    line: 42,
    provider: 'openai',
    model: 'gpt-4o',
    framework: null,
    runtime: null,
    patterns: {},
    confidence: 0.9,
  };
  return { ...defaults, ...overrides };
}
21
+
22
/**
 * Builds an `InferenceEvent` fixture for the tests in this file.
 *
 * The defaults describe one OpenAI `gpt-4o` call (100 input tokens, 50 output
 * tokens, 420 ms) stamped `2024-01-01T00:00:00Z`. The id is a fresh random
 * `evt_`-prefixed string on every call unless the caller provides one.
 *
 * @param overrides - Fields that replace the defaults (applied last).
 * @returns The default event merged with `overrides`.
 */
function makeEvent(overrides: Partial<InferenceEvent> = {}): InferenceEvent {
  const randomSuffix = Math.random().toString(36).slice(2, 9);
  const defaults: InferenceEvent = {
    id: `evt_${randomSuffix}`,
    ts: '2024-01-01T00:00:00Z',
    provider: 'openai',
    model: 'gpt-4o',
    input_tokens: 100,
    output_tokens: 50,
    latency_ms: 420,
  };
  return { ...defaults, ...overrides };
}
32
+
33
/**
 * Builds a `RuntimeSummary` fixture for the tests in this file.
 *
 * By default the summary reports 100 events, all attributed to provider
 * `openai` and model `gpt-4o`, with global latency percentiles of
 * 400 / 800 / 1200 ms. Overrides replace whole top-level fields: passing
 * `byModel` swaps the entire map, it is not merged into the default one.
 *
 * @param overrides - Top-level fields that replace the defaults (applied last).
 * @returns The default summary merged with `overrides`.
 */
function makeRuntimeSummary(overrides: Partial<RuntimeSummary> = {}): RuntimeSummary {
  // A factory rather than a shared constant: the provider and model buckets
  // must stay distinct objects, exactly as two separate literals would be.
  const defaultBucket = () => ({
    calls: 100,
    tokens_in: 10000,
    tokens_out: 5000,
    latency_p50: 400,
    latency_p95: 800,
    latency_p99: 1200,
  });

  const defaults: RuntimeSummary = {
    totalEvents: 100,
    byProvider: { openai: defaultBucket() },
    byModel: { 'gpt-4o': defaultBucket() },
    global: { p50: 400, p95: 800, p99: 1200 },
  };
  return { ...defaults, ...overrides };
}
62
+
63
/**
 * Builds a complete `CorrelationAnalyzerInput` for the tests in this file.
 *
 * The defaults are one default callsite, one default event and the default
 * runtime summary. The default fixtures are always constructed, even for
 * fields the caller overrides.
 *
 * @param overrides - Top-level fields that replace the defaults (applied last).
 * @returns The default input merged with `overrides`.
 */
function makeInput(overrides: Partial<CorrelationAnalyzerInput> = {}): CorrelationAnalyzerInput {
  const defaults: CorrelationAnalyzerInput = {
    callsites: [makeCallsite()],
    events: [makeEvent()],
    runtimeSummary: makeRuntimeSummary(),
  };
  return { ...defaults, ...overrides };
}
69
+
70
+ // =============================================================================
71
+ // AGENT PROPERTIES TESTS
72
+ // =============================================================================
73
+
74
+ describe('CorrelationAnalyzerAgent', () => {
75
// Static metadata checks: none of these tests call execute().
describe('agent properties', () => {
  it('has correct name', () => {
    const { name } = CorrelationAnalyzerAgent;
    expect(name).toBe('correlation-analyzer');
  });

  it('has description', () => {
    const { description } = CorrelationAnalyzerAgent;
    expect(description).toBeDefined();
    // Defined is not enough: the description must also be non-empty.
    expect(description.length).toBeGreaterThan(0);
  });

  it('has tools registry', () => {
    const { tools } = CorrelationAnalyzerAgent;
    expect(tools).toBeDefined();
  });
});
89
+
90
+ // =============================================================================
91
+ // OUTPUT FORMAT TESTS
92
+ // =============================================================================
93
+
94
// Shape checks on the object returned by execute(), independent of the
// specific correlation outcome.
describe('output format', () => {
  it('returns correct structure', async () => {
    const { result } = await CorrelationAnalyzerAgent.execute(makeInput());
    const { insights, driftSignals, correlationSummary, alignmentScore, overallAssessment } = result;

    // Both collections must be arrays.
    expect(Array.isArray(insights)).toBe(true);
    expect(Array.isArray(driftSignals)).toBe(true);

    // Every counter of the correlation summary must be present.
    const summaryKeys = [
      'totalCodeCallsites',
      'totalRuntimeModels',
      'matched',
      'codeOnly',
      'runtimeOnly',
      'mismatched',
    ];
    for (const key of summaryKeys) {
      expect(correlationSummary).toHaveProperty(key);
    }

    // The alignment score is a number within [0, 1].
    expect(typeof alignmentScore).toBe('number');
    expect(alignmentScore).toBeGreaterThanOrEqual(0);
    expect(alignmentScore).toBeLessThanOrEqual(1);

    // The overall assessment is a non-empty string.
    expect(typeof overallAssessment).toBe('string');
    expect(overallAssessment.length).toBeGreaterThan(0);
  });

  it('drift signals have correct type values', async () => {
    const allowedTypes: DriftSignal['type'][] = ['codeOnly', 'runtimeOnly', 'mismatch', 'patternDrift'];

    // Code declares claude-3-opus while runtime only reports gpt-4o.
    const input = makeInput({
      callsites: [makeCallsite({ provider: 'anthropic', model: 'claude-3-opus' })],
      runtimeSummary: makeRuntimeSummary({
        byModel: {
          'gpt-4o': {
            calls: 100,
            tokens_in: 10000,
            tokens_out: 5000,
            latency_p50: 400,
            latency_p95: 800,
            latency_p99: 1200,
          },
        },
      }),
    });

    const { result } = await CorrelationAnalyzerAgent.execute(input);

    // Only emitted signals are checked; an empty list passes.
    result.driftSignals.forEach((signal) => {
      expect(allowedTypes).toContain(signal.type);
    });
  });
});
143
+
144
+ // =============================================================================
145
+ // CORRELATION LOGIC TESTS
146
+ // =============================================================================
147
+
148
// Checks how execute() relates code callsites to the models seen at runtime.
describe('correlation logic', () => {
  // Stats for a model that served all 100 calls. Spread at each use so every
  // input gets its own copy.
  const fullTraffic = {
    calls: 100,
    tokens_in: 10000,
    tokens_out: 5000,
    latency_p50: 400,
    latency_p95: 800,
    latency_p99: 1200,
  };

  // Code and runtime agree on openai:gpt-4o.
  const alignedInput = () =>
    makeInput({
      callsites: [makeCallsite({ provider: 'openai', model: 'gpt-4o' })],
      runtimeSummary: makeRuntimeSummary({
        byModel: { 'gpt-4o': { ...fullTraffic } },
      }),
    });

  // Code declares anthropic:claude-3-opus, runtime only reports gpt-4o.
  const divergedInput = () =>
    makeInput({
      callsites: [makeCallsite({ provider: 'anthropic', model: 'claude-3-opus' })],
      runtimeSummary: makeRuntimeSummary({
        byModel: { 'gpt-4o': { ...fullTraffic } },
      }),
    });

  it('calculates alignment score for perfect match', async () => {
    // Same provider:model in both code and runtime.
    const { result } = await CorrelationAnalyzerAgent.execute(alignedInput());

    expect(result.alignmentScore).toBeGreaterThanOrEqual(0.5);
  });

  it('detects code-only inference points', async () => {
    const { result } = await CorrelationAnalyzerAgent.execute(divergedInput());

    // LLM may classify this as codeOnly, mismatch, or another drift type.
    // Key assertion: alignment is below perfect and some drift is reported.
    expect(result.alignmentScore).toBeLessThan(1.0);

    const driftReported =
      result.driftSignals.length > 0 ||
      result.correlationSummary.codeOnly > 0 ||
      result.correlationSummary.mismatched > 0;
    expect(driftReported).toBe(true);
  });

  it('detects runtime-only models', async () => {
    // claude-3-opus shows up at runtime but has no callsite in code.
    const input = makeInput({
      callsites: [makeCallsite({ provider: 'openai', model: 'gpt-4o' })],
      runtimeSummary: makeRuntimeSummary({
        byModel: {
          'gpt-4o': { ...fullTraffic, calls: 50, tokens_in: 5000, tokens_out: 2500 },
          'claude-3-opus': {
            calls: 50,
            tokens_in: 5000,
            tokens_out: 2500,
            latency_p50: 500,
            latency_p95: 1000,
            latency_p99: 1500,
          },
        },
      }),
    });

    const { result } = await CorrelationAnalyzerAgent.execute(input);

    const hasRuntimeOnlySignal = result.driftSignals.some((signal) => signal.type === 'runtimeOnly');
    expect(hasRuntimeOnlySignal).toBe(true);
    expect(result.correlationSummary.runtimeOnly).toBeGreaterThan(0);
  });

  it('generates appropriate overall assessment for good alignment', async () => {
    const { result } = await CorrelationAnalyzerAgent.execute(alignedInput());

    const assessment = result.overallAssessment.toLowerCase();
    expect(assessment).toMatch(/alignment|good|match/);
  });

  it('generates appropriate overall assessment for significant drift', async () => {
    const { result } = await CorrelationAnalyzerAgent.execute(divergedInput());

    // LLM may use various terms for mismatch: drift, different, significant, misalignment, mismatch
    const assessment = result.overallAssessment.toLowerCase();
    expect(assessment).toMatch(/drift|different|significant|misalignment|mismatch/);
  });
});
245
+
246
+ // =============================================================================
247
+ // CORRELATION SUMMARY TESTS
248
+ // =============================================================================
249
+
250
// Checks the counters reported in result.correlationSummary.
describe('correlation summary', () => {
  /**
   * Builds one per-model stats entry. Arguments follow the field order of the
   * resulting object.
   */
  const bucket = (
    calls: number,
    tokens_in: number,
    tokens_out: number,
    latency_p50: number,
    latency_p95: number,
    latency_p99: number,
  ) => ({ calls, tokens_in, tokens_out, latency_p50, latency_p95, latency_p99 });

  it('counts callsites correctly', async () => {
    const input = makeInput({
      callsites: ['cs_1', 'cs_2', 'cs_3'].map((id) => makeCallsite({ id })),
    });

    const { result } = await CorrelationAnalyzerAgent.execute(input);

    // LLM may dedupe by provider:model or count differently.
    // Key assertion: at least one callsite is reported.
    expect(result.correlationSummary.totalCodeCallsites).toBeGreaterThanOrEqual(1);
  });

  it('counts runtime models correctly', async () => {
    const input = makeInput({
      runtimeSummary: makeRuntimeSummary({
        byModel: {
          'gpt-4o': bucket(50, 5000, 2500, 400, 800, 1200),
          'gpt-4o-mini': bucket(30, 3000, 1500, 200, 400, 600),
          'claude-3-5-sonnet': bucket(20, 2000, 1000, 500, 1000, 1500),
        },
      }),
    });

    const { result } = await CorrelationAnalyzerAgent.execute(input);

    expect(result.correlationSummary.totalRuntimeModels).toBe(3);
  });

  it('identifies matched provider:model pairs', async () => {
    // Both callsites have a runtime entry under the same model name.
    const input = makeInput({
      callsites: [
        makeCallsite({ provider: 'openai', model: 'gpt-4o' }),
        makeCallsite({ provider: 'openai', model: 'gpt-4o-mini' }),
      ],
      runtimeSummary: makeRuntimeSummary({
        byModel: {
          'gpt-4o': bucket(50, 5000, 2500, 400, 800, 1200),
          'gpt-4o-mini': bucket(50, 5000, 2500, 200, 400, 600),
        },
      }),
    });

    const { result } = await CorrelationAnalyzerAgent.execute(input);

    expect(result.correlationSummary.matched).toBe(2);
  });
});
302
+
303
+ // =============================================================================
304
+ // EDGE CASES
305
+ // =============================================================================
306
+
307
// Degenerate inputs: empty collections, null identifiers, unrecognised models.
describe('edge cases', () => {
  // Runtime summary with no per-model and no per-provider entries.
  const emptyRuntimeSummary = () => makeRuntimeSummary({ byModel: {}, byProvider: {} });

  it('handles empty callsites', async () => {
    const input = makeInput({ callsites: [] });

    const { result } = await CorrelationAnalyzerAgent.execute(input);
    const { correlationSummary } = result;

    expect(correlationSummary.totalCodeCallsites).toBe(0);
    expect(correlationSummary.codeOnly).toBe(0);
  });

  it('handles empty runtime summary', async () => {
    const input = makeInput({ runtimeSummary: emptyRuntimeSummary() });

    const { result } = await CorrelationAnalyzerAgent.execute(input);
    const { correlationSummary } = result;

    expect(correlationSummary.totalRuntimeModels).toBe(0);
    expect(correlationSummary.runtimeOnly).toBe(0);
  });

  it('handles both empty', async () => {
    const input = makeInput({
      callsites: [],
      runtimeSummary: emptyRuntimeSummary(),
    });

    const { result } = await CorrelationAnalyzerAgent.execute(input);

    // Empty inputs should not crash; alignment interpretation varies (LLM vs fallback),
    // so only the [0, 1] range is asserted.
    expect(result.alignmentScore).toBeGreaterThanOrEqual(0);
    expect(result.alignmentScore).toBeLessThanOrEqual(1);
  });

  it('handles callsites with null provider/model', async () => {
    const unidentifiedCallsite = makeCallsite({ provider: null, model: null });
    const input = makeInput({ callsites: [unidentifiedCallsite] });

    const { result } = await CorrelationAnalyzerAgent.execute(input);

    // Should not crash, but the callsite won't be matchable.
    expect(result).toBeDefined();
  });

  it('handles unknown provider inference from model name', async () => {
    // Runtime reports a single model, 'llama-70b', and code has no callsites.
    const input = makeInput({
      callsites: [],
      runtimeSummary: makeRuntimeSummary({
        byModel: {
          'llama-70b': {
            calls: 100,
            tokens_in: 10000,
            tokens_out: 5000,
            latency_p50: 400,
            latency_p95: 800,
            latency_p99: 1200,
          },
        },
      }),
    });

    const { result } = await CorrelationAnalyzerAgent.execute(input);

    // Should handle the unknown provider gracefully and still count the model.
    expect(result.correlationSummary.totalRuntimeModels).toBe(1);
  });
});
378
+
379
+ // =============================================================================
380
+ // ALIGNMENT SCORE TESTS
381
+ // =============================================================================
382
+
383
// Alignment score expectations for a full match, a full mismatch and a
// partial overlap between code callsites and runtime models.
describe('alignment score calculation', () => {
  it('returns 1.0 for perfect match', async () => {
    const codeSide = [makeCallsite({ provider: 'openai', model: 'gpt-4o' })];
    const runtimeSide = makeRuntimeSummary({
      byModel: {
        'gpt-4o': {
          calls: 100,
          tokens_in: 10000,
          tokens_out: 5000,
          latency_p50: 400,
          latency_p95: 800,
          latency_p99: 1200,
        },
      },
    });
    const scenario = makeInput({ callsites: codeSide, runtimeSummary: runtimeSide });

    const outcome = await CorrelationAnalyzerAgent.execute(scenario);

    expect(outcome.result.alignmentScore).toBe(1.0);
  });

  it('returns low score for complete mismatch', async () => {
    const codeSide = [makeCallsite({ provider: 'openai', model: 'gpt-4o' })];
    const runtimeSide = makeRuntimeSummary({
      byModel: {
        'claude-3-opus': {
          calls: 100,
          tokens_in: 10000,
          tokens_out: 5000,
          latency_p50: 500,
          latency_p95: 1000,
          latency_p99: 1500,
        },
      },
    });
    const scenario = makeInput({ callsites: codeSide, runtimeSummary: runtimeSide });

    const outcome = await CorrelationAnalyzerAgent.execute(scenario);

    // Only an upper bound is asserted: a complete mismatch should score low,
    // but the LLM may score it differently than the fallback does.
    expect(outcome.result.alignmentScore).toBeLessThanOrEqual(0.5);
  });

  it('returns intermediate score for partial match', async () => {
    const codeSide = [
      makeCallsite({ provider: 'openai', model: 'gpt-4o' }),
      makeCallsite({ provider: 'anthropic', model: 'claude-3-opus' }),
    ];
    const runtimeSide = makeRuntimeSummary({
      byModel: {
        'gpt-4o': {
          calls: 50,
          tokens_in: 5000,
          tokens_out: 2500,
          latency_p50: 400,
          latency_p95: 800,
          latency_p99: 1200,
        },
        'gpt-4o-mini': {
          calls: 50,
          tokens_in: 5000,
          tokens_out: 2500,
          latency_p50: 200,
          latency_p95: 400,
          latency_p99: 600,
        },
      },
    });
    const scenario = makeInput({ callsites: codeSide, runtimeSummary: runtimeSide });

    const outcome = await CorrelationAnalyzerAgent.execute(scenario);
    const analysis = outcome.result;

    // Two code pairs and two runtime pairs share one entry (3 unique
    // provider:model pairs, 1 overlap). Expected: (1 * 2) / 4 = 0.5.
    // The assertions only require a score strictly between 0 and 1.
    expect(analysis.alignmentScore).toBeGreaterThan(0);
    expect(analysis.alignmentScore).toBeLessThan(1);
  });
});
441
+
442
+ // =============================================================================
443
+ // DRIFT SIGNAL TESTS
444
+ // =============================================================================
445
+
446
// Drift signals: present when code and runtime disagree, absent when
// they are fully aligned.
describe('drift signal generation', () => {
  it('generates drift signals for code/runtime mismatch', async () => {
    const codeSide = [
      makeCallsite({ id: 'cs_orphan', provider: 'anthropic', model: 'claude-3-opus' }),
    ];
    const runtimeSide = makeRuntimeSummary({
      byModel: {
        'gpt-4o': {
          calls: 100,
          tokens_in: 10000,
          tokens_out: 5000,
          latency_p50: 400,
          latency_p95: 800,
          latency_p99: 1200,
        },
      },
    });
    const scenario = makeInput({ callsites: codeSide, runtimeSummary: runtimeSide });

    const outcome = await CorrelationAnalyzerAgent.execute(scenario);
    const analysis = outcome.result;

    // Depending on the LLM, the drift may surface as a drift signal, a
    // codeOnly count or a runtimeOnly count, so the three are summed.
    const driftEvidence =
      analysis.driftSignals.length +
      analysis.correlationSummary.codeOnly +
      analysis.correlationSummary.runtimeOnly;
    expect(driftEvidence).toBeGreaterThan(0);
  });

  it('generates runtimeOnly drift signal with correct fields', async () => {
    const codeSide = [makeCallsite({ provider: 'openai', model: 'gpt-4o' })];
    const runtimeSide = makeRuntimeSummary({
      byModel: {
        'gpt-4o': {
          calls: 50,
          tokens_in: 5000,
          tokens_out: 2500,
          latency_p50: 400,
          latency_p95: 800,
          latency_p99: 1200,
        },
        'claude-3-opus': {
          calls: 50,
          tokens_in: 5000,
          tokens_out: 2500,
          latency_p50: 500,
          latency_p95: 1000,
          latency_p99: 1500,
        },
      },
    });
    const scenario = makeInput({ callsites: codeSide, runtimeSummary: runtimeSide });

    const outcome = await CorrelationAnalyzerAgent.execute(scenario);

    // Take the first signal typed 'runtimeOnly' and check its fields.
    const runtimeOnlySignal = outcome.result.driftSignals.find(
      (signal) => signal.type === 'runtimeOnly',
    );
    expect(runtimeOnlySignal).toBeDefined();
    expect(runtimeOnlySignal?.model).toBe('claude-3-opus');
    expect(runtimeOnlySignal?.message).toBeDefined();
  });

  it('does not generate drift signals when fully aligned', async () => {
    const codeSide = [makeCallsite({ provider: 'openai', model: 'gpt-4o' })];
    const runtimeSide = makeRuntimeSummary({
      byModel: {
        'gpt-4o': {
          calls: 100,
          tokens_in: 10000,
          tokens_out: 5000,
          latency_p50: 400,
          latency_p95: 800,
          latency_p99: 1200,
        },
      },
    });
    const scenario = makeInput({ callsites: codeSide, runtimeSummary: runtimeSide });

    const outcome = await CorrelationAnalyzerAgent.execute(scenario);

    expect(outcome.result.driftSignals).toHaveLength(0);
  });
});
503
+
504
+ // =============================================================================
505
+ // INSIGHTS VALIDATION (when LLM is available)
506
+ // =============================================================================
507
+
508
// Structural validation of any insights the agent returns.
describe('insights structure', () => {
  it('insights have required fields when generated', async () => {
    const codeSide = [makeCallsite({ provider: 'anthropic', model: 'claude-3-opus' })];
    const runtimeSide = makeRuntimeSummary({
      byModel: {
        'gpt-4o': {
          calls: 100,
          tokens_in: 10000,
          tokens_out: 5000,
          latency_p50: 400,
          latency_p95: 800,
          latency_p99: 1200,
        },
      },
    });
    const scenario = makeInput({ callsites: codeSide, runtimeSummary: runtimeSide });

    const outcome = await CorrelationAnalyzerAgent.execute(scenario);

    // Insights may be empty in fallback mode, so the loop may not run;
    // any insight that is present must have the expected structure.
    const requiredKeys = ['id', 'severity', 'category', 'headline', 'evidence'];
    const allowedSeverities = ['critical', 'warning', 'info'];
    for (const insight of outcome.result.insights) {
      for (const key of requiredKeys) {
        expect(insight).toHaveProperty(key);
      }
      expect(allowedSeverities).toContain(insight.severity);
    }
  });
});
534
+ });