@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,347 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import {
3
+ generateCounterfactuals,
4
+ formatCounterfactualSummary,
5
+ hasSignificantOpportunities,
6
+ rankCounterfactuals,
7
+ } from '../src/counterfactuals.js';
8
+ import type { Callsite, InferenceMap } from '../src/types.js';
9
+
10
+ // =============================================================================
11
+ // TEST FIXTURES
12
+ // =============================================================================
13
+
14
+ function createCallsite(overrides: Partial<Callsite> = {}): Callsite {
15
+ return {
16
+ id: `cs_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
17
+ file: 'src/agent.ts',
18
+ line: 42,
19
+ col: 10,
20
+ provider: 'openai',
21
+ model: 'gpt-4o',
22
+ framework: null,
23
+ runtime: null,
24
+ patterns: {},
25
+ confidence: 0.9,
26
+ ...overrides,
27
+ };
28
+ }
29
+
30
+ function createInferenceMap(callsites: Callsite[]): InferenceMap {
31
+ return {
32
+ version: '0.1',
33
+ callsites,
34
+ frameworks: {},
35
+ scanResult: {
36
+ root: '/test',
37
+ files: [],
38
+ summary: { totalFiles: 0, totalLoc: 0, languages: [], totalCandidates: 0 },
39
+ },
40
+ };
41
+ }
42
+
43
+ // =============================================================================
44
+ // TESTS
45
+ // =============================================================================
46
+
47
+ describe('counterfactuals', () => {
48
+ describe('generateCounterfactuals', () => {
49
+ it('should generate model swap counterfactuals for GPT-4', () => {
50
+ const inferenceMap = createInferenceMap([
51
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4' }),
52
+ ]);
53
+
54
+ const result = generateCounterfactuals(inferenceMap);
55
+
56
+ const modelSwaps = result.counterfactuals.filter(cf => cf.type === 'model_swap');
57
+ expect(modelSwaps.length).toBeGreaterThan(0);
58
+
59
+ // Should suggest gpt-4o and gpt-4o-mini as alternatives
60
+ const suggestedModels = modelSwaps.map(cf => cf.proposedState.model);
61
+ expect(suggestedModels).toContain('gpt-4o');
62
+ });
63
+
64
+ it('should generate model swap counterfactuals for Claude Opus', () => {
65
+ const inferenceMap = createInferenceMap([
66
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'claude-3-opus' }),
67
+ ]);
68
+
69
+ const result = generateCounterfactuals(inferenceMap);
70
+
71
+ const modelSwaps = result.counterfactuals.filter(cf => cf.type === 'model_swap');
72
+ expect(modelSwaps.length).toBeGreaterThan(0);
73
+
74
+ // Should suggest Claude Sonnet/Haiku as alternatives
75
+ const suggestedModels = modelSwaps.map(cf => cf.proposedState.model);
76
+ expect(suggestedModels.some(m => m.includes('sonnet') || m.includes('haiku'))).toBe(true);
77
+ });
78
+
79
+ it('should generate batching counterfactuals for multiple unbatched callsites', () => {
80
+ const inferenceMap = createInferenceMap([
81
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4o', patterns: {} }),
82
+ createCallsite({ file: 'src/b.ts', line: 20, model: 'gpt-4o', patterns: {} }),
83
+ createCallsite({ file: 'src/c.ts', line: 30, model: 'gpt-4o', patterns: {} }),
84
+ ]);
85
+
86
+ const result = generateCounterfactuals(inferenceMap);
87
+
88
+ const batchingCfs = result.counterfactuals.filter(cf => cf.type === 'batch_optimization');
89
+ expect(batchingCfs.length).toBeGreaterThan(0);
90
+ });
91
+
92
+ it('should not generate batching counterfactuals for single callsite', () => {
93
+ const inferenceMap = createInferenceMap([
94
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4o', patterns: {} }),
95
+ ]);
96
+
97
+ const result = generateCounterfactuals(inferenceMap);
98
+
99
+ const batchingCfs = result.counterfactuals.filter(cf => cf.type === 'batch_optimization');
100
+ expect(batchingCfs.length).toBe(0);
101
+ });
102
+
103
+ it('should generate caching counterfactuals for uncached callsites', () => {
104
+ const inferenceMap = createInferenceMap([
105
+ createCallsite({ file: 'src/a.ts', line: 10, patterns: {} }),
106
+ ]);
107
+
108
+ const result = generateCounterfactuals(inferenceMap);
109
+
110
+ const cachingCfs = result.counterfactuals.filter(cf => cf.type === 'cache_addition');
111
+ expect(cachingCfs.length).toBe(1);
112
+ });
113
+
114
+ it('should not generate caching counterfactuals for cached callsites', () => {
115
+ const inferenceMap = createInferenceMap([
116
+ createCallsite({ file: 'src/a.ts', line: 10, patterns: { caching: true } }),
117
+ ]);
118
+
119
+ const result = generateCounterfactuals(inferenceMap);
120
+
121
+ const cachingCfs = result.counterfactuals.filter(cf => cf.type === 'cache_addition');
122
+ expect(cachingCfs.length).toBe(0);
123
+ });
124
+
125
+ it('should generate streaming counterfactuals for non-streaming callsites', () => {
126
+ const inferenceMap = createInferenceMap([
127
+ createCallsite({ file: 'src/a.ts', line: 10, patterns: {} }),
128
+ ]);
129
+
130
+ const result = generateCounterfactuals(inferenceMap);
131
+
132
+ const streamingCfs = result.counterfactuals.filter(cf => cf.type === 'streaming_enable');
133
+ expect(streamingCfs.length).toBe(1);
134
+ });
135
+
136
+ it('should not generate streaming counterfactuals for streaming callsites', () => {
137
+ const inferenceMap = createInferenceMap([
138
+ createCallsite({ file: 'src/a.ts', line: 10, patterns: { streaming: true } }),
139
+ ]);
140
+
141
+ const result = generateCounterfactuals(inferenceMap);
142
+
143
+ const streamingCfs = result.counterfactuals.filter(cf => cf.type === 'streaming_enable');
144
+ expect(streamingCfs.length).toBe(0);
145
+ });
146
+
147
+ it('should calculate impact for each counterfactual', () => {
148
+ const inferenceMap = createInferenceMap([
149
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
150
+ ]);
151
+
152
+ const result = generateCounterfactuals(inferenceMap);
153
+
154
+ for (const cf of result.counterfactuals) {
155
+ expect(cf.impact).toBeDefined();
156
+ expect(cf.impact.latencyDeltaPercent).toBeDefined();
157
+ expect(cf.impact.costDeltaPercent).toBeDefined();
158
+ }
159
+ });
160
+
161
+ it('should include tradeoffs for each counterfactual', () => {
162
+ const inferenceMap = createInferenceMap([
163
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
164
+ ]);
165
+
166
+ const result = generateCounterfactuals(inferenceMap);
167
+
168
+ for (const cf of result.counterfactuals) {
169
+ expect(cf.impact.tradeoffs).toBeDefined();
170
+ expect(Array.isArray(cf.impact.tradeoffs)).toBe(true);
171
+ }
172
+ });
173
+
174
+ it('should handle empty inference map', () => {
175
+ const inferenceMap = createInferenceMap([]);
176
+
177
+ const result = generateCounterfactuals(inferenceMap);
178
+
179
+ expect(result.counterfactuals.length).toBe(0);
180
+ expect(result.summary.totalOpportunities).toBe(0);
181
+ });
182
+
183
+ it('should calculate summary correctly', () => {
184
+ const inferenceMap = createInferenceMap([
185
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
186
+ ]);
187
+
188
+ const result = generateCounterfactuals(inferenceMap);
189
+
190
+ expect(result.summary.totalOpportunities).toBe(result.counterfactuals.length);
191
+ expect(result.summary.maxLatencySavingsPercent).toBeGreaterThanOrEqual(0);
192
+ });
193
+
194
+ it('should include affected points for each counterfactual', () => {
195
+ const callsite1 = createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4' });
196
+ const inferenceMap = createInferenceMap([callsite1]);
197
+
198
+ const result = generateCounterfactuals(inferenceMap);
199
+
200
+ for (const cf of result.counterfactuals) {
201
+ expect(cf.affectedPoints).toBeDefined();
202
+ expect(Array.isArray(cf.affectedPoints)).toBe(true);
203
+ }
204
+ });
205
+ });
206
+
207
+ describe('formatCounterfactualSummary', () => {
208
+ it('should format summary with opportunity count', () => {
209
+ const inferenceMap = createInferenceMap([
210
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
211
+ ]);
212
+
213
+ const result = generateCounterfactuals(inferenceMap);
214
+ const summary = formatCounterfactualSummary(result);
215
+
216
+ expect(summary).toContain('optimization opportunities');
217
+ });
218
+
219
+ it('should include latency savings in summary', () => {
220
+ const inferenceMap = createInferenceMap([
221
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
222
+ ]);
223
+
224
+ const result = generateCounterfactuals(inferenceMap);
225
+ const summary = formatCounterfactualSummary(result);
226
+
227
+ expect(summary).toContain('latency savings');
228
+ });
229
+ });
230
+
231
+ describe('hasSignificantOpportunities', () => {
232
+ it('should return true when high-impact opportunities exist', () => {
233
+ const inferenceMap = createInferenceMap([
234
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
235
+ ]);
236
+
237
+ const result = generateCounterfactuals(inferenceMap);
238
+
239
+ expect(hasSignificantOpportunities(result)).toBe(true);
240
+ });
241
+
242
+ it('should return false when no opportunities', () => {
243
+ const inferenceMap = createInferenceMap([]);
244
+
245
+ const result = generateCounterfactuals(inferenceMap);
246
+
247
+ expect(hasSignificantOpportunities(result)).toBe(false);
248
+ });
249
+ });
250
+
251
+ describe('rankCounterfactuals', () => {
252
+ it('should rank by latency when priority is latency', () => {
253
+ const inferenceMap = createInferenceMap([
254
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
255
+ createCallsite({ file: 'src/b.ts', line: 20, model: 'gpt-4', patterns: {} }),
256
+ ]);
257
+
258
+ const result = generateCounterfactuals(inferenceMap);
259
+ const ranked = rankCounterfactuals(result, 'latency');
260
+
261
+ // Should be sorted by latency delta (most negative first)
262
+ for (let i = 1; i < ranked.length; i++) {
263
+ expect(ranked[i].impact.latencyDeltaPercent)
264
+ .toBeGreaterThanOrEqual(ranked[i - 1].impact.latencyDeltaPercent);
265
+ }
266
+ });
267
+
268
+ it('should rank by cost when priority is cost', () => {
269
+ const inferenceMap = createInferenceMap([
270
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
271
+ createCallsite({ file: 'src/b.ts', line: 20, model: 'gpt-4', patterns: {} }),
272
+ ]);
273
+
274
+ const result = generateCounterfactuals(inferenceMap);
275
+ const ranked = rankCounterfactuals(result, 'cost');
276
+
277
+ // Should be sorted by cost delta (most negative first)
278
+ for (let i = 1; i < ranked.length; i++) {
279
+ expect(ranked[i].impact.costDeltaPercent)
280
+ .toBeGreaterThanOrEqual(ranked[i - 1].impact.costDeltaPercent);
281
+ }
282
+ });
283
+
284
+ it('should rank by combined score when priority is balanced', () => {
285
+ const inferenceMap = createInferenceMap([
286
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4', patterns: {} }),
287
+ createCallsite({ file: 'src/b.ts', line: 20, model: 'gpt-4', patterns: {} }),
288
+ ]);
289
+
290
+ const result = generateCounterfactuals(inferenceMap);
291
+ const ranked = rankCounterfactuals(result, 'balanced');
292
+
293
+ // Should be sorted by combined score (latency + cost)
294
+ for (let i = 1; i < ranked.length; i++) {
295
+ const scorePrev = ranked[i - 1].impact.latencyDeltaPercent + ranked[i - 1].impact.costDeltaPercent;
296
+ const scoreCurr = ranked[i].impact.latencyDeltaPercent + ranked[i].impact.costDeltaPercent;
297
+ expect(scoreCurr).toBeGreaterThanOrEqual(scorePrev);
298
+ }
299
+ });
300
+ });
301
+
302
+ describe('counterfactual types', () => {
303
+ it('should generate model_swap with correct structure', () => {
304
+ const inferenceMap = createInferenceMap([
305
+ createCallsite({ file: 'src/a.ts', line: 10, model: 'gpt-4' }),
306
+ ]);
307
+
308
+ const result = generateCounterfactuals(inferenceMap);
309
+ const modelSwap = result.counterfactuals.find(cf => cf.type === 'model_swap');
310
+
311
+ expect(modelSwap).toBeDefined();
312
+ expect(modelSwap?.currentState.model).toBe('gpt-4');
313
+ expect(modelSwap?.proposedState.model).toBeDefined();
314
+ expect(modelSwap?.confidence).toBeDefined();
315
+ expect(modelSwap?.effort).toBeDefined();
316
+ });
317
+
318
+ it('should generate cache_addition with correct structure', () => {
319
+ const inferenceMap = createInferenceMap([
320
+ createCallsite({ file: 'src/a.ts', line: 10, patterns: {} }),
321
+ ]);
322
+
323
+ const result = generateCounterfactuals(inferenceMap);
324
+ const caching = result.counterfactuals.find(cf => cf.type === 'cache_addition');
325
+
326
+ expect(caching).toBeDefined();
327
+ expect(caching?.currentState.pattern).toBe('no caching');
328
+ expect(caching?.proposedState.pattern).toBe('semantic cache');
329
+ expect(caching?.impact.costDeltaPercent).toBe(-50);
330
+ });
331
+
332
+ it('should generate streaming_enable with correct structure', () => {
333
+ const inferenceMap = createInferenceMap([
334
+ createCallsite({ file: 'src/a.ts', line: 10, patterns: {} }),
335
+ ]);
336
+
337
+ const result = generateCounterfactuals(inferenceMap);
338
+ const streaming = result.counterfactuals.find(cf => cf.type === 'streaming_enable');
339
+
340
+ expect(streaming).toBeDefined();
341
+ expect(streaming?.currentState.pattern).toBe('synchronous');
342
+ expect(streaming?.proposedState.pattern).toBe('streaming');
343
+ expect(streaming?.impact.latencyDeltaPercent).toBe(-80);
344
+ expect(streaming?.impact.costDeltaPercent).toBe(0); // Streaming doesn't affect cost
345
+ });
346
+ });
347
+ });
@@ -0,0 +1 @@
1
+ {"ts":"2024-01-01","provider":"openai","model":"gpt-4o","input_tokens":100,"output_tokens":50,"latency_ms":420}
@@ -0,0 +1 @@
1
+ {"id":"1","ts":"2024-01-01","provider":"openai","model":"gpt-4o","output_tokens":50,"latency_ms":420}
@@ -0,0 +1 @@
1
+ {"id":"1","ts":"2024-01-01","provider":"openai","model":"gpt-4o","input_tokens":100,"output_tokens":50}
@@ -0,0 +1 @@
1
+ {"id":"1","ts":"2024-01-01","provider":"openai","input_tokens":100,"output_tokens":50,"latency_ms":420}
@@ -0,0 +1 @@
1
+ {"id":"1","ts":"2024-01-01","provider":"openai","model":"gpt-4o","input_tokens":100,"latency_ms":420}
@@ -0,0 +1 @@
1
+ {"id":"1","ts":"2024-01-01","model":"gpt-4o","input_tokens":100,"output_tokens":50,"latency_ms":420}
@@ -0,0 +1 @@
1
+ {"id":"1","provider":"openai","model":"gpt-4o","input_tokens":100,"output_tokens":50,"latency_ms":420}
@@ -0,0 +1,3 @@
1
+ id,ts,provider,model,input_tokens,output_tokens,latency_ms
2
+ 1,2024-01-01T00:00:00Z,openai,gpt-4o,100,50,420
3
+ 2,2024-01-01T00:01:00Z,openai,gpt-4o,200,80,580
@@ -0,0 +1 @@
1
+ [{"id":"1","ts":"2024-01-01T00:00:00Z","provider":"anthropic","model":"claude-3-sonnet-20240229","input_tokens":150,"output_tokens":60,"latency_ms":350},{"id":"2","ts":"2024-01-01T00:01:00Z","provider":"anthropic","model":"claude-3-sonnet-20240229","input_tokens":180,"output_tokens":90,"latency_ms":400}]
@@ -0,0 +1,2 @@
1
+ {"id":"1","ts":"2024-01-01T00:00:00Z","provider":"openai","model":"gpt-4o","input_tokens":100,"output_tokens":50,"latency_ms":420}
2
+ {"id":"2","ts":"2024-01-01T00:01:00Z","provider":"openai","model":"gpt-4o","input_tokens":200,"output_tokens":80,"latency_ms":580}
@@ -0,0 +1 @@
1
+ {"id":"1","ts":"2024-01-01T00:00:00Z","provider":"openai","model":"gpt-4o","input_tokens":100,"output_tokens":50,"latency_ms":420,"callsite_id":"cs_001"}
@@ -0,0 +1 @@
1
+ {"id":"1","ts":"2024-01-01T00:00:00Z","provider":"openai","model":"gpt-4o","input_tokens":100,"output_tokens":50,"latency_ms":420,"intent":"chat"}
@@ -0,0 +1 @@
1
+ {"id":"1","ts":"2024-01-01","provider":"openai","model":"gpt-4o","input_tokens":100,"output_tokens":50,"latency_ms":"fast"}
File without changes
@@ -0,0 +1,35 @@
1
+ from openai import OpenAI
2
+ from anthropic import Anthropic
3
+ import os
4
+
5
+ openai_client = OpenAI()
6
+ anthropic_client = Anthropic()
7
+
8
+ PROVIDER = os.getenv("LLM_PROVIDER", "openai")
9
+
10
def route_completion(prompt: str, use_cache: bool = True) -> str:
    """Send ``prompt`` to the backend selected by the module-level ``PROVIDER``.

    ``PROVIDER == "anthropic"`` routes to ``_anthropic_completion``; any other
    value routes to ``_openai_completion``. ``use_cache`` is accepted but is
    not consulted by this function.
    """
    backend = _anthropic_completion if PROVIDER == "anthropic" else _openai_completion
    return backend(prompt)
14
+
15
def _openai_completion(prompt: str) -> str:
    """Return the complete text of a streamed ``gpt-4o`` chat completion.

    ``prompt`` is sent as a single user message with ``stream=True``. A
    streaming request yields incremental chunks rather than one response
    object with ``choices[0].message``, so the text is assembled by
    concatenating each chunk's ``delta.content``.
    """
    stream = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    pieces = []
    for chunk in stream:
        # Skip chunks that carry no choices or an empty/None delta.
        if chunk.choices and chunk.choices[0].delta.content:
            pieces.append(chunk.choices[0].delta.content)
    return "".join(pieces)
22
+
23
def _anthropic_completion(prompt: str) -> str:
    """Return the text of the first content block of a Claude 3 Sonnet reply.

    ``prompt`` is sent as a single user message with ``max_tokens=1024``.
    """
    conversation = [{"role": "user", "content": prompt}]
    reply = anthropic_client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1024,
        messages=conversation,
    )
    first_block = reply.content[0]
    return first_block.text
30
+
31
def with_fallback(prompt: str) -> str:
    """Complete ``prompt`` via OpenAI, falling back to Anthropic on any error.

    Any ``Exception`` raised by ``_openai_completion`` is discarded and the
    prompt is retried once through ``_anthropic_completion``; errors from the
    fallback propagate to the caller.
    """
    try:
        answer = _openai_completion(prompt)
    except Exception:
        # Best-effort: the primary failure is intentionally not re-raised.
        answer = _anthropic_completion(prompt)
    return answer
@@ -0,0 +1,27 @@
1
+ import Anthropic from "@anthropic-ai/sdk";
2
+
3
+ const client = new Anthropic();
4
+
5
/**
 * Sends `query` as a single user message to `claude-3-sonnet-20240229`
 * (max 1024 tokens) and returns the text of the first content block.
 *
 * @param query - The user message to send.
 * @returns The first block's text, or `""` when the first block is not a
 *   text block or the response has no content blocks at all.
 */
export async function runAgent(query: string): Promise<string> {
  const response = await client.messages.create({
    model: "claude-3-sonnet-20240229",
    max_tokens: 1024,
    messages: [{ role: "user", content: query }],
  });

  // Guard against an empty `content` array: indexing it yields `undefined`,
  // and reading `.type` on that would throw instead of returning "".
  const firstBlock = response.content[0];
  return firstBlock?.type === "text" ? firstBlock.text : "";
}
14
+
15
/**
 * Streams a `claude-3-opus-20240229` reply (max 4096 tokens) to `query`,
 * yielding each text fragment as it arrives.
 *
 * Declared as an async generator (`async function*`): `yield` is only valid
 * inside a generator, and the declared return type is `AsyncGenerator<string>`.
 *
 * @param query - The user message to send.
 * @returns An async generator of text fragments from `content_block_delta`
 *   events; non-text deltas are skipped.
 */
export async function* streamResponse(query: string): AsyncGenerator<string> {
  const stream = await client.messages.stream({
    model: "claude-3-opus-20240229",
    max_tokens: 4096,
    messages: [{ role: "user", content: query }],
  });

  for await (const chunk of stream) {
    // Only text deltas carry a `text` field; other delta kinds are ignored.
    if (chunk.type === "content_block_delta" && chunk.delta.type === "text_delta") {
      yield chunk.delta.text;
    }
  }
}
@@ -0,0 +1,33 @@
1
+ const OpenAI = require('openai');
2
+
3
+ const client = new OpenAI();
4
+
5
/**
 * Streams a `gpt-4o` chat completion for `userMessage`, echoing each fragment
 * to stdout as it arrives.
 *
 * @param userMessage - Content of the user turn; a fixed system prompt precedes it.
 * @returns The concatenation of every streamed fragment.
 */
async function chat(userMessage) {
  const messages = [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: userMessage }
  ];
  const stream = await client.chat.completions.create({
    model: 'gpt-4o',
    messages,
    stream: true
  });

  const fragments = [];
  for await (const part of stream) {
    // Chunks without a choice, delta, or content contribute an empty string.
    const text = part.choices[0]?.delta?.content || '';
    fragments.push(text);
    process.stdout.write(text);
  }

  return fragments.join('');
}
24
+
25
/**
 * Requests a `text-embedding-ada-002` embedding for `text`.
 *
 * @param text - Input passed as the request's `input`.
 * @returns The `embedding` of the first entry in the response's `data`.
 */
async function embed(text) {
  const request = {
    model: 'text-embedding-ada-002',
    input: text
  };
  const { data } = await client.embeddings.create(request);
  const first = data[0];
  return first.embedding;
}
32
+
33
+ module.exports = { chat, embed };
@@ -0,0 +1,26 @@
1
+ from openai import OpenAI
2
+
3
+ client = OpenAI()
4
+
5
def chat_completion(prompt: str) -> str:
    """Return the complete text of a streamed ``gpt-4o`` chat completion.

    ``prompt`` is sent as a single user message with ``stream=True``. A
    streaming request yields incremental chunks rather than one response
    object with ``choices[0].message``, so the text is assembled by
    concatenating each chunk's ``delta.content``.
    """
    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    pieces = []
    for chunk in stream:
        # Skip chunks that carry no choices or an empty/None delta.
        if chunk.choices and chunk.choices[0].delta.content:
            pieces.append(chunk.choices[0].delta.content)
    return "".join(pieces)
12
+
13
def batch_embeddings(texts: list[str]) -> list[list[float]]:
    """Embed ``texts`` in one ``text-embedding-3-small`` request.

    Returns the ``embedding`` of each item in the response's ``data``, in the
    order the response lists them.
    """
    result = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    vectors = []
    for record in result.data:
        vectors.append(record.embedding)
    return vectors
19
+
20
async def async_completion(prompt: str) -> str:
    """Return a ``gpt-3.5-turbo`` completion for ``prompt`` without blocking the event loop.

    The module-level ``client`` is the synchronous ``OpenAI`` client, so its
    ``create`` call returns a plain response object that cannot be awaited;
    the blocking call is run in a worker thread instead. The retry budget is a
    client option rather than a ``create()`` argument, so it is applied with
    ``with_options(max_retries=3)``.
    """
    import asyncio

    retrying_client = client.with_options(max_retries=3)
    response = await asyncio.to_thread(
        retrying_client.chat.completions.create,
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
@@ -0,0 +1,22 @@
1
+ from openai import OpenAI
2
+
3
+ # vLLM OpenAI-compatible server
4
+ client = OpenAI(
5
+ base_url="http://localhost:8000/v1",
6
+ api_key="dummy"
7
+ )
8
+
9
def generate(prompt: str, max_tokens: int = 512) -> str:
    """Return the text of the first choice for ``prompt`` from the completions endpoint.

    The request targets ``meta-llama/Llama-3-70b-chat-hf`` with a fixed
    temperature of 0.7 and the given ``max_tokens`` (default 512).
    """
    request = {
        "model": "meta-llama/Llama-3-70b-chat-hf",
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": 0.7,
    }
    completion = client.completions.create(**request)
    return completion.choices[0].text
17
+
18
def batch_generate(prompts: list[str]) -> list[str]:
    """Call ``generate`` once per prompt, in order, and return the results as a list."""
    return [generate(item) for item in prompts]