@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
package/src/types.ts ADDED
@@ -0,0 +1,873 @@
1
+ import { z } from 'zod';
2
+
3
+ // =============================================================================
4
+ // ENUMS
5
+ // =============================================================================
6
+
7
/**
 * Closed set of provider identifiers accepted by the schemas in this file.
 *
 * `'unknown'` is the final member. Member order is kept exactly as declared.
 */
export const Provider = z.enum([
  'openai',
  'anthropic',
  'google',
  'cohere',
  'mistral',
  'bedrock',
  'azure_openai',
  'together',
  'fireworks',
  'groq',
  'replicate',
  'perplexity',
  'vllm',
  'sglang',
  'tgi',
  'ollama',
  'llamacpp',
  'unknown',
]);
14
+
15
/** Severity levels used by `Insight` and `InsightTemplate`. */
export const Severity = z.enum([
  'critical',
  'warning',
  'info',
]);
16
+
17
/** Category labels used by `Insight` and `InsightTemplate`. */
export const Category = z.enum([
  'cost',
  'latency',
  'drift',
  'reliability',
  'waste',
  'throughput',
  'security',
  'best-practice',
]);
20
+
21
+ // =============================================================================
22
+ // STATIC ANALYSIS
23
+ // =============================================================================
24
+
25
/**
 * Boolean pattern flags attached to a callsite.
 *
 * Every flag is optional, so an empty object is a valid value.
 */
export const Patterns = z.object({
  streaming: z.boolean().optional(),
  batching: z.boolean().optional(),
  retries: z.boolean().optional(),
  caching: z.boolean().optional(),
  fallback: z.boolean().optional(),
});
32
+
33
/**
 * A single callsite record.
 *
 * `provider`, `model`, `framework` and `runtime` are required keys whose
 * value may be `null`. `confidence` must fall within the inclusive range
 * 0 to 1.
 */
export const Callsite = z.object({
  // Identity and source position.
  id: z.string(),
  file: z.string(),
  line: z.number(),

  // Nullable attributes: the key must be present, the value may be null.
  provider: Provider.nullable(),
  model: z.string().nullable(),
  framework: z.string().nullable(),
  runtime: z.string().nullable(),

  patterns: Patterns,

  // Bounded to [0, 1].
  confidence: z.number().min(0).max(1),
});
44
+
45
/** A scan candidate: a file path, a line number and a text snippet. */
export const ScanCandidate = z.object({
  file: z.string(),
  line: z.number(),
  snippet: z.string(),
});
50
+
51
/**
 * Entry describing one scanned file: its path, a language label and a
 * numeric `loc` value (presumably lines of code — confirm against the scanner).
 */
export const ScannedFile = z.object({
  path: z.string(),
  language: z.string(),
  loc: z.number(),
});
56
+
57
/**
 * Result of a scan: the root that was scanned, the files and candidates
 * found, and a numeric summary.
 */
export const ScanResult = z.object({
  root: z.string(),
  files: z.array(ScannedFile),
  candidates: z.array(ScanCandidate),

  summary: z.object({
    totalFiles: z.number(),
    totalLoc: z.number(),
    languages: z.array(z.string()),
    totalCandidates: z.number(),

    // v1.9.5: cost-optimization statistics (both optional).
    skippedLargeFiles: z.number().optional(),
    skippedByPattern: z.number().optional(),
  }),
});
71
+
72
/**
 * Top-level inference map: version/root/timestamp header, optional report
 * metadata, an aggregate summary and the list of callsites.
 */
export const InferenceMap = z.object({
  version: z.string(),
  root: z.string(),
  generatedAt: z.string(),

  // Report metadata. The whole object is optional; when it is present,
  // `absolutePath` is the only required key.
  metadata: z.object({
    // Full absolute path analyzed.
    absolutePath: z.string(),
    // Which analysis prompt was used.
    promptId: z.string().optional(),
    // Analysis prompt version.
    promptVersion: z.string().optional(),
    // peakinfer-templates version.
    templatesVersion: z.string().optional(),
    // LLM provider used (anthropic, none).
    llmProvider: z.string().optional(),
    // LLM model used for analysis.
    llmModel: z.string().optional(),
  }).optional(),

  summary: z.object({
    totalCallsites: z.number(),
    providers: z.array(z.string()),
    models: z.array(z.string()),
    // Map from a string key to a number.
    patterns: z.record(z.number()),
  }),

  callsites: z.array(Callsite),
});
93
+
94
+ // =============================================================================
95
+ // RUNTIME ANALYSIS
96
+ // =============================================================================
97
+
98
/**
 * One runtime inference event.
 *
 * The first seven fields are required. `intent`, `callsite_id` and all of
 * the runtime-pattern fields are optional.
 */
export const InferenceEvent = z.object({
  // Required core fields.
  id: z.string(),
  ts: z.string(),
  provider: Provider,
  model: z.string(),
  input_tokens: z.number(),
  output_tokens: z.number(),
  latency_ms: z.number(),

  // Optional linkage fields.
  intent: z.string().optional(),
  callsite_id: z.string().optional(),

  // Runtime pattern fields for drift detection.
  // Was this a streaming request?
  streaming: z.boolean().optional(),
  // Time to first token (streaming only).
  ttft_ms: z.number().optional(),
  // If part of a batch, how many requests?
  batch_size: z.number().optional(),
  // Group ID for batched requests.
  batch_id: z.string().optional(),
  // Was the response served from cache?
  cached: z.boolean().optional(),
  // Number of retries before success.
  retry_count: z.number().optional(),
  // Was a fallback provider/model used?
  fallback_used: z.boolean().optional(),
  // If fallback, what was the original model?
  original_model: z.string().optional(),
});
118
+
119
/**
 * Aggregate statistics record: a call count, input/output token totals and
 * three latency percentile values. All six fields are required numbers.
 */
export const ProviderStats = z.object({
  calls: z.number(),

  tokens_in: z.number(),
  tokens_out: z.number(),

  latency_p50: z.number(),
  latency_p95: z.number(),
  latency_p99: z.number(),
});
127
+
128
/**
 * Summary of runtime events: a total count, per-provider and per-model
 * `ProviderStats` keyed by string, and a global p50/p95/p99 triple.
 */
export const RuntimeSummary = z.object({
  totalEvents: z.number(),

  // String-keyed maps of ProviderStats.
  byProvider: z.record(ProviderStats),
  byModel: z.record(ProviderStats),

  global: z.object({
    p50: z.number(),
    p95: z.number(),
    p99: z.number(),
  }),
});
138
+
139
+ // =============================================================================
140
+ // JOINED OUTPUT
141
+ // =============================================================================
142
+
143
/**
 * Usage statistics attached to an enriched callsite.
 *
 * Declares the same six required numeric fields as `ProviderStats`, but is a
 * separate schema instance.
 */
export const UsageStats = z.object({
  calls: z.number(),

  tokens_in: z.number(),
  tokens_out: z.number(),

  latency_p50: z.number(),
  latency_p95: z.number(),
  latency_p99: z.number(),
});
151
+
152
/**
 * A drift signal: one of four signal types plus a required message.
 * `provider`, `model` and `callsiteId` are optional.
 */
export const DriftSignal = z.object({
  type: z.enum([
    'codeOnly',
    'runtimeOnly',
    'mismatch',
    'patternDrift',
  ]),
  provider: z.string().optional(),
  model: z.string().optional(),
  callsiteId: z.string().optional(),
  message: z.string(),
});
159
+
160
/** `Callsite` extended with an optional `usage` field of type `UsageStats`. */
export const EnrichedCallsite = Callsite.extend({
  usage: UsageStats.optional(),
});
163
+
164
/**
 * Joined output made of four required arrays: enriched callsites, callsites
 * listed as `codeOnly`, events listed as `runtimeOnly`, and drift signals.
 */
export const JoinedOutput = z.object({
  callsites: z.array(EnrichedCallsite),
  codeOnly: z.array(Callsite),
  runtimeOnly: z.array(InferenceEvent),
  drift: z.array(DriftSignal),
});
170
+
171
+ // =============================================================================
172
+ // TEMPLATES & INSIGHTS
173
+ // =============================================================================
174
+
175
/**
 * One condition inside a template's `match.conditions` list.
 *
 * `field` and `op` are required; the remaining keys are optional operands.
 */
export const TemplateCondition = z.object({
  field: z.string(),

  // Supported operators.
  op: z.enum([
    'eq',
    'neq',
    'gt',
    'lt',
    'gte',
    'lte',
    'exists',
    'in',
    'ratio_gt',
    'ratio_lt',
    'has_pattern',
  ]),

  // Operand: a string, number, boolean, or array of strings.
  value: z.union([
    z.string(),
    z.number(),
    z.boolean(),
    z.array(z.string()),
  ]).optional(),

  compare_to: z.string().optional(),
  pattern: z.string().optional(),
  count_gt: z.number().optional(),
});
183
+
184
/**
 * Insight template definition: identity, classification, a match section
 * (scope plus conditions), output strings and optional numeric defaults.
 */
export const InsightTemplate = z.object({
  id: z.string(),
  name: z.string(),
  version: z.string(),
  category: Category,
  severity: Severity,

  // v1.8: 6-layer architecture (optional).
  layer: z.enum([
    'application',
    'api',
    'gateway',
    'runtime',
    'model',
    'hardware',
  ]).optional(),

  match: z.object({
    scope: z.enum(['callsite', 'joined', 'global', 'envelope']),
    conditions: z.array(TemplateCondition),
  }),

  output: z.object({
    headline: z.string(),
    evidence: z.string(),
  }),

  // Optional string-keyed map of numbers.
  defaults: z.record(z.number()).optional(),
});
201
+
202
+ // =============================================================================
203
+ // COMMUNITY OPTIMIZATION TEMPLATES (v1.8 - Inference Squeeze Guide)
204
+ // =============================================================================
205
+
206
/**
 * Category of an optimization template. The member list follows the
 * structure of the Inference Squeeze Guide.
 */
export const OptimizationCategory = z.enum([
  // PyTorch to ONNX, vLLM, TensorRT
  'runtime_optimization',
  // Continuous batching, batch sizing
  'batching_optimization',
  // Quantization, KV cache
  'memory_optimization',
  // Model routing, context management
  'application_optimization',
  // Budget controls, cost allocation
  'cost_optimization',
  // APM, quality monitoring, A/B testing
  'monitoring',
  // Auto-scaling, multi-GPU
  'scaling',
]);
218
+
219
/**
 * Risk level associated with implementing an optimization.
 */
export const OptimizationRiskLevel = z.enum([
  'low',
  'medium',
  'high',
]);
223
+
224
/**
 * A single implementation step, with optional commands and an optional
 * validation section that can also carry a rollback command.
 */
export const ImplementationStep = z.object({
  step_id: z.string(),
  name: z.string(),
  executable: z.boolean().optional(),
  commands: z.array(z.string()).optional(),

  // Optional section; every key inside it is optional as well.
  validation: z.object({
    command: z.string().optional(),
    success_criteria: z.string().optional(),
    rollback_command: z.string().optional(),
  }).optional(),
});
238
+
239
/**
 * Monitoring metric configuration: three required strings naming the
 * metric, its target and its alert threshold.
 */
export const MonitoringMetric = z.object({
  metric: z.string(),
  target: z.string(),
  alert_threshold: z.string(),
});
247
+
248
/**
 * Rollback trigger configuration: a required `condition` string paired with
 * a required `action` string.
 */
export const RollbackTrigger = z.object({
  condition: z.string(),
  action: z.string(),
});
255
+
256
/**
 * Community optimization template: a runbook-style template from the
 * Inference Squeeze Guide, providing a step-by-step implementation guide
 * with ROI estimates.
 *
 * Required: `id`, `name`, `description`, `category`, `confidence` and the
 * `optimization` section. All other top-level keys are optional.
 */
export const OptimizationTemplate = z.object({
  // ---- Identity and provenance -------------------------------------------
  id: z.string(),
  name: z.string(),
  description: z.string(),
  category: OptimizationCategory,
  // Bounded to [0, 1].
  confidence: z.number().min(0).max(1),
  success_count: z.number().optional(),
  verified_environments: z.number().optional(),
  contributors: z.array(z.string()).optional(),
  last_updated: z.string().optional(),

  // ---- Environment matching criteria -------------------------------------
  // String-keyed map; each value is a string, boolean, or string array.
  environment_match: z.record(
    z.union([z.string(), z.boolean(), z.array(z.string())]),
  ).optional(),

  // ---- Optimization details (required section) ---------------------------
  optimization: z.object({
    technique: z.string(),
    expected_cost_reduction: z.string().optional(),
    expected_latency_improvement: z.string().optional(),
    expected_throughput_improvement: z.string().optional(),
    expected_memory_reduction: z.string().optional(),
    expected_quality_retention: z.string().optional(),
    effort_estimate: z.string(),
    risk_level: OptimizationRiskLevel,
  }),

  // ---- Economics and ROI --------------------------------------------------
  economics: z.object({
    baseline_calculation: z.record(z.union([z.string(), z.number()])).optional(),
    projected_improvement: z.record(z.union([z.string(), z.number()])).optional(),
    projected_savings: z.record(z.union([z.string(), z.number()])).optional(),
    // When this object is present, `total_cost` is its only required key.
    implementation_cost: z.object({
      engineering_hours: z.number().optional(),
      hourly_rate: z.number().optional(),
      compute_hours: z.number().optional(),
      total_cost: z.number(),
    }).optional(),
    roi_calculation: z.record(z.string()).optional(),
  }).optional(),

  // ---- Implementation steps ----------------------------------------------
  implementation: z.object({
    prerequisites: z.array(
      z.object({
        requirement: z.string(),
        validation_command: z.string().optional(),
      }),
    ).optional(),
    automated_steps: z.array(ImplementationStep).optional(),
  }).optional(),

  // ---- Monitoring configuration ------------------------------------------
  monitoring: z.object({
    key_metrics: z.array(MonitoringMetric).optional(),
    rollback_triggers: z.array(RollbackTrigger).optional(),
  }).optional(),

  // ---- Historical results -------------------------------------------------
  results: z.object({
    recent_implementations: z.array(
      z.record(z.union([z.string(), z.number()])),
    ).optional(),
  }).optional(),
});
320
+
321
/**
 * Stack layers used for impact analysis (TDD v1.7 - 6-layer architecture).
 */
export const StackLayer = z.enum([
  // Code patterns: streaming-drift, overpowered-model, cost-concentration
  'application',
  // API layer: retry-explosion, untested-fallback, rate limiting
  'api',
  // Gateway/proxy layer: caching, load balancing, routing
  'gateway',
  // Inference engines: vLLM, sglang, TGI optimizations
  'runtime',
  // Model selection: GPT-4 vs GPT-3.5, context-accumulation, token-underutilization
  'model',
  // Hardware layer: GPU optimization, memory management
  'hardware',
]);
330
+
331
/** Metric that an impact estimate refers to. */
export const ImpactType = z.enum([
  'cost',
  'latency',
  'throughput',
]);
333
+
334
/** Effort level for implementing a change. */
export const EffortLevel = z.enum([
  'low',
  'medium',
  'high',
]);
336
+
337
/**
 * Impact estimation attached to an insight.
 *
 * `layer`, `impactType`, `estimatedImpactPercent` and `effort` are required;
 * the remaining fields are optional.
 */
export const ImpactEstimate = z.object({
  layer: StackLayer,
  impactType: ImpactType,
  // 0-100% improvement.
  estimatedImpactPercent: z.number().min(0).max(100),
  effort: EffortLevel,

  // Estimated annual savings in USD.
  annualSavingsUSD: z.number().optional(),
  // Estimated latency improvement.
  latencyReductionMs: z.number().optional(),
  // Estimated throughput improvement.
  throughputGainPercent: z.number().optional(),
  // Confidence in the estimate (0-1).
  confidence: z.number().min(0).max(1).optional(),
  // Key assumptions for this estimate.
  assumptions: z.string().optional(),
});
349
+
350
/**
 * A single insight.
 *
 * `severity`, `category`, `headline` and `evidence` are required; every
 * other field is optional.
 */
export const Insight = z.object({
  // Unique insight ID.
  id: z.string().optional(),
  severity: Severity,
  category: Category,
  // Optional for LLM-generated insights.
  templateId: z.string().optional(),
  headline: z.string(),
  evidence: z.string(),
  location: z.string().optional(),
  // Actionable suggestion.
  recommendation: z.string().optional(),
  // 'template' = pattern-based, 'llm' = semantic analysis.
  source: z.enum(['template', 'llm']).optional(),

  // Impact estimation: estimated impact of implementing this recommendation.
  impact: ImpactEstimate.optional(),

  // CodeRabbit-style fix fields (v1.6 - LLM-generated).
  // Exact code line(s) that need to change.
  originalCode: z.string().optional(),
  // Complete replacement code.
  suggestedFix: z.string().optional(),
  // Instructions for AI agents like Copilot.
  aiAgentPrompt: z.string().optional(),
  // Full line replacement for suggestion syntax.
  fullLineFix: z.string().optional(),
});
368
+
369
+ // =============================================================================
370
+ // INFERENCE MAX ENVELOPES
371
+ // =============================================================================
372
+
373
+ export const PerformanceEnvelope = z.object({ // All four metrics are required numbers; no range constraints are applied
374
+ ttft_p50_ms: z.number(), // NOTE(review): presumably median time-to-first-token in ms - confirm
375
+ ttft_p95_ms: z.number(), // NOTE(review): presumably 95th-percentile time-to-first-token in ms - confirm
376
+ tps_median: z.number(), // NOTE(review): presumably median tokens per second - confirm
377
+ tps_peak: z.number(), // NOTE(review): presumably peak tokens per second - confirm
378
+ });
379
+
380
+ // =============================================================================
381
+ // AGENT PLANNING
382
+ // =============================================================================
383
+
384
+ export const TaskType = z.enum([ // Task kinds accepted by PlannedTask.type
385
+ 'scan', 'analyze', 'parse_events', 'join',
386
+ 'load_templates', 'generate_insights', 'render', 'generate_html', 'generate_pdf', 'save_artifacts',
387
+ 'save_history', // v1.5: Save run to history for comparison/prediction
388
+ 'compare', // v1.5: Compare with previous run
389
+ 'predict', // v1.5: Generate deploy-time predictions
390
+ 'counterfactuals', // v1.5: Generate what-if optimization scenarios
391
+ ]);
392
+
393
+ export const PlannedTask = z.object({ // One entry of ExecutionPlan.tasks
394
+ id: z.number(), // Required numeric task id
395
+ type: TaskType, // Required: one of the TaskType values
396
+ description: z.string(), // Required string
397
+ depends_on: z.array(z.number()).optional(), // Optional list of numbers; presumably ids of other PlannedTasks - TODO confirm
398
+ });
399
+
400
+ export const ExecutionPlan = z.object({ // Both fields are required
401
+ mode: z.enum(['static', 'runtime', 'combined']), // Inline enum; same three values as AnalysisType, which is declared later in this file
402
+ tasks: z.array(PlannedTask), // Array of PlannedTask objects; an empty array passes this schema
403
+ });
404
+
405
+ export const TaskResult = z.object({ // taskId, status and durationMs are required; error is optional
406
+ taskId: z.number(), // NOTE(review): presumably the PlannedTask.id this result belongs to - confirm
407
+ status: z.enum(['success', 'failed', 'skipped']), // One of three outcome strings
408
+ error: z.string().optional(), // Optional string; the schema does not tie its presence to status
409
+ durationMs: z.number(), // Required number; unit per the field name is milliseconds
410
+ });
411
+
412
+ // =============================================================================
413
+ // TYPE EXPORTS
414
+ // =============================================================================
415
+
416
+ export type Provider = z.infer<typeof Provider>; // Each alias in this list reuses its schema's name: as a value the name is the Zod schema, as a type it is the inferred shape
417
+ export type Severity = z.infer<typeof Severity>;
418
+ export type Category = z.infer<typeof Category>;
419
+ export type Patterns = z.infer<typeof Patterns>;
420
+ export type CallsitePatterns = Patterns; // Alias for analyzer
421
+ export type Callsite = z.infer<typeof Callsite>;
422
+ export type ScanCandidate = z.infer<typeof ScanCandidate>;
423
+ export type ScannedFile = z.infer<typeof ScannedFile>;
424
+ export type ScanResult = z.infer<typeof ScanResult>;
425
+ export type InferenceMap = z.infer<typeof InferenceMap>;
426
+ export type InferenceEvent = z.infer<typeof InferenceEvent>;
427
+ export type ProviderStats = z.infer<typeof ProviderStats>;
428
+ export type RuntimeSummary = z.infer<typeof RuntimeSummary>;
429
+ export type UsageStats = z.infer<typeof UsageStats>;
430
+ export type DriftSignal = z.infer<typeof DriftSignal>;
431
+ export type EnrichedCallsite = z.infer<typeof EnrichedCallsite>;
432
+ export type JoinedOutput = z.infer<typeof JoinedOutput>;
433
+ export type TemplateCondition = z.infer<typeof TemplateCondition>;
434
+ export type InsightTemplate = z.infer<typeof InsightTemplate>;
435
+ export type OptimizationTemplate = z.infer<typeof OptimizationTemplate>;
436
+ export type OptimizationCategory = z.infer<typeof OptimizationCategory>;
437
+ export type OptimizationRiskLevel = z.infer<typeof OptimizationRiskLevel>;
438
+ export type StackLayer = z.infer<typeof StackLayer>;
439
+ export type ImpactType = z.infer<typeof ImpactType>;
440
+ export type EffortLevel = z.infer<typeof EffortLevel>;
441
+ export type ImpactEstimate = z.infer<typeof ImpactEstimate>;
442
+ export type Insight = z.infer<typeof Insight>;
443
+ export type PerformanceEnvelope = z.infer<typeof PerformanceEnvelope>;
444
+ export type TaskType = z.infer<typeof TaskType>;
445
+ export type PlannedTask = z.infer<typeof PlannedTask>;
446
+ export type ExecutionPlan = z.infer<typeof ExecutionPlan>;
447
+ export type TaskResult = z.infer<typeof TaskResult>;
448
+
449
+ // =============================================================================
450
+ // FORMAT DETECTION & NORMALIZATION (PRD §6.4)
451
+ // =============================================================================
452
+
453
+ /**
454
+ * Supported format types for runtime event files.
455
+ * Direct-parse formats are handled without LLM, agent-normalized formats require semantic analysis. Used by FormatDetectionResult.format_type and NormalizationOptions.format_hint.
456
+ */
457
+ export const FormatType = z.enum([
458
+ // Direct-parse formats (no LLM needed)
459
+ 'jsonl', // Newline-delimited JSON with InferenceEvent schema
460
+ 'json_array', // JSON array of InferenceEvent objects
461
+ 'csv', // CSV with standard column names
462
+ 'tsv', // TSV with standard column names
463
+
464
+ // Agent-normalized formats (require semantic analysis)
465
+ 'otel', // OpenTelemetry OTLP traces/spans
466
+ 'jaeger', // Jaeger distributed tracing format
467
+ 'zipkin', // Zipkin tracing format
468
+ 'langsmith', // LangSmith trace exports
469
+ 'helicone', // Helicone proxy logs
470
+ 'wandb', // Weights & Biases inference logs
471
+ 'litellm', // LiteLLM proxy event logs
472
+ 'portkey', // Portkey gateway logs
473
+
474
+ // Inferred formats (heuristic detection)
475
+ 'custom_json', // Unknown JSON structure requiring field mapping
476
+ 'custom_text', // Structured text logs
477
+ 'unknown', // Could not determine format
478
+ ]);
479
+
480
+ /**
481
+ * Extraction strategy for a field mapping (the type of FieldMapping.extraction_type).
482
+ */
483
+ export const ExtractionType = z.enum([
484
+ 'direct', // Direct field access (e.g., obj.field)
485
+ 'jsonpath', // JSONPath expression
486
+ 'column', // CSV/TSV column name
487
+ 'regex', // Regular expression extraction
488
+ 'computed', // Computed from other fields (e.g., latency = end - start)
489
+ 'constant', // Fixed value for all events
490
+ ]);
491
+
492
+ /**
493
+ * Transformation to apply after extraction (the type of FieldMapping.transform, where 'none' is the default).
494
+ */
495
+ export const TransformType = z.enum([
496
+ 'none', // No transformation
497
+ 'unix_ms_to_iso', // Unix milliseconds to ISO timestamp
498
+ 'unix_s_to_iso', // Unix seconds to ISO timestamp
499
+ 'unix_nano_to_iso', // Unix nanoseconds to ISO timestamp
500
+ 'duration_to_ms', // Duration string (e.g., "1.5s") to milliseconds
501
+ 'parse_int', // String to integer
502
+ 'parse_float', // String to float
503
+ 'lowercase', // Lowercase string
504
+ 'provider_normalize', // Normalize provider names (e.g., "OpenAI" -> "openai")
505
+ ]);
506
+
507
+ /**
508
+ * Field mapping from source format to InferenceEvent schema. Used as the element type of NormalizationResult.mappings.
509
+ */
510
+ export const FieldMapping = z.object({
511
+ target: z.string(), // InferenceEvent field name
512
+ source_path: z.string(), // JSONPath, column name, regex, or expression
513
+ extraction_type: ExtractionType, // Required: one of the ExtractionType values
514
+ transform: TransformType.optional().default('none'), // May be omitted on input; defaults to 'none'
515
+ confidence: z.number().min(0).max(1), // Confidence in this mapping (0-1)
516
+ evidence: z.string().optional(), // Why this mapping was chosen
517
+ });
518
+
519
+ /**
520
+ * Result of format detection. All five fields are required.
521
+ */
522
+ export const FormatDetectionResult = z.object({
523
+ format_type: FormatType, // One of the FormatType values
524
+ confidence: z.number().min(0).max(1), // Overall detection confidence
525
+ evidence: z.string(), // Explanation of detection
526
+ sample_size: z.number(), // Number of lines/records sampled
527
+ requires_agent: z.boolean(), // Whether agent normalization is needed
528
+ });
529
+
530
+ /**
531
+ * Complete normalization result with field mappings. Every top-level field, including audit, is required.
532
+ */
533
+ export const NormalizationResult = z.object({
534
+ detection: FormatDetectionResult, // Nested format detection result
535
+ mappings: z.array(FieldMapping), // Array of FieldMapping objects
536
+ unmapped_fields: z.array(z.string()), // Source fields not mapped
537
+ warnings: z.array(z.string()), // Issues encountered during normalization
538
+ audit: z.object({ // Required audit record; only llm_model is optional inside it
539
+ normalized_at: z.string(), // ISO timestamp (plain z.string(); not checked with .datetime() as other timestamps in this file are)
540
+ agent_used: z.boolean(),
541
+ codebase_context_used: z.boolean(),
542
+ llm_model: z.string().optional(),
543
+ }),
544
+ });
545
+
546
+ /**
547
+ * Options for format normalization. Every option is optional, so an empty object passes this schema.
548
+ */
549
+ export const NormalizationOptions = z.object({
550
+ format_hint: FormatType.optional(), // User-provided format hint
551
+ field_hints: z.record(z.string()).optional(), // User-provided field mappings
552
+ lenient: z.boolean().optional(), // Accept low-confidence mappings
553
+ strict: z.boolean().optional(), // Fail on missing required fields
554
+ codebase_context: z.any().optional(), // ScanResult for codebase-aware normalization (declared as z.any(), so its shape is not validated here)
555
+ });
556
+
557
+ // Type exports for format detection (each alias shares its name with the schema constant it is inferred from)
558
+ export type FormatType = z.infer<typeof FormatType>;
559
+ export type ExtractionType = z.infer<typeof ExtractionType>;
560
+ export type TransformType = z.infer<typeof TransformType>;
561
+ export type FieldMapping = z.infer<typeof FieldMapping>;
562
+ export type FormatDetectionResult = z.infer<typeof FormatDetectionResult>;
563
+ export type NormalizationResult = z.infer<typeof NormalizationResult>;
564
+ export type NormalizationOptions = z.infer<typeof NormalizationOptions>;
565
+
566
+ // =============================================================================
567
+ // HISTORY STORAGE (v1.5)
568
+ // =============================================================================
569
+
570
+ /**
571
+ * Analysis type for categorizing runs. Used by HistoryManifest.analysisType and by each entry of HistoryIndex.runs.
572
+ */
573
+ export const AnalysisType = z.enum(['static', 'runtime', 'combined']);
574
+
575
+ /**
576
+ * History manifest for tracking analysis runs over time.
577
+ * Distinct from runid.ts RunManifest which focuses on caching/resumability.
578
+ * This schema enables historical comparison and deploy-time prediction features.
579
+ */
580
+ export const HistoryManifest = z.object({
581
+ runId: z.string(), // Unique run identifier
582
+ timestamp: z.string().datetime(), // ISO timestamp when analysis completed
583
+ path: z.string(), // Analyzed path (absolute, for matching)
584
+ pathHash: z.string(), // Hash of normalized path for efficient lookup
585
+ analysisType: AnalysisType, // Type of analysis performed
586
+ version: z.string(), // PeakInfer version that produced this run
587
+
588
+ // Summary metrics for quick comparison
589
+ inferencePointCount: z.number(), // Number of inference points detected
590
+ eventCount: z.number().optional(), // Number of runtime events (if runtime/combined)
591
+ driftCount: z.number().optional(), // Number of drift signals (if combined)
592
+ insightCount: z.number().optional(), // Number of insights generated
593
+
594
+ // Performance context
595
+ durationMs: z.number().optional(), // Analysis duration in milliseconds
596
+
597
+ // Artifact paths relative to history directory
598
+ artifacts: z.object({ // Optional as a whole; each path inside is also optional
599
+ inferenceMap: z.string().optional(), // inference-map.json
600
+ analysis: z.string().optional(), // analysis.json (full results)
601
+ html: z.string().optional(), // report.html
602
+ pdf: z.string().optional(), // report.pdf
603
+ }).optional(),
604
+ });
605
+
606
+ /**
607
+ * Index of all historical runs for a project path.
608
+ * Stored at .peakinfer/history/index.json
609
+ */
610
+ export const HistoryIndex = z.object({
611
+ version: z.string(), // History format version
612
+ lastUpdated: z.string().datetime(), // Last index update
613
+ runs: z.array(z.object({ // One entry per run; the five field names match a subset of HistoryManifest's fields
614
+ runId: z.string(),
615
+ timestamp: z.string().datetime(),
616
+ pathHash: z.string(),
617
+ analysisType: AnalysisType,
618
+ inferencePointCount: z.number(),
619
+ })),
620
+ });
621
+
622
+ // Type exports for history (each alias shares its name with the schema constant it is inferred from)
623
+ export type AnalysisType = z.infer<typeof AnalysisType>;
624
+ export type HistoryManifest = z.infer<typeof HistoryManifest>;
625
+ export type HistoryIndex = z.infer<typeof HistoryIndex>;
626
+
627
+ // =============================================================================
628
+ // HISTORICAL COMPARISON (v1.5)
629
+ // =============================================================================
630
+
631
+ /**
632
+ * Change type for tracking what changed between runs. NOTE(review): no other schema in this comparison section references it; ComparisonResult uses separate added/removed/changed arrays.
633
+ */
634
+ export const ChangeType = z.enum(['added', 'removed', 'modified']);
635
+
636
+ /**
637
+ * A single field change within an inference point. Used as the element type of ChangedInferencePoint.changes.
638
+ */
639
+ export const FieldChange = z.object({
640
+ field: z.string(), // Field name that changed
641
+ before: z.unknown(), // Previous value (z.unknown(): any value is accepted)
642
+ after: z.unknown(), // New value (z.unknown(): any value is accepted)
643
+ });
644
+
645
+ /**
646
+ * An inference point that changed between runs. Used as the element type of ComparisonResult.changed.
647
+ */
648
+ export const ChangedInferencePoint = z.object({
649
+ point: Callsite, // The inference point (validated with the Callsite schema, defined outside this section)
650
+ changes: z.array(FieldChange), // List of field changes
651
+ });
652
+
653
+ /**
654
+ * Result of comparing two analysis runs.
655
+ * Enables "what changed" insights for pre-deploy validation.
656
+ */
657
+ export const ComparisonResult = z.object({
658
+ baseRunId: z.string(), // The baseline run ID
659
+ baseTimestamp: z.string().datetime(), // When baseline was created
660
+ currentRunId: z.string(), // The current run ID
661
+ currentTimestamp: z.string().datetime(), // When current was created
662
+
663
+ // Inference point changes
664
+ added: z.array(Callsite), // New inference points
665
+ removed: z.array(Callsite), // Removed inference points
666
+ changed: z.array(ChangedInferencePoint), // Modified inference points
667
+
668
+ // Summary metrics
669
+ metrics: z.object({ // Required; the schema does not cross-check these counts against the arrays above
670
+ totalBefore: z.number(), // Inference points in baseline
671
+ totalAfter: z.number(), // Inference points in current
672
+ addedCount: z.number(), // Count of added points
673
+ removedCount: z.number(), // Count of removed points
674
+ changedCount: z.number(), // Count of modified points
675
+ netChange: z.number(), // Net change (added - removed)
676
+ }),
677
+
678
+ // Insight deltas
679
+ insightDeltas: z.object({ // Optional as a whole; when present, all four counts are required
680
+ newCritical: z.number(), // New critical insights
681
+ resolvedCritical: z.number(), // Resolved critical insights
682
+ newWarnings: z.number(), // New warnings
683
+ resolvedWarnings: z.number(), // Resolved warnings
684
+ }).optional(),
685
+ });
686
+
687
+ // Type exports for comparison (each alias shares its name with the schema constant it is inferred from)
688
+ export type ChangeType = z.infer<typeof ChangeType>;
689
+ export type FieldChange = z.infer<typeof FieldChange>;
690
+ export type ChangedInferencePoint = z.infer<typeof ChangedInferencePoint>;
691
+ export type ComparisonResult = z.infer<typeof ComparisonResult>;
692
+
693
+ // =============================================================================
694
+ // DEPLOY-TIME PREDICTION (v1.5)
695
+ // =============================================================================
696
+
697
+ /**
698
+ * Risk level for predictions (the type of InferencePointPrediction.risk).
699
+ */
700
+ export const RiskLevel = z.enum(['high', 'medium', 'low', 'neutral']);
701
+
702
+ /**
703
+ * Impact direction for a prediction factor (the type of PredictionFactor.impact).
704
+ */
705
+ export const ImpactDirection = z.enum(['positive', 'negative', 'neutral']);
706
+
707
+ /**
708
+ * A factor contributing to a latency prediction. Used as the element type of InferencePointPrediction.factors; only weight is optional.
709
+ */
710
+ export const PredictionFactor = z.object({
711
+ name: z.string(), // Factor name (e.g., "model complexity")
712
+ impact: ImpactDirection, // How it affects latency
713
+ description: z.string(), // Human-readable explanation
714
+ weight: z.number().min(0).max(1).optional(), // Relative importance (0-1)
715
+ });
716
+
717
+ /**
718
+ * Latency percentile values. All three are required numbers; the schema does not enforce p50 <= p95 <= p99.
719
+ */
720
+ export const LatencyPercentiles = z.object({
721
+ p50: z.number(), // Median latency (ms)
722
+ p95: z.number(), // 95th percentile (ms)
723
+ p99: z.number(), // 99th percentile (ms)
724
+ });
725
+
726
+ /**
727
+ * Prediction for a single inference point.
728
+ * Surfaces potential performance risks before deployment.
729
+ */
730
+ export const InferencePointPrediction = z.object({
731
+ inferencePointId: z.string(), // ID of the inference point
732
+ location: z.string(), // file:line location
733
+ provider: z.string().optional(), // Provider (e.g., openai)
734
+ model: z.string().optional(), // Model name
735
+
736
+ // Current performance (from historical data if available)
737
+ currentLatency: LatencyPercentiles.optional(), // Optional, unlike predictedLatency
738
+
739
+ // Predicted performance
740
+ predictedLatency: LatencyPercentiles, // Required p50/p95/p99 values
741
+
742
+ // Risk assessment
743
+ risk: RiskLevel, // Overall risk level
744
+ riskScore: z.number().min(0).max(100), // Numeric risk score (0-100)
745
+
746
+ // Factors contributing to prediction
747
+ factors: z.array(PredictionFactor), // Required array; an empty array passes this schema
748
+
749
+ // Confidence in prediction
750
+ confidence: z.enum(['high', 'medium', 'low']), // Categorical here, unlike the numeric 0-1 confidence on ImpactEstimate and FieldMapping
751
+ confidenceReason: z.string().optional(), // Why confidence is high/low
752
+ });
753
+
754
+ /**
755
+ * Summary of all predictions. NOTE(review): RiskLevel also allows 'neutral', which has no count field here, so the three risk counts need not sum to totalPoints - confirm intent.
756
+ */
757
+ export const PredictionSummary = z.object({
758
+ totalPoints: z.number(), // Total inference points analyzed
759
+ highRiskCount: z.number(), // High risk predictions
760
+ mediumRiskCount: z.number(), // Medium risk predictions
761
+ lowRiskCount: z.number(), // Low risk predictions
762
+ averageP95: z.number(), // Average predicted p95 latency
763
+ worstP95: z.number(), // Worst predicted p95 latency
764
+ budgetExceeded: z.boolean().optional(), // True if exceeds target latency
765
+ });
766
+
767
+ /**
768
+ * Full prediction result for deploy-time analysis. Only targetP95 is optional.
769
+ */
770
+ export const PredictionResult = z.object({
771
+ predictions: z.array(InferencePointPrediction), // One InferencePointPrediction per element
772
+ summary: PredictionSummary, // Required PredictionSummary object
773
+ targetP95: z.number().optional(), // User-specified target p95 (ms)
774
+ generatedAt: z.string().datetime(), // When predictions were generated
775
+ basedOnRuns: z.number(), // Number of historical runs used
776
+ });
777
+
778
+ // Type exports for prediction (each alias shares its name with the schema constant it is inferred from)
779
+ export type RiskLevel = z.infer<typeof RiskLevel>;
780
+ export type ImpactDirection = z.infer<typeof ImpactDirection>;
781
+ export type PredictionFactor = z.infer<typeof PredictionFactor>;
782
+ export type LatencyPercentiles = z.infer<typeof LatencyPercentiles>;
783
+ export type InferencePointPrediction = z.infer<typeof InferencePointPrediction>;
784
+ export type PredictionSummary = z.infer<typeof PredictionSummary>;
785
+ export type PredictionResult = z.infer<typeof PredictionResult>;
786
+
787
+ // =============================================================================
788
+ // COUNTERFACTUAL INSIGHTS (v1.5)
789
+ // =============================================================================
790
+
791
+ /**
792
+ * Type of counterfactual optimization scenario (the type of Counterfactual.type).
793
+ */
794
+ export const CounterfactualType = z.enum([
795
+ 'model_swap', // Swap to a different model (e.g., cheaper or faster)
796
+ 'batch_optimization', // Add batching to reduce per-request overhead
797
+ 'cache_addition', // Add caching to bypass LLM for repeated queries
798
+ 'provider_change', // Change provider (e.g., cloud → self-hosted)
799
+ 'streaming_enable', // Enable streaming for better perceived latency
800
+ ]);
801
+
802
+ /**
803
+ * Current and proposed state for a counterfactual. The same schema backs both Counterfactual.currentState and Counterfactual.proposedState; only the two estimates are required.
804
+ */
805
+ export const CounterfactualState = z.object({
806
+ model: z.string().optional(), // Model name
807
+ provider: z.string().optional(), // Provider name
808
+ pattern: z.string().optional(), // Pattern (streaming, batching, etc.)
809
+ estimatedLatency: z.number(), // p95 latency estimate (ms)
810
+ estimatedCost: z.number(), // Cost per 1K calls ($)
811
+ });
812
+
813
+ /**
814
+ * Impact assessment for a counterfactual. All five fields are required; the numeric deltas have no range constraints, so negative values pass.
815
+ */
816
+ export const CounterfactualImpact = z.object({
817
+ latencyDelta: z.number(), // Change in p95 latency (ms, negative = improvement)
818
+ latencyDeltaPercent: z.number(), // Percentage change in latency
819
+ costDelta: z.number(), // Change in cost per 1K calls ($, negative = savings)
820
+ costDeltaPercent: z.number(), // Percentage change in cost
821
+ tradeoffs: z.array(z.string()), // Tradeoffs to consider
822
+ });
823
+
824
+ /**
825
+ * A single counterfactual "what if" scenario.
826
+ * Shows the road not taken and its potential impact.
827
+ */
828
+ export const Counterfactual = z.object({
829
+ id: z.string(), // Unique identifier
830
+ type: CounterfactualType, // Type of optimization
831
+ headline: z.string(), // Short description (e.g., "Switch to GPT-4o-mini")
832
+ description: z.string(), // Detailed explanation
833
+
834
+ currentState: CounterfactualState, // Current configuration
835
+ proposedState: CounterfactualState, // Proposed configuration
836
+
837
+ impact: CounterfactualImpact, // Estimated impact
838
+
839
+ confidence: z.enum(['high', 'medium', 'low']), // Categorical confidence; same three values as InferencePointPrediction.confidence
840
+ confidenceReason: z.string().optional(), // Optional explanation string
841
+
842
+ affectedPoints: z.array(z.string()), // Inference point IDs affected
843
+ effort: z.enum(['low', 'medium', 'high']), // Implementation effort (inline enum with the same values as EffortLevel)
844
+ });
845
+
846
+ /**
847
+ * Summary of counterfactual opportunities. All six fields are required.
848
+ */
849
+ export const CounterfactualSummary = z.object({
850
+ totalOpportunities: z.number(), // Total counterfactuals identified
851
+ maxLatencySavingsMs: z.number(), // Max latency reduction achievable (ms)
852
+ maxLatencySavingsPercent: z.number(), // Max latency reduction percentage
853
+ maxCostSavings: z.number(), // Max cost savings achievable ($)
854
+ maxCostSavingsPercent: z.number(), // Max cost savings percentage
855
+ byType: z.record(z.number()), // Count by counterfactual type (keys are not restricted to CounterfactualType values by this schema)
856
+ });
857
+
858
+ /**
859
+ * Full counterfactual analysis result. All three fields are required.
860
+ */
861
+ export const CounterfactualResult = z.object({
862
+ counterfactuals: z.array(Counterfactual), // One Counterfactual per element; an empty array passes this schema
863
+ summary: CounterfactualSummary, // Required CounterfactualSummary object
864
+ generatedAt: z.string().datetime(), // When analysis was performed
865
+ });
866
+
867
+ // Type exports for counterfactuals (each alias shares its name with the schema constant it is inferred from)
868
+ export type CounterfactualType = z.infer<typeof CounterfactualType>;
869
+ export type CounterfactualState = z.infer<typeof CounterfactualState>;
870
+ export type CounterfactualImpact = z.infer<typeof CounterfactualImpact>;
871
+ export type Counterfactual = z.infer<typeof Counterfactual>;
872
+ export type CounterfactualSummary = z.infer<typeof CounterfactualSummary>;
873
+ export type CounterfactualResult = z.infer<typeof CounterfactualResult>;