@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
package/dist/types.js ADDED
@@ -0,0 +1,703 @@
1
+ import { z } from 'zod';
2
+ // =============================================================================
3
+ // ENUMS
4
+ // =============================================================================
5
/**
 * Provider identifiers accepted wherever a schema in this module references
 * `Provider`. 'unknown' is an ordinary enum member, so it validates like any
 * other value in the list.
 */
export const Provider = z.enum([
  'openai',
  'anthropic',
  'google',
  'cohere',
  'mistral',
  'bedrock',
  'azure_openai',
  'together',
  'fireworks',
  'groq',
  'replicate',
  'perplexity',
  'vllm',
  'sglang',
  'tgi',
  'ollama',
  'llamacpp',
  'unknown',
]);
12
/** Severity labels shared by insight templates and generated insights. */
export const Severity = z.enum([
  'critical',
  'warning',
  'info',
]);
13
/** Category labels shared by insight templates and generated insights. */
export const Category = z.enum([
  'cost',
  'latency',
  'drift',
  'reliability',
  'waste',
  'throughput',
  'security',
  'best-practice',
]);
16
+ // =============================================================================
17
+ // STATIC ANALYSIS
18
+ // =============================================================================
19
/**
 * Boolean pattern flags attached to a callsite.
 * Every flag is optional; an absent key is valid.
 */
export const Patterns = z.object({
  streaming: z.boolean().optional(),
  batching: z.boolean().optional(),
  retries: z.boolean().optional(),
  caching: z.boolean().optional(),
  fallback: z.boolean().optional(),
});
26
/**
 * A single callsite record: where it is (file + line) and what was detected
 * about it.
 *
 * `provider`, `model`, `framework` and `runtime` are nullable rather than
 * optional: the key must be present, but its value may be `null`.
 */
export const Callsite = z.object({
  id: z.string(),
  file: z.string(),
  line: z.number(),
  provider: Provider.nullable(),
  model: z.string().nullable(),
  framework: z.string().nullable(),
  runtime: z.string().nullable(),
  patterns: Patterns,
  // Bounded to the closed interval [0, 1].
  confidence: z.number().min(0).max(1),
});
37
/** A scan candidate: a file/line position together with a text snippet. */
export const ScanCandidate = z.object({
  file: z.string(),
  line: z.number(),
  snippet: z.string(),
});
42
/** Per-file scan record: path, language label and a numeric `loc` value. */
export const ScannedFile = z.object({
  path: z.string(),
  language: z.string(),
  loc: z.number(),
});
47
/**
 * Result of a scan: the root that was scanned, the files seen, the candidates
 * found, and aggregate totals in `summary`.
 */
export const ScanResult = z.object({
  root: z.string(),
  files: z.array(ScannedFile),
  candidates: z.array(ScanCandidate),
  summary: z.object({
    totalFiles: z.number(),
    totalLoc: z.number(),
    languages: z.array(z.string()),
    totalCandidates: z.number(),

    // v1.9.5: Cost optimization stats (both counters are optional)
    skippedLargeFiles: z.number().optional(),
    skippedByPattern: z.number().optional(),
  }),
});
61
/**
 * Top-level inference map document: version/root/timestamp header, an
 * optional report `metadata` block, aggregate `summary`, and the list of
 * callsites.
 */
export const InferenceMap = z.object({
  version: z.string(),
  root: z.string(),
  generatedAt: z.string(),

  // Report metadata. The block itself is optional; when present,
  // `absolutePath` is the only required key.
  metadata: z
    .object({
      // Full absolute path analyzed
      absolutePath: z.string(),
      // Which analysis prompt was used
      promptId: z.string().optional(),
      // Analysis prompt version
      promptVersion: z.string().optional(),
      // peakinfer-templates version
      templatesVersion: z.string().optional(),
      // LLM provider used (anthropic, none)
      llmProvider: z.string().optional(),
      // LLM model used for analysis
      llmModel: z.string().optional(),
    })
    .optional(),

  summary: z.object({
    totalCallsites: z.number(),
    providers: z.array(z.string()),
    models: z.array(z.string()),
    // String-keyed record of numeric values.
    patterns: z.record(z.number()),
  }),

  callsites: z.array(Callsite),
});
82
+ // =============================================================================
83
+ // RUNTIME ANALYSIS
84
+ // =============================================================================
85
/**
 * One runtime inference event.
 *
 * The first seven fields are required. `intent`, `callsite_id` and the
 * runtime pattern fields are optional.
 */
export const InferenceEvent = z.object({
  id: z.string(),
  ts: z.string(),
  provider: Provider,
  model: z.string(),
  input_tokens: z.number(),
  output_tokens: z.number(),
  latency_ms: z.number(),
  intent: z.string().optional(),
  callsite_id: z.string().optional(),

  // --- Runtime pattern fields for drift detection ---

  // Was this a streaming request?
  streaming: z.boolean().optional(),
  // Time to first token (streaming only)
  ttft_ms: z.number().optional(),
  // If part of a batch, how many requests?
  batch_size: z.number().optional(),
  // Group ID for batched requests
  batch_id: z.string().optional(),
  // Was response served from cache?
  cached: z.boolean().optional(),
  // Number of retries before success
  retry_count: z.number().optional(),
  // Was a fallback provider/model used?
  fallback_used: z.boolean().optional(),
  // If fallback, what was the original model?
  original_model: z.string().optional(),
});
105
/**
 * Aggregate statistics record: call count, token totals in/out, and three
 * latency figures (p50 / p95 / p99). All fields are required numbers.
 */
export const ProviderStats = z.object({
  calls: z.number(),
  tokens_in: z.number(),
  tokens_out: z.number(),
  latency_p50: z.number(),
  latency_p95: z.number(),
  latency_p99: z.number(),
});
113
/**
 * Summary over runtime events: a total count, `ProviderStats` records keyed
 * by string under `byProvider` and `byModel`, and global p50/p95/p99 values.
 */
export const RuntimeSummary = z.object({
  totalEvents: z.number(),
  byProvider: z.record(ProviderStats),
  byModel: z.record(ProviderStats),
  global: z.object({
    p50: z.number(),
    p95: z.number(),
    p99: z.number(),
  }),
});
123
+ // =============================================================================
124
+ // JOINED OUTPUT
125
+ // =============================================================================
126
/**
 * Usage statistics attached to an enriched callsite.
 * Declared as its own object schema; its fields mirror those of
 * `ProviderStats` (calls, tokens in/out, latency p50/p95/p99).
 */
export const UsageStats = z.object({
  calls: z.number(),
  tokens_in: z.number(),
  tokens_out: z.number(),
  latency_p50: z.number(),
  latency_p95: z.number(),
  latency_p99: z.number(),
});
134
/**
 * A drift signal: a `type` tag and a required human-readable `message`,
 * plus optional provider / model / callsite identifiers.
 */
export const DriftSignal = z.object({
  type: z.enum([
    'codeOnly',
    'runtimeOnly',
    'mismatch',
    'patternDrift',
  ]),
  provider: z.string().optional(),
  model: z.string().optional(),
  callsiteId: z.string().optional(),
  message: z.string(),
});
141
/** `Callsite` extended with an optional `usage` statistics block. */
export const EnrichedCallsite = Callsite.extend({
  usage: UsageStats.optional(),
});
144
/**
 * Joined output: enriched callsites, plain callsites under `codeOnly`,
 * inference events under `runtimeOnly`, and the list of drift signals.
 */
export const JoinedOutput = z.object({
  callsites: z.array(EnrichedCallsite),
  codeOnly: z.array(Callsite),
  runtimeOnly: z.array(InferenceEvent),
  drift: z.array(DriftSignal),
});
150
+ // =============================================================================
151
+ // TEMPLATES & INSIGHTS
152
+ // =============================================================================
153
/**
 * One condition inside an insight template's `match.conditions` list.
 * `field` and `op` are required; the remaining operands are optional.
 */
export const TemplateCondition = z.object({
  field: z.string(),
  op: z.enum([
    'eq',
    'neq',
    'gt',
    'lt',
    'gte',
    'lte',
    'exists',
    'in',
    'ratio_gt',
    'ratio_lt',
    'has_pattern',
  ]),
  // Accepts a string, number, boolean, or an array of strings.
  value: z
    .union([z.string(), z.number(), z.boolean(), z.array(z.string())])
    .optional(),
  compare_to: z.string().optional(),
  pattern: z.string().optional(),
  count_gt: z.number().optional(),
});
161
/**
 * Insight template definition: identity (id/name/version), classification
 * (category/severity, optional layer), the `match` rule and the `output`
 * text fields, plus an optional record of numeric `defaults`.
 */
export const InsightTemplate = z.object({
  id: z.string(),
  name: z.string(),
  version: z.string(),
  category: Category,
  severity: Severity,
  // v1.8: 6-layer architecture
  layer: z
    .enum(['application', 'api', 'gateway', 'runtime', 'model', 'hardware'])
    .optional(),
  match: z.object({
    scope: z.enum(['callsite', 'joined', 'global', 'envelope']),
    conditions: z.array(TemplateCondition),
  }),
  output: z.object({
    headline: z.string(),
    evidence: z.string(),
  }),
  defaults: z.record(z.number()).optional(),
});
178
+ // =============================================================================
179
+ // COMMUNITY OPTIMIZATION TEMPLATES (v1.8 - Inference Squeeze Guide)
180
+ // =============================================================================
181
/**
 * Category of an optimization template. Per the original note, the list
 * matches the structure of the Inference Squeeze Guide.
 */
export const OptimizationCategory = z.enum([
  // PyTorch to ONNX, vLLM, TensorRT
  'runtime_optimization',
  // Continuous batching, batch sizing
  'batching_optimization',
  // Quantization, KV cache
  'memory_optimization',
  // Model routing, context management
  'application_optimization',
  // Budget controls, cost allocation
  'cost_optimization',
  // APM, quality monitoring, A/B testing
  'monitoring',
  // Auto-scaling, multi-GPU
  'scaling',
]);
193
/** Risk level assigned to implementing an optimization. */
export const OptimizationRiskLevel = z.enum([
  'low',
  'medium',
  'high',
]);
197
/**
 * A single implementation step. `step_id` and `name` are required; the
 * `executable` flag, the command list and the `validation` block (which
 * also carries the rollback command) are optional.
 */
export const ImplementationStep = z.object({
  step_id: z.string(),
  name: z.string(),
  executable: z.boolean().optional(),
  commands: z.array(z.string()).optional(),
  validation: z
    .object({
      command: z.string().optional(),
      success_criteria: z.string().optional(),
      rollback_command: z.string().optional(),
    })
    .optional(),
});
211
/**
 * Monitoring metric configuration. All three fields are required strings,
 * including `target` and `alert_threshold`.
 */
export const MonitoringMetric = z.object({
  metric: z.string(),
  target: z.string(),
  alert_threshold: z.string(),
});
219
/** Rollback trigger configuration: a `condition` paired with an `action`. */
export const RollbackTrigger = z.object({
  condition: z.string(),
  action: z.string(),
});
226
/**
 * Community Optimization Template.
 *
 * Per the original note these are runbook-style templates from the Inference
 * Squeeze Guide, providing step-by-step implementation guides with ROI
 * estimates. Required parts are the identity fields, `category`,
 * `confidence` and the `optimization` block; every other section is optional.
 */
export const OptimizationTemplate = z.object({
  id: z.string(),
  name: z.string(),
  description: z.string(),
  category: OptimizationCategory,
  // Bounded to the closed interval [0, 1].
  confidence: z.number().min(0).max(1),
  success_count: z.number().optional(),
  verified_environments: z.number().optional(),
  contributors: z.array(z.string()).optional(),
  last_updated: z.string().optional(),

  // Environment matching criteria: record values may be a string, a boolean,
  // or an array of strings.
  environment_match: z
    .record(z.union([z.string(), z.boolean(), z.array(z.string())]))
    .optional(),

  // Optimization details
  optimization: z.object({
    technique: z.string(),
    expected_cost_reduction: z.string().optional(),
    expected_latency_improvement: z.string().optional(),
    expected_throughput_improvement: z.string().optional(),
    expected_memory_reduction: z.string().optional(),
    expected_quality_retention: z.string().optional(),
    effort_estimate: z.string(),
    risk_level: OptimizationRiskLevel,
  }),

  // Economics and ROI
  economics: z
    .object({
      baseline_calculation: z
        .record(z.union([z.string(), z.number()]))
        .optional(),
      projected_improvement: z
        .record(z.union([z.string(), z.number()]))
        .optional(),
      projected_savings: z
        .record(z.union([z.string(), z.number()]))
        .optional(),
      // When this block is present, `total_cost` is its only required key.
      implementation_cost: z
        .object({
          engineering_hours: z.number().optional(),
          hourly_rate: z.number().optional(),
          compute_hours: z.number().optional(),
          total_cost: z.number(),
        })
        .optional(),
      roi_calculation: z.record(z.string()).optional(),
    })
    .optional(),

  // Implementation steps
  implementation: z
    .object({
      prerequisites: z
        .array(
          z.object({
            requirement: z.string(),
            validation_command: z.string().optional(),
          }),
        )
        .optional(),
      automated_steps: z.array(ImplementationStep).optional(),
    })
    .optional(),

  // Monitoring configuration
  monitoring: z
    .object({
      key_metrics: z.array(MonitoringMetric).optional(),
      rollback_triggers: z.array(RollbackTrigger).optional(),
    })
    .optional(),

  // Historical results
  results: z
    .object({
      recent_implementations: z
        .array(z.record(z.union([z.string(), z.number()])))
        .optional(),
    })
    .optional(),
});
284
/**
 * Stack layers for impact analysis (TDD v1.7 - 6-layer architecture).
 */
export const StackLayer = z.enum([
  // Code patterns: streaming-drift, overpowered-model, cost-concentration
  'application',
  // API layer: retry-explosion, untested-fallback, rate limiting
  'api',
  // Gateway/proxy layer: caching, load balancing, routing
  'gateway',
  // Inference engines: vLLM, sglang, TGI optimizations
  'runtime',
  // Model selection: GPT-4 vs GPT-3.5, context-accumulation, token-underutilization
  'model',
  // Hardware layer: GPU optimization, memory management
  'hardware',
]);
293
/** Impact metrics an estimate can be expressed in. */
export const ImpactType = z.enum([
  'cost',
  'latency',
  'throughput',
]);
295
/** Effort level for implementing the change. */
export const EffortLevel = z.enum([
  'low',
  'medium',
  'high',
]);
297
/**
 * Impact estimation for each insight.
 *
 * `layer`, `impactType`, `estimatedImpactPercent` and `effort` are required;
 * the remaining figures and the free-text `assumptions` are optional.
 */
export const ImpactEstimate = z.object({
  layer: StackLayer,
  impactType: ImpactType,
  // 0-100% improvement
  estimatedImpactPercent: z.number().min(0).max(100),
  effort: EffortLevel,
  // Estimated annual savings in USD
  annualSavingsUSD: z.number().optional(),
  // Estimated latency improvement
  latencyReductionMs: z.number().optional(),
  // Estimated throughput improvement
  throughputGainPercent: z.number().optional(),
  // Confidence in estimate (0-1)
  confidence: z.number().min(0).max(1).optional(),
  // Key assumptions for this estimate
  assumptions: z.string().optional(),
});
309
/**
 * A generated insight. `severity`, `category`, `headline` and `evidence`
 * are required; identifiers, location, recommendation, impact estimate and
 * the fix-related fields are optional.
 */
export const Insight = z.object({
  // Unique insight ID
  id: z.string().optional(),
  severity: Severity,
  category: Category,
  // Optional for LLM-generated insights
  templateId: z.string().optional(),
  headline: z.string(),
  evidence: z.string(),
  location: z.string().optional(),
  // Actionable suggestion
  recommendation: z.string().optional(),
  // 'template' = pattern-based, 'llm' = semantic analysis
  source: z.enum(['template', 'llm']).optional(),

  // --- Impact estimation fields ---

  // Estimated impact of implementing this recommendation
  impact: ImpactEstimate.optional(),

  // --- CodeRabbit-style fix fields (v1.6 - LLM-generated) ---

  // Exact code line(s) that need to change
  originalCode: z.string().optional(),
  // Complete replacement code
  suggestedFix: z.string().optional(),
  // Instructions for AI agents like Copilot
  aiAgentPrompt: z.string().optional(),
  // Full line replacement for suggestion syntax
  fullLineFix: z.string().optional(),
});
// =============================================================================
// INFERENCE MAX ENVELOPES
// =============================================================================
/**
 * Performance envelope made of four required numeric fields.
 * NOTE(review): units and meanings are only implied by the field names
 * (e.g. the `_ms` suffix) - confirm against the data source.
 */
export const PerformanceEnvelope = z.object({
    ttft_p50_ms: z.number(),
    ttft_p95_ms: z.number(),
    tps_median: z.number(),
    tps_peak: z.number(),
});
// =============================================================================
// AGENT PLANNING
// =============================================================================
/**
 * Kinds of task that may appear in an execution plan.
 */
export const TaskType = z.enum([
    'scan',
    'analyze',
    'parse_events',
    'join',
    'load_templates',
    'generate_insights',
    'render',
    'generate_html',
    'generate_pdf',
    'save_artifacts',
    // v1.5: save the run to history for comparison/prediction.
    'save_history',
    // v1.5: compare with the previous run.
    'compare',
    // v1.5: generate deploy-time predictions.
    'predict',
    // v1.5: generate what-if optimization scenarios.
    'counterfactuals',
]);
/**
 * One task inside an execution plan.
 *
 * `depends_on` is an optional list of numbers.
 * NOTE(review): presumably these are `id` values of other planned tasks - confirm.
 */
export const PlannedTask = z.object({
    id: z.number(),
    type: TaskType,
    description: z.string(),
    depends_on: z.array(z.number()).optional(),
});
/**
 * A plan: the analysis mode plus the list of tasks to carry out.
 */
export const ExecutionPlan = z.object({
    mode: z.enum([
        'static',
        'runtime',
        'combined',
    ]),
    tasks: z.array(PlannedTask),
});
/**
 * Outcome record for a task: its status, an optional error string,
 * and a required duration.
 */
export const TaskResult = z.object({
    taskId: z.number(),
    status: z.enum([
        'success',
        'failed',
        'skipped',
    ]),
    error: z.string().optional(),
    durationMs: z.number(),
});
// =============================================================================
// FORMAT DETECTION & NORMALIZATION (PRD §6.4)
// =============================================================================
/**
 * Format types recognised for runtime event files.
 *
 * Three groups: formats parsed directly without an LLM, formats that need
 * agent normalization (semantic analysis), and formats inferred heuristically.
 */
export const FormatType = z.enum([
    // ---- Direct-parse formats (no LLM needed) ----
    // Newline-delimited JSON with InferenceEvent schema.
    'jsonl',
    // JSON array of InferenceEvent objects.
    'json_array',
    // CSV with standard column names.
    'csv',
    // TSV with standard column names.
    'tsv',

    // ---- Agent-normalized formats (require semantic analysis) ----
    // OpenTelemetry OTLP traces/spans.
    'otel',
    // Jaeger distributed tracing format.
    'jaeger',
    // Zipkin tracing format.
    'zipkin',
    // LangSmith trace exports.
    'langsmith',
    // Helicone proxy logs.
    'helicone',
    // Weights & Biases inference logs.
    'wandb',
    // LiteLLM proxy event logs.
    'litellm',
    // Portkey gateway logs.
    'portkey',

    // ---- Inferred formats (heuristic detection) ----
    // Unknown JSON structure requiring field mapping.
    'custom_json',
    // Structured text logs.
    'custom_text',
    // Format could not be determined.
    'unknown',
]);
/**
 * How the value for a field mapping is extracted from the source record.
 */
export const ExtractionType = z.enum([
    // Direct field access (e.g., obj.field).
    'direct',
    // JSONPath expression.
    'jsonpath',
    // CSV/TSV column name.
    'column',
    // Regular expression extraction.
    'regex',
    // Computed from other fields (e.g., latency = end - start).
    'computed',
    // Fixed value for all events.
    'constant',
]);
/**
 * Transformation applied to a value after it has been extracted.
 */
export const TransformType = z.enum([
    // No transformation.
    'none',
    // Unix milliseconds to ISO timestamp.
    'unix_ms_to_iso',
    // Unix seconds to ISO timestamp.
    'unix_s_to_iso',
    // Unix nanoseconds to ISO timestamp.
    'unix_nano_to_iso',
    // Duration string (e.g., "1.5s") to milliseconds.
    'duration_to_ms',
    // String to integer.
    'parse_int',
    // String to float.
    'parse_float',
    // Lowercase string.
    'lowercase',
    // Normalize provider names (e.g., "OpenAI" -> "openai").
    'provider_normalize',
]);
/**
 * Maps one field of a source format onto the InferenceEvent schema.
 */
export const FieldMapping = z.object({
    // InferenceEvent field name.
    target: z.string(),
    // JSONPath, column name, regex, or expression.
    source_path: z.string(),
    extraction_type: ExtractionType,
    // May be omitted; parses to 'none' in that case.
    transform: TransformType.optional().default('none'),
    // Confidence in this mapping; validated to lie within 0-1.
    confidence: z.number().min(0).max(1),
    // Why this mapping was chosen.
    evidence: z.string().optional(),
});
/**
 * Outcome of format detection. Every field is required.
 */
export const FormatDetectionResult = z.object({
    format_type: FormatType,
    // Overall detection confidence; validated to lie within 0-1.
    confidence: z.number().min(0).max(1),
    // Explanation of the detection.
    evidence: z.string(),
    // Number of lines/records sampled.
    sample_size: z.number(),
    // Whether agent normalization is needed.
    requires_agent: z.boolean(),
});
/**
 * Full normalization result: the detection outcome, the field mappings,
 * leftovers/warnings, and an audit record.
 */
export const NormalizationResult = z.object({
    detection: FormatDetectionResult,
    mappings: z.array(FieldMapping),
    // Source fields that were not mapped.
    unmapped_fields: z.array(z.string()),
    // Issues encountered during normalization.
    warnings: z.array(z.string()),
    audit: z.object({
        // ISO timestamp.
        normalized_at: z.string(),
        agent_used: z.boolean(),
        codebase_context_used: z.boolean(),
        llm_model: z.string().optional(),
    }),
});
/**
 * Options accepted by format normalization. Every field is optional.
 */
export const NormalizationOptions = z.object({
    // User-provided format hint.
    format_hint: FormatType.optional(),
    // User-provided field mappings (string keys to string values).
    // The key schema is passed explicitly: the two-argument form of z.record
    // validates identically on Zod 3 and is the only form Zod 4 accepts.
    field_hints: z.record(z.string(), z.string()).optional(),
    // Accept low-confidence mappings.
    lenient: z.boolean().optional(),
    // Fail on missing required fields.
    strict: z.boolean().optional(),
    // ScanResult for codebase-aware normalization (not validated here).
    codebase_context: z.any().optional(),
});
// =============================================================================
// HISTORY STORAGE (v1.5)
// =============================================================================
/**
 * Analysis type used to categorize runs.
 */
export const AnalysisType = z.enum([
    'static',
    'runtime',
    'combined',
]);
/**
 * Manifest describing one analysis run, kept so runs can be tracked over time.
 *
 * This is separate from the RunManifest in runid.ts, which is concerned with
 * caching/resumability; this schema exists for historical comparison and
 * deploy-time prediction.
 */
export const HistoryManifest = z.object({
    // Unique run identifier.
    runId: z.string(),
    // ISO timestamp of when the analysis completed.
    timestamp: z.string().datetime(),
    // Analyzed path (absolute, for matching).
    path: z.string(),
    // Hash of the normalized path for efficient lookup.
    pathHash: z.string(),
    // Type of analysis performed.
    analysisType: AnalysisType,
    // PeakInfer version that produced this run.
    version: z.string(),

    // ---- Summary metrics for quick comparison ----
    // Number of inference points detected.
    inferencePointCount: z.number(),
    // Number of runtime events (if runtime/combined).
    eventCount: z.number().optional(),
    // Number of drift signals (if combined).
    driftCount: z.number().optional(),
    // Number of insights generated.
    insightCount: z.number().optional(),

    // ---- Performance context ----
    // Analysis duration in milliseconds.
    durationMs: z.number().optional(),

    // ---- Artifact paths, relative to the history directory ----
    artifacts: z.object({
        // inference-map.json
        inferenceMap: z.string().optional(),
        // analysis.json (full results)
        analysis: z.string().optional(),
        // report.html
        html: z.string().optional(),
        // report.pdf
        pdf: z.string().optional(),
    }).optional(),
});
/**
 * Index over all historical runs for a project path.
 * Persisted at .peakinfer/history/index.json
 */
export const HistoryIndex = z.object({
    // History format version.
    version: z.string(),
    // Time of the last index update.
    lastUpdated: z.string().datetime(),
    // One compact entry per run.
    runs: z.array(
        z.object({
            runId: z.string(),
            timestamp: z.string().datetime(),
            pathHash: z.string(),
            analysisType: AnalysisType,
            inferencePointCount: z.number(),
        }),
    ),
});
// =============================================================================
// HISTORICAL COMPARISON (v1.5)
// =============================================================================
/**
 * Kind of change recorded between two runs.
 */
export const ChangeType = z.enum([
    'added',
    'removed',
    'modified',
]);
/**
 * One changed field within an inference point.
 * `before` and `after` are unconstrained (`z.unknown()`).
 */
export const FieldChange = z.object({
    // Name of the field that changed.
    field: z.string(),
    // Previous value.
    before: z.unknown(),
    // New value.
    after: z.unknown(),
});
/**
 * An inference point that differs between runs, together with its field changes.
 */
export const ChangedInferencePoint = z.object({
    // The inference point itself.
    point: Callsite,
    // List of field changes.
    changes: z.array(FieldChange),
});
/**
 * Result of comparing two analysis runs.
 * Supports "what changed" insights for pre-deploy validation.
 */
export const ComparisonResult = z.object({
    // Baseline run ID.
    baseRunId: z.string(),
    // When the baseline was created.
    baseTimestamp: z.string().datetime(),
    // Current run ID.
    currentRunId: z.string(),
    // When the current run was created.
    currentTimestamp: z.string().datetime(),

    // ---- Inference point changes ----
    // New inference points.
    added: z.array(Callsite),
    // Removed inference points.
    removed: z.array(Callsite),
    // Modified inference points.
    changed: z.array(ChangedInferencePoint),

    // ---- Summary metrics ----
    metrics: z.object({
        // Inference points in the baseline.
        totalBefore: z.number(),
        // Inference points in the current run.
        totalAfter: z.number(),
        // Count of added points.
        addedCount: z.number(),
        // Count of removed points.
        removedCount: z.number(),
        // Count of modified points.
        changedCount: z.number(),
        // Net change (added - removed).
        netChange: z.number(),
    }),

    // ---- Insight deltas (optional as a whole) ----
    insightDeltas: z.object({
        // New critical insights.
        newCritical: z.number(),
        // Resolved critical insights.
        resolvedCritical: z.number(),
        // New warnings.
        newWarnings: z.number(),
        // Resolved warnings.
        resolvedWarnings: z.number(),
    }).optional(),
});
// =============================================================================
// DEPLOY-TIME PREDICTION (v1.5)
// =============================================================================
/**
 * Risk level assigned to a prediction.
 */
export const RiskLevel = z.enum([
    'high',
    'medium',
    'low',
    'neutral',
]);
/**
 * Direction in which a prediction factor pushes the outcome.
 */
export const ImpactDirection = z.enum([
    'positive',
    'negative',
    'neutral',
]);
/**
 * One factor that contributes to a latency prediction.
 */
export const PredictionFactor = z.object({
    // Factor name (e.g., "model complexity").
    name: z.string(),
    // How the factor affects latency.
    impact: ImpactDirection,
    // Human-readable explanation.
    description: z.string(),
    // Relative importance; optional, validated to lie within 0-1.
    weight: z.number().min(0).max(1).optional(),
});
/**
 * Latency values at three percentiles, in milliseconds.
 */
export const LatencyPercentiles = z.object({
    // Median latency (ms).
    p50: z.number(),
    // 95th percentile (ms).
    p95: z.number(),
    // 99th percentile (ms).
    p99: z.number(),
});
/**
 * Prediction for one inference point.
 * Intended to surface potential performance risks before deployment.
 */
export const InferencePointPrediction = z.object({
    // ID of the inference point.
    inferencePointId: z.string(),
    // file:line location.
    location: z.string(),
    // Provider (e.g., openai).
    provider: z.string().optional(),
    // Model name.
    model: z.string().optional(),

    // Current performance (from historical data, when available).
    currentLatency: LatencyPercentiles.optional(),

    // Predicted performance.
    predictedLatency: LatencyPercentiles,

    // ---- Risk assessment ----
    // Overall risk level.
    risk: RiskLevel,
    // Numeric risk score; validated to lie within 0-100.
    riskScore: z.number().min(0).max(100),

    // Factors contributing to the prediction.
    factors: z.array(PredictionFactor),

    // ---- Confidence in the prediction ----
    confidence: z.enum([
        'high',
        'medium',
        'low',
    ]),
    // Why confidence is high/low.
    confidenceReason: z.string().optional(),
});
/**
 * Aggregate summary across all predictions.
 */
export const PredictionSummary = z.object({
    // Total inference points analyzed.
    totalPoints: z.number(),
    // High risk predictions.
    highRiskCount: z.number(),
    // Medium risk predictions.
    mediumRiskCount: z.number(),
    // Low risk predictions.
    lowRiskCount: z.number(),
    // Average predicted p95 latency.
    averageP95: z.number(),
    // Worst predicted p95 latency.
    worstP95: z.number(),
    // True if the target latency is exceeded.
    budgetExceeded: z.boolean().optional(),
});
/**
 * Complete prediction output for deploy-time analysis.
 */
export const PredictionResult = z.object({
    predictions: z.array(InferencePointPrediction),
    summary: PredictionSummary,
    // User-specified target p95 (ms).
    targetP95: z.number().optional(),
    // When the predictions were generated.
    generatedAt: z.string().datetime(),
    // Number of historical runs used.
    basedOnRuns: z.number(),
});
// =============================================================================
// COUNTERFACTUAL INSIGHTS (v1.5)
// =============================================================================
/**
 * Kinds of counterfactual optimization scenario.
 */
export const CounterfactualType = z.enum([
    // Swap to a different model (e.g., cheaper or faster).
    'model_swap',
    // Add batching to reduce per-request overhead.
    'batch_optimization',
    // Add caching to bypass the LLM for repeated queries.
    'cache_addition',
    // Change provider (e.g., cloud to self-hosted).
    'provider_change',
    // Enable streaming for better perceived latency.
    'streaming_enable',
]);
/**
 * Describes a configuration state; used for both the current and the
 * proposed side of a counterfactual.
 */
export const CounterfactualState = z.object({
    // Model name.
    model: z.string().optional(),
    // Provider name.
    provider: z.string().optional(),
    // Pattern (streaming, batching, etc.).
    pattern: z.string().optional(),
    // p95 latency estimate (ms).
    estimatedLatency: z.number(),
    // Cost per 1K calls ($).
    estimatedCost: z.number(),
});
/**
 * Impact assessment of a counterfactual. Negative deltas are improvements.
 */
export const CounterfactualImpact = z.object({
    // Change in p95 latency (ms, negative = improvement).
    latencyDelta: z.number(),
    // Percentage change in latency.
    latencyDeltaPercent: z.number(),
    // Change in cost per 1K calls ($, negative = savings).
    costDelta: z.number(),
    // Percentage change in cost.
    costDeltaPercent: z.number(),
    // Tradeoffs to consider.
    tradeoffs: z.array(z.string()),
});
/**
 * A single counterfactual "what if" scenario.
 * Shows the road not taken and its potential impact.
 *
 * Every field except `confidenceReason` is required.
 */
export const Counterfactual = z.object({
    // Unique identifier.
    id: z.string(),
    // Type of optimization.
    type: CounterfactualType,
    // Short description (e.g., "Switch to GPT-4o-mini").
    headline: z.string(),
    // Detailed explanation.
    description: z.string(),
    // Current configuration.
    currentState: CounterfactualState,
    // Proposed configuration.
    proposedState: CounterfactualState,
    // Estimated impact.
    impact: CounterfactualImpact,
    confidence: z.enum(['high', 'medium', 'low']),
    confidenceReason: z.string().optional(),
    // Inference point IDs affected.
    affectedPoints: z.array(z.string()),
    // Implementation effort. Reuses the shared EffortLevel enum
    // ('low' | 'medium' | 'high') rather than repeating the literal list,
    // so the effort scale stays consistent with ImpactEstimate.effort.
    effort: EffortLevel,
});
/**
 * Summary of counterfactual opportunities. Every field is required.
 */
export const CounterfactualSummary = z.object({
    // Total counterfactuals identified.
    totalOpportunities: z.number(),
    // Max latency reduction achievable (ms).
    maxLatencySavingsMs: z.number(),
    // Max latency reduction percentage.
    maxLatencySavingsPercent: z.number(),
    // Max cost savings achievable ($).
    maxCostSavings: z.number(),
    // Max cost savings percentage.
    maxCostSavingsPercent: z.number(),
    // Count by counterfactual type (string keys to numeric counts).
    // The key schema is passed explicitly: the two-argument form of z.record
    // validates identically on Zod 3 and is the only form Zod 4 accepts.
    byType: z.record(z.string(), z.number()),
});
/**
 * Complete output of a counterfactual analysis.
 */
export const CounterfactualResult = z.object({
    counterfactuals: z.array(Counterfactual),
    summary: CounterfactualSummary,
    // When the analysis was performed.
    generatedAt: z.string().datetime(),
});
703
+ //# sourceMappingURL=types.js.map