@peakinfer/cli 1.0.133
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -0
- package/.env.example +6 -0
- package/.github/workflows/peakinfer.yml +64 -0
- package/CHANGELOG.md +31 -0
- package/LICENSE +190 -0
- package/README.md +335 -0
- package/data/inferencemax.json +274 -0
- package/dist/agent-analyzer.d.ts +45 -0
- package/dist/agent-analyzer.d.ts.map +1 -0
- package/dist/agent-analyzer.js +374 -0
- package/dist/agent-analyzer.js.map +1 -0
- package/dist/agent.d.ts +76 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +965 -0
- package/dist/agent.js.map +1 -0
- package/dist/agents/correlation-analyzer.d.ts +34 -0
- package/dist/agents/correlation-analyzer.d.ts.map +1 -0
- package/dist/agents/correlation-analyzer.js +261 -0
- package/dist/agents/correlation-analyzer.js.map +1 -0
- package/dist/agents/index.d.ts +91 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +111 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/runtime-analyzer.d.ts +38 -0
- package/dist/agents/runtime-analyzer.d.ts.map +1 -0
- package/dist/agents/runtime-analyzer.js +244 -0
- package/dist/agents/runtime-analyzer.js.map +1 -0
- package/dist/analysis-types.d.ts +500 -0
- package/dist/analysis-types.d.ts.map +1 -0
- package/dist/analysis-types.js +11 -0
- package/dist/analysis-types.js.map +1 -0
- package/dist/analytics.d.ts +25 -0
- package/dist/analytics.d.ts.map +1 -0
- package/dist/analytics.js +94 -0
- package/dist/analytics.js.map +1 -0
- package/dist/analyzer.d.ts +48 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +547 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/artifacts.d.ts +44 -0
- package/dist/artifacts.d.ts.map +1 -0
- package/dist/artifacts.js +165 -0
- package/dist/artifacts.js.map +1 -0
- package/dist/benchmarks/index.d.ts +88 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +205 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +427 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/ci.d.ts +19 -0
- package/dist/commands/ci.d.ts.map +1 -0
- package/dist/commands/ci.js +253 -0
- package/dist/commands/ci.js.map +1 -0
- package/dist/commands/config.d.ts +16 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +249 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/demo.d.ts +15 -0
- package/dist/commands/demo.d.ts.map +1 -0
- package/dist/commands/demo.js +106 -0
- package/dist/commands/demo.js.map +1 -0
- package/dist/commands/export.d.ts +14 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +209 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/history.d.ts +15 -0
- package/dist/commands/history.d.ts.map +1 -0
- package/dist/commands/history.js +389 -0
- package/dist/commands/history.js.map +1 -0
- package/dist/commands/template.d.ts +14 -0
- package/dist/commands/template.d.ts.map +1 -0
- package/dist/commands/template.js +341 -0
- package/dist/commands/template.js.map +1 -0
- package/dist/commands/validate-map.d.ts +12 -0
- package/dist/commands/validate-map.d.ts.map +1 -0
- package/dist/commands/validate-map.js +274 -0
- package/dist/commands/validate-map.js.map +1 -0
- package/dist/commands/whatif.d.ts +17 -0
- package/dist/commands/whatif.d.ts.map +1 -0
- package/dist/commands/whatif.js +206 -0
- package/dist/commands/whatif.js.map +1 -0
- package/dist/comparison.d.ts +38 -0
- package/dist/comparison.d.ts.map +1 -0
- package/dist/comparison.js +223 -0
- package/dist/comparison.js.map +1 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +158 -0
- package/dist/config.js.map +1 -0
- package/dist/connectors/helicone.d.ts +9 -0
- package/dist/connectors/helicone.d.ts.map +1 -0
- package/dist/connectors/helicone.js +106 -0
- package/dist/connectors/helicone.js.map +1 -0
- package/dist/connectors/index.d.ts +37 -0
- package/dist/connectors/index.d.ts.map +1 -0
- package/dist/connectors/index.js +65 -0
- package/dist/connectors/index.js.map +1 -0
- package/dist/connectors/langsmith.d.ts +9 -0
- package/dist/connectors/langsmith.d.ts.map +1 -0
- package/dist/connectors/langsmith.js +122 -0
- package/dist/connectors/langsmith.js.map +1 -0
- package/dist/connectors/types.d.ts +83 -0
- package/dist/connectors/types.d.ts.map +1 -0
- package/dist/connectors/types.js +98 -0
- package/dist/connectors/types.js.map +1 -0
- package/dist/cost-estimator.d.ts +46 -0
- package/dist/cost-estimator.d.ts.map +1 -0
- package/dist/cost-estimator.js +104 -0
- package/dist/cost-estimator.js.map +1 -0
- package/dist/costs.d.ts +57 -0
- package/dist/costs.d.ts.map +1 -0
- package/dist/costs.js +251 -0
- package/dist/costs.js.map +1 -0
- package/dist/counterfactuals.d.ts +29 -0
- package/dist/counterfactuals.d.ts.map +1 -0
- package/dist/counterfactuals.js +448 -0
- package/dist/counterfactuals.js.map +1 -0
- package/dist/enhancement-prompts.d.ts +41 -0
- package/dist/enhancement-prompts.d.ts.map +1 -0
- package/dist/enhancement-prompts.js +88 -0
- package/dist/enhancement-prompts.js.map +1 -0
- package/dist/envelopes.d.ts +20 -0
- package/dist/envelopes.d.ts.map +1 -0
- package/dist/envelopes.js +790 -0
- package/dist/envelopes.js.map +1 -0
- package/dist/format-normalizer.d.ts +71 -0
- package/dist/format-normalizer.d.ts.map +1 -0
- package/dist/format-normalizer.js +1331 -0
- package/dist/format-normalizer.js.map +1 -0
- package/dist/history.d.ts +79 -0
- package/dist/history.d.ts.map +1 -0
- package/dist/history.js +313 -0
- package/dist/history.js.map +1 -0
- package/dist/html.d.ts +11 -0
- package/dist/html.d.ts.map +1 -0
- package/dist/html.js +463 -0
- package/dist/html.js.map +1 -0
- package/dist/impact.d.ts +42 -0
- package/dist/impact.d.ts.map +1 -0
- package/dist/impact.js +443 -0
- package/dist/impact.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +34 -0
- package/dist/index.js.map +1 -0
- package/dist/insights.d.ts +5 -0
- package/dist/insights.d.ts.map +1 -0
- package/dist/insights.js +271 -0
- package/dist/insights.js.map +1 -0
- package/dist/joiner.d.ts +9 -0
- package/dist/joiner.d.ts.map +1 -0
- package/dist/joiner.js +247 -0
- package/dist/joiner.js.map +1 -0
- package/dist/orchestrator.d.ts +34 -0
- package/dist/orchestrator.d.ts.map +1 -0
- package/dist/orchestrator.js +827 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/pdf.d.ts +26 -0
- package/dist/pdf.d.ts.map +1 -0
- package/dist/pdf.js +84 -0
- package/dist/pdf.js.map +1 -0
- package/dist/prediction.d.ts +33 -0
- package/dist/prediction.d.ts.map +1 -0
- package/dist/prediction.js +316 -0
- package/dist/prediction.js.map +1 -0
- package/dist/prompts/loader.d.ts +38 -0
- package/dist/prompts/loader.d.ts.map +1 -0
- package/dist/prompts/loader.js +60 -0
- package/dist/prompts/loader.js.map +1 -0
- package/dist/renderer.d.ts +64 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +923 -0
- package/dist/renderer.js.map +1 -0
- package/dist/runid.d.ts +57 -0
- package/dist/runid.d.ts.map +1 -0
- package/dist/runid.js +199 -0
- package/dist/runid.js.map +1 -0
- package/dist/runtime.d.ts +29 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +366 -0
- package/dist/runtime.js.map +1 -0
- package/dist/scanner.d.ts +11 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +426 -0
- package/dist/scanner.js.map +1 -0
- package/dist/templates.d.ts +120 -0
- package/dist/templates.d.ts.map +1 -0
- package/dist/templates.js +429 -0
- package/dist/templates.js.map +1 -0
- package/dist/tools/index.d.ts +153 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +177 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/types.d.ts +3647 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +703 -0
- package/dist/types.js.map +1 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +23 -0
- package/dist/version.js.map +1 -0
- package/docs/demo-guide.md +423 -0
- package/docs/events-format.md +295 -0
- package/docs/inferencemap-spec.md +344 -0
- package/docs/migration-v2.md +293 -0
- package/fixtures/demo/precomputed.json +142 -0
- package/fixtures/demo-project/README.md +52 -0
- package/fixtures/demo-project/ai-service.ts +65 -0
- package/fixtures/demo-project/sample-events.jsonl +15 -0
- package/fixtures/demo-project/src/ai-service.ts +128 -0
- package/fixtures/demo-project/src/llm-client.ts +155 -0
- package/package.json +65 -0
- package/prompts/agent-analyzer.yaml +47 -0
- package/prompts/ci-gate.yaml +98 -0
- package/prompts/correlation-analyzer.yaml +178 -0
- package/prompts/format-normalizer.yaml +46 -0
- package/prompts/peak-performance.yaml +180 -0
- package/prompts/pr-comment.yaml +111 -0
- package/prompts/runtime-analyzer.yaml +189 -0
- package/prompts/unified-analyzer.yaml +241 -0
- package/schemas/inference-map.v0.1.json +215 -0
- package/scripts/benchmark.ts +394 -0
- package/scripts/demo-v1.5.sh +158 -0
- package/scripts/sync-from-site.sh +197 -0
- package/scripts/validate-sync.sh +178 -0
- package/src/agent-analyzer.ts +481 -0
- package/src/agent.ts +1232 -0
- package/src/agents/correlation-analyzer.ts +353 -0
- package/src/agents/index.ts +235 -0
- package/src/agents/runtime-analyzer.ts +343 -0
- package/src/analysis-types.ts +558 -0
- package/src/analytics.ts +100 -0
- package/src/analyzer.ts +692 -0
- package/src/artifacts.ts +218 -0
- package/src/benchmarks/index.ts +309 -0
- package/src/cli.ts +503 -0
- package/src/commands/ci.ts +336 -0
- package/src/commands/config.ts +288 -0
- package/src/commands/demo.ts +175 -0
- package/src/commands/export.ts +297 -0
- package/src/commands/history.ts +425 -0
- package/src/commands/template.ts +385 -0
- package/src/commands/validate-map.ts +324 -0
- package/src/commands/whatif.ts +272 -0
- package/src/comparison.ts +283 -0
- package/src/config.ts +188 -0
- package/src/connectors/helicone.ts +164 -0
- package/src/connectors/index.ts +93 -0
- package/src/connectors/langsmith.ts +179 -0
- package/src/connectors/types.ts +180 -0
- package/src/cost-estimator.ts +146 -0
- package/src/costs.ts +347 -0
- package/src/counterfactuals.ts +516 -0
- package/src/enhancement-prompts.ts +118 -0
- package/src/envelopes.ts +814 -0
- package/src/format-normalizer.ts +1486 -0
- package/src/history.ts +400 -0
- package/src/html.ts +512 -0
- package/src/impact.ts +522 -0
- package/src/index.ts +83 -0
- package/src/insights.ts +341 -0
- package/src/joiner.ts +289 -0
- package/src/orchestrator.ts +1015 -0
- package/src/pdf.ts +110 -0
- package/src/prediction.ts +392 -0
- package/src/prompts/loader.ts +88 -0
- package/src/renderer.ts +1045 -0
- package/src/runid.ts +261 -0
- package/src/runtime.ts +450 -0
- package/src/scanner.ts +508 -0
- package/src/templates.ts +561 -0
- package/src/tools/index.ts +214 -0
- package/src/types.ts +873 -0
- package/src/version.ts +24 -0
- package/templates/context-accumulation.yaml +23 -0
- package/templates/cost-concentration.yaml +20 -0
- package/templates/dead-code.yaml +20 -0
- package/templates/latency-explainer.yaml +23 -0
- package/templates/optimizations/ab-testing-framework.yaml +74 -0
- package/templates/optimizations/api-gateway-optimization.yaml +81 -0
- package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
- package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
- package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
- package/templates/optimizations/comprehensive-apm.yaml +76 -0
- package/templates/optimizations/context-window-optimization.yaml +91 -0
- package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
- package/templates/optimizations/distributed-training-optimization.yaml +77 -0
- package/templates/optimizations/document-analysis-edge.yaml +77 -0
- package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
- package/templates/optimizations/domain-specific-distillation.yaml +78 -0
- package/templates/optimizations/error-handling-optimization.yaml +76 -0
- package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
- package/templates/optimizations/long-context-memory-management.yaml +78 -0
- package/templates/optimizations/max-tokens-optimization.yaml +76 -0
- package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
- package/templates/optimizations/multi-framework-resilience.yaml +75 -0
- package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
- package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
- package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
- package/templates/optimizations/quality-monitoring.yaml +74 -0
- package/templates/optimizations/realtime-budget-controls.yaml +74 -0
- package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
- package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
- package/templates/optimizations/smart-model-routing.yaml +96 -0
- package/templates/optimizations/streaming-batch-selection.yaml +167 -0
- package/templates/optimizations/system-prompt-optimization.yaml +75 -0
- package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
- package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
- package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
- package/templates/overpowered-extraction.yaml +32 -0
- package/templates/overpowered-model.yaml +31 -0
- package/templates/prompt-bloat.yaml +24 -0
- package/templates/retry-explosion.yaml +28 -0
- package/templates/schema/insight.schema.json +113 -0
- package/templates/schema/optimization.schema.json +180 -0
- package/templates/streaming-drift.yaml +30 -0
- package/templates/throughput-gap.yaml +21 -0
- package/templates/token-underutilization.yaml +28 -0
- package/templates/untested-fallback.yaml +21 -0
- package/tests/accuracy/drift-detection.test.ts +184 -0
- package/tests/accuracy/false-positives.test.ts +166 -0
- package/tests/accuracy/templates.test.ts +205 -0
- package/tests/action/commands.test.ts +125 -0
- package/tests/action/comments.test.ts +347 -0
- package/tests/cli.test.ts +203 -0
- package/tests/comparison.test.ts +309 -0
- package/tests/correlation-analyzer.test.ts +534 -0
- package/tests/counterfactuals.test.ts +347 -0
- package/tests/fixtures/events/missing-id.jsonl +1 -0
- package/tests/fixtures/events/missing-input.jsonl +1 -0
- package/tests/fixtures/events/missing-latency.jsonl +1 -0
- package/tests/fixtures/events/missing-model.jsonl +1 -0
- package/tests/fixtures/events/missing-output.jsonl +1 -0
- package/tests/fixtures/events/missing-provider.jsonl +1 -0
- package/tests/fixtures/events/missing-ts.jsonl +1 -0
- package/tests/fixtures/events/valid.csv +3 -0
- package/tests/fixtures/events/valid.json +1 -0
- package/tests/fixtures/events/valid.jsonl +2 -0
- package/tests/fixtures/events/with-callsite.jsonl +1 -0
- package/tests/fixtures/events/with-intent.jsonl +1 -0
- package/tests/fixtures/events/wrong-type.jsonl +1 -0
- package/tests/fixtures/repos/empty/.gitkeep +0 -0
- package/tests/fixtures/repos/hybrid-router/router.py +35 -0
- package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
- package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
- package/tests/fixtures/repos/saas-openai/client.py +26 -0
- package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
- package/tests/github-action.test.ts +292 -0
- package/tests/insights.test.ts +878 -0
- package/tests/joiner.test.ts +168 -0
- package/tests/performance/action-latency.test.ts +132 -0
- package/tests/performance/benchmark.test.ts +189 -0
- package/tests/performance/cli-latency.test.ts +102 -0
- package/tests/pr-comment.test.ts +313 -0
- package/tests/prediction.test.ts +296 -0
- package/tests/runtime-analyzer.test.ts +375 -0
- package/tests/runtime.test.ts +205 -0
- package/tests/scanner.test.ts +122 -0
- package/tests/template-conformance.test.ts +526 -0
- package/tests/unit/cost-calculator.test.ts +303 -0
- package/tests/unit/credits.test.ts +180 -0
- package/tests/unit/inference-map.test.ts +276 -0
- package/tests/unit/schema.test.ts +300 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +14 -0
package/src/types.ts
ADDED
|
@@ -0,0 +1,873 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
// =============================================================================
|
|
4
|
+
// ENUMS
|
|
5
|
+
// =============================================================================
|
|
6
|
+
|
|
7
|
+
/**
 * Closed set of provider identifiers accepted wherever a schema in this
 * file references `Provider` (e.g. `Callsite.provider`, `InferenceEvent.provider`).
 * The list ends with an explicit `'unknown'` member.
 */
export const Provider = z.enum([
  'openai',
  'anthropic',
  'google',
  'cohere',
  'mistral',
  'bedrock',
  'azure_openai',
  'together',
  'fireworks',
  'groq',
  'replicate',
  'perplexity',
  'vllm',
  'sglang',
  'tgi',
  'ollama',
  'llamacpp',
  'unknown',
]);
|
|
14
|
+
|
|
15
|
+
/** Severity labels allowed on insight templates and insights. */
export const Severity = z.enum([
  'critical',
  'warning',
  'info',
]);
|
|
16
|
+
|
|
17
|
+
/** Category labels allowed on insight templates and insights. */
export const Category = z.enum([
  'cost',
  'latency',
  'drift',
  'reliability',
  'waste',
  'throughput',
  'security',
  'best-practice',
]);
|
|
20
|
+
|
|
21
|
+
// =============================================================================
|
|
22
|
+
// STATIC ANALYSIS
|
|
23
|
+
// =============================================================================
|
|
24
|
+
|
|
25
|
+
/**
 * Pattern flags attached to a callsite (see `Callsite.patterns`).
 * Every flag is an optional boolean, so an empty object is valid.
 */
export const Patterns = z.object({
  streaming: z.boolean().optional(),
  batching:  z.boolean().optional(),
  retries:   z.boolean().optional(),
  caching:   z.boolean().optional(),
  fallback:  z.boolean().optional(),
});
|
|
32
|
+
|
|
33
|
+
/**
 * A single callsite record produced by static analysis.
 *
 * `provider`, `model`, `framework` and `runtime` are required keys but may
 * be `null`; `confidence` must lie in the closed interval [0, 1].
 */
export const Callsite = z.object({
  // Identity and source location
  id: z.string(),
  file: z.string(),
  line: z.number(),

  // Detected attributes; each key must be present but may be null
  provider: Provider.nullable(),
  model: z.string().nullable(),
  framework: z.string().nullable(),
  runtime: z.string().nullable(),

  // Pattern flags and detection confidence
  patterns: Patterns,
  confidence: z.number().min(0).max(1),
});
|
|
44
|
+
|
|
45
|
+
/** A scan candidate: a file/line location together with a text snippet. */
export const ScanCandidate = z.object({
  file:    z.string(),
  line:    z.number(),
  snippet: z.string(),
});
|
|
50
|
+
|
|
51
|
+
/** Per-file entry of a scan: path, language label and a `loc` count. */
export const ScannedFile = z.object({
  path:     z.string(),
  language: z.string(),
  loc:      z.number(),
});
|
|
56
|
+
|
|
57
|
+
/**
 * Result of a scan: the root that was scanned, the files visited, the
 * candidates found, and aggregate counters in `summary`.
 */
export const ScanResult = z.object({
  root: z.string(),
  files: z.array(ScannedFile),
  candidates: z.array(ScanCandidate),

  // Aggregate counters for the whole scan
  summary: z.object({
    totalFiles: z.number(),
    totalLoc: z.number(),
    languages: z.array(z.string()),
    totalCandidates: z.number(),

    // v1.9.5: Cost optimization stats (both optional)
    skippedLargeFiles: z.number().optional(),
    skippedByPattern: z.number().optional(),
  }),
});
|
|
71
|
+
|
|
72
|
+
/**
 * Inference map document: version/root/timestamp header, optional report
 * metadata, a summary section and the list of callsites.
 */
export const InferenceMap = z.object({
  version: z.string(),
  root: z.string(),
  generatedAt: z.string(),

  // Report metadata (the whole object is optional)
  metadata: z.object({
    absolutePath: z.string(),                // Full absolute path analyzed
    promptId: z.string().optional(),         // Which analysis prompt was used
    promptVersion: z.string().optional(),    // Analysis prompt version
    templatesVersion: z.string().optional(), // peakinfer-templates version
    llmProvider: z.string().optional(),      // LLM provider used (anthropic, none)
    llmModel: z.string().optional(),         // LLM model used for analysis
  }).optional(),

  summary: z.object({
    totalCallsites: z.number(),
    providers: z.array(z.string()),
    models: z.array(z.string()),
    // Key schema is spelled out explicitly: the two-argument form is
    // equivalent to the single-argument form in Zod 3 and is the form
    // that Zod 4 also accepts.
    patterns: z.record(z.string(), z.number()),
  }),

  callsites: z.array(Callsite),
});
|
|
93
|
+
|
|
94
|
+
// =============================================================================
|
|
95
|
+
// RUNTIME ANALYSIS
|
|
96
|
+
// =============================================================================
|
|
97
|
+
|
|
98
|
+
/**
 * One runtime inference event.
 *
 * The first seven fields are required; `intent`, `callsite_id` and all of
 * the runtime pattern fields are optional.
 */
export const InferenceEvent = z.object({
  // Required core fields
  id: z.string(),
  ts: z.string(),
  provider: Provider,
  model: z.string(),
  input_tokens: z.number(),
  output_tokens: z.number(),
  latency_ms: z.number(),

  // Optional correlation fields
  intent: z.string().optional(),
  callsite_id: z.string().optional(),

  // Runtime pattern fields for drift detection
  // Was this a streaming request?
  streaming: z.boolean().optional(),
  // Time to first token (streaming only)
  ttft_ms: z.number().optional(),
  // If part of a batch, how many requests?
  batch_size: z.number().optional(),
  // Group ID for batched requests
  batch_id: z.string().optional(),
  // Was response served from cache?
  cached: z.boolean().optional(),
  // Number of retries before success
  retry_count: z.number().optional(),
  // Was a fallback provider/model used?
  fallback_used: z.boolean().optional(),
  // If fallback, what was the original model?
  original_model: z.string().optional(),
});
|
|
118
|
+
|
|
119
|
+
/**
 * Aggregated statistics bucket: call count, token totals and three
 * latency percentiles. Used as the value type of `RuntimeSummary.byProvider`
 * and `RuntimeSummary.byModel`.
 */
export const ProviderStats = z.object({
  // Volume
  calls: z.number(),
  tokens_in: z.number(),
  tokens_out: z.number(),

  // Latency percentiles
  latency_p50: z.number(),
  latency_p95: z.number(),
  latency_p99: z.number(),
});
|
|
127
|
+
|
|
128
|
+
/**
 * Summary of runtime events: a total count, per-provider and per-model
 * statistics keyed by string, and global percentile figures.
 */
export const RuntimeSummary = z.object({
  totalEvents: z.number(),

  // Key schemas are spelled out explicitly: the two-argument form is
  // equivalent to the single-argument form in Zod 3 and is the form that
  // Zod 4 also accepts.
  byProvider: z.record(z.string(), ProviderStats),
  byModel: z.record(z.string(), ProviderStats),

  // Percentiles across all events
  global: z.object({
    p50: z.number(),
    p95: z.number(),
    p99: z.number(),
  }),
});
|
|
138
|
+
|
|
139
|
+
// =============================================================================
|
|
140
|
+
// JOINED OUTPUT
|
|
141
|
+
// =============================================================================
|
|
142
|
+
|
|
143
|
+
/**
 * Usage statistics attached to an enriched callsite (see
 * `EnrichedCallsite.usage`). Field-for-field the same shape as
 * `ProviderStats`, but declared as its own schema.
 */
export const UsageStats = z.object({
  // Volume
  calls: z.number(),
  tokens_in: z.number(),
  tokens_out: z.number(),

  // Latency percentiles
  latency_p50: z.number(),
  latency_p95: z.number(),
  latency_p99: z.number(),
});
|
|
151
|
+
|
|
152
|
+
/**
 * A drift signal: a `type` tag and a required `message`, plus optional
 * provider / model / callsite references.
 */
export const DriftSignal = z.object({
  type: z.enum([
    'codeOnly',
    'runtimeOnly',
    'mismatch',
    'patternDrift',
  ]),

  // Optional references to what the signal is about
  provider: z.string().optional(),
  model: z.string().optional(),
  callsiteId: z.string().optional(),

  message: z.string(),
});
|
|
159
|
+
|
|
160
|
+
/** `Callsite` extended with an optional `usage` statistics object. */
export const EnrichedCallsite = Callsite.extend({
  usage: UsageStats.optional(),
});
|
|
163
|
+
|
|
164
|
+
/**
 * Joined output: enriched callsites, plain callsites listed under
 * `codeOnly`, inference events listed under `runtimeOnly`, and drift signals.
 */
export const JoinedOutput = z.object({
  callsites:   z.array(EnrichedCallsite),
  codeOnly:    z.array(Callsite),
  runtimeOnly: z.array(InferenceEvent),
  drift:       z.array(DriftSignal),
});
|
|
170
|
+
|
|
171
|
+
// =============================================================================
|
|
172
|
+
// TEMPLATES & INSIGHTS
|
|
173
|
+
// =============================================================================
|
|
174
|
+
|
|
175
|
+
/**
 * One condition inside an insight template's `match.conditions` list.
 * Only `field` and `op` are required; every operand field is optional.
 */
export const TemplateCondition = z.object({
  field: z.string(),

  // Operator applied to `field`
  op: z.enum([
    'eq',
    'neq',
    'gt',
    'lt',
    'gte',
    'lte',
    'exists',
    'in',
    'ratio_gt',
    'ratio_lt',
    'has_pattern',
  ]),

  // Optional operands: a scalar or a list of strings
  value: z
    .union([z.string(), z.number(), z.boolean(), z.array(z.string())])
    .optional(),
  compare_to: z.string().optional(),
  pattern: z.string().optional(),
  count_gt: z.number().optional(),
});
|
|
183
|
+
|
|
184
|
+
/**
 * Insight template definition: identity and classification, a `match`
 * section (scope plus conditions), the `output` texts, and optional
 * numeric `defaults`.
 */
export const InsightTemplate = z.object({
  id: z.string(),
  name: z.string(),
  version: z.string(),
  category: Category,
  severity: Severity,

  // v1.8: 6-layer architecture. These six literals are the same values
  // listed by the `StackLayer` enum declared further down in this file.
  layer: z
    .enum(['application', 'api', 'gateway', 'runtime', 'model', 'hardware'])
    .optional(),

  match: z.object({
    scope: z.enum(['callsite', 'joined', 'global', 'envelope']),
    conditions: z.array(TemplateCondition),
  }),

  output: z.object({
    headline: z.string(),
    evidence: z.string(),
  }),

  // Key schema is spelled out explicitly: the two-argument form is
  // equivalent to the single-argument form in Zod 3 and is the form that
  // Zod 4 also accepts.
  defaults: z.record(z.string(), z.number()).optional(),
});
|
|
201
|
+
|
|
202
|
+
// =============================================================================
|
|
203
|
+
// COMMUNITY OPTIMIZATION TEMPLATES (v1.8 - Inference Squeeze Guide)
|
|
204
|
+
// =============================================================================
|
|
205
|
+
|
|
206
|
+
/**
 * Optimization template category - matches Inference Squeeze Guide structure
 */
export const OptimizationCategory = z.enum([
  // PyTorch to ONNX, vLLM, TensorRT
  'runtime_optimization',
  // Continuous batching, batch sizing
  'batching_optimization',
  // Quantization, KV cache
  'memory_optimization',
  // Model routing, context management
  'application_optimization',
  // Budget controls, cost allocation
  'cost_optimization',
  // APM, quality monitoring, A/B testing
  'monitoring',
  // Auto-scaling, multi-GPU
  'scaling',
]);
|
|
218
|
+
|
|
219
|
+
/**
 * Risk level for optimization implementation
 */
export const OptimizationRiskLevel = z.enum([
  'low',
  'medium',
  'high',
]);
|
|
223
|
+
|
|
224
|
+
/**
 * Implementation step with validation and rollback.
 *
 * Only `step_id` and `name` are required. `validation` is optional, and
 * so is every field inside it.
 */
export const ImplementationStep = z.object({
  step_id: z.string(),
  name: z.string(),

  executable: z.boolean().optional(),
  commands: z.array(z.string()).optional(),

  validation: z.object({
    command: z.string().optional(),
    success_criteria: z.string().optional(),
    rollback_command: z.string().optional(),
  }).optional(),
});
|
|
238
|
+
|
|
239
|
+
/**
 * Monitoring metric configuration: three required string fields.
 */
export const MonitoringMetric = z.object({
  metric:          z.string(),
  target:          z.string(),
  alert_threshold: z.string(),
});
|
|
247
|
+
|
|
248
|
+
/**
 * Rollback trigger configuration: a `condition` string paired with an
 * `action` string.
 */
export const RollbackTrigger = z.object({
  condition: z.string(),
  action:    z.string(),
});
|
|
255
|
+
|
|
256
|
+
/**
 * Community Optimization Template - runbook-style templates from Inference Squeeze Guide
 * These templates provide step-by-step implementation guides with ROI estimates
 *
 * Required: `id`, `name`, `description`, `category`, `confidence` (0..1)
 * and the `optimization` section. All other sections are optional.
 *
 * Every `z.record` below spells out its key schema explicitly: the
 * two-argument form is equivalent to the single-argument form in Zod 3
 * and is the form that Zod 4 also accepts.
 */
export const OptimizationTemplate = z.object({
  id: z.string(),
  name: z.string(),
  description: z.string(),
  category: OptimizationCategory,
  confidence: z.number().min(0).max(1),
  success_count: z.number().optional(),
  verified_environments: z.number().optional(),
  contributors: z.array(z.string()).optional(),
  last_updated: z.string().optional(),

  // Environment matching criteria
  environment_match: z
    .record(z.string(), z.union([z.string(), z.boolean(), z.array(z.string())]))
    .optional(),

  // Optimization details
  optimization: z.object({
    technique: z.string(),
    expected_cost_reduction: z.string().optional(),
    expected_latency_improvement: z.string().optional(),
    expected_throughput_improvement: z.string().optional(),
    expected_memory_reduction: z.string().optional(),
    expected_quality_retention: z.string().optional(),
    effort_estimate: z.string(),
    risk_level: OptimizationRiskLevel,
  }),

  // Economics and ROI
  economics: z.object({
    baseline_calculation: z
      .record(z.string(), z.union([z.string(), z.number()]))
      .optional(),
    projected_improvement: z
      .record(z.string(), z.union([z.string(), z.number()]))
      .optional(),
    projected_savings: z
      .record(z.string(), z.union([z.string(), z.number()]))
      .optional(),
    implementation_cost: z.object({
      engineering_hours: z.number().optional(),
      hourly_rate: z.number().optional(),
      compute_hours: z.number().optional(),
      total_cost: z.number(),
    }).optional(),
    roi_calculation: z.record(z.string(), z.string()).optional(),
  }).optional(),

  // Implementation steps
  implementation: z.object({
    prerequisites: z.array(z.object({
      requirement: z.string(),
      validation_command: z.string().optional(),
    })).optional(),
    automated_steps: z.array(ImplementationStep).optional(),
  }).optional(),

  // Monitoring configuration
  monitoring: z.object({
    key_metrics: z.array(MonitoringMetric).optional(),
    rollback_triggers: z.array(RollbackTrigger).optional(),
  }).optional(),

  // Historical results
  results: z.object({
    recent_implementations: z
      .array(z.record(z.string(), z.union([z.string(), z.number()])))
      .optional(),
  }).optional(),
});
|
|
320
|
+
|
|
321
|
+
// Stack layers for impact analysis (TDD v1.7 - 6-layer architecture)
export const StackLayer = z.enum([
  // Code patterns: streaming-drift, overpowered-model, cost-concentration
  'application',
  // API layer: retry-explosion, untested-fallback, rate limiting
  'api',
  // Gateway/proxy layer: caching, load balancing, routing
  'gateway',
  // Inference engines: vLLM, sglang, TGI optimizations
  'runtime',
  // Model selection: GPT-4 vs GPT-3.5, context-accumulation, token-underutilization
  'model',
  // Hardware layer: GPU optimization, memory management
  'hardware',
]);
|
|
330
|
+
|
|
331
|
+
// Impact metrics: the dimension an impact estimate refers to
export const ImpactType = z.enum([
  'cost',
  'latency',
  'throughput',
]);
|
|
333
|
+
|
|
334
|
+
// Effort level for implementing the change
export const EffortLevel = z.enum([
  'low',
  'medium',
  'high',
]);
|
|
336
|
+
|
|
337
|
+
// Impact estimation for each insight.
// Required: layer, impactType, estimatedImpactPercent (0..100), effort.
export const ImpactEstimate = z.object({
  layer: StackLayer,
  impactType: ImpactType,
  // 0-100% improvement
  estimatedImpactPercent: z.number().min(0).max(100),
  effort: EffortLevel,

  // Estimated annual savings in USD
  annualSavingsUSD: z.number().optional(),
  // Estimated latency improvement
  latencyReductionMs: z.number().optional(),
  // Estimated throughput improvement
  throughputGainPercent: z.number().optional(),
  // Confidence in estimate (0-1)
  confidence: z.number().min(0).max(1).optional(),
  // Key assumptions for this estimate
  assumptions: z.string().optional(),
});
|
|
349
|
+
|
|
350
|
+
/**
 * A generated insight.
 *
 * Required: `severity`, `category`, `headline`, `evidence`. Everything
 * else (identifiers, location, recommendation, source tag, impact
 * estimate and the fix fields) is optional.
 */
export const Insight = z.object({
  // Unique insight ID
  id: z.string().optional(),
  severity: Severity,
  category: Category,
  // Optional for LLM-generated insights
  templateId: z.string().optional(),
  headline: z.string(),
  evidence: z.string(),
  location: z.string().optional(),
  // Actionable suggestion
  recommendation: z.string().optional(),
  // 'template' = pattern-based, 'llm' = semantic analysis
  source: z.enum(['template', 'llm']).optional(),

  // Impact estimation fields
  // Estimated impact of implementing this recommendation
  impact: ImpactEstimate.optional(),

  // CodeRabbit-style fix fields (v1.6 - LLM-generated)
  // Exact code line(s) that need to change
  originalCode: z.string().optional(),
  // Complete replacement code
  suggestedFix: z.string().optional(),
  // Instructions for AI agents like Copilot
  aiAgentPrompt: z.string().optional(),
  // Full line replacement for suggestion syntax
  fullLineFix: z.string().optional(),
});
|
|
368
|
+
|
|
369
|
+
// =============================================================================
|
|
370
|
+
// INFERENCE MAX ENVELOPES
|
|
371
|
+
// =============================================================================
|
|
372
|
+
|
|
373
|
+
// Shape of a performance envelope: four required numeric fields.
const performanceEnvelopeShape = {
  ttft_p50_ms: z.number(),
  ttft_p95_ms: z.number(),
  tps_median: z.number(),
  tps_peak: z.number(),
};

/**
 * Performance envelope holding TTFT (p50/p95, ms) and TPS (median/peak) numbers.
 */
export const PerformanceEnvelope = z.object(performanceEnvelopeShape);
|
|
379
|
+
|
|
380
|
+
// =============================================================================
|
|
381
|
+
// AGENT PLANNING
|
|
382
|
+
// =============================================================================
|
|
383
|
+
|
|
384
|
+
/**
 * Task kinds that may appear in an execution plan.
 */
export const TaskType = z.enum([
  'scan',
  'analyze',
  'parse_events',
  'join',
  'load_templates',
  'generate_insights',
  'render',
  'generate_html',
  'generate_pdf',
  'save_artifacts',
  // v1.5: Save run to history for comparison/prediction
  'save_history',
  // v1.5: Compare with previous run
  'compare',
  // v1.5: Generate deploy-time predictions
  'predict',
  // v1.5: Generate what-if optimization scenarios
  'counterfactuals',
]);
|
|
392
|
+
|
|
393
|
+
/**
 * One task inside an execution plan.
 *
 * `depends_on` is an optional list of numbers (presumably the `id`s of
 * prerequisite tasks - confirm against the planner).
 */
export const PlannedTask = z.object({
  id: z.number(),
  type: TaskType,
  description: z.string(),
  depends_on: z.array(z.number()).optional(),
});
|
|
399
|
+
|
|
400
|
+
/**
 * An execution plan: a mode plus the list of planned tasks.
 */
export const ExecutionPlan = z.object({
  // Same literals as the AnalysisType enum, which is declared further down
  // in this file and therefore cannot be referenced at this point.
  mode: z.enum(['static', 'runtime', 'combined']),
  tasks: z.array(PlannedTask),
});
|
|
404
|
+
|
|
405
|
+
/**
 * Outcome of a single task.
 *
 * `error` is the only optional field.
 */
export const TaskResult = z.object({
  taskId: z.number(),
  status: z.enum(['success', 'failed', 'skipped']),
  error: z.string().optional(),
  durationMs: z.number(),
});
|
|
411
|
+
|
|
412
|
+
// =============================================================================
|
|
413
|
+
// TYPE EXPORTS
|
|
414
|
+
// =============================================================================
|
|
415
|
+
|
|
416
|
+
// Each alias below deliberately shares its name with the schema constant it is
// inferred from: the constant lives in the value namespace, the alias in the
// type namespace.

// Core enums and callsite/scan structures
export type Provider = z.infer<typeof Provider>;
export type Severity = z.infer<typeof Severity>;
export type Category = z.infer<typeof Category>;
export type Patterns = z.infer<typeof Patterns>;
// Alias for analyzer
export type CallsitePatterns = Patterns;
export type Callsite = z.infer<typeof Callsite>;
export type ScanCandidate = z.infer<typeof ScanCandidate>;
export type ScannedFile = z.infer<typeof ScannedFile>;
export type ScanResult = z.infer<typeof ScanResult>;
export type InferenceMap = z.infer<typeof InferenceMap>;

// Runtime events and joined output
export type InferenceEvent = z.infer<typeof InferenceEvent>;
export type ProviderStats = z.infer<typeof ProviderStats>;
export type RuntimeSummary = z.infer<typeof RuntimeSummary>;
export type UsageStats = z.infer<typeof UsageStats>;
export type DriftSignal = z.infer<typeof DriftSignal>;
export type EnrichedCallsite = z.infer<typeof EnrichedCallsite>;
export type JoinedOutput = z.infer<typeof JoinedOutput>;

// Templates
export type TemplateCondition = z.infer<typeof TemplateCondition>;
export type InsightTemplate = z.infer<typeof InsightTemplate>;
export type OptimizationTemplate = z.infer<typeof OptimizationTemplate>;
export type OptimizationCategory = z.infer<typeof OptimizationCategory>;
export type OptimizationRiskLevel = z.infer<typeof OptimizationRiskLevel>;

// Impact estimation and insights
export type StackLayer = z.infer<typeof StackLayer>;
export type ImpactType = z.infer<typeof ImpactType>;
export type EffortLevel = z.infer<typeof EffortLevel>;
export type ImpactEstimate = z.infer<typeof ImpactEstimate>;
export type Insight = z.infer<typeof Insight>;
export type PerformanceEnvelope = z.infer<typeof PerformanceEnvelope>;

// Agent planning
export type TaskType = z.infer<typeof TaskType>;
export type PlannedTask = z.infer<typeof PlannedTask>;
export type ExecutionPlan = z.infer<typeof ExecutionPlan>;
export type TaskResult = z.infer<typeof TaskResult>;
|
|
448
|
+
|
|
449
|
+
// =============================================================================
|
|
450
|
+
// FORMAT DETECTION & NORMALIZATION (PRD §6.4)
|
|
451
|
+
// =============================================================================
|
|
452
|
+
|
|
453
|
+
/**
 * Format types supported for runtime event files.
 *
 * Direct-parse formats need no LLM; agent-normalized formats require
 * semantic analysis.
 */
export const FormatType = z.enum([
  // ---- Direct-parse formats (no LLM needed) ----
  // Newline-delimited JSON with InferenceEvent schema
  'jsonl',
  // JSON array of InferenceEvent objects
  'json_array',
  // CSV with standard column names
  'csv',
  // TSV with standard column names
  'tsv',

  // ---- Agent-normalized formats (require semantic analysis) ----
  // OpenTelemetry OTLP traces/spans
  'otel',
  // Jaeger distributed tracing format
  'jaeger',
  // Zipkin tracing format
  'zipkin',
  // LangSmith trace exports
  'langsmith',
  // Helicone proxy logs
  'helicone',
  // Weights & Biases inference logs
  'wandb',
  // LiteLLM proxy event logs
  'litellm',
  // Portkey gateway logs
  'portkey',

  // ---- Inferred formats (heuristic detection) ----
  // Unknown JSON structure requiring field mapping
  'custom_json',
  // Structured text logs
  'custom_text',
  // Could not determine format
  'unknown',
]);
|
|
479
|
+
|
|
480
|
+
/**
 * How the value for a field mapping is extracted from the source record.
 */
export const ExtractionType = z.enum([
  // Direct field access (e.g., obj.field)
  'direct',
  // JSONPath expression
  'jsonpath',
  // CSV/TSV column name
  'column',
  // Regular expression extraction
  'regex',
  // Computed from other fields (e.g., latency = end - start)
  'computed',
  // Fixed value for all events
  'constant',
]);
|
|
491
|
+
|
|
492
|
+
/**
 * Transformation applied to a value after it has been extracted.
 */
export const TransformType = z.enum([
  // No transformation
  'none',
  // Unix milliseconds to ISO timestamp
  'unix_ms_to_iso',
  // Unix seconds to ISO timestamp
  'unix_s_to_iso',
  // Unix nanoseconds to ISO timestamp
  'unix_nano_to_iso',
  // Duration string (e.g., "1.5s") to milliseconds
  'duration_to_ms',
  // String to integer
  'parse_int',
  // String to float
  'parse_float',
  // Lowercase string
  'lowercase',
  // Normalize provider names (e.g., "OpenAI" -> "openai")
  'provider_normalize',
]);
|
|
506
|
+
|
|
507
|
+
/**
 * Mapping of one source-format field onto the InferenceEvent schema.
 *
 * `transform` falls back to 'none' when omitted.
 */
export const FieldMapping = z.object({
  // InferenceEvent field name
  target: z.string(),
  // JSONPath, column name, regex, or expression
  source_path: z.string(),
  extraction_type: ExtractionType,
  transform: TransformType.optional().default('none'),
  // Confidence in this mapping (0-1)
  confidence: z.number().gte(0).lte(1),
  // Why this mapping was chosen
  evidence: z.string().optional(),
});
|
|
518
|
+
|
|
519
|
+
/**
 * Outcome of format detection. All fields are required.
 */
export const FormatDetectionResult = z.object({
  format_type: FormatType,
  // Overall detection confidence
  confidence: z.number().gte(0).lte(1),
  // Explanation of detection
  evidence: z.string(),
  // Number of lines/records sampled
  sample_size: z.number(),
  // Whether agent normalization is needed
  requires_agent: z.boolean(),
});
|
|
529
|
+
|
|
530
|
+
// Audit trail recorded alongside a normalization result.
const normalizationAudit = z.object({
  // ISO timestamp
  normalized_at: z.string(),
  agent_used: z.boolean(),
  codebase_context_used: z.boolean(),
  llm_model: z.string().optional(),
});

/**
 * Full normalization result: the detection outcome, the field mappings,
 * and bookkeeping about what was left unmapped or went wrong.
 */
export const NormalizationResult = z.object({
  detection: FormatDetectionResult,
  mappings: z.array(FieldMapping),
  // Source fields not mapped
  unmapped_fields: z.array(z.string()),
  // Issues encountered during normalization
  warnings: z.array(z.string()),
  audit: normalizationAudit,
});
|
|
545
|
+
|
|
546
|
+
/**
 * Options accepted by format normalization. Every option is optional.
 */
export const NormalizationOptions = z.object({
  // User-provided format hint
  format_hint: FormatType.optional(),
  // User-provided field mappings
  field_hints: z.record(z.string()).optional(),
  // Accept low-confidence mappings
  lenient: z.boolean().optional(),
  // Fail on missing required fields
  strict: z.boolean().optional(),
  // ScanResult for codebase-aware normalization (not validated: accepts any value)
  codebase_context: z.any().optional(),
});
|
|
556
|
+
|
|
557
|
+
// Type exports for format detection.
// Each alias is inferred from the schema constant of the same name.

// Enums
export type FormatType = z.infer<typeof FormatType>;
export type ExtractionType = z.infer<typeof ExtractionType>;
export type TransformType = z.infer<typeof TransformType>;

// Object schemas
export type FieldMapping = z.infer<typeof FieldMapping>;
export type FormatDetectionResult = z.infer<typeof FormatDetectionResult>;
export type NormalizationResult = z.infer<typeof NormalizationResult>;
export type NormalizationOptions = z.infer<typeof NormalizationOptions>;
|
|
565
|
+
|
|
566
|
+
// =============================================================================
|
|
567
|
+
// HISTORY STORAGE (v1.5)
|
|
568
|
+
// =============================================================================
|
|
569
|
+
|
|
570
|
+
/**
 * Kind of analysis a run performed; used to categorize runs.
 */
export const AnalysisType = z.enum([
  'static',
  'runtime',
  'combined',
]);
|
|
574
|
+
|
|
575
|
+
// Artifact paths relative to history directory; every entry is optional.
const historyArtifacts = z.object({
  // inference-map.json
  inferenceMap: z.string().optional(),
  // analysis.json (full results)
  analysis: z.string().optional(),
  // report.html
  html: z.string().optional(),
  // report.pdf
  pdf: z.string().optional(),
});

/**
 * History manifest used to track analysis runs over time.
 *
 * This is not the RunManifest from runid.ts (that one is about
 * caching/resumability); this schema backs historical comparison and
 * deploy-time prediction.
 */
export const HistoryManifest = z.object({
  // Unique run identifier
  runId: z.string(),
  // ISO timestamp when analysis completed
  timestamp: z.string().datetime(),
  // Analyzed path (absolute, for matching)
  path: z.string(),
  // Hash of normalized path for efficient lookup
  pathHash: z.string(),
  // Type of analysis performed
  analysisType: AnalysisType,
  // PeakInfer version that produced this run
  version: z.string(),

  // ---- Summary metrics for quick comparison ----
  // Number of inference points detected
  inferencePointCount: z.number(),
  // Number of runtime events (if runtime/combined)
  eventCount: z.number().optional(),
  // Number of drift signals (if combined)
  driftCount: z.number().optional(),
  // Number of insights generated
  insightCount: z.number().optional(),

  // ---- Performance context ----
  // Analysis duration in milliseconds
  durationMs: z.number().optional(),

  artifacts: historyArtifacts.optional(),
});
|
|
605
|
+
|
|
606
|
+
// One entry of the history index: the identifying subset of a run's fields.
const historyIndexRun = z.object({
  runId: z.string(),
  timestamp: z.string().datetime(),
  pathHash: z.string(),
  analysisType: AnalysisType,
  inferencePointCount: z.number(),
});

/**
 * Index of every historical run for a project path.
 * Persisted at .peakinfer/history/index.json
 */
export const HistoryIndex = z.object({
  // History format version
  version: z.string(),
  // Last index update
  lastUpdated: z.string().datetime(),
  runs: z.array(historyIndexRun),
});
|
|
621
|
+
|
|
622
|
+
// Type exports for history.
// Each alias is inferred from the schema constant of the same name.
export type AnalysisType = z.infer<typeof AnalysisType>;

export type HistoryManifest = z.infer<typeof HistoryManifest>;

export type HistoryIndex = z.infer<typeof HistoryIndex>;
|
|
626
|
+
|
|
627
|
+
// =============================================================================
|
|
628
|
+
// HISTORICAL COMPARISON (v1.5)
|
|
629
|
+
// =============================================================================
|
|
630
|
+
|
|
631
|
+
/**
 * Kind of change observed between two runs.
 */
export const ChangeType = z.enum([
  'added',
  'removed',
  'modified',
]);
|
|
635
|
+
|
|
636
|
+
/**
 * One changed field of an inference point.
 *
 * `before` and `after` are `z.unknown()`, so any value is accepted for them.
 */
export const FieldChange = z.object({
  // Field name that changed
  field: z.string(),
  // Previous value
  before: z.unknown(),
  // New value
  after: z.unknown(),
});
|
|
644
|
+
|
|
645
|
+
/**
 * An inference point that differs between two runs, together with the
 * individual field changes.
 */
export const ChangedInferencePoint = z.object({
  // The inference point
  point: Callsite,
  // List of field changes
  changes: z.array(FieldChange),
});
|
|
652
|
+
|
|
653
|
+
// Summary metrics of a comparison.
const comparisonMetrics = z.object({
  // Inference points in baseline
  totalBefore: z.number(),
  // Inference points in current
  totalAfter: z.number(),
  // Count of added points
  addedCount: z.number(),
  // Count of removed points
  removedCount: z.number(),
  // Count of modified points
  changedCount: z.number(),
  // Net change (added - removed)
  netChange: z.number(),
});

// Insight deltas between the two runs.
const comparisonInsightDeltas = z.object({
  // New critical insights
  newCritical: z.number(),
  // Resolved critical insights
  resolvedCritical: z.number(),
  // New warnings
  newWarnings: z.number(),
  // Resolved warnings
  resolvedWarnings: z.number(),
});

/**
 * Result of comparing two analysis runs.
 * Powers the "what changed" insights used for pre-deploy validation.
 */
export const ComparisonResult = z.object({
  // The baseline run ID
  baseRunId: z.string(),
  // When baseline was created
  baseTimestamp: z.string().datetime(),
  // The current run ID
  currentRunId: z.string(),
  // When current was created
  currentTimestamp: z.string().datetime(),

  // ---- Inference point changes ----
  // New inference points
  added: z.array(Callsite),
  // Removed inference points
  removed: z.array(Callsite),
  // Modified inference points
  changed: z.array(ChangedInferencePoint),

  metrics: comparisonMetrics,

  insightDeltas: comparisonInsightDeltas.optional(),
});
|
|
686
|
+
|
|
687
|
+
// Type exports for comparison.
// Each alias is inferred from the schema constant of the same name.
export type ChangeType = z.infer<typeof ChangeType>;

export type FieldChange = z.infer<typeof FieldChange>;

export type ChangedInferencePoint = z.infer<typeof ChangedInferencePoint>;

export type ComparisonResult = z.infer<typeof ComparisonResult>;
|
|
692
|
+
|
|
693
|
+
// =============================================================================
|
|
694
|
+
// DEPLOY-TIME PREDICTION (v1.5)
|
|
695
|
+
// =============================================================================
|
|
696
|
+
|
|
697
|
+
/**
 * Risk level assigned to a prediction.
 */
export const RiskLevel = z.enum([
  'high',
  'medium',
  'low',
  'neutral',
]);
|
|
701
|
+
|
|
702
|
+
/**
 * Direction in which a prediction factor pushes the outcome.
 */
export const ImpactDirection = z.enum([
  'positive',
  'negative',
  'neutral',
]);
|
|
706
|
+
|
|
707
|
+
/**
 * One factor that contributes to a latency prediction.
 */
export const PredictionFactor = z.object({
  // Factor name (e.g., "model complexity")
  name: z.string(),
  // How it affects latency
  impact: ImpactDirection,
  // Human-readable explanation
  description: z.string(),
  // Relative importance (0-1)
  weight: z.number().gte(0).lte(1).optional(),
});
|
|
716
|
+
|
|
717
|
+
/**
 * Latency values at the p50, p95 and p99 percentiles. All three are required.
 */
export const LatencyPercentiles = z.object({
  // Median latency (ms)
  p50: z.number(),
  // 95th percentile (ms)
  p95: z.number(),
  // 99th percentile (ms)
  p99: z.number(),
});
|
|
725
|
+
|
|
726
|
+
/**
 * Prediction for one inference point.
 * Used to surface potential performance risks ahead of deployment.
 */
export const InferencePointPrediction = z.object({
  // ID of the inference point
  inferencePointId: z.string(),
  // file:line location
  location: z.string(),
  // Provider (e.g., openai)
  provider: z.string().optional(),
  // Model name
  model: z.string().optional(),

  // Current performance (from historical data if available)
  currentLatency: LatencyPercentiles.optional(),

  // Predicted performance
  predictedLatency: LatencyPercentiles,

  // ---- Risk assessment ----
  // Overall risk level
  risk: RiskLevel,
  // Numeric risk score (0-100)
  riskScore: z.number().gte(0).lte(100),

  // Factors contributing to prediction
  factors: z.array(PredictionFactor),

  // ---- Confidence in prediction ----
  confidence: z.enum(['high', 'medium', 'low']),
  // Why confidence is high/low
  confidenceReason: z.string().optional(),
});
|
|
753
|
+
|
|
754
|
+
/**
 * Roll-up over all predictions. Only `budgetExceeded` is optional.
 */
export const PredictionSummary = z.object({
  // Total inference points analyzed
  totalPoints: z.number(),
  // High risk predictions
  highRiskCount: z.number(),
  // Medium risk predictions
  mediumRiskCount: z.number(),
  // Low risk predictions
  lowRiskCount: z.number(),
  // Average predicted p95 latency
  averageP95: z.number(),
  // Worst predicted p95 latency
  worstP95: z.number(),
  // True if exceeds target latency
  budgetExceeded: z.boolean().optional(),
});
|
|
766
|
+
|
|
767
|
+
/**
 * Complete deploy-time prediction result: per-point predictions plus summary.
 */
export const PredictionResult = z.object({
  predictions: z.array(InferencePointPrediction),
  summary: PredictionSummary,
  // User-specified target p95 (ms)
  targetP95: z.number().optional(),
  // When predictions were generated
  generatedAt: z.string().datetime(),
  // Number of historical runs used
  basedOnRuns: z.number(),
});
|
|
777
|
+
|
|
778
|
+
// Type exports for prediction.
// Each alias is inferred from the schema constant of the same name.

// Enums
export type RiskLevel = z.infer<typeof RiskLevel>;
export type ImpactDirection = z.infer<typeof ImpactDirection>;

// Object schemas
export type PredictionFactor = z.infer<typeof PredictionFactor>;
export type LatencyPercentiles = z.infer<typeof LatencyPercentiles>;
export type InferencePointPrediction = z.infer<typeof InferencePointPrediction>;
export type PredictionSummary = z.infer<typeof PredictionSummary>;
export type PredictionResult = z.infer<typeof PredictionResult>;
|
|
786
|
+
|
|
787
|
+
// =============================================================================
|
|
788
|
+
// COUNTERFACTUAL INSIGHTS (v1.5)
|
|
789
|
+
// =============================================================================
|
|
790
|
+
|
|
791
|
+
/**
 * Kind of counterfactual optimization scenario.
 */
export const CounterfactualType = z.enum([
  // Swap to a different model (e.g., cheaper or faster)
  'model_swap',
  // Add batching to reduce per-request overhead
  'batch_optimization',
  // Add caching to bypass LLM for repeated queries
  'cache_addition',
  // Change provider (e.g., cloud → self-hosted)
  'provider_change',
  // Enable streaming for better perceived latency
  'streaming_enable',
]);
|
|
801
|
+
|
|
802
|
+
/**
 * Describes one side (current or proposed) of a counterfactual.
 *
 * The two estimates are required; model, provider and pattern are optional.
 */
export const CounterfactualState = z.object({
  // Model name
  model: z.string().optional(),
  // Provider name
  provider: z.string().optional(),
  // Pattern (streaming, batching, etc.)
  pattern: z.string().optional(),
  // p95 latency estimate (ms)
  estimatedLatency: z.number(),
  // Cost per 1K calls ($)
  estimatedCost: z.number(),
});
|
|
812
|
+
|
|
813
|
+
/**
 * Impact assessment of a counterfactual. Negative deltas are improvements.
 */
export const CounterfactualImpact = z.object({
  // Change in p95 latency (ms, negative = improvement)
  latencyDelta: z.number(),
  // Percentage change in latency
  latencyDeltaPercent: z.number(),
  // Change in cost per 1K calls ($, negative = savings)
  costDelta: z.number(),
  // Percentage change in cost
  costDeltaPercent: z.number(),
  // Tradeoffs to consider
  tradeoffs: z.array(z.string()),
});
|
|
823
|
+
|
|
824
|
+
/**
 * A single counterfactual "what if" scenario.
 * Shows the road not taken and its potential impact.
 *
 * `confidenceReason` is the only optional field.
 */
export const Counterfactual = z.object({
  // Unique identifier
  id: z.string(),
  // Type of optimization
  type: CounterfactualType,
  // Short description (e.g., "Switch to GPT-4o-mini")
  headline: z.string(),
  // Detailed explanation
  description: z.string(),

  // Current configuration
  currentState: CounterfactualState,
  // Proposed configuration
  proposedState: CounterfactualState,

  // Estimated impact
  impact: CounterfactualImpact,

  confidence: z.enum(['high', 'medium', 'low']),
  confidenceReason: z.string().optional(),

  // Inference point IDs affected
  affectedPoints: z.array(z.string()),
  // Implementation effort. Reuses the shared EffortLevel enum (same
  // 'low' | 'medium' | 'high' literals) instead of re-declaring the list here,
  // so the two cannot drift apart.
  effort: EffortLevel,
});
|
|
845
|
+
|
|
846
|
+
/**
 * Roll-up of the counterfactual opportunities that were identified.
 */
export const CounterfactualSummary = z.object({
  // Total counterfactuals identified
  totalOpportunities: z.number(),
  // Max latency reduction achievable (ms)
  maxLatencySavingsMs: z.number(),
  // Max latency reduction percentage
  maxLatencySavingsPercent: z.number(),
  // Max cost savings achievable ($)
  maxCostSavings: z.number(),
  // Max cost savings percentage
  maxCostSavingsPercent: z.number(),
  // Count by counterfactual type
  byType: z.record(z.number()),
});
|
|
857
|
+
|
|
858
|
+
/**
 * Complete counterfactual analysis result: the scenarios plus their summary.
 */
export const CounterfactualResult = z.object({
  counterfactuals: z.array(Counterfactual),
  summary: CounterfactualSummary,
  // When analysis was performed
  generatedAt: z.string().datetime(),
});
|
|
866
|
+
|
|
867
|
+
// Type exports for counterfactuals.
// Each alias is inferred from the schema constant of the same name.

// Enum
export type CounterfactualType = z.infer<typeof CounterfactualType>;

// Object schemas
export type CounterfactualState = z.infer<typeof CounterfactualState>;
export type CounterfactualImpact = z.infer<typeof CounterfactualImpact>;
export type Counterfactual = z.infer<typeof Counterfactual>;
export type CounterfactualSummary = z.infer<typeof CounterfactualSummary>;
export type CounterfactualResult = z.infer<typeof CounterfactualResult>;
|