@peakinfer/cli 1.0.133
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -0
- package/.env.example +6 -0
- package/.github/workflows/peakinfer.yml +64 -0
- package/CHANGELOG.md +31 -0
- package/LICENSE +190 -0
- package/README.md +335 -0
- package/data/inferencemax.json +274 -0
- package/dist/agent-analyzer.d.ts +45 -0
- package/dist/agent-analyzer.d.ts.map +1 -0
- package/dist/agent-analyzer.js +374 -0
- package/dist/agent-analyzer.js.map +1 -0
- package/dist/agent.d.ts +76 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +965 -0
- package/dist/agent.js.map +1 -0
- package/dist/agents/correlation-analyzer.d.ts +34 -0
- package/dist/agents/correlation-analyzer.d.ts.map +1 -0
- package/dist/agents/correlation-analyzer.js +261 -0
- package/dist/agents/correlation-analyzer.js.map +1 -0
- package/dist/agents/index.d.ts +91 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +111 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/runtime-analyzer.d.ts +38 -0
- package/dist/agents/runtime-analyzer.d.ts.map +1 -0
- package/dist/agents/runtime-analyzer.js +244 -0
- package/dist/agents/runtime-analyzer.js.map +1 -0
- package/dist/analysis-types.d.ts +500 -0
- package/dist/analysis-types.d.ts.map +1 -0
- package/dist/analysis-types.js +11 -0
- package/dist/analysis-types.js.map +1 -0
- package/dist/analytics.d.ts +25 -0
- package/dist/analytics.d.ts.map +1 -0
- package/dist/analytics.js +94 -0
- package/dist/analytics.js.map +1 -0
- package/dist/analyzer.d.ts +48 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +547 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/artifacts.d.ts +44 -0
- package/dist/artifacts.d.ts.map +1 -0
- package/dist/artifacts.js +165 -0
- package/dist/artifacts.js.map +1 -0
- package/dist/benchmarks/index.d.ts +88 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +205 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +427 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/ci.d.ts +19 -0
- package/dist/commands/ci.d.ts.map +1 -0
- package/dist/commands/ci.js +253 -0
- package/dist/commands/ci.js.map +1 -0
- package/dist/commands/config.d.ts +16 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +249 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/demo.d.ts +15 -0
- package/dist/commands/demo.d.ts.map +1 -0
- package/dist/commands/demo.js +106 -0
- package/dist/commands/demo.js.map +1 -0
- package/dist/commands/export.d.ts +14 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +209 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/history.d.ts +15 -0
- package/dist/commands/history.d.ts.map +1 -0
- package/dist/commands/history.js +389 -0
- package/dist/commands/history.js.map +1 -0
- package/dist/commands/template.d.ts +14 -0
- package/dist/commands/template.d.ts.map +1 -0
- package/dist/commands/template.js +341 -0
- package/dist/commands/template.js.map +1 -0
- package/dist/commands/validate-map.d.ts +12 -0
- package/dist/commands/validate-map.d.ts.map +1 -0
- package/dist/commands/validate-map.js +274 -0
- package/dist/commands/validate-map.js.map +1 -0
- package/dist/commands/whatif.d.ts +17 -0
- package/dist/commands/whatif.d.ts.map +1 -0
- package/dist/commands/whatif.js +206 -0
- package/dist/commands/whatif.js.map +1 -0
- package/dist/comparison.d.ts +38 -0
- package/dist/comparison.d.ts.map +1 -0
- package/dist/comparison.js +223 -0
- package/dist/comparison.js.map +1 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +158 -0
- package/dist/config.js.map +1 -0
- package/dist/connectors/helicone.d.ts +9 -0
- package/dist/connectors/helicone.d.ts.map +1 -0
- package/dist/connectors/helicone.js +106 -0
- package/dist/connectors/helicone.js.map +1 -0
- package/dist/connectors/index.d.ts +37 -0
- package/dist/connectors/index.d.ts.map +1 -0
- package/dist/connectors/index.js +65 -0
- package/dist/connectors/index.js.map +1 -0
- package/dist/connectors/langsmith.d.ts +9 -0
- package/dist/connectors/langsmith.d.ts.map +1 -0
- package/dist/connectors/langsmith.js +122 -0
- package/dist/connectors/langsmith.js.map +1 -0
- package/dist/connectors/types.d.ts +83 -0
- package/dist/connectors/types.d.ts.map +1 -0
- package/dist/connectors/types.js +98 -0
- package/dist/connectors/types.js.map +1 -0
- package/dist/cost-estimator.d.ts +46 -0
- package/dist/cost-estimator.d.ts.map +1 -0
- package/dist/cost-estimator.js +104 -0
- package/dist/cost-estimator.js.map +1 -0
- package/dist/costs.d.ts +57 -0
- package/dist/costs.d.ts.map +1 -0
- package/dist/costs.js +251 -0
- package/dist/costs.js.map +1 -0
- package/dist/counterfactuals.d.ts +29 -0
- package/dist/counterfactuals.d.ts.map +1 -0
- package/dist/counterfactuals.js +448 -0
- package/dist/counterfactuals.js.map +1 -0
- package/dist/enhancement-prompts.d.ts +41 -0
- package/dist/enhancement-prompts.d.ts.map +1 -0
- package/dist/enhancement-prompts.js +88 -0
- package/dist/enhancement-prompts.js.map +1 -0
- package/dist/envelopes.d.ts +20 -0
- package/dist/envelopes.d.ts.map +1 -0
- package/dist/envelopes.js +790 -0
- package/dist/envelopes.js.map +1 -0
- package/dist/format-normalizer.d.ts +71 -0
- package/dist/format-normalizer.d.ts.map +1 -0
- package/dist/format-normalizer.js +1331 -0
- package/dist/format-normalizer.js.map +1 -0
- package/dist/history.d.ts +79 -0
- package/dist/history.d.ts.map +1 -0
- package/dist/history.js +313 -0
- package/dist/history.js.map +1 -0
- package/dist/html.d.ts +11 -0
- package/dist/html.d.ts.map +1 -0
- package/dist/html.js +463 -0
- package/dist/html.js.map +1 -0
- package/dist/impact.d.ts +42 -0
- package/dist/impact.d.ts.map +1 -0
- package/dist/impact.js +443 -0
- package/dist/impact.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +34 -0
- package/dist/index.js.map +1 -0
- package/dist/insights.d.ts +5 -0
- package/dist/insights.d.ts.map +1 -0
- package/dist/insights.js +271 -0
- package/dist/insights.js.map +1 -0
- package/dist/joiner.d.ts +9 -0
- package/dist/joiner.d.ts.map +1 -0
- package/dist/joiner.js +247 -0
- package/dist/joiner.js.map +1 -0
- package/dist/orchestrator.d.ts +34 -0
- package/dist/orchestrator.d.ts.map +1 -0
- package/dist/orchestrator.js +827 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/pdf.d.ts +26 -0
- package/dist/pdf.d.ts.map +1 -0
- package/dist/pdf.js +84 -0
- package/dist/pdf.js.map +1 -0
- package/dist/prediction.d.ts +33 -0
- package/dist/prediction.d.ts.map +1 -0
- package/dist/prediction.js +316 -0
- package/dist/prediction.js.map +1 -0
- package/dist/prompts/loader.d.ts +38 -0
- package/dist/prompts/loader.d.ts.map +1 -0
- package/dist/prompts/loader.js +60 -0
- package/dist/prompts/loader.js.map +1 -0
- package/dist/renderer.d.ts +64 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +923 -0
- package/dist/renderer.js.map +1 -0
- package/dist/runid.d.ts +57 -0
- package/dist/runid.d.ts.map +1 -0
- package/dist/runid.js +199 -0
- package/dist/runid.js.map +1 -0
- package/dist/runtime.d.ts +29 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +366 -0
- package/dist/runtime.js.map +1 -0
- package/dist/scanner.d.ts +11 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +426 -0
- package/dist/scanner.js.map +1 -0
- package/dist/templates.d.ts +120 -0
- package/dist/templates.d.ts.map +1 -0
- package/dist/templates.js +429 -0
- package/dist/templates.js.map +1 -0
- package/dist/tools/index.d.ts +153 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +177 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/types.d.ts +3647 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +703 -0
- package/dist/types.js.map +1 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +23 -0
- package/dist/version.js.map +1 -0
- package/docs/demo-guide.md +423 -0
- package/docs/events-format.md +295 -0
- package/docs/inferencemap-spec.md +344 -0
- package/docs/migration-v2.md +293 -0
- package/fixtures/demo/precomputed.json +142 -0
- package/fixtures/demo-project/README.md +52 -0
- package/fixtures/demo-project/ai-service.ts +65 -0
- package/fixtures/demo-project/sample-events.jsonl +15 -0
- package/fixtures/demo-project/src/ai-service.ts +128 -0
- package/fixtures/demo-project/src/llm-client.ts +155 -0
- package/package.json +65 -0
- package/prompts/agent-analyzer.yaml +47 -0
- package/prompts/ci-gate.yaml +98 -0
- package/prompts/correlation-analyzer.yaml +178 -0
- package/prompts/format-normalizer.yaml +46 -0
- package/prompts/peak-performance.yaml +180 -0
- package/prompts/pr-comment.yaml +111 -0
- package/prompts/runtime-analyzer.yaml +189 -0
- package/prompts/unified-analyzer.yaml +241 -0
- package/schemas/inference-map.v0.1.json +215 -0
- package/scripts/benchmark.ts +394 -0
- package/scripts/demo-v1.5.sh +158 -0
- package/scripts/sync-from-site.sh +197 -0
- package/scripts/validate-sync.sh +178 -0
- package/src/agent-analyzer.ts +481 -0
- package/src/agent.ts +1232 -0
- package/src/agents/correlation-analyzer.ts +353 -0
- package/src/agents/index.ts +235 -0
- package/src/agents/runtime-analyzer.ts +343 -0
- package/src/analysis-types.ts +558 -0
- package/src/analytics.ts +100 -0
- package/src/analyzer.ts +692 -0
- package/src/artifacts.ts +218 -0
- package/src/benchmarks/index.ts +309 -0
- package/src/cli.ts +503 -0
- package/src/commands/ci.ts +336 -0
- package/src/commands/config.ts +288 -0
- package/src/commands/demo.ts +175 -0
- package/src/commands/export.ts +297 -0
- package/src/commands/history.ts +425 -0
- package/src/commands/template.ts +385 -0
- package/src/commands/validate-map.ts +324 -0
- package/src/commands/whatif.ts +272 -0
- package/src/comparison.ts +283 -0
- package/src/config.ts +188 -0
- package/src/connectors/helicone.ts +164 -0
- package/src/connectors/index.ts +93 -0
- package/src/connectors/langsmith.ts +179 -0
- package/src/connectors/types.ts +180 -0
- package/src/cost-estimator.ts +146 -0
- package/src/costs.ts +347 -0
- package/src/counterfactuals.ts +516 -0
- package/src/enhancement-prompts.ts +118 -0
- package/src/envelopes.ts +814 -0
- package/src/format-normalizer.ts +1486 -0
- package/src/history.ts +400 -0
- package/src/html.ts +512 -0
- package/src/impact.ts +522 -0
- package/src/index.ts +83 -0
- package/src/insights.ts +341 -0
- package/src/joiner.ts +289 -0
- package/src/orchestrator.ts +1015 -0
- package/src/pdf.ts +110 -0
- package/src/prediction.ts +392 -0
- package/src/prompts/loader.ts +88 -0
- package/src/renderer.ts +1045 -0
- package/src/runid.ts +261 -0
- package/src/runtime.ts +450 -0
- package/src/scanner.ts +508 -0
- package/src/templates.ts +561 -0
- package/src/tools/index.ts +214 -0
- package/src/types.ts +873 -0
- package/src/version.ts +24 -0
- package/templates/context-accumulation.yaml +23 -0
- package/templates/cost-concentration.yaml +20 -0
- package/templates/dead-code.yaml +20 -0
- package/templates/latency-explainer.yaml +23 -0
- package/templates/optimizations/ab-testing-framework.yaml +74 -0
- package/templates/optimizations/api-gateway-optimization.yaml +81 -0
- package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
- package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
- package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
- package/templates/optimizations/comprehensive-apm.yaml +76 -0
- package/templates/optimizations/context-window-optimization.yaml +91 -0
- package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
- package/templates/optimizations/distributed-training-optimization.yaml +77 -0
- package/templates/optimizations/document-analysis-edge.yaml +77 -0
- package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
- package/templates/optimizations/domain-specific-distillation.yaml +78 -0
- package/templates/optimizations/error-handling-optimization.yaml +76 -0
- package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
- package/templates/optimizations/long-context-memory-management.yaml +78 -0
- package/templates/optimizations/max-tokens-optimization.yaml +76 -0
- package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
- package/templates/optimizations/multi-framework-resilience.yaml +75 -0
- package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
- package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
- package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
- package/templates/optimizations/quality-monitoring.yaml +74 -0
- package/templates/optimizations/realtime-budget-controls.yaml +74 -0
- package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
- package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
- package/templates/optimizations/smart-model-routing.yaml +96 -0
- package/templates/optimizations/streaming-batch-selection.yaml +167 -0
- package/templates/optimizations/system-prompt-optimization.yaml +75 -0
- package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
- package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
- package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
- package/templates/overpowered-extraction.yaml +32 -0
- package/templates/overpowered-model.yaml +31 -0
- package/templates/prompt-bloat.yaml +24 -0
- package/templates/retry-explosion.yaml +28 -0
- package/templates/schema/insight.schema.json +113 -0
- package/templates/schema/optimization.schema.json +180 -0
- package/templates/streaming-drift.yaml +30 -0
- package/templates/throughput-gap.yaml +21 -0
- package/templates/token-underutilization.yaml +28 -0
- package/templates/untested-fallback.yaml +21 -0
- package/tests/accuracy/drift-detection.test.ts +184 -0
- package/tests/accuracy/false-positives.test.ts +166 -0
- package/tests/accuracy/templates.test.ts +205 -0
- package/tests/action/commands.test.ts +125 -0
- package/tests/action/comments.test.ts +347 -0
- package/tests/cli.test.ts +203 -0
- package/tests/comparison.test.ts +309 -0
- package/tests/correlation-analyzer.test.ts +534 -0
- package/tests/counterfactuals.test.ts +347 -0
- package/tests/fixtures/events/missing-id.jsonl +1 -0
- package/tests/fixtures/events/missing-input.jsonl +1 -0
- package/tests/fixtures/events/missing-latency.jsonl +1 -0
- package/tests/fixtures/events/missing-model.jsonl +1 -0
- package/tests/fixtures/events/missing-output.jsonl +1 -0
- package/tests/fixtures/events/missing-provider.jsonl +1 -0
- package/tests/fixtures/events/missing-ts.jsonl +1 -0
- package/tests/fixtures/events/valid.csv +3 -0
- package/tests/fixtures/events/valid.json +1 -0
- package/tests/fixtures/events/valid.jsonl +2 -0
- package/tests/fixtures/events/with-callsite.jsonl +1 -0
- package/tests/fixtures/events/with-intent.jsonl +1 -0
- package/tests/fixtures/events/wrong-type.jsonl +1 -0
- package/tests/fixtures/repos/empty/.gitkeep +0 -0
- package/tests/fixtures/repos/hybrid-router/router.py +35 -0
- package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
- package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
- package/tests/fixtures/repos/saas-openai/client.py +26 -0
- package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
- package/tests/github-action.test.ts +292 -0
- package/tests/insights.test.ts +878 -0
- package/tests/joiner.test.ts +168 -0
- package/tests/performance/action-latency.test.ts +132 -0
- package/tests/performance/benchmark.test.ts +189 -0
- package/tests/performance/cli-latency.test.ts +102 -0
- package/tests/pr-comment.test.ts +313 -0
- package/tests/prediction.test.ts +296 -0
- package/tests/runtime-analyzer.test.ts +375 -0
- package/tests/runtime.test.ts +205 -0
- package/tests/scanner.test.ts +122 -0
- package/tests/template-conformance.test.ts +526 -0
- package/tests/unit/cost-calculator.test.ts +303 -0
- package/tests/unit/credits.test.ts +180 -0
- package/tests/unit/inference-map.test.ts +276 -0
- package/tests/unit/schema.test.ts +300 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +14 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared types for runtime data connectors
|
|
3
|
+
*
|
|
4
|
+
* These types define the normalized format for runtime events
|
|
5
|
+
* from various sources (Helicone, LangSmith, etc.)
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * One runtime event expressed in the connector-independent shape.
 *
 * The notes on individual fields describe how `calculateSummary` in this
 * module consumes them.
 */
export interface NormalizedEvent {
  id: string;
  /** Parsed with `new Date(...)`; unparseable values are left out of the summary time range. */
  timestamp: string;
  /** An empty value is grouped under `'unknown'` in the per-model breakdown. */
  model: string;
  /** An empty value is grouped under `'unknown'` in the per-provider breakdown. */
  provider: string;
  /** Only values greater than 0 contribute to latency statistics. */
  latency_ms: number;
  prompt_tokens?: number;
  completion_tokens?: number;
  total_tokens?: number;
  /** Counted as 0 when absent. */
  cost_usd?: number;
  /** `false` counts toward the error rate. */
  success: boolean;
  error?: string;
  /** A truthy value counts toward the streaming rate. */
  streaming?: boolean;

  // Trace metadata
  trace_id?: string;
  span_id?: string;
  parent_span_id?: string;

  // Request metadata
  request_path?: string;
  user_id?: string;
  session_id?: string;

  // Raw data for debugging
  raw?: Record<string, unknown>;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Aggregate statistics over a set of normalized events.
 *
 * Field notes describe the values as `calculateSummary` in this module
 * produces them.
 */
export interface ConnectorSummary {
  /** Number of events summarized. */
  total_requests: number;
  /** Sum of `cost_usd` over all events (missing costs count as 0). */
  total_cost_usd: number;
  /** Mean of the positive latencies, rounded to the nearest integer; 0 when there are none. */
  avg_latency_ms: number;
  p50_latency_ms: number;
  p95_latency_ms: number;
  p99_latency_ms: number;
  /** Fraction (0..1) of events whose `success` is false. */
  error_rate: number;
  /** Fraction (0..1) of events whose `streaming` is truthy. */
  streaming_rate: number;

  /** Per-model breakdown, keyed by model name (`'unknown'` for an empty name). */
  by_model: Record<
    string,
    {
      count: number;
      cost: number;
      avg_latency_ms: number;
      p95_latency_ms: number;
      error_rate: number;
    }
  >;

  /** Per-provider breakdown, keyed by provider name (`'unknown'` for an empty name). */
  by_provider: Record<
    string,
    {
      count: number;
      cost: number;
    }
  >;

  /** Earliest and latest parseable timestamps as ISO strings, or `''` when none parse. */
  time_range: {
    start: string;
    end: string;
  };
}
|
|
58
|
+
|
|
59
|
+
/**
 * What a connector hands back: the normalized events, their aggregate
 * summary, and bookkeeping about the fetch itself.
 */
export interface ConnectorResult {
  events: NormalizedEvent[];
  summary: ConnectorSummary;
  metadata: {
    /** Which connector produced this result. */
    source: 'helicone' | 'langsmith';
    // NOTE(review): presumably an ISO timestamp of when the fetch ran - confirm against the connectors.
    fetched_at: string;
    total_fetched: number;
    // NOTE(review): presumably set when more events existed than were returned - confirm against the connectors.
    truncated: boolean;
    api_version?: string;
  };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Options accepted by a runtime data connector.
 *
 * Only `apiKey` is required; how the optional fields are interpreted is up
 * to each connector implementation (not visible in this module).
 */
export interface ConnectorConfig {
  apiKey: string;
  limit?: number;
  startDate?: string;
  endDate?: string;
  /** Optional narrowing of the events to fetch. */
  filter?: {
    model?: string;
    provider?: string;
    success?: boolean;
  };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Error raised by a runtime data connector.
 *
 * Carries the originating connector (`source`) and, when available, a
 * status code and a string error code alongside the message.
 */
export class ConnectorError extends Error {
  public readonly source: 'helicone' | 'langsmith';
  public readonly statusCode?: number;
  public readonly code?: string;

  constructor(
    message: string,
    source: 'helicone' | 'langsmith',
    statusCode?: number,
    code?: string
  ) {
    super(message);
    this.source = source;
    this.statusCode = statusCode;
    this.code = code;
    // Override the default "Error" so logs and stack traces identify this class.
    this.name = 'ConnectorError';
  }
}
|
|
94
|
+
|
|
95
|
+
// Helper functions for calculating summary statistics
|
|
96
|
+
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is not mutated.
 *
 * @param values - Samples to rank; may be unsorted.
 * @param percentile - Percentile on a 0-100 scale. Values outside that range
 *   are clamped to the smallest / largest sample; NaN yields the smallest.
 * @returns The selected sample, or 0 when `values` is empty.
 */
export function calculatePercentile(values: number[], percentile: number): number {
  if (values.length === 0) return 0;

  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((percentile / 100) * sorted.length) - 1;

  // Clamp the rank into the array: an unclamped percentile > 100 (or NaN)
  // would index past the end and return undefined despite the number type.
  const index = Number.isNaN(rank) ? 0 : Math.min(sorted.length - 1, Math.max(0, rank));
  return sorted[index];
}
|
|
102
|
+
|
|
103
|
+
/**
 * Aggregate normalized events into a {@link ConnectorSummary}.
 *
 * All statistics are gathered in a single pass over `events`:
 * - latency statistics only consider events with `latency_ms > 0`;
 * - a missing `cost_usd` counts as 0;
 * - an empty `model` / `provider` is grouped under `'unknown'`;
 * - timestamps that `new Date()` cannot parse are ignored for `time_range`.
 *
 * Grouping uses Maps, so model or provider names that collide with
 * `Object.prototype` members (e.g. `"constructor"`) are counted like any
 * other name. The time range is tracked with running min/max instead of
 * spreading the timestamp array into `Math.min` / `Math.max`, which can
 * exceed the engine's argument limit for very large inputs.
 *
 * @param events - Events to summarize; the array is not modified.
 * @returns The summary; every numeric field is 0 and `time_range` is empty
 *   strings when `events` is empty.
 */
export function calculateSummary(events: NormalizedEvent[]): ConnectorSummary {
  if (events.length === 0) {
    return {
      total_requests: 0,
      total_cost_usd: 0,
      avg_latency_ms: 0,
      p50_latency_ms: 0,
      p95_latency_ms: 0,
      p99_latency_ms: 0,
      error_rate: 0,
      streaming_rate: 0,
      by_model: {},
      by_provider: {},
      time_range: { start: '', end: '' },
    };
  }

  const modelStats = new Map<
    string,
    { count: number; cost: number; errors: number; latencies: number[] }
  >();
  const providerStats = new Map<string, { count: number; cost: number }>();

  const latencies: number[] = [];
  let totalCost = 0;
  let errors = 0;
  let streaming = 0;
  let sawTimestamp = false;
  let earliest = Infinity;
  let latest = -Infinity;

  for (const event of events) {
    const cost = event.cost_usd || 0;
    const failed = !event.success;
    const hasLatency = event.latency_ms > 0;

    totalCost += cost;
    if (failed) errors++;
    if (event.streaming) streaming++;
    if (hasLatency) latencies.push(event.latency_ms);

    // Per-model accumulation
    const modelKey = event.model || 'unknown';
    let perModel = modelStats.get(modelKey);
    if (!perModel) {
      perModel = { count: 0, cost: 0, errors: 0, latencies: [] };
      modelStats.set(modelKey, perModel);
    }
    perModel.count++;
    perModel.cost += cost;
    if (failed) perModel.errors++;
    if (hasLatency) perModel.latencies.push(event.latency_ms);

    // Per-provider accumulation
    const providerKey = event.provider || 'unknown';
    let perProvider = providerStats.get(providerKey);
    if (!perProvider) {
      perProvider = { count: 0, cost: 0 };
      providerStats.set(providerKey, perProvider);
    }
    perProvider.count++;
    perProvider.cost += cost;

    // Time range (running min/max; unparseable timestamps are skipped)
    const time = new Date(event.timestamp).getTime();
    if (!isNaN(time)) {
      sawTimestamp = true;
      if (time < earliest) earliest = time;
      if (time > latest) latest = time;
    }
  }

  // Rounded arithmetic mean, 0 for an empty list.
  const roundedMean = (xs: number[]): number =>
    xs.length > 0 ? Math.round(xs.reduce((a, b) => a + b, 0) / xs.length) : 0;

  const byModel: ConnectorSummary['by_model'] = Object.fromEntries(
    Array.from(
      modelStats,
      ([model, stats]): [string, ConnectorSummary['by_model'][string]] => [
        model,
        {
          count: stats.count,
          cost: stats.cost,
          avg_latency_ms: roundedMean(stats.latencies),
          p95_latency_ms: calculatePercentile(stats.latencies, 95),
          // count is at least 1 for every key, so this never divides by zero
          error_rate: stats.errors / stats.count,
        },
      ]
    )
  );

  const byProvider: ConnectorSummary['by_provider'] = Object.fromEntries(providerStats);

  return {
    total_requests: events.length,
    total_cost_usd: totalCost,
    avg_latency_ms: roundedMean(latencies),
    p50_latency_ms: calculatePercentile(latencies, 50),
    p95_latency_ms: calculatePercentile(latencies, 95),
    p99_latency_ms: calculatePercentile(latencies, 99),
    error_rate: errors / events.length,
    streaming_rate: streaming / events.length,
    by_model: byModel,
    by_provider: byProvider,
    time_range: {
      start: sawTimestamp ? new Date(earliest).toISOString() : '',
      end: sawTimestamp ? new Date(latest).toISOString() : '',
    },
  };
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost Estimation Module for PeakInfer CLI
|
|
3
|
+
*
|
|
4
|
+
* Estimates LLM API costs before running analysis to prevent surprise bills
|
|
5
|
+
* on large repositories. Uses LiteLLM pricing data with 24hr cache.
|
|
6
|
+
*
|
|
7
|
+
* PRD v1.9.3 Section 2.3: Cost Estimation (Pre-Analysis Transparency)
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { scan } from './scanner.js';
|
|
11
|
+
import { loadPricing, getModelCost } from './costs.js';
|
|
12
|
+
|
|
13
|
+
// =============================================================================
|
|
14
|
+
// TYPES
|
|
15
|
+
// =============================================================================
|
|
16
|
+
|
|
17
|
+
/**
 * Result of a pre-analysis cost estimate, as built by `estimateAnalysisCost`.
 */
export interface CostEstimate {
  /** Model the estimate was priced for. */
  model: string;
  /** File count reported by the scanner. */
  filesToScan: number;
  /** `filesToScan` multiplied by the average tokens-per-file constant. */
  estimatedInputTokens: number;
  /** Input tokens scaled by the output ratio constant, rounded. */
  estimatedOutputTokens: number;
  /** USD cost of the estimated input tokens. */
  inputCost: number;
  /** USD cost of the estimated output tokens. */
  outputCost: number;
  /** `inputCost + outputCost`. */
  totalCost: number;
  /** Prices the estimate was computed with. */
  pricing: {
    inputPerMillion: number;
    outputPerMillion: number;
    /** `'fallback'` when the pricing lookup returned 0 for both input and output. */
    source: 'litellm' | 'fallback';
  };
  /** Zero or one warning, chosen by which cost threshold `totalCost` exceeds. */
  warnings: CostWarning[];
}
|
|
32
|
+
|
|
33
|
+
/**
 * A cost warning attached to an estimate.
 *
 * `level` grows in severity from `'yellow'` through `'red'` to `'critical'`;
 * `message` is the human-readable advice shown with it.
 */
export interface CostWarning {
  level: 'yellow' | 'red' | 'critical';
  message: string;
}
|
|
37
|
+
|
|
38
|
+
// =============================================================================
|
|
39
|
+
// CONSTANTS
|
|
40
|
+
// =============================================================================
|
|
41
|
+
|
|
42
|
+
// Token estimation heuristics (based on empirical analysis of codebases)
const AVG_TOKENS_PER_FILE = 2_000; // input tokens assumed for an average code file
const AVG_OUTPUT_RATIO = 0.35; // output tokens as a fraction of input tokens

// Estimated total cost (USD) above which each warning level applies
const WARNING_THRESHOLD_YELLOW = 5;
const WARNING_THRESHOLD_RED = 20;
const WARNING_THRESHOLD_CRITICAL = 100;

// Model assumed when the caller does not name one (matches agent.ts default)
const DEFAULT_MODEL = 'claude-sonnet-4-20250514';

// Prices used when the pricing lookup has nothing for the model
// (Claude Sonnet pricing, USD per 1M tokens)
const FALLBACK_PRICING = {
  input: 3.0, // $3.00 per 1M input tokens
  output: 15.0, // $15.00 per 1M output tokens
};
|
|
59
|
+
|
|
60
|
+
// =============================================================================
|
|
61
|
+
// MAIN FUNCTION
|
|
62
|
+
// =============================================================================
|
|
63
|
+
|
|
64
|
+
/**
 * Produce a rough USD cost estimate for analyzing a codebase, without
 * running the analysis itself.
 *
 * The estimate is a heuristic: the scanner's file count is multiplied by a
 * fixed tokens-per-file figure, output tokens are taken as a fixed fraction
 * of input tokens, and both are priced per million tokens. When the pricing
 * lookup reports 0 for both input and output, the built-in fallback prices
 * are used and `pricing.source` is `'fallback'`.
 *
 * @param path - Path to the codebase to analyze
 * @param options - Optional model override; the default model is used when
 *   `model` is missing or empty
 * @returns The estimate, carrying at most one warning for the highest cost
 *   threshold exceeded
 */
export async function estimateAnalysisCost(
  path: string,
  options: { model?: string } = {}
): Promise<CostEstimate> {
  const model = options.model || DEFAULT_MODEL;

  // 1. File count from the scanner
  const { summary } = await scan(path);
  const filesToScan = summary.totalFiles;

  // 2. Token heuristics
  const estimatedInputTokens = filesToScan * AVG_TOKENS_PER_FILE;
  const estimatedOutputTokens = Math.round(estimatedInputTokens * AVG_OUTPUT_RATIO);

  // 3. Pricing: load the table, then look the model up
  await loadPricing();
  const { input: listedInput, output: listedOutput } = getModelCost(model);

  // A 0/0 result is treated as "no price known" and triggers the fallback
  const useFallback = listedInput === 0 && listedOutput === 0;
  const inputPerMillion = useFallback ? FALLBACK_PRICING.input : listedInput;
  const outputPerMillion = useFallback ? FALLBACK_PRICING.output : listedOutput;

  // 4. Costs in USD
  const inputCost = (estimatedInputTokens * inputPerMillion) / 1_000_000;
  const outputCost = (estimatedOutputTokens * outputPerMillion) / 1_000_000;
  const totalCost = inputCost + outputCost;

  // 5. Warning: tiers are ordered most severe first, and only the first
  //    threshold the total exceeds produces a warning
  const tiers: Array<[number, CostWarning]> = [
    [
      WARNING_THRESHOLD_CRITICAL,
      {
        level: 'critical',
        message: 'Very high cost estimate. Strongly recommend limiting scope.',
      },
    ],
    [
      WARNING_THRESHOLD_RED,
      {
        level: 'red',
        message: 'High cost estimate. Consider analyzing a subdirectory.',
      },
    ],
    [
      WARNING_THRESHOLD_YELLOW,
      {
        level: 'yellow',
        message: 'Moderate cost. Consider analyzing a subdirectory for faster results.',
      },
    ],
  ];
  const warnings: CostWarning[] = [];
  const exceeded = tiers.find(([threshold]) => totalCost > threshold);
  if (exceeded) {
    warnings.push(exceeded[1]);
  }

  return {
    model,
    filesToScan,
    estimatedInputTokens,
    estimatedOutputTokens,
    inputCost,
    outputCost,
    totalCost,
    pricing: {
      inputPerMillion,
      outputPerMillion,
      source: useFallback ? 'fallback' : 'litellm',
    },
    warnings,
  };
}
|
|
136
|
+
|
|
137
|
+
/**
 * Tell whether an estimate's total cost is strictly above a budget.
 *
 * @param estimate - Cost estimate to check
 * @param maxCost - Maximum allowed cost in USD
 * @returns true only when `estimate.totalCost` is greater than `maxCost`
 *   (a total equal to the budget does not exceed it)
 */
export function exceedsMaxCost(estimate: CostEstimate, maxCost: number): boolean {
  const { totalCost } = estimate;
  return totalCost > maxCost;
}
|
package/src/costs.ts
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
2
|
+
import { join } from 'path';
|
|
3
|
+
|
|
4
|
+
// =============================================================================
|
|
5
|
+
// TYPES
|
|
6
|
+
// =============================================================================
|
|
7
|
+
|
|
8
|
+
/**
 * Price of a model, expressed per one million tokens.
 */
export interface ModelCost {
  /** Input price, per 1M tokens */
  input: number;
  /** Output price, per 1M tokens */
  output: number;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Shape of a cached pricing table: the per-model costs plus where and when
 * they were obtained.
 */
interface PricingCache {
  /** Costs keyed by model name. */
  data: Record<string, ModelCost>;
  // NOTE(review): presumably epoch milliseconds, to compare against the cache TTL - confirm at the usage site.
  fetchedAt: number;
  // NOTE(review): presumably the name of the provider that supplied `data` - confirm at the usage site.
  source: string;
}
|
|
18
|
+
|
|
19
|
+
// =============================================================================
|
|
20
|
+
// PRICING PROVIDER INTERFACE
|
|
21
|
+
// =============================================================================
|
|
22
|
+
// This interface allows swapping LiteLLM for a local implementation.
|
|
23
|
+
// To replace: implement PricingProvider and call setPricingProvider().
|
|
24
|
+
|
|
25
|
+
/**
 * A pluggable source of model pricing.
 */
export interface PricingProvider {
  /** Identifier of the provider (the built-in one is named `'litellm'`). */
  name: string;
  /** Resolve to a table of costs keyed by model name. */
  fetch(): Promise<Record<string, ModelCost>>;
}
|
|
29
|
+
|
|
30
|
+
// =============================================================================
|
|
31
|
+
// LITELLM PROVIDER (Default)
|
|
32
|
+
// =============================================================================
|
|
33
|
+
|
|
34
|
+
// Raw JSON price table on the main branch of the BerriAI/litellm GitHub repository.
const LITELLM_PRICING_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json';
|
|
35
|
+
|
|
36
|
+
/**
 * Convert a LiteLLM-style price table into per-1M-token model costs.
 *
 * An entry is kept only when it is a non-null object whose
 * `input_cost_per_token` and `output_cost_per_token` are both numbers;
 * every other entry is silently dropped.
 *
 * @param litellmData - Parsed JSON keyed by model name.
 * @returns Costs keyed by model name, each per-token price multiplied by 1,000,000.
 */
function normalizeLiteLLMPricing(litellmData: Record<string, unknown>): Record<string, ModelCost> {
  const TOKENS_PER_MILLION = 1_000_000;
  const normalized: Record<string, ModelCost> = {};

  Object.keys(litellmData).forEach((modelName) => {
    const entry = litellmData[modelName];
    if (entry === null || typeof entry !== 'object') return;

    const {
      input_cost_per_token: perTokenInput,
      output_cost_per_token: perTokenOutput,
    } = entry as Record<string, unknown>;
    if (typeof perTokenInput !== 'number' || typeof perTokenOutput !== 'number') return;

    // Source prices are per token; the rest of the module works per 1M tokens
    normalized[modelName] = {
      input: perTokenInput * TOKENS_PER_MILLION,
      output: perTokenOutput * TOKENS_PER_MILLION,
    };
  });

  return normalized;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Default pricing provider: downloads the LiteLLM price list from
 * LITELLM_PRICING_URL and normalizes it to per-1M-token costs.
 *
 * `fetch()` rejects when the request times out, the server answers with a
 * non-2xx status, or the payload is not a JSON object.
 */
const litellmProvider: PricingProvider = {
  name: 'litellm',
  async fetch(): Promise<Record<string, ModelCost>> {
    // Without a deadline a stalled connection would block the caller forever,
    // and the stale-cache fallback in loadPricing() could never kick in.
    const requestTimeoutMs = 10_000;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), requestTimeoutMs);

    try {
      // Inside this method `fetch` resolves to the global fetch, not this method.
      const response = await fetch(LITELLM_PRICING_URL, { signal: controller.signal });
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      const rawData: unknown = await response.json();
      // Validate before casting: the payload comes from the network.
      if (typeof rawData !== 'object' || rawData === null || Array.isArray(rawData)) {
        throw new Error('Unexpected pricing payload: expected a JSON object');
      }

      return normalizeLiteLLMPricing(rawData as Record<string, unknown>);
    } finally {
      clearTimeout(timer);
    }
  },
};
|
|
69
|
+
|
|
70
|
+
// =============================================================================
|
|
71
|
+
// LOCAL PROVIDER (Ready for future use)
|
|
72
|
+
// =============================================================================
|
|
73
|
+
// Uncomment and populate to use local pricing instead of LiteLLM.
|
|
74
|
+
// Then call: setPricingProvider(localProvider)
|
|
75
|
+
|
|
76
|
+
/*
|
|
77
|
+
const localProvider: PricingProvider = {
|
|
78
|
+
name: 'local',
|
|
79
|
+
async fetch(): Promise<Record<string, ModelCost>> {
|
|
80
|
+
// Local pricing data - $/1M tokens
|
|
81
|
+
return {
|
|
82
|
+
'gpt-4o': { input: 2.50, output: 10.00 },
|
|
83
|
+
'gpt-4o-mini': { input: 0.15, output: 0.60 },
|
|
84
|
+
'gpt-4-turbo': { input: 10.00, output: 30.00 },
|
|
85
|
+
'claude-3-5-sonnet': { input: 3.00, output: 15.00 },
|
|
86
|
+
'claude-3-haiku': { input: 0.25, output: 1.25 },
|
|
87
|
+
'claude-sonnet-4-20250514': { input: 3.00, output: 15.00 },
|
|
88
|
+
// Add more models as needed
|
|
89
|
+
};
|
|
90
|
+
},
|
|
91
|
+
};
|
|
92
|
+
*/
|
|
93
|
+
|
|
94
|
+
// =============================================================================
|
|
95
|
+
// CONSTANTS
|
|
96
|
+
// =============================================================================
|
|
97
|
+
|
|
98
|
+
/** Maximum age of cached pricing before it is refetched: 24 hours, in milliseconds. */
const CACHE_TTL_MS = 86_400_000;

/** Cache directory, resolved relative to the current working directory. */
const CACHE_DIR = '.peakinfer/cache';

/** Name of the pricing cache file inside CACHE_DIR. */
const CACHE_FILE = 'pricing.json';
|
|
101
|
+
|
|
102
|
+
// =============================================================================
|
|
103
|
+
// STATE
|
|
104
|
+
// =============================================================================
|
|
105
|
+
|
|
106
|
+
/** Provider that loadPricing() fetches from; LiteLLM unless replaced via setPricingProvider(). */
let activeProvider: PricingProvider = litellmProvider;

/** In-memory pricing snapshot; null until pricing has been loaded or injected. */
let pricingCache: PricingCache | null = null;
|
|
108
|
+
|
|
109
|
+
// =============================================================================
|
|
110
|
+
// PROVIDER MANAGEMENT
|
|
111
|
+
// =============================================================================
|
|
112
|
+
|
|
113
|
+
/**
 * Replace the provider that loadPricing() fetches from.
 *
 * Call this before loadPricing() to use a different data source. The
 * in-memory pricing snapshot is discarded as part of the switch.
 *
 * @param provider - The pricing source to use from now on.
 *
 * @example
 * setPricingProvider({ name: 'local', fetch: async () => ({ ... }) });
 */
export function setPricingProvider(provider: PricingProvider): void {
  // Drop the snapshot so data from the previous provider is not reused.
  pricingCache = null;
  activeProvider = provider;
}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
// =============================================================================
|
|
128
|
+
// HELPERS
|
|
129
|
+
// =============================================================================
|
|
130
|
+
|
|
131
|
+
/** Build the path of the pricing cache file beneath the current working directory. */
function getCachePath(): string {
  const segments = [process.cwd(), CACHE_DIR, CACHE_FILE];
  return join(...segments);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Read the persisted pricing cache, if one exists and is well-formed.
 *
 * @returns The parsed cache, or `null` when the file is missing, cannot be
 *   read, is not valid JSON, or does not have the PricingCache shape.
 */
function loadCacheFromDisk(): PricingCache | null {
  const cachePath = getCachePath();
  if (!existsSync(cachePath)) {
    return null;
  }

  try {
    const parsed: unknown = JSON.parse(readFileSync(cachePath, 'utf-8'));

    // The file is external input (it may be truncated or hand-edited).
    // Reject anything that is not shaped like a PricingCache so callers never
    // adopt a snapshot whose `data` is missing.
    if (typeof parsed !== 'object' || parsed === null) {
      return null;
    }
    const candidate = parsed as Partial<PricingCache>;
    const wellFormed =
      typeof candidate.source === 'string' &&
      typeof candidate.fetchedAt === 'number' &&
      typeof candidate.data === 'object' &&
      candidate.data !== null &&
      !Array.isArray(candidate.data);

    return wellFormed ? (candidate as PricingCache) : null;
  } catch {
    // Unreadable file or malformed JSON: behave as if no cache exists.
    return null;
  }
}
|
|
147
|
+
|
|
148
|
+
/**
 * Write `cache` to the pricing cache file as pretty-printed JSON, creating
 * the cache directory first when it does not exist yet.
 *
 * @param cache - Snapshot to persist.
 */
function saveCacheToDisk(cache: PricingCache): void {
  const targetFile = getCachePath();
  const targetDir = join(process.cwd(), CACHE_DIR);

  const directoryMissing = !existsSync(targetDir);
  if (directoryMissing) {
    mkdirSync(targetDir, { recursive: true });
  }

  const serialized = JSON.stringify(cache, null, 2);
  writeFileSync(targetFile, serialized);
}
|
|
156
|
+
|
|
157
|
+
// =============================================================================
|
|
158
|
+
// PUBLIC API
|
|
159
|
+
// =============================================================================
|
|
160
|
+
|
|
161
|
+
/**
 * Report whether the in-memory pricing snapshot can be used as-is.
 *
 * @returns `true` only when a snapshot exists, its `source` matches the active
 *   provider's name, and it is younger than CACHE_TTL_MS.
 */
export function isCacheValid(): boolean {
  const snapshot = pricingCache;
  if (!snapshot) {
    return false;
  }

  // A snapshot produced by a different provider is never valid; otherwise
  // validity is decided by age alone.
  return (
    snapshot.source === activeProvider.name &&
    Date.now() - snapshot.fetchedAt < CACHE_TTL_MS
  );
}
|
|
167
|
+
|
|
168
|
+
/**
 * Make pricing data available in memory, trying in order:
 *   1. the in-memory snapshot, if still valid;
 *   2. the on-disk cache, if it belongs to the active provider and is fresh;
 *   3. a fetch from the active provider (the result is also written to disk).
 *
 * If the fetch fails, a stale on-disk cache from the same provider is used;
 * with no such cache, an empty pricing table is installed. Both cases log a
 * warning. This function does not reject on fetch or disk-write failures.
 */
export async function loadPricing(): Promise<void> {
  // Check memory cache
  if (isCacheValid()) {
    return;
  }

  // Check disk cache (only if same provider)
  const diskCache = loadCacheFromDisk();
  if (
    diskCache &&
    diskCache.source === activeProvider.name &&
    Date.now() - diskCache.fetchedAt < CACHE_TTL_MS
  ) {
    pricingCache = diskCache;
    return;
  }

  // Fetch from active provider
  let data: Record<string, ModelCost>;
  try {
    data = await activeProvider.fetch();
  } catch {
    // Fall back to stale cache if available and same provider
    if (diskCache && diskCache.source === activeProvider.name) {
      console.warn(`[costs] Failed to fetch from ${activeProvider.name}, using stale cache`);
      pricingCache = diskCache;
      return;
    }

    // No cache at all - use empty with warning
    console.warn(`[costs] Failed to fetch from ${activeProvider.name}, no cache available`);
    pricingCache = {
      data: {},
      fetchedAt: Date.now(),
      source: activeProvider.name,
    };
    return;
  }

  pricingCache = {
    data,
    fetchedAt: Date.now(),
    source: activeProvider.name,
  };

  // Persisting is best-effort and deliberately outside the fetch handler:
  // a disk failure (read-only directory, permissions) must not be mistaken
  // for a fetch failure and must not discard the pricing just fetched.
  try {
    saveCacheToDisk(pricingCache);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`[costs] Could not write pricing cache to disk: ${reason}`);
  }
}
|
|
211
|
+
|
|
212
|
+
/**
 * Look up the cost of a model in the loaded pricing data.
 *
 * Resolution order:
 *   1. exact key;
 *   2. the key with a known provider prefix (`openai/`, `anthropic/`,
 *      `azure/`, `together_ai/`, `fireworks_ai/`);
 *   3. the first pricing key that contains the model name, case-insensitively.
 *
 * @param model - Model identifier to look up.
 * @returns The matching cost, or `{ input: 0, output: 0 }` when pricing has
 *   not been loaded, `model` is empty, or nothing matches.
 */
export function getModelCost(model: string): ModelCost {
  // An empty name would otherwise "partially match" every key and return the
  // first entry of the table.
  if (!pricingCache || !model) {
    return { input: 0, output: 0 };
  }

  const { data } = pricingCache;

  // Only own properties count: a bare `data[key]` on a plain object also
  // finds inherited members, so names like "constructor" or "__proto__"
  // would yield a non-cost value.
  const lookup = (key: string): ModelCost | undefined =>
    Object.prototype.hasOwnProperty.call(data, key) ? data[key] : undefined;

  // Exact match first, then provider-prefixed variations
  const candidates = [
    model,
    `openai/${model}`,
    `anthropic/${model}`,
    `azure/${model}`,
    `together_ai/${model}`,
    `fireworks_ai/${model}`,
  ];

  for (const candidate of candidates) {
    const cost = lookup(candidate);
    if (cost) {
      return cost;
    }
  }

  // Try partial match (model name contains)
  const lowerModel = model.toLowerCase();
  for (const [key, cost] of Object.entries(data)) {
    if (key.toLowerCase().includes(lowerModel)) {
      return cost;
    }
  }

  return { input: 0, output: 0 };
}
|
|
248
|
+
|
|
249
|
+
/**
 * Compute the cost of one request from its token counts.
 *
 * @param model - Model identifier, resolved through getModelCost().
 * @param inputTokens - Number of input tokens.
 * @param outputTokens - Number of output tokens.
 * @returns Token counts multiplied by the model's per-1M-token rates, divided by 1,000,000.
 */
export function calculateCost(
  model: string,
  inputTokens: number,
  outputTokens: number
): number {
  const { input: inputRate, output: outputRate } = getModelCost(model);
  const inputPortion = inputTokens * inputRate;
  const outputPortion = outputTokens * outputRate;
  return (inputPortion + outputPortion) / 1_000_000;
}
|
|
257
|
+
|
|
258
|
+
/**
 * Install a pricing table directly into the in-memory cache, stamped with the
 * current time and labelled with source 'test'.
 *
 * @param data - Per-model costs keyed by model identifier.
 */
export function setTestPricing(data: Record<string, { input: number; output: number }>): void {
  const injected: PricingCache = {
    data,
    fetchedAt: Date.now(),
    source: 'test',
  };
  pricingCache = injected;
}
|
|
265
|
+
|
|
266
|
+
// =============================================================================
|
|
267
|
+
// PRICING CONTEXT FOR LLM
|
|
268
|
+
// =============================================================================
|
|
269
|
+
|
|
270
|
+
/**
 * Cost bucket a model falls into, based on its cost per 1M tokens.
 * 'unknown' is used when no pricing is available for the model.
 */
export type PricingTier =
  | 'expensive'
  | 'moderate'
  | 'cheap'
  | 'unknown';

/** Above this many dollars per 1M tokens a model counts as 'expensive'. */
const EXPENSIVE_THRESHOLD = 10.0;

/** Above this many dollars per 1M tokens (up to EXPENSIVE_THRESHOLD) a model counts as 'moderate'. */
const MODERATE_THRESHOLD = 1.0;
|
|
277
|
+
|
|
278
|
+
/**
 * Assign a model to a pricing tier.
 *
 * The tier is decided by the mean of the model's input and output cost:
 * above EXPENSIVE_THRESHOLD is 'expensive', above MODERATE_THRESHOLD is
 * 'moderate', anything else is 'cheap'. A model whose input and output cost
 * are both 0 is reported as 'unknown'.
 *
 * @param model - Model identifier, resolved through getModelCost().
 */
export function classifyModelCost(model: string): PricingTier {
  const { input, output } = getModelCost(model);

  const hasNoPricing = input === 0 && output === 0;
  if (hasNoPricing) {
    return 'unknown';
  }

  const meanCost = (input + output) / 2;
  if (meanCost > EXPENSIVE_THRESHOLD) {
    return 'expensive';
  }
  return meanCost > MODERATE_THRESHOLD ? 'moderate' : 'cheap';
}
|
|
298
|
+
|
|
299
|
+
/**
 * Condensed pricing information for a set of models, intended as context for
 * LLM analysis. Produced by getPricingContext().
 */
export interface PricingContext {
  /** Cost and tier of each requested model, keyed by model identifier. */
  models: Record<string, {
    /** Input cost in $/1M tokens. */
    input: number;
    /** Output cost in $/1M tokens. */
    output: number;
    /** Tier the model was classified into. */
    tier: PricingTier;
  }>;

  /** Cut-off values used for tier classification. */
  thresholds: {
    expensive: number;
    moderate: number;
  };
}
|
|
314
|
+
|
|
315
|
+
/**
 * Build a PricingContext for the given models.
 *
 * @param models - Model identifiers to include; each is looked up with
 *   getModelCost() and classified with classifyModelCost().
 * @returns One entry per model, plus the tier thresholds.
 */
export function getPricingContext(models: string[]): PricingContext {
  const perModel: PricingContext['models'] = {};

  for (const name of models) {
    const { input, output } = getModelCost(name);
    perModel[name] = { input, output, tier: classifyModelCost(name) };
  }

  return {
    models: perModel,
    thresholds: {
      expensive: EXPENSIVE_THRESHOLD,
      moderate: MODERATE_THRESHOLD,
    },
  };
}
|
|
335
|
+
|
|
336
|
+
/**
 * Sum the cost of a list of events.
 *
 * @param events - Records carrying a model name and its input/output token counts.
 * @returns The sum of calculateCost() over all events; 0 for an empty list.
 */
export function calculateTotalCost(events: Array<{
  model: string;
  input_tokens: number;
  output_tokens: number;
}>): number {
  let runningTotal = 0;
  events.forEach((event) => {
    runningTotal = runningTotal + calculateCost(event.model, event.input_tokens, event.output_tokens);
  });
  return runningTotal;
}
|