@peakinfer/cli 1.0.133
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -0
- package/.env.example +6 -0
- package/.github/workflows/peakinfer.yml +64 -0
- package/CHANGELOG.md +31 -0
- package/LICENSE +190 -0
- package/README.md +335 -0
- package/data/inferencemax.json +274 -0
- package/dist/agent-analyzer.d.ts +45 -0
- package/dist/agent-analyzer.d.ts.map +1 -0
- package/dist/agent-analyzer.js +374 -0
- package/dist/agent-analyzer.js.map +1 -0
- package/dist/agent.d.ts +76 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +965 -0
- package/dist/agent.js.map +1 -0
- package/dist/agents/correlation-analyzer.d.ts +34 -0
- package/dist/agents/correlation-analyzer.d.ts.map +1 -0
- package/dist/agents/correlation-analyzer.js +261 -0
- package/dist/agents/correlation-analyzer.js.map +1 -0
- package/dist/agents/index.d.ts +91 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +111 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/runtime-analyzer.d.ts +38 -0
- package/dist/agents/runtime-analyzer.d.ts.map +1 -0
- package/dist/agents/runtime-analyzer.js +244 -0
- package/dist/agents/runtime-analyzer.js.map +1 -0
- package/dist/analysis-types.d.ts +500 -0
- package/dist/analysis-types.d.ts.map +1 -0
- package/dist/analysis-types.js +11 -0
- package/dist/analysis-types.js.map +1 -0
- package/dist/analytics.d.ts +25 -0
- package/dist/analytics.d.ts.map +1 -0
- package/dist/analytics.js +94 -0
- package/dist/analytics.js.map +1 -0
- package/dist/analyzer.d.ts +48 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +547 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/artifacts.d.ts +44 -0
- package/dist/artifacts.d.ts.map +1 -0
- package/dist/artifacts.js +165 -0
- package/dist/artifacts.js.map +1 -0
- package/dist/benchmarks/index.d.ts +88 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +205 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +427 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/ci.d.ts +19 -0
- package/dist/commands/ci.d.ts.map +1 -0
- package/dist/commands/ci.js +253 -0
- package/dist/commands/ci.js.map +1 -0
- package/dist/commands/config.d.ts +16 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +249 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/demo.d.ts +15 -0
- package/dist/commands/demo.d.ts.map +1 -0
- package/dist/commands/demo.js +106 -0
- package/dist/commands/demo.js.map +1 -0
- package/dist/commands/export.d.ts +14 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +209 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/history.d.ts +15 -0
- package/dist/commands/history.d.ts.map +1 -0
- package/dist/commands/history.js +389 -0
- package/dist/commands/history.js.map +1 -0
- package/dist/commands/template.d.ts +14 -0
- package/dist/commands/template.d.ts.map +1 -0
- package/dist/commands/template.js +341 -0
- package/dist/commands/template.js.map +1 -0
- package/dist/commands/validate-map.d.ts +12 -0
- package/dist/commands/validate-map.d.ts.map +1 -0
- package/dist/commands/validate-map.js +274 -0
- package/dist/commands/validate-map.js.map +1 -0
- package/dist/commands/whatif.d.ts +17 -0
- package/dist/commands/whatif.d.ts.map +1 -0
- package/dist/commands/whatif.js +206 -0
- package/dist/commands/whatif.js.map +1 -0
- package/dist/comparison.d.ts +38 -0
- package/dist/comparison.d.ts.map +1 -0
- package/dist/comparison.js +223 -0
- package/dist/comparison.js.map +1 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +158 -0
- package/dist/config.js.map +1 -0
- package/dist/connectors/helicone.d.ts +9 -0
- package/dist/connectors/helicone.d.ts.map +1 -0
- package/dist/connectors/helicone.js +106 -0
- package/dist/connectors/helicone.js.map +1 -0
- package/dist/connectors/index.d.ts +37 -0
- package/dist/connectors/index.d.ts.map +1 -0
- package/dist/connectors/index.js +65 -0
- package/dist/connectors/index.js.map +1 -0
- package/dist/connectors/langsmith.d.ts +9 -0
- package/dist/connectors/langsmith.d.ts.map +1 -0
- package/dist/connectors/langsmith.js +122 -0
- package/dist/connectors/langsmith.js.map +1 -0
- package/dist/connectors/types.d.ts +83 -0
- package/dist/connectors/types.d.ts.map +1 -0
- package/dist/connectors/types.js +98 -0
- package/dist/connectors/types.js.map +1 -0
- package/dist/cost-estimator.d.ts +46 -0
- package/dist/cost-estimator.d.ts.map +1 -0
- package/dist/cost-estimator.js +104 -0
- package/dist/cost-estimator.js.map +1 -0
- package/dist/costs.d.ts +57 -0
- package/dist/costs.d.ts.map +1 -0
- package/dist/costs.js +251 -0
- package/dist/costs.js.map +1 -0
- package/dist/counterfactuals.d.ts +29 -0
- package/dist/counterfactuals.d.ts.map +1 -0
- package/dist/counterfactuals.js +448 -0
- package/dist/counterfactuals.js.map +1 -0
- package/dist/enhancement-prompts.d.ts +41 -0
- package/dist/enhancement-prompts.d.ts.map +1 -0
- package/dist/enhancement-prompts.js +88 -0
- package/dist/enhancement-prompts.js.map +1 -0
- package/dist/envelopes.d.ts +20 -0
- package/dist/envelopes.d.ts.map +1 -0
- package/dist/envelopes.js +790 -0
- package/dist/envelopes.js.map +1 -0
- package/dist/format-normalizer.d.ts +71 -0
- package/dist/format-normalizer.d.ts.map +1 -0
- package/dist/format-normalizer.js +1331 -0
- package/dist/format-normalizer.js.map +1 -0
- package/dist/history.d.ts +79 -0
- package/dist/history.d.ts.map +1 -0
- package/dist/history.js +313 -0
- package/dist/history.js.map +1 -0
- package/dist/html.d.ts +11 -0
- package/dist/html.d.ts.map +1 -0
- package/dist/html.js +463 -0
- package/dist/html.js.map +1 -0
- package/dist/impact.d.ts +42 -0
- package/dist/impact.d.ts.map +1 -0
- package/dist/impact.js +443 -0
- package/dist/impact.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +34 -0
- package/dist/index.js.map +1 -0
- package/dist/insights.d.ts +5 -0
- package/dist/insights.d.ts.map +1 -0
- package/dist/insights.js +271 -0
- package/dist/insights.js.map +1 -0
- package/dist/joiner.d.ts +9 -0
- package/dist/joiner.d.ts.map +1 -0
- package/dist/joiner.js +247 -0
- package/dist/joiner.js.map +1 -0
- package/dist/orchestrator.d.ts +34 -0
- package/dist/orchestrator.d.ts.map +1 -0
- package/dist/orchestrator.js +827 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/pdf.d.ts +26 -0
- package/dist/pdf.d.ts.map +1 -0
- package/dist/pdf.js +84 -0
- package/dist/pdf.js.map +1 -0
- package/dist/prediction.d.ts +33 -0
- package/dist/prediction.d.ts.map +1 -0
- package/dist/prediction.js +316 -0
- package/dist/prediction.js.map +1 -0
- package/dist/prompts/loader.d.ts +38 -0
- package/dist/prompts/loader.d.ts.map +1 -0
- package/dist/prompts/loader.js +60 -0
- package/dist/prompts/loader.js.map +1 -0
- package/dist/renderer.d.ts +64 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +923 -0
- package/dist/renderer.js.map +1 -0
- package/dist/runid.d.ts +57 -0
- package/dist/runid.d.ts.map +1 -0
- package/dist/runid.js +199 -0
- package/dist/runid.js.map +1 -0
- package/dist/runtime.d.ts +29 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +366 -0
- package/dist/runtime.js.map +1 -0
- package/dist/scanner.d.ts +11 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +426 -0
- package/dist/scanner.js.map +1 -0
- package/dist/templates.d.ts +120 -0
- package/dist/templates.d.ts.map +1 -0
- package/dist/templates.js +429 -0
- package/dist/templates.js.map +1 -0
- package/dist/tools/index.d.ts +153 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +177 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/types.d.ts +3647 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +703 -0
- package/dist/types.js.map +1 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +23 -0
- package/dist/version.js.map +1 -0
- package/docs/demo-guide.md +423 -0
- package/docs/events-format.md +295 -0
- package/docs/inferencemap-spec.md +344 -0
- package/docs/migration-v2.md +293 -0
- package/fixtures/demo/precomputed.json +142 -0
- package/fixtures/demo-project/README.md +52 -0
- package/fixtures/demo-project/ai-service.ts +65 -0
- package/fixtures/demo-project/sample-events.jsonl +15 -0
- package/fixtures/demo-project/src/ai-service.ts +128 -0
- package/fixtures/demo-project/src/llm-client.ts +155 -0
- package/package.json +65 -0
- package/prompts/agent-analyzer.yaml +47 -0
- package/prompts/ci-gate.yaml +98 -0
- package/prompts/correlation-analyzer.yaml +178 -0
- package/prompts/format-normalizer.yaml +46 -0
- package/prompts/peak-performance.yaml +180 -0
- package/prompts/pr-comment.yaml +111 -0
- package/prompts/runtime-analyzer.yaml +189 -0
- package/prompts/unified-analyzer.yaml +241 -0
- package/schemas/inference-map.v0.1.json +215 -0
- package/scripts/benchmark.ts +394 -0
- package/scripts/demo-v1.5.sh +158 -0
- package/scripts/sync-from-site.sh +197 -0
- package/scripts/validate-sync.sh +178 -0
- package/src/agent-analyzer.ts +481 -0
- package/src/agent.ts +1232 -0
- package/src/agents/correlation-analyzer.ts +353 -0
- package/src/agents/index.ts +235 -0
- package/src/agents/runtime-analyzer.ts +343 -0
- package/src/analysis-types.ts +558 -0
- package/src/analytics.ts +100 -0
- package/src/analyzer.ts +692 -0
- package/src/artifacts.ts +218 -0
- package/src/benchmarks/index.ts +309 -0
- package/src/cli.ts +503 -0
- package/src/commands/ci.ts +336 -0
- package/src/commands/config.ts +288 -0
- package/src/commands/demo.ts +175 -0
- package/src/commands/export.ts +297 -0
- package/src/commands/history.ts +425 -0
- package/src/commands/template.ts +385 -0
- package/src/commands/validate-map.ts +324 -0
- package/src/commands/whatif.ts +272 -0
- package/src/comparison.ts +283 -0
- package/src/config.ts +188 -0
- package/src/connectors/helicone.ts +164 -0
- package/src/connectors/index.ts +93 -0
- package/src/connectors/langsmith.ts +179 -0
- package/src/connectors/types.ts +180 -0
- package/src/cost-estimator.ts +146 -0
- package/src/costs.ts +347 -0
- package/src/counterfactuals.ts +516 -0
- package/src/enhancement-prompts.ts +118 -0
- package/src/envelopes.ts +814 -0
- package/src/format-normalizer.ts +1486 -0
- package/src/history.ts +400 -0
- package/src/html.ts +512 -0
- package/src/impact.ts +522 -0
- package/src/index.ts +83 -0
- package/src/insights.ts +341 -0
- package/src/joiner.ts +289 -0
- package/src/orchestrator.ts +1015 -0
- package/src/pdf.ts +110 -0
- package/src/prediction.ts +392 -0
- package/src/prompts/loader.ts +88 -0
- package/src/renderer.ts +1045 -0
- package/src/runid.ts +261 -0
- package/src/runtime.ts +450 -0
- package/src/scanner.ts +508 -0
- package/src/templates.ts +561 -0
- package/src/tools/index.ts +214 -0
- package/src/types.ts +873 -0
- package/src/version.ts +24 -0
- package/templates/context-accumulation.yaml +23 -0
- package/templates/cost-concentration.yaml +20 -0
- package/templates/dead-code.yaml +20 -0
- package/templates/latency-explainer.yaml +23 -0
- package/templates/optimizations/ab-testing-framework.yaml +74 -0
- package/templates/optimizations/api-gateway-optimization.yaml +81 -0
- package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
- package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
- package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
- package/templates/optimizations/comprehensive-apm.yaml +76 -0
- package/templates/optimizations/context-window-optimization.yaml +91 -0
- package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
- package/templates/optimizations/distributed-training-optimization.yaml +77 -0
- package/templates/optimizations/document-analysis-edge.yaml +77 -0
- package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
- package/templates/optimizations/domain-specific-distillation.yaml +78 -0
- package/templates/optimizations/error-handling-optimization.yaml +76 -0
- package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
- package/templates/optimizations/long-context-memory-management.yaml +78 -0
- package/templates/optimizations/max-tokens-optimization.yaml +76 -0
- package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
- package/templates/optimizations/multi-framework-resilience.yaml +75 -0
- package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
- package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
- package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
- package/templates/optimizations/quality-monitoring.yaml +74 -0
- package/templates/optimizations/realtime-budget-controls.yaml +74 -0
- package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
- package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
- package/templates/optimizations/smart-model-routing.yaml +96 -0
- package/templates/optimizations/streaming-batch-selection.yaml +167 -0
- package/templates/optimizations/system-prompt-optimization.yaml +75 -0
- package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
- package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
- package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
- package/templates/overpowered-extraction.yaml +32 -0
- package/templates/overpowered-model.yaml +31 -0
- package/templates/prompt-bloat.yaml +24 -0
- package/templates/retry-explosion.yaml +28 -0
- package/templates/schema/insight.schema.json +113 -0
- package/templates/schema/optimization.schema.json +180 -0
- package/templates/streaming-drift.yaml +30 -0
- package/templates/throughput-gap.yaml +21 -0
- package/templates/token-underutilization.yaml +28 -0
- package/templates/untested-fallback.yaml +21 -0
- package/tests/accuracy/drift-detection.test.ts +184 -0
- package/tests/accuracy/false-positives.test.ts +166 -0
- package/tests/accuracy/templates.test.ts +205 -0
- package/tests/action/commands.test.ts +125 -0
- package/tests/action/comments.test.ts +347 -0
- package/tests/cli.test.ts +203 -0
- package/tests/comparison.test.ts +309 -0
- package/tests/correlation-analyzer.test.ts +534 -0
- package/tests/counterfactuals.test.ts +347 -0
- package/tests/fixtures/events/missing-id.jsonl +1 -0
- package/tests/fixtures/events/missing-input.jsonl +1 -0
- package/tests/fixtures/events/missing-latency.jsonl +1 -0
- package/tests/fixtures/events/missing-model.jsonl +1 -0
- package/tests/fixtures/events/missing-output.jsonl +1 -0
- package/tests/fixtures/events/missing-provider.jsonl +1 -0
- package/tests/fixtures/events/missing-ts.jsonl +1 -0
- package/tests/fixtures/events/valid.csv +3 -0
- package/tests/fixtures/events/valid.json +1 -0
- package/tests/fixtures/events/valid.jsonl +2 -0
- package/tests/fixtures/events/with-callsite.jsonl +1 -0
- package/tests/fixtures/events/with-intent.jsonl +1 -0
- package/tests/fixtures/events/wrong-type.jsonl +1 -0
- package/tests/fixtures/repos/empty/.gitkeep +0 -0
- package/tests/fixtures/repos/hybrid-router/router.py +35 -0
- package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
- package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
- package/tests/fixtures/repos/saas-openai/client.py +26 -0
- package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
- package/tests/github-action.test.ts +292 -0
- package/tests/insights.test.ts +878 -0
- package/tests/joiner.test.ts +168 -0
- package/tests/performance/action-latency.test.ts +132 -0
- package/tests/performance/benchmark.test.ts +189 -0
- package/tests/performance/cli-latency.test.ts +102 -0
- package/tests/pr-comment.test.ts +313 -0
- package/tests/prediction.test.ts +296 -0
- package/tests/runtime-analyzer.test.ts +375 -0
- package/tests/runtime.test.ts +205 -0
- package/tests/scanner.test.ts +122 -0
- package/tests/template-conformance.test.ts +526 -0
- package/tests/unit/cost-calculator.test.ts +303 -0
- package/tests/unit/credits.test.ts +180 -0
- package/tests/unit/inference-map.test.ts +276 -0
- package/tests/unit/schema.test.ts +300 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +14 -0
|
@@ -0,0 +1,516 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Counterfactual Insights Module (v1.5)
|
|
3
|
+
*
|
|
4
|
+
* Generates "what if" optimization scenarios for inference points based on:
|
|
5
|
+
* - Model alternatives (cheaper/faster models)
|
|
6
|
+
* - Pattern opportunities (batching, caching, streaming)
|
|
7
|
+
* - Provider alternatives (cloud vs self-hosted)
|
|
8
|
+
*
|
|
9
|
+
* Shows the road not taken and its potential impact,
|
|
10
|
+
* enabling informed optimization decisions.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type {
|
|
14
|
+
Callsite,
|
|
15
|
+
InferenceMap,
|
|
16
|
+
Counterfactual,
|
|
17
|
+
CounterfactualResult,
|
|
18
|
+
CounterfactualSummary,
|
|
19
|
+
CounterfactualType,
|
|
20
|
+
CounterfactualState,
|
|
21
|
+
CounterfactualImpact,
|
|
22
|
+
} from './types.js';
|
|
23
|
+
|
|
24
|
+
// =============================================================================
|
|
25
|
+
// CONSTANTS
|
|
26
|
+
// =============================================================================
|
|
27
|
+
|
|
28
|
+
// Model alternatives with their characteristics.
// Keys are base model names; findModelAlternatives() matches them against
// callsite model names. Multipliers are relative to the key model.
const MODEL_ALTERNATIVES: Record<string, Array<{
  model: string;
  provider: string;
  latencyMultiplier: number;  // Relative to original (< 1 = faster)
  costMultiplier: number;     // Relative to original (< 1 = cheaper)
  tradeoffs: string[];
}>> = {
  // GPT-4 alternatives
  'gpt-4': [
    { model: 'gpt-4o', provider: 'openai', latencyMultiplier: 0.5, costMultiplier: 0.5, tradeoffs: ['Similar capability, significantly faster'] },
    { model: 'gpt-4o-mini', provider: 'openai', latencyMultiplier: 0.25, costMultiplier: 0.1, tradeoffs: ['Good for simpler tasks', 'May reduce quality on complex reasoning'] },
    { model: 'claude-3-haiku', provider: 'anthropic', latencyMultiplier: 0.25, costMultiplier: 0.08, tradeoffs: ['Fast and cheap', 'Different provider', 'May need prompt adjustments'] },
  ],
  'gpt-4-turbo': [
    { model: 'gpt-4o', provider: 'openai', latencyMultiplier: 0.7, costMultiplier: 0.7, tradeoffs: ['Newer model, similar capability'] },
    { model: 'gpt-4o-mini', provider: 'openai', latencyMultiplier: 0.3, costMultiplier: 0.15, tradeoffs: ['Good for simpler tasks'] },
  ],
  // Claude alternatives
  'claude-3-opus': [
    { model: 'claude-3-sonnet', provider: 'anthropic', latencyMultiplier: 0.5, costMultiplier: 0.2, tradeoffs: ['Good balance of speed and capability'] },
    { model: 'claude-3-haiku', provider: 'anthropic', latencyMultiplier: 0.2, costMultiplier: 0.04, tradeoffs: ['Very fast', 'Best for simple tasks'] },
    { model: 'claude-3.5-sonnet', provider: 'anthropic', latencyMultiplier: 0.4, costMultiplier: 0.15, tradeoffs: ['Often matches Opus quality at lower cost'] },
  ],
  'claude-3-sonnet': [
    { model: 'claude-3-haiku', provider: 'anthropic', latencyMultiplier: 0.4, costMultiplier: 0.2, tradeoffs: ['Faster', 'May reduce quality'] },
    { model: 'claude-3.5-sonnet', provider: 'anthropic', latencyMultiplier: 0.8, costMultiplier: 1.0, tradeoffs: ['Improved capability at similar cost'] },
  ],
  // Gemini alternatives
  'gemini-1.5-pro': [
    { model: 'gemini-1.5-flash', provider: 'google', latencyMultiplier: 0.2, costMultiplier: 0.1, tradeoffs: ['Much faster', 'Good for most tasks'] },
  ],
};

// Base costs per 1K calls (rough estimates for counterfactual calculations)
const MODEL_COSTS: Record<string, number> = {
  'gpt-4': 0.90,
  'gpt-4-turbo': 0.30,
  'gpt-4o': 0.15,
  'gpt-4o-mini': 0.015,
  'gpt-3.5-turbo': 0.015,
  'claude-3-opus': 0.45,
  'claude-3-sonnet': 0.09,
  'claude-3-haiku': 0.0075,
  'claude-3.5-sonnet': 0.09,
  'gemini-1.5-pro': 0.105,
  'gemini-1.5-flash': 0.0105,
  'gemini-pro': 0.015,
};

// Base latencies (p95 in ms)
const MODEL_LATENCIES: Record<string, number> = {
  'gpt-4': 5000,
  'gpt-4-turbo': 4000,
  'gpt-4o': 2500,
  'gpt-4o-mini': 1500,
  'gpt-3.5-turbo': 1500,
  'claude-3-opus': 8000,
  'claude-3-sonnet': 4000,
  'claude-3-haiku': 1500,
  'claude-3.5-sonnet': 3500,
  'gemini-1.5-pro': 4000,
  'gemini-1.5-flash': 800,
  'gemini-pro': 3000,
};

// =============================================================================
// HELPERS
// =============================================================================

/**
 * Find the most specific table key contained in a model name.
 *
 * Several keys can be substrings of one model name ('gpt-4', 'gpt-4o' and
 * 'gpt-4o-mini' all occur in "gpt-4o-mini"), so the longest matching key wins
 * instead of whichever key happens to be listed first. On equal length the
 * earlier key in the table is kept.
 *
 * @param normalizedModel - Lower-cased model name to search in.
 * @param table - Lookup table whose keys are base model names.
 * @param requireBoundary - When true, an occurrence only counts if it is not
 *   directly followed by a letter, digit or '.', i.e. the name does not
 *   continue into a different model ("gpt-4o", "gpt-4.1") while dated or
 *   suffixed variants ("gpt-4-0613") still match.
 * @returns The matching key exactly as written in the table, or null.
 */
function findLongestMatchingKey(
  normalizedModel: string,
  table: Readonly<Record<string, unknown>>,
  requireBoundary = false,
): string | null {
  const nameContinuation = /[a-z0-9.]/;
  let best: string | null = null;

  for (const key of Object.keys(table)) {
    // A key that is not longer than the current best can never replace it.
    if (best !== null && key.length <= best.length) continue;

    const needle = key.toLowerCase();
    let at = normalizedModel.indexOf(needle);
    while (at !== -1) {
      const next = normalizedModel.charAt(at + needle.length);
      if (!requireBoundary || next === '' || !nameContinuation.test(next)) {
        best = key;
        break;
      }
      // This occurrence runs into a longer name; try a later occurrence.
      at = normalizedModel.indexOf(needle, at + 1);
    }
  }

  return best;
}

/**
 * Get model cost estimate.
 *
 * Matching is case-insensitive and substring based; the most specific
 * (longest) MODEL_COSTS key contained in the name is used.
 *
 * @param model - Model name, or null when unknown.
 * @returns The table cost, or the 0.10 default for null/empty/unlisted models.
 */
function getModelCost(model: string | null): number {
  if (!model) return 0.10;  // Default
  const key = findLongestMatchingKey(model.toLowerCase(), MODEL_COSTS);
  return key === null ? 0.10 : (MODEL_COSTS[key] ?? 0.10);
}

/**
 * Get model latency estimate (p95 in ms).
 *
 * Matching is case-insensitive and substring based; the most specific
 * (longest) MODEL_LATENCIES key contained in the name is used.
 *
 * @param model - Model name, or null when unknown.
 * @returns The table latency, or the 3000 default for null/empty/unlisted models.
 */
function getModelLatency(model: string | null): number {
  if (!model) return 3000;  // Default
  const key = findLongestMatchingKey(model.toLowerCase(), MODEL_LATENCIES);
  return key === null ? 3000 : (MODEL_LATENCIES[key] ?? 3000);
}

/**
 * Find model alternatives for a given model.
 *
 * The model name must contain a MODEL_ALTERNATIVES key that is not continued
 * by further name characters, so "gpt-4-0613" resolves to the 'gpt-4' entry
 * while "gpt-4o" (a different model that merely shares the prefix) does not.
 * When several keys qualify, the longest one is used.
 *
 * @param model - Model name, or null when unknown.
 * @returns The alternatives list for the matched key, or null when nothing matches.
 */
function findModelAlternatives(model: string | null): typeof MODEL_ALTERNATIVES[string] | null {
  if (!model) return null;
  const key = findLongestMatchingKey(model.toLowerCase(), MODEL_ALTERNATIVES, true);
  return key === null ? null : (MODEL_ALTERNATIVES[key] ?? null);
}
|
|
139
|
+
|
|
140
|
+
/**
 * Generate a counterfactual ID of the form `cf_<epoch-ms>_<6 base-36 chars>`.
 *
 * The suffix comes from the base-36 expansion of `Math.random()`. That
 * expansion can have fewer than six fractional digits (0 → "0", 0.5 → "0.i"),
 * which would leave the suffix short or empty, so it is right-padded with '0'
 * to keep every ID well-formed and fixed-width.
 *
 * IDs combine a timestamp with random characters; collisions are unlikely but
 * not impossible.
 *
 * @returns A new ID string.
 */
function generateId(): string {
  const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0');
  return `cf_${Date.now()}_${suffix}`;
}
|
|
146
|
+
|
|
147
|
+
// =============================================================================
|
|
148
|
+
// COUNTERFACTUAL GENERATORS
|
|
149
|
+
// =============================================================================
|
|
150
|
+
|
|
151
|
+
/**
 * Generate model swap counterfactuals.
 *
 * Callsites that have a model are grouped by their exact model string. For
 * each group, every alternative returned by findModelAlternatives() becomes
 * one counterfactual whose proposed latency/cost are the current estimates
 * scaled by the alternative's multipliers.
 *
 * An alternative is skipped when:
 * - the current model name already contains the alternative's name (the
 *   callsite already runs that model, a dated variant of it, or a model that
 *   extends its name), since "switching" would not be a real swap; or
 * - it improves neither latency nor cost.
 *
 * @param callsites - Inference points to analyze; entries without a model are ignored.
 * @returns One counterfactual per (model group, viable alternative) pair.
 */
function generateModelSwapCounterfactuals(callsites: Callsite[]): Counterfactual[] {
  const counterfactuals: Counterfactual[] = [];

  // Group callsites by model
  const byModel = new Map<string, Callsite[]>();
  for (const cs of callsites) {
    if (!cs.model) continue;
    const group = byModel.get(cs.model);
    if (group) {
      group.push(cs);
    } else {
      byModel.set(cs.model, [cs]);
    }
  }

  // Generate alternatives for each model
  for (const [model, sites] of byModel) {
    const alternatives = findModelAlternatives(model);
    if (!alternatives) continue;

    const normalizedModel = model.toLowerCase();
    const currentLatency = getModelLatency(model);
    const currentCost = getModelCost(model);

    for (const alt of alternatives) {
      // Never propose switching a model to itself (or to the base name it
      // already extends); the multipliers do not describe that move.
      if (normalizedModel.includes(alt.model.toLowerCase())) continue;

      const proposedLatency = Math.round(currentLatency * alt.latencyMultiplier);
      const proposedCost = currentCost * alt.costMultiplier;

      const latencyDelta = proposedLatency - currentLatency;
      const costDelta = proposedCost - currentCost;

      // Only suggest if at least one of latency or cost improves
      if (latencyDelta >= 0 && costDelta >= 0) continue;

      counterfactuals.push({
        id: generateId(),
        type: 'model_swap',
        headline: `Switch from ${model} to ${alt.model}`,
        description: `Replace ${model} with ${alt.model} for ${sites.length} inference point${sites.length !== 1 ? 's' : ''}`,
        currentState: {
          model,
          // Provider is taken from the first callsite in the group.
          provider: sites[0]?.provider || undefined,
          estimatedLatency: currentLatency,
          estimatedCost: currentCost,
        },
        proposedState: {
          model: alt.model,
          provider: alt.provider,
          estimatedLatency: proposedLatency,
          estimatedCost: proposedCost,
        },
        impact: {
          latencyDelta,
          latencyDeltaPercent: Math.round((latencyDelta / currentLatency) * 100),
          costDelta,
          costDeltaPercent: Math.round((costDelta / currentCost) * 100),
          tradeoffs: alt.tradeoffs,
        },
        confidence: 'medium',
        confidenceReason: 'Based on typical model performance characteristics',
        affectedPoints: sites.map(s => s.id),
        effort: 'low',
      });
    }
  }

  return counterfactuals;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Build "enable batching" counterfactuals.
 *
 * Callsites whose `patterns.batching` is falsy are grouped by model (those
 * without a model share the 'unknown' group). Each group holding at least two
 * callsites produces one suggestion that applies a flat 20% latency and 10%
 * cost reduction to the group's model estimates.
 *
 * @param callsites - Inference points to analyze.
 * @returns One counterfactual per model group with two or more unbatched callsites.
 */
function generateBatchingCounterfactuals(callsites: Callsite[]): Counterfactual[] {
  const candidates = callsites.filter(site => !site.patterns?.batching);

  // A single request cannot be batched with anything.
  if (candidates.length < 2) return [];

  // Bucket the candidates per model, preserving first-seen order.
  const groups = new Map<string, Callsite[]>();
  for (const site of candidates) {
    const groupKey = site.model || 'unknown';
    const members = groups.get(groupKey);
    if (members) {
      members.push(site);
    } else {
      groups.set(groupKey, [site]);
    }
  }

  const results: Counterfactual[] = [];

  groups.forEach((members, model) => {
    // Batching only makes sense with several calls to the same model.
    if (members.length < 2) return;

    const baselineLatency = getModelLatency(model);
    const baselineCost = getModelCost(model);

    // Assumed effect of batching: -20% per-request latency, -10% cost.
    const batchedLatency = Math.round(baselineLatency * 0.8);
    const batchedCost = baselineCost * 0.9;

    results.push({
      id: generateId(),
      type: 'batch_optimization',
      headline: `Enable batching for ${model}`,
      description: `Batch ${members.length} ${model} calls together to reduce overhead`,
      currentState: {
        model,
        pattern: 'individual requests',
        estimatedLatency: baselineLatency,
        estimatedCost: baselineCost,
      },
      proposedState: {
        model,
        pattern: 'batched requests',
        estimatedLatency: batchedLatency,
        estimatedCost: batchedCost,
      },
      impact: {
        latencyDelta: batchedLatency - baselineLatency,
        latencyDeltaPercent: -20,
        costDelta: batchedCost - baselineCost,
        costDeltaPercent: -10,
        tradeoffs: [
          'Requires collecting requests before processing',
          'May increase individual request latency if batch window is long',
          'Need to handle partial batch failures',
        ],
      },
      confidence: 'medium',
      confidenceReason: 'Batching typically provides 10-30% improvements',
      affectedPoints: members.map(member => member.id),
      effort: 'medium',
    });
  });

  return results;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Build the "add a semantic cache" counterfactual.
 *
 * All callsites whose `patterns.caching` is falsy are treated as one group.
 * Their model latency and cost estimates are averaged, and a fixed 50% cache
 * hit rate is applied to both averages.
 *
 * @param callsites - Inference points to analyze.
 * @returns A single-element list, or an empty list when every callsite already caches.
 */
function generateCachingCounterfactuals(callsites: Callsite[]): Counterfactual[] {
  const targets = callsites.filter(site => !site.patterns?.caching);
  const count = targets.length;
  if (count === 0) return [];

  // Aggregate the per-callsite estimates.
  let latencySum = 0;
  let costSum = 0;
  for (const site of targets) {
    latencySum += getModelLatency(site.model);
    costSum += getModelCost(site.model);
  }
  const meanLatency = Math.round(latencySum / count);
  const meanCost = costSum / count;

  // With a 50% hit rate only the misses reach the model.
  const cacheHitRate = 0.5;
  const missRate = 1 - cacheHitRate;
  const cachedLatency = Math.round(meanLatency * missRate);
  const cachedCost = meanCost * missRate;

  const plural = count !== 1 ? 's' : '';

  return [
    {
      id: generateId(),
      type: 'cache_addition',
      headline: 'Add semantic caching layer',
      description: `Add caching for ${count} inference point${plural} to avoid redundant LLM calls`,
      currentState: {
        pattern: 'no caching',
        estimatedLatency: meanLatency,
        estimatedCost: meanCost,
      },
      proposedState: {
        pattern: 'semantic cache',
        estimatedLatency: cachedLatency,
        estimatedCost: cachedCost,
      },
      impact: {
        latencyDelta: cachedLatency - meanLatency,
        latencyDeltaPercent: -50,
        costDelta: cachedCost - meanCost,
        costDeltaPercent: -50,
        tradeoffs: [
          'Assumes ~50% cache hit rate (varies by use case)',
          'Need to manage cache invalidation',
          'May return stale results for time-sensitive queries',
          'Requires similarity matching infrastructure',
        ],
      },
      confidence: 'low',
      confidenceReason: 'Cache hit rate varies significantly by use case',
      affectedPoints: targets.map(site => site.id),
      effort: 'high',
    },
  ];
}
|
|
345
|
+
|
|
346
|
+
/**
 * Build the "enable response streaming" counterfactual.
 *
 * All callsites whose `patterns.streaming` is falsy are treated as one group.
 * The proposed latency is the time to first token, taken as 20% of the
 * group's average model latency. Cost fields are reported as 0 on both sides
 * because streaming is modelled as cost-neutral.
 *
 * @param callsites - Inference points to analyze.
 * @returns A single-element list, or an empty list when every callsite already streams.
 */
function generateStreamingCounterfactuals(callsites: Callsite[]): Counterfactual[] {
  const targets = callsites.filter(site => !site.patterns?.streaming);
  const count = targets.length;
  if (count === 0) return [];

  let latencySum = 0;
  for (const site of targets) {
    latencySum += getModelLatency(site.model);
  }
  const meanLatency = Math.round(latencySum / count);

  // Perceived latency with streaming: first token after 20% of the total time.
  const firstTokenLatency = Math.round(meanLatency * 0.2);

  const plural = count !== 1 ? 's' : '';

  return [
    {
      id: generateId(),
      type: 'streaming_enable',
      headline: 'Enable response streaming',
      description: `Enable streaming for ${count} inference point${plural} to reduce perceived latency`,
      currentState: {
        pattern: 'synchronous',
        estimatedLatency: meanLatency,
        estimatedCost: 0,  // Cost is not affected by streaming
      },
      proposedState: {
        pattern: 'streaming',
        estimatedLatency: firstTokenLatency,  // Time to first token
        estimatedCost: 0,
      },
      impact: {
        latencyDelta: firstTokenLatency - meanLatency,
        latencyDeltaPercent: -80,  // Perceived latency drops by 80%
        costDelta: 0,
        costDeltaPercent: 0,
        tradeoffs: [
          'Total response time unchanged, but first token arrives faster',
          'Requires UI changes to display incremental output',
          'May complicate error handling',
        ],
      },
      confidence: 'high',
      confidenceReason: 'Streaming consistently improves perceived latency',
      affectedPoints: targets.map(site => site.id),
      effort: 'low',
    },
  ];
}
|
|
397
|
+
|
|
398
|
+
// =============================================================================
|
|
399
|
+
// PUBLIC API
|
|
400
|
+
// =============================================================================
|
|
401
|
+
|
|
402
|
+
/**
 * Produce every kind of counterfactual for an inference map.
 *
 * Runs the model-swap, batching, caching and streaming generators (in that
 * order) over the map's callsites, orders the combined list by ascending
 * `latencyDeltaPercent` (most negative, i.e. largest latency saving, first),
 * and attaches a summary plus an ISO-8601 generation timestamp.
 *
 * @param inferenceMap - Map whose `callsites` are analysed.
 * @returns The sorted counterfactuals, their summary, and `generatedAt`.
 */
export function generateCounterfactuals(inferenceMap: InferenceMap): CounterfactualResult {
  const { callsites } = inferenceMap;

  const counterfactuals: Counterfactual[] = [
    ...generateModelSwapCounterfactuals(callsites),
    ...generateBatchingCounterfactuals(callsites),
    ...generateCachingCounterfactuals(callsites),
    ...generateStreamingCounterfactuals(callsites),
  ];

  // Largest latency saving (most negative percent) comes first.
  counterfactuals.sort(
    (left, right) => left.impact.latencyDeltaPercent - right.impact.latencyDeltaPercent
  );

  const summary = calculateSummary(counterfactuals);
  const generatedAt = new Date().toISOString();

  return { counterfactuals, summary, generatedAt };
}
|
|
427
|
+
|
|
428
|
+
/**
 * Calculate counterfactual summary.
 *
 * Deltas are signed: a negative delta is a saving, a positive delta is a
 * regression. Each `max*Savings*` field is the magnitude of the most negative
 * delta across all counterfactuals, and is 0 when no counterfactual actually
 * saves anything on that axis. (Taking `Math.abs` of the minimum would
 * misreport a regression as a saving whenever every delta is positive.)
 *
 * @param counterfactuals - Counterfactuals to summarise; may be empty.
 * @returns Opportunity count, best savings per axis, and a per-type tally.
 */
function calculateSummary(counterfactuals: Counterfactual[]): CounterfactualSummary {
  // Largest saving (as a non-negative number) for one impact field.
  // A plain loop avoids spreading a potentially large array into Math.min,
  // and NaN deltas are ignored because `NaN > best` is always false.
  const largestSaving = (pickDelta: (cf: Counterfactual) => number): number => {
    let best = 0;
    for (const cf of counterfactuals) {
      const saving = -pickDelta(cf);
      if (saving > best) {
        best = saving;
      }
    }
    return best;
  };

  // Count by type
  const byType: Record<string, number> = {};
  for (const cf of counterfactuals) {
    byType[cf.type] = (byType[cf.type] ?? 0) + 1;
  }

  return {
    totalOpportunities: counterfactuals.length,
    maxLatencySavingsMs: largestSaving(cf => cf.impact.latencyDelta),
    maxLatencySavingsPercent: largestSaving(cf => cf.impact.latencyDeltaPercent),
    maxCostSavings: largestSaving(cf => cf.impact.costDelta),
    maxCostSavingsPercent: largestSaving(cf => cf.impact.costDeltaPercent),
    byType,
  };
}
|
|
464
|
+
|
|
465
|
+
/**
 * Format counterfactual summary for display.
 *
 * The first line states how many opportunities were found (singular or
 * plural as appropriate), followed by a blank line. A latency line and a cost
 * line are appended only when the corresponding max-savings percentage is
 * greater than zero.
 *
 * @param result - Counterfactual result whose `summary` is rendered.
 * @returns Newline-joined, human-readable summary text.
 */
export function formatCounterfactualSummary(result: CounterfactualResult): string {
  const { summary } = result;

  // Match the pluralisation used in the counterfactual descriptions
  // ("inference point" / "inference points").
  const noun = summary.totalOpportunities === 1 ? 'opportunity' : 'opportunities';
  const lines: string[] = [
    `${summary.totalOpportunities} optimization ${noun} identified`,
    '',
  ];

  if (summary.maxLatencySavingsPercent > 0) {
    lines.push(` Max latency savings: ${summary.maxLatencySavingsPercent}% (${summary.maxLatencySavingsMs}ms)`);
  }
  if (summary.maxCostSavingsPercent > 0) {
    lines.push(` Max cost savings: ${summary.maxCostSavingsPercent}%`);
  }

  return lines.join('\n');
}
|
|
484
|
+
|
|
485
|
+
/**
 * Decide whether a result is worth surfacing.
 *
 * A result is significant when it has at least one opportunity and either the
 * best latency saving or the best cost saving is 20% or more.
 *
 * @param result - Counterfactual result to inspect.
 * @returns `true` when the result clears the 20% threshold on either axis.
 */
export function hasSignificantOpportunities(result: CounterfactualResult): boolean {
  const significantPercent = 20;
  const { summary } = result;

  if (!(summary.totalOpportunities > 0)) {
    return false;
  }

  if (summary.maxLatencySavingsPercent >= significantPercent) {
    return true;
  }
  return summary.maxCostSavingsPercent >= significantPercent;
}
|
|
492
|
+
|
|
493
|
+
/**
 * Return the result's counterfactuals ordered for a given priority.
 *
 * Percent deltas are signed, so a more negative value is a bigger saving and
 * sorts first. The input list is copied before sorting and is left untouched.
 *
 * @param result - Counterfactual result supplying the list to rank.
 * @param priority - `'latency'` ranks by latency percent, `'cost'` by cost
 *   percent, and `'balanced'` (or anything else) by the sum of both.
 * @returns A new array in ranked order.
 */
export function rankCounterfactuals(
  result: CounterfactualResult,
  priority: 'latency' | 'cost' | 'balanced'
): Counterfactual[] {
  const latencyOf = (cf: Counterfactual): number => cf.impact.latencyDeltaPercent;
  const costOf = (cf: Counterfactual): number => cf.impact.costDeltaPercent;

  let scoreOf: (cf: Counterfactual) => number;
  if (priority === 'latency') {
    scoreOf = latencyOf;
  } else if (priority === 'cost') {
    scoreOf = costOf;
  } else {
    // Balanced: combine latency and cost savings (both negative = better).
    scoreOf = cf => latencyOf(cf) + costOf(cf);
  }

  const ranked = [...result.counterfactuals];
  ranked.sort((left, right) => scoreOf(left) - scoreOf(right));
  return ranked;
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhancement Prompts (v1.9.5)
|
|
3
|
+
*
|
|
4
|
+
* Suggests additional layers to enhance analysis.
|
|
5
|
+
* Progressive Enhancement Model:
|
|
6
|
+
* - Code (required)
|
|
7
|
+
* - Runtime (optional) - drift detection
|
|
8
|
+
* - Benchmarks (optional) - performance comparison
|
|
9
|
+
* - Evals (optional, future) - quality gating
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import chalk from 'chalk';
|
|
13
|
+
|
|
14
|
+
/**
 * A suggestion to add one more analysis layer.
 */
export interface EnhancementPrompt {
  /** Display name of the layer being suggested (e.g. `'Runtime'`). */
  layer: string;
  /** One-line description of what the layer adds. */
  message: string;
  /** Example CLI invocation, printed by the CLI renderer. */
  cli: string;
  /** YAML snippet emitted in the GitHub Action comment. */
  action: string;
  /** Link to the layer's documentation. */
  docsUrl: string;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Which optional analysis layers are currently enabled.
 */
export interface LayerStatus {
  /** `true` when the runtime layer is enabled. */
  runtime: boolean;
  /** `true` when the benchmarks layer is enabled. */
  benchmarks: boolean;
  /** `true` when the evals layer is enabled. */
  evals: boolean;
}
|
|
27
|
+
|
|
28
|
+
/**
 * List the prompts for layers that are not enabled yet.
 *
 * Prompts are returned in a fixed order: Runtime, Benchmarks, Evals. The
 * Evals prompt is only offered when the evals layer is off AND the analysis
 * produced recommendations.
 *
 * @param layers - Current on/off state of each optional layer.
 * @param hasRecommendations - Whether the analysis yielded recommendations.
 * @returns Prompts for every layer worth suggesting; may be empty.
 */
export function getEnhancementPrompts(
  layers: LayerStatus,
  hasRecommendations: boolean
): EnhancementPrompt[] {
  const candidates: Array<{ suggest: boolean; prompt: EnhancementPrompt }> = [
    {
      suggest: !layers.runtime,
      prompt: {
        layer: 'Runtime',
        message: 'Detect drift between code and actual behavior',
        cli: 'peakinfer analyze ./src --runtime helicone',
        action: 'runtime-source: helicone\nruntime-api-key: ${{ secrets.HELICONE_API_KEY }}',
        docsUrl: 'https://peakinfer.com/docs/runtime',
      },
    },
    {
      suggest: !layers.benchmarks,
      prompt: {
        layer: 'Benchmarks',
        message: 'Compare to InferenceMAX benchmarks',
        cli: 'peakinfer analyze ./src --benchmark',
        action: 'include-benchmarks: true',
        docsUrl: 'https://peakinfer.com/docs/benchmarks',
      },
    },
    {
      suggest: !layers.evals && hasRecommendations,
      prompt: {
        layer: 'Evals',
        message: 'Gate recommendations by quality scores',
        cli: 'peakinfer analyze ./src --evals braintrust',
        action: 'evals-source: braintrust\nevals-api-key: ${{ secrets.BRAINTRUST_API_KEY }}',
        docsUrl: 'https://peakinfer.com/docs/evals',
      },
    },
  ];

  return candidates
    .filter(candidate => candidate.suggest)
    .map(candidate => candidate.prompt);
}
|
|
69
|
+
|
|
70
|
+
/**
 * Print enhancement prompts to the console.
 *
 * Prints nothing for an empty list. Otherwise writes a dimmed 60-character
 * rule, a cyan heading, and then for each prompt: its layer and message, its
 * example CLI command, and a blank line.
 *
 * @param prompts - Prompts to print.
 */
export function renderPromptsCLI(prompts: EnhancementPrompt[]): void {
  if (prompts.length === 0) {
    return;
  }

  const divider = chalk.dim('─'.repeat(60));
  console.log('\n' + divider);
  console.log(chalk.cyan('\nENHANCE YOUR ANALYSIS\n'));

  prompts.forEach(({ layer, message, cli }) => {
    console.log(chalk.white(`Add ${layer}:`), chalk.dim(message));
    console.log(chalk.dim(` ${cli}`));
    console.log('');
  });
}
|
|
85
|
+
|
|
86
|
+
/**
 * Build the one-line layer status indicator.
 *
 * Each layer is shown as its name followed by a green check mark when enabled
 * or a dimmed circle when not, in the order Code, Runtime, Benchmarks, Evals.
 *
 * @param layers - On/off state of every layer, including the code layer.
 * @returns The formatted status line.
 */
export function renderLayerStatus(layers: LayerStatus & { code: boolean }): string {
  const mark = (enabled: boolean) => {
    if (enabled) {
      return chalk.green('✓');
    }
    return chalk.dim('○');
  };

  const segments = [
    `Code ${mark(layers.code)}`,
    `Runtime ${mark(layers.runtime)}`,
    `Benchmarks ${mark(layers.benchmarks)}`,
    `Evals ${mark(layers.evals)}`,
  ];

  return 'Layers: ' + segments.join(' | ');
}
|
|
93
|
+
|
|
94
|
+
/**
 * Render enhancement prompts as a collapsible block for a GitHub Action comment.
 *
 * Returns an empty string for an empty list. Otherwise the prompts are wrapped
 * in a `<details>` element; each prompt contributes a bold heading line, its
 * `action` snippet inside a fenced `yaml` block, and a trailing blank line.
 *
 * @param prompts - Prompts to render.
 * @returns The comment fragment, or `''` when there is nothing to suggest.
 */
export function generateActionPrompts(
  prompts: EnhancementPrompt[]
): string {
  if (!prompts.length) {
    return '';
  }

  const sections = prompts.flatMap(({ layer, message, action }) => [
    `**Add ${layer}:** ${message}`,
    '```yaml',
    action,
    '```',
    '',
  ]);

  return [
    '<details>',
    '<summary>Enhance Your Analysis</summary>',
    '',
    ...sections,
    '</details>',
  ].join('\n');
}
|