@peakinfer/cli 1.0.133
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -0
- package/.env.example +6 -0
- package/.github/workflows/peakinfer.yml +64 -0
- package/CHANGELOG.md +31 -0
- package/LICENSE +190 -0
- package/README.md +335 -0
- package/data/inferencemax.json +274 -0
- package/dist/agent-analyzer.d.ts +45 -0
- package/dist/agent-analyzer.d.ts.map +1 -0
- package/dist/agent-analyzer.js +374 -0
- package/dist/agent-analyzer.js.map +1 -0
- package/dist/agent.d.ts +76 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +965 -0
- package/dist/agent.js.map +1 -0
- package/dist/agents/correlation-analyzer.d.ts +34 -0
- package/dist/agents/correlation-analyzer.d.ts.map +1 -0
- package/dist/agents/correlation-analyzer.js +261 -0
- package/dist/agents/correlation-analyzer.js.map +1 -0
- package/dist/agents/index.d.ts +91 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +111 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/runtime-analyzer.d.ts +38 -0
- package/dist/agents/runtime-analyzer.d.ts.map +1 -0
- package/dist/agents/runtime-analyzer.js +244 -0
- package/dist/agents/runtime-analyzer.js.map +1 -0
- package/dist/analysis-types.d.ts +500 -0
- package/dist/analysis-types.d.ts.map +1 -0
- package/dist/analysis-types.js +11 -0
- package/dist/analysis-types.js.map +1 -0
- package/dist/analytics.d.ts +25 -0
- package/dist/analytics.d.ts.map +1 -0
- package/dist/analytics.js +94 -0
- package/dist/analytics.js.map +1 -0
- package/dist/analyzer.d.ts +48 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +547 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/artifacts.d.ts +44 -0
- package/dist/artifacts.d.ts.map +1 -0
- package/dist/artifacts.js +165 -0
- package/dist/artifacts.js.map +1 -0
- package/dist/benchmarks/index.d.ts +88 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +205 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +427 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/ci.d.ts +19 -0
- package/dist/commands/ci.d.ts.map +1 -0
- package/dist/commands/ci.js +253 -0
- package/dist/commands/ci.js.map +1 -0
- package/dist/commands/config.d.ts +16 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +249 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/demo.d.ts +15 -0
- package/dist/commands/demo.d.ts.map +1 -0
- package/dist/commands/demo.js +106 -0
- package/dist/commands/demo.js.map +1 -0
- package/dist/commands/export.d.ts +14 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +209 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/history.d.ts +15 -0
- package/dist/commands/history.d.ts.map +1 -0
- package/dist/commands/history.js +389 -0
- package/dist/commands/history.js.map +1 -0
- package/dist/commands/template.d.ts +14 -0
- package/dist/commands/template.d.ts.map +1 -0
- package/dist/commands/template.js +341 -0
- package/dist/commands/template.js.map +1 -0
- package/dist/commands/validate-map.d.ts +12 -0
- package/dist/commands/validate-map.d.ts.map +1 -0
- package/dist/commands/validate-map.js +274 -0
- package/dist/commands/validate-map.js.map +1 -0
- package/dist/commands/whatif.d.ts +17 -0
- package/dist/commands/whatif.d.ts.map +1 -0
- package/dist/commands/whatif.js +206 -0
- package/dist/commands/whatif.js.map +1 -0
- package/dist/comparison.d.ts +38 -0
- package/dist/comparison.d.ts.map +1 -0
- package/dist/comparison.js +223 -0
- package/dist/comparison.js.map +1 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +158 -0
- package/dist/config.js.map +1 -0
- package/dist/connectors/helicone.d.ts +9 -0
- package/dist/connectors/helicone.d.ts.map +1 -0
- package/dist/connectors/helicone.js +106 -0
- package/dist/connectors/helicone.js.map +1 -0
- package/dist/connectors/index.d.ts +37 -0
- package/dist/connectors/index.d.ts.map +1 -0
- package/dist/connectors/index.js +65 -0
- package/dist/connectors/index.js.map +1 -0
- package/dist/connectors/langsmith.d.ts +9 -0
- package/dist/connectors/langsmith.d.ts.map +1 -0
- package/dist/connectors/langsmith.js +122 -0
- package/dist/connectors/langsmith.js.map +1 -0
- package/dist/connectors/types.d.ts +83 -0
- package/dist/connectors/types.d.ts.map +1 -0
- package/dist/connectors/types.js +98 -0
- package/dist/connectors/types.js.map +1 -0
- package/dist/cost-estimator.d.ts +46 -0
- package/dist/cost-estimator.d.ts.map +1 -0
- package/dist/cost-estimator.js +104 -0
- package/dist/cost-estimator.js.map +1 -0
- package/dist/costs.d.ts +57 -0
- package/dist/costs.d.ts.map +1 -0
- package/dist/costs.js +251 -0
- package/dist/costs.js.map +1 -0
- package/dist/counterfactuals.d.ts +29 -0
- package/dist/counterfactuals.d.ts.map +1 -0
- package/dist/counterfactuals.js +448 -0
- package/dist/counterfactuals.js.map +1 -0
- package/dist/enhancement-prompts.d.ts +41 -0
- package/dist/enhancement-prompts.d.ts.map +1 -0
- package/dist/enhancement-prompts.js +88 -0
- package/dist/enhancement-prompts.js.map +1 -0
- package/dist/envelopes.d.ts +20 -0
- package/dist/envelopes.d.ts.map +1 -0
- package/dist/envelopes.js +790 -0
- package/dist/envelopes.js.map +1 -0
- package/dist/format-normalizer.d.ts +71 -0
- package/dist/format-normalizer.d.ts.map +1 -0
- package/dist/format-normalizer.js +1331 -0
- package/dist/format-normalizer.js.map +1 -0
- package/dist/history.d.ts +79 -0
- package/dist/history.d.ts.map +1 -0
- package/dist/history.js +313 -0
- package/dist/history.js.map +1 -0
- package/dist/html.d.ts +11 -0
- package/dist/html.d.ts.map +1 -0
- package/dist/html.js +463 -0
- package/dist/html.js.map +1 -0
- package/dist/impact.d.ts +42 -0
- package/dist/impact.d.ts.map +1 -0
- package/dist/impact.js +443 -0
- package/dist/impact.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +34 -0
- package/dist/index.js.map +1 -0
- package/dist/insights.d.ts +5 -0
- package/dist/insights.d.ts.map +1 -0
- package/dist/insights.js +271 -0
- package/dist/insights.js.map +1 -0
- package/dist/joiner.d.ts +9 -0
- package/dist/joiner.d.ts.map +1 -0
- package/dist/joiner.js +247 -0
- package/dist/joiner.js.map +1 -0
- package/dist/orchestrator.d.ts +34 -0
- package/dist/orchestrator.d.ts.map +1 -0
- package/dist/orchestrator.js +827 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/pdf.d.ts +26 -0
- package/dist/pdf.d.ts.map +1 -0
- package/dist/pdf.js +84 -0
- package/dist/pdf.js.map +1 -0
- package/dist/prediction.d.ts +33 -0
- package/dist/prediction.d.ts.map +1 -0
- package/dist/prediction.js +316 -0
- package/dist/prediction.js.map +1 -0
- package/dist/prompts/loader.d.ts +38 -0
- package/dist/prompts/loader.d.ts.map +1 -0
- package/dist/prompts/loader.js +60 -0
- package/dist/prompts/loader.js.map +1 -0
- package/dist/renderer.d.ts +64 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +923 -0
- package/dist/renderer.js.map +1 -0
- package/dist/runid.d.ts +57 -0
- package/dist/runid.d.ts.map +1 -0
- package/dist/runid.js +199 -0
- package/dist/runid.js.map +1 -0
- package/dist/runtime.d.ts +29 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +366 -0
- package/dist/runtime.js.map +1 -0
- package/dist/scanner.d.ts +11 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +426 -0
- package/dist/scanner.js.map +1 -0
- package/dist/templates.d.ts +120 -0
- package/dist/templates.d.ts.map +1 -0
- package/dist/templates.js +429 -0
- package/dist/templates.js.map +1 -0
- package/dist/tools/index.d.ts +153 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +177 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/types.d.ts +3647 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +703 -0
- package/dist/types.js.map +1 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +23 -0
- package/dist/version.js.map +1 -0
- package/docs/demo-guide.md +423 -0
- package/docs/events-format.md +295 -0
- package/docs/inferencemap-spec.md +344 -0
- package/docs/migration-v2.md +293 -0
- package/fixtures/demo/precomputed.json +142 -0
- package/fixtures/demo-project/README.md +52 -0
- package/fixtures/demo-project/ai-service.ts +65 -0
- package/fixtures/demo-project/sample-events.jsonl +15 -0
- package/fixtures/demo-project/src/ai-service.ts +128 -0
- package/fixtures/demo-project/src/llm-client.ts +155 -0
- package/package.json +65 -0
- package/prompts/agent-analyzer.yaml +47 -0
- package/prompts/ci-gate.yaml +98 -0
- package/prompts/correlation-analyzer.yaml +178 -0
- package/prompts/format-normalizer.yaml +46 -0
- package/prompts/peak-performance.yaml +180 -0
- package/prompts/pr-comment.yaml +111 -0
- package/prompts/runtime-analyzer.yaml +189 -0
- package/prompts/unified-analyzer.yaml +241 -0
- package/schemas/inference-map.v0.1.json +215 -0
- package/scripts/benchmark.ts +394 -0
- package/scripts/demo-v1.5.sh +158 -0
- package/scripts/sync-from-site.sh +197 -0
- package/scripts/validate-sync.sh +178 -0
- package/src/agent-analyzer.ts +481 -0
- package/src/agent.ts +1232 -0
- package/src/agents/correlation-analyzer.ts +353 -0
- package/src/agents/index.ts +235 -0
- package/src/agents/runtime-analyzer.ts +343 -0
- package/src/analysis-types.ts +558 -0
- package/src/analytics.ts +100 -0
- package/src/analyzer.ts +692 -0
- package/src/artifacts.ts +218 -0
- package/src/benchmarks/index.ts +309 -0
- package/src/cli.ts +503 -0
- package/src/commands/ci.ts +336 -0
- package/src/commands/config.ts +288 -0
- package/src/commands/demo.ts +175 -0
- package/src/commands/export.ts +297 -0
- package/src/commands/history.ts +425 -0
- package/src/commands/template.ts +385 -0
- package/src/commands/validate-map.ts +324 -0
- package/src/commands/whatif.ts +272 -0
- package/src/comparison.ts +283 -0
- package/src/config.ts +188 -0
- package/src/connectors/helicone.ts +164 -0
- package/src/connectors/index.ts +93 -0
- package/src/connectors/langsmith.ts +179 -0
- package/src/connectors/types.ts +180 -0
- package/src/cost-estimator.ts +146 -0
- package/src/costs.ts +347 -0
- package/src/counterfactuals.ts +516 -0
- package/src/enhancement-prompts.ts +118 -0
- package/src/envelopes.ts +814 -0
- package/src/format-normalizer.ts +1486 -0
- package/src/history.ts +400 -0
- package/src/html.ts +512 -0
- package/src/impact.ts +522 -0
- package/src/index.ts +83 -0
- package/src/insights.ts +341 -0
- package/src/joiner.ts +289 -0
- package/src/orchestrator.ts +1015 -0
- package/src/pdf.ts +110 -0
- package/src/prediction.ts +392 -0
- package/src/prompts/loader.ts +88 -0
- package/src/renderer.ts +1045 -0
- package/src/runid.ts +261 -0
- package/src/runtime.ts +450 -0
- package/src/scanner.ts +508 -0
- package/src/templates.ts +561 -0
- package/src/tools/index.ts +214 -0
- package/src/types.ts +873 -0
- package/src/version.ts +24 -0
- package/templates/context-accumulation.yaml +23 -0
- package/templates/cost-concentration.yaml +20 -0
- package/templates/dead-code.yaml +20 -0
- package/templates/latency-explainer.yaml +23 -0
- package/templates/optimizations/ab-testing-framework.yaml +74 -0
- package/templates/optimizations/api-gateway-optimization.yaml +81 -0
- package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
- package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
- package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
- package/templates/optimizations/comprehensive-apm.yaml +76 -0
- package/templates/optimizations/context-window-optimization.yaml +91 -0
- package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
- package/templates/optimizations/distributed-training-optimization.yaml +77 -0
- package/templates/optimizations/document-analysis-edge.yaml +77 -0
- package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
- package/templates/optimizations/domain-specific-distillation.yaml +78 -0
- package/templates/optimizations/error-handling-optimization.yaml +76 -0
- package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
- package/templates/optimizations/long-context-memory-management.yaml +78 -0
- package/templates/optimizations/max-tokens-optimization.yaml +76 -0
- package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
- package/templates/optimizations/multi-framework-resilience.yaml +75 -0
- package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
- package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
- package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
- package/templates/optimizations/quality-monitoring.yaml +74 -0
- package/templates/optimizations/realtime-budget-controls.yaml +74 -0
- package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
- package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
- package/templates/optimizations/smart-model-routing.yaml +96 -0
- package/templates/optimizations/streaming-batch-selection.yaml +167 -0
- package/templates/optimizations/system-prompt-optimization.yaml +75 -0
- package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
- package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
- package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
- package/templates/overpowered-extraction.yaml +32 -0
- package/templates/overpowered-model.yaml +31 -0
- package/templates/prompt-bloat.yaml +24 -0
- package/templates/retry-explosion.yaml +28 -0
- package/templates/schema/insight.schema.json +113 -0
- package/templates/schema/optimization.schema.json +180 -0
- package/templates/streaming-drift.yaml +30 -0
- package/templates/throughput-gap.yaml +21 -0
- package/templates/token-underutilization.yaml +28 -0
- package/templates/untested-fallback.yaml +21 -0
- package/tests/accuracy/drift-detection.test.ts +184 -0
- package/tests/accuracy/false-positives.test.ts +166 -0
- package/tests/accuracy/templates.test.ts +205 -0
- package/tests/action/commands.test.ts +125 -0
- package/tests/action/comments.test.ts +347 -0
- package/tests/cli.test.ts +203 -0
- package/tests/comparison.test.ts +309 -0
- package/tests/correlation-analyzer.test.ts +534 -0
- package/tests/counterfactuals.test.ts +347 -0
- package/tests/fixtures/events/missing-id.jsonl +1 -0
- package/tests/fixtures/events/missing-input.jsonl +1 -0
- package/tests/fixtures/events/missing-latency.jsonl +1 -0
- package/tests/fixtures/events/missing-model.jsonl +1 -0
- package/tests/fixtures/events/missing-output.jsonl +1 -0
- package/tests/fixtures/events/missing-provider.jsonl +1 -0
- package/tests/fixtures/events/missing-ts.jsonl +1 -0
- package/tests/fixtures/events/valid.csv +3 -0
- package/tests/fixtures/events/valid.json +1 -0
- package/tests/fixtures/events/valid.jsonl +2 -0
- package/tests/fixtures/events/with-callsite.jsonl +1 -0
- package/tests/fixtures/events/with-intent.jsonl +1 -0
- package/tests/fixtures/events/wrong-type.jsonl +1 -0
- package/tests/fixtures/repos/empty/.gitkeep +0 -0
- package/tests/fixtures/repos/hybrid-router/router.py +35 -0
- package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
- package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
- package/tests/fixtures/repos/saas-openai/client.py +26 -0
- package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
- package/tests/github-action.test.ts +292 -0
- package/tests/insights.test.ts +878 -0
- package/tests/joiner.test.ts +168 -0
- package/tests/performance/action-latency.test.ts +132 -0
- package/tests/performance/benchmark.test.ts +189 -0
- package/tests/performance/cli-latency.test.ts +102 -0
- package/tests/pr-comment.test.ts +313 -0
- package/tests/prediction.test.ts +296 -0
- package/tests/runtime-analyzer.test.ts +375 -0
- package/tests/runtime.test.ts +205 -0
- package/tests/scanner.test.ts +122 -0
- package/tests/template-conformance.test.ts +526 -0
- package/tests/unit/cost-calculator.test.ts +303 -0
- package/tests/unit/credits.test.ts +180 -0
- package/tests/unit/inference-map.test.ts +276 -0
- package/tests/unit/schema.test.ts +300 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +14 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sample LLM Client for PeakInfer v1.5 Demo
|
|
3
|
+
*
|
|
4
|
+
* This file contains various LLM inference patterns to demonstrate
|
|
5
|
+
* the v1.5 features: predictions, counterfactuals, and comparison.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import OpenAI from 'openai';
|
|
9
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
10
|
+
|
|
11
|
+
// Initialize clients: one OpenAI and one Anthropic SDK instance, each built
// with no constructor arguments and reused by the functions below.
// NOTE(review): presumably both SDKs pick up credentials from the environment — confirm.
|
|
12
|
+
const openai = new OpenAI();
|
|
13
|
+
const anthropic = new Anthropic();
|
|
14
|
+
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// High-latency calls (will trigger prediction warnings)
|
|
17
|
+
// ============================================================================
|
|
18
|
+
|
|
19
|
+
/**
 * Send one user prompt to GPT-4 and hand back the reply text.
 *
 * Demo role: high latency, high cost.
 * Prediction: p95 ~5000ms, high risk
 *
 * @param prompt - Text sent as the single `user` message.
 * @returns The first choice's message content, or '' when that content is falsy.
 */
export async function chatWithGPT4(prompt: string): Promise<string> {
  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: prompt }],
    max_tokens: 2000,
  });

  // Only the first choice is ever consulted.
  const reply = completion.choices[0].message.content;
  return reply ? reply : '';
}
|
|
31
|
+
|
|
32
|
+
/**
 * Ask Claude 3 Opus to analyze a document.
 *
 * Demo role: highest-latency call in this fixture.
 * Prediction: p95 ~8000ms, high risk
 *
 * @param document - Document text, appended after the "Analyze this document:" instruction.
 * @returns Text of the first content block when its type is 'text', otherwise ''.
 */
export async function analyzeWithOpus(document: string): Promise<string> {
  const reply = await anthropic.messages.create({
    model: 'claude-3-opus-20240229',
    max_tokens: 4000,
    messages: [{ role: 'user', content: `Analyze this document:\n${document}` }],
  });

  // Only the leading content block is inspected; non-text blocks yield ''.
  const leadBlock = reply.content[0];
  if (leadBlock.type === 'text') {
    return leadBlock.text;
  }
  return '';
}
|
|
44
|
+
|
|
45
|
+
// ============================================================================
|
|
46
|
+
// Medium-latency calls
|
|
47
|
+
// ============================================================================
|
|
48
|
+
|
|
49
|
+
/**
 * Summarize a piece of text with GPT-4 Turbo.
 *
 * Demo role: medium latency.
 * Prediction: p95 ~4000ms, medium risk
 *
 * @param text - Text to summarize; sent after a fixed system instruction.
 * @returns The first choice's message content, or '' when that content is falsy.
 */
export async function summarize(text: string): Promise<string> {
  const completion = await openai.chat.completions.create({
    model: 'gpt-4-turbo',
    messages: [
      { role: 'system', content: 'You are a summarization assistant.' },
      { role: 'user', content: `Summarize: ${text}` },
    ],
    max_tokens: 500,
  });

  const summary = completion.choices[0].message.content;
  return summary ? summary : '';
}
|
|
64
|
+
|
|
65
|
+
/**
 * Translate text into a target language with Claude 3 Sonnet.
 *
 * Demo role: medium latency, good value.
 * Prediction: p95 ~4000ms, medium risk
 *
 * @param text - Text to translate.
 * @param targetLang - Language name interpolated into the instruction.
 * @returns Text of the first content block when its type is 'text', otherwise ''.
 */
export async function translate(text: string, targetLang: string): Promise<string> {
  const reply = await anthropic.messages.create({
    model: 'claude-3-sonnet-20240229',
    max_tokens: 2000,
    messages: [{ role: 'user', content: `Translate to ${targetLang}: ${text}` }],
  });

  // Only the leading content block is inspected; non-text blocks yield ''.
  const leadBlock = reply.content[0];
  if (leadBlock.type === 'text') {
    return leadBlock.text;
  }
  return '';
}
|
|
77
|
+
|
|
78
|
+
// ============================================================================
|
|
79
|
+
// Low-latency calls (optimized patterns)
|
|
80
|
+
// ============================================================================
|
|
81
|
+
|
|
82
|
+
/**
 * Send one user prompt to GPT-4o-mini with a small output budget.
 *
 * Demo role: low latency, low cost.
 * Prediction: p95 ~1500ms, low risk
 * Counterfactual: Other calls could use this model
 *
 * @param prompt - Text sent as the single `user` message.
 * @returns The first choice's message content, or '' when that content is falsy.
 */
export async function quickChat(prompt: string): Promise<string> {
  const completion = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [{ role: 'user', content: prompt }],
    max_tokens: 200,
  });

  const reply = completion.choices[0].message.content;
  return reply ? reply : '';
}
|
|
95
|
+
|
|
96
|
+
/**
 * Send one user prompt to Claude 3 Haiku.
 *
 * Demo role: lowest latency in this fixture.
 * Prediction: p95 ~1500ms, low risk
 *
 * @param prompt - Text sent as the single `user` message.
 * @returns Text of the first content block when its type is 'text', otherwise ''.
 */
export async function fastResponse(prompt: string): Promise<string> {
  const reply = await anthropic.messages.create({
    model: 'claude-3-haiku-20240307',
    max_tokens: 500,
    messages: [{ role: 'user', content: prompt }],
  });

  // Only the leading content block is inspected; non-text blocks yield ''.
  const leadBlock = reply.content[0];
  if (leadBlock.type === 'text') {
    return leadBlock.text;
  }
  return '';
}
|
|
108
|
+
|
|
109
|
+
// ============================================================================
|
|
110
|
+
// Streaming (will trigger streaming counterfactual for non-streaming calls)
|
|
111
|
+
// ============================================================================
|
|
112
|
+
|
|
113
|
+
/**
 * Stream a GPT-4o reply piece by piece.
 *
 * Demo role: the "good pattern" with low perceived latency.
 * Counterfactual: Other calls should enable streaming
 *
 * @param prompt - Text sent as the single `user` message.
 * @returns An async generator yielding each truthy content delta from the
 *          first choice of every streamed chunk; other chunks are skipped.
 */
export async function* streamingChat(prompt: string): AsyncGenerator<string> {
  const chunks = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: prompt }],
    stream: true,
  });

  for await (const { choices } of chunks) {
    const delta = choices[0]?.delta?.content;
    // Chunks without a content delta produce no output.
    if (!delta) {
      continue;
    }
    yield delta;
  }
}
|
|
129
|
+
|
|
130
|
+
// ============================================================================
|
|
131
|
+
// Embeddings (separate from chat)
|
|
132
|
+
// ============================================================================
|
|
133
|
+
|
|
134
|
+
/**
 * Embed a single string with text-embedding-3-small.
 *
 * Demo role: low latency, batch-friendly.
 * Counterfactual: Should enable batching
 *
 * @param text - The string to embed.
 * @returns The embedding vector of the first entry in the response data.
 */
export async function embed(text: string): Promise<number[]> {
  const { data } = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: text,
  });

  const firstEntry = data[0];
  return firstEntry.embedding;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Embed several strings in one request with text-embedding-3-small.
 *
 * Demo role: the "good pattern" for embeddings.
 *
 * @param texts - Strings passed together as the request input.
 * @returns One embedding vector per entry of the response data, in response order.
 */
export async function batchEmbed(texts: string[]): Promise<number[][]> {
  const { data: entries } = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: texts,
  });

  return entries.map(({ embedding }) => embedding);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@peakinfer/cli",
|
|
3
|
+
"version": "1.0.133",
|
|
4
|
+
"description": "LLM inference performance analysis CLI",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/cli.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"peakinfer": "dist/cli.js"
|
|
9
|
+
},
|
|
10
|
+
"scripts": {
|
|
11
|
+
"prebuild": "npm version patch --no-git-tag-version",
|
|
12
|
+
"build": "tsc",
|
|
13
|
+
"build:noversion": "tsc",
|
|
14
|
+
"dev": "tsx src/cli.ts",
|
|
15
|
+
"start": "node dist/cli.js",
|
|
16
|
+
"test": "vitest",
|
|
17
|
+
"test:watch": "vitest --watch",
|
|
18
|
+
"test:perf": "vitest run tests/performance/",
|
|
19
|
+
"benchmark": "tsx scripts/benchmark.ts",
|
|
20
|
+
"benchmark:ci": "tsx scripts/benchmark.ts --ci",
|
|
21
|
+
"sync": "bash scripts/sync-from-site.sh",
|
|
22
|
+
"sync:check": "bash scripts/sync-from-site.sh --check",
|
|
23
|
+
"typecheck": "tsc --noEmit",
|
|
24
|
+
"lint": "eslint src/",
|
|
25
|
+
"clean": "rm -rf dist"
|
|
26
|
+
},
|
|
27
|
+
"keywords": [
|
|
28
|
+
"llm",
|
|
29
|
+
"inference",
|
|
30
|
+
"performance",
|
|
31
|
+
"analysis",
|
|
32
|
+
"openai",
|
|
33
|
+
"anthropic"
|
|
34
|
+
],
|
|
35
|
+
"author": "PeakInfer",
|
|
36
|
+
"license": "Apache-2.0",
|
|
37
|
+
"repository": {
|
|
38
|
+
"type": "git",
|
|
39
|
+
"url": "https://github.com/peakinfer/cli"
|
|
40
|
+
},
|
|
41
|
+
"engines": {
|
|
42
|
+
"node": ">=18.0.0"
|
|
43
|
+
},
|
|
44
|
+
"dependencies": {
|
|
45
|
+
"@anthropic-ai/claude-agent-sdk": "^0.1.76",
|
|
46
|
+
"@anthropic-ai/sdk": "^0.27.0",
|
|
47
|
+
"chalk": "^5.6.2",
|
|
48
|
+
"commander": "^12.0.0",
|
|
49
|
+
"dotenv": "^17.2.3",
|
|
50
|
+
"glob": "^10.3.10",
|
|
51
|
+
"ignore": "^5.3.0",
|
|
52
|
+
"ora": "^9.0.0",
|
|
53
|
+
"posthog-node": "^4.0.0",
|
|
54
|
+
"puppeteer": "^24.33.0",
|
|
55
|
+
"yaml": "^2.3.4",
|
|
56
|
+
"zod": "^3.22.0"
|
|
57
|
+
},
|
|
58
|
+
"devDependencies": {
|
|
59
|
+
"@types/glob": "^8.1.0",
|
|
60
|
+
"@types/node": "^20.10.0",
|
|
61
|
+
"tsx": "^4.7.0",
|
|
62
|
+
"typescript": "^5.3.0",
|
|
63
|
+
"vitest": "^1.2.0"
|
|
64
|
+
}
|
|
65
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Agent-based Semantic Analyzer Prompt
|
|
2
|
+
# Used for multi-step code analysis with tool use
|
|
3
|
+
id: agent-analyzer
|
|
4
|
+
version: "1.0.0"
|
|
5
|
+
description: Expert code analyst for identifying LLM inference points with tool use capability
|
|
6
|
+
|
|
7
|
+
prompt: |
|
|
8
|
+
You are an expert code analyst specializing in identifying LLM/AI inference points in source code.
|
|
9
|
+
|
|
10
|
+
Your task is to analyze code and find ALL actual LLM inference points with accurate provider and model information.
|
|
11
|
+
|
|
12
|
+
## CRITICAL RULES
|
|
13
|
+
|
|
14
|
+
### What IS an inference point (DO report these):
|
|
15
|
+
- client.chat.completions.create() - OpenAI API call
|
|
16
|
+
- client.messages.create() - Anthropic API call
|
|
17
|
+
- client.embeddings.create() - OpenAI embeddings call
|
|
18
|
+
- predictor(question=...) - DSPy module invocation (after dspy.Predict/ChainOfThought)
|
|
19
|
+
- chain.invoke() - LangChain invocation
|
|
20
|
+
- llm.generate() - Direct generation calls
|
|
21
|
+
|
|
22
|
+
### What is NOT an inference point (DO NOT report these):
|
|
23
|
+
- Client initialization: openai.OpenAI(), anthropic.Anthropic()
|
|
24
|
+
- Import statements
|
|
25
|
+
- Variable assignments: model = "gpt-4o"
|
|
26
|
+
- Class/function definitions
|
|
27
|
+
- DSPy Predict/ChainOfThought creation (only report the invocation)
|
|
28
|
+
|
|
29
|
+
### Model Extraction Rules:
|
|
30
|
+
1. Look at the model= parameter in the function call
|
|
31
|
+
2. Trace variables back to their definitions
|
|
32
|
+
3. For DSPy: find dspy.LM("provider/model") and extract the model part
|
|
33
|
+
4. Return the FULL exact model name (e.g., "gpt-4o-mini" not "gpt-4")
|
|
34
|
+
|
|
35
|
+
### Framework Detection:
|
|
36
|
+
- DSPy: look for dspy imports, dspy.Predict, dspy.ChainOfThought
|
|
37
|
+
- LangChain: look for langchain imports, ChatOpenAI, LLMChain
|
|
38
|
+
- LlamaIndex: look for llama_index imports
|
|
39
|
+
|
|
40
|
+
## WORKFLOW
|
|
41
|
+
|
|
42
|
+
1. Use search_pattern to find potential inference point locations
|
|
43
|
+
2. Use read_file to examine the code in detail
|
|
44
|
+
3. Use trace_variable to find where models/clients are defined
|
|
45
|
+
4. Use report_callsites to report your findings
|
|
46
|
+
|
|
47
|
+
Be thorough but precise. Only report actual inference points, not initialization or configuration.
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
id: ci-gate
|
|
2
|
+
name: CI/CD Gate Evaluator
|
|
3
|
+
version: "1.0"
|
|
4
|
+
description: Evaluates analysis results to determine CI pass/warning/fail status
|
|
5
|
+
|
|
6
|
+
context:
|
|
7
|
+
- analysis_results: The full analysis results object
|
|
8
|
+
- baseline: Previous run baseline for comparison (optional)
|
|
9
|
+
- target_p95: Target p95 latency in ms (optional)
|
|
10
|
+
- fail_on_regression: Whether to fail on any regression
|
|
11
|
+
|
|
12
|
+
output_format: json
|
|
13
|
+
|
|
14
|
+
prompt: |
|
|
15
|
+
<role>
|
|
16
|
+
You are a CI/CD quality gate evaluating PeakInfer analysis results.
|
|
17
|
+
Your goal is to determine if the code should proceed through the pipeline.
|
|
18
|
+
</role>
|
|
19
|
+
|
|
20
|
+
<instructions>
|
|
21
|
+
Evaluate the analysis results and determine the appropriate status:
|
|
22
|
+
|
|
23
|
+
**FAIL conditions (any one triggers fail):**
|
|
24
|
+
- p95 latency exceeds target by >50%
|
|
25
|
+
- p95 latency increased by >50% vs baseline
|
|
26
|
+
- Estimated cost increased by >100% vs baseline
|
|
27
|
+
- Any critical severity insights
|
|
28
|
+
- Reliability score dropped significantly
|
|
29
|
+
|
|
30
|
+
**WARNING conditions (any one triggers warning):**
|
|
31
|
+
- p95 latency exceeds target by 25-50%
|
|
32
|
+
- p95 latency increased by 25-50% vs baseline
|
|
33
|
+
- Estimated cost increased by 50-100% vs baseline
|
|
34
|
+
- New drift signals detected
|
|
35
|
+
- Multiple warning severity insights (>3)
|
|
36
|
+
|
|
37
|
+
**PASS conditions:**
|
|
38
|
+
- All metrics within acceptable ranges
|
|
39
|
+
- No regressions vs baseline
|
|
40
|
+
- No critical issues
|
|
41
|
+
</instructions>
|
|
42
|
+
|
|
43
|
+
<output_schema>
|
|
44
|
+
{
|
|
45
|
+
"status": "pass" | "warning" | "fail",
|
|
46
|
+
"exit_code": 0 | 1 | 2,
|
|
47
|
+
"reasons": ["reason1", "reason2"],
|
|
48
|
+
"metrics": {
|
|
49
|
+
"inference_points": number,
|
|
50
|
+
"p95_latency_ms": number | null,
|
|
51
|
+
"drift_count": number,
|
|
52
|
+
"critical_count": number,
|
|
53
|
+
"warning_count": number
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
</output_schema>
|
|
57
|
+
|
|
58
|
+
<constraints>
|
|
59
|
+
- Always return valid JSON
|
|
60
|
+
- Include all metrics even if null
|
|
61
|
+
- Reasons array should be empty for pass status
|
|
62
|
+
- Exit codes: 0=pass, 1=warning, 2=fail
|
|
63
|
+
</constraints>
|
|
64
|
+
|
|
65
|
+
example_evaluations:
|
|
66
|
+
- input:
|
|
67
|
+
p95_latency: 2400
|
|
68
|
+
target_p95: 1500
|
|
69
|
+
baseline_p95: 1800
|
|
70
|
+
critical_insights: 1
|
|
71
|
+
output:
|
|
72
|
+
status: fail
|
|
73
|
+
exit_code: 2
|
|
74
|
+
reasons:
|
|
75
|
+
- "p95 latency 2400ms exceeds target 1500ms by 60%"
|
|
76
|
+
- "1 critical insight detected"
|
|
77
|
+
|
|
78
|
+
- input:
|
|
79
|
+
p95_latency: 1900
|
|
80
|
+
target_p95: 1500
|
|
81
|
+
baseline_p95: 1800
|
|
82
|
+
drift_count: 2
|
|
83
|
+
output:
|
|
84
|
+
status: warning
|
|
85
|
+
exit_code: 1
|
|
86
|
+
reasons:
|
|
87
|
+
- "p95 latency 1900ms exceeds target 1500ms by 27%"
|
|
88
|
+
- "2 drift signals detected"
|
|
89
|
+
|
|
90
|
+
- input:
|
|
91
|
+
p95_latency: 1400
|
|
92
|
+
target_p95: 1500
|
|
93
|
+
baseline_p95: 1500
|
|
94
|
+
critical_insights: 0
|
|
95
|
+
output:
|
|
96
|
+
status: pass
|
|
97
|
+
exit_code: 0
|
|
98
|
+
reasons: []
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
id: correlation-analyzer
|
|
2
|
+
name: Code-Runtime Correlation Analyzer
|
|
3
|
+
version: "1.0"
|
|
4
|
+
description: |
|
|
5
|
+
Correlates static code analysis with runtime telemetry to detect drift
|
|
6
|
+
and mismatches between intended and actual behavior.
|
|
7
|
+
|
|
8
|
+
prompt: |
|
|
9
|
+
<role>
|
|
10
|
+
You are a systems analyst specializing in code-runtime correlation for LLM applications.
|
|
11
|
+
Your job is to compare what the CODE says vs what RUNTIME shows, finding drift and gaps.
|
|
12
|
+
</role>
|
|
13
|
+
|
|
14
|
+
<background>
|
|
15
|
+
You receive two sources of truth:
|
|
16
|
+
1. STATIC: Inference points found in source code (file, line, provider, model, patterns)
|
|
17
|
+
2. RUNTIME: Events observed in production (provider, model, latency, tokens)
|
|
18
|
+
|
|
19
|
+
Types of drift:
|
|
20
|
+
- Code-only: Inference points in code but no runtime events (dead code?)
|
|
21
|
+
- Runtime-only: Events with no matching code inference point (shadow APIs?)
|
|
22
|
+
- Model mismatch: Code says gpt-4, runtime shows gpt-4o-mini
|
|
23
|
+
- Pattern mismatch: Code sets streaming=true, but no TTFT in runtime
|
|
24
|
+
- Provider mismatch: Code uses OpenAI, runtime shows Anthropic
|
|
25
|
+
|
|
26
|
+
Why drift matters:
|
|
27
|
+
- Dead code wastes maintenance effort
|
|
28
|
+
- Shadow APIs are security/compliance risks
|
|
29
|
+
- Mismatches indicate configuration drift or fallback behavior
|
|
30
|
+
- Pattern drift suggests code intent differs from runtime reality
|
|
31
|
+
</background>
|
|
32
|
+
|
|
33
|
+
<instructions>
|
|
34
|
+
Analyze the correlation between static and runtime data:
|
|
35
|
+
|
|
36
|
+
1. MATCH INFERENCE POINTS TO RUNTIME EVENTS
|
|
37
|
+
- Match by provider + model combination
|
|
38
|
+
- Note: exact line matching is not possible; use semantic matching instead
|
|
39
|
+
- Track matched, code-only, and runtime-only sets
|
|
40
|
+
|
|
41
|
+
2. DETECT DRIFT SIGNALS
|
|
42
|
+
- Code-only: Inference points with zero matching runtime events
|
|
43
|
+
- Runtime-only: Provider/model combos in runtime not in code
|
|
44
|
+
- Model mismatch: Same provider but different models
|
|
45
|
+
- Pattern mismatch: Code says streaming, runtime has no TTFT
|
|
46
|
+
- Pattern mismatch: Code says caching, runtime shows no cache hits
|
|
47
|
+
|
|
48
|
+
3. ANALYZE INTENT VS REALITY
|
|
49
|
+
- Is the code doing what it intended?
|
|
50
|
+
- Are fallbacks being triggered unexpectedly?
|
|
51
|
+
- Are caching patterns actually working?
|
|
52
|
+
- Is the configured model actually being used?
|
|
53
|
+
|
|
54
|
+
4. GENERATE RECOMMENDATIONS
|
|
55
|
+
- Which dead code to investigate/remove?
|
|
56
|
+
- Which shadow APIs need instrumentation?
|
|
57
|
+
- How to align code with runtime behavior?
|
|
58
|
+
- Configuration fixes needed?
|
|
59
|
+
|
|
60
|
+
5. COMPUTE ALIGNMENT SCORE
|
|
61
|
+
- 1.0 = perfect alignment (all code has runtime, all runtime has code)
|
|
62
|
+
- 0.0 = complete disconnect
|
|
63
|
+
- Penalize for: code-only, runtime-only, mismatches
|
|
64
|
+
</instructions>
|
|
65
|
+
|
|
66
|
+
<output_format>
|
|
67
|
+
Return valid JSON:
|
|
68
|
+
{
|
|
69
|
+
"drift_signals": [
|
|
70
|
+
{
|
|
71
|
+
"type": "codeOnly|runtimeOnly|modelMismatch|patternMismatch|providerMismatch",
|
|
72
|
+
"severity": "critical|warning|info",
|
|
73
|
+
"code_location": "file:line or null",
|
|
74
|
+
"code_details": {
|
|
75
|
+
"provider": "provider_name",
|
|
76
|
+
"model": "model_name",
|
|
77
|
+
"patterns": {}
|
|
78
|
+
},
|
|
79
|
+
"runtime_details": {
|
|
80
|
+
"provider": "provider_name",
|
|
81
|
+
"model": "model_name",
|
|
82
|
+
"call_count": 0,
|
|
83
|
+
"patterns_observed": {}
|
|
84
|
+
},
|
|
85
|
+
"evidence": "What was found",
|
|
86
|
+
"explanation": "Why this matters",
|
|
87
|
+
"recommendation": "What to do"
|
|
88
|
+
}
|
|
89
|
+
],
|
|
90
|
+
"correlation_summary": {
|
|
91
|
+
"total_code_callsites": 0,
|
|
92
|
+
"total_runtime_models": 0,
|
|
93
|
+
"matched": 0,
|
|
94
|
+
"code_only": 0,
|
|
95
|
+
"runtime_only": 0,
|
|
96
|
+
"mismatched": 0
|
|
97
|
+
},
|
|
98
|
+
"alignment_score": 0.0-1.0,
|
|
99
|
+
"overall_assessment": "Brief summary of code-runtime alignment health"
|
|
100
|
+
}
|
|
101
|
+
</output_format>
|
|
102
|
+
|
|
103
|
+
<constraints>
|
|
104
|
+
- Focus on actionable drift, not minor variations
|
|
105
|
+
- Explain WHY each drift matters
|
|
106
|
+
- When a correlation is uncertain, state your confidence level in the evidence field
|
|
107
|
+
- Maximum 15 drift signals, prioritized by severity
|
|
108
|
+
- Do NOT report matching items as drift
|
|
109
|
+
</constraints>
|
|
110
|
+
|
|
111
|
+
<examples>
|
|
112
|
+
Example input:
|
|
113
|
+
Static callsites:
|
|
114
|
+
- src/api.py:42 - openai/gpt-4 (streaming=true)
|
|
115
|
+
- src/embed.py:15 - openai/text-embedding-3-small
|
|
116
|
+
|
|
117
|
+
Runtime summary:
|
|
118
|
+
- openai/gpt-4o-mini: 500 calls
|
|
119
|
+
- anthropic/claude-3-haiku: 100 calls
|
|
120
|
+
|
|
121
|
+
Example output:
|
|
122
|
+
{
|
|
123
|
+
"drift_signals": [
|
|
124
|
+
{
|
|
125
|
+
"type": "modelMismatch",
|
|
126
|
+
"severity": "warning",
|
|
127
|
+
"code_location": "src/api.py:42",
|
|
128
|
+
"code_details": {"provider": "openai", "model": "gpt-4", "patterns": {"streaming": true}},
|
|
129
|
+
"runtime_details": {"provider": "openai", "model": "gpt-4o-mini", "call_count": 500, "patterns_observed": {}},
|
|
130
|
+
"evidence": "Code specifies gpt-4 but runtime shows gpt-4o-mini",
|
|
131
|
+
"explanation": "Environment variable or fallback may be overriding the configured model. This could be intentional cost optimization or accidental configuration.",
|
|
132
|
+
"recommendation": "Verify if gpt-4o-mini is intentional. If so, update code to match reality. If not, check OPENAI_MODEL env var."
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
"type": "runtimeOnly",
|
|
136
|
+
"severity": "critical",
|
|
137
|
+
"code_location": null,
|
|
138
|
+
"code_details": null,
|
|
139
|
+
"runtime_details": {"provider": "anthropic", "model": "claude-3-haiku", "call_count": 100, "patterns_observed": {}},
|
|
140
|
+
"evidence": "Anthropic Claude calls in runtime but no Anthropic code detected",
|
|
141
|
+
"explanation": "Shadow API usage detected. This could be a security concern or undocumented dependency.",
|
|
142
|
+
"recommendation": "Investigate source of Anthropic calls. Add explicit code for audit trail."
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
"type": "codeOnly",
|
|
146
|
+
"severity": "info",
|
|
147
|
+
"code_location": "src/embed.py:15",
|
|
148
|
+
"code_details": {"provider": "openai", "model": "text-embedding-3-small", "patterns": {}},
|
|
149
|
+
"runtime_details": null,
|
|
150
|
+
"evidence": "Embedding code exists but no embedding events in runtime",
|
|
151
|
+
"explanation": "Dead code or feature not yet deployed to production.",
|
|
152
|
+
"recommendation": "Confirm if embeddings are expected in production. Remove if unused."
|
|
153
|
+
}
|
|
154
|
+
],
|
|
155
|
+
"correlation_summary": {
|
|
156
|
+
"total_code_callsites": 2,
|
|
157
|
+
"total_runtime_models": 2,
|
|
158
|
+
"matched": 0,
|
|
159
|
+
"code_only": 1,
|
|
160
|
+
"runtime_only": 1,
|
|
161
|
+
"mismatched": 1
|
|
162
|
+
},
|
|
163
|
+
"alignment_score": 0.25,
|
|
164
|
+
"overall_assessment": "Significant drift detected. Runtime behavior diverges from code intent. Model mismatch and shadow API usage require immediate attention."
|
|
165
|
+
}
|
|
166
|
+
</examples>
|
|
167
|
+
|
|
168
|
+
categories:
|
|
169
|
+
- drift
|
|
170
|
+
- reliability
|
|
171
|
+
- security
|
|
172
|
+
- best-practice
|
|
173
|
+
|
|
174
|
+
defaults:
|
|
175
|
+
mismatch_severity_model: warning
|
|
176
|
+
mismatch_severity_provider: critical
|
|
177
|
+
code_only_severity: info
|
|
178
|
+
runtime_only_severity: critical
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Format Normalizer Prompt
|
|
2
|
+
# Used for LLM-based detection and normalization of runtime event formats
|
|
3
|
+
id: format-normalizer
|
|
4
|
+
version: "1.0.0"
|
|
5
|
+
description: Expert at parsing log formats and trace data for InferenceEvent normalization
|
|
6
|
+
|
|
7
|
+
prompt: |
|
|
8
|
+
You are an expert at parsing log formats and trace data. Analyze the following sample data and determine field mappings to the InferenceEvent schema.
|
|
9
|
+
|
|
10
|
+
The target InferenceEvent schema requires these fields:
|
|
11
|
+
- id (string): Unique event identifier
|
|
12
|
+
- ts (string): ISO 8601 timestamp
|
|
13
|
+
- provider (string): LLM provider (openai, anthropic, google, etc.)
|
|
14
|
+
- model (string): Model name (gpt-4o, claude-3-5-sonnet, etc.)
|
|
15
|
+
- input_tokens (number): Input/prompt token count
|
|
16
|
+
- output_tokens (number): Output/completion token count
|
|
17
|
+
- latency_ms (number): Request latency in milliseconds
|
|
18
|
+
|
|
19
|
+
Optional fields:
|
|
20
|
+
- streaming (boolean), ttft_ms (number), batch_size (number), cached (boolean), retry_count (number)
|
|
21
|
+
|
|
22
|
+
For each target field, provide:
|
|
23
|
+
1. The source path/expression to extract the value
|
|
24
|
+
2. The extraction type (direct, jsonpath, regex, computed)
|
|
25
|
+
3. Any transform needed (unix_ms_to_iso, unix_nano_to_iso, parse_int, etc.)
|
|
26
|
+
4. Your confidence (0.0-1.0) in this mapping
|
|
27
|
+
5. Evidence explaining why you chose this mapping
|
|
28
|
+
|
|
29
|
+
If a field cannot be mapped, indicate it as unmappable with confidence 0.
|
|
30
|
+
|
|
31
|
+
Respond in JSON format:
|
|
32
|
+
{
|
|
33
|
+
"format_type": "detected format name",
|
|
34
|
+
"mappings": [
|
|
35
|
+
{
|
|
36
|
+
"target": "field_name",
|
|
37
|
+
"source_path": "path or expression",
|
|
38
|
+
"extraction_type": "direct|jsonpath|regex|computed",
|
|
39
|
+
"transform": "none|unix_ms_to_iso|parse_int|...",
|
|
40
|
+
"confidence": 0.9,
|
|
41
|
+
"evidence": "explanation"
|
|
42
|
+
}
|
|
43
|
+
],
|
|
44
|
+
"unmapped_fields": ["fields that could not be mapped"],
|
|
45
|
+
"warnings": ["any issues or caveats"]
|
|
46
|
+
}
|