@peakinfer/cli 1.0.133
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -0
- package/.env.example +6 -0
- package/.github/workflows/peakinfer.yml +64 -0
- package/CHANGELOG.md +31 -0
- package/LICENSE +190 -0
- package/README.md +335 -0
- package/data/inferencemax.json +274 -0
- package/dist/agent-analyzer.d.ts +45 -0
- package/dist/agent-analyzer.d.ts.map +1 -0
- package/dist/agent-analyzer.js +374 -0
- package/dist/agent-analyzer.js.map +1 -0
- package/dist/agent.d.ts +76 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +965 -0
- package/dist/agent.js.map +1 -0
- package/dist/agents/correlation-analyzer.d.ts +34 -0
- package/dist/agents/correlation-analyzer.d.ts.map +1 -0
- package/dist/agents/correlation-analyzer.js +261 -0
- package/dist/agents/correlation-analyzer.js.map +1 -0
- package/dist/agents/index.d.ts +91 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +111 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/runtime-analyzer.d.ts +38 -0
- package/dist/agents/runtime-analyzer.d.ts.map +1 -0
- package/dist/agents/runtime-analyzer.js +244 -0
- package/dist/agents/runtime-analyzer.js.map +1 -0
- package/dist/analysis-types.d.ts +500 -0
- package/dist/analysis-types.d.ts.map +1 -0
- package/dist/analysis-types.js +11 -0
- package/dist/analysis-types.js.map +1 -0
- package/dist/analytics.d.ts +25 -0
- package/dist/analytics.d.ts.map +1 -0
- package/dist/analytics.js +94 -0
- package/dist/analytics.js.map +1 -0
- package/dist/analyzer.d.ts +48 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +547 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/artifacts.d.ts +44 -0
- package/dist/artifacts.d.ts.map +1 -0
- package/dist/artifacts.js +165 -0
- package/dist/artifacts.js.map +1 -0
- package/dist/benchmarks/index.d.ts +88 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +205 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +427 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/ci.d.ts +19 -0
- package/dist/commands/ci.d.ts.map +1 -0
- package/dist/commands/ci.js +253 -0
- package/dist/commands/ci.js.map +1 -0
- package/dist/commands/config.d.ts +16 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +249 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/demo.d.ts +15 -0
- package/dist/commands/demo.d.ts.map +1 -0
- package/dist/commands/demo.js +106 -0
- package/dist/commands/demo.js.map +1 -0
- package/dist/commands/export.d.ts +14 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +209 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/history.d.ts +15 -0
- package/dist/commands/history.d.ts.map +1 -0
- package/dist/commands/history.js +389 -0
- package/dist/commands/history.js.map +1 -0
- package/dist/commands/template.d.ts +14 -0
- package/dist/commands/template.d.ts.map +1 -0
- package/dist/commands/template.js +341 -0
- package/dist/commands/template.js.map +1 -0
- package/dist/commands/validate-map.d.ts +12 -0
- package/dist/commands/validate-map.d.ts.map +1 -0
- package/dist/commands/validate-map.js +274 -0
- package/dist/commands/validate-map.js.map +1 -0
- package/dist/commands/whatif.d.ts +17 -0
- package/dist/commands/whatif.d.ts.map +1 -0
- package/dist/commands/whatif.js +206 -0
- package/dist/commands/whatif.js.map +1 -0
- package/dist/comparison.d.ts +38 -0
- package/dist/comparison.d.ts.map +1 -0
- package/dist/comparison.js +223 -0
- package/dist/comparison.js.map +1 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +158 -0
- package/dist/config.js.map +1 -0
- package/dist/connectors/helicone.d.ts +9 -0
- package/dist/connectors/helicone.d.ts.map +1 -0
- package/dist/connectors/helicone.js +106 -0
- package/dist/connectors/helicone.js.map +1 -0
- package/dist/connectors/index.d.ts +37 -0
- package/dist/connectors/index.d.ts.map +1 -0
- package/dist/connectors/index.js +65 -0
- package/dist/connectors/index.js.map +1 -0
- package/dist/connectors/langsmith.d.ts +9 -0
- package/dist/connectors/langsmith.d.ts.map +1 -0
- package/dist/connectors/langsmith.js +122 -0
- package/dist/connectors/langsmith.js.map +1 -0
- package/dist/connectors/types.d.ts +83 -0
- package/dist/connectors/types.d.ts.map +1 -0
- package/dist/connectors/types.js +98 -0
- package/dist/connectors/types.js.map +1 -0
- package/dist/cost-estimator.d.ts +46 -0
- package/dist/cost-estimator.d.ts.map +1 -0
- package/dist/cost-estimator.js +104 -0
- package/dist/cost-estimator.js.map +1 -0
- package/dist/costs.d.ts +57 -0
- package/dist/costs.d.ts.map +1 -0
- package/dist/costs.js +251 -0
- package/dist/costs.js.map +1 -0
- package/dist/counterfactuals.d.ts +29 -0
- package/dist/counterfactuals.d.ts.map +1 -0
- package/dist/counterfactuals.js +448 -0
- package/dist/counterfactuals.js.map +1 -0
- package/dist/enhancement-prompts.d.ts +41 -0
- package/dist/enhancement-prompts.d.ts.map +1 -0
- package/dist/enhancement-prompts.js +88 -0
- package/dist/enhancement-prompts.js.map +1 -0
- package/dist/envelopes.d.ts +20 -0
- package/dist/envelopes.d.ts.map +1 -0
- package/dist/envelopes.js +790 -0
- package/dist/envelopes.js.map +1 -0
- package/dist/format-normalizer.d.ts +71 -0
- package/dist/format-normalizer.d.ts.map +1 -0
- package/dist/format-normalizer.js +1331 -0
- package/dist/format-normalizer.js.map +1 -0
- package/dist/history.d.ts +79 -0
- package/dist/history.d.ts.map +1 -0
- package/dist/history.js +313 -0
- package/dist/history.js.map +1 -0
- package/dist/html.d.ts +11 -0
- package/dist/html.d.ts.map +1 -0
- package/dist/html.js +463 -0
- package/dist/html.js.map +1 -0
- package/dist/impact.d.ts +42 -0
- package/dist/impact.d.ts.map +1 -0
- package/dist/impact.js +443 -0
- package/dist/impact.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +34 -0
- package/dist/index.js.map +1 -0
- package/dist/insights.d.ts +5 -0
- package/dist/insights.d.ts.map +1 -0
- package/dist/insights.js +271 -0
- package/dist/insights.js.map +1 -0
- package/dist/joiner.d.ts +9 -0
- package/dist/joiner.d.ts.map +1 -0
- package/dist/joiner.js +247 -0
- package/dist/joiner.js.map +1 -0
- package/dist/orchestrator.d.ts +34 -0
- package/dist/orchestrator.d.ts.map +1 -0
- package/dist/orchestrator.js +827 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/pdf.d.ts +26 -0
- package/dist/pdf.d.ts.map +1 -0
- package/dist/pdf.js +84 -0
- package/dist/pdf.js.map +1 -0
- package/dist/prediction.d.ts +33 -0
- package/dist/prediction.d.ts.map +1 -0
- package/dist/prediction.js +316 -0
- package/dist/prediction.js.map +1 -0
- package/dist/prompts/loader.d.ts +38 -0
- package/dist/prompts/loader.d.ts.map +1 -0
- package/dist/prompts/loader.js +60 -0
- package/dist/prompts/loader.js.map +1 -0
- package/dist/renderer.d.ts +64 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +923 -0
- package/dist/renderer.js.map +1 -0
- package/dist/runid.d.ts +57 -0
- package/dist/runid.d.ts.map +1 -0
- package/dist/runid.js +199 -0
- package/dist/runid.js.map +1 -0
- package/dist/runtime.d.ts +29 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +366 -0
- package/dist/runtime.js.map +1 -0
- package/dist/scanner.d.ts +11 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +426 -0
- package/dist/scanner.js.map +1 -0
- package/dist/templates.d.ts +120 -0
- package/dist/templates.d.ts.map +1 -0
- package/dist/templates.js +429 -0
- package/dist/templates.js.map +1 -0
- package/dist/tools/index.d.ts +153 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +177 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/types.d.ts +3647 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +703 -0
- package/dist/types.js.map +1 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +23 -0
- package/dist/version.js.map +1 -0
- package/docs/demo-guide.md +423 -0
- package/docs/events-format.md +295 -0
- package/docs/inferencemap-spec.md +344 -0
- package/docs/migration-v2.md +293 -0
- package/fixtures/demo/precomputed.json +142 -0
- package/fixtures/demo-project/README.md +52 -0
- package/fixtures/demo-project/ai-service.ts +65 -0
- package/fixtures/demo-project/sample-events.jsonl +15 -0
- package/fixtures/demo-project/src/ai-service.ts +128 -0
- package/fixtures/demo-project/src/llm-client.ts +155 -0
- package/package.json +65 -0
- package/prompts/agent-analyzer.yaml +47 -0
- package/prompts/ci-gate.yaml +98 -0
- package/prompts/correlation-analyzer.yaml +178 -0
- package/prompts/format-normalizer.yaml +46 -0
- package/prompts/peak-performance.yaml +180 -0
- package/prompts/pr-comment.yaml +111 -0
- package/prompts/runtime-analyzer.yaml +189 -0
- package/prompts/unified-analyzer.yaml +241 -0
- package/schemas/inference-map.v0.1.json +215 -0
- package/scripts/benchmark.ts +394 -0
- package/scripts/demo-v1.5.sh +158 -0
- package/scripts/sync-from-site.sh +197 -0
- package/scripts/validate-sync.sh +178 -0
- package/src/agent-analyzer.ts +481 -0
- package/src/agent.ts +1232 -0
- package/src/agents/correlation-analyzer.ts +353 -0
- package/src/agents/index.ts +235 -0
- package/src/agents/runtime-analyzer.ts +343 -0
- package/src/analysis-types.ts +558 -0
- package/src/analytics.ts +100 -0
- package/src/analyzer.ts +692 -0
- package/src/artifacts.ts +218 -0
- package/src/benchmarks/index.ts +309 -0
- package/src/cli.ts +503 -0
- package/src/commands/ci.ts +336 -0
- package/src/commands/config.ts +288 -0
- package/src/commands/demo.ts +175 -0
- package/src/commands/export.ts +297 -0
- package/src/commands/history.ts +425 -0
- package/src/commands/template.ts +385 -0
- package/src/commands/validate-map.ts +324 -0
- package/src/commands/whatif.ts +272 -0
- package/src/comparison.ts +283 -0
- package/src/config.ts +188 -0
- package/src/connectors/helicone.ts +164 -0
- package/src/connectors/index.ts +93 -0
- package/src/connectors/langsmith.ts +179 -0
- package/src/connectors/types.ts +180 -0
- package/src/cost-estimator.ts +146 -0
- package/src/costs.ts +347 -0
- package/src/counterfactuals.ts +516 -0
- package/src/enhancement-prompts.ts +118 -0
- package/src/envelopes.ts +814 -0
- package/src/format-normalizer.ts +1486 -0
- package/src/history.ts +400 -0
- package/src/html.ts +512 -0
- package/src/impact.ts +522 -0
- package/src/index.ts +83 -0
- package/src/insights.ts +341 -0
- package/src/joiner.ts +289 -0
- package/src/orchestrator.ts +1015 -0
- package/src/pdf.ts +110 -0
- package/src/prediction.ts +392 -0
- package/src/prompts/loader.ts +88 -0
- package/src/renderer.ts +1045 -0
- package/src/runid.ts +261 -0
- package/src/runtime.ts +450 -0
- package/src/scanner.ts +508 -0
- package/src/templates.ts +561 -0
- package/src/tools/index.ts +214 -0
- package/src/types.ts +873 -0
- package/src/version.ts +24 -0
- package/templates/context-accumulation.yaml +23 -0
- package/templates/cost-concentration.yaml +20 -0
- package/templates/dead-code.yaml +20 -0
- package/templates/latency-explainer.yaml +23 -0
- package/templates/optimizations/ab-testing-framework.yaml +74 -0
- package/templates/optimizations/api-gateway-optimization.yaml +81 -0
- package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
- package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
- package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
- package/templates/optimizations/comprehensive-apm.yaml +76 -0
- package/templates/optimizations/context-window-optimization.yaml +91 -0
- package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
- package/templates/optimizations/distributed-training-optimization.yaml +77 -0
- package/templates/optimizations/document-analysis-edge.yaml +77 -0
- package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
- package/templates/optimizations/domain-specific-distillation.yaml +78 -0
- package/templates/optimizations/error-handling-optimization.yaml +76 -0
- package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
- package/templates/optimizations/long-context-memory-management.yaml +78 -0
- package/templates/optimizations/max-tokens-optimization.yaml +76 -0
- package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
- package/templates/optimizations/multi-framework-resilience.yaml +75 -0
- package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
- package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
- package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
- package/templates/optimizations/quality-monitoring.yaml +74 -0
- package/templates/optimizations/realtime-budget-controls.yaml +74 -0
- package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
- package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
- package/templates/optimizations/smart-model-routing.yaml +96 -0
- package/templates/optimizations/streaming-batch-selection.yaml +167 -0
- package/templates/optimizations/system-prompt-optimization.yaml +75 -0
- package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
- package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
- package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
- package/templates/overpowered-extraction.yaml +32 -0
- package/templates/overpowered-model.yaml +31 -0
- package/templates/prompt-bloat.yaml +24 -0
- package/templates/retry-explosion.yaml +28 -0
- package/templates/schema/insight.schema.json +113 -0
- package/templates/schema/optimization.schema.json +180 -0
- package/templates/streaming-drift.yaml +30 -0
- package/templates/throughput-gap.yaml +21 -0
- package/templates/token-underutilization.yaml +28 -0
- package/templates/untested-fallback.yaml +21 -0
- package/tests/accuracy/drift-detection.test.ts +184 -0
- package/tests/accuracy/false-positives.test.ts +166 -0
- package/tests/accuracy/templates.test.ts +205 -0
- package/tests/action/commands.test.ts +125 -0
- package/tests/action/comments.test.ts +347 -0
- package/tests/cli.test.ts +203 -0
- package/tests/comparison.test.ts +309 -0
- package/tests/correlation-analyzer.test.ts +534 -0
- package/tests/counterfactuals.test.ts +347 -0
- package/tests/fixtures/events/missing-id.jsonl +1 -0
- package/tests/fixtures/events/missing-input.jsonl +1 -0
- package/tests/fixtures/events/missing-latency.jsonl +1 -0
- package/tests/fixtures/events/missing-model.jsonl +1 -0
- package/tests/fixtures/events/missing-output.jsonl +1 -0
- package/tests/fixtures/events/missing-provider.jsonl +1 -0
- package/tests/fixtures/events/missing-ts.jsonl +1 -0
- package/tests/fixtures/events/valid.csv +3 -0
- package/tests/fixtures/events/valid.json +1 -0
- package/tests/fixtures/events/valid.jsonl +2 -0
- package/tests/fixtures/events/with-callsite.jsonl +1 -0
- package/tests/fixtures/events/with-intent.jsonl +1 -0
- package/tests/fixtures/events/wrong-type.jsonl +1 -0
- package/tests/fixtures/repos/empty/.gitkeep +0 -0
- package/tests/fixtures/repos/hybrid-router/router.py +35 -0
- package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
- package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
- package/tests/fixtures/repos/saas-openai/client.py +26 -0
- package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
- package/tests/github-action.test.ts +292 -0
- package/tests/insights.test.ts +878 -0
- package/tests/joiner.test.ts +168 -0
- package/tests/performance/action-latency.test.ts +132 -0
- package/tests/performance/benchmark.test.ts +189 -0
- package/tests/performance/cli-latency.test.ts +102 -0
- package/tests/pr-comment.test.ts +313 -0
- package/tests/prediction.test.ts +296 -0
- package/tests/runtime-analyzer.test.ts +375 -0
- package/tests/runtime.test.ts +205 -0
- package/tests/scanner.test.ts +122 -0
- package/tests/template-conformance.test.ts +526 -0
- package/tests/unit/cost-calculator.test.ts +303 -0
- package/tests/unit/credits.test.ts +180 -0
- package/tests/unit/inference-map.test.ts +276 -0
- package/tests/unit/schema.test.ts +300 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +14 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
# Migration Guide: PeakInfer v1.8 → v2.0
|
|
2
|
+
|
|
3
|
+
This guide helps you migrate from PeakInfer v1.8 to v2.0.
|
|
4
|
+
|
|
5
|
+
## Overview of Changes
|
|
6
|
+
|
|
7
|
+
PeakInfer v2.0 is a major architectural upgrade that improves accuracy, speed, and maintainability.
|
|
8
|
+
|
|
9
|
+
| Aspect | v1.8 | v2.0 |
|
|
10
|
+
|--------|------|------|
|
|
11
|
+
| **Analysis Engine** | TypeScript + Regex | Claude Code Agent SDK |
|
|
12
|
+
| **Callsite Discovery** | Regex patterns | Semantic code understanding |
|
|
13
|
+
| **Architecture** | Multi-phase agents | Unified single-call |
|
|
14
|
+
| **Templates** | 27 templates | 43 templates (12 insight + 31 optimization) |
|
|
15
|
+
| **Speed** | ~70s per file | ~30s per file (~57% faster) |
|
|
16
|
+
| **Cost** | ~$0.05 per file | ~$0.02 per file (60% cheaper) |
|
|
17
|
+
| **Pricing** | 300 credits/10 days free | 50 credits one-time (6-month expiry) |
|
|
18
|
+
|
|
19
|
+
## Breaking Changes
|
|
20
|
+
|
|
21
|
+
### 1. CLI Command Changes
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# v1.8: Separate commands for different analyses
|
|
25
|
+
peakinfer scan ./src # Discovery only
|
|
26
|
+
peakinfer profile ./src # Cost/latency profiling
|
|
27
|
+
peakinfer drift ./src --events file.jsonl
|
|
28
|
+
|
|
29
|
+
# v2.0: Unified analyze command
|
|
30
|
+
peakinfer analyze ./src # Full analysis
|
|
31
|
+
peakinfer analyze ./src --events production.jsonl # With runtime correlation
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### 2. Output Format Changes
|
|
35
|
+
|
|
36
|
+
The InferenceMap schema has been updated to v0.1:
|
|
37
|
+
|
|
38
|
+
```diff
|
|
39
|
+
{
|
|
40
|
+
- "version": "0.0.1",
|
|
41
|
+
+ "version": "0.1",
|
|
42
|
+
"root": "./src",
|
|
43
|
+
"generatedAt": "2024-12-21T10:00:00Z",
|
|
44
|
+
+ "metadata": {
|
|
45
|
+
+ "promptId": "unified-analyzer",
|
|
46
|
+
+ "promptVersion": "1.6.0",
|
|
47
|
+
+ "llmProvider": "anthropic",
|
|
48
|
+
+ "llmModel": "claude-sonnet-4-20250514"
|
|
49
|
+
+ },
|
|
50
|
+
"summary": { ... },
|
|
51
|
+
"callsites": [
|
|
52
|
+
{
|
|
53
|
+
"id": "src/chat.ts:42",
|
|
54
|
+
"file": "src/chat.ts",
|
|
55
|
+
"line": 42,
|
|
56
|
+
"provider": "openai",
|
|
57
|
+
"model": "gpt-4o",
|
|
58
|
+
+ "framework": "langchain", // NEW: Framework detection
|
|
59
|
+
+ "runtime": null, // NEW: Runtime detection (vllm, tgi, etc.)
|
|
60
|
+
"patterns": {
|
|
61
|
+
"streaming": true,
|
|
62
|
+
- "retry": true, // Renamed
|
|
63
|
+
+ "retries": true, // Renamed for consistency
|
|
64
|
+
"caching": false,
|
|
65
|
+
- "error_handling": true // Removed
|
|
66
|
+
+ "fallback": true // NEW: Fallback pattern detection
|
|
67
|
+
},
|
|
68
|
+
"confidence": 0.95
|
|
69
|
+
}
|
|
70
|
+
]
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 3. Configuration File Changes
|
|
75
|
+
|
|
76
|
+
```yaml
|
|
77
|
+
# v1.8: .peakinferrc.yaml
|
|
78
|
+
scan:
|
|
79
|
+
extensions: [.ts, .js, .py]
|
|
80
|
+
ignore: [node_modules, dist]
|
|
81
|
+
profile:
|
|
82
|
+
include_cost: true
|
|
83
|
+
include_latency: true
|
|
84
|
+
|
|
85
|
+
# v2.0: .peakinfer.yaml (new name)
|
|
86
|
+
analyze:
|
|
87
|
+
extensions: [.ts, .tsx, .js, .jsx, .py] # More extensions supported
|
|
88
|
+
ignore: [node_modules, dist, .git, __pycache__]
|
|
89
|
+
prompt: unified-analyzer # Configurable prompt pack
|
|
90
|
+
output:
|
|
91
|
+
format: text # text, json
|
|
92
|
+
save: true # Auto-save to .peakinfer/
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### 4. API Key Changes
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# v1.8: Used PEAKINFER_API_KEY for managed mode
|
|
99
|
+
export PEAKINFER_API_KEY=pk_xxx
|
|
100
|
+
|
|
101
|
+
# v2.0: BYOK mode uses your Anthropic key directly
|
|
102
|
+
export ANTHROPIC_API_KEY=sk-ant-xxx
|
|
103
|
+
|
|
104
|
+
# For managed mode (GitHub Action), use PEAKINFER_TOKEN
|
|
105
|
+
export PEAKINFER_TOKEN=pt_xxx
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### 5. Credit System Changes
|
|
109
|
+
|
|
110
|
+
| v1.8 | v2.0 |
|
|
111
|
+
|------|------|
|
|
112
|
+
| 300 credits free (10-day refresh) | 50 credits free (one-time, 6-month expiry) |
|
|
113
|
+
| Pro: $20/500 credits | Starter: $19/200, Growth: $49/600, Scale: $149/2000 |
|
|
114
|
+
| Monthly subscription | Credit packs (no subscription) |
|
|
115
|
+
|
|
116
|
+
## Migration Steps
|
|
117
|
+
|
|
118
|
+
### Step 1: Update CLI
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# Uninstall v1.8
|
|
122
|
+
npm uninstall -g @kalmantic/peakinfer
|
|
123
|
+
|
|
124
|
+
# Install v2.0
|
|
125
|
+
npm install -g @kalmantic/peakinfer@2
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Step 2: Update Configuration
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# Rename config file
|
|
132
|
+
mv .peakinferrc.yaml .peakinfer.yaml
|
|
133
|
+
|
|
134
|
+
# Update config format (see example above)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Step 3: Update API Key
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# Add to your shell profile (.bashrc, .zshrc, etc.)
|
|
141
|
+
export ANTHROPIC_API_KEY="your-key-here"
|
|
142
|
+
|
|
143
|
+
# Or create .env file
|
|
144
|
+
echo "ANTHROPIC_API_KEY=your-key-here" > .env
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Step 4: Update Scripts
|
|
148
|
+
|
|
149
|
+
```diff
|
|
150
|
+
# package.json scripts
|
|
151
|
+
{
|
|
152
|
+
"scripts": {
|
|
153
|
+
- "peakinfer": "peakinfer scan ./src && peakinfer profile ./src"
|
|
154
|
+
+ "peakinfer": "peakinfer analyze ./src"
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Step 5: Update GitHub Action
|
|
160
|
+
|
|
161
|
+
```diff
|
|
162
|
+
# .github/workflows/peakinfer.yml
|
|
163
|
+
- uses: kalmantic/peakinfer-action@v1
|
|
164
|
+
+ uses: kalmantic/peakinfer-action@v2
|
|
165
|
+
with:
|
|
166
|
+
path: ./src
|
|
167
|
+
- mode: scan-and-profile
|
|
168
|
+
+ # mode is now automatic - unified analysis
|
|
169
|
+
events: ./events.jsonl
|
|
170
|
+
+ events-map: timestamp=time,model=model_name # NEW: Field mapping
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Step 6: Update CI/CD Integration
|
|
174
|
+
|
|
175
|
+
```diff
|
|
176
|
+
# Check output format changes in your CI scripts
|
|
177
|
+
- if jq -e '.callsites[] | select(.patterns.retry == false)' output.json; then
|
|
178
|
+
+ if jq -e '.callsites[] | select(.patterns.retries == false)' output.json; then
|
|
179
|
+
echo "Missing retry handling detected"
|
|
180
|
+
exit 1
|
|
181
|
+
fi
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## New Features in v2.0
|
|
185
|
+
|
|
186
|
+
### 1. Framework Detection
|
|
187
|
+
|
|
188
|
+
v2.0 automatically detects LLM frameworks:
|
|
189
|
+
|
|
190
|
+
```json
|
|
191
|
+
{
|
|
192
|
+
"id": "src/rag.ts:25",
|
|
193
|
+
"framework": "langchain", // langchain, llamaindex, haystack, etc.
|
|
194
|
+
"runtime": null
|
|
195
|
+
}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### 2. Self-Hosted Runtime Detection
|
|
199
|
+
|
|
200
|
+
```json
|
|
201
|
+
{
|
|
202
|
+
"id": "src/inference.py:42",
|
|
203
|
+
"provider": null,
|
|
204
|
+
"runtime": "vllm" // vllm, tgi, ollama, sglang, etc.
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### 3. Field Mapping for Runtime Events
|
|
209
|
+
|
|
210
|
+
Handle non-standard event formats:
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
# v2.0: Map custom field names
|
|
214
|
+
peakinfer analyze ./src \
|
|
215
|
+
--events logs.jsonl \
|
|
216
|
+
--events-map latency_ms=duration,model=model_name,input_tokens=prompt_tokens
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### 4. What-If Analysis
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
# Predict impact of model changes
|
|
223
|
+
peakinfer whatif --model gpt-4o-mini
|
|
224
|
+
|
|
225
|
+
# Output: "Switching 5 inference points from gpt-4o to gpt-4o-mini
|
|
226
|
+
# would reduce monthly cost by $2,340 (67% reduction)"
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### 5. Historical Comparison
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
# Compare current run with baseline
|
|
233
|
+
peakinfer analyze ./src --compare-baseline
|
|
234
|
+
|
|
235
|
+
# Compare two specific runs
|
|
236
|
+
peakinfer history compare run_abc123 run_def456
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
### 6. Latency Prediction
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
# Predict p95 latency based on InferenceMAX envelope data
|
|
243
|
+
peakinfer analyze ./src --predict --target-p95 2000
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## Template Migration
|
|
247
|
+
|
|
248
|
+
If you have custom templates, update them to v2.0 schema:
|
|
249
|
+
|
|
250
|
+
```diff
|
|
251
|
+
# Template changes
|
|
252
|
+
{
|
|
253
|
+
"id": "my-custom-template",
|
|
254
|
+
- "type": "insight",
|
|
255
|
+
+ "category": "cost", # cost, drift, performance, waste
|
|
256
|
+
+ "severity": "warning", # critical, warning, info
|
|
257
|
+
"match": {
|
|
258
|
+
"scope": "callsite",
|
|
259
|
+
"conditions": [
|
|
260
|
+
- { "field": "retry", "equals": false }
|
|
261
|
+
+ { "field": "patterns.retries", "op": "eq", "value": false }
|
|
262
|
+
]
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## FAQ
|
|
268
|
+
|
|
269
|
+
### Q: Will my v1.8 InferenceMap files still work?
|
|
270
|
+
|
|
271
|
+
A: v2.0 can read v1.8 files but will convert them on save. Consider regenerating for full v2.0 benefits.
|
|
272
|
+
|
|
273
|
+
### Q: Do I need to change my Anthropic API key?
|
|
274
|
+
|
|
275
|
+
A: If you already use `ANTHROPIC_API_KEY`, no change is needed. If you were using the v1.8 managed-mode `PEAKINFER_API_KEY`, switch to `ANTHROPIC_API_KEY` (BYOK) or `PEAKINFER_TOKEN` (managed mode) — see the API Key Changes section above.
|
|
276
|
+
|
|
277
|
+
### Q: Are v1.8 templates compatible?
|
|
278
|
+
|
|
279
|
+
A: Mostly yes. Check the template schema changes above and update `match.conditions` syntax.
|
|
280
|
+
|
|
281
|
+
### Q: What happened to the `scan` command?
|
|
282
|
+
|
|
283
|
+
A: It's now integrated into `analyze`. Use `peakinfer analyze ./src` for all analysis types.
|
|
284
|
+
|
|
285
|
+
### Q: How do I get my free credits?
|
|
286
|
+
|
|
287
|
+
A: Sign in at peakinfer.com with GitHub. First-time users get 50 credits automatically.
|
|
288
|
+
|
|
289
|
+
## Getting Help
|
|
290
|
+
|
|
291
|
+
- **Documentation:** https://peakinfer.com/docs
|
|
292
|
+
- **GitHub Issues:** https://github.com/Kalmantic/peakinfer/issues
|
|
293
|
+
- **Discord:** https://discord.gg/kalmantic
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1.0.0",
|
|
3
|
+
"generated": "2025-12-24T00:00:00.000Z",
|
|
4
|
+
"description": "Pre-computed demo analysis for offline `peakinfer demo` command",
|
|
5
|
+
"inferencePoints": [
|
|
6
|
+
{
|
|
7
|
+
"id": "demo-ai-service-chat-1",
|
|
8
|
+
"file": "ai-service.ts",
|
|
9
|
+
"line": 13,
|
|
10
|
+
"function": "chat",
|
|
11
|
+
"provider": "anthropic",
|
|
12
|
+
"model": "claude-sonnet-4-20250514",
|
|
13
|
+
"streaming": true,
|
|
14
|
+
"costProfile": {
|
|
15
|
+
"estimatedCostPer1K": 0.015,
|
|
16
|
+
"inputTokens": 500,
|
|
17
|
+
"outputTokens": 2000
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "demo-ai-service-classify-2",
|
|
22
|
+
"file": "ai-service.ts",
|
|
23
|
+
"line": 29,
|
|
24
|
+
"function": "classifyIntent",
|
|
25
|
+
"provider": "openai",
|
|
26
|
+
"model": "gpt-4",
|
|
27
|
+
"streaming": false,
|
|
28
|
+
"costProfile": {
|
|
29
|
+
"estimatedCostPer1K": 0.03,
|
|
30
|
+
"inputTokens": 100,
|
|
31
|
+
"outputTokens": 50
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"id": "demo-ai-service-summarize-3",
|
|
36
|
+
"file": "ai-service.ts",
|
|
37
|
+
"line": 42,
|
|
38
|
+
"function": "summarize",
|
|
39
|
+
"provider": "anthropic",
|
|
40
|
+
"model": "claude-sonnet-4-20250514",
|
|
41
|
+
"streaming": false,
|
|
42
|
+
"costProfile": {
|
|
43
|
+
"estimatedCostPer1K": 0.015,
|
|
44
|
+
"inputTokens": 2000,
|
|
45
|
+
"outputTokens": 500
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"id": "demo-ai-service-batch-4",
|
|
50
|
+
"file": "ai-service.ts",
|
|
51
|
+
"line": 52,
|
|
52
|
+
"function": "batchAnalyze",
|
|
53
|
+
"provider": "anthropic",
|
|
54
|
+
"model": "claude-sonnet-4-20250514",
|
|
55
|
+
"streaming": false,
|
|
56
|
+
"costProfile": {
|
|
57
|
+
"estimatedCostPer1K": 0.015,
|
|
58
|
+
"inputTokens": 200,
|
|
59
|
+
"outputTokens": 200
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
],
|
|
63
|
+
"drift": {
|
|
64
|
+
"detected": true,
|
|
65
|
+
"type": "streaming",
|
|
66
|
+
"description": "Code declares streaming but runtime shows 0% actual streams",
|
|
67
|
+
"evidence": {
|
|
68
|
+
"code": "stream: true",
|
|
69
|
+
"runtime": "0% streaming responses observed",
|
|
70
|
+
"impact": "p95 latency 2.4s instead of 400ms",
|
|
71
|
+
"duration": "23 days"
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
"issues": [
|
|
75
|
+
{
|
|
76
|
+
"id": "issue-1",
|
|
77
|
+
"severity": "critical",
|
|
78
|
+
"category": "drift",
|
|
79
|
+
"title": "Streaming Drift Detected",
|
|
80
|
+
"description": "Code says `stream: true` but runtime shows 0% streaming responses",
|
|
81
|
+
"impact": "6x slower response times, poor user experience",
|
|
82
|
+
"file": "ai-service.ts",
|
|
83
|
+
"line": 15,
|
|
84
|
+
"fix": {
|
|
85
|
+
"description": "Verify streaming is working end-to-end",
|
|
86
|
+
"effort": "1 hour"
|
|
87
|
+
}
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"id": "issue-2",
|
|
91
|
+
"severity": "high",
|
|
92
|
+
"category": "cost",
|
|
93
|
+
"title": "Overpowered Model for Classification",
|
|
94
|
+
"description": "GPT-4 ($0.03/1K) used for simple intent classification that GPT-4o-mini ($0.00015/1K) handles equally well",
|
|
95
|
+
"impact": "200x cost waste on this endpoint",
|
|
96
|
+
"savings": "$4,200/month",
|
|
97
|
+
"file": "ai-service.ts",
|
|
98
|
+
"line": 30,
|
|
99
|
+
"fix": {
|
|
100
|
+
"description": "Switch to gpt-4o-mini for classification",
|
|
101
|
+
"effort": "5 minutes"
|
|
102
|
+
}
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"id": "issue-3",
|
|
106
|
+
"severity": "high",
|
|
107
|
+
"category": "reliability",
|
|
108
|
+
"title": "No Error Handling",
|
|
109
|
+
"description": "LLM call has no try/catch, no retry logic, no timeout",
|
|
110
|
+
"impact": "Single API failure crashes the application",
|
|
111
|
+
"file": "ai-service.ts",
|
|
112
|
+
"line": 42,
|
|
113
|
+
"fix": {
|
|
114
|
+
"description": "Add retry with exponential backoff and timeout",
|
|
115
|
+
"effort": "30 minutes"
|
|
116
|
+
}
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"id": "issue-4",
|
|
120
|
+
"severity": "medium",
|
|
121
|
+
"category": "throughput",
|
|
122
|
+
"title": "Sequential Batch Processing",
|
|
123
|
+
"description": "Items processed one at a time in a loop instead of parallel",
|
|
124
|
+
"impact": "50x throughput loss for batch operations",
|
|
125
|
+
"file": "ai-service.ts",
|
|
126
|
+
"line": 51,
|
|
127
|
+
"fix": {
|
|
128
|
+
"description": "Use Promise.all() for parallel processing",
|
|
129
|
+
"effort": "15 minutes"
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
],
|
|
133
|
+
"summary": {
|
|
134
|
+
"totalInferencePoints": 4,
|
|
135
|
+
"providers": ["anthropic", "openai"],
|
|
136
|
+
"criticalIssues": 1,
|
|
137
|
+
"highIssues": 2,
|
|
138
|
+
"mediumIssues": 1,
|
|
139
|
+
"estimatedMonthlySavings": "$4,200",
|
|
140
|
+
"estimatedLatencyImprovement": "6x faster"
|
|
141
|
+
}
|
|
142
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# PeakInfer v2.0 Demo Project
|
|
2
|
+
|
|
3
|
+
This is a sample project for demonstrating PeakInfer v2.0 features.
|
|
4
|
+
|
|
5
|
+
## Files
|
|
6
|
+
|
|
7
|
+
- `src/llm-client.ts` - Sample LLM client with various inference patterns
|
|
8
|
+
- `sample-events.jsonl` - Sample runtime events for combined analysis
|
|
9
|
+
|
|
10
|
+
## Quick Demo
|
|
11
|
+
|
|
12
|
+
### 1. Basic Analysis
|
|
13
|
+
```bash
|
|
14
|
+
peakinfer analyze .
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### 2. With Comparison
|
|
18
|
+
```bash
|
|
19
|
+
# Run again to see comparison
|
|
20
|
+
peakinfer analyze . --compare
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### 3. With Prediction
|
|
24
|
+
```bash
|
|
25
|
+
peakinfer analyze . --predict --target-p95 3000
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### 4. Combined Analysis (Static + Runtime)
|
|
29
|
+
```bash
|
|
30
|
+
peakinfer analyze . --events sample-events.jsonl --compare --predict --html --open
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Expected Results
|
|
34
|
+
|
|
35
|
+
### Predictions
|
|
36
|
+
- `chatWithGPT4`: HIGH risk (p95 ~5000ms)
|
|
37
|
+
- `analyzeWithOpus`: HIGH risk (p95 ~8000ms)
|
|
38
|
+
- `summarize`: MEDIUM risk (p95 ~4000ms)
|
|
39
|
+
- `quickChat`: LOW risk (p95 ~1500ms)
|
|
40
|
+
- `fastResponse`: LOW risk (p95 ~1500ms)
|
|
41
|
+
|
|
42
|
+
### Counterfactuals
|
|
43
|
+
- Model swap: gpt-4 → gpt-4o-mini (-75% latency, -90% cost)
|
|
44
|
+
- Model swap: claude-3-opus → claude-3.5-sonnet (-60% latency)
|
|
45
|
+
- Enable streaming for non-streaming calls
|
|
46
|
+
- Add batching for embedding calls
|
|
47
|
+
- Add caching layer
|
|
48
|
+
|
|
49
|
+
### Drift (Combined Mode)
|
|
50
|
+
Shows mismatches between code and runtime:
|
|
51
|
+
- Inference points in code but not in runtime (dead code?)
|
|
52
|
+
- Runtime events not mapped to code (dynamic calls?)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Demo AI Service - Shows common LLM inference issues
|
|
3
|
+
* This file is used by `peakinfer demo` to demonstrate drift detection
|
|
4
|
+
*/
|
|
5
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
6
|
+
import OpenAI from 'openai';
|
|
7
|
+
|
|
8
|
+
const anthropic = new Anthropic();
|
|
9
|
+
const openai = new OpenAI();
|
|
10
|
+
|
|
11
|
+
// Issue 1: Streaming configured but may not be working in production
|
|
12
|
+
export async function chat(prompt: string): Promise<string> {
|
|
13
|
+
const response = await anthropic.messages.create({
|
|
14
|
+
model: 'claude-sonnet-4-20250514',
|
|
15
|
+
max_tokens: 2000,
|
|
16
|
+
stream: true, // <-- Code says streaming
|
|
17
|
+
messages: [{ role: 'user', content: prompt }],
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
let result = '';
|
|
21
|
+
for await (const event of response) {
|
|
22
|
+
if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
|
|
23
|
+
result += event.delta.text;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
return result;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
// Issue 2: GPT-4 used for simple classification (overpowered model)
|
|
30
|
+
export async function classifyIntent(message: string): Promise<string> {
|
|
31
|
+
const response = await openai.chat.completions.create({
|
|
32
|
+
model: 'gpt-4', // <-- Expensive model for simple task
|
|
33
|
+
messages: [
|
|
34
|
+
{ role: 'system', content: 'Classify the user intent as: question, complaint, feedback, or other' },
|
|
35
|
+
{ role: 'user', content: message },
|
|
36
|
+
],
|
|
37
|
+
max_tokens: 50,
|
|
38
|
+
});
|
|
39
|
+
return response.choices[0].message.content || 'other';
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Issue 3: No error handling, no retry logic
|
|
43
|
+
export async function summarize(text: string): Promise<string> {
|
|
44
|
+
// No try/catch, no retry, no timeout
|
|
45
|
+
const response = await anthropic.messages.create({
|
|
46
|
+
model: 'claude-sonnet-4-20250514',
|
|
47
|
+
max_tokens: 500,
|
|
48
|
+
messages: [{ role: 'user', content: `Summarize: ${text}` }],
|
|
49
|
+
});
|
|
50
|
+
return response.content[0].type === 'text' ? response.content[0].text : '';
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Issue 4: Sequential processing (throughput bottleneck)
|
|
54
|
+
export async function batchAnalyze(items: string[]): Promise<string[]> {
|
|
55
|
+
const results: string[] = [];
|
|
56
|
+
for (const item of items) { // <-- Sequential, should be parallel
|
|
57
|
+
const response = await anthropic.messages.create({
|
|
58
|
+
model: 'claude-sonnet-4-20250514',
|
|
59
|
+
max_tokens: 200,
|
|
60
|
+
messages: [{ role: 'user', content: `Analyze: ${item}` }],
|
|
61
|
+
});
|
|
62
|
+
results.push(response.content[0].type === 'text' ? response.content[0].text : '');
|
|
63
|
+
}
|
|
64
|
+
return results;
|
|
65
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{"id":"evt_001","ts":"2024-12-15T10:00:00Z","provider":"openai","model":"gpt-4","input_tokens":150,"output_tokens":500,"latency_ms":4500}
|
|
2
|
+
{"id":"evt_002","ts":"2024-12-15T10:00:05Z","provider":"openai","model":"gpt-4","input_tokens":200,"output_tokens":800,"latency_ms":5200}
|
|
3
|
+
{"id":"evt_003","ts":"2024-12-15T10:00:10Z","provider":"anthropic","model":"claude-3-opus-20240229","input_tokens":1000,"output_tokens":2000,"latency_ms":8500}
|
|
4
|
+
{"id":"evt_004","ts":"2024-12-15T10:00:15Z","provider":"openai","model":"gpt-4-turbo","input_tokens":300,"output_tokens":400,"latency_ms":3800}
|
|
5
|
+
{"id":"evt_005","ts":"2024-12-15T10:00:20Z","provider":"openai","model":"gpt-4o-mini","input_tokens":100,"output_tokens":150,"latency_ms":1200}
|
|
6
|
+
{"id":"evt_006","ts":"2024-12-15T10:00:25Z","provider":"openai","model":"gpt-4o-mini","input_tokens":80,"output_tokens":120,"latency_ms":1100}
|
|
7
|
+
{"id":"evt_007","ts":"2024-12-15T10:00:30Z","provider":"anthropic","model":"claude-3-haiku-20240307","input_tokens":50,"output_tokens":200,"latency_ms":900}
|
|
8
|
+
{"id":"evt_008","ts":"2024-12-15T10:00:35Z","provider":"openai","model":"gpt-4o","input_tokens":200,"output_tokens":600,"latency_ms":2100,"streaming":true}
|
|
9
|
+
{"id":"evt_009","ts":"2024-12-15T10:00:40Z","provider":"openai","model":"text-embedding-3-small","input_tokens":100,"output_tokens":0,"latency_ms":150}
|
|
10
|
+
{"id":"evt_010","ts":"2024-12-15T10:00:45Z","provider":"openai","model":"gpt-4","input_tokens":180,"output_tokens":550,"latency_ms":4800}
|
|
11
|
+
{"id":"evt_011","ts":"2024-12-15T10:01:00Z","provider":"anthropic","model":"claude-3-sonnet-20240229","input_tokens":400,"output_tokens":800,"latency_ms":3500}
|
|
12
|
+
{"id":"evt_012","ts":"2024-12-15T10:01:05Z","provider":"openai","model":"gpt-4o-mini","input_tokens":90,"output_tokens":180,"latency_ms":1300}
|
|
13
|
+
{"id":"evt_013","ts":"2024-12-15T10:01:10Z","provider":"openai","model":"gpt-4","input_tokens":220,"output_tokens":700,"latency_ms":5500}
|
|
14
|
+
{"id":"evt_014","ts":"2024-12-15T10:01:15Z","provider":"anthropic","model":"claude-3-opus-20240229","input_tokens":1200,"output_tokens":2500,"latency_ms":9200}
|
|
15
|
+
{"id":"evt_015","ts":"2024-12-15T10:01:20Z","provider":"openai","model":"text-embedding-3-small","input_tokens":150,"output_tokens":0,"latency_ms":180}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI Service for PeakInfer Demo
|
|
3
|
+
* Anthropic Claude only - demonstrates various inference patterns
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
7
|
+
|
|
8
|
+
const client = new Anthropic();
|
|
9
|
+
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// HIGH LATENCY / HIGH COST (will trigger issues)
|
|
12
|
+
// ============================================================================
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Complex analysis with Claude Opus (highest cost, no error handling)
|
|
16
|
+
* Issues expected:
|
|
17
|
+
* - Critical: No error handling
|
|
18
|
+
* - Warning: Expensive model for simple task
|
|
19
|
+
*/
|
|
20
|
+
export async function analyzeDocument(document: string): Promise<string> {
|
|
21
|
+
const response = await client.messages.create({
|
|
22
|
+
model: 'claude-opus-4-20250514',
|
|
23
|
+
max_tokens: 4000,
|
|
24
|
+
messages: [{ role: 'user', content: `Analyze: ${document}` }],
|
|
25
|
+
});
|
|
26
|
+
return response.content[0].type === 'text' ? response.content[0].text : '';
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Chat completion without streaming (latency issue)
|
|
31
|
+
* Issues expected:
|
|
32
|
+
* - Warning: No streaming enabled
|
|
33
|
+
* - Critical: No error handling
|
|
34
|
+
*/
|
|
35
|
+
export async function chat(prompt: string): Promise<string> {
|
|
36
|
+
const response = await client.messages.create({
|
|
37
|
+
model: 'claude-sonnet-4-20250514',
|
|
38
|
+
max_tokens: 2000,
|
|
39
|
+
messages: [{ role: 'user', content: prompt }],
|
|
40
|
+
});
|
|
41
|
+
return response.content[0].type === 'text' ? response.content[0].text : '';
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// ============================================================================
|
|
45
|
+
// MEDIUM PATTERNS (some issues)
|
|
46
|
+
// ============================================================================
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Translation service (no timeout configured)
|
|
50
|
+
* Issues expected:
|
|
51
|
+
* - Warning: No timeout configured
|
|
52
|
+
*/
|
|
53
|
+
export async function translate(text: string, targetLang: string): Promise<string> {
|
|
54
|
+
const response = await client.messages.create({
|
|
55
|
+
model: 'claude-3-5-sonnet-20241022',
|
|
56
|
+
max_tokens: 2000,
|
|
57
|
+
messages: [{ role: 'user', content: `Translate to ${targetLang}: ${text}` }],
|
|
58
|
+
});
|
|
59
|
+
return response.content[0].type === 'text' ? response.content[0].text : '';
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// ============================================================================
|
|
63
|
+
// GOOD PATTERNS (minimal issues)
|
|
64
|
+
// ============================================================================
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Fast response with Claude Haiku (optimized)
|
|
68
|
+
* - Uses cheap model
|
|
69
|
+
* - Has error handling
|
|
70
|
+
* - Has streaming
|
|
71
|
+
*/
|
|
72
|
+
export async function* streamChat(prompt: string): AsyncGenerator<string> {
|
|
73
|
+
try {
|
|
74
|
+
const stream = await client.messages.stream({
|
|
75
|
+
model: 'claude-3-5-haiku-20241022',
|
|
76
|
+
max_tokens: 500,
|
|
77
|
+
messages: [{ role: 'user', content: prompt }],
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
for await (const event of stream) {
|
|
81
|
+
if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
|
|
82
|
+
yield event.delta.text;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
} catch (error) {
|
|
86
|
+
console.error('Stream error:', error);
|
|
87
|
+
throw error;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Robust API call with retry and fallback
|
|
93
|
+
* - Has error handling
|
|
94
|
+
* - Has retry logic
|
|
95
|
+
* - Has fallback model
|
|
96
|
+
*/
|
|
97
|
+
export async function robustChat(prompt: string): Promise<string> {
|
|
98
|
+
const maxRetries = 3;
|
|
99
|
+
let lastError: Error | null = null;
|
|
100
|
+
|
|
101
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
102
|
+
try {
|
|
103
|
+
const response = await client.messages.create({
|
|
104
|
+
model: 'claude-3-5-sonnet-20241022',
|
|
105
|
+
max_tokens: 1000,
|
|
106
|
+
messages: [{ role: 'user', content: prompt }],
|
|
107
|
+
});
|
|
108
|
+
return response.content[0].type === 'text' ? response.content[0].text : '';
|
|
109
|
+
} catch (error) {
|
|
110
|
+
lastError = error as Error;
|
|
111
|
+
if (attempt < maxRetries - 1) {
|
|
112
|
+
await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// Fallback to cheaper model
|
|
118
|
+
try {
|
|
119
|
+
const response = await client.messages.create({
|
|
120
|
+
model: 'claude-3-5-haiku-20241022',
|
|
121
|
+
max_tokens: 1000,
|
|
122
|
+
messages: [{ role: 'user', content: prompt }],
|
|
123
|
+
});
|
|
124
|
+
return response.content[0].type === 'text' ? response.content[0].text : '';
|
|
125
|
+
} catch {
|
|
126
|
+
throw lastError;
|
|
127
|
+
}
|
|
128
|
+
}
|