@peakinfer/cli 1.0.133
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -0
- package/.env.example +6 -0
- package/.github/workflows/peakinfer.yml +64 -0
- package/CHANGELOG.md +31 -0
- package/LICENSE +190 -0
- package/README.md +335 -0
- package/data/inferencemax.json +274 -0
- package/dist/agent-analyzer.d.ts +45 -0
- package/dist/agent-analyzer.d.ts.map +1 -0
- package/dist/agent-analyzer.js +374 -0
- package/dist/agent-analyzer.js.map +1 -0
- package/dist/agent.d.ts +76 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +965 -0
- package/dist/agent.js.map +1 -0
- package/dist/agents/correlation-analyzer.d.ts +34 -0
- package/dist/agents/correlation-analyzer.d.ts.map +1 -0
- package/dist/agents/correlation-analyzer.js +261 -0
- package/dist/agents/correlation-analyzer.js.map +1 -0
- package/dist/agents/index.d.ts +91 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +111 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/runtime-analyzer.d.ts +38 -0
- package/dist/agents/runtime-analyzer.d.ts.map +1 -0
- package/dist/agents/runtime-analyzer.js +244 -0
- package/dist/agents/runtime-analyzer.js.map +1 -0
- package/dist/analysis-types.d.ts +500 -0
- package/dist/analysis-types.d.ts.map +1 -0
- package/dist/analysis-types.js +11 -0
- package/dist/analysis-types.js.map +1 -0
- package/dist/analytics.d.ts +25 -0
- package/dist/analytics.d.ts.map +1 -0
- package/dist/analytics.js +94 -0
- package/dist/analytics.js.map +1 -0
- package/dist/analyzer.d.ts +48 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +547 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/artifacts.d.ts +44 -0
- package/dist/artifacts.d.ts.map +1 -0
- package/dist/artifacts.js +165 -0
- package/dist/artifacts.js.map +1 -0
- package/dist/benchmarks/index.d.ts +88 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +205 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +427 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/ci.d.ts +19 -0
- package/dist/commands/ci.d.ts.map +1 -0
- package/dist/commands/ci.js +253 -0
- package/dist/commands/ci.js.map +1 -0
- package/dist/commands/config.d.ts +16 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +249 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/demo.d.ts +15 -0
- package/dist/commands/demo.d.ts.map +1 -0
- package/dist/commands/demo.js +106 -0
- package/dist/commands/demo.js.map +1 -0
- package/dist/commands/export.d.ts +14 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +209 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/history.d.ts +15 -0
- package/dist/commands/history.d.ts.map +1 -0
- package/dist/commands/history.js +389 -0
- package/dist/commands/history.js.map +1 -0
- package/dist/commands/template.d.ts +14 -0
- package/dist/commands/template.d.ts.map +1 -0
- package/dist/commands/template.js +341 -0
- package/dist/commands/template.js.map +1 -0
- package/dist/commands/validate-map.d.ts +12 -0
- package/dist/commands/validate-map.d.ts.map +1 -0
- package/dist/commands/validate-map.js +274 -0
- package/dist/commands/validate-map.js.map +1 -0
- package/dist/commands/whatif.d.ts +17 -0
- package/dist/commands/whatif.d.ts.map +1 -0
- package/dist/commands/whatif.js +206 -0
- package/dist/commands/whatif.js.map +1 -0
- package/dist/comparison.d.ts +38 -0
- package/dist/comparison.d.ts.map +1 -0
- package/dist/comparison.js +223 -0
- package/dist/comparison.js.map +1 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +158 -0
- package/dist/config.js.map +1 -0
- package/dist/connectors/helicone.d.ts +9 -0
- package/dist/connectors/helicone.d.ts.map +1 -0
- package/dist/connectors/helicone.js +106 -0
- package/dist/connectors/helicone.js.map +1 -0
- package/dist/connectors/index.d.ts +37 -0
- package/dist/connectors/index.d.ts.map +1 -0
- package/dist/connectors/index.js +65 -0
- package/dist/connectors/index.js.map +1 -0
- package/dist/connectors/langsmith.d.ts +9 -0
- package/dist/connectors/langsmith.d.ts.map +1 -0
- package/dist/connectors/langsmith.js +122 -0
- package/dist/connectors/langsmith.js.map +1 -0
- package/dist/connectors/types.d.ts +83 -0
- package/dist/connectors/types.d.ts.map +1 -0
- package/dist/connectors/types.js +98 -0
- package/dist/connectors/types.js.map +1 -0
- package/dist/cost-estimator.d.ts +46 -0
- package/dist/cost-estimator.d.ts.map +1 -0
- package/dist/cost-estimator.js +104 -0
- package/dist/cost-estimator.js.map +1 -0
- package/dist/costs.d.ts +57 -0
- package/dist/costs.d.ts.map +1 -0
- package/dist/costs.js +251 -0
- package/dist/costs.js.map +1 -0
- package/dist/counterfactuals.d.ts +29 -0
- package/dist/counterfactuals.d.ts.map +1 -0
- package/dist/counterfactuals.js +448 -0
- package/dist/counterfactuals.js.map +1 -0
- package/dist/enhancement-prompts.d.ts +41 -0
- package/dist/enhancement-prompts.d.ts.map +1 -0
- package/dist/enhancement-prompts.js +88 -0
- package/dist/enhancement-prompts.js.map +1 -0
- package/dist/envelopes.d.ts +20 -0
- package/dist/envelopes.d.ts.map +1 -0
- package/dist/envelopes.js +790 -0
- package/dist/envelopes.js.map +1 -0
- package/dist/format-normalizer.d.ts +71 -0
- package/dist/format-normalizer.d.ts.map +1 -0
- package/dist/format-normalizer.js +1331 -0
- package/dist/format-normalizer.js.map +1 -0
- package/dist/history.d.ts +79 -0
- package/dist/history.d.ts.map +1 -0
- package/dist/history.js +313 -0
- package/dist/history.js.map +1 -0
- package/dist/html.d.ts +11 -0
- package/dist/html.d.ts.map +1 -0
- package/dist/html.js +463 -0
- package/dist/html.js.map +1 -0
- package/dist/impact.d.ts +42 -0
- package/dist/impact.d.ts.map +1 -0
- package/dist/impact.js +443 -0
- package/dist/impact.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +34 -0
- package/dist/index.js.map +1 -0
- package/dist/insights.d.ts +5 -0
- package/dist/insights.d.ts.map +1 -0
- package/dist/insights.js +271 -0
- package/dist/insights.js.map +1 -0
- package/dist/joiner.d.ts +9 -0
- package/dist/joiner.d.ts.map +1 -0
- package/dist/joiner.js +247 -0
- package/dist/joiner.js.map +1 -0
- package/dist/orchestrator.d.ts +34 -0
- package/dist/orchestrator.d.ts.map +1 -0
- package/dist/orchestrator.js +827 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/pdf.d.ts +26 -0
- package/dist/pdf.d.ts.map +1 -0
- package/dist/pdf.js +84 -0
- package/dist/pdf.js.map +1 -0
- package/dist/prediction.d.ts +33 -0
- package/dist/prediction.d.ts.map +1 -0
- package/dist/prediction.js +316 -0
- package/dist/prediction.js.map +1 -0
- package/dist/prompts/loader.d.ts +38 -0
- package/dist/prompts/loader.d.ts.map +1 -0
- package/dist/prompts/loader.js +60 -0
- package/dist/prompts/loader.js.map +1 -0
- package/dist/renderer.d.ts +64 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +923 -0
- package/dist/renderer.js.map +1 -0
- package/dist/runid.d.ts +57 -0
- package/dist/runid.d.ts.map +1 -0
- package/dist/runid.js +199 -0
- package/dist/runid.js.map +1 -0
- package/dist/runtime.d.ts +29 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +366 -0
- package/dist/runtime.js.map +1 -0
- package/dist/scanner.d.ts +11 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +426 -0
- package/dist/scanner.js.map +1 -0
- package/dist/templates.d.ts +120 -0
- package/dist/templates.d.ts.map +1 -0
- package/dist/templates.js +429 -0
- package/dist/templates.js.map +1 -0
- package/dist/tools/index.d.ts +153 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +177 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/types.d.ts +3647 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +703 -0
- package/dist/types.js.map +1 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +23 -0
- package/dist/version.js.map +1 -0
- package/docs/demo-guide.md +423 -0
- package/docs/events-format.md +295 -0
- package/docs/inferencemap-spec.md +344 -0
- package/docs/migration-v2.md +293 -0
- package/fixtures/demo/precomputed.json +142 -0
- package/fixtures/demo-project/README.md +52 -0
- package/fixtures/demo-project/ai-service.ts +65 -0
- package/fixtures/demo-project/sample-events.jsonl +15 -0
- package/fixtures/demo-project/src/ai-service.ts +128 -0
- package/fixtures/demo-project/src/llm-client.ts +155 -0
- package/package.json +65 -0
- package/prompts/agent-analyzer.yaml +47 -0
- package/prompts/ci-gate.yaml +98 -0
- package/prompts/correlation-analyzer.yaml +178 -0
- package/prompts/format-normalizer.yaml +46 -0
- package/prompts/peak-performance.yaml +180 -0
- package/prompts/pr-comment.yaml +111 -0
- package/prompts/runtime-analyzer.yaml +189 -0
- package/prompts/unified-analyzer.yaml +241 -0
- package/schemas/inference-map.v0.1.json +215 -0
- package/scripts/benchmark.ts +394 -0
- package/scripts/demo-v1.5.sh +158 -0
- package/scripts/sync-from-site.sh +197 -0
- package/scripts/validate-sync.sh +178 -0
- package/src/agent-analyzer.ts +481 -0
- package/src/agent.ts +1232 -0
- package/src/agents/correlation-analyzer.ts +353 -0
- package/src/agents/index.ts +235 -0
- package/src/agents/runtime-analyzer.ts +343 -0
- package/src/analysis-types.ts +558 -0
- package/src/analytics.ts +100 -0
- package/src/analyzer.ts +692 -0
- package/src/artifacts.ts +218 -0
- package/src/benchmarks/index.ts +309 -0
- package/src/cli.ts +503 -0
- package/src/commands/ci.ts +336 -0
- package/src/commands/config.ts +288 -0
- package/src/commands/demo.ts +175 -0
- package/src/commands/export.ts +297 -0
- package/src/commands/history.ts +425 -0
- package/src/commands/template.ts +385 -0
- package/src/commands/validate-map.ts +324 -0
- package/src/commands/whatif.ts +272 -0
- package/src/comparison.ts +283 -0
- package/src/config.ts +188 -0
- package/src/connectors/helicone.ts +164 -0
- package/src/connectors/index.ts +93 -0
- package/src/connectors/langsmith.ts +179 -0
- package/src/connectors/types.ts +180 -0
- package/src/cost-estimator.ts +146 -0
- package/src/costs.ts +347 -0
- package/src/counterfactuals.ts +516 -0
- package/src/enhancement-prompts.ts +118 -0
- package/src/envelopes.ts +814 -0
- package/src/format-normalizer.ts +1486 -0
- package/src/history.ts +400 -0
- package/src/html.ts +512 -0
- package/src/impact.ts +522 -0
- package/src/index.ts +83 -0
- package/src/insights.ts +341 -0
- package/src/joiner.ts +289 -0
- package/src/orchestrator.ts +1015 -0
- package/src/pdf.ts +110 -0
- package/src/prediction.ts +392 -0
- package/src/prompts/loader.ts +88 -0
- package/src/renderer.ts +1045 -0
- package/src/runid.ts +261 -0
- package/src/runtime.ts +450 -0
- package/src/scanner.ts +508 -0
- package/src/templates.ts +561 -0
- package/src/tools/index.ts +214 -0
- package/src/types.ts +873 -0
- package/src/version.ts +24 -0
- package/templates/context-accumulation.yaml +23 -0
- package/templates/cost-concentration.yaml +20 -0
- package/templates/dead-code.yaml +20 -0
- package/templates/latency-explainer.yaml +23 -0
- package/templates/optimizations/ab-testing-framework.yaml +74 -0
- package/templates/optimizations/api-gateway-optimization.yaml +81 -0
- package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
- package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
- package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
- package/templates/optimizations/comprehensive-apm.yaml +76 -0
- package/templates/optimizations/context-window-optimization.yaml +91 -0
- package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
- package/templates/optimizations/distributed-training-optimization.yaml +77 -0
- package/templates/optimizations/document-analysis-edge.yaml +77 -0
- package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
- package/templates/optimizations/domain-specific-distillation.yaml +78 -0
- package/templates/optimizations/error-handling-optimization.yaml +76 -0
- package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
- package/templates/optimizations/long-context-memory-management.yaml +78 -0
- package/templates/optimizations/max-tokens-optimization.yaml +76 -0
- package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
- package/templates/optimizations/multi-framework-resilience.yaml +75 -0
- package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
- package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
- package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
- package/templates/optimizations/quality-monitoring.yaml +74 -0
- package/templates/optimizations/realtime-budget-controls.yaml +74 -0
- package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
- package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
- package/templates/optimizations/smart-model-routing.yaml +96 -0
- package/templates/optimizations/streaming-batch-selection.yaml +167 -0
- package/templates/optimizations/system-prompt-optimization.yaml +75 -0
- package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
- package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
- package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
- package/templates/overpowered-extraction.yaml +32 -0
- package/templates/overpowered-model.yaml +31 -0
- package/templates/prompt-bloat.yaml +24 -0
- package/templates/retry-explosion.yaml +28 -0
- package/templates/schema/insight.schema.json +113 -0
- package/templates/schema/optimization.schema.json +180 -0
- package/templates/streaming-drift.yaml +30 -0
- package/templates/throughput-gap.yaml +21 -0
- package/templates/token-underutilization.yaml +28 -0
- package/templates/untested-fallback.yaml +21 -0
- package/tests/accuracy/drift-detection.test.ts +184 -0
- package/tests/accuracy/false-positives.test.ts +166 -0
- package/tests/accuracy/templates.test.ts +205 -0
- package/tests/action/commands.test.ts +125 -0
- package/tests/action/comments.test.ts +347 -0
- package/tests/cli.test.ts +203 -0
- package/tests/comparison.test.ts +309 -0
- package/tests/correlation-analyzer.test.ts +534 -0
- package/tests/counterfactuals.test.ts +347 -0
- package/tests/fixtures/events/missing-id.jsonl +1 -0
- package/tests/fixtures/events/missing-input.jsonl +1 -0
- package/tests/fixtures/events/missing-latency.jsonl +1 -0
- package/tests/fixtures/events/missing-model.jsonl +1 -0
- package/tests/fixtures/events/missing-output.jsonl +1 -0
- package/tests/fixtures/events/missing-provider.jsonl +1 -0
- package/tests/fixtures/events/missing-ts.jsonl +1 -0
- package/tests/fixtures/events/valid.csv +3 -0
- package/tests/fixtures/events/valid.json +1 -0
- package/tests/fixtures/events/valid.jsonl +2 -0
- package/tests/fixtures/events/with-callsite.jsonl +1 -0
- package/tests/fixtures/events/with-intent.jsonl +1 -0
- package/tests/fixtures/events/wrong-type.jsonl +1 -0
- package/tests/fixtures/repos/empty/.gitkeep +0 -0
- package/tests/fixtures/repos/hybrid-router/router.py +35 -0
- package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
- package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
- package/tests/fixtures/repos/saas-openai/client.py +26 -0
- package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
- package/tests/github-action.test.ts +292 -0
- package/tests/insights.test.ts +878 -0
- package/tests/joiner.test.ts +168 -0
- package/tests/performance/action-latency.test.ts +132 -0
- package/tests/performance/benchmark.test.ts +189 -0
- package/tests/performance/cli-latency.test.ts +102 -0
- package/tests/pr-comment.test.ts +313 -0
- package/tests/prediction.test.ts +296 -0
- package/tests/runtime-analyzer.test.ts +375 -0
- package/tests/runtime.test.ts +205 -0
- package/tests/scanner.test.ts +122 -0
- package/tests/template-conformance.test.ts +526 -0
- package/tests/unit/cost-calculator.test.ts +303 -0
- package/tests/unit/credits.test.ts +180 -0
- package/tests/unit/inference-map.test.ts +276 -0
- package/tests/unit/schema.test.ts +300 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +14 -0
package/src/artifacts.ts
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import { mkdirSync, writeFileSync, existsSync, readFileSync, symlinkSync, unlinkSync } from 'fs';
|
|
2
|
+
import { join, relative } from 'path';
|
|
3
|
+
import type { InferenceMap, Insight, JoinedOutput, RuntimeSummary } from './types.js';
|
|
4
|
+
import { generateRunId, getRunDir, createManifest, canResume, loadCachedArtifacts, type RunInputs, type RunManifest } from './runid.js';
|
|
5
|
+
|
|
6
|
+
// =============================================================================
|
|
7
|
+
// CONSTANTS
|
|
8
|
+
// =============================================================================
|
|
9
|
+
|
|
10
|
+
// Default base directory for saved artifacts; used as the default `outputDir` by the functions in this module.
const OUTPUT_DIR = '.peakinfer';
|
|
11
|
+
|
|
12
|
+
// =============================================================================
|
|
13
|
+
// TYPES
|
|
14
|
+
// =============================================================================
|
|
15
|
+
|
|
16
|
+
/**
 * Bundle of analysis outputs that can be persisted by `saveArtifacts`.
 * Every field is optional; only the fields that are present get written.
 */
export interface ArtifactData {
  /** Written as `inferencemap.json`. */
  inferenceMap?: InferenceMap;
  /** Written as `insights.json`. */
  insights?: Insight[];
  /** Written as `joined.json`. */
  joined?: JoinedOutput;
  /** Written as `runtime.json`. */
  runtime?: RuntimeSummary;
  /** Rendered HTML report, written verbatim as the report file. */
  html?: string;
}
|
|
23
|
+
|
|
24
|
+
/** Optional settings accepted by `saveArtifacts`. */
export interface SaveOptions {
  /** When set, artifacts are written into the run directory for this ID. */
  runId?: string;
  /** Run inputs; together with `runId` they cause a manifest to be written. */
  inputs?: RunInputs;
  /** Project name, slugified into the HTML report file name when provided. */
  projectName?: string;
}
|
|
29
|
+
|
|
30
|
+
// =============================================================================
|
|
31
|
+
// HELPERS
|
|
32
|
+
// =============================================================================
|
|
33
|
+
|
|
34
|
+
/**
 * Convert a project name into a file-name-safe slug.
 *
 * The name is lower-cased, every run of characters outside `a-z0-9` becomes a
 * single underscore, and the result is limited to 50 characters with no
 * leading or trailing underscores.
 *
 * @param name - Human-readable project name.
 * @returns The slug; may be empty when `name` contains no alphanumeric characters.
 */
function toSlug(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '_') // Replace non-alphanumeric runs with one underscore
    .replace(/^_+/, '') // Drop leading underscores before measuring length
    .substring(0, 50) // Limit length
    .replace(/_+$/, ''); // Trim last: truncation itself can expose a trailing underscore
}
|
|
45
|
+
|
|
46
|
+
/**
 * Make sure `dir` exists, creating it and any missing parents.
 * Nothing is done when the path already exists.
 *
 * @param dir - Directory path to create if absent.
 */
function ensureDir(dir: string): void {
  if (existsSync(dir)) {
    return;
  }
  mkdirSync(dir, { recursive: true });
}
|
|
51
|
+
|
|
52
|
+
/**
 * Serialize `data` as JSON with 2-space indentation and write it to `filePath` as UTF-8.
 *
 * @param filePath - Destination file path.
 * @param data - Value to serialize.
 */
function writeJSON(filePath: string, data: unknown): void {
  const serialized = JSON.stringify(data, null, 2);
  writeFileSync(filePath, serialized, 'utf-8');
}
|
|
55
|
+
|
|
56
|
+
/**
 * Point `<baseDir>/latest` at the directory of the given run.
 *
 * Creates a relative symlink `latest -> runs/<runId>`. Any existing `latest`
 * entry is removed first, including a dangling symlink whose previous target
 * no longer exists. If the symlink cannot be created, a plain text file named
 * `latest` containing the run ID is written instead.
 *
 * @param baseDir - Directory that holds the `latest` entry.
 * @param runId - Identifier of the run that `latest` should refer to.
 */
function updateLatestSymlink(baseDir: string, runId: string): void {
  const latestPath = join(baseDir, 'latest');
  const targetPath = join('runs', runId);

  try {
    // Remove the previous entry unconditionally. existsSync() follows symlinks
    // and reports false for a dangling link, which would leave the stale link
    // in place; unlinkSync() operates on the link itself.
    try {
      unlinkSync(latestPath);
    } catch (err) {
      // A missing entry is the normal first-run case; anything else is handled by the fallback.
      if ((err as { code?: unknown }).code !== 'ENOENT') {
        throw err;
      }
    }
    // Create new symlink
    symlinkSync(targetPath, latestPath);
  } catch {
    // Symlinks may not work on all systems (e.g., Windows without admin)
    // Fallback: write a text file with the run ID
    writeFileSync(latestPath, runId, 'utf-8');
  }
}
|
|
73
|
+
|
|
74
|
+
// =============================================================================
|
|
75
|
+
// PUBLIC API
|
|
76
|
+
// =============================================================================
|
|
77
|
+
|
|
78
|
+
/**
 * Persist analysis artifacts to disk and report which files were written.
 *
 * With `options.runId`, files go into the run directory returned by
 * `getRunDir(outputDir, runId)`; otherwise they go directly into `outputDir`.
 * Only artifacts that are present on `data` are written. When both a run ID
 * and `options.inputs` are given, a `manifest.json` is written and the
 * `latest` pointer is updated. Run-scoped saves are also mirrored into
 * `outputDir` for backward compatibility.
 *
 * @param data - Artifacts to save; absent fields are skipped.
 * @param outputDir - Base output directory (defaults to `.peakinfer`).
 * @param options - Optional run ID, run inputs and project name (used for the report file name).
 * @returns Paths of the artifact files written in the run directory; the
 *   manifest and the root-level mirror copies are not included.
 */
export function saveArtifacts(
  data: ArtifactData,
  outputDir: string = OUTPUT_DIR,
  options: SaveOptions = {}
): string[] {
  const { runId, inputs, projectName } = options;

  // Artifacts live in the run directory when a run ID is given, else at the root.
  const runDir = runId ? getRunDir(outputDir, runId) : outputDir;
  ensureDir(runDir);

  const savedFiles: string[] = [];
  const artifactNames: string[] = [];

  // JSON artifacts in write order: core map, findings, joined data, runtime summary.
  const jsonArtifacts: Array<[string, unknown]> = [
    ['inferencemap.json', data.inferenceMap],
    ['insights.json', data.insights],
    ['joined.json', data.joined],
    ['runtime.json', data.runtime],
  ];

  for (const [fileName, payload] of jsonArtifacts) {
    if (!payload) continue;
    const filePath = join(runDir, fileName);
    writeJSON(filePath, payload);
    savedFiles.push(filePath);
    artifactNames.push(fileName);
  }

  // HTML report: named after the project when one is supplied.
  const reportFileName = projectName
    ? `${toSlug(projectName)}_peakinfer_report.html`
    : 'report.html';

  if (data.html) {
    const reportPath = join(runDir, reportFileName);
    writeFileSync(reportPath, data.html, 'utf-8');
    savedFiles.push(reportPath);
    artifactNames.push(reportFileName);
  }

  // The manifest and the `latest` pointer need both the run ID and its inputs.
  if (runId && inputs) {
    const manifest = createManifest(runId, inputs, artifactNames, 'complete');
    writeJSON(join(runDir, 'manifest.json'), manifest);
    updateLatestSymlink(outputDir, runId);
  }

  // Mirror to the root level for backward compatibility.
  if (runId && runDir !== outputDir) {
    ensureDir(outputDir);
    for (const [fileName, payload] of jsonArtifacts) {
      if (payload) {
        writeJSON(join(outputDir, fileName), payload);
      }
    }
    if (data.html) {
      writeFileSync(join(outputDir, reportFileName), data.html, 'utf-8');
    }
  }

  return savedFiles;
}
|
|
175
|
+
|
|
176
|
+
/**
|
|
177
|
+
* Return the default output directory name (the module-level OUTPUT_DIR constant).
|
|
178
|
+
*/
|
|
179
|
+
export function getOutputDir(): string {
|
|
180
|
+
return OUTPUT_DIR;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/**
 * Report whether a previous run left artifacts behind.
 * The presence of `inferencemap.json` directly inside `outputDir` is the marker.
 *
 * @param outputDir - Directory to inspect (defaults to `.peakinfer`).
 * @returns `true` when the marker file exists.
 */
export function artifactsExist(outputDir: string = OUTPUT_DIR): boolean {
  const markerPath = join(outputDir, 'inferencemap.json');
  return existsSync(markerPath);
}
|
|
189
|
+
|
|
190
|
+
/**
 * Determine whether a run for the given inputs can be resumed from cached artifacts.
 *
 * The run ID is derived from `inputs`, the matching run directory is resolved
 * under `outputDir`, and `canResume` decides whether that directory is usable.
 *
 * @param inputs - Inputs identifying the run.
 * @param outputDir - Base output directory (defaults to `.peakinfer`).
 * @returns The resumability flag together with the derived run ID and run directory.
 */
export function checkResumable(inputs: RunInputs, outputDir: string = OUTPUT_DIR): {
  canResume: boolean;
  runId: string;
  runDir: string;
} {
  const runId = generateRunId(inputs);
  const runDir = getRunDir(outputDir, runId);
  const resumable = canResume(runDir, inputs);

  return { canResume: resumable, runId, runDir };
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Load artifacts from a previous run's directory; delegates to loadCachedArtifacts from runid.js.
|
|
210
|
+
*/
|
|
211
|
+
export function loadArtifacts(runDir: string): ArtifactData {
|
|
212
|
+
return loadCachedArtifacts(runDir);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* Get a new run ID for given inputs
|
|
217
|
+
*/
|
|
218
|
+
export { generateRunId } from './runid.js';
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* InferenceMAX Benchmark Integration
|
|
3
|
+
*
|
|
4
|
+
* Provides benchmark comparison for LLM inference performance.
|
|
5
|
+
* Data sourced from the InferenceMAX benchmark suite.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { readFileSync } from 'fs';
|
|
9
|
+
import { join, dirname } from 'path';
|
|
10
|
+
import { fileURLToPath } from 'url';
|
|
11
|
+
|
|
12
|
+
// Recreate __filename and __dirname for this module from import.meta.url; __dirname is used to locate the bundled benchmark data file.
|
|
13
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
14
|
+
const __dirname = dirname(__filename);
|
|
15
|
+
|
|
16
|
+
/** Reference performance and cost figures stored for one benchmark entry. */
interface BenchmarkMetrics {
  /** TTFT reference value, in milliseconds. */
  ttft_ms: number;
  /** p50 latency, in milliseconds. */
  p50_latency_ms: number;
  /** p95 latency, in milliseconds. */
  p95_latency_ms: number;
  /** p99 latency, in milliseconds. */
  p99_latency_ms: number;
  /** Throughput; reported with the `tps` unit in gap descriptions. */
  throughput_tps: number;
  /** Presumably cost per 1k input tokens; currency not shown here (TODO confirm). */
  cost_per_1k_input: number;
  /** Presumably cost per 1k output tokens; currency not shown here (TODO confirm). */
  cost_per_1k_output: number;
}
|
|
25
|
+
|
|
26
|
+
/** One benchmark record: a model measured on a given framework and hardware. */
interface BenchmarkEntry {
  /** Model name; normalized and substring-matched by `getBenchmark`. */
  model: string;
  provider: string;
  /** Framework label; `'api'` is the default used by lookups. */
  framework: string;
  /** Hardware label; `'api'` is the default used by lookups. */
  hardware: string;
  /** Reference figures that user metrics are compared against. */
  metrics: BenchmarkMetrics;
  /** Free-form configuration passed through unchanged into comparisons. */
  optimal_config?: Record<string, unknown>;
  notes?: string;
}
|
|
35
|
+
|
|
36
|
+
/** Shape of the parsed benchmark data file. */
interface BenchmarkData {
  version: string;
  last_updated: string;
  source: string;
  /** Benchmark entries; looked up with keys of the form `model:framework:hardware`. */
  benchmarks: Record<string, BenchmarkEntry>;
  /** Maps a normalized model name to a key of `benchmarks`. */
  model_aliases: Record<string, string>;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Result of comparing one inference point's metrics with a benchmark entry.
 *
 * In each gap, `value` and `percent` are "yours minus benchmark" (percent is
 * relative to the benchmark). A positive value means slower for the latency
 * gaps and higher for throughput.
 */
export interface BenchmarkComparison {
  /** Identifier supplied by the caller for the compared point. */
  pointId: string;
  /** Model, framework and hardware of the benchmark entry that was matched. */
  model: string;
  framework: string;
  hardware: string;
  /** The user metrics exactly as passed in. */
  your_metrics: {
    p95_latency_ms?: number;
    ttft_ms?: number;
    throughput_tps?: number;
  };
  /** The benchmark entry's reference metrics. */
  benchmark_metrics: BenchmarkMetrics;
  /** Per-metric gaps; a key is present only when both sides had a non-zero value. */
  gaps: {
    p95_latency?: { value: number; percent: number; description: string };
    ttft?: { value: number; percent: number; description: string };
    throughput?: { value: number; percent: number; description: string };
  };
  /** Human-readable summary across all gaps. */
  overall_gap: string;
  /** Copied from the matched benchmark entry, when it has one. */
  optimal_config?: Record<string, unknown>;
}
|
|
63
|
+
|
|
64
|
+
// Module-level cache of the parsed benchmark file; stays null until loadBenchmarks() first succeeds.
let benchmarkData: BenchmarkData | null = null;
|
|
65
|
+
|
|
66
|
+
/**
 * Return the benchmark data, reading and parsing the bundled JSON file on first use.
 * The parsed result is cached in `benchmarkData`, so later calls skip the file read.
 *
 * @returns The parsed benchmark data.
 * @throws Error prefixed with "Failed to load benchmark data:" when the file cannot be read or parsed.
 */
function loadBenchmarks(): BenchmarkData {
  if (!benchmarkData) {
    try {
      // Path is resolved relative to this module's directory.
      const raw = readFileSync(join(__dirname, '../../data/inferencemax.json'), 'utf-8');
      benchmarkData = JSON.parse(raw) as BenchmarkData;
    } catch (error) {
      throw new Error(`Failed to load benchmark data: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }

  return benchmarkData;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Canonicalize a model name for benchmark lookups.
 *
 * The name is lower-cased, every run of underscores, whitespace and hyphens
 * collapses to a single hyphen, and a hyphen at either end is removed.
 *
 * @param model - Model name as supplied by the caller.
 * @returns The normalized name.
 */
function normalizeModel(model: string): string {
  const lowered = model.toLowerCase();
  const hyphenated = lowered.replace(/[_\s-]+/g, '-');
  return hyphenated.replace(/^-|-$/g, '');
}
|
|
92
|
+
|
|
93
|
+
/**
 * Find the benchmark entry for a model.
 *
 * Lookup order:
 *  1. exact key `<normalized model>:<framework>:<hardware>`;
 *  2. alias of the normalized model name in `model_aliases`;
 *  3. `<normalized model>:api:api`;
 *  4. first entry whose normalized model name contains, or is contained in,
 *     the requested name.
 *
 * @param model - Model name; normalized before matching.
 * @param framework - Framework label for the exact-key lookup (default `'api'`).
 * @param hardware - Hardware label for the exact-key lookup (default `'api'`).
 * @returns The matching entry, or `null` when nothing matches or the model name normalizes to an empty string.
 * @throws Error when the benchmark data cannot be loaded.
 */
export function getBenchmark(
  model: string,
  framework = 'api',
  hardware = 'api'
): BenchmarkEntry | null {
  const data = loadBenchmarks();
  const normalizedModel = normalizeModel(model);

  // An empty name is a substring of every model name, so the partial-match
  // fallback would return an arbitrary entry; treat it as "no benchmark".
  if (normalizedModel === '') {
    return null;
  }

  // Try exact key match first
  const exactKey = `${normalizedModel}:${framework}:${hardware}`;
  if (data.benchmarks[exactKey]) {
    return data.benchmarks[exactKey];
  }

  // Try alias lookup
  const alias = data.model_aliases[normalizedModel];
  if (alias && data.benchmarks[alias]) {
    return data.benchmarks[alias];
  }

  // Try model name with default framework:hardware
  const defaultKey = `${normalizedModel}:api:api`;
  if (data.benchmarks[defaultKey]) {
    return data.benchmarks[defaultKey];
  }

  // Try partial model name matches
  for (const key of Object.keys(data.benchmarks)) {
    const benchModel = normalizeModel(data.benchmarks[key].model);
    // Skip entries with an empty model name for the same reason as above.
    if (benchModel === '') continue;
    if (benchModel.includes(normalizedModel) || normalizedModel.includes(benchModel)) {
      return data.benchmarks[key];
    }
  }

  return null;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Compare a point's measured metrics with the benchmark for its model.
 *
 * A gap is computed only for metrics where both the user value and the
 * benchmark value are non-zero. Latency gaps are "yours minus benchmark";
 * the throughput gap is stored with the same sign convention, while its
 * description is phrased from the benchmark's side ("below"/"above").
 *
 * @param pointId - Identifier echoed back in the result.
 * @param model - Model name used to look up the benchmark.
 * @param userMetrics - Measured p95 latency, TTFT and throughput (all optional).
 * @param framework - Framework label for the lookup (default `'api'`).
 * @param hardware - Hardware label for the lookup (default `'api'`).
 * @returns The comparison, or `null` when no benchmark matches the model.
 */
export function compareToBenchmark(
  pointId: string,
  model: string,
  userMetrics: {
    p95_latency_ms?: number;
    ttft_ms?: number;
    throughput_tps?: number;
  },
  framework = 'api',
  hardware = 'api'
): BenchmarkComparison | null {
  const benchmark = getBenchmark(model, framework, hardware);
  if (!benchmark) {
    return null;
  }

  const reference = benchmark.metrics;
  const gaps: BenchmarkComparison['gaps'] = {};

  // Shared shape for the two millisecond metrics, where lower is better.
  const latencyGap = (yours: number, theirs: number) => {
    const delta = yours - theirs;
    const pct = Math.round((delta / theirs) * 100);
    return {
      value: delta,
      percent: pct,
      description: formatGapDescription(delta, pct, 'ms', 'slower', 'faster'),
    };
  };

  if (userMetrics.p95_latency_ms && reference.p95_latency_ms) {
    gaps.p95_latency = latencyGap(userMetrics.p95_latency_ms, reference.p95_latency_ms);
  }

  if (userMetrics.ttft_ms && reference.ttft_ms) {
    gaps.ttft = latencyGap(userMetrics.ttft_ms, reference.ttft_ms);
  }

  // Throughput is inverted (higher is better): measure the shortfall against
  // the benchmark, then flip the sign for the stored value and percent.
  if (userMetrics.throughput_tps && reference.throughput_tps) {
    const shortfall = reference.throughput_tps - userMetrics.throughput_tps;
    const shortfallPct = Math.round((shortfall / reference.throughput_tps) * 100);
    gaps.throughput = {
      value: -shortfall,
      percent: -shortfallPct,
      description: formatGapDescription(shortfall, shortfallPct, 'tps', 'below', 'above'),
    };
  }

  return {
    pointId,
    model: benchmark.model,
    framework: benchmark.framework,
    hardware: benchmark.hardware,
    your_metrics: userMetrics,
    benchmark_metrics: reference,
    gaps,
    overall_gap: calculateOverallGap(gaps),
    optimal_config: benchmark.optimal_config,
  };
}
|
|
200
|
+
|
|
201
|
+
/**
 * Build a human-readable description of a single metric's gap to the benchmark.
 *
 * @param diff - Signed gap. A positive value selects `worseWord`, a negative
 *   value selects `betterWord`.
 * @param percent - Signed gap expressed as a percentage of the benchmark value.
 * @param unit - Suffix appended to the absolute gap (for example `'ms'` or `'tps'`).
 * @param worseWord - Word used when `diff` is positive.
 * @param betterWord - Word used when `diff` is negative.
 * @returns `'On par with benchmark'` when either `diff` or `percent` is zero,
 *   an `"N.Nx <word>"` multiplier when the absolute percentage exceeds 100,
 *   otherwise `"<percent>% <word> (<gap><unit>)"`.
 */
function formatGapDescription(
  diff: number,
  percent: number,
  unit: string,
  worseWord: string,
  betterWord: string
): string {
  if (diff === 0 || percent === 0) {
    return 'On par with benchmark';
  }

  const absPercent = Math.abs(percent);
  const word = diff > 0 ? worseWord : betterWord;

  if (absPercent > 100) {
    // Express large gaps as a ratio: a 200% gap is reported as "3.0x".
    const multiplier = (absPercent / 100 + 1).toFixed(1);
    return `${multiplier}x ${word}`;
  }

  // Round the displayed gap to two decimals so floating-point subtraction
  // noise (e.g. 0.30000000000000004) never reaches the user-facing text.
  // Integer gaps are unaffected.
  const absDiff = Math.round(Math.abs(diff) * 100) / 100;
  const sign = diff > 0 ? '+' : '';

  return `${absPercent}% ${word} (${sign}${absDiff}${unit})`;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Condense the per-metric gaps into a single summary line.
 *
 * A metric is reported as an issue when its `percent` crosses a fixed
 * threshold: above 50 for P95 latency and TTFT, below -30 for throughput.
 *
 * @param gaps - Per-metric gap entries; any of them may be absent.
 * @returns The flagged issues joined with `', '`; otherwise
 *   `'No metrics to compare'` when `gaps` has no keys, or
 *   `'Performing within benchmark range'` when nothing crossed a threshold.
 */
function calculateOverallGap(gaps: BenchmarkComparison['gaps']): string {
  const { p95_latency: latency, ttft, throughput } = gaps;

  // Keep the reporting order fixed: latency, then TTFT, then throughput.
  const flagged = [
    latency && latency.percent > 50 ? `latency ${latency.description}` : null,
    ttft && ttft.percent > 50 ? `TTFT ${ttft.description}` : null,
    throughput && throughput.percent < -30 ? `throughput ${throughput.description}` : null,
  ].filter((entry): entry is string => entry !== null);

  if (flagged.length > 0) {
    return flagged.join(', ');
  }

  return Object.keys(gaps).length === 0
    ? 'No metrics to compare'
    : 'Performing within benchmark range';
}
|
|
248
|
+
|
|
249
|
+
/**
 * List every benchmark entry in the loaded benchmark data.
 *
 * @returns The values of the `benchmarks` collection returned by `loadBenchmarks()`.
 */
export function listBenchmarks(): BenchmarkEntry[] {
  const { benchmarks } = loadBenchmarks();
  return Object.values(benchmarks);
}
|
|
256
|
+
|
|
257
|
+
/**
 * Report which revision of the benchmark data is loaded.
 *
 * @returns The data's `version` together with its `last_updated` value,
 *   exposed under the camelCase key `lastUpdated`.
 */
export function getBenchmarkVersion(): { version: string; lastUpdated: string } {
  const { version, last_updated: lastUpdated } = loadBenchmarks();
  return { version, lastUpdated };
}
|
|
267
|
+
|
|
268
|
+
/**
 * Tell whether benchmark data can be found for a model.
 *
 * Only the model is passed to `getBenchmark`, so framework and hardware fall
 * back to whatever defaults that function defines.
 *
 * @param model - Model identifier to look up.
 * @returns `true` when `getBenchmark(model)` yields anything other than `null`.
 */
export function hasBenchmark(model: string): boolean {
  const entry = getBenchmark(model);
  return entry !== null;
}
|
|
274
|
+
|
|
275
|
+
/**
 * Render a benchmark comparison as multi-line plain text.
 *
 * The output contains, in order: a model/framework/hardware header, one line
 * per metric that has a gap entry (P95 latency, TTFT, throughput), the overall
 * summary, and, when present, the key/value pairs of the optimal config.
 *
 * @param comparison - Comparison result to render.
 * @returns The rendered lines joined with `'\n'`.
 */
export function formatBenchmarkComparison(comparison: BenchmarkComparison): string {
  const {
    gaps,
    your_metrics: mine,
    benchmark_metrics: reference,
    optimal_config: optimal,
  } = comparison;

  const out: string[] = [
    `Model: ${comparison.model}`,
    `Framework: ${comparison.framework} | Hardware: ${comparison.hardware}`,
    '',
  ];

  // A metric line is emitted only when a gap was computed for that metric.
  if (gaps.p95_latency) {
    out.push(`P95 Latency: Your ${mine.p95_latency_ms}ms | Benchmark ${reference.p95_latency_ms}ms | ${gaps.p95_latency.description}`);
  }
  if (gaps.ttft) {
    out.push(`TTFT: Your ${mine.ttft_ms}ms | Benchmark ${reference.ttft_ms}ms | ${gaps.ttft.description}`);
  }
  if (gaps.throughput) {
    out.push(`Throughput: Your ${mine.throughput_tps} tps | Benchmark ${reference.throughput_tps} tps | ${gaps.throughput.description}`);
  }

  out.push('', `Overall: ${comparison.overall_gap}`);

  if (optimal) {
    out.push('', 'Optimal Config:');
    Object.entries(optimal).forEach(([key, value]) => {
      out.push(` ${key}: ${value}`);
    });
  }

  return out.join('\n');
}
|