npm - @peakinfer/cli - Versions diffs - 1.0.133 - Mend

@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (367) hide show

package/.claude/settings.local.json +8 -0
package/.env.example +6 -0
package/.github/workflows/peakinfer.yml +64 -0
package/CHANGELOG.md +31 -0
package/LICENSE +190 -0
package/README.md +335 -0
package/data/inferencemax.json +274 -0
package/dist/agent-analyzer.d.ts +45 -0
package/dist/agent-analyzer.d.ts.map +1 -0
package/dist/agent-analyzer.js +374 -0
package/dist/agent-analyzer.js.map +1 -0
package/dist/agent.d.ts +76 -0
package/dist/agent.d.ts.map +1 -0
package/dist/agent.js +965 -0
package/dist/agent.js.map +1 -0
package/dist/agents/correlation-analyzer.d.ts +34 -0
package/dist/agents/correlation-analyzer.d.ts.map +1 -0
package/dist/agents/correlation-analyzer.js +261 -0
package/dist/agents/correlation-analyzer.js.map +1 -0
package/dist/agents/index.d.ts +91 -0
package/dist/agents/index.d.ts.map +1 -0
package/dist/agents/index.js +111 -0
package/dist/agents/index.js.map +1 -0
package/dist/agents/runtime-analyzer.d.ts +38 -0
package/dist/agents/runtime-analyzer.d.ts.map +1 -0
package/dist/agents/runtime-analyzer.js +244 -0
package/dist/agents/runtime-analyzer.js.map +1 -0
package/dist/analysis-types.d.ts +500 -0
package/dist/analysis-types.d.ts.map +1 -0
package/dist/analysis-types.js +11 -0
package/dist/analysis-types.js.map +1 -0
package/dist/analytics.d.ts +25 -0
package/dist/analytics.d.ts.map +1 -0
package/dist/analytics.js +94 -0
package/dist/analytics.js.map +1 -0
package/dist/analyzer.d.ts +48 -0
package/dist/analyzer.d.ts.map +1 -0
package/dist/analyzer.js +547 -0
package/dist/analyzer.js.map +1 -0
package/dist/artifacts.d.ts +44 -0
package/dist/artifacts.d.ts.map +1 -0
package/dist/artifacts.js +165 -0
package/dist/artifacts.js.map +1 -0
package/dist/benchmarks/index.d.ts +88 -0
package/dist/benchmarks/index.d.ts.map +1 -0
package/dist/benchmarks/index.js +205 -0
package/dist/benchmarks/index.js.map +1 -0
package/dist/cli.d.ts +3 -0
package/dist/cli.d.ts.map +1 -0
package/dist/cli.js +427 -0
package/dist/cli.js.map +1 -0
package/dist/commands/ci.d.ts +19 -0
package/dist/commands/ci.d.ts.map +1 -0
package/dist/commands/ci.js +253 -0
package/dist/commands/ci.js.map +1 -0
package/dist/commands/config.d.ts +16 -0
package/dist/commands/config.d.ts.map +1 -0
package/dist/commands/config.js +249 -0
package/dist/commands/config.js.map +1 -0
package/dist/commands/demo.d.ts +15 -0
package/dist/commands/demo.d.ts.map +1 -0
package/dist/commands/demo.js +106 -0
package/dist/commands/demo.js.map +1 -0
package/dist/commands/export.d.ts +14 -0
package/dist/commands/export.d.ts.map +1 -0
package/dist/commands/export.js +209 -0
package/dist/commands/export.js.map +1 -0
package/dist/commands/history.d.ts +15 -0
package/dist/commands/history.d.ts.map +1 -0
package/dist/commands/history.js +389 -0
package/dist/commands/history.js.map +1 -0
package/dist/commands/template.d.ts +14 -0
package/dist/commands/template.d.ts.map +1 -0
package/dist/commands/template.js +341 -0
package/dist/commands/template.js.map +1 -0
package/dist/commands/validate-map.d.ts +12 -0
package/dist/commands/validate-map.d.ts.map +1 -0
package/dist/commands/validate-map.js +274 -0
package/dist/commands/validate-map.js.map +1 -0
package/dist/commands/whatif.d.ts +17 -0
package/dist/commands/whatif.d.ts.map +1 -0
package/dist/commands/whatif.js +206 -0
package/dist/commands/whatif.js.map +1 -0
package/dist/comparison.d.ts +38 -0
package/dist/comparison.d.ts.map +1 -0
package/dist/comparison.js +223 -0
package/dist/comparison.js.map +1 -0
package/dist/config.d.ts +42 -0
package/dist/config.d.ts.map +1 -0
package/dist/config.js +158 -0
package/dist/config.js.map +1 -0
package/dist/connectors/helicone.d.ts +9 -0
package/dist/connectors/helicone.d.ts.map +1 -0
package/dist/connectors/helicone.js +106 -0
package/dist/connectors/helicone.js.map +1 -0
package/dist/connectors/index.d.ts +37 -0
package/dist/connectors/index.d.ts.map +1 -0
package/dist/connectors/index.js +65 -0
package/dist/connectors/index.js.map +1 -0
package/dist/connectors/langsmith.d.ts +9 -0
package/dist/connectors/langsmith.d.ts.map +1 -0
package/dist/connectors/langsmith.js +122 -0
package/dist/connectors/langsmith.js.map +1 -0
package/dist/connectors/types.d.ts +83 -0
package/dist/connectors/types.d.ts.map +1 -0
package/dist/connectors/types.js +98 -0
package/dist/connectors/types.js.map +1 -0
package/dist/cost-estimator.d.ts +46 -0
package/dist/cost-estimator.d.ts.map +1 -0
package/dist/cost-estimator.js +104 -0
package/dist/cost-estimator.js.map +1 -0
package/dist/costs.d.ts +57 -0
package/dist/costs.d.ts.map +1 -0
package/dist/costs.js +251 -0
package/dist/costs.js.map +1 -0
package/dist/counterfactuals.d.ts +29 -0
package/dist/counterfactuals.d.ts.map +1 -0
package/dist/counterfactuals.js +448 -0
package/dist/counterfactuals.js.map +1 -0
package/dist/enhancement-prompts.d.ts +41 -0
package/dist/enhancement-prompts.d.ts.map +1 -0
package/dist/enhancement-prompts.js +88 -0
package/dist/enhancement-prompts.js.map +1 -0
package/dist/envelopes.d.ts +20 -0
package/dist/envelopes.d.ts.map +1 -0
package/dist/envelopes.js +790 -0
package/dist/envelopes.js.map +1 -0
package/dist/format-normalizer.d.ts +71 -0
package/dist/format-normalizer.d.ts.map +1 -0
package/dist/format-normalizer.js +1331 -0
package/dist/format-normalizer.js.map +1 -0
package/dist/history.d.ts +79 -0
package/dist/history.d.ts.map +1 -0
package/dist/history.js +313 -0
package/dist/history.js.map +1 -0
package/dist/html.d.ts +11 -0
package/dist/html.d.ts.map +1 -0
package/dist/html.js +463 -0
package/dist/html.js.map +1 -0
package/dist/impact.d.ts +42 -0
package/dist/impact.d.ts.map +1 -0
package/dist/impact.js +443 -0
package/dist/impact.js.map +1 -0
package/dist/index.d.ts +26 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +34 -0
package/dist/index.js.map +1 -0
package/dist/insights.d.ts +5 -0
package/dist/insights.d.ts.map +1 -0
package/dist/insights.js +271 -0
package/dist/insights.js.map +1 -0
package/dist/joiner.d.ts +9 -0
package/dist/joiner.d.ts.map +1 -0
package/dist/joiner.js +247 -0
package/dist/joiner.js.map +1 -0
package/dist/orchestrator.d.ts +34 -0
package/dist/orchestrator.d.ts.map +1 -0
package/dist/orchestrator.js +827 -0
package/dist/orchestrator.js.map +1 -0
package/dist/pdf.d.ts +26 -0
package/dist/pdf.d.ts.map +1 -0
package/dist/pdf.js +84 -0
package/dist/pdf.js.map +1 -0
package/dist/prediction.d.ts +33 -0
package/dist/prediction.d.ts.map +1 -0
package/dist/prediction.js +316 -0
package/dist/prediction.js.map +1 -0
package/dist/prompts/loader.d.ts +38 -0
package/dist/prompts/loader.d.ts.map +1 -0
package/dist/prompts/loader.js +60 -0
package/dist/prompts/loader.js.map +1 -0
package/dist/renderer.d.ts +64 -0
package/dist/renderer.d.ts.map +1 -0
package/dist/renderer.js +923 -0
package/dist/renderer.js.map +1 -0
package/dist/runid.d.ts +57 -0
package/dist/runid.d.ts.map +1 -0
package/dist/runid.js +199 -0
package/dist/runid.js.map +1 -0
package/dist/runtime.d.ts +29 -0
package/dist/runtime.d.ts.map +1 -0
package/dist/runtime.js +366 -0
package/dist/runtime.js.map +1 -0
package/dist/scanner.d.ts +11 -0
package/dist/scanner.d.ts.map +1 -0
package/dist/scanner.js +426 -0
package/dist/scanner.js.map +1 -0
package/dist/templates.d.ts +120 -0
package/dist/templates.d.ts.map +1 -0
package/dist/templates.js +429 -0
package/dist/templates.js.map +1 -0
package/dist/tools/index.d.ts +153 -0
package/dist/tools/index.d.ts.map +1 -0
package/dist/tools/index.js +177 -0
package/dist/tools/index.js.map +1 -0
package/dist/types.d.ts +3647 -0
package/dist/types.d.ts.map +1 -0
package/dist/types.js +703 -0
package/dist/types.js.map +1 -0
package/dist/version.d.ts +7 -0
package/dist/version.d.ts.map +1 -0
package/dist/version.js +23 -0
package/dist/version.js.map +1 -0
package/docs/demo-guide.md +423 -0
package/docs/events-format.md +295 -0
package/docs/inferencemap-spec.md +344 -0
package/docs/migration-v2.md +293 -0
package/fixtures/demo/precomputed.json +142 -0
package/fixtures/demo-project/README.md +52 -0
package/fixtures/demo-project/ai-service.ts +65 -0
package/fixtures/demo-project/sample-events.jsonl +15 -0
package/fixtures/demo-project/src/ai-service.ts +128 -0
package/fixtures/demo-project/src/llm-client.ts +155 -0
package/package.json +65 -0
package/prompts/agent-analyzer.yaml +47 -0
package/prompts/ci-gate.yaml +98 -0
package/prompts/correlation-analyzer.yaml +178 -0
package/prompts/format-normalizer.yaml +46 -0
package/prompts/peak-performance.yaml +180 -0
package/prompts/pr-comment.yaml +111 -0
package/prompts/runtime-analyzer.yaml +189 -0
package/prompts/unified-analyzer.yaml +241 -0
package/schemas/inference-map.v0.1.json +215 -0
package/scripts/benchmark.ts +394 -0
package/scripts/demo-v1.5.sh +158 -0
package/scripts/sync-from-site.sh +197 -0
package/scripts/validate-sync.sh +178 -0
package/src/agent-analyzer.ts +481 -0
package/src/agent.ts +1232 -0
package/src/agents/correlation-analyzer.ts +353 -0
package/src/agents/index.ts +235 -0
package/src/agents/runtime-analyzer.ts +343 -0
package/src/analysis-types.ts +558 -0
package/src/analytics.ts +100 -0
package/src/analyzer.ts +692 -0
package/src/artifacts.ts +218 -0
package/src/benchmarks/index.ts +309 -0
package/src/cli.ts +503 -0
package/src/commands/ci.ts +336 -0
package/src/commands/config.ts +288 -0
package/src/commands/demo.ts +175 -0
package/src/commands/export.ts +297 -0
package/src/commands/history.ts +425 -0
package/src/commands/template.ts +385 -0
package/src/commands/validate-map.ts +324 -0
package/src/commands/whatif.ts +272 -0
package/src/comparison.ts +283 -0
package/src/config.ts +188 -0
package/src/connectors/helicone.ts +164 -0
package/src/connectors/index.ts +93 -0
package/src/connectors/langsmith.ts +179 -0
package/src/connectors/types.ts +180 -0
package/src/cost-estimator.ts +146 -0
package/src/costs.ts +347 -0
package/src/counterfactuals.ts +516 -0
package/src/enhancement-prompts.ts +118 -0
package/src/envelopes.ts +814 -0
package/src/format-normalizer.ts +1486 -0
package/src/history.ts +400 -0
package/src/html.ts +512 -0
package/src/impact.ts +522 -0
package/src/index.ts +83 -0
package/src/insights.ts +341 -0
package/src/joiner.ts +289 -0
package/src/orchestrator.ts +1015 -0
package/src/pdf.ts +110 -0
package/src/prediction.ts +392 -0
package/src/prompts/loader.ts +88 -0
package/src/renderer.ts +1045 -0
package/src/runid.ts +261 -0
package/src/runtime.ts +450 -0
package/src/scanner.ts +508 -0
package/src/templates.ts +561 -0
package/src/tools/index.ts +214 -0
package/src/types.ts +873 -0
package/src/version.ts +24 -0
package/templates/context-accumulation.yaml +23 -0
package/templates/cost-concentration.yaml +20 -0
package/templates/dead-code.yaml +20 -0
package/templates/latency-explainer.yaml +23 -0
package/templates/optimizations/ab-testing-framework.yaml +74 -0
package/templates/optimizations/api-gateway-optimization.yaml +81 -0
package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
package/templates/optimizations/comprehensive-apm.yaml +76 -0
package/templates/optimizations/context-window-optimization.yaml +91 -0
package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
package/templates/optimizations/distributed-training-optimization.yaml +77 -0
package/templates/optimizations/document-analysis-edge.yaml +77 -0
package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
package/templates/optimizations/domain-specific-distillation.yaml +78 -0
package/templates/optimizations/error-handling-optimization.yaml +76 -0
package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
package/templates/optimizations/long-context-memory-management.yaml +78 -0
package/templates/optimizations/max-tokens-optimization.yaml +76 -0
package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
package/templates/optimizations/multi-framework-resilience.yaml +75 -0
package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
package/templates/optimizations/quality-monitoring.yaml +74 -0
package/templates/optimizations/realtime-budget-controls.yaml +74 -0
package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
package/templates/optimizations/smart-model-routing.yaml +96 -0
package/templates/optimizations/streaming-batch-selection.yaml +167 -0
package/templates/optimizations/system-prompt-optimization.yaml +75 -0
package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
package/templates/overpowered-extraction.yaml +32 -0
package/templates/overpowered-model.yaml +31 -0
package/templates/prompt-bloat.yaml +24 -0
package/templates/retry-explosion.yaml +28 -0
package/templates/schema/insight.schema.json +113 -0
package/templates/schema/optimization.schema.json +180 -0
package/templates/streaming-drift.yaml +30 -0
package/templates/throughput-gap.yaml +21 -0
package/templates/token-underutilization.yaml +28 -0
package/templates/untested-fallback.yaml +21 -0
package/tests/accuracy/drift-detection.test.ts +184 -0
package/tests/accuracy/false-positives.test.ts +166 -0
package/tests/accuracy/templates.test.ts +205 -0
package/tests/action/commands.test.ts +125 -0
package/tests/action/comments.test.ts +347 -0
package/tests/cli.test.ts +203 -0
package/tests/comparison.test.ts +309 -0
package/tests/correlation-analyzer.test.ts +534 -0
package/tests/counterfactuals.test.ts +347 -0
package/tests/fixtures/events/missing-id.jsonl +1 -0
package/tests/fixtures/events/missing-input.jsonl +1 -0
package/tests/fixtures/events/missing-latency.jsonl +1 -0
package/tests/fixtures/events/missing-model.jsonl +1 -0
package/tests/fixtures/events/missing-output.jsonl +1 -0
package/tests/fixtures/events/missing-provider.jsonl +1 -0
package/tests/fixtures/events/missing-ts.jsonl +1 -0
package/tests/fixtures/events/valid.csv +3 -0
package/tests/fixtures/events/valid.json +1 -0
package/tests/fixtures/events/valid.jsonl +2 -0
package/tests/fixtures/events/with-callsite.jsonl +1 -0
package/tests/fixtures/events/with-intent.jsonl +1 -0
package/tests/fixtures/events/wrong-type.jsonl +1 -0
package/tests/fixtures/repos/empty/.gitkeep +0 -0
package/tests/fixtures/repos/hybrid-router/router.py +35 -0
package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
package/tests/fixtures/repos/saas-openai/client.py +26 -0
package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
package/tests/github-action.test.ts +292 -0
package/tests/insights.test.ts +878 -0
package/tests/joiner.test.ts +168 -0
package/tests/performance/action-latency.test.ts +132 -0
package/tests/performance/benchmark.test.ts +189 -0
package/tests/performance/cli-latency.test.ts +102 -0
package/tests/pr-comment.test.ts +313 -0
package/tests/prediction.test.ts +296 -0
package/tests/runtime-analyzer.test.ts +375 -0
package/tests/runtime.test.ts +205 -0
package/tests/scanner.test.ts +122 -0
package/tests/template-conformance.test.ts +526 -0
package/tests/unit/cost-calculator.test.ts +303 -0
package/tests/unit/credits.test.ts +180 -0
package/tests/unit/inference-map.test.ts +276 -0
package/tests/unit/schema.test.ts +300 -0
package/tsconfig.json +20 -0
package/vitest.config.ts +14 -0

package/prompts/unified-analyzer.yaml ADDED Viewed

@@ -0,0 +1,241 @@
+# =============================================================================
+# SYNC NOTE: This file is copied from peakinfer-site (SOURCE OF TRUTH)
+# Source: peakinfer-site/prompts/unified-analyzer.yaml
+#
+# DO NOT MODIFY THIS FILE DIRECTLY IN THE CLI REPO.
+# All changes must be made in peakinfer-site first, then synced here.
+# =============================================================================
+id: unified-analyzer
+name: Unified Static Analyzer
+version: "1.7"
+description: |
+  Unified multi-dimensional LLM inference analyzer.
+  Single-call approach: one LLM call per file analyzing all 4 dimensions
+  (cost, latency, throughput, reliability) with actionable fixes.
+system: |
+  You are an expert LLM inference performance analyzer. Analyze the provided code file and return a comprehensive performance analysis with actionable fixes.
+  Your task:
+  1. Find all LLM inference points including:
+     - OpenAI SDK (client.chat.completions.create, openai.ChatCompletion.create, client.completions.create)
+     - Anthropic SDK (client.messages.create, anthropic.messages)
+     - Google AI / Vertex AI (genai.GenerativeModel, generate_content)
+     - Azure OpenAI (AzureOpenAI)
+     - AWS Bedrock (invoke_model, bedrock.converse, BedrockRuntime)
+     - Mistral, Cohere, Groq, Together AI, Fireworks, Replicate, Perplexity
+     - Vercel AI SDK (@ai-sdk/openai, @ai-sdk/anthropic, streamText, generateText, streamObject)
+     - LangChain (ChatOpenAI, ChatAnthropic, LLMChain, ConversationChain)
+     - LlamaIndex (llama_index.llms)
+     - Self-hosted (vLLM, TGI, TensorRT-LLM, Triton, Ollama, llama.cpp, sglang)
+     - HTTP calls to /v1/chat/completions or similar LLM API endpoints
+     - Claude Agent SDK usage
+  2. For each inference point, analyze across ALL 4 dimensions: cost, latency, throughput, and reliability
+  3. For each issue found, provide the EXACT original code (the entire function when the fix is function-level) and a suggested fix
+  Return JSON in this exact format:
+  {
+    "inference_points": [
+      {
+        "id": "unique_id",
+        "line": <line_number>,
+        "provider": "<provider_name>",
+        "model": "<model_name_or_null>",
+        "call_type": "direct|wrapper|framework|http",
+        "original_code": "<exact code line from source>",
+        "cost_profile": {
+          "tier": "premium|standard|budget|unknown",
+          "estimated_cost_per_call": <number>,
+          "optimizations": [{"type": "string", "description": "string", "savings_percent": <number>}]
+        },
+        "latency_profile": {
+          "estimated_p95_ms": <number>,
+          "is_blocking": <boolean>,
+          "has_streaming": <boolean>,
+          "optimizations": [{"type": "string", "description": "string", "improvement_percent": <number>}]
+        },
+        "throughput_profile": {
+          "has_rate_limiting": <boolean>,
+          "has_batching": <boolean>,
+          "bottlenecks": [{"type": "string", "description": "string"}],
+          "optimizations": [{"type": "string", "description": "string", "improvement": "string"}]
+        },
+        "reliability_profile": {
+          "has_error_handling": <boolean>,
+          "has_retry": <boolean>,
+          "has_timeout": <boolean>,
+          "has_fallback": <boolean>,
+          "anti_patterns": [{"type": "string", "description": "string"}],
+          "optimizations": [{"type": "string", "description": "string", "priority": "high|medium|low"}]
+        },
+        "issues": [
+          {
+            "type": "model_downgrade|add_streaming|add_error_handling|add_retry|add_timeout|add_fallback",
+            "severity": "critical|warning|info",
+            "headline": "<short description>",
+            "evidence": "<why this is a problem>",
+            "original_code": "<exact line(s) that need to change>",
+            "suggested_fix": "<replacement code with correct indentation>",
+            "ai_agent_prompt": "<detailed instructions for AI agents like Claude to fix this>"
+          }
+        ]
+      }
+    ],
+    "imports": {
+      "llm_providers": ["provider1", "provider2"],
+      "frameworks": ["framework1"]
+    },
+    "insights": [
+      {
+        "severity": "critical|warning|info",
+        "category": "cost|latency|throughput|reliability",
+        "headline": "string",
+        "evidence": "string",
+        "recommendation": "string"
+      }
+    ]
+  }
+  ============================================================
+  CRITICAL: CODE FIX REQUIREMENTS (READ CAREFULLY)
+  ============================================================
+  ABSOLUTELY FORBIDDEN in suggested_fix - these will break the tool:
+  - "// ... existing code ..."
+  - "// existing logic"
+  - "// handle error"
+  - "// retry logic here"
+  - "// add your code here"
+  - Any placeholder or ellipsis comment
+  REQUIRED in suggested_fix:
+  - The COMPLETE, COMPILABLE replacement code
+  - Copy ALL the original code lines and ADD the fix around them
+  - Real error class names (Anthropic.RateLimitError, not generic Error)
+  - Actual implementation, not descriptions
+  For original_code:
+  - Include the ENTIRE function body that needs to change
+  - This will be shown in GitHub's "Suggested change" UI
+  ============================================================
+  EXAMPLE: Adding error handling to a function
+  ============================================================
+  If the source code is:
+  ```
+  export async function chat(prompt: string): Promise<string> {
+    const response = await client.messages.create({
+      model: 'claude-sonnet-4-20250514',
+      max_tokens: 2000,
+      messages: [{ role: 'user', content: prompt }],
+    });
+    return response.content[0].type === 'text' ? response.content[0].text : '';
+  }
+  ```
+  Then the issue should be:
+  {
+    "type": "add_error_handling",
+    "severity": "critical",
+    "headline": "No error handling",
+    "evidence": "LLM calls can fail unexpectedly.",
+    "original_code": "export async function chat(prompt: string): Promise<string> {\n  const response = await client.messages.create({\n    model: 'claude-sonnet-4-20250514',\n    max_tokens: 2000,\n    messages: [{ role: 'user', content: prompt }],\n  });\n  return response.content[0].type === 'text' ? response.content[0].text : '';\n}",
+    "suggested_fix": "export async function chat(prompt: string): Promise<string> {\n  try {\n    const response = await client.messages.create({\n      model: 'claude-sonnet-4-20250514',\n      max_tokens: 2000,\n      messages: [{ role: 'user', content: prompt }],\n    });\n    return response.content[0].type === 'text' ? response.content[0].text : '';\n  } catch (error) {\n    if (error instanceof Anthropic.RateLimitError) {\n      await new Promise(r => setTimeout(r, 1000));\n      return chat(prompt);\n    }\n    throw error;\n  }\n}",
+    "ai_agent_prompt": "Wrap the Anthropic API call in try-catch with retry on rate limit."
+  }
+  Notice: suggested_fix contains the COMPLETE function with REAL code, not placeholders.
+  Be thorough but concise. Focus on actionable insights across all 4 performance dimensions.
+  IMPORTANT: You MUST detect ALL of these LLM inference patterns:
+  - OpenAI SDK: client.chat.completions.create, openai.ChatCompletion.create, client.completions.create, client.embeddings.create
+  - Anthropic SDK: client.messages.create, anthropic.messages, create_message
+  - Google AI: genai.GenerativeModel, generate_content, vertexai.generative_models
+  - Azure OpenAI: AzureOpenAI client calls
+  - AWS Bedrock: invoke_model, bedrock.converse, BedrockRuntime
+  - Mistral: MistralClient, mistral.chat
+  - Cohere: cohere.chat, cohere.generate, CohereClient
+  - Groq: groq.chat, Groq()
+  - Together AI: together.chat, Together()
+  - Fireworks: fireworks.chat, Fireworks()
+  - Replicate: replicate.run, replicate.predictions.create
+  - Perplexity: perplexity.chat
+  - Vercel AI SDK: streamText(), generateText(), streamObject(), anthropic(), openai(), google() from @ai-sdk/*
+  - LangChain: ChatOpenAI, ChatAnthropic, ChatGoogleGenerativeAI, LLMChain, ConversationChain, .invoke(), .ainvoke()
+  - LlamaIndex: llama_index.llms, OpenAILike, query_engine
+  - Self-hosted: vLLM, TGI (InferenceClient), TensorRT-LLM, Triton, Ollama, llama.cpp, sglang
+  - HTTP calls: fetch/axios/requests to /v1/chat/completions, /v1/completions, or similar LLM endpoints
+  - Claude Agent SDK: query() function usage
+user_template: |
+  Analyze this {{language}} file for LLM inference points and their performance characteristics:
+  File: {{file_path}}
+  ```{{language}}
+  {{content}}
+  ```
+  Find ALL LLM inference calls in this file. Look for:
+  - OpenAI (client.chat.completions.create, etc.)
+  - Anthropic (client.messages.create, etc.)
+  - Google AI, Azure OpenAI, AWS Bedrock
+  - Mistral, Cohere, Groq, Together, Fireworks, Replicate, Perplexity
+  - Vercel AI SDK (streamText, generateText, @ai-sdk/*)
+  - LangChain, LlamaIndex
+  - Self-hosted (vLLM, TGI, Ollama, llama.cpp)
+  - HTTP calls to LLM endpoints
+  Return the JSON analysis for each inference point found.
+input_schema:
+  file_path: string
+  content: string
+  language: string
+output_format:
+  inference_points:
+    - id: string
+      line: number
+      provider: string
+      model: string|null
+      call_type: "direct|wrapper|framework|http"
+      original_code: string
+      cost_profile: object
+      latency_profile: object
+      throughput_profile: object
+      reliability_profile: object
+      issues:
+        - type: string
+          severity: "critical|warning|info"
+          headline: string
+          evidence: string
+          original_code: string
+          suggested_fix: string|null
+          ai_agent_prompt: string
+  imports:
+    llm_providers: string[]
+    frameworks: string[]
+  insights:
+    - severity: string
+      category: string
+      headline: string
+      evidence: string
+      recommendation: string
+constraints:
+  - Return valid JSON only
+  - original_code must be an EXACT match from the source (include the full function when fixing a function-level issue)
+  - suggested_fix must be COMPLETE, WORKING code - NO placeholders like "// existing code" or "// handle error"
+  - suggested_fix must be one-click usable - user clicks "Commit suggestion" and code works immediately
+  - ai_agent_prompt must be an actionable instruction that names the specific file path and line number
+  - All line numbers must match actual source
+  - For error handling fixes, include REAL error types (Anthropic.RateLimitError, etc.) not generic "handle error"
+  - For model downgrade fixes, show the ACTUAL model string replacement
+defaults:
+  max_inference_points_per_file: 20
+  min_confidence: 0.5

package/schemas/inference-map.v0.1.json ADDED Viewed

@@ -0,0 +1,215 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://peakinfer.dev/schemas/inference-map.v0.1.json",
+  "title": "InferenceMap",
+  "description": "PeakInfer InferenceMap v0.1 - Schema for LLM inference point analysis results",
+  "type": "object",
+  "required": ["version", "root", "generatedAt", "summary", "callsites"],
+  "properties": {
+    "version": {
+      "type": "string",
+      "description": "Schema version",
+      "const": "0.1"
+    },
+    "root": {
+      "type": "string",
+      "description": "Root directory path that was analyzed"
+    },
+    "generatedAt": {
+      "type": "string",
+      "format": "date-time",
+      "description": "ISO 8601 timestamp when the analysis was generated"
+    },
+    "metadata": {
+      "type": "object",
+      "description": "Optional metadata about the analysis run",
+      "properties": {
+        "absolutePath": {
+          "type": "string",
+          "description": "Full absolute path that was analyzed"
+        },
+        "promptId": {
+          "type": "string",
+          "description": "ID of the analysis prompt used"
+        },
+        "promptVersion": {
+          "type": "string",
+          "description": "Version of the analysis prompt"
+        },
+        "templatesVersion": {
+          "type": "string",
+          "description": "Version of peakinfer-templates used"
+        },
+        "llmProvider": {
+          "type": "string",
+          "description": "LLM provider used for analysis (e.g., 'anthropic')"
+        },
+        "llmModel": {
+          "type": "string",
+          "description": "LLM model used for analysis"
+        }
+      }
+    },
+    "summary": {
+      "type": "object",
+      "description": "Summary statistics for the analyzed codebase",
+      "required": ["totalCallsites", "providers", "models", "patterns"],
+      "properties": {
+        "totalCallsites": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Total number of inference points found"
+        },
+        "providers": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "List of unique LLM providers detected"
+        },
+        "models": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "List of unique models detected"
+        },
+        "patterns": {
+          "type": "object",
+          "description": "Count of detected patterns (streaming, batching, etc.)",
+          "additionalProperties": { "type": "integer" }
+        }
+      }
+    },
+    "callsites": {
+      "type": "array",
+      "description": "List of inference points (callsites) found in the codebase",
+      "items": {
+        "$ref": "#/definitions/Callsite"
+      }
+    }
+  },
+  "definitions": {
+    "Provider": {
+      "type": "string",
+      "enum": [
+        "openai", "anthropic", "google", "cohere", "mistral",
+        "bedrock", "azure_openai", "together", "fireworks",
+        "groq", "replicate", "perplexity",
+        "vllm", "sglang", "tgi", "ollama", "llamacpp",
+        "unknown"
+      ],
+      "description": "LLM provider identifier"
+    },
+    "Patterns": {
+      "type": "object",
+      "description": "Detected code patterns at an inference point",
+      "properties": {
+        "streaming": {
+          "type": "boolean",
+          "description": "Whether streaming is enabled"
+        },
+        "batching": {
+          "type": "boolean",
+          "description": "Whether batching is used"
+        },
+        "retries": {
+          "type": "boolean",
+          "description": "Whether retry logic is present"
+        },
+        "caching": {
+          "type": "boolean",
+          "description": "Whether caching is implemented"
+        },
+        "fallback": {
+          "type": "boolean",
+          "description": "Whether fallback logic exists"
+        }
+      }
+    },
+    "Callsite": {
+      "type": "object",
+      "description": "An LLM inference point in the codebase",
+      "required": ["id", "file", "line", "patterns", "confidence"],
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "Unique identifier for this inference point"
+        },
+        "file": {
+          "type": "string",
+          "description": "Relative file path from root"
+        },
+        "line": {
+          "type": "integer",
+          "minimum": 1,
+          "description": "Line number in the file"
+        },
+        "provider": {
+          "oneOf": [
+            { "$ref": "#/definitions/Provider" },
+            { "type": "null" }
+          ],
+          "description": "Detected LLM provider"
+        },
+        "model": {
+          "type": ["string", "null"],
+          "description": "Detected model name"
+        },
+        "framework": {
+          "type": ["string", "null"],
+          "description": "Framework used (e.g., 'langchain', 'llamaindex')"
+        },
+        "runtime": {
+          "type": ["string", "null"],
+          "description": "Runtime environment (e.g., 'vllm', 'tgi')"
+        },
+        "patterns": {
+          "$ref": "#/definitions/Patterns"
+        },
+        "confidence": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 1,
+          "description": "Confidence score of the detection (0-1)"
+        }
+      }
+    }
+  },
+  "examples": [
+    {
+      "version": "0.1",
+      "root": "./src",
+      "generatedAt": "2025-12-24T12:00:00.000Z",
+      "metadata": {
+        "absolutePath": "/Users/dev/myproject/src",
+        "promptId": "unified-analyzer",
+        "promptVersion": "1.7",
+        "llmProvider": "anthropic",
+        "llmModel": "claude-sonnet-4-20250514"
+      },
+      "summary": {
+        "totalCallsites": 3,
+        "providers": ["anthropic", "openai"],
+        "models": ["claude-sonnet-4-20250514", "gpt-4o"],
+        "patterns": {
+          "streaming": 2,
+          "retries": 1,
+          "caching": 0
+        }
+      },
+      "callsites": [
+        {
+          "id": "cs-001",
+          "file": "services/chat.ts",
+          "line": 42,
+          "provider": "anthropic",
+          "model": "claude-sonnet-4-20250514",
+          "framework": null,
+          "runtime": null,
+          "patterns": {
+            "streaming": true,
+            "retries": true
+          },
+          "confidence": 0.95
+        }
+      ]
+    }
+  ]
+}