@vigolium/piolium 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +117 -0
- package/agents/access-auditor.md +300 -0
- package/agents/assumption-breaker.md +154 -0
- package/agents/attack-designer.md +116 -0
- package/agents/code-scanner.md +139 -0
- package/agents/concurrency-auditor.md +238 -0
- package/agents/confirm-writer.md +257 -0
- package/agents/context-reviewer.md +274 -0
- package/agents/cross-verifier.md +165 -0
- package/agents/cve-scout.md +381 -0
- package/agents/env-builder.md +282 -0
- package/agents/env-profiler.md +205 -0
- package/agents/evidence-collector.md +140 -0
- package/agents/finding-grader.md +142 -0
- package/agents/finding-writer.md +148 -0
- package/agents/flow-tracer.md +106 -0
- package/agents/goal-backtracer.md +146 -0
- package/agents/history-miner.md +467 -0
- package/agents/independent-verifier.md +118 -0
- package/agents/intent-mapper.md +183 -0
- package/agents/longshot-collector.md +128 -0
- package/agents/longshot-prober.md +126 -0
- package/agents/patch-auditor.md +73 -0
- package/agents/poc-author.md +124 -0
- package/agents/poc-runner.md +194 -0
- package/agents/probe-lead.md +269 -0
- package/agents/red-challenger.md +101 -0
- package/agents/report-composer.md +208 -0
- package/agents/review-adjudicator.md +216 -0
- package/agents/spec-auditor.md +155 -0
- package/agents/taint-tracer.md +265 -0
- package/agents/test-locator.md +209 -0
- package/agents/threat-modeler.md +132 -0
- package/agents/variant-scanner.md +108 -0
- package/agents/variant-spotter.md +110 -0
- package/bin/piolium.mjs +376 -0
- package/extensions/piolium/_vendor/yaml.bundle.d.mts +6 -0
- package/extensions/piolium/_vendor/yaml.bundle.mjs +139 -0
- package/extensions/piolium/agent-runner.ts +322 -0
- package/extensions/piolium/agents.ts +266 -0
- package/extensions/piolium/audit-state.ts +522 -0
- package/extensions/piolium/bundled-resources.ts +97 -0
- package/extensions/piolium/candidate-scan.ts +966 -0
- package/extensions/piolium/command-target.ts +177 -0
- package/extensions/piolium/console-stream.ts +57 -0
- package/extensions/piolium/export-results.ts +380 -0
- package/extensions/piolium/findings.ts +448 -0
- package/extensions/piolium/heartbeat.ts +182 -0
- package/extensions/piolium/help.ts +234 -0
- package/extensions/piolium/index.ts +1865 -0
- package/extensions/piolium/longshot.ts +530 -0
- package/extensions/piolium/matcher-suggestions.ts +196 -0
- package/extensions/piolium/matcher-utils.ts +83 -0
- package/extensions/piolium/modes/balanced.ts +750 -0
- package/extensions/piolium/modes/confirm-bootstrap.ts +186 -0
- package/extensions/piolium/modes/confirm.ts +697 -0
- package/extensions/piolium/modes/deep.ts +917 -0
- package/extensions/piolium/modes/diff.ts +177 -0
- package/extensions/piolium/modes/lite.ts +540 -0
- package/extensions/piolium/modes/longshot.ts +595 -0
- package/extensions/piolium/modes/merge.ts +204 -0
- package/extensions/piolium/modes/phase-runner.ts +267 -0
- package/extensions/piolium/modes/reinvest.ts +546 -0
- package/extensions/piolium/modes/revisit.ts +279 -0
- package/extensions/piolium/modes.ts +48 -0
- package/extensions/piolium/phase-labels.ts +123 -0
- package/extensions/piolium/phase-status-strip.ts +92 -0
- package/extensions/piolium/prompt-prefix-editor.ts +39 -0
- package/extensions/piolium/providers/anthropic-vertex.ts +836 -0
- package/extensions/piolium/recon.ts +409 -0
- package/extensions/piolium/result-stats.ts +105 -0
- package/extensions/piolium/retry.ts +120 -0
- package/extensions/piolium/scheduler.ts +212 -0
- package/extensions/piolium/secrets.ts +368 -0
- package/extensions/piolium/tools/web-tools.ts +148 -0
- package/package.json +77 -0
- package/skills/agentic-actions-auditor/SKILL.md +327 -0
- package/skills/agentic-actions-auditor/references/action-profiles.md +186 -0
- package/skills/agentic-actions-auditor/references/cross-file-resolution.md +209 -0
- package/skills/agentic-actions-auditor/references/foundations.md +94 -0
- package/skills/agentic-actions-auditor/references/vector-a-env-var-intermediary.md +77 -0
- package/skills/agentic-actions-auditor/references/vector-b-direct-expression-injection.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-c-cli-data-fetch.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-d-pr-target-checkout.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-e-error-log-injection.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-f-subshell-expansion.md +82 -0
- package/skills/agentic-actions-auditor/references/vector-g-eval-of-ai-output.md +91 -0
- package/skills/agentic-actions-auditor/references/vector-h-dangerous-sandbox-configs.md +102 -0
- package/skills/agentic-actions-auditor/references/vector-i-wildcard-allowlists.md +88 -0
- package/skills/audit/SKILL.md +562 -0
- package/skills/audit/assets/icon.svg +7 -0
- package/skills/audit/hooks/scripts/validate_phase_output.py +550 -0
- package/skills/audit/references/adversarial-review.md +148 -0
- package/skills/audit/references/architecture-aware-sast.md +306 -0
- package/skills/audit/references/audit-workflow.md +737 -0
- package/skills/audit/references/chamber-protocol.md +384 -0
- package/skills/audit/references/creative-attack-modes.md +221 -0
- package/skills/audit/references/deep-analysis.md +273 -0
- package/skills/audit/references/domain-attack-playbooks.md +1129 -0
- package/skills/audit/references/knowledge-base-template.md +513 -0
- package/skills/audit/references/real-env-validation.md +191 -0
- package/skills/audit/references/report-templates.md +417 -0
- package/skills/audit/references/triage-and-prereqs.md +134 -0
- package/skills/audit/scripts/consolidate_drafts.py +554 -0
- package/skills/audit/scripts/partition_findings.py +152 -0
- package/skills/audit/scripts/rg-hotspots.sh +121 -0
- package/skills/audit/scripts/stamp_file_state.py +349 -0
- package/skills/code-reviewer/SKILL.md +65 -0
- package/skills/codeql/SKILL.md +281 -0
- package/skills/codeql/references/build-fixes.md +90 -0
- package/skills/codeql/references/diagnostic-query-templates.md +339 -0
- package/skills/codeql/references/extension-yaml-format.md +209 -0
- package/skills/codeql/references/important-only-suite.md +153 -0
- package/skills/codeql/references/language-details.md +207 -0
- package/skills/codeql/references/macos-arm64e-workaround.md +179 -0
- package/skills/codeql/references/performance-tuning.md +111 -0
- package/skills/codeql/references/quality-assessment.md +172 -0
- package/skills/codeql/references/ruleset-catalog.md +63 -0
- package/skills/codeql/references/run-all-suite.md +92 -0
- package/skills/codeql/references/sarif-processing.md +79 -0
- package/skills/codeql/references/threat-models.md +51 -0
- package/skills/codeql/workflows/build-database.md +280 -0
- package/skills/codeql/workflows/create-data-extensions.md +261 -0
- package/skills/codeql/workflows/run-analysis.md +301 -0
- package/skills/differential-review/SKILL.md +220 -0
- package/skills/differential-review/adversarial.md +203 -0
- package/skills/differential-review/methodology.md +234 -0
- package/skills/differential-review/patterns.md +300 -0
- package/skills/differential-review/reporting.md +369 -0
- package/skills/fp-check/SKILL.md +125 -0
- package/skills/fp-check/references/bug-class-verification.md +114 -0
- package/skills/fp-check/references/deep-verification.md +143 -0
- package/skills/fp-check/references/evidence-templates.md +91 -0
- package/skills/fp-check/references/false-positive-patterns.md +115 -0
- package/skills/fp-check/references/gate-reviews.md +27 -0
- package/skills/fp-check/references/standard-verification.md +78 -0
- package/skills/insecure-defaults/SKILL.md +117 -0
- package/skills/insecure-defaults/references/examples.md +409 -0
- package/skills/last30days/SKILL.md +444 -0
- package/skills/sarif-parsing/SKILL.md +483 -0
- package/skills/sarif-parsing/resources/jq-queries.md +162 -0
- package/skills/sarif-parsing/resources/sarif_helpers.py +331 -0
- package/skills/security-threat-model/LICENSE.txt +201 -0
- package/skills/security-threat-model/SKILL.md +81 -0
- package/skills/security-threat-model/agents/openai.yaml +4 -0
- package/skills/security-threat-model/references/prompt-template.md +255 -0
- package/skills/security-threat-model/references/security-controls-and-assets.md +32 -0
- package/skills/semgrep/SKILL.md +212 -0
- package/skills/semgrep/references/rulesets.md +162 -0
- package/skills/semgrep/references/scan-modes.md +110 -0
- package/skills/semgrep/references/scanner-task-prompt.md +140 -0
- package/skills/semgrep/scripts/merge_sarif.py +203 -0
- package/skills/semgrep/workflows/scan-workflow.md +311 -0
- package/skills/semgrep-rule-creator/SKILL.md +168 -0
- package/skills/semgrep-rule-creator/references/quick-reference.md +202 -0
- package/skills/semgrep-rule-creator/references/workflow.md +240 -0
- package/skills/semgrep-rule-variant-creator/SKILL.md +205 -0
- package/skills/semgrep-rule-variant-creator/references/applicability-analysis.md +250 -0
- package/skills/semgrep-rule-variant-creator/references/language-syntax-guide.md +324 -0
- package/skills/semgrep-rule-variant-creator/references/workflow.md +518 -0
- package/skills/sharp-edges/SKILL.md +292 -0
- package/skills/sharp-edges/references/auth-patterns.md +252 -0
- package/skills/sharp-edges/references/case-studies.md +274 -0
- package/skills/sharp-edges/references/config-patterns.md +333 -0
- package/skills/sharp-edges/references/crypto-apis.md +190 -0
- package/skills/sharp-edges/references/lang-c.md +205 -0
- package/skills/sharp-edges/references/lang-csharp.md +285 -0
- package/skills/sharp-edges/references/lang-go.md +270 -0
- package/skills/sharp-edges/references/lang-java.md +263 -0
- package/skills/sharp-edges/references/lang-javascript.md +269 -0
- package/skills/sharp-edges/references/lang-kotlin.md +265 -0
- package/skills/sharp-edges/references/lang-php.md +245 -0
- package/skills/sharp-edges/references/lang-python.md +274 -0
- package/skills/sharp-edges/references/lang-ruby.md +273 -0
- package/skills/sharp-edges/references/lang-rust.md +272 -0
- package/skills/sharp-edges/references/lang-swift.md +287 -0
- package/skills/sharp-edges/references/language-specific.md +588 -0
- package/skills/spec-to-code-compliance/SKILL.md +357 -0
- package/skills/spec-to-code-compliance/resources/COMPLETENESS_CHECKLIST.md +69 -0
- package/skills/spec-to-code-compliance/resources/IR_EXAMPLES.md +417 -0
- package/skills/spec-to-code-compliance/resources/OUTPUT_REQUIREMENTS.md +105 -0
- package/skills/supply-chain-risk-auditor/SKILL.md +67 -0
- package/skills/supply-chain-risk-auditor/resources/results-template.md +41 -0
- package/skills/variant-analysis/METHODOLOGY.md +327 -0
- package/skills/variant-analysis/SKILL.md +142 -0
- package/skills/variant-analysis/resources/codeql/cpp.ql +119 -0
- package/skills/variant-analysis/resources/codeql/go.ql +69 -0
- package/skills/variant-analysis/resources/codeql/java.ql +71 -0
- package/skills/variant-analysis/resources/codeql/javascript.ql +63 -0
- package/skills/variant-analysis/resources/codeql/python.ql +80 -0
- package/skills/variant-analysis/resources/semgrep/cpp.yaml +98 -0
- package/skills/variant-analysis/resources/semgrep/go.yaml +63 -0
- package/skills/variant-analysis/resources/semgrep/java.yaml +61 -0
- package/skills/variant-analysis/resources/semgrep/javascript.yaml +60 -0
- package/skills/variant-analysis/resources/semgrep/python.yaml +72 -0
- package/skills/variant-analysis/resources/variant-report-template.md +75 -0
- package/skills/vuln-report/SKILL.md +137 -0
- package/skills/vuln-report/agents/openai.yaml +4 -0
- package/skills/vuln-report/references/report-template.md +135 -0
- package/skills/wooyun-legacy/SKILL.md +367 -0
- package/skills/wooyun-legacy/references/bank-penetration.md +222 -0
- package/skills/wooyun-legacy/references/checklists/command-execution-checklist.md +119 -0
- package/skills/wooyun-legacy/references/checklists/csrf-checklist.md +74 -0
- package/skills/wooyun-legacy/references/checklists/file-upload-checklist.md +108 -0
- package/skills/wooyun-legacy/references/checklists/info-disclosure-checklist.md +114 -0
- package/skills/wooyun-legacy/references/checklists/logic-flaws-checklist.md +95 -0
- package/skills/wooyun-legacy/references/checklists/misconfig-checklist.md +124 -0
- package/skills/wooyun-legacy/references/checklists/path-traversal-checklist.md +87 -0
- package/skills/wooyun-legacy/references/checklists/rce-checklist.md +93 -0
- package/skills/wooyun-legacy/references/checklists/sql-injection-checklist.md +97 -0
- package/skills/wooyun-legacy/references/checklists/ssrf-checklist.md +99 -0
- package/skills/wooyun-legacy/references/checklists/unauthorized-access-checklist.md +89 -0
- package/skills/wooyun-legacy/references/checklists/weak-password-checklist.md +115 -0
- package/skills/wooyun-legacy/references/checklists/xss-checklist.md +103 -0
- package/skills/wooyun-legacy/references/checklists/xxe-checklist.md +130 -0
- package/skills/wooyun-legacy/references/info-disclosure.md +975 -0
- package/skills/wooyun-legacy/references/logic-flaws.md +721 -0
- package/skills/wooyun-legacy/references/path-traversal.md +1191 -0
- package/skills/wooyun-legacy/references/telecom-penetration.md +156 -0
- package/skills/wooyun-legacy/references/unauthorized-access.md +980 -0
- package/skills/wooyun-legacy/references/xss.md +746 -0
- package/skills/zeroize-audit/SKILL.md +371 -0
- package/skills/zeroize-audit/configs/c.yaml +21 -0
- package/skills/zeroize-audit/configs/default.yaml +128 -0
- package/skills/zeroize-audit/configs/rust.yaml +83 -0
- package/skills/zeroize-audit/prompts/report_template.md +238 -0
- package/skills/zeroize-audit/prompts/system.md +163 -0
- package/skills/zeroize-audit/prompts/task.md +97 -0
- package/skills/zeroize-audit/references/compile-commands.md +231 -0
- package/skills/zeroize-audit/references/detection-strategy.md +191 -0
- package/skills/zeroize-audit/references/ir-analysis.md +252 -0
- package/skills/zeroize-audit/references/mcp-analysis.md +221 -0
- package/skills/zeroize-audit/references/poc-generation.md +470 -0
- package/skills/zeroize-audit/references/rust-zeroization-patterns.md +867 -0
- package/skills/zeroize-audit/schemas/input.json +83 -0
- package/skills/zeroize-audit/schemas/output.json +140 -0
- package/skills/zeroize-audit/tools/analyze_asm.sh +202 -0
- package/skills/zeroize-audit/tools/analyze_cfg.py +381 -0
- package/skills/zeroize-audit/tools/analyze_heap.sh +211 -0
- package/skills/zeroize-audit/tools/analyze_ir_semantic.py +429 -0
- package/skills/zeroize-audit/tools/diff_ir.sh +135 -0
- package/skills/zeroize-audit/tools/diff_rust_mir.sh +189 -0
- package/skills/zeroize-audit/tools/emit_asm.sh +67 -0
- package/skills/zeroize-audit/tools/emit_ir.sh +77 -0
- package/skills/zeroize-audit/tools/emit_rust_asm.sh +178 -0
- package/skills/zeroize-audit/tools/emit_rust_ir.sh +150 -0
- package/skills/zeroize-audit/tools/emit_rust_mir.sh +158 -0
- package/skills/zeroize-audit/tools/extract_compile_flags.py +284 -0
- package/skills/zeroize-audit/tools/generate_poc.py +1329 -0
- package/skills/zeroize-audit/tools/mcp/apply_confidence_gates.py +113 -0
- package/skills/zeroize-audit/tools/mcp/check_mcp.sh +68 -0
- package/skills/zeroize-audit/tools/mcp/normalize_mcp_evidence.py +125 -0
- package/skills/zeroize-audit/tools/scripts/check_llvm_patterns.py +481 -0
- package/skills/zeroize-audit/tools/scripts/check_mir_patterns.py +554 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm.py +424 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_aarch64.py +300 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_x86.py +283 -0
- package/skills/zeroize-audit/tools/scripts/find_dangerous_apis.py +375 -0
- package/skills/zeroize-audit/tools/scripts/semantic_audit.py +923 -0
- package/skills/zeroize-audit/tools/track_dataflow.sh +196 -0
- package/skills/zeroize-audit/tools/validate_rust_toolchain.sh +298 -0
- package/skills/zeroize-audit/workflows/phase-0-preflight.md +150 -0
- package/skills/zeroize-audit/workflows/phase-1-source-analysis.md +144 -0
- package/skills/zeroize-audit/workflows/phase-2-compiler-analysis.md +139 -0
- package/skills/zeroize-audit/workflows/phase-3-interim-report.md +46 -0
- package/skills/zeroize-audit/workflows/phase-4-poc-generation.md +46 -0
- package/skills/zeroize-audit/workflows/phase-5-poc-validation.md +136 -0
- package/skills/zeroize-audit/workflows/phase-6-final-report.md +44 -0
- package/skills/zeroize-audit/workflows/phase-7-test-generation.md +42 -0
- package/themes/piolium-srcery.json +94 -0
|
@@ -0,0 +1,836 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `anthropic-vertex` provider — Anthropic Claude served by Google Vertex AI.
|
|
3
|
+
*
|
|
4
|
+
* Pi-ai already ships a built-in `google-vertex` provider for Gemini models
|
|
5
|
+
* (api: "google-vertex", driven by `@google/genai`). It does not cover Claude
|
|
6
|
+
* on Vertex, so we register a separate provider here and let users pick:
|
|
7
|
+
*
|
|
8
|
+
* pi --provider anthropic-vertex --model claude-opus-4-6@default
|
|
9
|
+
* pi --provider google-vertex --model gemini-2.5-flash
|
|
10
|
+
*
|
|
11
|
+
* Credentials come from Google ADC (`gcloud auth application-default login`)
|
|
12
|
+
* or `GOOGLE_APPLICATION_CREDENTIALS`. Project/region resolve in this order:
|
|
13
|
+
*
|
|
14
|
+
* project: options.project → GOOGLE_CLOUD_PROJECT → GCLOUD_PROJECT
|
|
15
|
+
* → ANTHROPIC_VERTEX_PROJECT_ID → `gcloud config get-value project`
|
|
16
|
+
* region: options.region → GOOGLE_CLOUD_LOCATION → CLOUD_ML_REGION
|
|
17
|
+
* → "us-east5"
|
|
18
|
+
*
|
|
19
|
+
* Ported from https://github.com/basnijholt/pi-anthropic-vertex (MIT) so
|
|
20
|
+
* piolium installs ship Vertex/Claude support without a second extension.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { execSync } from "node:child_process";
|
|
24
|
+
import type {
|
|
25
|
+
Tool as AnthropicTool,
|
|
26
|
+
MessageCreateParamsStreaming,
|
|
27
|
+
MessageParam,
|
|
28
|
+
RawMessageStreamEvent,
|
|
29
|
+
} from "@anthropic-ai/sdk/resources/messages.js";
|
|
30
|
+
import { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
|
|
31
|
+
import {
|
|
32
|
+
type Api,
|
|
33
|
+
type AssistantMessage,
|
|
34
|
+
type CacheRetention,
|
|
35
|
+
type Context,
|
|
36
|
+
type ImageContent,
|
|
37
|
+
type Message,
|
|
38
|
+
type Model,
|
|
39
|
+
type SimpleStreamOptions,
|
|
40
|
+
type StopReason,
|
|
41
|
+
type StreamFunction,
|
|
42
|
+
type StreamOptions,
|
|
43
|
+
type TextContent,
|
|
44
|
+
type ThinkingBudgets,
|
|
45
|
+
type ThinkingContent,
|
|
46
|
+
type ThinkingLevel,
|
|
47
|
+
type Tool,
|
|
48
|
+
type ToolCall,
|
|
49
|
+
type ToolResultMessage,
|
|
50
|
+
calculateCost,
|
|
51
|
+
createAssistantMessageEventStream,
|
|
52
|
+
} from "@earendil-works/pi-ai";
|
|
53
|
+
import type { ExtensionAPI, ProviderModelConfig } from "@earendil-works/pi-coding-agent";
|
|
54
|
+
|
|
55
|
+
/** Provider id users pass on the command line (`--provider anthropic-vertex`). */
const PROVIDER_NAME = "anthropic-vertex";

/** Api identifier for this provider; deliberately the same string as the provider id. */
const API_NAME = "anthropic-vertex";

/** Region used when neither the options nor the environment name one. */
const DEFAULT_REGION = "us-east5";

/**
 * Vertex AI endpoint template. Contains a literal `{region}` placeholder;
 * NOTE(review): the substitution site is outside this excerpt — confirm who fills it in.
 */
const BASE_URL = "https://{region}-aiplatform.googleapis.com";
|
|
59
|
+
|
|
60
|
+
/**
 * Claude models offered through the `anthropic-vertex` provider.
 *
 * Each entry lists the model id, display name, whether the model is flagged as
 * reasoning-capable, accepted input kinds, pricing, context window and the
 * maximum number of output tokens.
 *
 * Pricing invariant kept by every entry: `cacheRead = 0.1 × input` and
 * `cacheWrite = 1.25 × input`.
 * NOTE(review): costs are presumably USD per million tokens (they are fed to
 * `calculateCost`) — confirm against the pi-ai cost contract.
 */
const MODELS: ProviderModelConfig[] = [
  {
    id: "claude-sonnet-4-5@20250929",
    name: "Claude Sonnet 4.5 (Vertex AI)",
    reasoning: true,
    input: ["text", "image"],
    cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
    contextWindow: 200000,
    maxTokens: 64000,
  },
  {
    id: "claude-opus-4-5@20251101",
    name: "Claude Opus 4.5 (Vertex AI)",
    reasoning: true,
    input: ["text", "image"],
    // Input/output must agree with the cache prices (0.5 = 0.1 × 5, 6.25 = 1.25 × 5);
    // the older 15/75 Opus price would overstate cost threefold.
    cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
    contextWindow: 200000,
    maxTokens: 32000,
  },
  {
    id: "claude-opus-4-6@default",
    name: "Claude Opus 4.6 (Vertex AI)",
    reasoning: true,
    input: ["text", "image"],
    cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
    contextWindow: 200000,
    maxTokens: 128000,
  },
  {
    id: "claude-haiku-4-5@20251001",
    name: "Claude Haiku 4.5 (Vertex AI)",
    reasoning: false,
    input: ["text", "image"],
    cost: { input: 1, output: 5, cacheRead: 0.1, cacheWrite: 1.25 },
    contextWindow: 200000,
    maxTokens: 8192,
  },
  {
    // NOTE(review): every other id uses the `name@version` form; this one uses a
    // hyphen before the date — confirm Vertex AI accepts it as written.
    id: "claude-sonnet-4-20250514",
    name: "Claude Sonnet 4 (Vertex AI)",
    reasoning: true,
    input: ["text", "image"],
    cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
    contextWindow: 200000,
    maxTokens: 64000,
  },
  {
    id: "claude-3-5-sonnet-v2@20241022",
    name: "Claude 3.5 Sonnet v2 (Vertex AI)",
    reasoning: false,
    input: ["text", "image"],
    cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
    contextWindow: 200000,
    maxTokens: 8192,
  },
  {
    id: "claude-3-5-haiku@20241022",
    name: "Claude 3.5 Haiku (Vertex AI)",
    reasoning: false,
    input: ["text", "image"],
    cost: { input: 1, output: 5, cacheRead: 0.1, cacheWrite: 1.25 },
    contextWindow: 200000,
    maxTokens: 8192,
  },
  {
    id: "claude-3-opus@20240229",
    name: "Claude 3 Opus (Vertex AI)",
    reasoning: false,
    input: ["text", "image"],
    cost: { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 },
    contextWindow: 200000,
    maxTokens: 4096,
  },
  {
    id: "claude-3-haiku@20240307",
    name: "Claude 3 Haiku (Vertex AI)",
    reasoning: false,
    input: ["text", "image"],
    cost: { input: 0.25, output: 1.25, cacheRead: 0.025, cacheWrite: 0.3125 },
    contextWindow: 200000,
    maxTokens: 4096,
  },
];
|
|
143
|
+
|
|
144
|
+
/** Effort levels this provider can produce; `mapThinkingLevelToEffort` maps reasoning levels onto them. */
type AnthropicVertexEffort = "low" | "medium" | "high" | "max";

/** Stream options accepted by this provider, layered on top of the generic `StreamOptions`. */
interface AnthropicVertexOptions extends StreamOptions {
  /** Turns model thinking on or off. */
  thinkingEnabled?: boolean;
  /** Token budget for thinking. */
  thinkingBudgetTokens?: number;
  /** Requested effort level. */
  effort?: AnthropicVertexEffort;
  /** Interleaved-thinking toggle. */
  interleavedThinking?: boolean;
  /** Tool selection policy: one of the named modes, or a specific tool by name. */
  toolChoice?:
    | "auto"
    | "any"
    | "none"
    | { type: "tool"; name: string };
  /** Explicit GCP project; checked before any environment variable. */
  project?: string;
  /** Explicit Vertex region; checked before any environment variable. */
  region?: string;
}

/** A tool call being assembled from a stream: the call plus its raw JSON buffer and block index. */
type ToolCallStreamingBlock = ToolCall & {
  partialJson: string;
  index: number;
};

/** Any content block under construction while streaming, tagged with its stream index. */
type AnthropicStreamingBlock =
  | (TextContent & { index: number })
  | (ThinkingContent & { index: number })
  | ToolCallStreamingBlock;

// `MessageParam.content` is either a plain string or an array of content
// blocks. Only the array form is ever appended to, so name that variant (and
// its element type) once here.
type MessageContentBlocks = Exclude<MessageParam["content"], string>;
type MessageContentBlock = MessageContentBlocks[number];
|
|
170
|
+
|
|
171
|
+
/**
 * Replaces unpaired UTF-16 surrogates with U+FFFD so the text is well-formed.
 *
 * Well-formed surrogate pairs (emoji and other astral characters) are left
 * untouched; only a high surrogate with no following low surrogate, or a low
 * surrogate with no preceding high surrogate, is replaced.
 *
 * @param text - Arbitrary string, possibly containing lone surrogates.
 * @returns The same text with each lone surrogate replaced by U+FFFD.
 */
function sanitizeSurrogates(text: string): string {
  // First alternative: high surrogate not followed by a low one.
  // Second alternative: low surrogate not preceded by a high one.
  return text.replace(
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
    "\uFFFD",
  );
}
|
|
174
|
+
|
|
175
|
+
/**
 * Combines several header maps into one fresh object.
 *
 * Sources are applied left to right, so a later source overrides an earlier
 * one on key collisions. `undefined` sources are ignored.
 *
 * @param sources - Header maps (or `undefined`) in increasing priority order.
 * @returns A new object holding the merged headers.
 */
function mergeHeaders(
  ...sources: Array<Record<string, string> | undefined>
): Record<string, string> {
  const combined: Record<string, string> = {};
  // Object.assign already skips undefined sources and copies in argument order.
  return Object.assign(combined, ...sources);
}
|
|
184
|
+
|
|
185
|
+
/**
 * Reports whether a model id denotes Opus 4.6, recognised by the substring
 * `opus-4-6` or `opus-4.6` anywhere in the id.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when either marker occurs in the id.
 */
function supportsAdaptiveThinking(modelId: string): boolean {
  const markers = ["opus-4-6", "opus-4.6"];
  return markers.some((marker) => modelId.includes(marker));
}
|
|
188
|
+
|
|
189
|
+
/**
 * Translates a reasoning level into this provider's effort scale.
 *
 * `minimal` and `low` both become `low`, `medium` stays `medium`, `xhigh`
 * becomes `max`, and everything else (including `high` and an absent level)
 * becomes `high`.
 *
 * @param level - Requested reasoning level, possibly undefined.
 * @returns The matching effort value.
 */
function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"]): AnthropicVertexEffort {
  if (level === "minimal" || level === "low") return "low";
  if (level === "medium") return "medium";
  if (level === "xhigh") return "max";
  // "high" and any unrecognised or missing level share the same result.
  return "high";
}
|
|
204
|
+
|
|
205
|
+
/**
 * Maps an Anthropic stop reason string onto the shared `StopReason` values.
 *
 * - `end_turn`, `pause_turn`, `stop_sequence` → `"stop"`
 * - `max_tokens` → `"length"`
 * - `tool_use` → `"toolUse"`
 * - anything else, including `refusal` and `sensitive` → `"error"`
 *
 * @param reason - Stop reason as reported by the API.
 * @returns The corresponding `StopReason`.
 */
function mapStopReason(reason: string): StopReason {
  if (reason === "max_tokens") return "length";
  if (reason === "tool_use") return "toolUse";

  const normalStops = ["end_turn", "pause_turn", "stop_sequence"];
  return normalStops.includes(reason) ? "stop" : "error";
}
|
|
222
|
+
|
|
223
|
+
/**
 * Picks the effective cache retention.
 *
 * An explicit argument wins. Otherwise the result is `"long"` when the
 * `PI_CACHE_RETENTION` environment variable equals `"long"` (and a `process`
 * global exists), and `"short"` in every other case.
 *
 * @param cacheRetention - Caller-supplied retention, if any.
 * @returns The retention to use.
 */
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
  if (cacheRetention) return cacheRetention;

  const envRequestsLong =
    typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long";
  return envRequestsLong ? "long" : "short";
}
|
|
228
|
+
|
|
229
|
+
/**
 * Builds the cache-control marker for a request.
 *
 * The retention is resolved via `resolveCacheRetention`. For `"none"` no
 * marker is produced. Otherwise an ephemeral marker is returned, and it
 * carries a one-hour TTL only when retention is `"long"` and the base URL
 * contains `api.anthropic.com`.
 *
 * @param baseUrl - Endpoint base URL, inspected for the TTL decision.
 * @param cacheRetention - Optional caller-supplied retention.
 * @returns The resolved retention plus the marker, when one applies.
 */
function getCacheControl(
  baseUrl: string,
  cacheRetention?: CacheRetention,
): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } {
  const retention = resolveCacheRetention(cacheRetention);
  if (retention === "none") return { retention };

  const cacheControl: { type: "ephemeral"; ttl?: "1h" } = { type: "ephemeral" };
  if (retention === "long" && baseUrl.includes("api.anthropic.com")) {
    cacheControl.ttl = "1h";
  }
  return { retention, cacheControl };
}
|
|
241
|
+
|
|
242
|
+
/**
 * Converts text/image content into the shape used for tool-result content.
 *
 * With no image present, the result is a single sanitized string: the text
 * parts joined by newlines. With at least one image, the result is an array
 * of text and base64 image blocks in the original order; if that array has no
 * text block, a `(see attached image)` text block is put at the front.
 *
 * @param content - Text and image parts to convert.
 * @returns A string (text only) or an array of content blocks (images present).
 */
function convertContentBlocks(content: Array<TextContent | ImageContent>) {
  const containsImage = content.some((part) => part.type === "image");
  if (!containsImage) {
    const joined = content.map((part) => (part.type === "text" ? part.text : "")).join("\n");
    return sanitizeSurrogates(joined);
  }

  const converted = content.map((part) => {
    if (part.type !== "text") {
      return {
        type: "image" as const,
        source: {
          type: "base64" as const,
          media_type: part.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
          data: part.data,
        },
      };
    }
    return {
      type: "text" as const,
      text: sanitizeSurrogates(part.text),
    };
  });

  // Image-only content gets a leading text block.
  const lacksText = converted.every((entry) => entry.type !== "text");
  if (lacksText) {
    converted.unshift({ type: "text", text: "(see attached image)" });
  }

  return converted;
}
|
|
271
|
+
|
|
272
|
+
/**
 * Converts the conversation history into Anthropic `MessageParam` entries.
 *
 * - User messages: blank strings and blank text blocks are dropped; image
 *   blocks are kept only when `model.input` includes `"image"`.
 * - Assistant messages: blank text/thinking blocks are dropped; thinking
 *   without a signature is sent as plain text; tool calls become `tool_use`.
 * - Tool results: each run of consecutive tool-result messages is folded into
 *   one `user` message holding one `tool_result` block per result.
 *
 * Messages that end up with no blocks are omitted. When `cacheControl` is
 * given and the final emitted message has role `user`, the marker is attached
 * to that message's last block (a string body is first wrapped in a text block).
 *
 * @param messages - Conversation history in order.
 * @param model - Target model; its `input` list gates image blocks.
 * @param cacheControl - Optional cache marker for the final user message.
 * @returns The converted message list.
 */
function convertMessages(
  messages: Message[],
  model: Model<Api>,
  cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
  const out: MessageParam[] = [];

  let cursor = 0;
  while (cursor < messages.length) {
    const current = messages[cursor];
    cursor += 1;
    if (!current) continue;

    switch (current.role) {
      case "user": {
        if (typeof current.content === "string") {
          if (current.content.trim().length > 0) {
            out.push({ role: "user", content: sanitizeSurrogates(current.content) });
          }
          break;
        }

        const userBlocks: MessageContentBlocks = [];
        for (const item of current.content) {
          if (item.type === "text") {
            const text = sanitizeSurrogates(item.text);
            if (text.trim().length > 0) userBlocks.push({ type: "text", text });
          } else if (model.input.includes("image")) {
            userBlocks.push({
              type: "image",
              source: {
                type: "base64",
                media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
                data: item.data,
              },
            });
          }
        }
        if (userBlocks.length > 0) {
          out.push({ role: "user", content: userBlocks });
        }
        break;
      }

      case "assistant": {
        const assistantBlocks: MessageContentBlocks = [];
        for (const part of current.content) {
          if (part.type === "text") {
            if (part.text.trim().length === 0) continue;
            assistantBlocks.push({ type: "text", text: sanitizeSurrogates(part.text) });
          } else if (part.type === "thinking") {
            if (part.thinking.trim().length === 0) continue;
            const signature = part.thinkingSignature;
            if (signature && signature.trim().length > 0) {
              assistantBlocks.push({
                type: "thinking",
                thinking: sanitizeSurrogates(part.thinking),
                signature,
              });
            } else {
              // Unsigned thinking is sent as ordinary text instead.
              assistantBlocks.push({ type: "text", text: sanitizeSurrogates(part.thinking) });
            }
          } else if (part.type === "toolCall") {
            assistantBlocks.push({
              type: "tool_use",
              id: part.id,
              name: part.name,
              input: part.arguments ?? {},
            });
          }
        }
        if (assistantBlocks.length > 0) {
          out.push({ role: "assistant", content: assistantBlocks });
        }
        break;
      }

      case "toolResult": {
        // Gather this result and every tool result that directly follows it.
        const batch: ToolResultMessage[] = [current];
        while (cursor < messages.length && messages[cursor]?.role === "toolResult") {
          batch.push(messages[cursor] as ToolResultMessage);
          cursor += 1;
        }
        out.push({
          role: "user",
          content: batch.map((result) => ({
            type: "tool_result" as const,
            tool_use_id: result.toolCallId,
            content: convertContentBlocks(result.content),
            is_error: result.isError,
          })),
        });
        break;
      }
    }
  }

  const tail = out[out.length - 1];
  if (cacheControl && tail && tail.role === "user") {
    if (typeof tail.content === "string") {
      tail.content = [{ type: "text", text: tail.content, cache_control: cacheControl }];
    } else {
      const lastBlock = tail.content[tail.content.length - 1] as MessageContentBlock | undefined;
      if (
        lastBlock &&
        (lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
      ) {
        (lastBlock as { cache_control?: { type: "ephemeral"; ttl?: "1h" } }).cache_control = cacheControl;
      }
    }
  }

  return out;
}
|
|
392
|
+
|
|
393
|
+
/**
 * Translate tool definitions into the `AnthropicTool` request shape.
 *
 * Only `properties` and `required` are copied out of each tool's parameter
 * schema; the emitted `input_schema` is always of type `"object"`, with an
 * empty property map / required list when the schema omits them.
 *
 * @param tools Tool definitions, or `undefined` when none are configured.
 * @returns One converted entry per input tool (empty when `tools` is absent).
 */
function convertTools(tools: Tool[] | undefined): AnthropicTool[] {
  const converted: AnthropicTool[] = [];
  for (const { name, description, parameters } of tools ?? []) {
    const { properties, required } = parameters as {
      properties?: Record<string, unknown>;
      required?: string[];
    };
    converted.push({
      name,
      description,
      input_schema: {
        type: "object",
        properties: properties ?? {},
        required: required ?? [],
      },
    });
  }
  return converted;
}
|
|
408
|
+
|
|
409
|
+
/**
 * Best-effort parse of the JSON text accumulated so far for a streamed value.
 *
 * Returns `{}` when the text is blank, is not (yet) valid JSON, or parses to
 * anything other than a plain JSON object. The last case matters because
 * `JSON.parse` happily returns `null`, numbers, strings or arrays, none of
 * which satisfy the declared `Record` return type — and callers that run
 * `Object.keys()` on the result would throw on `null`.
 *
 * @param partial JSON text received so far; may be incomplete.
 * @returns The parsed object, or `{}` when no object can be produced.
 */
function parseStreamingJson(partial: string): Record<string, unknown> {
  if (partial.trim().length === 0) return {};

  let parsed: unknown;
  try {
    parsed = JSON.parse(partial);
  } catch {
    // Incomplete or malformed JSON is expected mid-stream; report "nothing yet".
    return {};
  }

  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return {};
  }
  return parsed as Record<string, unknown>;
}
|
|
417
|
+
|
|
418
|
+
// Memoized result of the gcloud lookup below. `gcloudProjectResolved` records
// that a lookup was attempted, so a failed lookup is not retried.
let cachedGcloudProject: string | undefined;
let gcloudProjectResolved = false;

/**
 * Ask the gcloud CLI for its configured project.
 *
 * Runs `gcloud config get-value project` synchronously (3 second timeout,
 * stderr discarded) on the first call only; every later call returns the
 * remembered answer.
 *
 * @returns The trimmed command output, or `undefined` when the command fails
 *   or prints nothing.
 */
function readProjectFromGcloud(): string | undefined {
  if (!gcloudProjectResolved) {
    // Mark as resolved before running so an exception still counts as an attempt.
    gcloudProjectResolved = true;

    let project: string | undefined;
    try {
      const stdout = execSync("gcloud config get-value project", {
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "ignore"],
        timeout: 3000,
      });
      const trimmed = stdout.trim();
      project = trimmed.length > 0 ? trimmed : undefined;
    } catch {
      project = undefined;
    }
    cachedGcloudProject = project;
  }
  return cachedGcloudProject;
}
|
|
436
|
+
|
|
437
|
+
/**
 * Determine which Google Cloud project to use.
 *
 * Sources are tried in this order, and the first non-empty value wins:
 *   1. `options.project`
 *   2. `GOOGLE_CLOUD_PROJECT`
 *   3. `GCLOUD_PROJECT`
 *   4. `ANTHROPIC_VERTEX_PROJECT_ID`
 *   5. the gcloud CLI (`readProjectFromGcloud()`)
 *
 * `||` is used deliberately instead of `??`: an environment variable that is
 * set but empty (e.g. `GOOGLE_CLOUD_PROJECT=""`) must count as "not set" so
 * the remaining sources still get a chance.
 *
 * @param options Per-request options; only `project` is read.
 * @returns The project id, or `undefined` when no source yields one.
 */
function resolveProject(options?: AnthropicVertexOptions): string | undefined {
  return (
    options?.project ||
    process.env.GOOGLE_CLOUD_PROJECT ||
    process.env.GCLOUD_PROJECT ||
    process.env.ANTHROPIC_VERTEX_PROJECT_ID ||
    readProjectFromGcloud()
  );
}
|
|
446
|
+
|
|
447
|
+
/**
 * Determine which Vertex region to use.
 *
 * Sources are tried in this order, and the first non-empty value wins:
 * `options.region`, `GOOGLE_CLOUD_LOCATION`, `CLOUD_ML_REGION`, then
 * `DEFAULT_REGION`.
 *
 * `||` is used deliberately instead of `??` so that a variable that is set
 * but empty falls through to the next source rather than producing an empty
 * region string.
 *
 * @param options Per-request options; only `region` is read.
 * @returns The selected region.
 */
function resolveRegion(options?: AnthropicVertexOptions): string {
  return (
    options?.region ||
    process.env.GOOGLE_CLOUD_LOCATION ||
    process.env.CLOUD_ML_REGION ||
    DEFAULT_REGION
  );
}
|
|
455
|
+
|
|
456
|
+
/**
 * Construct the `AnthropicVertex` client for one request.
 *
 * The `anthropic-beta` header always carries the fine-grained tool streaming
 * flag, plus the interleaved-thinking flag unless
 * `options.interleavedThinking` is explicitly `false`. Headers from the model
 * definition and from the options are passed to `mergeHeaders` after these
 * defaults.
 *
 * @param model Model definition; only its `headers` are read here.
 * @param options Per-request options (project, region, headers, beta toggle).
 * @returns A client bound to the resolved project and region.
 * @throws Error when no project id can be resolved.
 */
function createClient(model: Model<Api>, options?: AnthropicVertexOptions): AnthropicVertex {
  const wantsInterleavedThinking = options?.interleavedThinking ?? true;
  const betaFeatures = [
    "fine-grained-tool-streaming-2025-05-14",
    ...(wantsInterleavedThinking ? ["interleaved-thinking-2025-05-14"] : []),
  ];

  const projectId = resolveProject(options);
  if (!projectId) {
    throw new Error(
      "Anthropic Vertex requires a project ID. Set ANTHROPIC_VERTEX_PROJECT_ID or GOOGLE_CLOUD_PROJECT/GCLOUD_PROJECT.",
    );
  }

  const region = resolveRegion(options);
  const defaultHeaders = mergeHeaders(
    {
      accept: "application/json",
      "anthropic-beta": betaFeatures.join(","),
    },
    model.headers,
    options?.headers,
  );

  return new AnthropicVertex({ projectId, region, defaultHeaders });
}
|
|
482
|
+
|
|
483
|
+
/**
 * Assemble the streaming Messages request for one turn.
 *
 * - `max_tokens` defaults to one third of `model.maxTokens`, truncated to an
 *   integer, when the caller gives no explicit limit.
 * - The system prompt (if any) is sent as a single text block and carries the
 *   cache-control marker when caching is active.
 * - Thinking is requested only when the caller enabled it AND the model is
 *   flagged as a reasoning model: adaptive thinking (with optional effort)
 *   for models that support it, otherwise a fixed token budget
 *   (default 1024).
 * - `temperature` is forwarded only when thinking is NOT being requested,
 *   because the Anthropic API does not accept temperature changes together
 *   with thinking; sending both would fail the whole request.
 * - `metadata` is reduced to a string `user_id`; `toolChoice` strings are
 *   expanded to `{ type }` objects.
 *
 * @param model Target model definition.
 * @param context Conversation context (messages, system prompt, tools).
 * @param options Per-request options.
 * @returns Request parameters with `stream: true`.
 */
function buildParams(
  model: Model<Api>,
  context: Context,
  options?: AnthropicVertexOptions,
): MessageCreateParamsStreaming {
  const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);
  const params: MessageCreateParamsStreaming = {
    model: model.id,
    messages: convertMessages(context.messages, model, cacheControl),
    // `| 0` truncates the fractional part; the parentheses make the intended
    // grouping explicit (`|` already binds tighter than `??`).
    max_tokens: options?.maxTokens ?? ((model.maxTokens / 3) | 0),
    stream: true,
  };

  if (context.systemPrompt) {
    params.system = [
      {
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
  }

  // Skip temperature whenever the thinking block below will be emitted.
  if (options?.temperature !== undefined && !(options.thinkingEnabled && model.reasoning)) {
    params.temperature = options.temperature;
  }

  if (context.tools) {
    params.tools = convertTools(context.tools);
  }

  if (options?.thinkingEnabled && model.reasoning) {
    if (supportsAdaptiveThinking(model.id)) {
      params.thinking = { type: "adaptive" };
      if (options.effort) {
        params.output_config = { effort: options.effort };
      }
    } else {
      params.thinking = {
        type: "enabled",
        budget_tokens: options.thinkingBudgetTokens ?? 1024,
      };
    }
  }

  if (options?.metadata && typeof options.metadata.user_id === "string") {
    params.metadata = { user_id: options.metadata.user_id };
  }

  if (options?.toolChoice) {
    params.tool_choice =
      typeof options.toolChoice === "string" ? { type: options.toolChoice } : options.toolChoice;
  }

  return params;
}
|
|
539
|
+
|
|
540
|
+
/**
 * Stream a single assistant response from the Anthropic Vertex client.
 *
 * The event stream is returned immediately. A background task then builds the
 * client and request, invokes `options.onPayload` (if provided) with the
 * request, and translates each raw stream event into `start`, `*_start`,
 * `*_delta` and `*_end` events while accumulating the message in `output`.
 * The task always finishes by pushing exactly one `done` or `error` event and
 * ending the stream; failures (including aborts) are reported through the
 * `error` event rather than thrown to the caller.
 */
const streamAnthropicVertex: StreamFunction<Api, AnthropicVertexOptions> = (
  model,
  context,
  options,
) => {
  const stream = createAssistantMessageEventStream();

  const pump = async (): Promise<void> => {
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    // Recompute the derived figures after any of the usage counters change.
    const refreshUsageTotals = (): void => {
      const { usage } = output;
      usage.totalTokens = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
      calculateCost(model, usage);
    };

    try {
      const client = createClient(model, options);
      const params = buildParams(model, context, options);
      await options?.onPayload?.(params, model);
      const events = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });

      stream.push({ type: "start", partial: output });
      // Same array as `output.content`; while a block is open it also carries
      // the provider's block `index` (and `partialJson` for tool calls).
      const blocks = output.content as AnthropicStreamingBlock[];

      for await (const event of events as AsyncIterable<RawMessageStreamEvent>) {
        switch (event.type) {
          case "message_start": {
            const reported = event.message.usage;
            output.usage.input = reported.input_tokens || 0;
            output.usage.output = reported.output_tokens || 0;
            output.usage.cacheRead = reported.cache_read_input_tokens || 0;
            output.usage.cacheWrite = reported.cache_creation_input_tokens || 0;
            refreshUsageTotals();
            break;
          }

          case "content_block_start": {
            const opened = event.content_block;
            if (opened.type === "text") {
              blocks.push({ type: "text", text: "", index: event.index });
              stream.push({ type: "text_start", contentIndex: blocks.length - 1, partial: output });
            } else if (opened.type === "thinking") {
              blocks.push({
                type: "thinking",
                thinking: "",
                thinkingSignature: "",
                index: event.index,
              });
              stream.push({
                type: "thinking_start",
                contentIndex: blocks.length - 1,
                partial: output,
              });
            } else if (opened.type === "tool_use") {
              blocks.push({
                type: "toolCall",
                id: opened.id,
                name: opened.name,
                arguments: (opened.input as Record<string, unknown>) ?? {},
                partialJson: "",
                index: event.index,
              });
              stream.push({
                type: "toolcall_start",
                contentIndex: blocks.length - 1,
                partial: output,
              });
            }
            break;
          }

          case "content_block_delta": {
            // Locate the open block by the provider's index; ignore deltas
            // for blocks that were never opened here.
            const contentIndex = blocks.findIndex((candidate) => candidate.index === event.index);
            const target = blocks[contentIndex];
            if (!target) break;

            const delta = event.delta;
            if (delta.type === "text_delta" && target.type === "text") {
              target.text += delta.text;
              stream.push({
                type: "text_delta",
                contentIndex,
                delta: delta.text,
                partial: output,
              });
            } else if (delta.type === "thinking_delta" && target.type === "thinking") {
              target.thinking += delta.thinking;
              stream.push({
                type: "thinking_delta",
                contentIndex,
                delta: delta.thinking,
                partial: output,
              });
            } else if (delta.type === "input_json_delta" && target.type === "toolCall") {
              target.partialJson += delta.partial_json;
              // Keep the previous arguments until the accumulated text parses
              // to a non-empty object.
              const snapshot = parseStreamingJson(target.partialJson);
              if (Object.keys(snapshot).length > 0) {
                target.arguments = snapshot;
              }
              stream.push({
                type: "toolcall_delta",
                contentIndex,
                delta: delta.partial_json,
                partial: output,
              });
            } else if (delta.type === "signature_delta" && target.type === "thinking") {
              target.thinkingSignature = (target.thinkingSignature ?? "") + delta.signature;
            }
            break;
          }

          case "content_block_stop": {
            const contentIndex = blocks.findIndex((candidate) => candidate.index === event.index);
            const finished = blocks[contentIndex];
            if (!finished) break;

            // The streaming-only index is cleared once the block is closed.
            (finished as { index?: number }).index = undefined;

            if (finished.type === "text") {
              stream.push({
                type: "text_end",
                contentIndex,
                content: finished.text,
                partial: output,
              });
            } else if (finished.type === "thinking") {
              stream.push({
                type: "thinking_end",
                contentIndex,
                content: finished.thinking,
                partial: output,
              });
            } else if (finished.type === "toolCall") {
              const finalArguments = parseStreamingJson(finished.partialJson);
              if (Object.keys(finalArguments).length > 0) {
                finished.arguments = finalArguments;
              }
              (finished as { partialJson?: string }).partialJson = undefined;
              stream.push({
                type: "toolcall_end",
                contentIndex,
                toolCall: finished,
                partial: output,
              });
            }
            break;
          }

          case "message_delta": {
            const { delta, usage } = event;
            if (delta.stop_reason) {
              output.stopReason = mapStopReason(delta.stop_reason);
            }
            // Only counters present in this event replace the stored values.
            output.usage.input = usage.input_tokens ?? output.usage.input;
            output.usage.output = usage.output_tokens ?? output.usage.output;
            output.usage.cacheRead = usage.cache_read_input_tokens ?? output.usage.cacheRead;
            output.usage.cacheWrite = usage.cache_creation_input_tokens ?? output.usage.cacheWrite;
            refreshUsageTotals();
            break;
          }

          default:
            break;
        }
      }

      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      // Any stop reason other than these three is reported as an error.
      if (
        !(
          output.stopReason === "stop" ||
          output.stopReason === "length" ||
          output.stopReason === "toolUse"
        )
      ) {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Remove streaming-only bookkeeping from whatever was collected so far.
      for (const block of output.content) {
        const scratch = block as { index?: number; partialJson?: string };
        scratch.index = undefined;
        scratch.partialJson = undefined;
      }
      const failure = options?.signal?.aborted ? "aborted" : "error";
      output.stopReason = failure;
      output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: failure, error: output });
      stream.end();
    }
  };

  // Fire and forget: every outcome is delivered through `stream`.
  void pump();

  return stream;
};
|
|
733
|
+
|
|
734
|
+
/**
 * Cap a thinking level at `"high"`.
 *
 * @param level Requested level, possibly absent.
 * @returns `"high"` for `"xhigh"`, `undefined` for a missing level, and the
 *   level itself otherwise.
 */
function clampReasoning(
  level: ThinkingLevel | undefined,
): Exclude<ThinkingLevel, "xhigh"> | undefined {
  if (level === "xhigh") return "high";
  return level || undefined;
}
|
|
740
|
+
|
|
741
|
+
/**
 * Work out the output-token limit and thinking budget for budget-based
 * thinking.
 *
 * The thinking budget for the (clamped) level is added on top of the caller's
 * output limit, capped at the model's maximum. If the capped limit leaves no
 * room beyond the budget, the budget is shrunk so that 1024 tokens remain for
 * regular output (never going below zero).
 *
 * A custom budget is honoured only when it actually has a value: an entry
 * that is present but `undefined` falls back to the built-in default for the
 * SAME level, rather than to the `high` default.
 *
 * @param baseMaxTokens Output limit requested before thinking is considered.
 * @param modelMaxTokens Hard output limit of the model.
 * @param reasoningLevel Requested thinking level (`"xhigh"` is treated as `"high"`).
 * @param customBudgets Optional per-level overrides of the default budgets.
 * @returns The limit to send as `maxTokens` and the thinking budget to use.
 */
function adjustMaxTokensForThinking(
  baseMaxTokens: number,
  modelMaxTokens: number,
  reasoningLevel: ThinkingLevel,
  customBudgets?: ThinkingBudgets,
): { maxTokens: number; thinkingBudget: number } {
  const defaultBudgets: Required<ThinkingBudgets> = {
    minimal: 1024,
    low: 2048,
    medium: 8192,
    high: 16384,
  };

  const minOutputTokens = 1024;
  const level = clampReasoning(reasoningLevel) ?? "high";
  // Last fallback keeps a usable number even for a level with no default entry.
  let thinkingBudget = customBudgets?.[level] ?? defaultBudgets[level] ?? defaultBudgets.high;
  const maxTokens = Math.min(baseMaxTokens + thinkingBudget, modelMaxTokens);

  if (maxTokens <= thinkingBudget) {
    thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
  }

  return { maxTokens, thinkingBudget };
}
|
|
766
|
+
|
|
767
|
+
/**
 * Simplified entry point: maps `SimpleStreamOptions` onto
 * `AnthropicVertexOptions` and delegates to `streamAnthropicVertex`.
 *
 * - Without a `reasoning` level, thinking is disabled.
 * - For models with adaptive thinking, the level is translated to an effort
 *   setting.
 * - Otherwise the level is translated to a thinking token budget and the
 *   output limit is adjusted to make room for it.
 *
 * The output limit defaults to the model maximum capped at 32000 when the
 * caller gives none.
 */
const streamSimpleAnthropicVertex: StreamFunction<Api, SimpleStreamOptions> = (
  model,
  context,
  options,
) => {
  // Options forwarded unchanged; each optional one is copied only when set.
  const base: AnthropicVertexOptions = {
    ...(options?.temperature !== undefined ? { temperature: options.temperature } : {}),
    maxTokens: options?.maxTokens ?? Math.min(model.maxTokens, 32000),
    ...(options?.signal ? { signal: options.signal } : {}),
    ...(options?.apiKey ? { apiKey: options.apiKey } : {}),
    ...(options?.cacheRetention ? { cacheRetention: options.cacheRetention } : {}),
    ...(options?.sessionId ? { sessionId: options.sessionId } : {}),
    ...(options?.headers ? { headers: options.headers } : {}),
    ...(options?.onPayload ? { onPayload: options.onPayload } : {}),
    ...(options?.maxRetryDelayMs !== undefined ? { maxRetryDelayMs: options.maxRetryDelayMs } : {}),
    ...(options?.metadata ? { metadata: options.metadata } : {}),
  };

  const reasoning = options?.reasoning;

  // No reasoning level requested: plain, non-thinking request.
  if (!reasoning) {
    return streamAnthropicVertex(model, context, { ...base, thinkingEnabled: false });
  }

  // Adaptive-thinking models take an effort level instead of a token budget.
  if (supportsAdaptiveThinking(model.id)) {
    return streamAnthropicVertex(model, context, {
      ...base,
      thinkingEnabled: true,
      effort: mapThinkingLevelToEffort(reasoning),
    });
  }

  // Budget-based thinking: widen the output limit to fit the budget.
  const { maxTokens, thinkingBudget } = adjustMaxTokensForThinking(
    base.maxTokens ?? 0,
    model.maxTokens,
    reasoning,
    options?.thinkingBudgets,
  );

  return streamAnthropicVertex(model, context, {
    ...base,
    maxTokens,
    thinkingEnabled: true,
    thinkingBudgetTokens: thinkingBudget,
  });
};
|
|
815
|
+
|
|
816
|
+
/**
 * Register the `anthropic-vertex` provider with pi-coding-agent.
 *
 * Adds the provider + Claude-on-Vertex models. Pi-ai's built-in
 * `google-vertex` provider keeps serving Gemini models untouched.
 *
 * The `apiKey` field is required by the registry but isn't used as an
 * HTTP header here — `AnthropicVertex` authenticates via Google ADC. We
 * emit the resolved project id so `pi`'s auth-status check has something
 * non-empty to display.
 *
 * The shell command checks the environment variables in the same order the
 * client itself uses when resolving the project (`GOOGLE_CLOUD_PROJECT`,
 * then `GCLOUD_PROJECT`, then `ANTHROPIC_VERTEX_PROJECT_ID`, then the gcloud
 * CLI), so the displayed id is the one requests will actually go to when
 * several of them are set to different values. `:-` treats an empty
 * variable as unset.
 *
 * @param pi Extension API used to register the provider.
 */
export function registerAnthropicVertex(pi: ExtensionAPI): void {
  pi.registerProvider(PROVIDER_NAME, {
    baseUrl: BASE_URL,
    api: API_NAME,
    apiKey:
      "!sh -lc 'printf %s \"${GOOGLE_CLOUD_PROJECT:-${GCLOUD_PROJECT:-${ANTHROPIC_VERTEX_PROJECT_ID:-$(gcloud config get-value project 2>/dev/null)}}}\"'",
    models: MODELS,
    streamSimple: streamSimpleAnthropicVertex,
  });
}
|