@gajae-code/ai 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2644 -0
- package/README.md +1181 -0
- package/dist/types/api-registry.d.ts +30 -0
- package/dist/types/auth-broker/client.d.ts +66 -0
- package/dist/types/auth-broker/index.d.ts +5 -0
- package/dist/types/auth-broker/refresher.d.ts +25 -0
- package/dist/types/auth-broker/remote-store.d.ts +96 -0
- package/dist/types/auth-broker/server.d.ts +32 -0
- package/dist/types/auth-broker/types.d.ts +105 -0
- package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
- package/dist/types/auth-gateway/http.d.ts +39 -0
- package/dist/types/auth-gateway/index.d.ts +3 -0
- package/dist/types/auth-gateway/server.d.ts +17 -0
- package/dist/types/auth-gateway/types.d.ts +115 -0
- package/dist/types/auth-storage.d.ts +641 -0
- package/dist/types/cli.d.ts +2 -0
- package/dist/types/index.d.ts +49 -0
- package/dist/types/model-cache.d.ts +17 -0
- package/dist/types/model-manager.d.ts +62 -0
- package/dist/types/model-thinking.d.ts +71 -0
- package/dist/types/models.d.ts +12 -0
- package/dist/types/provider-details.d.ts +24 -0
- package/dist/types/provider-models/bundled-references.d.ts +4 -0
- package/dist/types/provider-models/descriptors.d.ts +48 -0
- package/dist/types/provider-models/google.d.ts +20 -0
- package/dist/types/provider-models/index.d.ts +5 -0
- package/dist/types/provider-models/ollama.d.ts +7 -0
- package/dist/types/provider-models/openai-compat.d.ts +237 -0
- package/dist/types/provider-models/special.d.ts +16 -0
- package/dist/types/providers/amazon-bedrock.d.ts +36 -0
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +450 -0
- package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
- package/dist/types/providers/anthropic.d.ts +188 -0
- package/dist/types/providers/aws-credentials.d.ts +43 -0
- package/dist/types/providers/aws-eventstream.d.ts +38 -0
- package/dist/types/providers/aws-sigv4.d.ts +55 -0
- package/dist/types/providers/azure-openai-responses.d.ts +15 -0
- package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
- package/dist/types/providers/cursor.d.ts +42 -0
- package/dist/types/providers/error-message.d.ts +27 -0
- package/dist/types/providers/github-copilot-headers.d.ts +40 -0
- package/dist/types/providers/gitlab-duo.d.ts +27 -0
- package/dist/types/providers/google-auth.d.ts +24 -0
- package/dist/types/providers/google-gemini-cli.d.ts +72 -0
- package/dist/types/providers/google-gemini-headers.d.ts +18 -0
- package/dist/types/providers/google-shared.d.ts +163 -0
- package/dist/types/providers/google-types.d.ts +138 -0
- package/dist/types/providers/google-vertex.d.ts +7 -0
- package/dist/types/providers/google.d.ts +4 -0
- package/dist/types/providers/grammar.d.ts +1 -0
- package/dist/types/providers/kimi.d.ts +27 -0
- package/dist/types/providers/mock.d.ts +175 -0
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +814 -0
- package/dist/types/providers/openai-chat-server.d.ts +16 -0
- package/dist/types/providers/openai-codex/constants.d.ts +26 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
- package/dist/types/providers/openai-codex-responses.d.ts +67 -0
- package/dist/types/providers/openai-completions-compat.d.ts +25 -0
- package/dist/types/providers/openai-completions.d.ts +33 -0
- package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
- package/dist/types/providers/openai-responses-server.d.ts +17 -0
- package/dist/types/providers/openai-responses-shared.d.ts +89 -0
- package/dist/types/providers/openai-responses.d.ts +32 -0
- package/dist/types/providers/pi-native-client.d.ts +13 -0
- package/dist/types/providers/pi-native-server.d.ts +68 -0
- package/dist/types/providers/register-builtins.d.ts +31 -0
- package/dist/types/providers/synthetic.d.ts +26 -0
- package/dist/types/providers/transform-messages.d.ts +12 -0
- package/dist/types/providers/vision-guard.d.ts +8 -0
- package/dist/types/rate-limit-utils.d.ts +19 -0
- package/dist/types/stream.d.ts +24 -0
- package/dist/types/types.d.ts +746 -0
- package/dist/types/usage/claude.d.ts +3 -0
- package/dist/types/usage/gemini.d.ts +2 -0
- package/dist/types/usage/github-copilot.d.ts +7 -0
- package/dist/types/usage/google-antigravity.d.ts +2 -0
- package/dist/types/usage/kimi.d.ts +2 -0
- package/dist/types/usage/minimax-code.d.ts +2 -0
- package/dist/types/usage/openai-codex.d.ts +3 -0
- package/dist/types/usage/shared.d.ts +1 -0
- package/dist/types/usage/zai.d.ts +2 -0
- package/dist/types/usage.d.ts +258 -0
- package/dist/types/utils/abort.d.ts +19 -0
- package/dist/types/utils/anthropic-auth.d.ts +31 -0
- package/dist/types/utils/discovery/antigravity.d.ts +61 -0
- package/dist/types/utils/discovery/codex.d.ts +38 -0
- package/dist/types/utils/discovery/cursor.d.ts +23 -0
- package/dist/types/utils/discovery/gemini.d.ts +25 -0
- package/dist/types/utils/discovery/index.d.ts +4 -0
- package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
- package/dist/types/utils/event-stream.d.ts +28 -0
- package/dist/types/utils/fireworks-model-id.d.ts +10 -0
- package/dist/types/utils/foundry.d.ts +1 -0
- package/dist/types/utils/h2-fetch.d.ts +22 -0
- package/dist/types/utils/http-inspector.d.ts +31 -0
- package/dist/types/utils/idle-iterator.d.ts +67 -0
- package/dist/types/utils/json-parse.d.ts +10 -0
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
- package/dist/types/utils/oauth/anthropic.d.ts +22 -0
- package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
- package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
- package/dist/types/utils/oauth/callback-server.d.ts +57 -0
- package/dist/types/utils/oauth/cerebras.d.ts +1 -0
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
- package/dist/types/utils/oauth/cursor.d.ts +15 -0
- package/dist/types/utils/oauth/deepseek.d.ts +10 -0
- package/dist/types/utils/oauth/firepass.d.ts +1 -0
- package/dist/types/utils/oauth/fireworks.d.ts +1 -0
- package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
- package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
- package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
- package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
- package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
- package/dist/types/utils/oauth/huggingface.d.ts +19 -0
- package/dist/types/utils/oauth/index.d.ts +38 -0
- package/dist/types/utils/oauth/kagi.d.ts +17 -0
- package/dist/types/utils/oauth/kilo.d.ts +5 -0
- package/dist/types/utils/oauth/kimi.d.ts +21 -0
- package/dist/types/utils/oauth/litellm.d.ts +18 -0
- package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
- package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
- package/dist/types/utils/oauth/moonshot.d.ts +1 -0
- package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
- package/dist/types/utils/oauth/nvidia.d.ts +18 -0
- package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
- package/dist/types/utils/oauth/ollama.d.ts +18 -0
- package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
- package/dist/types/utils/oauth/opencode.d.ts +18 -0
- package/dist/types/utils/oauth/parallel.d.ts +17 -0
- package/dist/types/utils/oauth/perplexity.d.ts +9 -0
- package/dist/types/utils/oauth/pkce.d.ts +8 -0
- package/dist/types/utils/oauth/qianfan.d.ts +17 -0
- package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
- package/dist/types/utils/oauth/synthetic.d.ts +1 -0
- package/dist/types/utils/oauth/tavily.d.ts +17 -0
- package/dist/types/utils/oauth/together.d.ts +1 -0
- package/dist/types/utils/oauth/types.d.ts +44 -0
- package/dist/types/utils/oauth/venice.d.ts +18 -0
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
- package/dist/types/utils/oauth/vllm.d.ts +16 -0
- package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
- package/dist/types/utils/oauth/zai.d.ts +18 -0
- package/dist/types/utils/oauth/zenmux.d.ts +1 -0
- package/dist/types/utils/overflow.d.ts +54 -0
- package/dist/types/utils/parse-bind.d.ts +23 -0
- package/dist/types/utils/provider-response.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +3 -0
- package/dist/types/utils/retry.d.ts +26 -0
- package/dist/types/utils/schema/adapt.d.ts +24 -0
- package/dist/types/utils/schema/compatibility.d.ts +30 -0
- package/dist/types/utils/schema/dereference.d.ts +11 -0
- package/dist/types/utils/schema/draft.d.ts +10 -0
- package/dist/types/utils/schema/equality.d.ts +4 -0
- package/dist/types/utils/schema/fields.d.ts +49 -0
- package/dist/types/utils/schema/index.d.ts +13 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
- package/dist/types/utils/schema/meta-validator.d.ts +2 -0
- package/dist/types/utils/schema/normalize.d.ts +93 -0
- package/dist/types/utils/schema/spill.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +25 -0
- package/dist/types/utils/schema/types.d.ts +4 -0
- package/dist/types/utils/schema/wire.d.ts +54 -0
- package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
- package/dist/types/utils/sse-debug.d.ts +10 -0
- package/dist/types/utils/tool-call-healing.d.ts +71 -0
- package/dist/types/utils/tool-choice.d.ts +50 -0
- package/dist/types/utils/validation.d.ts +17 -0
- package/dist/types/utils.d.ts +28 -0
- package/package.json +146 -0
- package/src/api-registry.ts +96 -0
- package/src/auth-broker/client.ts +358 -0
- package/src/auth-broker/index.ts +5 -0
- package/src/auth-broker/refresher.ts +127 -0
- package/src/auth-broker/remote-store.ts +623 -0
- package/src/auth-broker/server.ts +644 -0
- package/src/auth-broker/types.ts +127 -0
- package/src/auth-broker/wire-schemas.ts +200 -0
- package/src/auth-gateway/http.ts +194 -0
- package/src/auth-gateway/index.ts +3 -0
- package/src/auth-gateway/server.ts +717 -0
- package/src/auth-gateway/types.ts +134 -0
- package/src/auth-storage.ts +4104 -0
- package/src/cli.ts +262 -0
- package/src/index.ts +54 -0
- package/src/model-cache.ts +129 -0
- package/src/model-manager.ts +450 -0
- package/src/model-thinking.ts +691 -0
- package/src/models.json +73853 -0
- package/src/models.json.d.ts +9 -0
- package/src/models.ts +56 -0
- package/src/prompts/turn-aborted-guidance.md +4 -0
- package/src/provider-details.ts +90 -0
- package/src/provider-models/bundled-references.ts +38 -0
- package/src/provider-models/descriptors.ts +308 -0
- package/src/provider-models/google.ts +91 -0
- package/src/provider-models/index.ts +5 -0
- package/src/provider-models/ollama.ts +153 -0
- package/src/provider-models/openai-compat.ts +2275 -0
- package/src/provider-models/special.ts +67 -0
- package/src/providers/amazon-bedrock.ts +849 -0
- package/src/providers/anthropic-messages-server-schema.ts +229 -0
- package/src/providers/anthropic-messages-server.ts +677 -0
- package/src/providers/anthropic.ts +2696 -0
- package/src/providers/aws-credentials.ts +501 -0
- package/src/providers/aws-eventstream.ts +185 -0
- package/src/providers/aws-sigv4.ts +218 -0
- package/src/providers/azure-openai-responses.ts +337 -0
- package/src/providers/cursor/gen/agent_pb.ts +15274 -0
- package/src/providers/cursor/proto/agent.proto +3526 -0
- package/src/providers/cursor/proto/buf.gen.yaml +6 -0
- package/src/providers/cursor/proto/buf.yaml +17 -0
- package/src/providers/cursor.ts +2561 -0
- package/src/providers/error-message.ts +21 -0
- package/src/providers/github-copilot-headers.ts +140 -0
- package/src/providers/gitlab-duo.ts +372 -0
- package/src/providers/google-auth.ts +252 -0
- package/src/providers/google-gemini-cli.ts +795 -0
- package/src/providers/google-gemini-headers.ts +41 -0
- package/src/providers/google-shared.ts +902 -0
- package/src/providers/google-types.ts +167 -0
- package/src/providers/google-vertex.ts +88 -0
- package/src/providers/google.ts +41 -0
- package/src/providers/grammar.ts +70 -0
- package/src/providers/kimi.ts +52 -0
- package/src/providers/mock.ts +500 -0
- package/src/providers/ollama.ts +544 -0
- package/src/providers/openai-anthropic-shim.ts +138 -0
- package/src/providers/openai-chat-server-schema.ts +243 -0
- package/src/providers/openai-chat-server.ts +628 -0
- package/src/providers/openai-codex/constants.ts +43 -0
- package/src/providers/openai-codex/request-transformer.ts +161 -0
- package/src/providers/openai-codex/response-handler.ts +81 -0
- package/src/providers/openai-codex-responses.ts +2598 -0
- package/src/providers/openai-completions-compat.ts +279 -0
- package/src/providers/openai-completions.ts +1853 -0
- package/src/providers/openai-responses-server-schema.ts +290 -0
- package/src/providers/openai-responses-server.ts +1183 -0
- package/src/providers/openai-responses-shared.ts +800 -0
- package/src/providers/openai-responses.ts +621 -0
- package/src/providers/pi-native-client.ts +228 -0
- package/src/providers/pi-native-server.ts +210 -0
- package/src/providers/register-builtins.ts +412 -0
- package/src/providers/synthetic.ts +50 -0
- package/src/providers/transform-messages.ts +309 -0
- package/src/providers/vision-guard.ts +31 -0
- package/src/rate-limit-utils.ts +84 -0
- package/src/stream.ts +895 -0
- package/src/types.ts +884 -0
- package/src/usage/claude.ts +431 -0
- package/src/usage/gemini.ts +250 -0
- package/src/usage/github-copilot.ts +421 -0
- package/src/usage/google-antigravity.ts +201 -0
- package/src/usage/kimi.ts +271 -0
- package/src/usage/minimax-code.ts +31 -0
- package/src/usage/openai-codex.ts +503 -0
- package/src/usage/shared.ts +10 -0
- package/src/usage/zai.ts +247 -0
- package/src/usage.ts +183 -0
- package/src/utils/abort.ts +51 -0
- package/src/utils/anthropic-auth.ts +87 -0
- package/src/utils/discovery/antigravity.ts +261 -0
- package/src/utils/discovery/codex.ts +371 -0
- package/src/utils/discovery/cursor.ts +306 -0
- package/src/utils/discovery/gemini.ts +248 -0
- package/src/utils/discovery/index.ts +4 -0
- package/src/utils/discovery/openai-compatible.ts +224 -0
- package/src/utils/event-stream.ts +142 -0
- package/src/utils/fireworks-model-id.ts +30 -0
- package/src/utils/foundry.ts +8 -0
- package/src/utils/h2-fetch.ts +60 -0
- package/src/utils/http-inspector.ts +176 -0
- package/src/utils/idle-iterator.ts +250 -0
- package/src/utils/json-parse.ts +148 -0
- package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
- package/src/utils/oauth/anthropic.ts +200 -0
- package/src/utils/oauth/api-key-login.ts +87 -0
- package/src/utils/oauth/api-key-validation.ts +92 -0
- package/src/utils/oauth/callback-server.ts +276 -0
- package/src/utils/oauth/cerebras.ts +16 -0
- package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
- package/src/utils/oauth/cursor.ts +157 -0
- package/src/utils/oauth/deepseek.ts +53 -0
- package/src/utils/oauth/firepass.ts +24 -0
- package/src/utils/oauth/fireworks.ts +15 -0
- package/src/utils/oauth/github-copilot.ts +362 -0
- package/src/utils/oauth/gitlab-duo.ts +123 -0
- package/src/utils/oauth/google-antigravity.ts +200 -0
- package/src/utils/oauth/google-gemini-cli.ts +256 -0
- package/src/utils/oauth/google-oauth-shared.ts +110 -0
- package/src/utils/oauth/huggingface.ts +62 -0
- package/src/utils/oauth/index.ts +444 -0
- package/src/utils/oauth/kagi.ts +47 -0
- package/src/utils/oauth/kilo.ts +87 -0
- package/src/utils/oauth/kimi.ts +254 -0
- package/src/utils/oauth/litellm.ts +47 -0
- package/src/utils/oauth/lm-studio.ts +38 -0
- package/src/utils/oauth/minimax-code.ts +78 -0
- package/src/utils/oauth/moonshot.ts +16 -0
- package/src/utils/oauth/nanogpt.ts +15 -0
- package/src/utils/oauth/nvidia.ts +70 -0
- package/src/utils/oauth/oauth.html +199 -0
- package/src/utils/oauth/ollama-cloud.ts +28 -0
- package/src/utils/oauth/ollama.ts +47 -0
- package/src/utils/oauth/openai-codex.ts +299 -0
- package/src/utils/oauth/opencode.ts +49 -0
- package/src/utils/oauth/parallel.ts +46 -0
- package/src/utils/oauth/perplexity.ts +206 -0
- package/src/utils/oauth/pkce.ts +18 -0
- package/src/utils/oauth/qianfan.ts +58 -0
- package/src/utils/oauth/qwen-portal.ts +60 -0
- package/src/utils/oauth/synthetic.ts +16 -0
- package/src/utils/oauth/tavily.ts +46 -0
- package/src/utils/oauth/together.ts +16 -0
- package/src/utils/oauth/types.ts +94 -0
- package/src/utils/oauth/venice.ts +59 -0
- package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
- package/src/utils/oauth/vllm.ts +40 -0
- package/src/utils/oauth/xiaomi.ts +137 -0
- package/src/utils/oauth/zai.ts +60 -0
- package/src/utils/oauth/zenmux.ts +15 -0
- package/src/utils/overflow.ts +137 -0
- package/src/utils/parse-bind.ts +54 -0
- package/src/utils/provider-response.ts +30 -0
- package/src/utils/retry-after.ts +110 -0
- package/src/utils/retry.ts +54 -0
- package/src/utils/schema/CONSTRAINTS.md +164 -0
- package/src/utils/schema/adapt.ts +36 -0
- package/src/utils/schema/compatibility.ts +435 -0
- package/src/utils/schema/dereference.ts +98 -0
- package/src/utils/schema/draft.ts +341 -0
- package/src/utils/schema/equality.ts +97 -0
- package/src/utils/schema/fields.ts +190 -0
- package/src/utils/schema/index.ts +13 -0
- package/src/utils/schema/json-schema-validator.ts +577 -0
- package/src/utils/schema/meta-validator.ts +167 -0
- package/src/utils/schema/normalize.ts +1588 -0
- package/src/utils/schema/spill.ts +43 -0
- package/src/utils/schema/stamps.ts +97 -0
- package/src/utils/schema/types.ts +11 -0
- package/src/utils/schema/wire.ts +213 -0
- package/src/utils/schema/zod-decontaminate.ts +331 -0
- package/src/utils/sse-debug.ts +289 -0
- package/src/utils/tool-call-healing.ts +271 -0
- package/src/utils/tool-choice.ts +99 -0
- package/src/utils/validation.ts +1019 -0
- package/src/utils.ts +166 -0
|
@@ -0,0 +1,849 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Amazon Bedrock Converse Stream provider.
|
|
3
|
+
*
|
|
4
|
+
* Talks directly to `bedrock-runtime.{region}.amazonaws.com` over HTTPS with
|
|
5
|
+
* SigV4 signing and decodes the `application/vnd.amazon.eventstream` response.
|
|
6
|
+
* No `@aws-sdk/*`, no `@smithy/*`, no `proxy-agent`. Proxies are honored via
|
|
7
|
+
* Bun's native `HTTPS_PROXY` support.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { $env, $flag, extractHttpStatusFromError, fetchWithRetry } from "@gajae-code/utils";
|
|
11
|
+
import type { Effort } from "../model-thinking";
|
|
12
|
+
import { mapEffortToAnthropicAdaptiveEffort, requireSupportedEffort } from "../model-thinking";
|
|
13
|
+
import { calculateCost } from "../models";
|
|
14
|
+
import type {
|
|
15
|
+
Api,
|
|
16
|
+
AssistantMessage,
|
|
17
|
+
CacheRetention,
|
|
18
|
+
Context,
|
|
19
|
+
Model,
|
|
20
|
+
StopReason,
|
|
21
|
+
StreamFunction,
|
|
22
|
+
StreamOptions,
|
|
23
|
+
TextContent,
|
|
24
|
+
ThinkingBudgets,
|
|
25
|
+
ThinkingContent,
|
|
26
|
+
Tool,
|
|
27
|
+
ToolCall,
|
|
28
|
+
ToolResultMessage,
|
|
29
|
+
} from "../types";
|
|
30
|
+
import { normalizeToolCallId, resolveCacheRetention } from "../utils";
|
|
31
|
+
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
32
|
+
import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
|
|
33
|
+
import { parseStreamingJson } from "../utils/json-parse";
|
|
34
|
+
import { toolWireSchema } from "../utils/schema/wire";
|
|
35
|
+
import { resolveAwsCredentials } from "./aws-credentials";
|
|
36
|
+
import { decodeEventStream } from "./aws-eventstream";
|
|
37
|
+
import { signRequest } from "./aws-sigv4";
|
|
38
|
+
import { transformMessages } from "./transform-messages";
|
|
39
|
+
|
|
40
|
+
/** Allowed values for `BedrockOptions.thinkingDisplay`: visible summaries vs. suppressed thinking text. */
export type BedrockThinkingDisplay = "summarized" | "omitted";
|
|
41
|
+
|
|
42
|
+
/** Bedrock-specific stream options, layered on top of the shared `StreamOptions`. */
export interface BedrockOptions extends StreamOptions {
|
|
43
|
+
/* AWS region used to build the `bedrock-runtime.{region}.amazonaws.com` host. `streamBedrock` falls back to `AWS_REGION`, then `AWS_DEFAULT_REGION`, then "us-east-1". */
region?: string;
|
|
44
|
+
/* Forwarded to `resolveAwsCredentials` as `profile` (not used when the `AWS_BEDROCK_SKIP_AUTH` flag is set). */
profile?: string;
|
|
45
|
+
/* Tool selection mode; passed to `convertToolConfig` together with the context's tools. */
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
|
|
46
|
+
/* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
|
|
47
|
+
reasoning?: Effort;
|
|
48
|
+
/* Custom token budgets per thinking level. Overrides default budgets. */
|
|
49
|
+
thinkingBudgets?: ThinkingBudgets;
|
|
50
|
+
/* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
|
|
51
|
+
interleavedThinking?: boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Controls how Claude returns thinking content in Bedrock responses.
|
|
54
|
+
* - `"summarized"`: thinking blocks include human-readable summaries (default here).
|
|
55
|
+
* - `"omitted"`: thinking content is suppressed; the encrypted signature still
|
|
56
|
+
* travels back for multi-turn continuity.
|
|
57
|
+
*
|
|
58
|
+
* Starting with Claude Opus 4.7 the Anthropic API default is `"omitted"`, which
|
|
59
|
+
* leaves callers waiting on a silent stream during long reasoning runs (issue
|
|
60
|
+
* #1373). We default to `"summarized"` so adaptive-thinking models that accept
|
|
61
|
+
* the field keep producing visible thinking deltas. Older adaptive-thinking
|
|
62
|
+
* models (Opus 4.6, Sonnet 4.6+) reject the field, so we omit it for them.
|
|
63
|
+
*/
|
|
64
|
+
thinkingDisplay?: BedrockThinkingDisplay;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
 * An output content block plus optional streaming scratch fields (`index`, `partialJson`).
 * `streamBedrock` deletes both scratch fields from every block in its error path.
 */
type Block = (TextContent | ThinkingContent | ToolCall) & { index?: number; partialJson?: string };
|
|
68
|
+
|
|
69
|
+
// ---------- Bedrock wire-format types ----------
|
|
70
|
+
// Mirrors only what we actually consume from `ConverseStreamRequest` /
|
|
71
|
+
// `ConverseStreamOutput`. Keeps us decoupled from `@aws-sdk/client-bedrock-runtime`.
|
|
72
|
+
|
|
73
|
+
/** Wire shape of a cache-point marker; `ttl`, when present, is "5m" or "1h". */
interface CachePoint {
|
|
74
|
+
cachePoint: { type: "default"; ttl?: "5m" | "1h" };
|
|
75
|
+
}
|
|
76
|
+
/** Wire shape of a plain-text content block. */
interface TextBlockWire {
|
|
77
|
+
text: string;
|
|
78
|
+
}
|
|
79
|
+
/**
 * Wire shape of an image content block, limited to jpeg/png/gif/webp.
 * NOTE(review): `bytes` is typed as a string — presumably base64; confirm against the message converter.
 */
interface ImageBlockWire {
|
|
80
|
+
image: { format: "jpeg" | "png" | "gif" | "webp"; source: { bytes: string } };
|
|
81
|
+
}
|
|
82
|
+
/** Wire shape of a tool invocation block: the call id, the tool name, and its (untyped) input. */
interface ToolUseBlockWire {
|
|
83
|
+
toolUse: { toolUseId: string; name: string; input: unknown };
|
|
84
|
+
}
|
|
85
|
+
/** Wire shape of a tool result block: text/image content for one `toolUseId`, tagged success or error. */
interface ToolResultBlockWire {
|
|
86
|
+
toolResult: {
|
|
87
|
+
toolUseId: string;
|
|
88
|
+
content: Array<TextBlockWire | ImageBlockWire>;
|
|
89
|
+
status: "success" | "error";
|
|
90
|
+
};
|
|
91
|
+
}
|
|
92
|
+
/** Wire shape of a reasoning (thinking) block: the reasoning text and an optional signature. */
interface ReasoningBlockWire {
|
|
93
|
+
reasoningContent: { reasoningText: { text: string; signature?: string } };
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/** Content blocks allowed in the user-side wire union: text, image, tool result, or cache point. */
type UserContent = TextBlockWire | ImageBlockWire | ToolResultBlockWire | CachePoint;
|
|
97
|
+
/** Content blocks allowed in the assistant-side wire union: text, tool use, or reasoning. */
type AssistantContent = TextBlockWire | ToolUseBlockWire | ReasoningBlockWire;
|
|
98
|
+
/** Content blocks allowed in the request's `system` array: text or cache point. */
type SystemContent = TextBlockWire | CachePoint;
|
|
99
|
+
|
|
100
|
+
/** One conversation turn in the request body. The type does not tie `content` variants to `role`. */
interface WireMessage {
|
|
101
|
+
role: "user" | "assistant";
|
|
102
|
+
content: Array<UserContent | AssistantContent>;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/** Wire shape of one tool definition: name, description, and a JSON input schema under `inputSchema.json`. */
interface WireToolSpec {
|
|
106
|
+
toolSpec: { name: string; description: string; inputSchema: { json: unknown } };
|
|
107
|
+
}
|
|
108
|
+
/**
 * Wire shape of the tool-choice selector. `auto` and `any` are empty objects; `tool` names one tool.
 * `streamBedrock` drops `additionalModelRequestFields` when `any` or `tool` is set.
 */
interface WireToolChoice {
|
|
109
|
+
auto?: Record<string, never>;
|
|
110
|
+
any?: Record<string, never>;
|
|
111
|
+
tool?: { name: string };
|
|
112
|
+
}
|
|
113
|
+
/** Tool section of the request body: the tool definitions plus an optional choice selector. */
interface WireToolConfig {
|
|
114
|
+
tools: WireToolSpec[];
|
|
115
|
+
toolChoice?: WireToolChoice;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
 * Request body for the `/model/{id}/converse-stream` endpoint.
 * `streamBedrock` builds one, passes it to `onPayload`, and sends it as `JSON.stringify` output.
 */
interface ConverseStreamRequest {
|
|
119
|
+
messages: WireMessage[];
|
|
120
|
+
system?: SystemContent[];
|
|
121
|
+
inferenceConfig?: { maxTokens?: number; temperature?: number; topP?: number };
|
|
122
|
+
toolConfig?: WireToolConfig;
|
|
123
|
+
/* Model-specific extras; `streamBedrock` clears this when the tool choice forces tool use. */
additionalModelRequestFields?: Record<string, unknown>;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Streaming event payloads. Field names are camelCase, matching the JSON keys Bedrock sends.
|
|
127
|
+
/** Payload of a `messageStart` event. `streamBedrock` throws unless `role` is "assistant". */
interface MessageStartEvent {
|
|
128
|
+
role: "user" | "assistant";
|
|
129
|
+
}
|
|
130
|
+
/** Payload of a `contentBlockStart` event; `start.toolUse`, when present, carries the tool call id and name. */
interface ContentBlockStartEvent {
|
|
131
|
+
contentBlockIndex: number;
|
|
132
|
+
start?: { toolUse?: { toolUseId?: string; name?: string } };
|
|
133
|
+
}
|
|
134
|
+
/**
 * Payload of a `contentBlockDelta` event for the block at `contentBlockIndex`.
 * `delta` may carry text, a tool-input string fragment, or reasoning text/signature.
 */
interface ContentBlockDeltaEvent {
|
|
135
|
+
contentBlockIndex: number;
|
|
136
|
+
delta?: {
|
|
137
|
+
text?: string;
|
|
138
|
+
toolUse?: { input?: string };
|
|
139
|
+
reasoningContent?: { text?: string; signature?: string };
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
/** Payload of a `contentBlockStop` event, identifying the finished block by index. */
interface ContentBlockStopEvent {
|
|
143
|
+
contentBlockIndex: number;
|
|
144
|
+
}
|
|
145
|
+
/** Payload of a `messageStop` event; `streamBedrock` maps `stopReason` through `mapStopReason`. */
interface MessageStopEvent {
|
|
146
|
+
stopReason?: string;
|
|
147
|
+
}
|
|
148
|
+
/** Payload of a `metadata` event carrying optional token usage counters; consumed by `handleMetadata`. */
interface MetadataEvent {
|
|
149
|
+
usage?: {
|
|
150
|
+
inputTokens?: number;
|
|
151
|
+
outputTokens?: number;
|
|
152
|
+
cacheReadInputTokens?: number;
|
|
153
|
+
cacheWriteInputTokens?: number;
|
|
154
|
+
totalTokens?: number;
|
|
155
|
+
};
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
|
|
159
|
+
model: Model<"bedrock-converse-stream">,
|
|
160
|
+
context: Context,
|
|
161
|
+
options: BedrockOptions,
|
|
162
|
+
): AssistantMessageEventStream => {
|
|
163
|
+
const stream = new AssistantMessageEventStream();
|
|
164
|
+
|
|
165
|
+
(async () => {
|
|
166
|
+
const startTime = Date.now();
|
|
167
|
+
let firstTokenTime: number | undefined;
|
|
168
|
+
|
|
169
|
+
const output: AssistantMessage = {
|
|
170
|
+
role: "assistant",
|
|
171
|
+
content: [],
|
|
172
|
+
api: "bedrock-converse-stream" as Api,
|
|
173
|
+
provider: model.provider,
|
|
174
|
+
model: model.id,
|
|
175
|
+
usage: {
|
|
176
|
+
input: 0,
|
|
177
|
+
output: 0,
|
|
178
|
+
cacheRead: 0,
|
|
179
|
+
cacheWrite: 0,
|
|
180
|
+
totalTokens: 0,
|
|
181
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
182
|
+
},
|
|
183
|
+
stopReason: "stop",
|
|
184
|
+
timestamp: Date.now(),
|
|
185
|
+
};
|
|
186
|
+
|
|
187
|
+
const blocks = output.content as Block[];
|
|
188
|
+
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
189
|
+
const region = options.region || $env.AWS_REGION || $env.AWS_DEFAULT_REGION || "us-east-1";
|
|
190
|
+
|
|
191
|
+
try {
|
|
192
|
+
const cacheRetention = resolveCacheRetention(options.cacheRetention);
|
|
193
|
+
const toolConfig = convertToolConfig(context.tools, options.toolChoice);
|
|
194
|
+
let additionalModelRequestFields = buildAdditionalModelRequestFields(model, options);
|
|
195
|
+
|
|
196
|
+
// Bedrock rejects thinking + forced tool_choice ("any" or specific tool).
|
|
197
|
+
// When tool_choice forces tool use, disable thinking to avoid API errors.
|
|
198
|
+
if (toolConfig?.toolChoice && additionalModelRequestFields) {
|
|
199
|
+
const tc = toolConfig.toolChoice;
|
|
200
|
+
if (tc.any || tc.tool) additionalModelRequestFields = undefined;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
const commandInput: ConverseStreamRequest = {
|
|
204
|
+
messages: convertMessages(context, model, cacheRetention),
|
|
205
|
+
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
|
|
206
|
+
inferenceConfig: {
|
|
207
|
+
maxTokens: options.maxTokens,
|
|
208
|
+
temperature: options.temperature,
|
|
209
|
+
topP: options.topP,
|
|
210
|
+
},
|
|
211
|
+
toolConfig,
|
|
212
|
+
additionalModelRequestFields,
|
|
213
|
+
};
|
|
214
|
+
options?.onPayload?.(commandInput);
|
|
215
|
+
|
|
216
|
+
const host = `bedrock-runtime.${region}.amazonaws.com`;
|
|
217
|
+
const url = `https://${host}/model/${encodeURIComponent(model.id)}/converse-stream`;
|
|
218
|
+
const urlPath = `/model/${encodeURIComponent(model.id)}/converse-stream`;
|
|
219
|
+
rawRequestDump = {
|
|
220
|
+
provider: model.provider,
|
|
221
|
+
api: output.api,
|
|
222
|
+
model: model.id,
|
|
223
|
+
method: "POST",
|
|
224
|
+
url,
|
|
225
|
+
body: commandInput,
|
|
226
|
+
};
|
|
227
|
+
|
|
228
|
+
let credentials: { accessKeyId: string; secretAccessKey: string; sessionToken?: string };
|
|
229
|
+
if ($flag("AWS_BEDROCK_SKIP_AUTH")) {
|
|
230
|
+
credentials = { accessKeyId: "dummy-access-key", secretAccessKey: "dummy-secret-key" };
|
|
231
|
+
} else {
|
|
232
|
+
credentials = await resolveAwsCredentials({
|
|
233
|
+
profile: options.profile,
|
|
234
|
+
region,
|
|
235
|
+
signal: options.signal,
|
|
236
|
+
});
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
const bodyText = JSON.stringify(commandInput);
|
|
240
|
+
const body = new TextEncoder().encode(bodyText);
|
|
241
|
+
const baseHeaders: Record<string, string> = {
|
|
242
|
+
"content-type": "application/json",
|
|
243
|
+
accept: "application/vnd.amazon.eventstream",
|
|
244
|
+
};
|
|
245
|
+
const signed = await signRequest({
|
|
246
|
+
method: "POST",
|
|
247
|
+
host,
|
|
248
|
+
path: urlPath,
|
|
249
|
+
body,
|
|
250
|
+
region,
|
|
251
|
+
service: "bedrock",
|
|
252
|
+
credentials,
|
|
253
|
+
headers: baseHeaders,
|
|
254
|
+
});
|
|
255
|
+
const requestHeaders: Record<string, string> = { ...baseHeaders, ...signed };
|
|
256
|
+
|
|
257
|
+
const response = await fetchWithRetry(url, {
|
|
258
|
+
method: "POST",
|
|
259
|
+
headers: requestHeaders,
|
|
260
|
+
body,
|
|
261
|
+
signal: options.signal,
|
|
262
|
+
});
|
|
263
|
+
|
|
264
|
+
if (!response.ok) {
|
|
265
|
+
const errBody = await response.text().catch(() => "");
|
|
266
|
+
throw withHttpStatus(
|
|
267
|
+
new Error(`Bedrock HTTP ${response.status}: ${errBody.slice(0, 1000)}`),
|
|
268
|
+
response.status,
|
|
269
|
+
);
|
|
270
|
+
}
|
|
271
|
+
if (!response.body) throw new Error("Bedrock response has no body");
|
|
272
|
+
|
|
273
|
+
// Track first event for the abort/diagnostic path (currently informational).
|
|
274
|
+
for await (const message of decodeEventStream(response.body)) {
|
|
275
|
+
const messageType = message.headers[":message-type"];
|
|
276
|
+
const eventType = message.headers[":event-type"];
|
|
277
|
+
|
|
278
|
+
if (messageType === "exception") {
|
|
279
|
+
const exceptionType = message.headers[":exception-type"] || "Exception";
|
|
280
|
+
const payload = safeParsePayload(message.payload) as { message?: string } | undefined;
|
|
281
|
+
const errorMessage = payload?.message || new TextDecoder().decode(message.payload);
|
|
282
|
+
const status = exceptionType === "validationException" ? 400 : 0;
|
|
283
|
+
const err = new Error(`${exceptionType}: ${errorMessage}`);
|
|
284
|
+
throw status ? withHttpStatus(err, status) : err;
|
|
285
|
+
}
|
|
286
|
+
if (messageType === "error") {
|
|
287
|
+
const code = message.headers[":error-code"] || "UnknownError";
|
|
288
|
+
const errorMessage = message.headers[":error-message"] || new TextDecoder().decode(message.payload);
|
|
289
|
+
throw new Error(`${code}: ${errorMessage}`);
|
|
290
|
+
}
|
|
291
|
+
if (messageType !== "event") continue;
|
|
292
|
+
|
|
293
|
+
const payload = safeParsePayload(message.payload);
|
|
294
|
+
if (!payload) continue;
|
|
295
|
+
|
|
296
|
+
switch (eventType) {
|
|
297
|
+
case "messageStart": {
|
|
298
|
+
// no-op: first event marker is implicit by stream entry.
|
|
299
|
+
const ev = payload as MessageStartEvent;
|
|
300
|
+
if (ev.role !== "assistant") {
|
|
301
|
+
throw new Error("Unexpected assistant message start but got user message start instead");
|
|
302
|
+
}
|
|
303
|
+
stream.push({ type: "start", partial: output });
|
|
304
|
+
break;
|
|
305
|
+
}
|
|
306
|
+
case "contentBlockStart": {
|
|
307
|
+
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
308
|
+
handleContentBlockStart(payload as ContentBlockStartEvent, blocks, output, stream);
|
|
309
|
+
break;
|
|
310
|
+
}
|
|
311
|
+
case "contentBlockDelta": {
|
|
312
|
+
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
313
|
+
handleContentBlockDelta(payload as ContentBlockDeltaEvent, blocks, output, stream);
|
|
314
|
+
break;
|
|
315
|
+
}
|
|
316
|
+
case "contentBlockStop": {
|
|
317
|
+
handleContentBlockStop(payload as ContentBlockStopEvent, blocks, output, stream);
|
|
318
|
+
break;
|
|
319
|
+
}
|
|
320
|
+
case "messageStop": {
|
|
321
|
+
const ev = payload as MessageStopEvent;
|
|
322
|
+
output.stopReason = mapStopReason(ev.stopReason);
|
|
323
|
+
break;
|
|
324
|
+
}
|
|
325
|
+
case "metadata": {
|
|
326
|
+
handleMetadata(payload as MetadataEvent, model, output);
|
|
327
|
+
break;
|
|
328
|
+
}
|
|
329
|
+
default:
|
|
330
|
+
// Unknown event types (Bedrock may add new ones) — ignore.
|
|
331
|
+
break;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
if (options.signal?.aborted) throw new Error("Request was aborted");
|
|
336
|
+
|
|
337
|
+
if (output.stopReason === "error" || output.stopReason === "aborted") {
|
|
338
|
+
throw new Error(output.errorMessage ?? "An unknown error occurred");
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
output.duration = Date.now() - startTime;
|
|
342
|
+
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
343
|
+
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
344
|
+
stream.end();
|
|
345
|
+
} catch (error) {
|
|
346
|
+
for (const block of output.content) {
|
|
347
|
+
delete (block as Block).index;
|
|
348
|
+
delete (block as Block).partialJson;
|
|
349
|
+
}
|
|
350
|
+
output.stopReason = options.signal?.aborted ? "aborted" : "error";
|
|
351
|
+
output.errorStatus = extractHttpStatusFromError(error);
|
|
352
|
+
const baseMessage = error instanceof Error ? error.message : JSON.stringify(error);
|
|
353
|
+
// Enrich error with thinking block diagnostics for signature-related failures
|
|
354
|
+
let diagnostics = "";
|
|
355
|
+
if (baseMessage.includes("signature") || baseMessage.includes("thinking")) {
|
|
356
|
+
const thinkingBlocks = context.messages
|
|
357
|
+
.filter((m): m is AssistantMessage => m.role === "assistant")
|
|
358
|
+
.flatMap((m, mi) =>
|
|
359
|
+
m.content
|
|
360
|
+
.filter(b => b.type === "thinking")
|
|
361
|
+
.map((b, bi) => ({
|
|
362
|
+
msg: mi,
|
|
363
|
+
block: bi,
|
|
364
|
+
stop: m.stopReason,
|
|
365
|
+
sigLen: b.thinkingSignature?.length ?? -1,
|
|
366
|
+
thinkLen: b.thinking.length,
|
|
367
|
+
})),
|
|
368
|
+
);
|
|
369
|
+
if (thinkingBlocks.length > 0) {
|
|
370
|
+
diagnostics = `\n[thinking-diag] ${JSON.stringify(thinkingBlocks)}`;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
output.errorMessage = await appendRawHttpRequestDumpFor400(baseMessage + diagnostics, error, rawRequestDump);
|
|
374
|
+
output.duration = Date.now() - startTime;
|
|
375
|
+
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
376
|
+
stream.push({ type: "error", reason: output.stopReason, error: output });
|
|
377
|
+
stream.end();
|
|
378
|
+
}
|
|
379
|
+
})();
|
|
380
|
+
|
|
381
|
+
return stream;
|
|
382
|
+
};
|
|
383
|
+
|
|
384
|
+
/**
 * Decode a message payload as UTF-8 text and parse it as JSON.
 *
 * @param payload - Raw payload bytes of a single stream message.
 * @returns `{}` for a zero-length payload, the parsed JSON value otherwise,
 *   or `undefined` when the bytes cannot be parsed as JSON.
 */
function safeParsePayload(payload: Uint8Array): unknown {
	// A zero-length payload is reported as an empty object, not as a failure.
	if (payload.length === 0) {
		return {};
	}

	let parsed: unknown;
	try {
		const text = new TextDecoder().decode(payload);
		parsed = JSON.parse(text);
	} catch {
		// Unparseable payloads are signalled with `undefined`.
		parsed = undefined;
	}
	return parsed;
}
|
|
392
|
+
|
|
393
|
+
/**
 * Handle a `contentBlockStart` event.
 *
 * Only starts that carry a `toolUse` entry are acted on: a `toolCall` block is
 * appended to `output.content` and a `toolcall_start` event is pushed to the
 * stream. Any other start event is ignored.
 *
 * @param event - The start event; `contentBlockIndex` is stored on the new block.
 * @param blocks - Block list used to compute the content index of the new block.
 * @param output - Assistant message being assembled.
 * @param stream - Event stream receiving the `toolcall_start` notification.
 */
function handleContentBlockStart(
	event: ContentBlockStartEvent,
	blocks: Block[],
	output: AssistantMessage,
	stream: AssistantMessageEventStream,
): void {
	const toolUse = event.start?.toolUse;
	if (!toolUse) return;

	const toolCall: Block = {
		type: "toolCall",
		id: normalizeToolCallId(toolUse.toolUseId || ""),
		name: toolUse.name || "",
		arguments: {},
		// Accumulates streamed JSON fragments until the block is stopped.
		partialJson: "",
		index: event.contentBlockIndex,
	};
	output.content.push(toolCall);

	// NOTE(review): the index is read from `blocks` after pushing to
	// `output.content`; presumably both refer to the same array — confirm at the call site.
	const contentIndex = blocks.length - 1;
	stream.push({ type: "toolcall_start", contentIndex, partial: output });
}
|
|
415
|
+
|
|
416
|
+
/**
 * Handle a `contentBlockDelta` event by appending the delta to the matching block.
 *
 * The block is looked up by its stored `index` (the event's `contentBlockIndex`).
 * Exactly one of three delta kinds is processed, in this priority order:
 *  1. `text` — appended to a text block, which is created on first use.
 *  2. `toolUse` — JSON fragment appended to an existing `toolCall` block.
 *  3. `reasoningContent` — text and/or signature appended to a thinking block,
 *     which is created on first use.
 *
 * @param event - The delta event.
 * @param blocks - Block list searched for the target block.
 * @param output - Assistant message being assembled; new blocks are pushed to its content.
 * @param stream - Event stream receiving start/delta notifications.
 */
function handleContentBlockDelta(
	event: ContentBlockDeltaEvent,
	blocks: Block[],
	output: AssistantMessage,
	stream: AssistantMessageEventStream,
): void {
	const { contentBlockIndex, delta } = event;
	const existingIndex = blocks.findIndex(candidate => candidate.index === contentBlockIndex);
	const existing = blocks[existingIndex];

	// --- Text delta -------------------------------------------------------
	if (delta?.text !== undefined) {
		let textIndex = existingIndex;
		let textBlock = existing;
		// No start event is sent for text blocks, so create the block lazily.
		if (!textBlock) {
			const created: Block = { type: "text", text: "", index: contentBlockIndex };
			output.content.push(created);
			textIndex = blocks.length - 1;
			textBlock = blocks[textIndex];
			stream.push({ type: "text_start", contentIndex: textIndex, partial: output });
		}
		if (textBlock.type === "text") {
			textBlock.text += delta.text;
			stream.push({ type: "text_delta", contentIndex: textIndex, delta: delta.text, partial: output });
		}
		return;
	}

	// --- Tool-call argument delta ----------------------------------------
	if (delta?.toolUse && existing?.type === "toolCall") {
		existing.partialJson = (existing.partialJson || "") + (delta.toolUse.input || "");
		// Re-parse the accumulated fragment so `arguments` tracks the stream.
		existing.arguments = parseStreamingJson(existing.partialJson);
		stream.push({
			type: "toolcall_delta",
			contentIndex: existingIndex,
			delta: delta.toolUse.input || "",
			partial: output,
		});
		return;
	}

	// --- Reasoning delta --------------------------------------------------
	if (!delta?.reasoningContent) return;

	let thinkingIndex = existingIndex;
	let thinkingBlock = existing;
	if (!thinkingBlock) {
		const created: Block = { type: "thinking", thinking: "", thinkingSignature: "", index: contentBlockIndex };
		output.content.push(created);
		thinkingIndex = blocks.length - 1;
		thinkingBlock = blocks[thinkingIndex];
		stream.push({ type: "thinking_start", contentIndex: thinkingIndex, partial: output });
	}

	// A reasoning delta aimed at a non-thinking block is dropped.
	if (thinkingBlock?.type !== "thinking") return;

	if (delta.reasoningContent.text) {
		thinkingBlock.thinking += delta.reasoningContent.text;
		stream.push({
			type: "thinking_delta",
			contentIndex: thinkingIndex,
			delta: delta.reasoningContent.text,
			partial: output,
		});
	}
	// Signature fragments are concatenated silently; no stream event is emitted for them.
	if (delta.reasoningContent.signature) {
		thinkingBlock.thinkingSignature = (thinkingBlock.thinkingSignature || "") + delta.reasoningContent.signature;
	}
}
|
|
473
|
+
|
|
474
|
+
/**
 * Copy token usage from a `metadata` event onto the output message and
 * recompute its cost.
 *
 * Missing or zero counters are stored as `0`; `totalTokens` falls back to
 * `input + output` when the event does not report a non-zero total. Events
 * without a `usage` section leave the output untouched.
 *
 * @param event - The metadata event.
 * @param model - Model passed to `calculateCost`.
 * @param output - Assistant message whose `usage` is updated in place.
 */
function handleMetadata(event: MetadataEvent, model: Model<"bedrock-converse-stream">, output: AssistantMessage): void {
	const reported = event.usage;
	if (!reported) return;

	const usage = output.usage;
	usage.input = reported.inputTokens || 0;
	usage.output = reported.outputTokens || 0;
	usage.cacheRead = reported.cacheReadInputTokens || 0;
	usage.cacheWrite = reported.cacheWriteInputTokens || 0;
	usage.totalTokens = reported.totalTokens || usage.input + usage.output;
	calculateCost(model, usage);
}
|
|
484
|
+
|
|
485
|
+
/**
 * Handle a `contentBlockStop` event by finalising the matching block.
 *
 * The block is located by its stored `index`; that bookkeeping field is then
 * removed and the matching `*_end` event is pushed. For tool calls the
 * accumulated JSON is parsed one last time and `partialJson` is removed.
 * Events that match no block are ignored.
 *
 * @param event - The stop event.
 * @param blocks - Block list searched for the finished block.
 * @param output - Assistant message passed along as `partial` in the emitted event.
 * @param stream - Event stream receiving the end notification.
 */
function handleContentBlockStop(
	event: ContentBlockStopEvent,
	blocks: Block[],
	output: AssistantMessage,
	stream: AssistantMessageEventStream,
): void {
	const contentIndex = blocks.findIndex(candidate => candidate.index === event.contentBlockIndex);
	const finished = blocks[contentIndex];
	if (!finished) return;

	// The stream index is only needed while the block is open.
	delete (finished as Block).index;

	if (finished.type === "text") {
		stream.push({ type: "text_end", contentIndex, content: finished.text, partial: output });
	} else if (finished.type === "thinking") {
		stream.push({ type: "thinking_end", contentIndex, content: finished.thinking, partial: output });
	} else if (finished.type === "toolCall") {
		finished.arguments = parseStreamingJson(finished.partialJson);
		delete (finished as Block).partialJson;
		stream.push({ type: "toolcall_end", contentIndex, toolCall: finished, partial: output });
	}
}
|
|
510
|
+
|
|
511
|
+
/**
 * Decide whether explicit prompt-cache points should be sent for a model.
 *
 * Checks, in order:
 *  1. The model declares a non-zero cache read or cache write cost.
 *  2. The lower-cased id contains `claude` together with `-4-` or `-4.`
 *     (Claude 4.x: opus-4, sonnet-4, haiku-4).
 *  3. The id contains `claude-3-7-sonnet` or `claude-3-5-haiku` (legacy naming).
 *  4. The id contains `claude-haiku` (newer naming, Haiku 4.5+).
 *  5. The `AWS_BEDROCK_FORCE_CACHE` flag is set. This exists for application
 *     inference profiles, whose ARNs do not contain the model name.
 *
 * Amazon Nova models cache automatically and do not need explicit cache points.
 *
 * @param model - Model whose `cost` and `id` are inspected.
 * @returns `true` when any of the checks above succeeds.
 */
function supportsPromptCaching(model: Model<"bedrock-converse-stream">): boolean {
	const { cacheRead, cacheWrite } = model.cost;
	if (cacheRead || cacheWrite) return true;

	const id = model.id.toLowerCase();

	const isClaude4 = id.includes("claude") && ["-4-", "-4."].some(marker => id.includes(marker));
	if (isClaude4) return true;

	const isLegacyCacheable = ["claude-3-7-sonnet", "claude-3-5-haiku"].some(name => id.includes(name));
	if (isLegacyCacheable) return true;

	if (id.includes("claude-haiku")) return true;

	// Last resort: let the user force cache points through the environment.
	const forced = typeof process !== "undefined" && $flag("AWS_BEDROCK_FORCE_CACHE");
	return forced ? true : false;
}
|
|
536
|
+
|
|
537
|
+
/**
 * Check whether the model accepts a `signature` on reasoning content.
 *
 * Only Anthropic Claude models support the signature field. Other models
 * (Nova, Titan, Mistral, Llama, etc.) reject it with:
 * "This model doesn't support the reasoningContent.reasoningText.signature field"
 *
 * @param model - Model whose id is inspected, case-insensitively.
 * @returns `true` when the id contains `anthropic.claude` or `anthropic/claude`.
 */
function supportsThinkingSignature(model: Model<"bedrock-converse-stream">): boolean {
	const id = model.id.toLowerCase();
	// Matches both the dotted and the slash-separated vendor prefix.
	return /anthropic[./]claude/.test(id);
}
|
|
547
|
+
|
|
548
|
+
/**
 * Build the wire-format system prompt blocks.
 *
 * Each prompt is made well-formed (`String.prototype.toWellFormed`) and empty
 * prompts are dropped. When caching is enabled and the model supports it, a
 * trailing cache point is appended, with a one-hour TTL for `"long"` retention.
 *
 * @param systemPrompt - Prompt fragments, or `undefined` when there are none.
 * @param model - Model used to decide whether a cache point may be added.
 * @param cacheRetention - `"none"` disables the cache point; `"long"` adds `ttl: "1h"`.
 * @returns The system blocks, or `undefined` when no non-empty prompt remains.
 */
function buildSystemPrompt(
	systemPrompt: readonly string[] | undefined,
	model: Model<"bedrock-converse-stream">,
	cacheRetention: CacheRetention,
): SystemContent[] | undefined {
	const blocks: SystemContent[] = [];
	for (const raw of systemPrompt ?? []) {
		const text = raw.toWellFormed();
		if (text.length > 0) {
			blocks.push({ text });
		}
	}
	if (blocks.length === 0) return undefined;

	// Add a cache point for models that support prompt caching.
	const cachingEnabled = cacheRetention !== "none" && supportsPromptCaching(model);
	if (cachingEnabled) {
		blocks.push({
			cachePoint: { type: "default", ...(cacheRetention === "long" ? { ttl: "1h" } : {}) },
		});
	}

	return blocks;
}
|
|
567
|
+
|
|
568
|
+
/**
 * Convert one `toolResult` message into its wire-format `toolResult` block.
 * Image parts become image blocks; every other part is sent as well-formed text.
 */
function buildToolResultBlock(message: ToolResultMessage): ToolResultBlockWire {
	return {
		toolResult: {
			toolUseId: normalizeToolCallId(message.toolCallId),
			content: message.content.map(c =>
				c.type === "image" ? { image: createImageBlock(c.mimeType, c.data) } : { text: c.text.toWellFormed() },
			),
			status: message.isError ? "error" : "success",
		},
	};
}

/**
 * Convert the conversation in `context` into wire-format messages.
 *
 * - `developer` and `user` messages become `user` messages. Blank text is
 *   dropped, and a message left with no content is skipped.
 * - `assistant` messages keep text, tool calls and thinking blocks. Blank text
 *   and blank thinking blocks are dropped, and a message left empty is skipped.
 * - Runs of consecutive `toolResult` messages are merged into one `user`
 *   message, because Bedrock requires all tool results to be in one message.
 * - When caching is enabled and supported, a cache point is appended to the
 *   last message if that message has the `user` role.
 *
 * @param context - Conversation context; `context.messages` is first passed
 *   through `transformMessages`.
 * @param model - Target model; decides thinking-signature handling and caching.
 * @param cacheRetention - `"none"` disables the cache point; `"long"` adds `ttl: "1h"`.
 * @returns The converted messages in their original order.
 * @throws Error on an unknown message role or content type.
 */
function convertMessages(
	context: Context,
	model: Model<"bedrock-converse-stream">,
	cacheRetention: CacheRetention,
): WireMessage[] {
	const result: WireMessage[] = [];
	const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
	// Loop-invariant: whether the model accepts signed reasoning blocks.
	const acceptsSignature = supportsThinkingSignature(model);

	for (let i = 0; i < transformedMessages.length; i++) {
		const m = transformedMessages[i];

		switch (m.role) {
			case "developer":
			case "user":
				if (typeof m.content === "string") {
					// Skip empty user messages
					if (!m.content || m.content.trim() === "") continue;
					result.push({ role: "user", content: [{ text: m.content.toWellFormed() }] });
				} else {
					const contentBlocks: UserContent[] = [];
					for (const c of m.content) {
						switch (c.type) {
							case "text": {
								const text = c.text.toWellFormed();
								if (text.trim().length === 0) continue;
								contentBlocks.push({ text });
								break;
							}
							case "image":
								contentBlocks.push({ image: createImageBlock(c.mimeType, c.data) });
								break;
							default:
								throw new Error("Unknown user content type");
						}
					}
					// Skip message if all blocks filtered out
					if (contentBlocks.length === 0) continue;
					result.push({ role: "user", content: contentBlocks });
				}
				break;
			case "assistant": {
				// Skip assistant messages with empty content (e.g., from aborted requests);
				// Bedrock rejects messages with empty content arrays.
				if (m.content.length === 0) continue;
				const contentBlocks: AssistantContent[] = [];
				for (const c of m.content) {
					switch (c.type) {
						case "text":
							// Skip empty text blocks
							if (c.text.trim().length === 0) continue;
							contentBlocks.push({ text: c.text.toWellFormed() });
							break;
						case "toolCall":
							contentBlocks.push({
								toolUse: {
									toolUseId: normalizeToolCallId(c.id),
									name: c.name,
									input: c.arguments,
								},
							});
							break;
						case "thinking":
							// Skip empty thinking blocks
							if (c.thinking.trim().length === 0) continue;
							if (!acceptsSignature) {
								// Model doesn't support signatures at all — send as unsigned reasoning
								contentBlocks.push({
									reasoningContent: { reasoningText: { text: c.thinking.toWellFormed() } },
								});
							} else if (c.thinkingSignature) {
								contentBlocks.push({
									reasoningContent: {
										reasoningText: { text: c.thinking.toWellFormed(), signature: c.thinkingSignature },
									},
								});
							} else {
								// Model requires a signature but none is available (e.g. the stream
								// was aborted) — demote the block to plain text.
								contentBlocks.push({ text: `[Thinking]: ${c.thinking.toWellFormed()}` });
							}
							break;
						default:
							throw new Error("Unknown assistant content type");
					}
				}
				// Skip if all content blocks were filtered out
				if (contentBlocks.length === 0) continue;
				result.push({ role: "assistant", content: contentBlocks });
				break;
			}
			case "toolResult": {
				// Collect this and all directly following toolResult messages into a
				// single user message.
				const toolResults: ToolResultBlockWire[] = [];
				let j = i;
				while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
					toolResults.push(buildToolResultBlock(transformedMessages[j] as ToolResultMessage));
					j++;
				}
				// Resume the outer loop after the last merged tool result.
				i = j - 1;

				result.push({ role: "user", content: toolResults });
				break;
			}
			default:
				throw new Error("Unknown message role");
		}
	}

	// Add a cache point to the last message when it is a user message and the
	// model supports prompt caching.
	if (cacheRetention !== "none" && supportsPromptCaching(model) && result.length > 0) {
		const lastMessage = result[result.length - 1];
		if (lastMessage.role === "user" && lastMessage.content) {
			(lastMessage.content as UserContent[]).push({
				cachePoint: { type: "default", ...(cacheRetention === "long" ? { ttl: "1h" } : {}) },
			});
		}
	}

	return result;
}
|
|
714
|
+
|
|
715
|
+
/**
 * Build the wire-format tool configuration.
 *
 * @param tools - Tools to expose to the model.
 * @param toolChoice - `"auto"`, `"any"`, `"none"`, or an object of type `"tool"`
 *   naming one specific tool.
 * @returns `undefined` when there are no tools or `toolChoice` is `"none"`;
 *   otherwise the tool specs plus the mapped tool choice. The tool choice is
 *   left `undefined` when `toolChoice` matches none of the known forms.
 */
function convertToolConfig(
	tools: Tool[] | undefined,
	toolChoice: BedrockOptions["toolChoice"],
): WireToolConfig | undefined {
	if (!tools || tools.length === 0) return undefined;
	if (toolChoice === "none") return undefined;

	const specs: WireToolSpec[] = [];
	for (const tool of tools) {
		specs.push({
			toolSpec: {
				name: tool.name,
				description: tool.description || "",
				inputSchema: { json: toolWireSchema(tool) },
			},
		});
	}

	let choice: WireToolChoice | undefined;
	if (toolChoice === "auto") {
		choice = { auto: {} };
	} else if (toolChoice === "any") {
		choice = { any: {} };
	} else if (toolChoice?.type === "tool") {
		choice = { tool: { name: toolChoice.name } };
	}

	return { tools: specs, toolChoice: choice };
}
|
|
745
|
+
|
|
746
|
+
/**
 * Map a stop reason reported by the stream onto the internal `StopReason`.
 *
 * @param reason - Stop reason string from the `messageStop` event, if any.
 * @returns `"stop"` for `end_turn` / `stop_sequence`, `"length"` for
 *   `max_tokens` / `model_context_window_exceeded`, `"toolUse"` for `tool_use`,
 *   and `"error"` for anything else, including `undefined`.
 */
function mapStopReason(reason: string | undefined): StopReason {
	if (reason === "end_turn" || reason === "stop_sequence") {
		return "stop";
	}
	if (reason === "max_tokens" || reason === "model_context_window_exceeded") {
		return "length";
	}
	if (reason === "tool_use") {
		return "toolUse";
	}
	// Unrecognised or missing reasons are surfaced as errors.
	return "error";
}
|
|
760
|
+
|
|
761
|
+
/**
 * Build the additional model request fields that configure thinking.
 *
 * - Returns `undefined` when no reasoning effort is requested or the model is
 *   not flagged as a reasoning model.
 * - For models whose thinking mode is `"anthropic-adaptive"`, returns adaptive
 *   thinking plus an `output_config.effort`. `display` is set only for models
 *   accepted by `supportsAdaptiveThinkingDisplay`.
 * - Otherwise returns budgeted (`"enabled"`) thinking. The budget comes from
 *   `options.thinkingBudgets` or the built-in per-effort defaults, and the
 *   interleaved-thinking beta flag is added when `options.interleavedThinking` is set.
 *
 * @param model - Target model.
 * @param options - Request options (`reasoning`, `thinkingDisplay`,
 *   `thinkingBudgets`, `interleavedThinking`).
 * @returns The extra request fields, or `undefined` when thinking is not used.
 */
function buildAdditionalModelRequestFields(
	model: Model<"bedrock-converse-stream">,
	options: BedrockOptions,
): Record<string, unknown> | undefined {
	const { reasoning } = options;
	if (!reasoning) return undefined;
	if (!model.reasoning) return undefined;

	if (model.thinking?.mode === "anthropic-adaptive") {
		const effort = mapEffortToAnthropicAdaptiveEffort(model, reasoning);
		// Starting with Claude Opus 4.7, Anthropic switched the adaptive-thinking
		// default to "omitted", which silently suppresses streamed reasoning and
		// can read as a stalled stream during long reasoning runs (issue #1373).
		// Opt back into "summarized" by default on models that accept the field.
		const thinking: { type: "adaptive"; display?: BedrockThinkingDisplay } = supportsAdaptiveThinkingDisplay(
			model.id,
		)
			? { type: "adaptive", display: options.thinkingDisplay ?? "summarized" }
			: { type: "adaptive" };
		return { thinking, output_config: { effort } };
	}

	const level = requireSupportedEffort(model, reasoning);
	// Token budgets used when the caller supplies no override for the level.
	const fallbackBudgets: Record<Effort, number> = {
		minimal: 1024,
		low: 2048,
		medium: 8192,
		high: 16384,
		xhigh: 32768,
	};
	const budgetTokens = options.thinkingBudgets?.[level] ?? fallbackBudgets[level];

	const fields: Record<string, unknown> = {
		thinking: {
			type: "enabled",
			budget_tokens: budgetTokens,
			display: options.thinkingDisplay ?? "summarized",
		},
	};
	if (options.interleavedThinking) {
		fields.anthropic_beta = ["interleaved-thinking-2025-05-14"];
	}
	return fields;
}
|
|
809
|
+
|
|
810
|
+
/**
 * Adaptive thinking `display` is supported starting with Claude Opus 4.7.
 * Older adaptive-thinking models (Opus 4.6, Sonnet 4.6+) reject the field.
 *
 * Bedrock model ids are prefixed with region/inference-profile slugs (e.g.
 * `eu.anthropic.claude-opus-4-7-...`); the regex matches the `claude-opus-X[-Y]`
 * fragment regardless of prefix.
 *
 * The minor version is optional and limited to one or two digits. This keeps
 * the date stamp of a minor-less id such as `claude-opus-4-20250514` from being
 * read as minor version 20250514, which would wrongly report Opus 4.0 as
 * supporting the field. A missing minor version counts as `0`.
 *
 * @param modelId - Bedrock model id or inference-profile id.
 * @returns `true` for Opus versions 4.7 and later, `false` otherwise,
 *   including ids that are not Opus models.
 */
function supportsAdaptiveThinkingDisplay(modelId: string): boolean {
	const match = /claude-opus-(\d+)(?:-(\d{1,2})(?!\d))?/.exec(modelId);
	if (!match) return false;
	const major = Number(match[1]);
	const minor = match[2] === undefined ? 0 : Number(match[2]);
	return major > 4 || (major === 4 && minor >= 7);
}
|
|
824
|
+
|
|
825
|
+
/**
 * Build the wire-format image block.
 *
 * Bedrock's wire format expects the image as `{ source: { bytes: <base64-string> }, format }`.
 * The caller already passes base64-encoded data, so no decode/re-encode round-trip is needed.
 *
 * Media types are compared case-insensitively and any parameters after `;`
 * are ignored, so `IMAGE/PNG` and `image/png; charset=binary` are both
 * accepted as PNG.
 *
 * @param mimeType - Media type of the image (`image/jpeg`, `image/jpg`,
 *   `image/png`, `image/gif` or `image/webp`).
 * @param data - Base64-encoded image bytes, passed through unchanged.
 * @returns The image block with the matching `format`.
 * @throws Error when the media type is not one of the supported image types.
 */
function createImageBlock(mimeType: string, data: string): ImageBlockWire["image"] {
	// Reduce the media type to its bare `type/subtype` before matching.
	const paramStart = mimeType.indexOf(";");
	const essence = (paramStart === -1 ? mimeType : mimeType.slice(0, paramStart)).trim().toLowerCase();

	let format: "jpeg" | "png" | "gif" | "webp";
	switch (essence) {
		case "image/jpeg":
		case "image/jpg":
			format = "jpeg";
			break;
		case "image/png":
			format = "png";
			break;
		case "image/gif":
			format = "gif";
			break;
		case "image/webp":
			format = "webp";
			break;
		default:
			// Report the caller's original value so the offending input is recognisable.
			throw new Error(`Unknown image type: ${mimeType}`);
	}
	return { source: { bytes: data }, format };
}
|