@avadisabelle/ava-pi-ai 0.64.9 → 0.65.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/models.d.ts +1 -1
- package/dist/models.d.ts.map +1 -1
- package/dist/models.generated.d.ts +860 -422
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +966 -513
- package/dist/models.generated.js.map +1 -1
- package/dist/models.js +5 -2
- package/dist/models.js.map +1 -1
- package/dist/providers/amazon-bedrock.d.ts +23 -0
- package/dist/providers/amazon-bedrock.d.ts.map +1 -1
- package/dist/providers/amazon-bedrock.js +138 -33
- package/dist/providers/amazon-bedrock.js.map +1 -1
- package/dist/providers/anthropic.d.ts +16 -2
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +63 -26
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/azure-openai-responses.d.ts.map +1 -1
- package/dist/providers/azure-openai-responses.js +14 -15
- package/dist/providers/azure-openai-responses.js.map +1 -1
- package/dist/providers/faux.d.ts +56 -0
- package/dist/providers/faux.d.ts.map +1 -0
- package/dist/providers/faux.js +368 -0
- package/dist/providers/faux.js.map +1 -0
- package/dist/providers/google-gemini-cli.d.ts.map +1 -1
- package/dist/providers/google-gemini-cli.js +20 -1
- package/dist/providers/google-gemini-cli.js.map +1 -1
- package/dist/providers/google-shared.d.ts.map +1 -1
- package/dist/providers/google-shared.js +28 -1
- package/dist/providers/google-shared.js.map +1 -1
- package/dist/providers/google-vertex.d.ts.map +1 -1
- package/dist/providers/google-vertex.js +20 -2
- package/dist/providers/google-vertex.js.map +1 -1
- package/dist/providers/google.d.ts.map +1 -1
- package/dist/providers/google.js +45 -4
- package/dist/providers/google.js.map +1 -1
- package/dist/providers/mistral.d.ts +3 -0
- package/dist/providers/mistral.d.ts.map +1 -1
- package/dist/providers/mistral.js +37 -4
- package/dist/providers/mistral.js.map +1 -1
- package/dist/providers/openai-codex-responses.d.ts +2 -0
- package/dist/providers/openai-codex-responses.d.ts.map +1 -1
- package/dist/providers/openai-codex-responses.js +49 -12
- package/dist/providers/openai-codex-responses.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +48 -14
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-responses-shared.d.ts +1 -0
- package/dist/providers/openai-responses-shared.d.ts.map +1 -1
- package/dist/providers/openai-responses-shared.js +39 -10
- package/dist/providers/openai-responses-shared.js.map +1 -1
- package/dist/providers/openai-responses.d.ts.map +1 -1
- package/dist/providers/openai-responses.js +23 -18
- package/dist/providers/openai-responses.js.map +1 -1
- package/dist/providers/simple-options.d.ts.map +1 -1
- package/dist/providers/simple-options.js +4 -1
- package/dist/providers/simple-options.js.map +1 -1
- package/dist/providers/transform-messages.d.ts.map +1 -1
- package/dist/providers/transform-messages.js +20 -32
- package/dist/providers/transform-messages.js.map +1 -1
- package/dist/types.d.ts +82 -4
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/headers.d.ts +2 -0
- package/dist/utils/headers.d.ts.map +1 -0
- package/dist/utils/headers.js +8 -0
- package/dist/utils/headers.js.map +1 -0
- package/dist/utils/overflow.d.ts +4 -3
- package/dist/utils/overflow.d.ts.map +1 -1
- package/dist/utils/overflow.js +28 -13
- package/dist/utils/overflow.js.map +1 -1
- package/package.json +4 -4
package/dist/models.js
CHANGED
|
@@ -32,13 +32,16 @@ export function calculateCost(model, usage) {
|
|
|
32
32
|
*
|
|
33
33
|
* Supported today:
|
|
34
34
|
* - GPT-5.2 / GPT-5.3 / GPT-5.4 model families
|
|
35
|
-
* - Opus 4.6 models (xhigh maps to adaptive effort "max" on Anthropic-compatible providers)
|
|
35
|
+
* - Opus 4.6+ models (xhigh maps to adaptive effort "max" on Anthropic-compatible providers)
|
|
36
36
|
*/
|
|
37
37
|
export function supportsXhigh(model) {
|
|
38
38
|
if (model.id.includes("gpt-5.2") || model.id.includes("gpt-5.3") || model.id.includes("gpt-5.4")) {
|
|
39
39
|
return true;
|
|
40
40
|
}
|
|
41
|
-
if (model.id.includes("opus-4-6") ||
|
|
41
|
+
if (model.id.includes("opus-4-6") ||
|
|
42
|
+
model.id.includes("opus-4.6") ||
|
|
43
|
+
model.id.includes("opus-4-7") ||
|
|
44
|
+
model.id.includes("opus-4.7")) {
|
|
42
45
|
return true;
|
|
43
46
|
}
|
|
44
47
|
return false;
|
package/dist/models.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAG/C,MAAM,aAAa,GAAyC,IAAI,GAAG,EAAE,CAAC;AAEtE,iDAAiD;AACjD,KAAK,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;IACzD,MAAM,cAAc,GAAG,IAAI,GAAG,EAAsB,CAAC;IACrD,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAClD,cAAc,CAAC,GAAG,CAAC,EAAE,EAAE,KAAmB,CAAC,CAAC;IAC7C,CAAC;IACD,aAAa,CAAC,GAAG,CAAC,QAAQ,EAAE,cAAc,CAAC,CAAC;AAC7C,CAAC;AAOD,MAAM,UAAU,QAAQ,CACvB,QAAmB,EACnB,OAAiB,EACsB;IACvC,MAAM,cAAc,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACnD,OAAO,cAAc,EAAE,GAAG,CAAC,OAAiB,CAAyC,CAAC;AAAA,CACtF;AAED,MAAM,UAAU,YAAY,GAAoB;IAC/C,OAAO,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,CAAoB,CAAC;AAAA,CAC3D;AAED,MAAM,UAAU,SAAS,CACxB,QAAmB,EAC8C;IACjE,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC3C,OAAO,MAAM,CAAC,CAAC,CAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAoE,CAAC,CAAC,CAAC,EAAE,CAAC;AAAA,CACrH;AAED,MAAM,UAAU,aAAa,CAAmB,KAAkB,EAAE,KAAY,EAAiB;IAChG,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC;IAC9D,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACjE,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,GAAG,KAAK,CAAC,SAAS,CAAC;IAC1E,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC;IAC7E,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC;IACvG,OAAO,KAAK,CAAC,IAAI,CAAC;AAAA,CAClB;AAED;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAmB,KAAkB,EAAW;IAC5E,IAAI,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAClG,OAAO,IAAI,CAAC;IACb,CAAC;IAED,
|
|
1
|
+
{"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAG/C,MAAM,aAAa,GAAyC,IAAI,GAAG,EAAE,CAAC;AAEtE,iDAAiD;AACjD,KAAK,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;IACzD,MAAM,cAAc,GAAG,IAAI,GAAG,EAAsB,CAAC;IACrD,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAClD,cAAc,CAAC,GAAG,CAAC,EAAE,EAAE,KAAmB,CAAC,CAAC;IAC7C,CAAC;IACD,aAAa,CAAC,GAAG,CAAC,QAAQ,EAAE,cAAc,CAAC,CAAC;AAC7C,CAAC;AAOD,MAAM,UAAU,QAAQ,CACvB,QAAmB,EACnB,OAAiB,EACsB;IACvC,MAAM,cAAc,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACnD,OAAO,cAAc,EAAE,GAAG,CAAC,OAAiB,CAAyC,CAAC;AAAA,CACtF;AAED,MAAM,UAAU,YAAY,GAAoB;IAC/C,OAAO,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,CAAoB,CAAC;AAAA,CAC3D;AAED,MAAM,UAAU,SAAS,CACxB,QAAmB,EAC8C;IACjE,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC3C,OAAO,MAAM,CAAC,CAAC,CAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAoE,CAAC,CAAC,CAAC,EAAE,CAAC;AAAA,CACrH;AAED,MAAM,UAAU,aAAa,CAAmB,KAAkB,EAAE,KAAY,EAAiB;IAChG,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC;IAC9D,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACjE,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,GAAG,KAAK,CAAC,SAAS,CAAC;IAC1E,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC;IAC7E,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC;IACvG,OAAO,KAAK,CAAC,IAAI,CAAC;AAAA,CAClB;AAED;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAmB,KAAkB,EAAW;IAC5E,IAAI,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAClG,OAAO,IAAI,CAAC;IACb,CAAC;IAED,IACC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC7B,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC7B,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC7B,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,EAC5B,CAAC;QACF,OAAO,IAAI,CAAC;IACb,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC7B,CAAiC,EACjC,CAAiC,EACvB;IACV,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC;IAC3B,OAAO,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,QAAQ,CAAC;AAAA,CAClD","sourcesContent":["import { MODELS } from \"./models.generated.js\";\nimport type { Api, KnownProvider, Model, Usage } from \"./types.js\";\n\nconst modelRegistry: Map<string, Map<string, Model<Api>>> = new Map();\n\n// Initialize registry from MODELS on module load\nfor (const [provider, models] of Object.entries(MODELS)) {\n\tconst providerModels = new Map<string, Model<Api>>();\n\tfor (const [id, model] of Object.entries(models)) {\n\t\tproviderModels.set(id, model as Model<Api>);\n\t}\n\tmodelRegistry.set(provider, providerModels);\n}\n\ntype ModelApi<\n\tTProvider extends KnownProvider,\n\tTModelId extends keyof (typeof MODELS)[TProvider],\n> = (typeof MODELS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never;\n\nexport function getModel<TProvider extends KnownProvider, TModelId extends keyof (typeof MODELS)[TProvider]>(\n\tprovider: TProvider,\n\tmodelId: TModelId,\n): Model<ModelApi<TProvider, TModelId>> {\n\tconst providerModels = modelRegistry.get(provider);\n\treturn providerModels?.get(modelId as string) as Model<ModelApi<TProvider, TModelId>>;\n}\n\nexport function getProviders(): KnownProvider[] {\n\treturn Array.from(modelRegistry.keys()) as KnownProvider[];\n}\n\nexport function getModels<TProvider extends KnownProvider>(\n\tprovider: TProvider,\n): Model<ModelApi<TProvider, keyof (typeof MODELS)[TProvider]>>[] {\n\tconst models = modelRegistry.get(provider);\n\treturn models ? (Array.from(models.values()) as Model<ModelApi<TProvider, keyof (typeof MODELS)[TProvider]>>[]) : [];\n}\n\nexport function calculateCost<TApi extends Api>(model: Model<TApi>, usage: Usage): Usage[\"cost\"] {\n\tusage.cost.input = (model.cost.input / 1000000) * usage.input;\n\tusage.cost.output = (model.cost.output / 1000000) * usage.output;\n\tusage.cost.cacheRead = (model.cost.cacheRead / 1000000) * usage.cacheRead;\n\tusage.cost.cacheWrite = (model.cost.cacheWrite / 1000000) * usage.cacheWrite;\n\tusage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;\n\treturn usage.cost;\n}\n\n/**\n * Check if a model supports xhigh thinking level.\n *\n * Supported today:\n * - GPT-5.2 / GPT-5.3 / GPT-5.4 model families\n * - Opus 4.6+ models (xhigh maps to adaptive effort \"max\" on Anthropic-compatible providers)\n */\nexport function supportsXhigh<TApi extends Api>(model: Model<TApi>): boolean {\n\tif (model.id.includes(\"gpt-5.2\") || model.id.includes(\"gpt-5.3\") || model.id.includes(\"gpt-5.4\")) {\n\t\treturn true;\n\t}\n\n\tif (\n\t\tmodel.id.includes(\"opus-4-6\") ||\n\t\tmodel.id.includes(\"opus-4.6\") ||\n\t\tmodel.id.includes(\"opus-4-7\") ||\n\t\tmodel.id.includes(\"opus-4.7\")\n\t) {\n\t\treturn true;\n\t}\n\n\treturn false;\n}\n\n/**\n * Check if two models are equal by comparing both their id and provider.\n * Returns false if either model is null or undefined.\n */\nexport function modelsAreEqual<TApi extends Api>(\n\ta: Model<TApi> | null | undefined,\n\tb: Model<TApi> | null | undefined,\n): boolean {\n\tif (!a || !b) return false;\n\treturn a.id === b.id && a.provider === b.provider;\n}\n"]}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { SimpleStreamOptions, StreamFunction, StreamOptions, ThinkingBudgets, ThinkingLevel } from "../types.js";
|
|
2
|
+
export type BedrockThinkingDisplay = "summarized" | "omitted";
|
|
2
3
|
export interface BedrockOptions extends StreamOptions {
|
|
3
4
|
region?: string;
|
|
4
5
|
profile?: string;
|
|
@@ -9,6 +10,28 @@ export interface BedrockOptions extends StreamOptions {
|
|
|
9
10
|
reasoning?: ThinkingLevel;
|
|
10
11
|
thinkingBudgets?: ThinkingBudgets;
|
|
11
12
|
interleavedThinking?: boolean;
|
|
13
|
+
/**
|
|
14
|
+
* Controls how Claude's thinking content is returned in responses.
|
|
15
|
+
* - "summarized": Thinking blocks contain summarized thinking text (default here).
|
|
16
|
+
* - "omitted": Thinking content is redacted but the signature still travels back
|
|
17
|
+
* for multi-turn continuity, reducing time-to-first-text-token.
|
|
18
|
+
*
|
|
19
|
+
* Note: Anthropic's API default for Claude Opus 4.7 and Mythos Preview is
|
|
20
|
+
* "omitted". We default to "summarized" here to keep behavior consistent with
|
|
21
|
+
* older Claude 4 models. Only applies to Claude models on Bedrock.
|
|
22
|
+
*/
|
|
23
|
+
thinkingDisplay?: BedrockThinkingDisplay;
|
|
24
|
+
/** Key-value pairs attached to the inference request for cost allocation tagging.
|
|
25
|
+
* Keys: max 64 chars, no `aws:` prefix. Values: max 256 chars. Max 50 pairs.
|
|
26
|
+
* Tags appear in AWS Cost Explorer split cost allocation data.
|
|
27
|
+
* @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ConverseStream.html */
|
|
28
|
+
requestMetadata?: Record<string, string>;
|
|
29
|
+
/** Bearer token for Bedrock API key authentication.
|
|
30
|
+
* When set, bypasses SigV4 signing and sends Authorization: Bearer <token> instead.
|
|
31
|
+
* Requires `bedrock:CallWithBearerToken` IAM permission on the token's identity.
|
|
32
|
+
* Set via AWS_BEARER_TOKEN_BEDROCK env var or pass directly.
|
|
33
|
+
* @see https://docs.aws.amazon.com/service-authorization/latest/reference/list_amazonbedrock.html */
|
|
34
|
+
bearerToken?: string;
|
|
12
35
|
}
|
|
13
36
|
export declare const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOptions>;
|
|
14
37
|
export declare const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", SimpleStreamOptions>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"amazon-bedrock.d.ts","sourceRoot":"","sources":["../../src/providers/amazon-bedrock.ts"],"names":[],"mappings":"AAuBA,OAAO,KAAK,EAMX,mBAAmB,EAEnB,cAAc,EACd,aAAa,EAEb,eAAe,EAEf,aAAa,EAIb,MAAM,aAAa,CAAC;AAOrB,MAAM,WAAW,cAAe,SAAQ,aAAa;IACpD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IAEtE,SAAS,CAAC,EAAE,aAAa,CAAC;IAE1B,eAAe,CAAC,EAAE,eAAe,CAAC;IAElC,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC9B;AAID,eAAO,MAAM,aAAa,EAAE,cAAc,CAAC,yBAAyB,EAAE,cAAc,CA2JnF,CAAC;AAEF,eAAO,MAAM,mBAAmB,EAAE,cAAc,CAAC,yBAAyB,EAAE,mBAAmB,CA0C9F,CAAC","sourcesContent":["import {\n\tBedrockRuntimeClient,\n\ttype BedrockRuntimeClientConfig,\n\tStopReason as BedrockStopReason,\n\ttype Tool as BedrockTool,\n\tCachePointType,\n\tCacheTTL,\n\ttype ContentBlock,\n\ttype ContentBlockDeltaEvent,\n\ttype ContentBlockStartEvent,\n\ttype ContentBlockStopEvent,\n\tConversationRole,\n\tConverseStreamCommand,\n\ttype ConverseStreamMetadataEvent,\n\tImageFormat,\n\ttype Message,\n\ttype SystemContentBlock,\n\ttype ToolChoice,\n\ttype ToolConfiguration,\n\tToolResultStatus,\n} from \"@aws-sdk/client-bedrock-runtime\";\n\nimport { calculateCost } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tCacheRetention,\n\tContext,\n\tModel,\n\tSimpleStreamOptions,\n\tStopReason,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingBudgets,\n\tThinkingContent,\n\tThinkingLevel,\n\tTool,\n\tToolCall,\n\tToolResultMessage,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { parseStreamingJson } from \"../utils/json-parse.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\nimport { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from \"./simple-options.js\";\nimport { transformMessages } from \"./transform-messages.js\";\n\nexport interface BedrockOptions extends StreamOptions {\n\tregion?: string;\n\tprofile?: string;\n\ttoolChoice?: \"auto\" | \"any\" | \"none\" | { type: \"tool\"; name: string };\n\t/* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */\n\treasoning?: ThinkingLevel;\n\t/* Custom token budgets per thinking level. Overrides default budgets. */\n\tthinkingBudgets?: ThinkingBudgets;\n\t/* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */\n\tinterleavedThinking?: boolean;\n}\n\ntype Block = (TextContent | ThinkingContent | ToolCall) & { index?: number; partialJson?: string };\n\nexport const streamBedrock: StreamFunction<\"bedrock-converse-stream\", BedrockOptions> = (\n\tmodel: Model<\"bedrock-converse-stream\">,\n\tcontext: Context,\n\toptions: BedrockOptions = {},\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: \"bedrock-converse-stream\" as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\tconst blocks = output.content as Block[];\n\n\t\tconst config: BedrockRuntimeClientConfig = {\n\t\t\tprofile: options.profile,\n\t\t};\n\n\t\t// in Node.js/Bun environment only\n\t\tif (typeof process !== \"undefined\" && (process.versions?.node || process.versions?.bun)) {\n\t\t\t// Region resolution: explicit option > env vars > SDK default chain.\n\t\t\t// When AWS_PROFILE is set, we leave region undefined so the SDK can\n\t\t\t// resovle it from aws profile configs. Otherwise fall back to us-east-1.\n\t\t\tconst explicitRegion = options.region || process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION;\n\t\t\tif (explicitRegion) {\n\t\t\t\tconfig.region = explicitRegion;\n\t\t\t} else if (!process.env.AWS_PROFILE) {\n\t\t\t\tconfig.region = \"us-east-1\";\n\t\t\t}\n\n\t\t\t// Support proxies that don't need authentication\n\t\t\tif (process.env.AWS_BEDROCK_SKIP_AUTH === \"1\") {\n\t\t\t\tconfig.credentials = {\n\t\t\t\t\taccessKeyId: \"dummy-access-key\",\n\t\t\t\t\tsecretAccessKey: \"dummy-secret-key\",\n\t\t\t\t};\n\t\t\t}\n\n\t\t\tif (\n\t\t\t\tprocess.env.HTTP_PROXY ||\n\t\t\t\tprocess.env.HTTPS_PROXY ||\n\t\t\t\tprocess.env.NO_PROXY ||\n\t\t\t\tprocess.env.http_proxy ||\n\t\t\t\tprocess.env.https_proxy ||\n\t\t\t\tprocess.env.no_proxy\n\t\t\t) {\n\t\t\t\tconst nodeHttpHandler = await import(\"@smithy/node-http-handler\");\n\t\t\t\tconst proxyAgent = await import(\"proxy-agent\");\n\n\t\t\t\tconst agent = new proxyAgent.ProxyAgent();\n\n\t\t\t\t// Bedrock runtime uses NodeHttp2Handler by default since v3.798.0, which is based\n\t\t\t\t// on `http2` module and has no support for http agent.\n\t\t\t\t// Use NodeHttpHandler to support http agent.\n\t\t\t\tconfig.requestHandler = new nodeHttpHandler.NodeHttpHandler({\n\t\t\t\t\thttpAgent: agent,\n\t\t\t\t\thttpsAgent: agent,\n\t\t\t\t});\n\t\t\t} else if (process.env.AWS_BEDROCK_FORCE_HTTP1 === \"1\") {\n\t\t\t\t// Some custom endpoints require HTTP/1.1 instead of HTTP/2\n\t\t\t\tconst nodeHttpHandler = await import(\"@smithy/node-http-handler\");\n\t\t\t\tconfig.requestHandler = new nodeHttpHandler.NodeHttpHandler();\n\t\t\t}\n\t\t} else {\n\t\t\t// Non-Node environment (browser): fall back to us-east-1 since\n\t\t\t// there's no config file resolution available.\n\t\t\tconfig.region = options.region || \"us-east-1\";\n\t\t}\n\n\t\ttry {\n\t\t\tconst client = new BedrockRuntimeClient(config);\n\n\t\t\tconst cacheRetention = resolveCacheRetention(options.cacheRetention);\n\t\t\tlet commandInput = {\n\t\t\t\tmodelId: model.id,\n\t\t\t\tmessages: convertMessages(context, model, cacheRetention),\n\t\t\t\tsystem: buildSystemPrompt(context.systemPrompt, model, cacheRetention),\n\t\t\t\tinferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },\n\t\t\t\ttoolConfig: convertToolConfig(context.tools, options.toolChoice),\n\t\t\t\tadditionalModelRequestFields: buildAdditionalModelRequestFields(model, options),\n\t\t\t};\n\t\t\tconst nextCommandInput = await options?.onPayload?.(commandInput, model);\n\t\t\tif (nextCommandInput !== undefined) {\n\t\t\t\tcommandInput = nextCommandInput as typeof commandInput;\n\t\t\t}\n\t\t\tconst command = new ConverseStreamCommand(commandInput);\n\n\t\t\tconst response = await client.send(command, { abortSignal: options.signal });\n\n\t\t\tfor await (const item of response.stream!) {\n\t\t\t\tif (item.messageStart) {\n\t\t\t\t\tif (item.messageStart.role !== ConversationRole.ASSISTANT) {\n\t\t\t\t\t\tthrow new Error(\"Unexpected assistant message start but got user message start instead\");\n\t\t\t\t\t}\n\t\t\t\t\tstream.push({ type: \"start\", partial: output });\n\t\t\t\t} else if (item.contentBlockStart) {\n\t\t\t\t\thandleContentBlockStart(item.contentBlockStart, blocks, output, stream);\n\t\t\t\t} else if (item.contentBlockDelta) {\n\t\t\t\t\thandleContentBlockDelta(item.contentBlockDelta, blocks, output, stream);\n\t\t\t\t} else if (item.contentBlockStop) {\n\t\t\t\t\thandleContentBlockStop(item.contentBlockStop, blocks, output, stream);\n\t\t\t\t} else if (item.messageStop) {\n\t\t\t\t\toutput.stopReason = mapStopReason(item.messageStop.stopReason);\n\t\t\t\t} else if (item.metadata) {\n\t\t\t\t\thandleMetadata(item.metadata, model, output);\n\t\t\t\t} else if (item.internalServerException) {\n\t\t\t\t\tthrow new Error(`Internal server error: ${item.internalServerException.message}`);\n\t\t\t\t} else if (item.modelStreamErrorException) {\n\t\t\t\t\tthrow new Error(`Model stream error: ${item.modelStreamErrorException.message}`);\n\t\t\t\t} else if (item.validationException) {\n\t\t\t\t\tthrow new Error(`Validation error: ${item.validationException.message}`);\n\t\t\t\t} else if (item.throttlingException) {\n\t\t\t\t\tthrow new Error(`Throttling error: ${item.throttlingException.message}`);\n\t\t\t\t} else if (item.serviceUnavailableException) {\n\t\t\t\t\tthrow new Error(`Service unavailable: ${item.serviceUnavailableException.message}`);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (options.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"error\" || output.stopReason === \"aborted\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) {\n\t\t\t\tdelete (block as Block).index;\n\t\t\t\tdelete (block as Block).partialJson;\n\t\t\t}\n\t\t\toutput.stopReason = options.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\nexport const streamSimpleBedrock: StreamFunction<\"bedrock-converse-stream\", SimpleStreamOptions> = (\n\tmodel: Model<\"bedrock-converse-stream\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst base = buildBaseOptions(model, options, undefined);\n\tif (!options?.reasoning) {\n\t\treturn streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions);\n\t}\n\n\tif (model.id.includes(\"anthropic.claude\") || model.id.includes(\"anthropic/claude\")) {\n\t\tif (supportsAdaptiveThinking(model.id)) {\n\t\t\treturn streamBedrock(model, context, {\n\t\t\t\t...base,\n\t\t\t\treasoning: options.reasoning,\n\t\t\t\tthinkingBudgets: options.thinkingBudgets,\n\t\t\t} satisfies BedrockOptions);\n\t\t}\n\n\t\tconst adjusted = adjustMaxTokensForThinking(\n\t\t\tbase.maxTokens || 0,\n\t\t\tmodel.maxTokens,\n\t\t\toptions.reasoning,\n\t\t\toptions.thinkingBudgets,\n\t\t);\n\n\t\treturn streamBedrock(model, context, {\n\t\t\t...base,\n\t\t\tmaxTokens: adjusted.maxTokens,\n\t\t\treasoning: options.reasoning,\n\t\t\tthinkingBudgets: {\n\t\t\t\t...(options.thinkingBudgets || {}),\n\t\t\t\t[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,\n\t\t\t},\n\t\t} satisfies BedrockOptions);\n\t}\n\n\treturn streamBedrock(model, context, {\n\t\t...base,\n\t\treasoning: options.reasoning,\n\t\tthinkingBudgets: options.thinkingBudgets,\n\t} satisfies BedrockOptions);\n};\n\nfunction handleContentBlockStart(\n\tevent: ContentBlockStartEvent,\n\tblocks: Block[],\n\toutput: AssistantMessage,\n\tstream: AssistantMessageEventStream,\n): void {\n\tconst index = event.contentBlockIndex!;\n\tconst start = event.start;\n\n\tif (start?.toolUse) {\n\t\tconst block: Block = {\n\t\t\ttype: \"toolCall\",\n\t\t\tid: start.toolUse.toolUseId || \"\",\n\t\t\tname: start.toolUse.name || \"\",\n\t\t\targuments: {},\n\t\t\tpartialJson: \"\",\n\t\t\tindex,\n\t\t};\n\t\toutput.content.push(block);\n\t\tstream.push({ type: \"toolcall_start\", contentIndex: blocks.length - 1, partial: output });\n\t}\n}\n\nfunction handleContentBlockDelta(\n\tevent: ContentBlockDeltaEvent,\n\tblocks: Block[],\n\toutput: AssistantMessage,\n\tstream: AssistantMessageEventStream,\n): void {\n\tconst contentBlockIndex = event.contentBlockIndex!;\n\tconst delta = event.delta;\n\tlet index = blocks.findIndex((b) => b.index === contentBlockIndex);\n\tlet block = blocks[index];\n\n\tif (delta?.text !== undefined) {\n\t\t// If no text block exists yet, create one, as `handleContentBlockStart` is not sent for text blocks\n\t\tif (!block) {\n\t\t\tconst newBlock: Block = { type: \"text\", text: \"\", index: contentBlockIndex };\n\t\t\toutput.content.push(newBlock);\n\t\t\tindex = blocks.length - 1;\n\t\t\tblock = blocks[index];\n\t\t\tstream.push({ type: \"text_start\", contentIndex: index, partial: output });\n\t\t}\n\t\tif (block.type === \"text\") {\n\t\t\tblock.text += delta.text;\n\t\t\tstream.push({ type: \"text_delta\", contentIndex: index, delta: delta.text, partial: output });\n\t\t}\n\t} else if (delta?.toolUse && block?.type === \"toolCall\") {\n\t\tblock.partialJson = (block.partialJson || \"\") + (delta.toolUse.input || \"\");\n\t\tblock.arguments = parseStreamingJson(block.partialJson);\n\t\tstream.push({ type: \"toolcall_delta\", contentIndex: index, delta: delta.toolUse.input || \"\", partial: output });\n\t} else if (delta?.reasoningContent) {\n\t\tlet thinkingBlock = block;\n\t\tlet thinkingIndex = index;\n\n\t\tif (!thinkingBlock) {\n\t\t\tconst newBlock: Block = { type: \"thinking\", thinking: \"\", thinkingSignature: \"\", index: contentBlockIndex };\n\t\t\toutput.content.push(newBlock);\n\t\t\tthinkingIndex = blocks.length - 1;\n\t\t\tthinkingBlock = blocks[thinkingIndex];\n\t\t\tstream.push({ type: \"thinking_start\", contentIndex: thinkingIndex, partial: output });\n\t\t}\n\n\t\tif (thinkingBlock?.type === \"thinking\") {\n\t\t\tif (delta.reasoningContent.text) {\n\t\t\t\tthinkingBlock.thinking += delta.reasoningContent.text;\n\t\t\t\tstream.push({\n\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\tcontentIndex: thinkingIndex,\n\t\t\t\t\tdelta: delta.reasoningContent.text,\n\t\t\t\t\tpartial: output,\n\t\t\t\t});\n\t\t\t}\n\t\t\tif (delta.reasoningContent.signature) {\n\t\t\t\tthinkingBlock.thinkingSignature =\n\t\t\t\t\t(thinkingBlock.thinkingSignature || \"\") + delta.reasoningContent.signature;\n\t\t\t}\n\t\t}\n\t}\n}\n\nfunction handleMetadata(\n\tevent: ConverseStreamMetadataEvent,\n\tmodel: Model<\"bedrock-converse-stream\">,\n\toutput: AssistantMessage,\n): void {\n\tif (event.usage) {\n\t\toutput.usage.input = event.usage.inputTokens || 0;\n\t\toutput.usage.output = event.usage.outputTokens || 0;\n\t\toutput.usage.cacheRead = event.usage.cacheReadInputTokens || 0;\n\t\toutput.usage.cacheWrite = event.usage.cacheWriteInputTokens || 0;\n\t\toutput.usage.totalTokens = event.usage.totalTokens || output.usage.input + output.usage.output;\n\t\tcalculateCost(model, output.usage);\n\t}\n}\n\nfunction handleContentBlockStop(\n\tevent: ContentBlockStopEvent,\n\tblocks: Block[],\n\toutput: AssistantMessage,\n\tstream: AssistantMessageEventStream,\n): void {\n\tconst index = blocks.findIndex((b) => b.index === event.contentBlockIndex);\n\tconst block = blocks[index];\n\tif (!block) return;\n\tdelete (block as Block).index;\n\n\tswitch (block.type) {\n\t\tcase \"text\":\n\t\t\tstream.push({ type: \"text_end\", contentIndex: index, content: block.text, partial: output });\n\t\t\tbreak;\n\t\tcase \"thinking\":\n\t\t\tstream.push({ type: \"thinking_end\", contentIndex: index, content: block.thinking, partial: output });\n\t\t\tbreak;\n\t\tcase \"toolCall\":\n\t\t\tblock.arguments = parseStreamingJson(block.partialJson);\n\t\t\tdelete (block as Block).partialJson;\n\t\t\tstream.push({ type: \"toolcall_end\", contentIndex: index, toolCall: block, partial: output });\n\t\t\tbreak;\n\t}\n}\n\n/**\n * Check if the model supports adaptive thinking (Opus 4.6 and Sonnet 4.6).\n */\nfunction supportsAdaptiveThinking(modelId: string): boolean {\n\treturn (\n\t\tmodelId.includes(\"opus-4-6\") ||\n\t\tmodelId.includes(\"opus-4.6\") ||\n\t\tmodelId.includes(\"sonnet-4-6\") ||\n\t\tmodelId.includes(\"sonnet-4.6\")\n\t);\n}\n\nfunction mapThinkingLevelToEffort(\n\tlevel: SimpleStreamOptions[\"reasoning\"],\n\tmodelId: string,\n): \"low\" | \"medium\" | \"high\" | \"max\" {\n\tswitch (level) {\n\t\tcase \"minimal\":\n\t\tcase \"low\":\n\t\t\treturn \"low\";\n\t\tcase \"medium\":\n\t\t\treturn \"medium\";\n\t\tcase \"high\":\n\t\t\treturn \"high\";\n\t\tcase \"xhigh\":\n\t\t\treturn modelId.includes(\"opus-4-6\") || modelId.includes(\"opus-4.6\") ? \"max\" : \"high\";\n\t\tdefault:\n\t\t\treturn \"high\";\n\t}\n}\n\n/**\n * Resolve cache retention preference.\n * Defaults to \"short\" and uses PI_CACHE_RETENTION for backward compatibility.\n */\nfunction resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {\n\tif (cacheRetention) {\n\t\treturn cacheRetention;\n\t}\n\tif (typeof process !== \"undefined\" && process.env.PI_CACHE_RETENTION === \"long\") {\n\t\treturn \"long\";\n\t}\n\treturn \"short\";\n}\n\n/**\n * Check if the model supports prompt caching.\n * Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, Claude 4.x models\n *\n * For base models and system-defined inference profiles the model ID / ARN\n * contains the model name, so we can decide locally.\n *\n * For application inference profiles (whose ARNs don't contain the model name),\n * set AWS_BEDROCK_FORCE_CACHE=1 to enable cache points. Amazon Nova models\n * have automatic caching and don't need explicit cache points.\n */\nfunction supportsPromptCaching(model: Model<\"bedrock-converse-stream\">): boolean {\n\tconst id = model.id.toLowerCase();\n\tif (!id.includes(\"claude\")) {\n\t\t// Application inference profiles don't contain the model name in the ARN.\n\t\t// Allow users to force cache points via environment variable.\n\t\tif (typeof process !== \"undefined\" && process.env.AWS_BEDROCK_FORCE_CACHE === \"1\") return true;\n\t\treturn false;\n\t}\n\t// Claude 4.x models (opus-4, sonnet-4, haiku-4)\n\tif (id.includes(\"-4-\") || id.includes(\"-4.\")) return true;\n\t// Claude 3.7 Sonnet\n\tif (id.includes(\"claude-3-7-sonnet\")) return true;\n\t// Claude 3.5 Haiku\n\tif (id.includes(\"claude-3-5-haiku\")) return true;\n\treturn false;\n}\n\n/**\n * Check if the model supports thinking signatures in reasoningContent.\n * Only Anthropic Claude models support the signature field.\n * Other models (OpenAI, Qwen, Minimax, Moonshot, etc.) reject it with:\n * \"This model doesn't support the reasoningContent.reasoningText.signature field\"\n */\nfunction supportsThinkingSignature(model: Model<\"bedrock-converse-stream\">): boolean {\n\tconst id = model.id.toLowerCase();\n\treturn id.includes(\"anthropic.claude\") || id.includes(\"anthropic/claude\");\n}\n\nfunction buildSystemPrompt(\n\tsystemPrompt: string | undefined,\n\tmodel: Model<\"bedrock-converse-stream\">,\n\tcacheRetention: CacheRetention,\n): SystemContentBlock[] | undefined {\n\tif (!systemPrompt) return undefined;\n\n\tconst blocks: SystemContentBlock[] = [{ text: sanitizeSurrogates(systemPrompt) }];\n\n\t// Add cache point for supported Claude models when caching is enabled\n\tif (cacheRetention !== \"none\" && supportsPromptCaching(model)) {\n\t\tblocks.push({\n\t\t\tcachePoint: { type: CachePointType.DEFAULT, ...(cacheRetention === \"long\" ? { ttl: CacheTTL.ONE_HOUR } : {}) },\n\t\t});\n\t}\n\n\treturn blocks;\n}\n\nfunction normalizeToolCallId(id: string): string {\n\tconst sanitized = id.replace(/[^a-zA-Z0-9_-]/g, \"_\");\n\treturn sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;\n}\n\nfunction convertMessages(\n\tcontext: Context,\n\tmodel: Model<\"bedrock-converse-stream\">,\n\tcacheRetention: CacheRetention,\n): Message[] {\n\tconst result: Message[] = [];\n\tconst transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);\n\n\tfor (let i = 0; i < transformedMessages.length; i++) {\n\t\tconst m = transformedMessages[i];\n\n\t\tswitch (m.role) {\n\t\t\tcase \"user\":\n\t\t\t\tresult.push({\n\t\t\t\t\trole: ConversationRole.USER,\n\t\t\t\t\tcontent:\n\t\t\t\t\t\ttypeof m.content === \"string\"\n\t\t\t\t\t\t\t? [{ text: sanitizeSurrogates(m.content) }]\n\t\t\t\t\t\t\t: m.content.map((c) => {\n\t\t\t\t\t\t\t\t\tswitch (c.type) {\n\t\t\t\t\t\t\t\t\t\tcase \"text\":\n\t\t\t\t\t\t\t\t\t\t\treturn { text: sanitizeSurrogates(c.text) };\n\t\t\t\t\t\t\t\t\t\tcase \"image\":\n\t\t\t\t\t\t\t\t\t\t\treturn { image: createImageBlock(c.mimeType, c.data) };\n\t\t\t\t\t\t\t\t\t\tdefault:\n\t\t\t\t\t\t\t\t\t\t\tthrow new Error(\"Unknown user content type\");\n\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t}),\n\t\t\t\t});\n\t\t\t\tbreak;\n\t\t\tcase \"assistant\": {\n\t\t\t\t// Skip assistant messages with empty content (e.g., from aborted requests)\n\t\t\t\t// Bedrock rejects messages with empty content arrays\n\t\t\t\tif (m.content.length === 0) {\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t\tconst contentBlocks: ContentBlock[] = [];\n\t\t\t\tfor (const c of m.content) {\n\t\t\t\t\tswitch (c.type) {\n\t\t\t\t\t\tcase \"text\":\n\t\t\t\t\t\t\t// Skip empty text blocks\n\t\t\t\t\t\t\tif (c.text.trim().length === 0) continue;\n\t\t\t\t\t\t\tcontentBlocks.push({ text: sanitizeSurrogates(c.text) });\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\tcase \"toolCall\":\n\t\t\t\t\t\t\tcontentBlocks.push({\n\t\t\t\t\t\t\t\ttoolUse: { toolUseId: c.id, name: c.name, input: c.arguments },\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\tcase \"thinking\":\n\t\t\t\t\t\t\t// Skip empty thinking blocks\n\t\t\t\t\t\t\tif (c.thinking.trim().length === 0) continue;\n\t\t\t\t\t\t\t// Only Anthropic models support the signature field in reasoningText.\n\t\t\t\t\t\t\t// For other models, we omit the signature to avoid errors like:\n\t\t\t\t\t\t\t// \"This model doesn't support the reasoningContent.reasoningText.signature field\"\n\t\t\t\t\t\t\tif (supportsThinkingSignature(model)) {\n\t\t\t\t\t\t\t\t// Signatures arrive after thinking deltas. If a partial or externally\n\t\t\t\t\t\t\t\t// persisted message lacks a signature, Bedrock rejects the replayed\n\t\t\t\t\t\t\t\t// reasoning block. Fall back to plain text, matching Anthropic.\n\t\t\t\t\t\t\t\tif (!c.thinkingSignature || c.thinkingSignature.trim().length === 0) {\n\t\t\t\t\t\t\t\t\tcontentBlocks.push({ text: sanitizeSurrogates(c.thinking) });\n\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\tcontentBlocks.push({\n\t\t\t\t\t\t\t\t\t\treasoningContent: {\n\t\t\t\t\t\t\t\t\t\t\treasoningText: {\n\t\t\t\t\t\t\t\t\t\t\t\ttext: sanitizeSurrogates(c.thinking),\n\t\t\t\t\t\t\t\t\t\t\t\tsignature: c.thinkingSignature,\n\t\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\tcontentBlocks.push({\n\t\t\t\t\t\t\t\t\treasoningContent: {\n\t\t\t\t\t\t\t\t\t\treasoningText: { text: sanitizeSurrogates(c.thinking) },\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\tdefault:\n\t\t\t\t\t\t\tthrow new Error(\"Unknown assistant content type\");\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\t// Skip if all content blocks were filtered out\n\t\t\t\tif (contentBlocks.length === 0) {\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t\tresult.push({\n\t\t\t\t\trole: ConversationRole.ASSISTANT,\n\t\t\t\t\tcontent: contentBlocks,\n\t\t\t\t});\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tcase \"toolResult\": {\n\t\t\t\t// Collect all consecutive toolResult messages into a single user message\n\t\t\t\t// Bedrock requires all tool results to be in one message\n\t\t\t\tconst toolResults: ContentBlock.ToolResultMember[] = [];\n\n\t\t\t\t// Add current tool result with all content blocks combined\n\t\t\t\ttoolResults.push({\n\t\t\t\t\ttoolResult: {\n\t\t\t\t\t\ttoolUseId: m.toolCallId,\n\t\t\t\t\t\tcontent: m.content.map((c) =>\n\t\t\t\t\t\t\tc.type === \"image\"\n\t\t\t\t\t\t\t\t? { image: createImageBlock(c.mimeType, c.data) }\n\t\t\t\t\t\t\t\t: { text: sanitizeSurrogates(c.text) },\n\t\t\t\t\t\t),\n\t\t\t\t\t\tstatus: m.isError ? ToolResultStatus.ERROR : ToolResultStatus.SUCCESS,\n\t\t\t\t\t},\n\t\t\t\t});\n\n\t\t\t\t// Look ahead for consecutive toolResult messages\n\t\t\t\tlet j = i + 1;\n\t\t\t\twhile (j < transformedMessages.length && transformedMessages[j].role === \"toolResult\") {\n\t\t\t\t\tconst nextMsg = transformedMessages[j] as ToolResultMessage;\n\t\t\t\t\ttoolResults.push({\n\t\t\t\t\t\ttoolResult: {\n\t\t\t\t\t\t\ttoolUseId: nextMsg.toolCallId,\n\t\t\t\t\t\t\tcontent: nextMsg.content.map((c) =>\n\t\t\t\t\t\t\t\tc.type === \"image\"\n\t\t\t\t\t\t\t\t\t? { image: createImageBlock(c.mimeType, c.data) }\n\t\t\t\t\t\t\t\t\t: { text: sanitizeSurrogates(c.text) },\n\t\t\t\t\t\t\t),\n\t\t\t\t\t\t\tstatus: nextMsg.isError ? ToolResultStatus.ERROR : ToolResultStatus.SUCCESS,\n\t\t\t\t\t\t},\n\t\t\t\t\t});\n\t\t\t\t\tj++;\n\t\t\t\t}\n\n\t\t\t\t// Skip the messages we've already processed\n\t\t\t\ti = j - 1;\n\n\t\t\t\tresult.push({\n\t\t\t\t\trole: ConversationRole.USER,\n\t\t\t\t\tcontent: toolResults,\n\t\t\t\t});\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tdefault:\n\t\t\t\tthrow new Error(\"Unknown message role\");\n\t\t}\n\t}\n\n\t// Add cache point to the last user message for supported Claude models when caching is enabled\n\tif (cacheRetention !== \"none\" && supportsPromptCaching(model) && result.length > 0) {\n\t\tconst lastMessage = result[result.length - 1];\n\t\tif (lastMessage.role === ConversationRole.USER && lastMessage.content) {\n\t\t\t(lastMessage.content as ContentBlock[]).push({\n\t\t\t\tcachePoint: {\n\t\t\t\t\ttype: CachePointType.DEFAULT,\n\t\t\t\t\t...(cacheRetention === \"long\" ? { ttl: CacheTTL.ONE_HOUR } : {}),\n\t\t\t\t},\n\t\t\t});\n\t\t}\n\t}\n\n\treturn result;\n}\n\nfunction convertToolConfig(\n\ttools: Tool[] | undefined,\n\ttoolChoice: BedrockOptions[\"toolChoice\"],\n): ToolConfiguration | undefined {\n\tif (!tools?.length || toolChoice === \"none\") return undefined;\n\n\tconst bedrockTools: BedrockTool[] = tools.map((tool) => ({\n\t\ttoolSpec: {\n\t\t\tname: tool.name,\n\t\t\tdescription: tool.description,\n\t\t\tinputSchema: { json: tool.parameters },\n\t\t},\n\t}));\n\n\tlet bedrockToolChoice: ToolChoice | undefined;\n\tswitch (toolChoice) {\n\t\tcase \"auto\":\n\t\t\tbedrockToolChoice = { auto: {} };\n\t\t\tbreak;\n\t\tcase \"any\":\n\t\t\tbedrockToolChoice = { any: {} };\n\t\t\tbreak;\n\t\tdefault:\n\t\t\tif (toolChoice?.type === \"tool\") {\n\t\t\t\tbedrockToolChoice = { tool: { name: toolChoice.name } };\n\t\t\t}\n\t}\n\n\treturn { tools: bedrockTools, toolChoice: bedrockToolChoice };\n}\n\nfunction mapStopReason(reason: string | undefined): StopReason {\n\tswitch (reason) {\n\t\tcase BedrockStopReason.END_TURN:\n\t\tcase BedrockStopReason.STOP_SEQUENCE:\n\t\t\treturn \"stop\";\n\t\tcase BedrockStopReason.MAX_TOKENS:\n\t\tcase BedrockStopReason.MODEL_CONTEXT_WINDOW_EXCEEDED:\n\t\t\treturn \"length\";\n\t\tcase BedrockStopReason.TOOL_USE:\n\t\t\treturn \"toolUse\";\n\t\tdefault:\n\t\t\treturn \"error\";\n\t}\n}\n\nfunction buildAdditionalModelRequestFields(\n\tmodel: Model<\"bedrock-converse-stream\">,\n\toptions: BedrockOptions,\n): Record<string, any> | undefined {\n\tif (!options.reasoning || !model.reasoning) {\n\t\treturn undefined;\n\t}\n\n\tif (model.id.includes(\"anthropic.claude\") || model.id.includes(\"anthropic/claude\")) {\n\t\tconst result: Record<string, any> = supportsAdaptiveThinking(model.id)\n\t\t\t? {\n\t\t\t\t\tthinking: { type: \"adaptive\" },\n\t\t\t\t\toutput_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) },\n\t\t\t\t}\n\t\t\t: (() => {\n\t\t\t\t\tconst defaultBudgets: Record<ThinkingLevel, number> = {\n\t\t\t\t\t\tminimal: 1024,\n\t\t\t\t\t\tlow: 2048,\n\t\t\t\t\t\tmedium: 8192,\n\t\t\t\t\t\thigh: 16384,\n\t\t\t\t\t\txhigh: 16384, // Claude doesn't support xhigh, clamp to high\n\t\t\t\t\t};\n\n\t\t\t\t\t// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)\n\t\t\t\t\tconst level = options.reasoning === \"xhigh\" ? \"high\" : options.reasoning;\n\t\t\t\t\tconst budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning];\n\n\t\t\t\t\treturn {\n\t\t\t\t\t\tthinking: {\n\t\t\t\t\t\t\ttype: \"enabled\",\n\t\t\t\t\t\t\tbudget_tokens: budget,\n\t\t\t\t\t\t},\n\t\t\t\t\t};\n\t\t\t\t})();\n\n\t\tif (!supportsAdaptiveThinking(model.id) && (options.interleavedThinking ?? true)) {\n\t\t\tresult.anthropic_beta = [\"interleaved-thinking-2025-05-14\"];\n\t\t}\n\n\t\treturn result;\n\t}\n\n\treturn undefined;\n}\n\nfunction createImageBlock(mimeType: string, data: string) {\n\tlet format: ImageFormat;\n\tswitch (mimeType) {\n\t\tcase \"image/jpeg\":\n\t\tcase \"image/jpg\":\n\t\t\tformat = ImageFormat.JPEG;\n\t\t\tbreak;\n\t\tcase \"image/png\":\n\t\t\tformat = ImageFormat.PNG;\n\t\t\tbreak;\n\t\tcase \"image/gif\":\n\t\t\tformat = ImageFormat.GIF;\n\t\t\tbreak;\n\t\tcase \"image/webp\":\n\t\t\tformat = ImageFormat.WEBP;\n\t\t\tbreak;\n\t\tdefault:\n\t\t\tthrow new Error(`Unknown image type: ${mimeType}`);\n\t}\n\n\tconst binaryString = atob(data);\n\tconst bytes = new Uint8Array(binaryString.length);\n\tfor (let i = 0; i < binaryString.length; i++) {\n\t\tbytes[i] = binaryString.charCodeAt(i);\n\t}\n\n\treturn { source: { bytes }, format };\n}\n"]}
|
|
1
|
+
{"version":3,"file":"amazon-bedrock.d.ts","sourceRoot":"","sources":["../../src/providers/amazon-bedrock.ts"],"names":[],"mappings":"AAuBA,OAAO,KAAK,EAMX,mBAAmB,EAEnB,cAAc,EACd,aAAa,EAEb,eAAe,EAEf,aAAa,EAIb,MAAM,aAAa,CAAC;AAOrB,MAAM,MAAM,sBAAsB,GAAG,YAAY,GAAG,SAAS,CAAC;AAE9D,MAAM,WAAW,cAAe,SAAQ,aAAa;IACpD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IAEtE,SAAS,CAAC,EAAE,aAAa,CAAC;IAE1B,eAAe,CAAC,EAAE,eAAe,CAAC;IAElC,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B;;;;;;;;;OASG;IACH,eAAe,CAAC,EAAE,sBAAsB,CAAC;IACzC;;;sGAGkG;IAClG,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC;;;;yGAIqG;IACrG,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAID,eAAO,MAAM,aAAa,EAAE,cAAc,CAAC,yBAAyB,EAAE,cAAc,CAqLnF,CAAC;AAgCF,eAAO,MAAM,mBAAmB,EAAE,cAAc,CAAC,yBAAyB,EAAE,mBAAmB,CA0C9F,CAAC","sourcesContent":["import {\n\tBedrockRuntimeClient,\n\ttype BedrockRuntimeClientConfig,\n\tBedrockRuntimeServiceException,\n\tStopReason as BedrockStopReason,\n\ttype Tool as BedrockTool,\n\tCachePointType,\n\tCacheTTL,\n\ttype ContentBlock,\n\ttype ContentBlockDeltaEvent,\n\ttype ContentBlockStartEvent,\n\ttype ContentBlockStopEvent,\n\tConversationRole,\n\tConverseStreamCommand,\n\ttype ConverseStreamMetadataEvent,\n\tImageFormat,\n\ttype Message,\n\ttype SystemContentBlock,\n\ttype ToolChoice,\n\ttype ToolConfiguration,\n\tToolResultStatus,\n} from \"@aws-sdk/client-bedrock-runtime\";\nimport { calculateCost } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tCacheRetention,\n\tContext,\n\tModel,\n\tSimpleStreamOptions,\n\tStopReason,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingBudgets,\n\tThinkingContent,\n\tThinkingLevel,\n\tTool,\n\tToolCall,\n\tToolResultMessage,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { parseStreamingJson } from \"../utils/json-parse.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\nimport { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from \"./simple-options.js\";\nimport { transformMessages } from \"./transform-messages.js\";\n\nexport type BedrockThinkingDisplay = \"summarized\" | \"omitted\";\n\nexport interface BedrockOptions extends StreamOptions {\n\tregion?: string;\n\tprofile?: string;\n\ttoolChoice?: \"auto\" | \"any\" | \"none\" | { type: \"tool\"; name: string };\n\t/* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */\n\treasoning?: ThinkingLevel;\n\t/* Custom token budgets per thinking level. Overrides default budgets. */\n\tthinkingBudgets?: ThinkingBudgets;\n\t/* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */\n\tinterleavedThinking?: boolean;\n\t/**\n\t * Controls how Claude's thinking content is returned in responses.\n\t * - \"summarized\": Thinking blocks contain summarized thinking text (default here).\n\t * - \"omitted\": Thinking content is redacted but the signature still travels back\n\t * for multi-turn continuity, reducing time-to-first-text-token.\n\t *\n\t * Note: Anthropic's API default for Claude Opus 4.7 and Mythos Preview is\n\t * \"omitted\". We default to \"summarized\" here to keep behavior consistent with\n\t * older Claude 4 models. Only applies to Claude models on Bedrock.\n\t */\n\tthinkingDisplay?: BedrockThinkingDisplay;\n\t/** Key-value pairs attached to the inference request for cost allocation tagging.\n\t * Keys: max 64 chars, no `aws:` prefix. Values: max 256 chars. Max 50 pairs.\n\t * Tags appear in AWS Cost Explorer split cost allocation data.\n\t * @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ConverseStream.html */\n\trequestMetadata?: Record<string, string>;\n\t/** Bearer token for Bedrock API key authentication.\n\t * When set, bypasses SigV4 signing and sends Authorization: Bearer <token> instead.\n\t * Requires `bedrock:CallWithBearerToken` IAM permission on the token's identity.\n\t * Set via AWS_BEARER_TOKEN_BEDROCK env var or pass directly.\n\t * @see https://docs.aws.amazon.com/service-authorization/latest/reference/list_amazonbedrock.html */\n\tbearerToken?: string;\n}\n\ntype Block = (TextContent | ThinkingContent | ToolCall) & { index?: number; partialJson?: string };\n\nexport const streamBedrock: StreamFunction<\"bedrock-converse-stream\", BedrockOptions> = (\n\tmodel: Model<\"bedrock-converse-stream\">,\n\tcontext: Context,\n\toptions: BedrockOptions = {},\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: \"bedrock-converse-stream\" as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\tconst blocks = output.content as Block[];\n\n\t\tconst config: BedrockRuntimeClientConfig = {\n\t\t\tprofile: options.profile,\n\t\t};\n\n\t\t// Pass custom endpoint when the model has a non-default baseUrl.\n\t\t// This enables VPC endpoints, proxy setups, and custom routing.\n\t\tif (model.baseUrl) {\n\t\t\tconfig.endpoint = model.baseUrl;\n\t\t}\n\n\t\t// Resolve bearer token for Bedrock API key auth.\n\t\tconst bearerToken = options.bearerToken || process.env.AWS_BEARER_TOKEN_BEDROCK || undefined;\n\t\tconst useBearerToken = bearerToken !== undefined && process.env.AWS_BEDROCK_SKIP_AUTH !== \"1\";\n\n\t\t// in Node.js/Bun environment only\n\t\tif (typeof process !== \"undefined\" && (process.versions?.node || process.versions?.bun)) {\n\t\t\t// Region resolution: explicit option > env vars > SDK default chain.\n\t\t\t// When AWS_PROFILE is set, we leave region undefined so the SDK can\n\t\t\t// resovle it from aws profile configs. Otherwise fall back to us-east-1.\n\t\t\tconst explicitRegion = options.region || process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION;\n\t\t\tif (explicitRegion) {\n\t\t\t\tconfig.region = explicitRegion;\n\t\t\t} else if (!process.env.AWS_PROFILE) {\n\t\t\t\tconfig.region = \"us-east-1\";\n\t\t\t}\n\n\t\t\t// Support proxies that don't need authentication\n\t\t\tif (process.env.AWS_BEDROCK_SKIP_AUTH === \"1\") {\n\t\t\t\tconfig.credentials = {\n\t\t\t\t\taccessKeyId: \"dummy-access-key\",\n\t\t\t\t\tsecretAccessKey: \"dummy-secret-key\",\n\t\t\t\t};\n\t\t\t}\n\n\t\t\tif (\n\t\t\t\tprocess.env.HTTP_PROXY ||\n\t\t\t\tprocess.env.HTTPS_PROXY ||\n\t\t\t\tprocess.env.NO_PROXY ||\n\t\t\t\tprocess.env.http_proxy ||\n\t\t\t\tprocess.env.https_proxy ||\n\t\t\t\tprocess.env.no_proxy\n\t\t\t) {\n\t\t\t\tconst nodeHttpHandler = await import(\"@smithy/node-http-handler\");\n\t\t\t\tconst proxyAgent = await import(\"proxy-agent\");\n\n\t\t\t\tconst agent = new proxyAgent.ProxyAgent();\n\n\t\t\t\t// Bedrock runtime uses NodeHttp2Handler by default since v3.798.0, which is based\n\t\t\t\t// on `http2` module and has no support for http agent.\n\t\t\t\t// Use NodeHttpHandler to support http agent.\n\t\t\t\tconfig.requestHandler = new nodeHttpHandler.NodeHttpHandler({\n\t\t\t\t\thttpAgent: agent,\n\t\t\t\t\thttpsAgent: agent,\n\t\t\t\t});\n\t\t\t} else if (process.env.AWS_BEDROCK_FORCE_HTTP1 === \"1\") {\n\t\t\t\t// Some custom endpoints require HTTP/1.1 instead of HTTP/2\n\t\t\t\tconst nodeHttpHandler = await import(\"@smithy/node-http-handler\");\n\t\t\t\tconfig.requestHandler = new nodeHttpHandler.NodeHttpHandler();\n\t\t\t}\n\t\t} else {\n\t\t\t// Non-Node environment (browser): fall back to us-east-1 since\n\t\t\t// there's no config file resolution available.\n\t\t\tconfig.region = options.region || \"us-east-1\";\n\t\t}\n\n\t\tif (useBearerToken) {\n\t\t\tconfig.token = { token: bearerToken };\n\t\t\tconfig.authSchemePreference = [\"httpBearerAuth\"];\n\t\t}\n\n\t\ttry {\n\t\t\tconst client = new BedrockRuntimeClient(config);\n\t\t\tconst cacheRetention = resolveCacheRetention(options.cacheRetention);\n\t\t\tlet commandInput = {\n\t\t\t\tmodelId: model.id,\n\t\t\t\tmessages: convertMessages(context, model, cacheRetention),\n\t\t\t\tsystem: buildSystemPrompt(context.systemPrompt, model, cacheRetention),\n\t\t\t\tinferenceConfig: {\n\t\t\t\t\t...(options.maxTokens !== undefined && { maxTokens: options.maxTokens }),\n\t\t\t\t\t...(options.temperature !== undefined && { temperature: options.temperature }),\n\t\t\t\t},\n\t\t\t\ttoolConfig: convertToolConfig(context.tools, options.toolChoice),\n\t\t\t\tadditionalModelRequestFields: buildAdditionalModelRequestFields(model, options),\n\t\t\t\t...(options.requestMetadata !== undefined && { requestMetadata: options.requestMetadata }),\n\t\t\t};\n\t\t\tconst nextCommandInput = await options?.onPayload?.(commandInput, model);\n\t\t\tif (nextCommandInput !== undefined) {\n\t\t\t\tcommandInput = nextCommandInput as typeof commandInput;\n\t\t\t}\n\t\t\tconst command = new ConverseStreamCommand(commandInput);\n\n\t\t\tconst response = await client.send(command, { abortSignal: options.signal });\n\t\t\tif (response.$metadata.httpStatusCode !== undefined) {\n\t\t\t\tconst responseHeaders: Record<string, string> = {};\n\t\t\t\tif (response.$metadata.requestId) {\n\t\t\t\t\tresponseHeaders[\"x-amzn-requestid\"] = response.$metadata.requestId;\n\t\t\t\t}\n\t\t\t\tawait options?.onResponse?.({ status: response.$metadata.httpStatusCode, headers: responseHeaders }, model);\n\t\t\t}\n\n\t\t\tfor await (const item of response.stream!) {\n\t\t\t\tif (item.messageStart) {\n\t\t\t\t\tif (item.messageStart.role !== ConversationRole.ASSISTANT) {\n\t\t\t\t\t\tthrow new Error(\"Unexpected assistant message start but got user message start instead\");\n\t\t\t\t\t}\n\t\t\t\t\tstream.push({ type: \"start\", partial: output });\n\t\t\t\t} else if (item.contentBlockStart) {\n\t\t\t\t\thandleContentBlockStart(item.contentBlockStart, blocks, output, stream);\n\t\t\t\t} else if (item.contentBlockDelta) {\n\t\t\t\t\thandleContentBlockDelta(item.contentBlockDelta, blocks, output, stream);\n\t\t\t\t} else if (item.contentBlockStop) {\n\t\t\t\t\thandleContentBlockStop(item.contentBlockStop, blocks, output, stream);\n\t\t\t\t} else if (item.messageStop) {\n\t\t\t\t\toutput.stopReason = mapStopReason(item.messageStop.stopReason);\n\t\t\t\t} else if (item.metadata) {\n\t\t\t\t\thandleMetadata(item.metadata, model, output);\n\t\t\t\t} else if (item.internalServerException) {\n\t\t\t\t\tthrow item.internalServerException;\n\t\t\t\t} else if (item.modelStreamErrorException) {\n\t\t\t\t\tthrow item.modelStreamErrorException;\n\t\t\t\t} else if (item.validationException) {\n\t\t\t\t\tthrow item.validationException;\n\t\t\t\t} else if (item.throttlingException) {\n\t\t\t\t\tthrow item.throttlingException;\n\t\t\t\t} else if (item.serviceUnavailableException) {\n\t\t\t\t\tthrow item.serviceUnavailableException;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (options.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"error\" || output.stopReason === \"aborted\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) {\n\t\t\t\tdelete (block as Block).index;\n\t\t\t\t// partialJson is only a streaming scratch buffer; never persist it.\n\t\t\t\tdelete (block as Block).partialJson;\n\t\t\t}\n\t\t\toutput.stopReason = options.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = formatBedrockError(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\n/**\n * Human-readable prefixes for Bedrock SDK exception names.\n * The downstream retry logic in agent-session matches patterns like\n * `server.?error` and `service.?unavailable`, so we preserve the legacy\n * prefix format rather than using the raw SDK exception name.\n */\nconst BEDROCK_ERROR_PREFIXES: Record<string, string> = {\n\tInternalServerException: \"Internal server error\",\n\tModelStreamErrorException: \"Model stream error\",\n\tValidationException: \"Validation error\",\n\tThrottlingException: \"Throttling error\",\n\tServiceUnavailableException: \"Service unavailable\",\n};\n\n/**\n * Format a Bedrock error with a human-readable prefix.\n * AWS SDK exceptions (both from `client.send()` and from stream event items)\n * extend BedrockRuntimeServiceException. We map the `.name` to a stable\n * human-readable prefix so downstream consumers (retry logic, context-overflow\n * detection) can distinguish error categories via simple string matching.\n */\nfunction formatBedrockError(error: unknown): string {\n\tconst message = error instanceof Error ? error.message : JSON.stringify(error);\n\tif (error instanceof BedrockRuntimeServiceException) {\n\t\tconst prefix = BEDROCK_ERROR_PREFIXES[error.name] ?? error.name;\n\t\treturn `${prefix}: ${message}`;\n\t}\n\treturn message;\n}\n\nexport const streamSimpleBedrock: StreamFunction<\"bedrock-converse-stream\", SimpleStreamOptions> = (\n\tmodel: Model<\"bedrock-converse-stream\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst base = buildBaseOptions(model, options, undefined);\n\tif (!options?.reasoning) {\n\t\treturn streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions);\n\t}\n\n\tif (isAnthropicClaudeModel(model)) {\n\t\tif (supportsAdaptiveThinking(model.id, model.name)) {\n\t\t\treturn streamBedrock(model, context, {\n\t\t\t\t...base,\n\t\t\t\treasoning: options.reasoning,\n\t\t\t\tthinkingBudgets: options.thinkingBudgets,\n\t\t\t} satisfies BedrockOptions);\n\t\t}\n\n\t\tconst adjusted = adjustMaxTokensForThinking(\n\t\t\tbase.maxTokens || 0,\n\t\t\tmodel.maxTokens,\n\t\t\toptions.reasoning,\n\t\t\toptions.thinkingBudgets,\n\t\t);\n\n\t\treturn streamBedrock(model, context, {\n\t\t\t...base,\n\t\t\tmaxTokens: adjusted.maxTokens,\n\t\t\treasoning: options.reasoning,\n\t\t\tthinkingBudgets: {\n\t\t\t\t...(options.thinkingBudgets || {}),\n\t\t\t\t[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,\n\t\t\t},\n\t\t} satisfies BedrockOptions);\n\t}\n\n\treturn streamBedrock(model, context, {\n\t\t...base,\n\t\treasoning: options.reasoning,\n\t\tthinkingBudgets: options.thinkingBudgets,\n\t} satisfies BedrockOptions);\n};\n\nfunction handleContentBlockStart(\n\tevent: ContentBlockStartEvent,\n\tblocks: Block[],\n\toutput: AssistantMessage,\n\tstream: AssistantMessageEventStream,\n): void {\n\tconst index = event.contentBlockIndex!;\n\tconst start = event.start;\n\n\tif (start?.toolUse) {\n\t\tconst block: Block = {\n\t\t\ttype: \"toolCall\",\n\t\t\tid: start.toolUse.toolUseId || \"\",\n\t\t\tname: start.toolUse.name || \"\",\n\t\t\targuments: {},\n\t\t\tpartialJson: \"\",\n\t\t\tindex,\n\t\t};\n\t\toutput.content.push(block);\n\t\tstream.push({ type: \"toolcall_start\", contentIndex: blocks.length - 1, partial: output });\n\t}\n}\n\nfunction handleContentBlockDelta(\n\tevent: ContentBlockDeltaEvent,\n\tblocks: Block[],\n\toutput: AssistantMessage,\n\tstream: AssistantMessageEventStream,\n): void {\n\tconst contentBlockIndex = event.contentBlockIndex!;\n\tconst delta = event.delta;\n\tlet index = blocks.findIndex((b) => b.index === contentBlockIndex);\n\tlet block = blocks[index];\n\n\tif (delta?.text !== undefined) {\n\t\t// If no text block exists yet, create one, as `handleContentBlockStart` is not sent for text blocks\n\t\tif (!block) {\n\t\t\tconst newBlock: Block = { type: \"text\", text: \"\", index: contentBlockIndex };\n\t\t\toutput.content.push(newBlock);\n\t\t\tindex = blocks.length - 1;\n\t\t\tblock = blocks[index];\n\t\t\tstream.push({ type: \"text_start\", contentIndex: index, partial: output });\n\t\t}\n\t\tif (block.type === \"text\") {\n\t\t\tblock.text += delta.text;\n\t\t\tstream.push({ type: \"text_delta\", contentIndex: index, delta: delta.text, partial: output });\n\t\t}\n\t} else if (delta?.toolUse && block?.type === \"toolCall\") {\n\t\tblock.partialJson = (block.partialJson || \"\") + (delta.toolUse.input || \"\");\n\t\tblock.arguments = parseStreamingJson(block.partialJson);\n\t\tstream.push({ type: \"toolcall_delta\", contentIndex: index, delta: delta.toolUse.input || \"\", partial: output });\n\t} else if (delta?.reasoningContent) {\n\t\tlet thinkingBlock = block;\n\t\tlet thinkingIndex = index;\n\n\t\tif (!thinkingBlock) {\n\t\t\tconst newBlock: Block = { type: \"thinking\", thinking: \"\", thinkingSignature: \"\", index: contentBlockIndex };\n\t\t\toutput.content.push(newBlock);\n\t\t\tthinkingIndex = blocks.length - 1;\n\t\t\tthinkingBlock = blocks[thinkingIndex];\n\t\t\tstream.push({ type: \"thinking_start\", contentIndex: thinkingIndex, partial: output });\n\t\t}\n\n\t\tif (thinkingBlock?.type === \"thinking\") {\n\t\t\tif (delta.reasoningContent.text) {\n\t\t\t\tthinkingBlock.thinking += delta.reasoningContent.text;\n\t\t\t\tstream.push({\n\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\tcontentIndex: thinkingIndex,\n\t\t\t\t\tdelta: delta.reasoningContent.text,\n\t\t\t\t\tpartial: output,\n\t\t\t\t});\n\t\t\t}\n\t\t\tif (delta.reasoningContent.signature) {\n\t\t\t\tthinkingBlock.thinkingSignature =\n\t\t\t\t\t(thinkingBlock.thinkingSignature || \"\") + delta.reasoningContent.signature;\n\t\t\t}\n\t\t}\n\t}\n}\n\nfunction handleMetadata(\n\tevent: ConverseStreamMetadataEvent,\n\tmodel: Model<\"bedrock-converse-stream\">,\n\toutput: AssistantMessage,\n): void {\n\tif (event.usage) {\n\t\toutput.usage.input = event.usage.inputTokens || 0;\n\t\toutput.usage.output = event.usage.outputTokens || 0;\n\t\toutput.usage.cacheRead = event.usage.cacheReadInputTokens || 0;\n\t\toutput.usage.cacheWrite = event.usage.cacheWriteInputTokens || 0;\n\t\toutput.usage.totalTokens = event.usage.totalTokens || output.usage.input + output.usage.output;\n\t\tcalculateCost(model, output.usage);\n\t}\n}\n\nfunction handleContentBlockStop(\n\tevent: ContentBlockStopEvent,\n\tblocks: Block[],\n\toutput: AssistantMessage,\n\tstream: AssistantMessageEventStream,\n): void {\n\tconst index = blocks.findIndex((b) => b.index === event.contentBlockIndex);\n\tconst block = blocks[index];\n\tif (!block) return;\n\tdelete (block as Block).index;\n\n\tswitch (block.type) {\n\t\tcase \"text\":\n\t\t\tstream.push({ type: \"text_end\", contentIndex: index, content: block.text, partial: output });\n\t\t\tbreak;\n\t\tcase \"thinking\":\n\t\t\tstream.push({ type: \"thinking_end\", contentIndex: index, content: block.thinking, partial: output });\n\t\t\tbreak;\n\t\tcase \"toolCall\":\n\t\t\tblock.arguments = parseStreamingJson(block.partialJson);\n\t\t\t// Finalize in-place and strip the scratch buffer so replay only\n\t\t\t// carries parsed arguments.\n\t\t\tdelete (block as Block).partialJson;\n\t\t\tstream.push({ type: \"toolcall_end\", contentIndex: index, toolCall: block, partial: output });\n\t\t\tbreak;\n\t}\n}\n\n/**\n * Check if the model supports adaptive thinking (Opus 4.6+, Sonnet 4.6).\n * Checks both model ID and model name to support application inference profiles\n * whose ARNs don't contain the model name.\n */\nfunction supportsAdaptiveThinking(modelId: string, modelName?: string): boolean {\n\tconst candidates = [modelId, modelName]\n\t\t.filter((value): value is string => Boolean(value))\n\t\t.map((value) => value.toLowerCase());\n\treturn candidates.some(\n\t\t(s) =>\n\t\t\ts.includes(\"opus-4-6\") ||\n\t\t\ts.includes(\"opus-4.6\") ||\n\t\t\ts.includes(\"opus 4.6\") ||\n\t\t\ts.includes(\"opus-4-7\") ||\n\t\t\ts.includes(\"opus-4.7\") ||\n\t\t\ts.includes(\"opus 4.7\") ||\n\t\t\ts.includes(\"sonnet-4-6\") ||\n\t\t\ts.includes(\"sonnet-4.6\") ||\n\t\t\ts.includes(\"sonnet 4.6\"),\n\t);\n}\n\nfunction mapThinkingLevelToEffort(\n\tlevel: SimpleStreamOptions[\"reasoning\"],\n\tmodelId: string,\n\tmodelName?: string,\n): \"low\" | \"medium\" | \"high\" | \"xhigh\" | \"max\" {\n\tconst candidates = [modelId, modelName]\n\t\t.filter((value): value is string => Boolean(value))\n\t\t.map((value) => value.toLowerCase());\n\tswitch (level) {\n\t\tcase \"minimal\":\n\t\tcase \"low\":\n\t\t\treturn \"low\";\n\t\tcase \"medium\":\n\t\t\treturn \"medium\";\n\t\tcase \"high\":\n\t\t\treturn \"high\";\n\t\tcase \"xhigh\":\n\t\t\tif (candidates.some((s) => s.includes(\"opus-4-6\") || s.includes(\"opus-4.6\"))) {\n\t\t\t\treturn \"max\";\n\t\t\t}\n\t\t\tif (candidates.some((s) => s.includes(\"opus-4-7\") || s.includes(\"opus-4.7\"))) {\n\t\t\t\treturn \"xhigh\";\n\t\t\t}\n\t\t\treturn \"high\";\n\t\tdefault:\n\t\t\treturn \"high\";\n\t}\n}\n\n/**\n * Resolve cache retention preference.\n * Defaults to \"short\" and uses PI_CACHE_RETENTION for backward compatibility.\n */\nfunction resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {\n\tif (cacheRetention) {\n\t\treturn cacheRetention;\n\t}\n\tif (typeof process !== \"undefined\" && process.env.PI_CACHE_RETENTION === \"long\") {\n\t\treturn \"long\";\n\t}\n\treturn \"short\";\n}\n\n/**\n * Check if the model is an Anthropic Claude model on Bedrock.\n * Checks both model ID and model name to support application inference profiles\n * whose ARNs don't contain the model name.\n */\nfunction isAnthropicClaudeModel(model: Model<\"bedrock-converse-stream\">): boolean {\n\tconst id = model.id.toLowerCase();\n\tconst name = model.name?.toLowerCase() ?? \"\";\n\treturn (\n\t\tid.includes(\"anthropic.claude\") ||\n\t\tid.includes(\"anthropic/claude\") ||\n\t\tname.includes(\"anthropic.claude\") ||\n\t\tname.includes(\"anthropic/claude\") ||\n\t\tname.includes(\"claude\")\n\t);\n}\n\n/**\n * Check if the model supports prompt caching.\n * Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, Claude 4.x models\n *\n * For base models and system-defined inference profiles the model ID / ARN\n * contains the model name, so we can decide locally.\n *\n * For application inference profiles (whose ARNs don't contain the model name),\n * also checks model.name which is user-controlled via models.json or registerProvider.\n * As a last resort, set AWS_BEDROCK_FORCE_CACHE=1 to enable cache points.\n * Amazon Nova models have automatic caching and don't need explicit cache points.\n */\nfunction supportsPromptCaching(model: Model<\"bedrock-converse-stream\">): boolean {\n\tconst candidates = [model.id.toLowerCase()];\n\tif (model.name) {\n\t\tcandidates.push(model.name.toLowerCase());\n\t}\n\n\tconst hasClaudeRef = candidates.some((s) => s.includes(\"claude\"));\n\tif (!hasClaudeRef) {\n\t\t// Application inference profiles don't contain the model name in the ARN.\n\t\t// Allow users to force cache points via environment variable.\n\t\tif (typeof process !== \"undefined\" && process.env.AWS_BEDROCK_FORCE_CACHE === \"1\") return true;\n\t\treturn false;\n\t}\n\t// Claude 4.x models (opus-4, sonnet-4, haiku-4), including human-readable names like \"Claude Sonnet 4.6\".\n\tif (candidates.some((s) => s.includes(\"-4-\") || s.includes(\"-4.\") || s.includes(\" 4-\") || s.includes(\" 4.\")))\n\t\treturn true;\n\t// Claude 3.7 Sonnet\n\tif (candidates.some((s) => s.includes(\"claude-3-7-sonnet\") || s.includes(\"claude 3.7 sonnet\"))) return true;\n\t// Claude 3.5 Haiku\n\tif (candidates.some((s) => s.includes(\"claude-3-5-haiku\") || s.includes(\"claude 3.5 haiku\"))) return true;\n\treturn false;\n}\n\n/**\n * Check if the model supports thinking signatures in reasoningContent.\n * Only Anthropic Claude models support the signature field.\n * Other models (OpenAI, Qwen, Minimax, Moonshot, etc.) reject it with:\n * \"This model doesn't support the reasoningContent.reasoningText.signature field\"\n *\n * Checks both model ID and model name to support application inference profiles.\n */\nfunction supportsThinkingSignature(model: Model<\"bedrock-converse-stream\">): boolean {\n\treturn isAnthropicClaudeModel(model);\n}\n\nfunction buildSystemPrompt(\n\tsystemPrompt: string | undefined,\n\tmodel: Model<\"bedrock-converse-stream\">,\n\tcacheRetention: CacheRetention,\n): SystemContentBlock[] | undefined {\n\tif (!systemPrompt) return undefined;\n\n\tconst blocks: SystemContentBlock[] = [{ text: sanitizeSurrogates(systemPrompt) }];\n\n\t// Add cache point for supported Claude models when caching is enabled\n\tif (cacheRetention !== \"none\" && supportsPromptCaching(model)) {\n\t\tblocks.push({\n\t\t\tcachePoint: { type: CachePointType.DEFAULT, ...(cacheRetention === \"long\" ? { ttl: CacheTTL.ONE_HOUR } : {}) },\n\t\t});\n\t}\n\n\treturn blocks;\n}\n\nfunction normalizeToolCallId(id: string): string {\n\tconst sanitized = id.replace(/[^a-zA-Z0-9_-]/g, \"_\");\n\treturn sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;\n}\n\nfunction convertMessages(\n\tcontext: Context,\n\tmodel: Model<\"bedrock-converse-stream\">,\n\tcacheRetention: CacheRetention,\n): Message[] {\n\tconst result: Message[] = [];\n\tconst transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);\n\n\tfor (let i = 0; i < transformedMessages.length; i++) {\n\t\tconst m = transformedMessages[i];\n\n\t\tswitch (m.role) {\n\t\t\tcase \"user\":\n\t\t\t\tresult.push({\n\t\t\t\t\trole: ConversationRole.USER,\n\t\t\t\t\tcontent:\n\t\t\t\t\t\ttypeof m.content === \"string\"\n\t\t\t\t\t\t\t? [{ text: sanitizeSurrogates(m.content) }]\n\t\t\t\t\t\t\t: m.content.map((c) => {\n\t\t\t\t\t\t\t\t\tswitch (c.type) {\n\t\t\t\t\t\t\t\t\t\tcase \"text\":\n\t\t\t\t\t\t\t\t\t\t\treturn { text: sanitizeSurrogates(c.text) };\n\t\t\t\t\t\t\t\t\t\tcase \"image\":\n\t\t\t\t\t\t\t\t\t\t\treturn { image: createImageBlock(c.mimeType, c.data) };\n\t\t\t\t\t\t\t\t\t\tdefault:\n\t\t\t\t\t\t\t\t\t\t\tthrow new Error(\"Unknown user content type\");\n\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t}),\n\t\t\t\t});\n\t\t\t\tbreak;\n\t\t\tcase \"assistant\": {\n\t\t\t\t// Skip assistant messages with empty content (e.g., from aborted requests)\n\t\t\t\t// Bedrock rejects messages with empty content arrays\n\t\t\t\tif (m.content.length === 0) {\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t\tconst contentBlocks: ContentBlock[] = [];\n\t\t\t\tfor (const c of m.content) {\n\t\t\t\t\tswitch (c.type) {\n\t\t\t\t\t\tcase \"text\":\n\t\t\t\t\t\t\t// Skip empty text blocks\n\t\t\t\t\t\t\tif (c.text.trim().length === 0) continue;\n\t\t\t\t\t\t\tcontentBlocks.push({ text: sanitizeSurrogates(c.text) });\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\tcase \"toolCall\":\n\t\t\t\t\t\t\tcontentBlocks.push({\n\t\t\t\t\t\t\t\ttoolUse: { toolUseId: c.id, name: c.name, input: c.arguments },\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\tcase \"thinking\":\n\t\t\t\t\t\t\t// Skip empty thinking blocks\n\t\t\t\t\t\t\tif (c.thinking.trim().length === 0) continue;\n\t\t\t\t\t\t\t// Only Anthropic models support the signature field in reasoningText.\n\t\t\t\t\t\t\t// For other models, we omit the signature to avoid errors like:\n\t\t\t\t\t\t\t// \"This model doesn't support the reasoningContent.reasoningText.signature field\"\n\t\t\t\t\t\t\tif (supportsThinkingSignature(model)) {\n\t\t\t\t\t\t\t\t// Signatures arrive after thinking deltas. If a partial or externally\n\t\t\t\t\t\t\t\t// persisted message lacks a signature, Bedrock rejects the replayed\n\t\t\t\t\t\t\t\t// reasoning block. Fall back to plain text, matching Anthropic.\n\t\t\t\t\t\t\t\tif (!c.thinkingSignature || c.thinkingSignature.trim().length === 0) {\n\t\t\t\t\t\t\t\t\tcontentBlocks.push({ text: sanitizeSurrogates(c.thinking) });\n\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\tcontentBlocks.push({\n\t\t\t\t\t\t\t\t\t\treasoningContent: {\n\t\t\t\t\t\t\t\t\t\t\treasoningText: {\n\t\t\t\t\t\t\t\t\t\t\t\ttext: sanitizeSurrogates(c.thinking),\n\t\t\t\t\t\t\t\t\t\t\t\tsignature: c.thinkingSignature,\n\t\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\tcontentBlocks.push({\n\t\t\t\t\t\t\t\t\treasoningContent: {\n\t\t\t\t\t\t\t\t\t\treasoningText: { text: sanitizeSurrogates(c.thinking) },\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\tdefault:\n\t\t\t\t\t\t\tthrow new Error(\"Unknown assistant content type\");\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\t// Skip if all content blocks were filtered out\n\t\t\t\tif (contentBlocks.length === 0) {\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t\tresult.push({\n\t\t\t\t\trole: ConversationRole.ASSISTANT,\n\t\t\t\t\tcontent: contentBlocks,\n\t\t\t\t});\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tcase \"toolResult\": {\n\t\t\t\t// Collect all consecutive toolResult messages into a single user message\n\t\t\t\t// Bedrock requires all tool results to be in one message\n\t\t\t\tconst toolResults: ContentBlock.ToolResultMember[] = [];\n\n\t\t\t\t// Add current tool result with all content blocks combined\n\t\t\t\ttoolResults.push({\n\t\t\t\t\ttoolResult: {\n\t\t\t\t\t\ttoolUseId: m.toolCallId,\n\t\t\t\t\t\tcontent: m.content.map((c) =>\n\t\t\t\t\t\t\tc.type === \"image\"\n\t\t\t\t\t\t\t\t? { image: createImageBlock(c.mimeType, c.data) }\n\t\t\t\t\t\t\t\t: { text: sanitizeSurrogates(c.text) },\n\t\t\t\t\t\t),\n\t\t\t\t\t\tstatus: m.isError ? ToolResultStatus.ERROR : ToolResultStatus.SUCCESS,\n\t\t\t\t\t},\n\t\t\t\t});\n\n\t\t\t\t// Look ahead for consecutive toolResult messages\n\t\t\t\tlet j = i + 1;\n\t\t\t\twhile (j < transformedMessages.length && transformedMessages[j].role === \"toolResult\") {\n\t\t\t\t\tconst nextMsg = transformedMessages[j] as ToolResultMessage;\n\t\t\t\t\ttoolResults.push({\n\t\t\t\t\t\ttoolResult: {\n\t\t\t\t\t\t\ttoolUseId: nextMsg.toolCallId,\n\t\t\t\t\t\t\tcontent: nextMsg.content.map((c) =>\n\t\t\t\t\t\t\t\tc.type === \"image\"\n\t\t\t\t\t\t\t\t\t? { image: createImageBlock(c.mimeType, c.data) }\n\t\t\t\t\t\t\t\t\t: { text: sanitizeSurrogates(c.text) },\n\t\t\t\t\t\t\t),\n\t\t\t\t\t\t\tstatus: nextMsg.isError ? ToolResultStatus.ERROR : ToolResultStatus.SUCCESS,\n\t\t\t\t\t\t},\n\t\t\t\t\t});\n\t\t\t\t\tj++;\n\t\t\t\t}\n\n\t\t\t\t// Skip the messages we've already processed\n\t\t\t\ti = j - 1;\n\n\t\t\t\tresult.push({\n\t\t\t\t\trole: ConversationRole.USER,\n\t\t\t\t\tcontent: toolResults,\n\t\t\t\t});\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tdefault:\n\t\t\t\tthrow new Error(\"Unknown message role\");\n\t\t}\n\t}\n\n\t// Add cache point to the last user message for supported Claude models when caching is enabled\n\tif (cacheRetention !== \"none\" && supportsPromptCaching(model) && result.length > 0) {\n\t\tconst lastMessage = result[result.length - 1];\n\t\tif (lastMessage.role === ConversationRole.USER && lastMessage.content) {\n\t\t\t(lastMessage.content as ContentBlock[]).push({\n\t\t\t\tcachePoint: {\n\t\t\t\t\ttype: CachePointType.DEFAULT,\n\t\t\t\t\t...(cacheRetention === \"long\" ? { ttl: CacheTTL.ONE_HOUR } : {}),\n\t\t\t\t},\n\t\t\t});\n\t\t}\n\t}\n\n\treturn result;\n}\n\nfunction convertToolConfig(\n\ttools: Tool[] | undefined,\n\ttoolChoice: BedrockOptions[\"toolChoice\"],\n): ToolConfiguration | undefined {\n\tif (!tools?.length || toolChoice === \"none\") return undefined;\n\n\tconst bedrockTools: BedrockTool[] = tools.map((tool) => ({\n\t\ttoolSpec: {\n\t\t\tname: tool.name,\n\t\t\tdescription: tool.description,\n\t\t\tinputSchema: { json: tool.parameters },\n\t\t},\n\t}));\n\n\tlet bedrockToolChoice: ToolChoice | undefined;\n\tswitch (toolChoice) {\n\t\tcase \"auto\":\n\t\t\tbedrockToolChoice = { auto: {} };\n\t\t\tbreak;\n\t\tcase \"any\":\n\t\t\tbedrockToolChoice = { any: {} };\n\t\t\tbreak;\n\t\tdefault:\n\t\t\tif (toolChoice?.type === \"tool\") {\n\t\t\t\tbedrockToolChoice = { tool: { name: toolChoice.name } };\n\t\t\t}\n\t}\n\n\treturn { tools: bedrockTools, toolChoice: bedrockToolChoice };\n}\n\nfunction mapStopReason(reason: string | undefined): StopReason {\n\tswitch (reason) {\n\t\tcase BedrockStopReason.END_TURN:\n\t\tcase BedrockStopReason.STOP_SEQUENCE:\n\t\t\treturn \"stop\";\n\t\tcase BedrockStopReason.MAX_TOKENS:\n\t\tcase BedrockStopReason.MODEL_CONTEXT_WINDOW_EXCEEDED:\n\t\t\treturn \"length\";\n\t\tcase BedrockStopReason.TOOL_USE:\n\t\t\treturn \"toolUse\";\n\t\tdefault:\n\t\t\treturn \"error\";\n\t}\n}\n\nfunction isGovCloudBedrockTarget(model: Model<\"bedrock-converse-stream\">, options: BedrockOptions): boolean {\n\tconst region = options.region || process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION;\n\tif (region?.toLowerCase().startsWith(\"us-gov-\")) {\n\t\treturn true;\n\t}\n\n\tconst modelId = model.id.toLowerCase();\n\treturn modelId.startsWith(\"us-gov.\") || modelId.startsWith(\"arn:aws-us-gov:\");\n}\n\nfunction buildAdditionalModelRequestFields(\n\tmodel: Model<\"bedrock-converse-stream\">,\n\toptions: BedrockOptions,\n): Record<string, any> | undefined {\n\tif (!options.reasoning || !model.reasoning) {\n\t\treturn undefined;\n\t}\n\n\tif (isAnthropicClaudeModel(model)) {\n\t\t// GovCloud Bedrock currently rejects the Claude thinking.display field.\n\t\t// Omit it there until the GovCloud Converse schema catches up.\n\t\tconst display = isGovCloudBedrockTarget(model, options) ? undefined : (options.thinkingDisplay ?? \"summarized\");\n\t\tconst result: Record<string, any> = supportsAdaptiveThinking(model.id, model.name)\n\t\t\t? {\n\t\t\t\t\tthinking: { type: \"adaptive\", ...(display !== undefined ? { display } : {}) },\n\t\t\t\t\toutput_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id, model.name) },\n\t\t\t\t}\n\t\t\t: (() => {\n\t\t\t\t\tconst defaultBudgets: Record<ThinkingLevel, number> = {\n\t\t\t\t\t\tminimal: 1024,\n\t\t\t\t\t\tlow: 2048,\n\t\t\t\t\t\tmedium: 8192,\n\t\t\t\t\t\thigh: 16384,\n\t\t\t\t\t\txhigh: 16384, // Claude doesn't support xhigh, clamp to high\n\t\t\t\t\t};\n\n\t\t\t\t\t// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)\n\t\t\t\t\tconst level = options.reasoning === \"xhigh\" ? \"high\" : options.reasoning;\n\t\t\t\t\tconst budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning];\n\n\t\t\t\t\treturn {\n\t\t\t\t\t\tthinking: {\n\t\t\t\t\t\t\ttype: \"enabled\",\n\t\t\t\t\t\t\tbudget_tokens: budget,\n\t\t\t\t\t\t\t...(display !== undefined ? { display } : {}),\n\t\t\t\t\t\t},\n\t\t\t\t\t};\n\t\t\t\t})();\n\n\t\tif (!supportsAdaptiveThinking(model.id, model.name) && (options.interleavedThinking ?? true)) {\n\t\t\tresult.anthropic_beta = [\"interleaved-thinking-2025-05-14\"];\n\t\t}\n\n\t\treturn result;\n\t}\n\n\treturn undefined;\n}\n\nfunction createImageBlock(mimeType: string, data: string) {\n\tlet format: ImageFormat;\n\tswitch (mimeType) {\n\t\tcase \"image/jpeg\":\n\t\tcase \"image/jpg\":\n\t\t\tformat = ImageFormat.JPEG;\n\t\t\tbreak;\n\t\tcase \"image/png\":\n\t\t\tformat = ImageFormat.PNG;\n\t\t\tbreak;\n\t\tcase \"image/gif\":\n\t\t\tformat = ImageFormat.GIF;\n\t\t\tbreak;\n\t\tcase \"image/webp\":\n\t\t\tformat = ImageFormat.WEBP;\n\t\t\tbreak;\n\t\tdefault:\n\t\t\tthrow new Error(`Unknown image type: ${mimeType}`);\n\t}\n\n\tconst binaryString = atob(data);\n\tconst bytes = new Uint8Array(binaryString.length);\n\tfor (let i = 0; i < binaryString.length; i++) {\n\t\tbytes[i] = binaryString.charCodeAt(i);\n\t}\n\n\treturn { source: { bytes }, format };\n}\n"]}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { BedrockRuntimeClient, StopReason as BedrockStopReason, CachePointType, CacheTTL, ConversationRole, ConverseStreamCommand, ImageFormat, ToolResultStatus, } from "@aws-sdk/client-bedrock-runtime";
|
|
1
|
+
import { BedrockRuntimeClient, BedrockRuntimeServiceException, StopReason as BedrockStopReason, CachePointType, CacheTTL, ConversationRole, ConverseStreamCommand, ImageFormat, ToolResultStatus, } from "@aws-sdk/client-bedrock-runtime";
|
|
2
2
|
import { calculateCost } from "../models.js";
|
|
3
3
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
|
4
4
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
|
@@ -29,6 +29,14 @@ export const streamBedrock = (model, context, options = {}) => {
|
|
|
29
29
|
const config = {
|
|
30
30
|
profile: options.profile,
|
|
31
31
|
};
|
|
32
|
+
// Pass custom endpoint when the model has a non-default baseUrl.
|
|
33
|
+
// This enables VPC endpoints, proxy setups, and custom routing.
|
|
34
|
+
if (model.baseUrl) {
|
|
35
|
+
config.endpoint = model.baseUrl;
|
|
36
|
+
}
|
|
37
|
+
// Resolve bearer token for Bedrock API key auth.
|
|
38
|
+
const bearerToken = options.bearerToken || process.env.AWS_BEARER_TOKEN_BEDROCK || undefined;
|
|
39
|
+
const useBearerToken = bearerToken !== undefined && process.env.AWS_BEDROCK_SKIP_AUTH !== "1";
|
|
32
40
|
// in Node.js/Bun environment only
|
|
33
41
|
if (typeof process !== "undefined" && (process.versions?.node || process.versions?.bun)) {
|
|
34
42
|
// Region resolution: explicit option > env vars > SDK default chain.
|
|
@@ -76,6 +84,10 @@ export const streamBedrock = (model, context, options = {}) => {
|
|
|
76
84
|
// there's no config file resolution available.
|
|
77
85
|
config.region = options.region || "us-east-1";
|
|
78
86
|
}
|
|
87
|
+
if (useBearerToken) {
|
|
88
|
+
config.token = { token: bearerToken };
|
|
89
|
+
config.authSchemePreference = ["httpBearerAuth"];
|
|
90
|
+
}
|
|
79
91
|
try {
|
|
80
92
|
const client = new BedrockRuntimeClient(config);
|
|
81
93
|
const cacheRetention = resolveCacheRetention(options.cacheRetention);
|
|
@@ -83,9 +95,13 @@ export const streamBedrock = (model, context, options = {}) => {
|
|
|
83
95
|
modelId: model.id,
|
|
84
96
|
messages: convertMessages(context, model, cacheRetention),
|
|
85
97
|
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
|
|
86
|
-
inferenceConfig: {
|
|
98
|
+
inferenceConfig: {
|
|
99
|
+
...(options.maxTokens !== undefined && { maxTokens: options.maxTokens }),
|
|
100
|
+
...(options.temperature !== undefined && { temperature: options.temperature }),
|
|
101
|
+
},
|
|
87
102
|
toolConfig: convertToolConfig(context.tools, options.toolChoice),
|
|
88
103
|
additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
|
|
104
|
+
...(options.requestMetadata !== undefined && { requestMetadata: options.requestMetadata }),
|
|
89
105
|
};
|
|
90
106
|
const nextCommandInput = await options?.onPayload?.(commandInput, model);
|
|
91
107
|
if (nextCommandInput !== undefined) {
|
|
@@ -93,6 +109,13 @@ export const streamBedrock = (model, context, options = {}) => {
|
|
|
93
109
|
}
|
|
94
110
|
const command = new ConverseStreamCommand(commandInput);
|
|
95
111
|
const response = await client.send(command, { abortSignal: options.signal });
|
|
112
|
+
if (response.$metadata.httpStatusCode !== undefined) {
|
|
113
|
+
const responseHeaders = {};
|
|
114
|
+
if (response.$metadata.requestId) {
|
|
115
|
+
responseHeaders["x-amzn-requestid"] = response.$metadata.requestId;
|
|
116
|
+
}
|
|
117
|
+
await options?.onResponse?.({ status: response.$metadata.httpStatusCode, headers: responseHeaders }, model);
|
|
118
|
+
}
|
|
96
119
|
for await (const item of response.stream) {
|
|
97
120
|
if (item.messageStart) {
|
|
98
121
|
if (item.messageStart.role !== ConversationRole.ASSISTANT) {
|
|
@@ -116,19 +139,19 @@ export const streamBedrock = (model, context, options = {}) => {
|
|
|
116
139
|
handleMetadata(item.metadata, model, output);
|
|
117
140
|
}
|
|
118
141
|
else if (item.internalServerException) {
|
|
119
|
-
throw
|
|
142
|
+
throw item.internalServerException;
|
|
120
143
|
}
|
|
121
144
|
else if (item.modelStreamErrorException) {
|
|
122
|
-
throw
|
|
145
|
+
throw item.modelStreamErrorException;
|
|
123
146
|
}
|
|
124
147
|
else if (item.validationException) {
|
|
125
|
-
throw
|
|
148
|
+
throw item.validationException;
|
|
126
149
|
}
|
|
127
150
|
else if (item.throttlingException) {
|
|
128
|
-
throw
|
|
151
|
+
throw item.throttlingException;
|
|
129
152
|
}
|
|
130
153
|
else if (item.serviceUnavailableException) {
|
|
131
|
-
throw
|
|
154
|
+
throw item.serviceUnavailableException;
|
|
132
155
|
}
|
|
133
156
|
}
|
|
134
157
|
if (options.signal?.aborted) {
|
|
@@ -143,23 +166,52 @@ export const streamBedrock = (model, context, options = {}) => {
|
|
|
143
166
|
catch (error) {
|
|
144
167
|
for (const block of output.content) {
|
|
145
168
|
delete block.index;
|
|
169
|
+
// partialJson is only a streaming scratch buffer; never persist it.
|
|
146
170
|
delete block.partialJson;
|
|
147
171
|
}
|
|
148
172
|
output.stopReason = options.signal?.aborted ? "aborted" : "error";
|
|
149
|
-
output.errorMessage =
|
|
173
|
+
output.errorMessage = formatBedrockError(error);
|
|
150
174
|
stream.push({ type: "error", reason: output.stopReason, error: output });
|
|
151
175
|
stream.end();
|
|
152
176
|
}
|
|
153
177
|
})();
|
|
154
178
|
return stream;
|
|
155
179
|
};
|
|
180
|
+
/**
|
|
181
|
+
* Human-readable prefixes for Bedrock SDK exception names.
|
|
182
|
+
* The downstream retry logic in agent-session matches patterns like
|
|
183
|
+
* `server.?error` and `service.?unavailable`, so we preserve the legacy
|
|
184
|
+
* prefix format rather than using the raw SDK exception name.
|
|
185
|
+
*/
|
|
186
|
+
const BEDROCK_ERROR_PREFIXES = {
|
|
187
|
+
InternalServerException: "Internal server error",
|
|
188
|
+
ModelStreamErrorException: "Model stream error",
|
|
189
|
+
ValidationException: "Validation error",
|
|
190
|
+
ThrottlingException: "Throttling error",
|
|
191
|
+
ServiceUnavailableException: "Service unavailable",
|
|
192
|
+
};
|
|
193
|
+
/**
|
|
194
|
+
* Format a Bedrock error with a human-readable prefix.
|
|
195
|
+
* AWS SDK exceptions (both from `client.send()` and from stream event items)
|
|
196
|
+
* extend BedrockRuntimeServiceException. We map the `.name` to a stable
|
|
197
|
+
* human-readable prefix so downstream consumers (retry logic, context-overflow
|
|
198
|
+
* detection) can distinguish error categories via simple string matching.
|
|
199
|
+
*/
|
|
200
|
+
function formatBedrockError(error) {
|
|
201
|
+
const message = error instanceof Error ? error.message : JSON.stringify(error);
|
|
202
|
+
if (error instanceof BedrockRuntimeServiceException) {
|
|
203
|
+
const prefix = BEDROCK_ERROR_PREFIXES[error.name] ?? error.name;
|
|
204
|
+
return `${prefix}: ${message}`;
|
|
205
|
+
}
|
|
206
|
+
return message;
|
|
207
|
+
}
|
|
156
208
|
export const streamSimpleBedrock = (model, context, options) => {
|
|
157
209
|
const base = buildBaseOptions(model, options, undefined);
|
|
158
210
|
if (!options?.reasoning) {
|
|
159
211
|
return streamBedrock(model, context, { ...base, reasoning: undefined });
|
|
160
212
|
}
|
|
161
|
-
if (
|
|
162
|
-
if (supportsAdaptiveThinking(model.id)) {
|
|
213
|
+
if (isAnthropicClaudeModel(model)) {
|
|
214
|
+
if (supportsAdaptiveThinking(model.id, model.name)) {
|
|
163
215
|
return streamBedrock(model, context, {
|
|
164
216
|
...base,
|
|
165
217
|
reasoning: options.reasoning,
|
|
@@ -275,21 +327,36 @@ function handleContentBlockStop(event, blocks, output, stream) {
|
|
|
275
327
|
break;
|
|
276
328
|
case "toolCall":
|
|
277
329
|
block.arguments = parseStreamingJson(block.partialJson);
|
|
330
|
+
// Finalize in-place and strip the scratch buffer so replay only
|
|
331
|
+
// carries parsed arguments.
|
|
278
332
|
delete block.partialJson;
|
|
279
333
|
stream.push({ type: "toolcall_end", contentIndex: index, toolCall: block, partial: output });
|
|
280
334
|
break;
|
|
281
335
|
}
|
|
282
336
|
}
|
|
283
337
|
/**
|
|
284
|
-
* Check if the model supports adaptive thinking (Opus 4.6
|
|
338
|
+
* Check if the model supports adaptive thinking (Opus 4.6+, Sonnet 4.6).
|
|
339
|
+
* Checks both model ID and model name to support application inference profiles
|
|
340
|
+
* whose ARNs don't contain the model name.
|
|
285
341
|
*/
|
|
286
|
-
function supportsAdaptiveThinking(modelId) {
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
342
|
+
function supportsAdaptiveThinking(modelId, modelName) {
|
|
343
|
+
const candidates = [modelId, modelName]
|
|
344
|
+
.filter((value) => Boolean(value))
|
|
345
|
+
.map((value) => value.toLowerCase());
|
|
346
|
+
return candidates.some((s) => s.includes("opus-4-6") ||
|
|
347
|
+
s.includes("opus-4.6") ||
|
|
348
|
+
s.includes("opus 4.6") ||
|
|
349
|
+
s.includes("opus-4-7") ||
|
|
350
|
+
s.includes("opus-4.7") ||
|
|
351
|
+
s.includes("opus 4.7") ||
|
|
352
|
+
s.includes("sonnet-4-6") ||
|
|
353
|
+
s.includes("sonnet-4.6") ||
|
|
354
|
+
s.includes("sonnet 4.6"));
|
|
291
355
|
}
|
|
292
|
-
function mapThinkingLevelToEffort(level, modelId) {
|
|
356
|
+
function mapThinkingLevelToEffort(level, modelId, modelName) {
|
|
357
|
+
const candidates = [modelId, modelName]
|
|
358
|
+
.filter((value) => Boolean(value))
|
|
359
|
+
.map((value) => value.toLowerCase());
|
|
293
360
|
switch (level) {
|
|
294
361
|
case "minimal":
|
|
295
362
|
case "low":
|
|
@@ -299,7 +366,13 @@ function mapThinkingLevelToEffort(level, modelId) {
|
|
|
299
366
|
case "high":
|
|
300
367
|
return "high";
|
|
301
368
|
case "xhigh":
|
|
302
|
-
|
|
369
|
+
if (candidates.some((s) => s.includes("opus-4-6") || s.includes("opus-4.6"))) {
|
|
370
|
+
return "max";
|
|
371
|
+
}
|
|
372
|
+
if (candidates.some((s) => s.includes("opus-4-7") || s.includes("opus-4.7"))) {
|
|
373
|
+
return "xhigh";
|
|
374
|
+
}
|
|
375
|
+
return "high";
|
|
303
376
|
default:
|
|
304
377
|
return "high";
|
|
305
378
|
}
|
|
@@ -317,6 +390,20 @@ function resolveCacheRetention(cacheRetention) {
|
|
|
317
390
|
}
|
|
318
391
|
return "short";
|
|
319
392
|
}
|
|
393
|
+
/**
|
|
394
|
+
* Check if the model is an Anthropic Claude model on Bedrock.
|
|
395
|
+
* Checks both model ID and model name to support application inference profiles
|
|
396
|
+
* whose ARNs don't contain the model name.
|
|
397
|
+
*/
|
|
398
|
+
function isAnthropicClaudeModel(model) {
|
|
399
|
+
const id = model.id.toLowerCase();
|
|
400
|
+
const name = model.name?.toLowerCase() ?? "";
|
|
401
|
+
return (id.includes("anthropic.claude") ||
|
|
402
|
+
id.includes("anthropic/claude") ||
|
|
403
|
+
name.includes("anthropic.claude") ||
|
|
404
|
+
name.includes("anthropic/claude") ||
|
|
405
|
+
name.includes("claude"));
|
|
406
|
+
}
|
|
320
407
|
/**
|
|
321
408
|
* Check if the model supports prompt caching.
|
|
322
409
|
* Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, Claude 4.x models
|
|
@@ -325,26 +412,31 @@ function resolveCacheRetention(cacheRetention) {
|
|
|
325
412
|
* contains the model name, so we can decide locally.
|
|
326
413
|
*
|
|
327
414
|
* For application inference profiles (whose ARNs don't contain the model name),
|
|
328
|
-
*
|
|
329
|
-
*
|
|
415
|
+
* also checks model.name which is user-controlled via models.json or registerProvider.
|
|
416
|
+
* As a last resort, set AWS_BEDROCK_FORCE_CACHE=1 to enable cache points.
|
|
417
|
+
* Amazon Nova models have automatic caching and don't need explicit cache points.
|
|
330
418
|
*/
|
|
331
419
|
function supportsPromptCaching(model) {
|
|
332
|
-
const
|
|
333
|
-
if (
|
|
420
|
+
const candidates = [model.id.toLowerCase()];
|
|
421
|
+
if (model.name) {
|
|
422
|
+
candidates.push(model.name.toLowerCase());
|
|
423
|
+
}
|
|
424
|
+
const hasClaudeRef = candidates.some((s) => s.includes("claude"));
|
|
425
|
+
if (!hasClaudeRef) {
|
|
334
426
|
// Application inference profiles don't contain the model name in the ARN.
|
|
335
427
|
// Allow users to force cache points via environment variable.
|
|
336
428
|
if (typeof process !== "undefined" && process.env.AWS_BEDROCK_FORCE_CACHE === "1")
|
|
337
429
|
return true;
|
|
338
430
|
return false;
|
|
339
431
|
}
|
|
340
|
-
// Claude 4.x models (opus-4, sonnet-4, haiku-4)
|
|
341
|
-
if (
|
|
432
|
+
// Claude 4.x models (opus-4, sonnet-4, haiku-4), including human-readable names like "Claude Sonnet 4.6".
|
|
433
|
+
if (candidates.some((s) => s.includes("-4-") || s.includes("-4.") || s.includes(" 4-") || s.includes(" 4.")))
|
|
342
434
|
return true;
|
|
343
435
|
// Claude 3.7 Sonnet
|
|
344
|
-
if (
|
|
436
|
+
if (candidates.some((s) => s.includes("claude-3-7-sonnet") || s.includes("claude 3.7 sonnet")))
|
|
345
437
|
return true;
|
|
346
438
|
// Claude 3.5 Haiku
|
|
347
|
-
if (
|
|
439
|
+
if (candidates.some((s) => s.includes("claude-3-5-haiku") || s.includes("claude 3.5 haiku")))
|
|
348
440
|
return true;
|
|
349
441
|
return false;
|
|
350
442
|
}
|
|
@@ -353,10 +445,11 @@ function supportsPromptCaching(model) {
|
|
|
353
445
|
* Only Anthropic Claude models support the signature field.
|
|
354
446
|
* Other models (OpenAI, Qwen, Minimax, Moonshot, etc.) reject it with:
|
|
355
447
|
* "This model doesn't support the reasoningContent.reasoningText.signature field"
|
|
448
|
+
*
|
|
449
|
+
* Checks both model ID and model name to support application inference profiles.
|
|
356
450
|
*/
|
|
357
451
|
function supportsThinkingSignature(model) {
|
|
358
|
-
|
|
359
|
-
return id.includes("anthropic.claude") || id.includes("anthropic/claude");
|
|
452
|
+
return isAnthropicClaudeModel(model);
|
|
360
453
|
}
|
|
361
454
|
function buildSystemPrompt(systemPrompt, model, cacheRetention) {
|
|
362
455
|
if (!systemPrompt)
|
|
@@ -558,15 +651,26 @@ function mapStopReason(reason) {
|
|
|
558
651
|
return "error";
|
|
559
652
|
}
|
|
560
653
|
}
|
|
654
|
+
function isGovCloudBedrockTarget(model, options) {
|
|
655
|
+
const region = options.region || process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION;
|
|
656
|
+
if (region?.toLowerCase().startsWith("us-gov-")) {
|
|
657
|
+
return true;
|
|
658
|
+
}
|
|
659
|
+
const modelId = model.id.toLowerCase();
|
|
660
|
+
return modelId.startsWith("us-gov.") || modelId.startsWith("arn:aws-us-gov:");
|
|
661
|
+
}
|
|
561
662
|
function buildAdditionalModelRequestFields(model, options) {
|
|
562
663
|
if (!options.reasoning || !model.reasoning) {
|
|
563
664
|
return undefined;
|
|
564
665
|
}
|
|
565
|
-
if (
|
|
566
|
-
|
|
666
|
+
if (isAnthropicClaudeModel(model)) {
|
|
667
|
+
// GovCloud Bedrock currently rejects the Claude thinking.display field.
|
|
668
|
+
// Omit it there until the GovCloud Converse schema catches up.
|
|
669
|
+
const display = isGovCloudBedrockTarget(model, options) ? undefined : (options.thinkingDisplay ?? "summarized");
|
|
670
|
+
const result = supportsAdaptiveThinking(model.id, model.name)
|
|
567
671
|
? {
|
|
568
|
-
thinking: { type: "adaptive" },
|
|
569
|
-
output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) },
|
|
672
|
+
thinking: { type: "adaptive", ...(display !== undefined ? { display } : {}) },
|
|
673
|
+
output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id, model.name) },
|
|
570
674
|
}
|
|
571
675
|
: (() => {
|
|
572
676
|
const defaultBudgets = {
|
|
@@ -583,10 +687,11 @@ function buildAdditionalModelRequestFields(model, options) {
|
|
|
583
687
|
thinking: {
|
|
584
688
|
type: "enabled",
|
|
585
689
|
budget_tokens: budget,
|
|
690
|
+
...(display !== undefined ? { display } : {}),
|
|
586
691
|
},
|
|
587
692
|
};
|
|
588
693
|
})();
|
|
589
|
-
if (!supportsAdaptiveThinking(model.id) && (options.interleavedThinking ?? true)) {
|
|
694
|
+
if (!supportsAdaptiveThinking(model.id, model.name) && (options.interleavedThinking ?? true)) {
|
|
590
695
|
result.anthropic_beta = ["interleaved-thinking-2025-05-14"];
|
|
591
696
|
}
|
|
592
697
|
return result;
|