@gajae-code/ai 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -295,6 +295,7 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
295
295
  export const DEFAULT_MODEL_PER_PROVIDER: Record<KnownProvider, string> = {
296
296
  ...Object.fromEntries(PROVIDER_DESCRIPTORS.map(d => [d.providerId, d.defaultModel])),
297
297
  // Providers not in PROVIDER_DESCRIPTORS (special auth or no standard discovery)
298
+ "azure-openai": "gpt-4.1",
298
299
  "alibaba-coding-plan": "qwen3.5-plus",
299
300
  "amazon-bedrock": "us.anthropic.claude-opus-4-6-v1",
300
301
  "google-antigravity": "gemini-3-pro-high",
@@ -304,6 +304,17 @@ export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
304
304
  return false;
305
305
  }
306
306
 
307
/**
 * Reports whether `error` looks like a provider rejection caused by a replayed
 * thinking block in the latest assistant message having been altered.
 *
 * The check is deliberately strict: the error must carry HTTP status 400 and its
 * message must match every one of the four patterns below (case-insensitive).
 *
 * @param error - Any thrown value; non-`Error` values are stringified before matching.
 * @returns `true` only when the status is 400 and all message patterns match.
 */
export function isAnthropicThinkingBlockMutationError(error: unknown): boolean {
	const status = extractHttpStatusFromError(error);
	if (status !== 400) {
		return false;
	}

	const text = error instanceof Error ? error.message : String(error);

	// Every fragment must be present; a partial match is treated as a different error.
	const requiredFragments = [
		/invalid_request_error/i,
		/thinking|redacted_thinking/i,
		/latest assistant message/i,
		/cannot be modified/i,
	];
	return requiredFragments.every(fragment => fragment.test(text));
}
317
+
307
318
  function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
308
319
  const tools = params.tools as Array<{ strict?: unknown }> | undefined;
309
320
  return tools?.some(tool => tool.strict === true) ?? false;
@@ -1058,8 +1069,18 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1058
1069
  (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
1059
1070
  let strictFallbackErrorMessage: string | undefined;
1060
1071
  let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
1061
- const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
1062
- let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
1072
+ const prepareParams = async (paramsOptions?: {
1073
+ repairLatestAssistantThinking?: boolean;
1074
+ }): Promise<MessageCreateParamsStreaming> => {
1075
+ let nextParams = buildParams(
1076
+ model,
1077
+ baseUrl,
1078
+ context,
1079
+ isOAuthToken,
1080
+ options,
1081
+ disableStrictTools,
1082
+ paramsOptions?.repairLatestAssistantThinking === true,
1083
+ );
1063
1084
  if (disableStrictTools) {
1064
1085
  dropAnthropicStrictTools(nextParams);
1065
1086
  }
@@ -1096,6 +1117,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1096
1117
  // Provider-level transport/rate-limit failures: only before any streamed content starts.
1097
1118
  // Malformed envelopes/JSON: only before replay-unsafe text/tool events are visible on this stream.
1098
1119
  let providerRetryAttempt = 0;
1120
+ let thinkingRepairAttempted = false;
1099
1121
  while (true) {
1100
1122
  activeAbortTracker = createAbortSourceTracker(options?.signal);
1101
1123
  const firstEventTimeoutAbortError = new Error(
@@ -1372,6 +1394,26 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1372
1394
  firstTokenTime = undefined;
1373
1395
  continue;
1374
1396
  }
1397
+ if (
1398
+ !thinkingRepairAttempted &&
1399
+ firstTokenTime === undefined &&
1400
+ isAnthropicThinkingBlockMutationError(streamFailure)
1401
+ ) {
1402
+ logger.debug("anthropic: repairing latest assistant thinking replay after provider rejection", {
1403
+ model: model.id,
1404
+ error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
1405
+ });
1406
+ thinkingRepairAttempted = true;
1407
+ params = await prepareParams({ repairLatestAssistantThinking: true });
1408
+ providerRetryAttempt = 0;
1409
+ output.content.length = 0;
1410
+ output.responseId = undefined;
1411
+ output.providerPayload = undefined;
1412
+ output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
1413
+ output.stopReason = "stop";
1414
+ firstTokenTime = undefined;
1415
+ continue;
1416
+ }
1375
1417
  if (
1376
1418
  !dropFastMode &&
1377
1419
  resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
@@ -1887,11 +1929,12 @@ function buildParams(
1887
1929
  isOAuthToken: boolean,
1888
1930
  options?: AnthropicOptions,
1889
1931
  disableStrictTools = false,
1932
+ repairLatestAssistantThinking = false,
1890
1933
  ): MessageCreateParamsStreaming {
1891
1934
  const { cacheControl } = getCacheControl(model, baseUrl, options?.cacheRetention);
1892
1935
  const params: AnthropicSamplingParams = {
1893
1936
  model: model.id,
1894
- messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
1937
+ messages: convertAnthropicMessages(context.messages, model, isOAuthToken, { repairLatestAssistantThinking }),
1895
1938
  max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
1896
1939
  stream: true,
1897
1940
  };
@@ -2074,10 +2117,11 @@ export function convertAnthropicMessages(
2074
2117
  messages: Message[],
2075
2118
  model: Model<"anthropic-messages">,
2076
2119
  isOAuthToken: boolean,
2120
+ options?: { repairLatestAssistantThinking?: boolean },
2077
2121
  ): MessageParam[] {
2078
2122
  const params: MessageParam[] = [];
2079
2123
 
2080
- const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
2124
+ const transformedMessages = transformMessages(messages, model, normalizeToolCallId, options);
2081
2125
 
2082
2126
  for (let i = 0; i < transformedMessages.length; i++) {
2083
2127
  const msg = transformedMessages[i];
@@ -31,6 +31,7 @@ export function transformMessages<TApi extends Api>(
31
31
  messages: Message[],
32
32
  model: Model<TApi>,
33
33
  normalizeToolCallId?: (id: string, model: Model<TApi>, source: AssistantMessage) => string,
34
+ options?: { repairLatestAssistantThinking?: boolean },
34
35
  ): Message[] {
35
36
  // Build a map of original tool call IDs to normalized IDs
36
37
  const toolCallIdMap = new Map<string, string>();
@@ -64,16 +65,24 @@ export function transformMessages<TApi extends Api>(
64
65
  index === latestAssistantIndex &&
65
66
  model.api === "anthropic-messages" &&
66
67
  assistantMsg.api === "anthropic-messages";
67
- // Aborted/errored messages may have partially-streamed thinking signatures.
68
- // A partial signature is invalid and will be rejected by the API, so we must
69
- // strip signatures from thinking blocks in these messages.
70
- const hasInvalidSignatures = assistantMsg.stopReason === "aborted" || assistantMsg.stopReason === "error";
68
+ // Aborted/errored messages may contain partially-streamed thinking blocks.
69
+ // Anthropic requires thinking/redacted_thinking bytes in replayed assistant
70
+ // messages to match the original response exactly; stripping a signature,
71
+ // well-forming text, or keeping a partial redacted block would emit a
72
+ // modified thinking sequence. Drop those private blocks instead. Tool calls
73
+ // are kept so the second pass can either preserve real results or synthesize
74
+ // an explicit aborted result without leaving dangling tool_use blocks.
75
+ const hasPartialThinking = assistantMsg.stopReason === "aborted" || assistantMsg.stopReason === "error";
76
+ const dropLatestAssistantThinking =
77
+ options?.repairLatestAssistantThinking === true &&
78
+ index === latestAssistantIndex &&
79
+ model.api === "anthropic-messages" &&
80
+ assistantMsg.api === "anthropic-messages";
71
81
 
72
82
  const transformedContent = assistantMsg.content.flatMap(block => {
73
83
  if (block.type === "thinking") {
74
- // Strip signature from aborted/errored messages — it's likely incomplete
75
- const sanitized =
76
- hasInvalidSignatures && block.thinkingSignature ? { ...block, thinkingSignature: undefined } : block;
84
+ if (hasPartialThinking || dropLatestAssistantThinking) return [];
85
+ const sanitized = block;
77
86
  if (mustPreserveLatestAnthropicThinking) return sanitized;
78
87
  // For same model: keep thinking blocks with signatures (needed for replay)
79
88
  // even if the thinking text is empty (OpenAI encrypted reasoning)
@@ -88,6 +97,7 @@ export function transformMessages<TApi extends Api>(
88
97
  }
89
98
 
90
99
  if (block.type === "redactedThinking") {
100
+ if (hasPartialThinking || dropLatestAssistantThinking) return [];
91
101
  if (mustPreserveLatestAnthropicThinking) return block;
92
102
  if (isSameModel) return block;
93
103
  return [];
package/src/stream.ts CHANGED
@@ -97,6 +97,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
97
97
  cursor: "CURSOR_ACCESS_TOKEN",
98
98
  deepseek: "DEEPSEEK_API_KEY",
99
99
  "openai-codex": "OPENAI_CODEX_OAUTH_TOKEN",
100
+ "azure-openai": "AZURE_OPENAI_API_KEY",
100
101
  "azure-openai-responses": "AZURE_OPENAI_API_KEY",
101
102
  exa: "EXA_API_KEY",
102
103
  jina: "JINA_API_KEY",
package/src/types.ts CHANGED
@@ -98,6 +98,7 @@ export interface ThinkingConfig {
98
98
  export type KnownProvider =
99
99
  | "alibaba-coding-plan"
100
100
  | "amazon-bedrock"
101
+ | "azure-openai"
101
102
  | "anthropic"
102
103
  | "google"
103
104
  | "google-gemini-cli"
@@ -102,9 +102,45 @@ function sanitizeDump(dump: RawHttpRequestDump): RawHttpRequestDump {
102
102
  return {
103
103
  ...dump,
104
104
  headers: redactHeaders(dump.headers),
105
+ body: sanitizeDumpBody(dump.body),
105
106
  };
106
107
  }
107
108
 
109
/**
 * Recursively copies a request-dump body, replacing sensitive reasoning fields
 * with the literal string "[redacted]".
 *
 * Arrays are mapped element by element, values rejected by `isObject` are returned
 * unchanged, and objects are rebuilt key by key. Which keys are redacted on an
 * object is decided by that object's own string `type` property (see
 * `getRedactedBodyKeys`); all other properties are sanitized recursively.
 *
 * @param value - Any body value (object, array, or primitive).
 * @returns A sanitized copy for arrays/objects, or `value` itself otherwise.
 */
function sanitizeDumpBody(value: unknown): unknown {
	if (Array.isArray(value)) {
		return value.map(item => sanitizeDumpBody(item));
	}
	if (!isObject(value)) {
		return value;
	}

	const type = typeof value.type === "string" ? value.type : undefined;
	const redactedKeys = getRedactedBodyKeys(type);

	// Object.fromEntries defines own data properties. Plain `copy[key] = …` assignment
	// would route an own "__proto__" key (possible in JSON-parsed bodies) through the
	// inherited prototype setter, dropping the key and re-parenting the copy.
	return Object.fromEntries(
		Object.entries(value).map(([key, property]): [string, unknown] => [
			key,
			redactedKeys.has(key) ? "[redacted]" : sanitizeDumpBody(property),
		]),
	);
}
129
+
130
/** Keys redacted on blocks whose `type` is "thinking" (text plus the signature field spellings). */
const THINKING_REDACTED_BODY_KEYS = new Set<string>([
	"thinking",
	"signature",
	"thinkingSignature",
	"thoughtSignature",
]);

/** Keys redacted on blocks whose `type` is "redacted_thinking" or "redactedThinking". */
const REDACTED_THINKING_REDACTED_BODY_KEYS = new Set<string>(["data"]);

/** Shared empty result for every other block type. */
const NO_REDACTED_BODY_KEYS = new Set<string>();

/**
 * Returns the property names that must be redacted on a body object with the
 * given `type` discriminator.
 *
 * The returned sets are shared module-level constants rather than fresh
 * allocations, because this is called once per object visited while sanitizing
 * a dump body. Callers must treat the result as read-only.
 *
 * @param type - The object's string `type` value, or `undefined` when absent.
 * @returns The set of keys to redact; empty for unrecognized or missing types.
 */
function getRedactedBodyKeys(type: string | undefined): Set<string> {
	switch (type) {
		case "thinking":
			return THINKING_REDACTED_BODY_KEYS;
		case "redacted_thinking":
		case "redactedThinking":
			return REDACTED_THINKING_REDACTED_BODY_KEYS;
		default:
			return NO_REDACTED_BODY_KEYS;
	}
}
143
+
108
144
  function redactHeaders(headers: Record<string, string> | undefined): Record<string, string> | undefined {
109
145
  if (!headers) {
110
146
  return undefined;