jeo-code 0.6.27 → 0.6.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,12 @@ const DEPRECATED_TEMPERATURE = "`temperature` is deprecated for this model.";
11
11
  const CLAUDE_CODE_VERSION = "2.1.63";
12
12
  const CLAUDE_CODE_SYSTEM_INSTRUCTION = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
13
13
  const CLAUDE_BILLING_HEADER_PREFIX = "x-anthropic-billing-header:";
/**
 * Comma-separated beta flags sent as the `anthropic-beta` header on API-key requests.
 * Per the upstream note: interleaved-thinking enables thinking + tools, and
 * prompt-caching-scope gives scoped cache breakpoints.
 */
const ANTHROPIC_API_KEY_BETA =
  "interleaved-thinking-2025-05-14" + "," + "prompt-caching-scope-2026-01-05";
14
20
  const ANTHROPIC_OAUTH_BETA = [
15
21
  "claude-code-20250219",
16
22
  "oauth-2025-04-20",
@@ -88,28 +94,76 @@ function anthropicThinkingBudget(effort: CallOptions["reasoningEffort"], maxToke
88
94
  return Math.min(budget, Math.max(1024, maxTokens - 1024));
89
95
  }
90
96
 
/** One Anthropic content block, kept as an open key/value record (image, text, tool_use, …). */
type AnthropicContentBlock = Record<string, unknown>;

/** One wire message: a role plus either a plain string or a list of content blocks. */
type AnthropicMessage = {
  role: string;
  content: string | AnthropicContentBlock[];
};
99
+
/**
 * Decides whether an assistant turn can be replayed as native tool_use + thinking blocks.
 *
 * All three conditions must hold:
 *  - thinking is enabled for this call,
 *  - the turn carries at least one structured `toolUse` entry,
 *  - the turn carries at least one reasoning artifact recorded for provider "anthropic"
 *    and this exact `model` that has either a `signature` or a `redacted` payload.
 *
 * Upstream rationale: native tool_use → tool_result is what makes Claude keep the prior
 * thinking blocks (plain-text tool feedback gets them stripped on most models), so this
 * predicate gates cross-step reasoning continuity.
 *
 * @param m               The message to inspect.
 * @param model           Model id the artifacts must have been recorded for.
 * @param thinkingEnabled Whether thinking is enabled for the current call.
 * @returns `true` only when the turn qualifies for native replay.
 */
export function anthropicNativizable(m: Message, model: string, thinkingEnabled: boolean): boolean {
  if (!thinkingEnabled) return false;
  if (!m.toolUse?.length) return false;

  const artifacts = m.reasoningArtifacts;
  if (!artifacts) return false;

  return artifacts.some(artifact => {
    if (artifact.provider !== "anthropic" || artifact.model !== model) return false;
    // A replayable block needs either a signed thought or an opaque redacted payload.
    return Boolean(artifact.signature) || Boolean(artifact.redacted);
  });
}
110
+
/**
 * Build the Anthropic wire messages for a conversation. System messages are dropped here.
 *
 * An assistant turn and the user turn right after it are rewritten together as native
 * `thinking`/`redacted_thinking` + `tool_use` blocks followed by `tool_result` blocks, but
 * only when BOTH of these hold:
 *  - the assistant turn passes `anthropicNativizable` (thinking enabled, structured
 *    toolUse, same-model Anthropic artifact), and
 *  - the next non-system message is a user turn whose `toolResults` answer exactly the
 *    assistant's `toolUse` ids (every call has a result, every result has a call).
 *
 * The second condition is the fix over the earlier version, which nativized the assistant
 * turn unconditionally and could therefore emit a `tool_use` with no matching
 * `tool_result` (e.g. the follow-up turn carried no structured results). Anthropic rejects
 * such a request, and that rejection is not a thinking/signature error, so the
 * artifact-stripping retry would not have recovered it.
 *
 * Every other message — and everything when `thinkingEnabled` is false — falls back to the
 * plain string / image-block shape.
 *
 * @param messages        Full conversation, possibly including system messages.
 * @param model           Model id that reasoning artifacts must match to be replayed.
 * @param thinkingEnabled Whether thinking is on for this call (false disables nativization).
 * @returns Wire messages in conversation order, one per non-system input message.
 */
export function buildAnthropicMessages(messages: Message[], model: string, thinkingEnabled: boolean): AnthropicMessage[] {
  const nonSystem = messages.filter(m => m.role !== "system");

  // Fallback shape: image blocks (plus an optional text block) when images are attached,
  // otherwise the raw string content.
  const plain = (m: Message): AnthropicMessage => ({
    role: m.role,
    content: m.images?.length
      ? [
          ...m.images.map((img): AnthropicContentBlock => ({ type: "image", source: { type: "base64", media_type: img.mediaType, data: img.data } })),
          ...(m.content ? [{ type: "text", text: m.content } as AnthropicContentBlock] : []),
        ]
      : m.content,
  });

  // True when nonSystem[i] is a nativizable assistant turn AND nonSystem[i + 1] is the user
  // turn that answers exactly its tool calls. Both halves of a pair consult this same
  // predicate, so a native tool_use can never be emitted without its native tool_result
  // (or vice versa).
  const isNativePair = (i: number): boolean => {
    const assistant = nonSystem[i];
    const reply = nonSystem[i + 1];
    if (!assistant || assistant.role !== "assistant" || !anthropicNativizable(assistant, model, thinkingEnabled)) return false;
    if (!reply || reply.role !== "user" || !reply.toolResults?.length) return false;
    const calls = assistant.toolUse ?? [];
    const results = reply.toolResults;
    const callIds = new Set(calls.map(tu => tu.id));
    const resultIds = new Set(results.map(tr => tr.id));
    return calls.every(tu => resultIds.has(tu.id)) && results.every(tr => callIds.has(tr.id));
  };

  return nonSystem.map((m, i) => {
    if (m.role === "assistant" && isNativePair(i)) {
      const blocks: AnthropicContentBlock[] = [];
      // Thinking blocks go first, in recorded order, followed by the tool calls.
      for (const a of m.reasoningArtifacts ?? []) {
        if (a.provider !== "anthropic" || a.model !== model) continue;
        if (a.signature) blocks.push({ type: "thinking", thinking: a.text ?? "", signature: a.signature });
        else if (a.redacted) blocks.push({ type: "redacted_thinking", data: a.redacted });
      }
      for (const tu of m.toolUse ?? []) blocks.push({ type: "tool_use", id: tu.id, name: tu.tool, input: tu.arguments });
      return { role: "assistant", content: blocks };
    }
    if (m.role === "user" && i > 0 && isNativePair(i - 1)) {
      const blocks: AnthropicContentBlock[] = (m.toolResults ?? []).map(tr => ({
        type: "tool_result", tool_use_id: tr.id, content: tr.output, is_error: tr.isError,
      }));
      if (m.toolResultExtra) blocks.push({ type: "text", text: m.toolResultExtra });
      return { role: "user", content: blocks };
    }
    return plain(m);
  });
}
150
+
91
151
  export function anthropicPayload(
92
152
  messages: Message[],
93
153
  options: CallOptions,
94
154
  stream: boolean,
95
155
  includeTemperature: boolean,
96
156
  credential: Credential = { kind: "none", provider: "anthropic" },
157
+ stripArtifacts = false,
97
158
  ): string {
98
159
  const model = stripAnthropicPrefix(options.model);
99
160
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
100
- // Image attachments (clipboard paste) become Anthropic content blocks; plain
101
- // string content is kept for text-only messages (the overwhelmingly common case).
102
- type ContentBlock = Record<string, unknown>;
103
- const anthropicMessages: { role: string; content: string | ContentBlock[] }[] =
104
- messages.filter(m => m.role !== "system").map(m => ({
105
- role: m.role,
106
- content: m.images?.length
107
- ? [
108
- ...m.images.map((img): ContentBlock => ({ type: "image", source: { type: "base64", media_type: img.mediaType, data: img.data } })),
109
- ...(m.content ? [{ type: "text", text: m.content } as ContentBlock] : []),
110
- ]
111
- : m.content,
112
- }));
161
+ // Image attachments + native tool/thinking-block reconstruction live in buildAnthropicMessages.
162
+ const maxTokens = options.maxTokens ?? 4000;
163
+ const thinkingBudget = anthropicThinkingBudget(options.reasoningEffort, maxTokens);
164
+ // Reconstruct native tool_use / tool_result / thinking blocks for same-model turns when
165
+ // thinking is enabled (and not stripped by a fail-safe retry); else plain string/image.
166
+ const anthropicMessages = buildAnthropicMessages(messages, options.model, thinkingBudget !== undefined && !stripArtifacts);
113
167
  // Conversation prompt caching (gjc parity — the main same-model latency gap):
114
168
  // one breakpoint on the LAST message caches the entire conversation prefix, so
115
169
  // each agent-loop step only pays input processing for the new tail instead of
@@ -125,8 +179,7 @@ export function anthropicPayload(
125
179
  last.content[last.content.length - 1] = { ...tail, cache_control: { type: "ephemeral" } };
126
180
  }
127
181
  }
128
- const maxTokens = options.maxTokens ?? 4000;
129
- const thinkingBudget = anthropicThinkingBudget(options.reasoningEffort, maxTokens);
182
+
130
183
  const payload: Record<string, unknown> = {
131
184
  model,
132
185
  messages: anthropicMessages,
@@ -162,13 +215,14 @@ export function anthropicRequest(
162
215
  credential: Credential,
163
216
  stream: boolean,
164
217
  includeTemperature: boolean,
218
+ stripArtifacts = false,
165
219
  ): { url: string; headers: Record<string, string>; body: string } {
166
220
  return {
167
221
  // Anthropic-compatible providers (z.ai, MiniMax, …) accept the Messages wire
168
222
  // format at their own host; an explicit baseUrl pins `${base}/v1/messages`.
169
223
  url: options.baseUrl ? `${options.baseUrl.replace(/\/$/, "")}/v1/messages` : ANTHROPIC_URL,
170
224
  headers: headersFor(credential, stream),
171
- body: anthropicPayload(messages, options, stream, includeTemperature, credential),
225
+ body: anthropicPayload(messages, options, stream, includeTemperature, credential, stripArtifacts),
172
226
  };
173
227
  }
174
228
 
@@ -176,14 +230,21 @@ function isDeprecatedTemperatureError(status: number, detail: string): boolean {
176
230
  return status === 400 && detail.includes(DEPRECATED_TEMPERATURE);
177
231
  }
178
232
 
/**
 * Heuristic check for "a replayed reasoning artifact was rejected".
 *
 * Matches an HTTP 400 whose detail text mentions thinking, signature or redacted_thinking
 * (case-insensitive). Upstream lists expired signatures, edited history and toggled
 * thinking as the expected causes; the caller retries once with artifacts stripped.
 *
 * @param status HTTP status code of the failed response.
 * @param detail Error detail text to search.
 * @returns `true` when the failure looks artifact-related.
 */
function isReasoningArtifactError(status: number, detail: string): boolean {
  if (status !== 400) return false;
  return /thinking|signature|redacted_thinking/i.test(detail);
}
239
+
179
240
  async function postAnthropic(
180
241
  messages: Message[],
181
242
  options: CallOptions,
182
243
  credential: Credential,
183
244
  stream: boolean,
184
245
  ): Promise<Response> {
185
- const send = (includeTemperature: boolean) => {
186
- const { url, headers, body } = anthropicRequest(messages, options, credential, stream, includeTemperature);
246
+ const send = (includeTemperature: boolean, stripArtifacts = false) => {
247
+ const { url, headers, body } = anthropicRequest(messages, options, credential, stream, includeTemperature, stripArtifacts);
187
248
  return fetch(url, { method: "POST", headers, body, signal: options.signal });
188
249
  };
189
250
 
@@ -196,6 +257,12 @@ async function postAnthropic(
196
257
  if (response.ok) return response;
197
258
  throw await providerHttpError("Anthropic", response, stream ? "(stream)" : undefined);
198
259
  }
260
+ // Fail-safe: a rejected replay artifact → retry once with artifacts stripped (plain history).
261
+ if (isReasoningArtifactError(response.status, detail)) {
262
+ response = await send(true, true);
263
+ if (response.ok) return response;
264
+ throw await providerHttpError("Anthropic", response, stream ? "(stream)" : undefined);
265
+ }
199
266
 
200
267
  throw new ProviderHttpError(
201
268
  "Anthropic",
@@ -233,8 +300,16 @@ export const anthropicAdapter: ProviderAdapter = {
233
300
  supportsNativeTools: true,
234
301
  async call(messages, options, credential) {
235
302
  const response = await postAnthropic(messages, options, credential, false);
236
- const result = (await response.json()) as { content: { type: string; text?: string; name?: string; input?: unknown }[]; stop_reason?: string; usage?: AnthropicUsage };
303
+ const result = (await response.json()) as { content: { type: string; text?: string; name?: string; input?: unknown; thinking?: string; signature?: string; data?: string }[]; stop_reason?: string; usage?: AnthropicUsage };
237
304
  if (result.usage) options.onUsage?.({ inputTokens: totalInputTokens(result.usage), outputTokens: result.usage.output_tokens });
305
+ // Capture thinking/redacted blocks as replay artifacts (parity with the stream path).
306
+ for (const c of result.content) {
307
+ if (c.type === "thinking" && (c.thinking || c.signature)) {
308
+ options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, text: c.thinking || undefined, signature: c.signature });
309
+ } else if (c.type === "redacted_thinking" && c.data) {
310
+ options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, redacted: c.data });
311
+ }
312
+ }
238
313
  // Prefer a native tool call (re-serialized to canonical JSON) over any stray text.
239
314
  const toolCall = serializeToolCalls(
240
315
  result.content
@@ -256,12 +331,16 @@ export const anthropicAdapter: ProviderAdapter = {
256
331
  // never as text_delta — accumulate per block index, then re-serialize to canonical
257
332
  // JSON and yield it once at the end (concatenation still equals call()).
258
333
  const toolBlocks = new Map<number, { name: string; args: string }>();
334
+ // Thinking blocks stream as content_block_start(type:thinking) + thinking_delta(text)
335
+ // + signature_delta(signature). Accumulate per index and emit one ReasoningArtifact per
336
+ // block on stream end so the signed thought can be replayed (gajae continuity).
337
+ const thinkBlocks = new Map<number, { text: string; signature?: string }>();
259
338
  for await (const data of readSse(response.body)) {
260
339
  let evt: {
261
340
  type?: string;
262
341
  index?: number;
263
- content_block?: { type?: string; name?: string };
264
- delta?: { type?: string; text?: string; partial_json?: string; thinking?: string; stop_reason?: string };
342
+ content_block?: { type?: string; name?: string; data?: string };
343
+ delta?: { type?: string; text?: string; partial_json?: string; thinking?: string; signature?: string; stop_reason?: string };
265
344
  message?: { usage?: AnthropicUsage };
266
345
  usage?: { output_tokens?: number };
267
346
  };
@@ -272,6 +351,11 @@ export const anthropicAdapter: ProviderAdapter = {
272
351
  }
273
352
  if (evt.type === "content_block_start" && evt.content_block?.type === "tool_use" && typeof evt.index === "number") {
274
353
  toolBlocks.set(evt.index, { name: evt.content_block.name ?? "", args: "" });
354
+ } else if (evt.type === "content_block_start" && evt.content_block?.type === "thinking" && typeof evt.index === "number") {
355
+ thinkBlocks.set(evt.index, { text: "" });
356
+ } else if (evt.type === "content_block_start" && evt.content_block?.type === "redacted_thinking" && evt.content_block.data) {
357
+ // Redacted thinking carries opaque `data` directly (no deltas) — emit immediately.
358
+ options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, redacted: evt.content_block.data });
275
359
  } else if (evt.type === "content_block_delta" && evt.delta?.type === "input_json_delta" && typeof evt.index === "number") {
276
360
  const b = toolBlocks.get(evt.index);
277
361
  if (b) b.args += evt.delta.partial_json ?? "";
@@ -280,6 +364,15 @@ export const anthropicAdapter: ProviderAdapter = {
280
364
  yield evt.delta.text;
281
365
  } else if (evt.type === "content_block_delta" && evt.delta?.type === "thinking_delta" && evt.delta.thinking) {
282
366
  options.onReasoning?.(evt.delta.thinking);
367
+ if (typeof evt.index === "number") {
368
+ const tb = thinkBlocks.get(evt.index) ?? { text: "" };
369
+ tb.text += evt.delta.thinking;
370
+ thinkBlocks.set(evt.index, tb);
371
+ }
372
+ } else if (evt.type === "content_block_delta" && evt.delta?.type === "signature_delta" && evt.delta.signature && typeof evt.index === "number") {
373
+ const tb = thinkBlocks.get(evt.index) ?? { text: "" };
374
+ tb.signature = (tb.signature ?? "") + evt.delta.signature;
375
+ thinkBlocks.set(evt.index, tb);
283
376
  } else if (evt.type === "message_start" && evt.message?.usage) {
284
377
  // Cache only — usage is reported ONCE at message_delta so an accumulating
285
378
  // sink can't double-count input (and a pre-first-chunk retry that replays
@@ -290,6 +383,12 @@ export const anthropicAdapter: ProviderAdapter = {
290
383
  if (evt.usage) options.onUsage?.({ inputTokens: cachedInput, outputTokens: evt.usage.output_tokens });
291
384
  }
292
385
  }
386
+ // Emit captured thinking blocks as replay artifacts (signed thought + signature).
387
+ for (const tb of thinkBlocks.values()) {
388
+ if (tb.text || tb.signature) {
389
+ options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, text: tb.text || undefined, signature: tb.signature });
390
+ }
391
+ }
293
392
  const envelope = serializeAccumulatedToolCalls(toolBlocks);
294
393
  if (envelope) { yieldedAny = true; yield envelope; }
295
394
  if (!yieldedAny) throw emptyCompletionError(stopReason);
@@ -361,6 +460,7 @@ function headersFor(credential: Credential, stream: boolean): Record<string, str
361
460
  "content-type": "application/json",
362
461
  "x-api-key": credential.token,
363
462
  "anthropic-version": "2023-06-01",
463
+ "anthropic-beta": ANTHROPIC_API_KEY_BETA,
364
464
  };
365
465
  }
366
466
  throw new Error("anthropic adapter requires a credential");
@@ -108,6 +108,12 @@ export async function resolveAntigravityProjectId(
108
108
 
109
109
  type CcaPart = { text: string } | { inlineData: { mimeType: string; data: string } };
110
110
 
111
+ // Reasoning-artifact replay (signed thinking / thoughtSignature / encrypted reasoning) is
112
+ // deliberately OUT OF SCOPE for antigravity: it serves Gemini- and Claude-shaped models over
113
+ // the CCA wire (neither the native Anthropic messages nor the public Gemini shape), so it
114
+ // captures no artifacts and replays none — Message.toolUse/toolResults/reasoningArtifacts are
115
+ // ignored here. The provider-keyed match guard (D3) keeps "anthropic"/"gemini" artifacts from
116
+ // ever being re-injected by this adapter, so there is no cross-adapter leakage.
111
117
  function antigravityContents(messages: Message[]): { role: "user" | "model"; parts: CcaPart[] }[] {
112
118
  const contents: { role: "user" | "model"; parts: CcaPart[] }[] = [];
113
119
  for (const m of messages) {
@@ -54,6 +54,24 @@ export function parseRetryFromBody(detail: string | null | undefined): number |
54
54
  * and any `Retry-After`. Use at every adapter's `!response.ok` site so the retry
55
55
  * layer sees a uniform, status-carrying, backoff-aware error.
56
56
  */
/**
 * One-shot reasoning-artifact fail-safe around a request.
 *
 * Sends the request with artifacts intact. If the response is not OK and
 * `isArtifactError` recognises it (status + body text) as a rejected replay artifact,
 * the request is sent exactly once more with artifacts stripped and that second response
 * is returned as-is, whatever its status. Any other failure returns the first response
 * untouched, with its body still readable because only a clone is consumed here.
 *
 * The match is a heuristic on the error body; tighten it to structured error codes
 * if/when the providers expose them.
 *
 * @param send            Rebuilds and issues the request; receives `true` to strip artifacts.
 * @param isArtifactError Predicate over the failed response's status and body text.
 * @returns The first response, or the stripped retry's response when a retry happened.
 */
export async function fetchWithArtifactFailSafe(
  send: (stripArtifacts: boolean) => Promise<Response>,
  isArtifactError: (status: number, body: string) => boolean,
): Promise<Response> {
  const first = await send(false);
  if (first.ok) return first;

  // Read a clone so the caller can still consume the original body for error reporting.
  const copy = first.clone();
  let detail: string;
  try {
    detail = await copy.text();
  } catch {
    detail = "";
  }

  if (!isArtifactError(first.status, detail)) return first;
  return send(true);
}
74
+
57
75
  export async function providerHttpError(provider: string, response: Response, context?: string): Promise<ProviderHttpError> {
58
76
  const detail = await response.text().catch(() => "");
59
77
  const retryAfterMs = parseRetryAfter(response.headers.get("retry-after")) ?? parseRetryFromBody(detail);
@@ -1,7 +1,7 @@
1
1
  import type { Credential } from "../../auth";
2
2
  import type { CallOptions, Message, ProviderAdapter } from "../types";
3
3
  import { readSse } from "../sse";
4
- import { providerHttpError } from "./errors";
4
+ import { providerHttpError, fetchWithArtifactFailSafe } from "./errors";
5
5
  import { jeoEnv } from "../../util/env";
6
6
  import { serializeToolCalls } from "../../agent/tool-schemas";
7
7
 
@@ -37,35 +37,62 @@ export function geminiThinkingBudget(model: string, effort?: CallOptions["reason
37
37
  return budget;
38
38
  }
39
39
 
40
+
/**
 * Decides whether an assistant turn can be replayed as native functionCall parts with a
 * thoughtSignature.
 *
 * Requires thinking to be enabled, at least one structured `toolUse` entry, and at least
 * one reasoning artifact recorded for provider "gemini" and this exact `modelKey` that
 * carries a `thoughtSignature`.
 *
 * @param m               The message to inspect.
 * @param modelKey        Model id the artifact must have been recorded for.
 * @param thinkingEnabled Whether thinking is enabled for the current call.
 * @returns `true` only when the turn qualifies for native replay.
 */
export function geminiNativizable(m: Message, modelKey: string, thinkingEnabled: boolean): boolean {
  if (!thinkingEnabled || !m.toolUse?.length) return false;
  const hasSignedArtifact = m.reasoningArtifacts?.some(
    a => a.provider === "gemini" && a.model === modelKey && Boolean(a.thoughtSignature),
  );
  return hasSignedArtifact === true;
}
40
48
  /** Shared Gemini request payload (contents + generationConfig + systemInstruction)
41
49
  * used by BOTH the public generativelanguage path (API key) and the Cloud Code
42
50
  * Assist path (OAuth) — only the envelope/endpoint differs. */
43
- export function buildGeminiPayload(messages: Message[], options: CallOptions): { geminiModel: string; payload: Record<string, unknown> } {
51
+ export function buildGeminiPayload(messages: Message[], options: CallOptions, stripArtifacts = false): { geminiModel: string; payload: Record<string, unknown> } {
44
52
  const resolvedModel = options.model.replace(/^(google|gemini)\//, "");
45
53
  let geminiModel = resolvedModel;
46
54
  if (!geminiModel || geminiModel === "claude-3-5-sonnet") geminiModel = "gemini-2.0-flash";
47
55
 
48
56
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
57
+ const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
58
+ const thinkingEnabled = thinkingBudget !== undefined && !stripArtifacts;
49
59
  // Gemini requires strictly ALTERNATING user/model turns. jeo histories can carry
50
60
  // consecutive same-role messages (a compaction summary prepended before a tool-result,
51
61
  // back-to-back tool results, etc.), so coalesce adjacent same-role turns into one
52
- // content block — otherwise the API rejects the request mid-session.
53
- const contents: { role: string; parts: ({ text: string } | { inlineData: { mimeType: string; data: string } })[] }[] = [];
54
- for (const m of messages) {
55
- if (m.role === "system") continue;
62
+ // content block — otherwise the API rejects the request mid-session. Native
63
+ // functionCall/functionResponse parts (with thoughtSignature) are reconstructed for
64
+ // same-model turns to preserve cross-step thought context; else plain text.
65
+ type GeminiPart = Record<string, unknown>;
66
+ const nonSystem = messages.filter(m => m.role !== "system");
67
+ const contents: { role: string; parts: GeminiPart[] }[] = [];
68
+ nonSystem.forEach((m, i) => {
56
69
  const role = m.role === "assistant" ? "model" : "user";
57
- // Clipboard-pasted images become inlineData parts alongside the text part.
58
- const parts: ({ text: string } | { inlineData: { mimeType: string; data: string } })[] = [
59
- ...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
60
- { text: m.content },
61
- ];
62
- const prev = contents[contents.length - 1];
63
- if (prev && prev.role === role) {
64
- prev.parts.push(...parts);
70
+ let parts: GeminiPart[];
71
+ if (m.role === "assistant" && geminiNativizable(m, options.model, thinkingEnabled)) {
72
+ const sig = m.reasoningArtifacts!.find(a => a.provider === "gemini" && a.model === options.model && a.thoughtSignature)?.thoughtSignature;
73
+ parts = m.toolUse!.map((tu, idx) => {
74
+ const p: GeminiPart = { functionCall: { name: tu.tool, args: tu.arguments } };
75
+ if (idx === 0 && sig) p.thoughtSignature = sig; // bind the turn signature to the first call
76
+ return p;
77
+ });
78
+ } else if (m.role === "user" && m.toolResults?.length && i > 0
79
+ && nonSystem[i - 1].role === "assistant"
80
+ && geminiNativizable(nonSystem[i - 1], options.model, thinkingEnabled)) {
81
+ const prevToolUse = nonSystem[i - 1].toolUse ?? [];
82
+ parts = m.toolResults.map(tr => ({
83
+ functionResponse: { name: prevToolUse.find(tu => tu.id === tr.id)?.tool ?? "tool", response: { output: tr.output } },
84
+ }));
85
+ if (m.toolResultExtra) parts.push({ text: m.toolResultExtra });
65
86
  } else {
66
- contents.push({ role, parts });
87
+ parts = [
88
+ ...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
89
+ { text: m.content },
90
+ ];
67
91
  }
68
- }
92
+ const prev = contents[contents.length - 1];
93
+ if (prev && prev.role === role) prev.parts.push(...parts);
94
+ else contents.push({ role, parts });
95
+ });
69
96
 
70
97
  const generationConfig: Record<string, unknown> = {
71
98
  temperature: options.temperature ?? 0.2,
@@ -74,7 +101,7 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
74
101
  // Function-calling and responseMimeType:json are mutually exclusive in the Gemini
75
102
  // API — when native tools are declared, the functionCall parts replace JSON-in-prose.
76
103
  if (options.jsonMode && !options.tools?.length) generationConfig.responseMimeType = "application/json";
77
- const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
104
+
78
105
  // includeThoughts: required for Gemini to STREAM thought summaries (the `thought:true`
79
106
  // parts thoughtOf() routes to onReasoning) — without it the model thinks silently.
80
107
  if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { includeThoughts: true, thinkingBudget };
@@ -91,8 +118,8 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
91
118
  return { geminiModel, payload };
92
119
  }
93
120
 
94
- export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent"): { url: string; headers: Record<string, string>; body: string } {
95
- const { geminiModel, payload } = buildGeminiPayload(messages, options);
121
+ export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent", stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
122
+ const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
96
123
  const oauth = credential.kind === "oauth" ? credential.token : undefined;
97
124
  const apiKey = credential.kind === "api_key" ? credential.token : undefined;
98
125
  let url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(geminiModel)}:${action}`;
@@ -123,8 +150,8 @@ export function getGeminiCliHeaders(modelId?: string): Record<string, string> {
123
150
  * plain `jeo auth login gemini` works without any GEMINI_API_KEY. The body
124
151
  * wraps the standard payload as `{ project, model, request }`.
125
152
  */
126
- export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string): { url: string; headers: Record<string, string>; body: string } {
127
- const { geminiModel, payload } = buildGeminiPayload(messages, options);
153
+ export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string, stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
154
+ const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
128
155
  return {
129
156
  url: `${CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse`,
130
157
  headers: {
@@ -137,8 +164,22 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
137
164
  };
138
165
  }
139
166
 
/**
 * POST a Gemini request through `fetchWithArtifactFailSafe`.
 *
 * `make(strip)` builds the url/headers/body for one attempt. The request is rebuilt with
 * `strip = true` and re-sent only when the first attempt fails with HTTP 400 and a body
 * mentioning a thought signature or function call (case-insensitive heuristic).
 *
 * @param make   Request builder; receives whether reasoning artifacts must be stripped.
 * @param signal Optional abort signal forwarded to every `fetch` attempt.
 * @returns The response of the first attempt, or of the stripped retry when one happened.
 */
function geminiFetchFailSafe(
  make: (stripArtifacts: boolean) => { url: string; headers: Record<string, string>; body: string },
  signal?: AbortSignal,
): Promise<Response> {
  const post = (stripArtifacts: boolean): Promise<Response> => {
    const { url, headers, body } = make(stripArtifacts);
    return fetch(url, { method: "POST", headers, body, signal });
  };
  const looksLikeArtifactRejection = (status: number, body: string): boolean =>
    status === 400 && /thoughtsignature|thought_signature|functioncall|function_call|signature/i.test(body);

  return fetchWithArtifactFailSafe(post, looksLikeArtifactRejection);
}
180
+
140
181
  interface GeminiChunk {
141
- candidates?: { content?: { parts?: { text?: string; thought?: boolean; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
182
+ candidates?: { content?: { parts?: { text?: string; thought?: boolean; thoughtSignature?: string; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
142
183
  promptFeedback?: { blockReason?: string };
143
184
  usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
144
185
  }
@@ -157,6 +198,19 @@ function textOf(chunk: GeminiChunk): string {
157
198
  function thoughtOf(chunk: GeminiChunk): string {
158
199
  return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
159
200
  }
201
+
202
+ /** Emit each NEW thoughtSignature seen on this chunk's parts as a replay artifact (Gemini
203
+ * binds it to the functionCall part — replayed to keep cross-step thought context). `seen`
204
+ * dedups across the streamed chunks of one turn. */
205
+ function captureGeminiSignatures(chunk: GeminiChunk, options: CallOptions, seen: Set<string>): void {
206
+ for (const p of chunk.candidates?.[0]?.content?.parts ?? []) {
207
+ const sig = p.thoughtSignature;
208
+ if (sig && !seen.has(sig)) {
209
+ seen.add(sig);
210
+ options.onReasoningArtifact?.({ provider: "gemini", model: options.model, thoughtSignature: sig });
211
+ }
212
+ }
213
+ }
160
214
  /** Native Gemini functionCall parts → {tool, arguments} (gjc/antigravity parity). Kept
161
215
  * separate from textOf so the re-serialized canonical JSON envelope drives the loop. */
162
216
  function geminiFunctionCallsOf(chunk: GeminiChunk): { tool: string; arguments: Record<string, unknown> }[] {
@@ -197,14 +251,14 @@ function blockedReason(chunk: GeminiChunk): string | undefined {
197
251
  async function* ccaTurn(messages: Message[], options: CallOptions, credential: Credential & { kind: "oauth" }): AsyncGenerator<string> {
198
252
  const { resolveAntigravityProjectId } = await import("./antigravity");
199
253
  const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
200
- const { url, headers, body } = geminiCliRequest(messages, options, credential.token, projectId);
201
- const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
254
+ const response = await geminiFetchFailSafe(strip => geminiCliRequest(messages, options, credential.token, projectId, strip), options.signal);
202
255
  if (!response.ok) throw await providerHttpError("Gemini (Cloud Code Assist)", response);
203
256
  if (!response.body) return;
204
257
  let lastUsage: GeminiChunk["usageMetadata"];
205
258
  let yieldedAny = false;
206
259
  let lastEmptyReason: string | undefined;
207
260
  const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
261
+ const seenSigs = new Set<string>();
208
262
  for await (const data of readSse(response.body)) {
209
263
  let chunk: CcaChunk;
210
264
  try {
@@ -216,6 +270,7 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
216
270
  if (!inner) continue;
217
271
  const thought = thoughtOf(inner);
218
272
  if (thought) options.onReasoning?.(thought);
273
+ captureGeminiSignatures(inner, options, seenSigs);
219
274
  const delta = textOf(inner);
220
275
  if (delta) {
221
276
  yieldedAny = true;
@@ -249,10 +304,10 @@ export const geminiAdapter: ProviderAdapter = {
249
304
  for await (const delta of ccaTurn(messages, options, credential)) out += delta;
250
305
  return out;
251
306
  }
252
- const { url, headers, body } = geminiRequest(messages, options, credential, "generateContent");
253
- const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
307
+ const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "generateContent", strip), options.signal);
254
308
  if (!response.ok) throw await providerHttpError("Gemini", response);
255
309
  const result = (await response.json()) as GeminiChunk;
310
+ captureGeminiSignatures(result, options, new Set());
256
311
  if (result.usageMetadata) {
257
312
  options.onUsage?.({ inputTokens: result.usageMetadata.promptTokenCount, outputTokens: result.usageMetadata.candidatesTokenCount });
258
313
  }
@@ -271,14 +326,14 @@ export const geminiAdapter: ProviderAdapter = {
271
326
  yield* ccaTurn(messages, options, credential);
272
327
  return;
273
328
  }
274
- const { url, headers, body } = geminiRequest(messages, options, credential, "streamGenerateContent");
275
- const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
329
+ const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "streamGenerateContent", strip), options.signal);
276
330
  if (!response.ok) throw await providerHttpError("Gemini", response, "(stream)");
277
331
  if (!response.body) return;
278
332
  let lastUsage: GeminiChunk["usageMetadata"];
279
333
  let yieldedAny = false;
280
334
  let lastEmptyReason: string | undefined;
281
335
  const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
336
+ const seenSigs = new Set<string>();
282
337
  for await (const data of readSse(response.body)) {
283
338
  let chunk: GeminiChunk;
284
339
  try {
@@ -288,6 +343,7 @@ export const geminiAdapter: ProviderAdapter = {
288
343
  }
289
344
  const thought = thoughtOf(chunk);
290
345
  if (thought) options.onReasoning?.(thought);
346
+ captureGeminiSignatures(chunk, options, seenSigs);
291
347
  const delta = textOf(chunk);
292
348
  if (delta) {
293
349
  yieldedAny = true;
@@ -23,6 +23,12 @@ export interface OpenAICompatProviderDef {
23
23
  readonly apiKeyEnv: string;
24
24
  /** Default model id (provider-prefixed) used by `--provider <name>`. */
25
25
  readonly defaultModel: string;
26
+ /** Extra well-known model ids (BARE, not provider-prefixed) for the OFFLINE
27
+ * pick-list fallback shown by `/agents <role> provider <name>` and `--provider`.
28
+ * Live `/models` discovery supersedes this once the provider is logged in, so
29
+ * keep only stable/alias-style ids here (a stale id would 404 at inference).
30
+ * `defaultModel` is always surfaced first regardless of this list. */
31
+ readonly knownModels?: readonly string[];
26
32
  /** Wire protocol: "openai" (/chat/completions, default) or "anthropic" (/v1/messages). */
27
33
  readonly protocol?: "openai" | "anthropic";
28
34
  /** True for subscription/plan products (coding-plan, portal, token-plan, code) rather than
@@ -35,12 +41,12 @@ export interface OpenAICompatProviderDef {
35
41
  }
36
42
 
37
43
  export const OPENAI_COMPAT_PROVIDERS: readonly OpenAICompatProviderDef[] = [
38
- { name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile" },
39
- { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat" },
40
- { name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest" },
44
+ { name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile", knownModels: ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "openai/gpt-oss-120b", "openai/gpt-oss-20b"] },
45
+ { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat", knownModels: ["deepseek-chat", "deepseek-reasoner"] },
46
+ { name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest", knownModels: ["mistral-large-latest", "mistral-small-latest", "codestral-latest", "ministral-8b-latest"] },
41
47
  { name: "openrouter", label: "OpenRouter", baseUrl: "https://openrouter.ai/api/v1", apiKeyEnv: "OPENROUTER_API_KEY", defaultModel: "openrouter/openai/gpt-4o-mini", thinkingFormat: "openrouter" },
42
48
  { name: "together", label: "Together", baseUrl: "https://api.together.xyz/v1", apiKeyEnv: "TOGETHER_API_KEY", defaultModel: "together/meta-llama/Llama-3.3-70B-Instruct-Turbo" },
43
- { name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b" },
49
+ { name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b", knownModels: ["llama-3.3-70b", "llama3.1-8b", "qwen-3-235b-a22b-instruct-2507"] },
44
50
  { name: "fireworks", label: "Fireworks", baseUrl: "https://api.fireworks.ai/inference/v1", apiKeyEnv: "FIREWORKS_API_KEY", defaultModel: "fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct" },
45
51
  { name: "nvidia", label: "NVIDIA", baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", defaultModel: "nvidia/meta/llama-3.3-70b-instruct" },
46
52
  // Additional gjc-parity OpenAI-compatible clouds (authoritative base URLs + env vars).