npm - jeo-code - Versions diffs - 0.6.27 → 0.6.29 - Mend

jeo-code 0.6.27 → 0.6.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/CHANGELOG.md +26 -0
package/README.ja.md +2 -6
package/README.ko.md +2 -6
package/README.md +2 -6
package/README.zh.md +2 -6
package/package.json +1 -1
package/src/agent/compaction.ts +10 -1
package/src/agent/engine.ts +62 -16
package/src/agent/loop.ts +3 -0
package/src/ai/model-catalog.ts +12 -5
package/src/ai/model-manager.ts +1 -0
package/src/ai/providers/anthropic.ts +121 -21
package/src/ai/providers/antigravity.ts +6 -0
package/src/ai/providers/errors.ts +18 -0
package/src/ai/providers/gemini.ts +84 -28
package/src/ai/providers/openai-compatible-catalog.ts +10 -4
package/src/ai/providers/openai-responses.ts +76 -19
package/src/ai/types.ts +55 -2
package/src/commands/launch.ts +90 -22
package/src/tui/app.ts +38 -6
package/src/tui/components/ascii-art.ts +27 -31

package/src/ai/providers/anthropic.ts CHANGED Viewed

@@ -11,6 +11,12 @@ const DEPRECATED_TEMPERATURE = "`temperature` is deprecated for this model.";
 const CLAUDE_CODE_VERSION = "2.1.63";
 const CLAUDE_CODE_SYSTEM_INSTRUCTION = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
 const CLAUDE_BILLING_HEADER_PREFIX = "x-anthropic-billing-header:";
+/** Betas needed for API-key requests: interleaved-thinking enables thinking+tools,
+ *  prompt-caching-scope gives scoped cache breakpoints. The flags are joined with ","
+ *  into a single string, which is sent as the `anthropic-beta` header alongside
+ *  `x-api-key`. */
+const ANTHROPIC_API_KEY_BETA = [
+  "interleaved-thinking-2025-05-14",
+  "prompt-caching-scope-2026-01-05",
+].join(",");
 const ANTHROPIC_OAUTH_BETA = [
   "claude-code-20250219",
   "oauth-2025-04-20",
@@ -88,28 +94,76 @@ function anthropicThinkingBudget(effort: CallOptions["reasoningEffort"], maxToke
   return Math.min(budget, Math.max(1024, maxTokens - 1024));
 }
+type AnthropicContentBlock = Record<string, unknown>; // one wire content block (image, text, thinking, redacted_thinking, tool_use, tool_result)
+type AnthropicMessage = { role: string; content: string | AnthropicContentBlock[] }; // content is a plain string or a list of blocks
+/** True when an assistant turn can be replayed as native tool_use + thinking blocks: it has
+ *  structured toolUse AND a same-model Anthropic reasoning artifact that yields at least one
+ *  valid thinking/redacted block, AND thinking is enabled this call. Native tool_use →
+ *  tool_result is what makes Claude KEEP the prior thinking blocks (plain-text tool feedback
+ *  gets them stripped on most models), so this is the core of cross-step reasoning continuity.
+ *
+ *  @param m               Message to test; only `toolUse` and `reasoningArtifacts` are read
+ *                         (the role is NOT checked here — callers do that).
+ *  @param model           Compared verbatim (`===`) against each artifact's `model` field.
+ *  @param thinkingEnabled When false the result is always false, whatever the message holds.
+ *  @returns true only if all three conditions hold; an artifact counts when its `provider`
+ *           is "anthropic", its `model` matches, and it has a `signature` or `redacted` value. */
+export function anthropicNativizable(m: Message, model: string, thinkingEnabled: boolean): boolean {
+  return thinkingEnabled
+    && !!m.toolUse?.length
+    && !!m.reasoningArtifacts?.some(a => a.provider === "anthropic" && a.model === model && (!!a.signature || !!a.redacted));
+}
+/** Build Anthropic wire messages, reconstructing native tool_use / tool_result / thinking
+ *  blocks for matching turns. When `thinkingEnabled` is false (as it is on a fail-safe
+ *  retry that strips artifacts) everything falls back to the plain string/image content
+ *  (the always-valid shape).
+ *
+ *  An assistant turn and the tool-result turn after it are nativized TOGETHER or not at
+ *  all: the assistant turn must be nativizable AND be immediately followed by a user turn
+ *  whose toolResults answer its toolUse calls id for id. Anthropic rejects a tool_use
+ *  without its tool_result (and vice versa), and that 400 does not mention
+ *  thinking/signature, so the reasoning-artifact fail-safe would not recover from it.
+ *
+ *  @param messages        Conversation history; system messages are dropped here.
+ *  @param model           Model id matched verbatim against each artifact's `model`.
+ *  @param thinkingEnabled Whether native reconstruction is allowed for this request.
+ *  @returns One wire message per non-system input message, in the same order. */
+export function buildAnthropicMessages(messages: Message[], model: string, thinkingEnabled: boolean): AnthropicMessage[] {
+  const nonSystem = messages.filter(m => m.role !== "system");
+  const plain = (m: Message): AnthropicMessage => ({
+    role: m.role,
+    content: m.images?.length
+      ? [
+          ...m.images.map((img): AnthropicContentBlock => ({ type: "image", source: { type: "base64", media_type: img.mediaType, data: img.data } })),
+          ...(m.content ? [{ type: "text", text: m.content } as AnthropicContentBlock] : []),
+        ]
+      : m.content,
+  });
+  // True when nonSystem[i] is a nativizable assistant turn AND nonSystem[i + 1] is a user
+  // tool-result turn whose result ids and the assistant's call ids cover each other.
+  const startsNativePair = (i: number): boolean => {
+    const assistant = nonSystem[i];
+    const reply = nonSystem[i + 1];
+    if (!assistant || assistant.role !== "assistant" || !anthropicNativizable(assistant, model, thinkingEnabled)) return false;
+    if (!reply || reply.role !== "user" || !reply.toolResults?.length) return false;
+    const calls = assistant.toolUse ?? [];
+    const results = reply.toolResults;
+    const callIds = new Set(calls.map(tu => tu.id));
+    const resultIds = new Set(results.map(tr => tr.id));
+    return calls.every(tu => resultIds.has(tu.id)) && results.every(tr => callIds.has(tr.id));
+  };
+  return nonSystem.map((m, i) => {
+    if (startsNativePair(i)) {
+      const blocks: AnthropicContentBlock[] = [];
+      // Thinking / redacted blocks first, in captured order, then the tool calls.
+      for (const a of m.reasoningArtifacts ?? []) {
+        if (a.provider !== "anthropic" || a.model !== model) continue;
+        if (a.signature) blocks.push({ type: "thinking", thinking: a.text ?? "", signature: a.signature });
+        else if (a.redacted) blocks.push({ type: "redacted_thinking", data: a.redacted });
+      }
+      for (const tu of m.toolUse ?? []) blocks.push({ type: "tool_use", id: tu.id, name: tu.tool, input: tu.arguments });
+      return { role: "assistant", content: blocks };
+    }
+    // A tool-result user turn is nativized iff the assistant turn before it was — the same
+    // pair predicate drives both sides, so tool_use and tool_result can never be split.
+    if (i > 0 && startsNativePair(i - 1)) {
+      const blocks: AnthropicContentBlock[] = (m.toolResults ?? []).map(tr => ({
+        type: "tool_result", tool_use_id: tr.id, content: tr.output, is_error: tr.isError,
+      }));
+      if (m.toolResultExtra) blocks.push({ type: "text", text: m.toolResultExtra });
+      return { role: "user", content: blocks };
+    }
+    return plain(m);
+  });
+}
 export function anthropicPayload(
   messages: Message[],
   options: CallOptions,
   stream: boolean,
   includeTemperature: boolean,
   credential: Credential = { kind: "none", provider: "anthropic" },
+  stripArtifacts = false,
 ): string {
   const model = stripAnthropicPrefix(options.model);
   const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
-  // Image attachments (clipboard paste) become Anthropic content blocks; plain
-  // string content is kept for text-only messages (the overwhelmingly common case).
-  type ContentBlock = Record<string, unknown>;
-  const anthropicMessages: { role: string; content: string | ContentBlock[] }[] =
-    messages.filter(m => m.role !== "system").map(m => ({
-      role: m.role,
-      content: m.images?.length
-        ? [
-            ...m.images.map((img): ContentBlock => ({ type: "image", source: { type: "base64", media_type: img.mediaType, data: img.data } })),
-            ...(m.content ? [{ type: "text", text: m.content } as ContentBlock] : []),
-          ]
-        : m.content,
-    }));
+  // Image attachments + native tool/thinking-block reconstruction live in buildAnthropicMessages.
+  const maxTokens = options.maxTokens ?? 4000;
+  const thinkingBudget = anthropicThinkingBudget(options.reasoningEffort, maxTokens);
+  // Reconstruct native tool_use / tool_result / thinking blocks for same-model turns when
+  // thinking is enabled (and not stripped by a fail-safe retry); else plain string/image.
+  const anthropicMessages = buildAnthropicMessages(messages, options.model, thinkingBudget !== undefined && !stripArtifacts);
   // Conversation prompt caching (gjc parity — the main same-model latency gap):
   // one breakpoint on the LAST message caches the entire conversation prefix, so
   // each agent-loop step only pays input processing for the new tail instead of
@@ -125,8 +179,7 @@ export function anthropicPayload(
       last.content[last.content.length - 1] = { ...tail, cache_control: { type: "ephemeral" } };
     }
   }
-  const maxTokens = options.maxTokens ?? 4000;
-  const thinkingBudget = anthropicThinkingBudget(options.reasoningEffort, maxTokens);
   const payload: Record<string, unknown> = {
     model,
     messages: anthropicMessages,
@@ -162,13 +215,14 @@ export function anthropicRequest(
   credential: Credential,
   stream: boolean,
   includeTemperature: boolean,
+  stripArtifacts = false,
 ): { url: string; headers: Record<string, string>; body: string } {
   return {
     // Anthropic-compatible providers (z.ai, MiniMax, …) accept the Messages wire
     // format at their own host; an explicit baseUrl pins `${base}/v1/messages`.
     url: options.baseUrl ? `${options.baseUrl.replace(/\/$/, "")}/v1/messages` : ANTHROPIC_URL,
     headers: headersFor(credential, stream),
-    body: anthropicPayload(messages, options, stream, includeTemperature, credential),
+    body: anthropicPayload(messages, options, stream, includeTemperature, credential, stripArtifacts),
   };
 }
@@ -176,14 +230,21 @@ function isDeprecatedTemperatureError(status: number, detail: string): boolean {
   return status === 400 && detail.includes(DEPRECATED_TEMPERATURE);
 }
+/** A 400 that names thinking/signature/redacted means a replayed reasoning artifact was
+ *  rejected (expired signature, edited history, thinking toggled). The fail-safe retries
+ *  once with artifacts stripped (plain string history) so the turn survives.
+ *
+ *  The match is a case-insensitive substring test on the error body, so ANY 400 whose text
+ *  contains "thinking" or "signature" qualifies, not only artifact rejections.
+ *
+ *  @param status HTTP status of the failed response; only 400 can match.
+ *  @param detail Error body text to search.
+ *  @returns true when the body of a 400 matches the pattern. */
+function isReasoningArtifactError(status: number, detail: string): boolean {
+  return status === 400 && /thinking|signature|redacted_thinking/i.test(detail); // `redacted_thinking` is already covered by `thinking`
+}
 async function postAnthropic(
   messages: Message[],
   options: CallOptions,
   credential: Credential,
   stream: boolean,
 ): Promise<Response> {
-  const send = (includeTemperature: boolean) => {
-    const { url, headers, body } = anthropicRequest(messages, options, credential, stream, includeTemperature);
+  const send = (includeTemperature: boolean, stripArtifacts = false) => {
+    const { url, headers, body } = anthropicRequest(messages, options, credential, stream, includeTemperature, stripArtifacts);
     return fetch(url, { method: "POST", headers, body, signal: options.signal });
   };
@@ -196,6 +257,12 @@ async function postAnthropic(
     if (response.ok) return response;
     throw await providerHttpError("Anthropic", response, stream ? "(stream)" : undefined);
   }
+  // Fail-safe: a rejected replay artifact → retry once with artifacts stripped (plain history).
+  if (isReasoningArtifactError(response.status, detail)) {
+    response = await send(true, true);
+    if (response.ok) return response;
+    throw await providerHttpError("Anthropic", response, stream ? "(stream)" : undefined);
+  }
   throw new ProviderHttpError(
     "Anthropic",
@@ -233,8 +300,16 @@ export const anthropicAdapter: ProviderAdapter = {
   supportsNativeTools: true,
   async call(messages, options, credential) {
     const response = await postAnthropic(messages, options, credential, false);
-    const result = (await response.json()) as { content: { type: string; text?: string; name?: string; input?: unknown }[]; stop_reason?: string; usage?: AnthropicUsage };
+    const result = (await response.json()) as { content: { type: string; text?: string; name?: string; input?: unknown; thinking?: string; signature?: string; data?: string }[]; stop_reason?: string; usage?: AnthropicUsage };
     if (result.usage) options.onUsage?.({ inputTokens: totalInputTokens(result.usage), outputTokens: result.usage.output_tokens });
+    // Capture thinking/redacted blocks as replay artifacts (parity with the stream path).
+    for (const c of result.content) {
+      if (c.type === "thinking" && (c.thinking || c.signature)) {
+        options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, text: c.thinking || undefined, signature: c.signature });
+      } else if (c.type === "redacted_thinking" && c.data) {
+        options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, redacted: c.data });
+      }
+    }
     // Prefer a native tool call (re-serialized to canonical JSON) over any stray text.
     const toolCall = serializeToolCalls(
       result.content
@@ -256,12 +331,16 @@ export const anthropicAdapter: ProviderAdapter = {
     // never as text_delta — accumulate per block index, then re-serialize to canonical
     // JSON and yield it once at the end (concatenation still equals call()).
     const toolBlocks = new Map<number, { name: string; args: string }>();
+    // Thinking blocks stream as content_block_start(type:thinking) + thinking_delta(text)
+    // + signature_delta(signature). Accumulate per index and emit one ReasoningArtifact per
+    // block on stream end so the signed thought can be replayed (gajae continuity).
+    const thinkBlocks = new Map<number, { text: string; signature?: string }>();
     for await (const data of readSse(response.body)) {
       let evt: {
         type?: string;
         index?: number;
-        content_block?: { type?: string; name?: string };
-        delta?: { type?: string; text?: string; partial_json?: string; thinking?: string; stop_reason?: string };
+        content_block?: { type?: string; name?: string; data?: string };
+        delta?: { type?: string; text?: string; partial_json?: string; thinking?: string; signature?: string; stop_reason?: string };
         message?: { usage?: AnthropicUsage };
         usage?: { output_tokens?: number };
       };
@@ -272,6 +351,11 @@ export const anthropicAdapter: ProviderAdapter = {
       }
       if (evt.type === "content_block_start" && evt.content_block?.type === "tool_use" && typeof evt.index === "number") {
         toolBlocks.set(evt.index, { name: evt.content_block.name ?? "", args: "" });
+      } else if (evt.type === "content_block_start" && evt.content_block?.type === "thinking" && typeof evt.index === "number") {
+        thinkBlocks.set(evt.index, { text: "" });
+      } else if (evt.type === "content_block_start" && evt.content_block?.type === "redacted_thinking" && evt.content_block.data) {
+        // Redacted thinking carries opaque `data` directly (no deltas) — emit immediately.
+        options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, redacted: evt.content_block.data });
       } else if (evt.type === "content_block_delta" && evt.delta?.type === "input_json_delta" && typeof evt.index === "number") {
         const b = toolBlocks.get(evt.index);
         if (b) b.args += evt.delta.partial_json ?? "";
@@ -280,6 +364,15 @@ export const anthropicAdapter: ProviderAdapter = {
         yield evt.delta.text;
       } else if (evt.type === "content_block_delta" && evt.delta?.type === "thinking_delta" && evt.delta.thinking) {
         options.onReasoning?.(evt.delta.thinking);
+        if (typeof evt.index === "number") {
+          const tb = thinkBlocks.get(evt.index) ?? { text: "" };
+          tb.text += evt.delta.thinking;
+          thinkBlocks.set(evt.index, tb);
+        }
+      } else if (evt.type === "content_block_delta" && evt.delta?.type === "signature_delta" && evt.delta.signature && typeof evt.index === "number") {
+        const tb = thinkBlocks.get(evt.index) ?? { text: "" };
+        tb.signature = (tb.signature ?? "") + evt.delta.signature;
+        thinkBlocks.set(evt.index, tb);
       } else if (evt.type === "message_start" && evt.message?.usage) {
         // Cache only — usage is reported ONCE at message_delta so an accumulating
         // sink can't double-count input (and a pre-first-chunk retry that replays
@@ -290,6 +383,12 @@ export const anthropicAdapter: ProviderAdapter = {
         if (evt.usage) options.onUsage?.({ inputTokens: cachedInput, outputTokens: evt.usage.output_tokens });
       }
     }
+    // Emit captured thinking blocks as replay artifacts (signed thought + signature).
+    for (const tb of thinkBlocks.values()) {
+      if (tb.text || tb.signature) {
+        options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, text: tb.text || undefined, signature: tb.signature });
+      }
+    }
     const envelope = serializeAccumulatedToolCalls(toolBlocks);
     if (envelope) { yieldedAny = true; yield envelope; }
     if (!yieldedAny) throw emptyCompletionError(stopReason);
@@ -361,6 +460,7 @@ function headersFor(credential: Credential, stream: boolean): Record<string, str
       "content-type": "application/json",
       "x-api-key": credential.token,
       "anthropic-version": "2023-06-01",
+      "anthropic-beta": ANTHROPIC_API_KEY_BETA,
     };
   }
   throw new Error("anthropic adapter requires a credential");

package/src/ai/providers/antigravity.ts CHANGED Viewed

@@ -108,6 +108,12 @@ export async function resolveAntigravityProjectId(
 type CcaPart = { text: string } | { inlineData: { mimeType: string; data: string } };
+// Reasoning-artifact replay (signed thinking / thoughtSignature / encrypted reasoning) is
+// deliberately OUT OF SCOPE for antigravity: it serves Gemini- and Claude-shaped models over
+// the CCA wire (neither the native Anthropic messages nor the public Gemini shape), so it
+// captures no artifacts and replays none — Message.toolUse/toolResults/reasoningArtifacts are
+// ignored here. The provider-keyed match guard (D3) keeps "anthropic"/"gemini" artifacts from
+// ever being re-injected by this adapter, so there is no cross-adapter leakage.
 function antigravityContents(messages: Message[]): { role: "user" | "model"; parts: CcaPart[] }[] {
   const contents: { role: "user" | "model"; parts: CcaPart[] }[] = [];
   for (const m of messages) {

package/src/ai/providers/errors.ts CHANGED Viewed

@@ -54,6 +54,24 @@ export function parseRetryFromBody(detail: string | null | undefined): number |
  * and any `Retry-After`. Use at every adapter's `!response.ok` site so the retry
  * layer sees a uniform, status-carrying, backoff-aware error.
  */
+/**
+ * One-shot reasoning-artifact fail-safe: send the request; if it 400s because a replayed
+ * reasoning artifact (signature / thoughtSignature / encrypted reasoning item) was rejected
+ * — expired signature, edited history, toggled thinking — retry ONCE with artifacts stripped
+ * (plain history).
+ *
+ * The rejected first response is cancelled before the retry: only a clone of it was read,
+ * so without the cancel its own body would stay unread until garbage collection.
+ *
+ * ponytail: heuristic error-body string match — tighten to structured error codes if/when
+ * the providers expose them.
+ *
+ * NOTE(review): this function appears to sit between the preceding doc comment and the
+ * `providerHttpError` declaration that comment describes — confirm and re-attach.
+ *
+ * @param send            Rebuilds and fetches the request; called with `false` first and,
+ *                        on an artifact rejection, once more with `true`.
+ * @param isArtifactError Decides from the status and body text whether the failure is an
+ *                        artifact rejection.
+ * @returns The first response when it is ok or failed for another reason (its body is
+ *          still unread, so the caller can report it); otherwise the retry's response,
+ *          whatever its status.
+ */
+export async function fetchWithArtifactFailSafe(
+  send: (stripArtifacts: boolean) => Promise<Response>,
+  isArtifactError: (status: number, body: string) => boolean,
+): Promise<Response> {
+  const first = await send(false);
+  if (first.ok) return first;
+  // Read a clone so `first` keeps an unread body for the caller's error reporting.
+  const body = await first.clone().text().catch(() => "");
+  if (!isArtifactError(first.status, body)) return first;
+  // `first` is being discarded: cancel its body rather than leave it dangling.
+  // Fire-and-forget — a cancel failure must neither delay nor fail the retry.
+  void first.body?.cancel().catch(() => undefined);
+  return send(true);
+}
 export async function providerHttpError(provider: string, response: Response, context?: string): Promise<ProviderHttpError> {
   const detail = await response.text().catch(() => "");
   const retryAfterMs = parseRetryAfter(response.headers.get("retry-after")) ?? parseRetryFromBody(detail);

package/src/ai/providers/gemini.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import type { Credential } from "../../auth";
 import type { CallOptions, Message, ProviderAdapter } from "../types";
 import { readSse } from "../sse";
-import { providerHttpError } from "./errors";
+import { providerHttpError, fetchWithArtifactFailSafe } from "./errors";
 import { jeoEnv } from "../../util/env";
 import { serializeToolCalls } from "../../agent/tool-schemas";
@@ -37,35 +37,62 @@ export function geminiThinkingBudget(model: string, effort?: CallOptions["reason
   return budget;
 }
+/** True when an assistant turn can replay native functionCall + thoughtSignature: it has
+ *  structured toolUse AND a same-model Gemini thoughtSignature artifact, AND thinking is on.
+ *
+ *  @param m               Message to test; only `toolUse` and `reasoningArtifacts` are read
+ *                         (the role is not checked here).
+ *  @param modelKey        Compared verbatim (`===`) against each artifact's `model` field.
+ *  @param thinkingEnabled When false the result is always false.
+ *  @returns true only when all three conditions hold. */
+export function geminiNativizable(m: Message, modelKey: string, thinkingEnabled: boolean): boolean {
+  return thinkingEnabled
+    && !!m.toolUse?.length
+    && !!m.reasoningArtifacts?.some(a => a.provider === "gemini" && a.model === modelKey && !!a.thoughtSignature);
+}
 /** Shared Gemini request payload (contents + generationConfig + systemInstruction)
  *  used by BOTH the public generativelanguage path (API key) and the Cloud Code
  *  Assist path (OAuth) — only the envelope/endpoint differs. */
-export function buildGeminiPayload(messages: Message[], options: CallOptions): { geminiModel: string; payload: Record<string, unknown> } {
+export function buildGeminiPayload(messages: Message[], options: CallOptions, stripArtifacts = false): { geminiModel: string; payload: Record<string, unknown> } {
   const resolvedModel = options.model.replace(/^(google|gemini)\//, "");
   let geminiModel = resolvedModel;
   if (!geminiModel || geminiModel === "claude-3-5-sonnet") geminiModel = "gemini-2.0-flash";
   const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
+  const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
+  const thinkingEnabled = thinkingBudget !== undefined && !stripArtifacts;
   // Gemini requires strictly ALTERNATING user/model turns. jeo histories can carry
   // consecutive same-role messages (a compaction summary prepended before a tool-result,
   // back-to-back tool results, etc.), so coalesce adjacent same-role turns into one
-  // content block — otherwise the API rejects the request mid-session.
-  const contents: { role: string; parts: ({ text: string } | { inlineData: { mimeType: string; data: string } })[] }[] = [];
-  for (const m of messages) {
-    if (m.role === "system") continue;
+  // content block — otherwise the API rejects the request mid-session. Native
+  // functionCall/functionResponse parts (with thoughtSignature) are reconstructed for
+  // same-model turns to preserve cross-step thought context; else plain text.
+  type GeminiPart = Record<string, unknown>;
+  const nonSystem = messages.filter(m => m.role !== "system");
+  const contents: { role: string; parts: GeminiPart[] }[] = [];
+  nonSystem.forEach((m, i) => {
     const role = m.role === "assistant" ? "model" : "user";
-    // Clipboard-pasted images become inlineData parts alongside the text part.
-    const parts: ({ text: string } | { inlineData: { mimeType: string; data: string } })[] = [
-      ...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
-      { text: m.content },
-    ];
-    const prev = contents[contents.length - 1];
-    if (prev && prev.role === role) {
-      prev.parts.push(...parts);
+    let parts: GeminiPart[];
+    if (m.role === "assistant" && geminiNativizable(m, options.model, thinkingEnabled)) {
+      const sig = m.reasoningArtifacts!.find(a => a.provider === "gemini" && a.model === options.model && a.thoughtSignature)?.thoughtSignature;
+      parts = m.toolUse!.map((tu, idx) => {
+        const p: GeminiPart = { functionCall: { name: tu.tool, args: tu.arguments } };
+        if (idx === 0 && sig) p.thoughtSignature = sig; // bind the turn signature to the first call
+        return p;
+      });
+    } else if (m.role === "user" && m.toolResults?.length && i > 0
+        && nonSystem[i - 1].role === "assistant"
+        && geminiNativizable(nonSystem[i - 1], options.model, thinkingEnabled)) {
+      const prevToolUse = nonSystem[i - 1].toolUse ?? [];
+      parts = m.toolResults.map(tr => ({
+        functionResponse: { name: prevToolUse.find(tu => tu.id === tr.id)?.tool ?? "tool", response: { output: tr.output } },
+      }));
+      if (m.toolResultExtra) parts.push({ text: m.toolResultExtra });
     } else {
-      contents.push({ role, parts });
+      parts = [
+        ...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
+        { text: m.content },
+      ];
     }
-  }
+    const prev = contents[contents.length - 1];
+    if (prev && prev.role === role) prev.parts.push(...parts);
+    else contents.push({ role, parts });
+  });
   const generationConfig: Record<string, unknown> = {
     temperature: options.temperature ?? 0.2,
@@ -74,7 +101,7 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
   // Function-calling and responseMimeType:json are mutually exclusive in the Gemini
   // API — when native tools are declared, the functionCall parts replace JSON-in-prose.
   if (options.jsonMode && !options.tools?.length) generationConfig.responseMimeType = "application/json";
-  const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
   // includeThoughts: required for Gemini to STREAM thought summaries (the `thought:true`
   // parts thoughtOf() routes to onReasoning) — without it the model thinks silently.
   if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { includeThoughts: true, thinkingBudget };
@@ -91,8 +118,8 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
   return { geminiModel, payload };
 }
-export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent"): { url: string; headers: Record<string, string>; body: string } {
-  const { geminiModel, payload } = buildGeminiPayload(messages, options);
+export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent", stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
+  const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
   const oauth = credential.kind === "oauth" ? credential.token : undefined;
   const apiKey = credential.kind === "api_key" ? credential.token : undefined;
   let url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(geminiModel)}:${action}`;
@@ -123,8 +150,8 @@ export function getGeminiCliHeaders(modelId?: string): Record<string, string> {
  * plain `jeo auth login gemini` works without any GEMINI_API_KEY. The body
  * wraps the standard payload as `{ project, model, request }`.
  */
-export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string): { url: string; headers: Record<string, string>; body: string } {
-  const { geminiModel, payload } = buildGeminiPayload(messages, options);
+export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string, stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
+  const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
   return {
     url: `${CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse`,
     headers: {
@@ -137,8 +164,22 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
   };
 }
+/** POST a Gemini request with a reasoning-artifact fail-safe (see fetchWithArtifactFailSafe).
+ *
+ *  @param make   Builds the request (url, headers, body) for the given `stripArtifacts`
+ *                flag; it is invoked again for each attempt.
+ *  @param signal Optional abort signal passed to every `fetch`.
+ *  @returns Whatever fetchWithArtifactFailSafe resolves to — `ok` is not checked here.
+ *
+ *  The error predicate is a case-insensitive substring match on 400 bodies. NOTE(review):
+ *  the bare `signature` alternative already covers `thoughtsignature`/`thought_signature`,
+ *  and `functioncall`/`function_call` will also match 400s about function calls that have
+ *  nothing to do with replayed artifacts — confirm this breadth is intended. */
+function geminiFetchFailSafe(
+  make: (stripArtifacts: boolean) => { url: string; headers: Record<string, string>; body: string },
+  signal?: AbortSignal,
+): Promise<Response> {
+  return fetchWithArtifactFailSafe(
+    strip => {
+      const r = make(strip);
+      return fetch(r.url, { method: "POST", headers: r.headers, body: r.body, signal });
+    },
+    (status, body) => status === 400 && /thoughtsignature|thought_signature|functioncall|function_call|signature/i.test(body),
+  );
+}
 interface GeminiChunk {
-  candidates?: { content?: { parts?: { text?: string; thought?: boolean; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
+  candidates?: { content?: { parts?: { text?: string; thought?: boolean; thoughtSignature?: string; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
   promptFeedback?: { blockReason?: string };
   usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
 }
@@ -157,6 +198,19 @@ function textOf(chunk: GeminiChunk): string {
 function thoughtOf(chunk: GeminiChunk): string {
   return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
 }
+/** Emit each NEW thoughtSignature seen on this chunk's parts as a replay artifact (Gemini
+ *  binds it to the functionCall part — replayed to keep cross-step thought context). `seen`
+ *  dedups across the streamed chunks of one turn.
+ *
+ *  Only the first candidate (`candidates[0]`) is inspected. Each artifact is tagged with
+ *  `options.model` exactly as passed in.
+ *
+ *  @param chunk   A parsed response (or stream chunk) to scan.
+ *  @param options Supplies `model` and the optional `onReasoningArtifact` sink.
+ *  @param seen    Signatures already emitted; mutated — new ones are added even when no
+ *                 sink is registered. */
+function captureGeminiSignatures(chunk: GeminiChunk, options: CallOptions, seen: Set<string>): void {
+  for (const p of chunk.candidates?.[0]?.content?.parts ?? []) {
+    const sig = p.thoughtSignature;
+    if (sig && !seen.has(sig)) {
+      seen.add(sig);
+      options.onReasoningArtifact?.({ provider: "gemini", model: options.model, thoughtSignature: sig });
+    }
+  }
+}
 /** Native Gemini functionCall parts → {tool, arguments} (gjc/antigravity parity). Kept
  *  separate from textOf so the re-serialized canonical JSON envelope drives the loop. */
 function geminiFunctionCallsOf(chunk: GeminiChunk): { tool: string; arguments: Record<string, unknown> }[] {
@@ -197,14 +251,14 @@ function blockedReason(chunk: GeminiChunk): string | undefined {
 async function* ccaTurn(messages: Message[], options: CallOptions, credential: Credential & { kind: "oauth" }): AsyncGenerator<string> {
   const { resolveAntigravityProjectId } = await import("./antigravity");
   const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
-  const { url, headers, body } = geminiCliRequest(messages, options, credential.token, projectId);
-  const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
+  const response = await geminiFetchFailSafe(strip => geminiCliRequest(messages, options, credential.token, projectId, strip), options.signal);
   if (!response.ok) throw await providerHttpError("Gemini (Cloud Code Assist)", response);
   if (!response.body) return;
   let lastUsage: GeminiChunk["usageMetadata"];
   let yieldedAny = false;
   let lastEmptyReason: string | undefined;
   const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
+  const seenSigs = new Set<string>();
   for await (const data of readSse(response.body)) {
     let chunk: CcaChunk;
     try {
@@ -216,6 +270,7 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
     if (!inner) continue;
     const thought = thoughtOf(inner);
     if (thought) options.onReasoning?.(thought);
+    captureGeminiSignatures(inner, options, seenSigs);
     const delta = textOf(inner);
     if (delta) {
       yieldedAny = true;
@@ -249,10 +304,10 @@ export const geminiAdapter: ProviderAdapter = {
       for await (const delta of ccaTurn(messages, options, credential)) out += delta;
       return out;
     }
-    const { url, headers, body } = geminiRequest(messages, options, credential, "generateContent");
-    const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
+    const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "generateContent", strip), options.signal);
     if (!response.ok) throw await providerHttpError("Gemini", response);
     const result = (await response.json()) as GeminiChunk;
+    captureGeminiSignatures(result, options, new Set());
     if (result.usageMetadata) {
       options.onUsage?.({ inputTokens: result.usageMetadata.promptTokenCount, outputTokens: result.usageMetadata.candidatesTokenCount });
     }
@@ -271,14 +326,14 @@ export const geminiAdapter: ProviderAdapter = {
       yield* ccaTurn(messages, options, credential);
       return;
     }
-    const { url, headers, body } = geminiRequest(messages, options, credential, "streamGenerateContent");
-    const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
+    const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "streamGenerateContent", strip), options.signal);
     if (!response.ok) throw await providerHttpError("Gemini", response, "(stream)");
     if (!response.body) return;
     let lastUsage: GeminiChunk["usageMetadata"];
     let yieldedAny = false;
     let lastEmptyReason: string | undefined;
     const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
+    const seenSigs = new Set<string>();
     for await (const data of readSse(response.body)) {
       let chunk: GeminiChunk;
       try {
@@ -288,6 +343,7 @@ export const geminiAdapter: ProviderAdapter = {
       }
       const thought = thoughtOf(chunk);
       if (thought) options.onReasoning?.(thought);
+      captureGeminiSignatures(chunk, options, seenSigs);
       const delta = textOf(chunk);
       if (delta) {
         yieldedAny = true;

package/src/ai/providers/openai-compatible-catalog.ts CHANGED Viewed

@@ -23,6 +23,12 @@ export interface OpenAICompatProviderDef {
   readonly apiKeyEnv: string;
   /** Default model id (provider-prefixed) used by `--provider <name>`. */
   readonly defaultModel: string;
+  /** Extra well-known model ids (BARE, not provider-prefixed) for the OFFLINE
+   *  pick-list fallback shown by `/agents <role> provider <name>` and `--provider`.
+   *  Live `/models` discovery supersedes this once the provider is logged in, so
+   *  keep only stable/alias-style ids here (a stale id would 404 at inference).
+   *  `defaultModel` is always surfaced first regardless of this list. */
+  readonly knownModels?: readonly string[];
   /** Wire protocol: "openai" (/chat/completions, default) or "anthropic" (/v1/messages). */
   readonly protocol?: "openai" | "anthropic";
   /** True for subscription/plan products (coding-plan, portal, token-plan, code) rather than
@@ -35,12 +41,12 @@ export interface OpenAICompatProviderDef {
 }
 export const OPENAI_COMPAT_PROVIDERS: readonly OpenAICompatProviderDef[] = [
-  { name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile" },
-  { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat" },
-  { name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest" },
+  { name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile", knownModels: ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "openai/gpt-oss-120b", "openai/gpt-oss-20b"] },
+  { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat", knownModels: ["deepseek-chat", "deepseek-reasoner"] },
+  { name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest", knownModels: ["mistral-large-latest", "mistral-small-latest", "codestral-latest", "ministral-8b-latest"] },
   { name: "openrouter", label: "OpenRouter", baseUrl: "https://openrouter.ai/api/v1", apiKeyEnv: "OPENROUTER_API_KEY", defaultModel: "openrouter/openai/gpt-4o-mini", thinkingFormat: "openrouter" },
   { name: "together", label: "Together", baseUrl: "https://api.together.xyz/v1", apiKeyEnv: "TOGETHER_API_KEY", defaultModel: "together/meta-llama/Llama-3.3-70B-Instruct-Turbo" },
-  { name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b" },
+  { name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b", knownModels: ["llama-3.3-70b", "llama3.1-8b", "qwen-3-235b-a22b-instruct-2507"] },
   { name: "fireworks", label: "Fireworks", baseUrl: "https://api.fireworks.ai/inference/v1", apiKeyEnv: "FIREWORKS_API_KEY", defaultModel: "fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct" },
   { name: "nvidia", label: "NVIDIA", baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", defaultModel: "nvidia/meta/llama-3.3-70b-instruct" },
   // Additional gjc-parity OpenAI-compatible clouds (authoritative base URLs + env vars).