jeo-code 0.6.27 → 0.6.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import type { Credential } from "../../auth";
2
2
  import type { CallOptions, Message, ProviderAdapter } from "../types";
3
3
  import { readSse } from "../sse";
4
- import { providerHttpError } from "./errors";
4
+ import { providerHttpError, fetchWithArtifactFailSafe } from "./errors";
5
5
  import { jeoEnv } from "../../util/env";
6
6
  import { serializeToolCalls } from "../../agent/tool-schemas";
7
7
 
@@ -37,35 +37,62 @@ export function geminiThinkingBudget(model: string, effort?: CallOptions["reason
37
37
  return budget;
38
38
  }
39
39
 
40
+
41
/**
 * Decides whether an assistant turn may be replayed to Gemini in native form, i.e. as
 * functionCall parts carrying a thoughtSignature.
 *
 * All three conditions must hold:
 *  - thinking is enabled for the call (`thinkingEnabled`),
 *  - the turn recorded at least one structured tool call (`m.toolUse`),
 *  - one of its reasoning artifacts is a "gemini" artifact stamped with `modelKey` and
 *    carrying a non-empty thoughtSignature.
 *
 * @param m               the message to inspect
 * @param modelKey        model id the artifact must have been stamped with
 * @param thinkingEnabled whether thinking is on for the outgoing request
 * @returns true when every condition above is met, false otherwise
 */
export function geminiNativizable(m: Message, modelKey: string, thinkingEnabled: boolean): boolean {
  if (!thinkingEnabled) return false;
  if (!m.toolUse?.length) return false;
  for (const artifact of m.reasoningArtifacts ?? []) {
    const sameModelGemini = artifact.provider === "gemini" && artifact.model === modelKey;
    if (sameModelGemini && artifact.thoughtSignature) return true;
  }
  return false;
}
40
48
  /** Shared Gemini request payload (contents + generationConfig + systemInstruction)
41
49
  * used by BOTH the public generativelanguage path (API key) and the Cloud Code
42
50
  * Assist path (OAuth) — only the envelope/endpoint differs. */
43
- export function buildGeminiPayload(messages: Message[], options: CallOptions): { geminiModel: string; payload: Record<string, unknown> } {
51
+ export function buildGeminiPayload(messages: Message[], options: CallOptions, stripArtifacts = false): { geminiModel: string; payload: Record<string, unknown> } {
44
52
  const resolvedModel = options.model.replace(/^(google|gemini)\//, "");
45
53
  let geminiModel = resolvedModel;
46
54
  if (!geminiModel || geminiModel === "claude-3-5-sonnet") geminiModel = "gemini-2.0-flash";
47
55
 
48
56
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
57
+ const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
58
+ const thinkingEnabled = thinkingBudget !== undefined && !stripArtifacts;
49
59
  // Gemini requires strictly ALTERNATING user/model turns. jeo histories can carry
50
60
  // consecutive same-role messages (a compaction summary prepended before a tool-result,
51
61
  // back-to-back tool results, etc.), so coalesce adjacent same-role turns into one
52
- // content block — otherwise the API rejects the request mid-session.
53
- const contents: { role: string; parts: ({ text: string } | { inlineData: { mimeType: string; data: string } })[] }[] = [];
54
- for (const m of messages) {
55
- if (m.role === "system") continue;
62
+ // content block — otherwise the API rejects the request mid-session. Native
63
+ // functionCall/functionResponse parts (with thoughtSignature) are reconstructed for
64
+ // same-model turns to preserve cross-step thought context; else plain text.
65
+ type GeminiPart = Record<string, unknown>;
66
+ const nonSystem = messages.filter(m => m.role !== "system");
67
+ const contents: { role: string; parts: GeminiPart[] }[] = [];
68
+ nonSystem.forEach((m, i) => {
56
69
  const role = m.role === "assistant" ? "model" : "user";
57
- // Clipboard-pasted images become inlineData parts alongside the text part.
58
- const parts: ({ text: string } | { inlineData: { mimeType: string; data: string } })[] = [
59
- ...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
60
- { text: m.content },
61
- ];
62
- const prev = contents[contents.length - 1];
63
- if (prev && prev.role === role) {
64
- prev.parts.push(...parts);
70
+ let parts: GeminiPart[];
71
+ if (m.role === "assistant" && geminiNativizable(m, options.model, thinkingEnabled)) {
72
+ const sig = m.reasoningArtifacts!.find(a => a.provider === "gemini" && a.model === options.model && a.thoughtSignature)?.thoughtSignature;
73
+ parts = m.toolUse!.map((tu, idx) => {
74
+ const p: GeminiPart = { functionCall: { name: tu.tool, args: tu.arguments } };
75
+ if (idx === 0 && sig) p.thoughtSignature = sig; // bind the turn signature to the first call
76
+ return p;
77
+ });
78
+ } else if (m.role === "user" && m.toolResults?.length && i > 0
79
+ && nonSystem[i - 1].role === "assistant"
80
+ && geminiNativizable(nonSystem[i - 1], options.model, thinkingEnabled)) {
81
+ const prevToolUse = nonSystem[i - 1].toolUse ?? [];
82
+ parts = m.toolResults.map(tr => ({
83
+ functionResponse: { name: prevToolUse.find(tu => tu.id === tr.id)?.tool ?? "tool", response: { output: tr.output } },
84
+ }));
85
+ if (m.toolResultExtra) parts.push({ text: m.toolResultExtra });
65
86
  } else {
66
- contents.push({ role, parts });
87
+ parts = [
88
+ ...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
89
+ { text: m.content },
90
+ ];
67
91
  }
68
- }
92
+ const prev = contents[contents.length - 1];
93
+ if (prev && prev.role === role) prev.parts.push(...parts);
94
+ else contents.push({ role, parts });
95
+ });
69
96
 
70
97
  const generationConfig: Record<string, unknown> = {
71
98
  temperature: options.temperature ?? 0.2,
@@ -74,7 +101,7 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
74
101
  // Function-calling and responseMimeType:json are mutually exclusive in the Gemini
75
102
  // API — when native tools are declared, the functionCall parts replace JSON-in-prose.
76
103
  if (options.jsonMode && !options.tools?.length) generationConfig.responseMimeType = "application/json";
77
- const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
104
+
78
105
  // includeThoughts: required for Gemini to STREAM thought summaries (the `thought:true`
79
106
  // parts thoughtOf() routes to onReasoning) — without it the model thinks silently.
80
107
  if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { includeThoughts: true, thinkingBudget };
@@ -91,8 +118,8 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
91
118
  return { geminiModel, payload };
92
119
  }
93
120
 
94
- export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent"): { url: string; headers: Record<string, string>; body: string } {
95
- const { geminiModel, payload } = buildGeminiPayload(messages, options);
121
+ export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent", stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
122
+ const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
96
123
  const oauth = credential.kind === "oauth" ? credential.token : undefined;
97
124
  const apiKey = credential.kind === "api_key" ? credential.token : undefined;
98
125
  let url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(geminiModel)}:${action}`;
@@ -123,8 +150,8 @@ export function getGeminiCliHeaders(modelId?: string): Record<string, string> {
123
150
  * plain `jeo auth login gemini` works without any GEMINI_API_KEY. The body
124
151
  * wraps the standard payload as `{ project, model, request }`.
125
152
  */
126
- export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string): { url: string; headers: Record<string, string>; body: string } {
127
- const { geminiModel, payload } = buildGeminiPayload(messages, options);
153
+ export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string, stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
154
+ const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
128
155
  return {
129
156
  url: `${CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse`,
130
157
  headers: {
@@ -137,8 +164,22 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
137
164
  };
138
165
  }
139
166
 
167
/**
 * POSTs a Gemini request through fetchWithArtifactFailSafe.
 *
 * @param make   builds the request (url, headers, body); it receives the strip flag that
 *               the fail-safe hands to its attempt callback
 * @param signal optional abort signal, forwarded to fetch
 * @returns whatever fetchWithArtifactFailSafe resolves to
 *
 * The predicate passed to the fail-safe is true for an HTTP 400 whose body mentions a
 * thought signature or a function call (case-insensitive).
 * NOTE(review): how the fail-safe reacts to a matching response is defined in
 * fetchWithArtifactFailSafe, which is not visible here.
 */
function geminiFetchFailSafe(
  make: (stripArtifacts: boolean) => { url: string; headers: Record<string, string>; body: string },
  signal?: AbortSignal,
): Promise<Response> {
  const attempt = (strip: boolean): Promise<Response> => {
    const { url, headers, body } = make(strip);
    return fetch(url, { method: "POST", headers, body, signal });
  };
  const isArtifactRejection = (status: number, body: string): boolean =>
    status === 400 && /thoughtsignature|thought_signature|functioncall|function_call|signature/i.test(body);
  return fetchWithArtifactFailSafe(attempt, isArtifactRejection);
}
180
+
140
181
  interface GeminiChunk {
141
- candidates?: { content?: { parts?: { text?: string; thought?: boolean; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
182
+ candidates?: { content?: { parts?: { text?: string; thought?: boolean; thoughtSignature?: string; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
142
183
  promptFeedback?: { blockReason?: string };
143
184
  usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
144
185
  }
@@ -157,6 +198,19 @@ function textOf(chunk: GeminiChunk): string {
157
198
  function thoughtOf(chunk: GeminiChunk): string {
158
199
  return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
159
200
  }
201
+
202
/**
 * Reports every thoughtSignature on the first candidate's parts that has not been
 * reported before, via `options.onReasoningArtifact`.
 *
 * @param chunk   the response chunk to scan
 * @param options call options; supplies the sink and the `model` stamped on each artifact
 * @param seen    signatures already reported; new ones are added to it, so passing the
 *                same set for successive chunks suppresses repeats
 */
function captureGeminiSignatures(chunk: GeminiChunk, options: CallOptions, seen: Set<string>): void {
  const parts = chunk.candidates?.[0]?.content?.parts ?? [];
  for (const { thoughtSignature } of parts) {
    if (!thoughtSignature || seen.has(thoughtSignature)) continue;
    // Record before emitting so a repeat later in the same chunk is skipped too.
    seen.add(thoughtSignature);
    options.onReasoningArtifact?.({ provider: "gemini", model: options.model, thoughtSignature });
  }
}
160
214
  /** Native Gemini functionCall parts → {tool, arguments} (gjc/antigravity parity). Kept
161
215
  * separate from textOf so the re-serialized canonical JSON envelope drives the loop. */
162
216
  function geminiFunctionCallsOf(chunk: GeminiChunk): { tool: string; arguments: Record<string, unknown> }[] {
@@ -197,14 +251,14 @@ function blockedReason(chunk: GeminiChunk): string | undefined {
197
251
  async function* ccaTurn(messages: Message[], options: CallOptions, credential: Credential & { kind: "oauth" }): AsyncGenerator<string> {
198
252
  const { resolveAntigravityProjectId } = await import("./antigravity");
199
253
  const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
200
- const { url, headers, body } = geminiCliRequest(messages, options, credential.token, projectId);
201
- const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
254
+ const response = await geminiFetchFailSafe(strip => geminiCliRequest(messages, options, credential.token, projectId, strip), options.signal);
202
255
  if (!response.ok) throw await providerHttpError("Gemini (Cloud Code Assist)", response);
203
256
  if (!response.body) return;
204
257
  let lastUsage: GeminiChunk["usageMetadata"];
205
258
  let yieldedAny = false;
206
259
  let lastEmptyReason: string | undefined;
207
260
  const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
261
+ const seenSigs = new Set<string>();
208
262
  for await (const data of readSse(response.body)) {
209
263
  let chunk: CcaChunk;
210
264
  try {
@@ -216,6 +270,7 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
216
270
  if (!inner) continue;
217
271
  const thought = thoughtOf(inner);
218
272
  if (thought) options.onReasoning?.(thought);
273
+ captureGeminiSignatures(inner, options, seenSigs);
219
274
  const delta = textOf(inner);
220
275
  if (delta) {
221
276
  yieldedAny = true;
@@ -249,10 +304,10 @@ export const geminiAdapter: ProviderAdapter = {
249
304
  for await (const delta of ccaTurn(messages, options, credential)) out += delta;
250
305
  return out;
251
306
  }
252
- const { url, headers, body } = geminiRequest(messages, options, credential, "generateContent");
253
- const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
307
+ const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "generateContent", strip), options.signal);
254
308
  if (!response.ok) throw await providerHttpError("Gemini", response);
255
309
  const result = (await response.json()) as GeminiChunk;
310
+ captureGeminiSignatures(result, options, new Set());
256
311
  if (result.usageMetadata) {
257
312
  options.onUsage?.({ inputTokens: result.usageMetadata.promptTokenCount, outputTokens: result.usageMetadata.candidatesTokenCount });
258
313
  }
@@ -271,14 +326,14 @@ export const geminiAdapter: ProviderAdapter = {
271
326
  yield* ccaTurn(messages, options, credential);
272
327
  return;
273
328
  }
274
- const { url, headers, body } = geminiRequest(messages, options, credential, "streamGenerateContent");
275
- const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
329
+ const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "streamGenerateContent", strip), options.signal);
276
330
  if (!response.ok) throw await providerHttpError("Gemini", response, "(stream)");
277
331
  if (!response.body) return;
278
332
  let lastUsage: GeminiChunk["usageMetadata"];
279
333
  let yieldedAny = false;
280
334
  let lastEmptyReason: string | undefined;
281
335
  const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
336
+ const seenSigs = new Set<string>();
282
337
  for await (const data of readSse(response.body)) {
283
338
  let chunk: GeminiChunk;
284
339
  try {
@@ -288,6 +343,7 @@ export const geminiAdapter: ProviderAdapter = {
288
343
  }
289
344
  const thought = thoughtOf(chunk);
290
345
  if (thought) options.onReasoning?.(thought);
346
+ captureGeminiSignatures(chunk, options, seenSigs);
291
347
  const delta = textOf(chunk);
292
348
  if (delta) {
293
349
  yieldedAny = true;
@@ -23,6 +23,12 @@ export interface OpenAICompatProviderDef {
23
23
  readonly apiKeyEnv: string;
24
24
  /** Default model id (provider-prefixed) used by `--provider <name>`. */
25
25
  readonly defaultModel: string;
26
+ /** Extra well-known model ids (BARE, not provider-prefixed) for the OFFLINE
27
+ * pick-list fallback shown by `/agents <role> provider <name>` and `--provider`.
28
+ * Live `/models` discovery supersedes this once the provider is logged in, so
29
+ * keep only stable/alias-style ids here (a stale id would 404 at inference).
30
+ * `defaultModel` is always surfaced first regardless of this list. */
31
+ readonly knownModels?: readonly string[];
26
32
  /** Wire protocol: "openai" (/chat/completions, default) or "anthropic" (/v1/messages). */
27
33
  readonly protocol?: "openai" | "anthropic";
28
34
  /** True for subscription/plan products (coding-plan, portal, token-plan, code) rather than
@@ -35,12 +41,12 @@ export interface OpenAICompatProviderDef {
35
41
  }
36
42
 
37
43
  export const OPENAI_COMPAT_PROVIDERS: readonly OpenAICompatProviderDef[] = [
38
- { name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile" },
39
- { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat" },
40
- { name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest" },
44
+ { name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile", knownModels: ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "openai/gpt-oss-120b", "openai/gpt-oss-20b"] },
45
+ { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat", knownModels: ["deepseek-chat", "deepseek-reasoner"] },
46
+ { name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest", knownModels: ["mistral-large-latest", "mistral-small-latest", "codestral-latest", "ministral-8b-latest"] },
41
47
  { name: "openrouter", label: "OpenRouter", baseUrl: "https://openrouter.ai/api/v1", apiKeyEnv: "OPENROUTER_API_KEY", defaultModel: "openrouter/openai/gpt-4o-mini", thinkingFormat: "openrouter" },
42
48
  { name: "together", label: "Together", baseUrl: "https://api.together.xyz/v1", apiKeyEnv: "TOGETHER_API_KEY", defaultModel: "together/meta-llama/Llama-3.3-70B-Instruct-Turbo" },
43
- { name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b" },
49
+ { name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b", knownModels: ["llama-3.3-70b", "llama3.1-8b", "qwen-3-235b-a22b-instruct-2507"] },
44
50
  { name: "fireworks", label: "Fireworks", baseUrl: "https://api.fireworks.ai/inference/v1", apiKeyEnv: "FIREWORKS_API_KEY", defaultModel: "fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct" },
45
51
  { name: "nvidia", label: "NVIDIA", baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", defaultModel: "nvidia/meta/llama-3.3-70b-instruct" },
46
52
  // Additional gjc-parity OpenAI-compatible clouds (authoritative base URLs + env vars).
@@ -13,7 +13,7 @@
13
13
  import type { Credential } from "../../auth";
14
14
  import type { CallOptions, Message } from "../types";
15
15
  import { readSse } from "../sse";
16
- import { providerHttpError } from "./errors";
16
+ import { providerHttpError, fetchWithArtifactFailSafe } from "./errors";
17
17
  import { serializeAccumulatedToolCalls } from "../../agent/tool-schemas";
18
18
 
19
19
  export const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses";
@@ -35,28 +35,64 @@ export function extractChatgptAccountId(token: string): string | undefined {
35
35
  }
36
36
  }
37
37
 
38
+
39
+ type ResponsesInputItem = Record<string, unknown>;
40
+
41
/**
 * Decides whether an assistant turn can be replayed to the Responses API as native
 * reasoning + function_call items.
 *
 * Requires at least one structured tool call on the turn and at least one reasoning
 * artifact that is an "openai" artifact stamped with `modelKey` and carries both an item
 * id and encrypted content.
 *
 * @param m        the message to inspect
 * @param modelKey model id the artifact must have been stamped with
 * @returns true when both requirements are met, false otherwise
 */
export function responsesNativizable(m: Message, modelKey: string): boolean {
  if (!m.toolUse?.length) return false;
  for (const artifact of m.reasoningArtifacts ?? []) {
    const sameModelOpenAI = artifact.provider === "openai" && artifact.model === modelKey;
    if (sameModelOpenAI && artifact.itemId && artifact.encrypted) return true;
  }
  return false;
}
47
+
48
/**
 * Build the Responses `input` array for a conversation.
 *
 * System messages are skipped. Every other message becomes a plain
 * `{ role, content: [output_text | input_text, ...input_image] }` item, except for a
 * natively replayable tool-call exchange, which becomes:
 *   reasoning item(s) → function_call item(s) → function_call_output item(s)
 *   (→ a trailing user text item when the result turn has `toolResultExtra`).
 *
 * An exchange is replayed natively only when ALL of these hold:
 *  - `stripArtifacts` is false;
 *  - the assistant turn has structured `toolUse` and at least one same-model OpenAI
 *    reasoning artifact with both id and encrypted content (the criterion
 *    responsesNativizable applies);
 *  - the very next non-system turn is a user turn whose `toolResults` ids are exactly the
 *    assistant turn's `toolUse` ids.
 * The last condition keeps function_call / function_call_output items paired. An
 * interrupted or trailing tool-call turn, or a result turn with foreign ids, falls back to
 * the plain text shape rather than sending an unpaired item.
 *
 * @param messages       conversation history (may include system messages)
 * @param modelKey       model id reasoning artifacts must have been stamped with
 * @param stripArtifacts when true, always produce the plain shape (fail-safe path)
 * @returns the items for the request's `input` field, in conversation order
 */
export function buildResponsesInput(messages: Message[], modelKey: string, stripArtifacts = false): ResponsesInputItem[] {
  const turns = messages.filter(m => m.role !== "system");

  // Plain text shape; images are attached to non-assistant turns only.
  const plain = (m: Message): ResponsesInputItem => ({
    role: m.role,
    content: [
      { type: m.role === "assistant" ? "output_text" : "input_text", text: m.content },
      ...(m.role !== "assistant" && m.images?.length
        ? m.images.map(img => ({ type: "input_image", image_url: `data:${img.mediaType};base64,${img.data}` }))
        : []),
    ],
  });

  // Same-model OpenAI reasoning artifacts that carry both an id and encrypted content.
  const replayable = (m: Message) =>
    (m.reasoningArtifacts ?? []).filter(a => a.provider === "openai" && a.model === modelKey && !!a.itemId && !!a.encrypted);

  // True when turn `i` is a replayable assistant tool-call turn AND turn `i + 1` answers
  // exactly its tool calls. Both halves of the exchange consult this one predicate, so
  // they are always nativized (or not) together.
  const nativeExchangeAt = (i: number): boolean => {
    if (stripArtifacts) return false;
    const call = turns[i];
    const reply = turns[i + 1];
    if (!call || call.role !== "assistant" || !call.toolUse?.length) return false;
    if (replayable(call).length === 0) return false;
    if (!reply || reply.role !== "user" || !reply.toolResults?.length) return false;
    const callIds = new Set(call.toolUse.map(tu => tu.id));
    const resultIds = new Set(reply.toolResults.map(tr => tr.id));
    return callIds.size === resultIds.size && [...callIds].every(id => resultIds.has(id));
  };

  const items: ResponsesInputItem[] = [];
  turns.forEach((m, i) => {
    if (nativeExchangeAt(i)) {
      // Reasoning items first, then the calls they led to.
      for (const a of replayable(m)) {
        items.push({ type: "reasoning", id: a.itemId, encrypted_content: a.encrypted, summary: [] });
      }
      for (const tu of m.toolUse ?? []) {
        items.push({ type: "function_call", call_id: tu.id, name: tu.tool, arguments: JSON.stringify(tu.arguments) });
      }
      return;
    }
    if (i > 0 && nativeExchangeAt(i - 1)) {
      for (const tr of m.toolResults ?? []) {
        items.push({ type: "function_call_output", call_id: tr.id, output: tr.output });
      }
      if (m.toolResultExtra) items.push({ role: "user", content: [{ type: "input_text", text: m.toolResultExtra }] });
      return;
    }
    items.push(plain(m));
  });
  return items;
}
38
85
  /** Build the Codex Responses request (url + headers + body) for an OAuth credential. */
39
86
  export function codexResponsesRequest(
40
87
  messages: Message[],
41
88
  options: CallOptions,
42
89
  credential: Credential,
90
+ stripArtifacts = false,
43
91
  ): { url: string; headers: Record<string, string>; body: string } {
44
92
  const model = options.model.startsWith("openai/") ? options.model.slice(7) : options.model;
45
93
  const token = credential.kind === "none" ? "" : credential.token;
46
94
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
47
- const input = messages
48
- .filter(m => m.role !== "system")
49
- .map(m => ({
50
- role: m.role,
51
- content: [
52
- { type: m.role === "assistant" ? "output_text" : "input_text", text: m.content },
53
- // Clipboard-pasted images ride along as input_image data URLs (user turns only —
54
- // assistant history is always text in jeo).
55
- ...(m.role !== "assistant" && m.images?.length
56
- ? m.images.map(img => ({ type: "input_image", image_url: `data:${img.mediaType};base64,${img.data}` }))
57
- : []),
58
- ],
59
- }));
95
+ const input = buildResponsesInput(messages, options.model, stripArtifacts);
60
96
  const payload: Record<string, unknown> = {
61
97
  model,
62
98
  instructions: systemPrompt ?? "You are a helpful coding assistant.",
@@ -81,6 +117,9 @@ export function codexResponsesRequest(
81
117
  // Both speak the same Responses schema (the body above), so only url+headers differ.
82
118
  if (credential.kind === "api_key") {
83
119
  const base = (options.baseUrl ?? "https://api.openai.com/v1").replace(/\/$/, "");
120
+ // Stateless reasoning replay (public Responses API): ask for encrypted reasoning content
121
+ // so it can be captured and threaded back into a later `input` (store stays false).
122
+ payload.include = ["reasoning.encrypted_content"];
84
123
  return {
85
124
  url: `${base}/responses`,
86
125
  headers: { "content-type": "application/json", authorization: `Bearer ${token}`, accept: "text/event-stream" },
@@ -113,6 +152,8 @@ export interface ResponsesEvent {
113
152
  toolCallName?: string;
114
153
  toolCallArgsDelta?: string;
115
154
  toolCallIndex?: number;
155
+ /** A completed reasoning item carrying its id + encrypted_content (stateless replay capture). */
156
+ reasoningItem?: { id: string; encrypted: string };
116
157
  }
117
158
 
118
159
  /** Parse one Responses SSE `data:` payload into a delta / usage / error. */
@@ -120,7 +161,7 @@ export function parseResponsesEvent(data: string): ResponsesEvent {
120
161
  let o: {
121
162
  type?: string;
122
163
  delta?: unknown;
123
- item?: { type?: string; name?: string };
164
+ item?: { type?: string; name?: string; id?: string; encrypted_content?: string };
124
165
  output_index?: number;
125
166
  response?: {
126
167
  usage?: { input_tokens?: number; output_tokens?: number };
@@ -137,6 +178,11 @@ export function parseResponsesEvent(data: string): ResponsesEvent {
137
178
  if (o.type === "response.output_item.added" && o.item?.type === "function_call") {
138
179
  return { toolCallName: o.item.name, toolCallIndex: o.output_index };
139
180
  }
181
+ // A completed reasoning item carries the encrypted_content we replay later (needs the
182
+ // request's `include: ["reasoning.encrypted_content"]`). Captured on output_item.done.
183
+ if (o.type === "response.output_item.done" && o.item?.type === "reasoning" && o.item.id && o.item.encrypted_content) {
184
+ return { reasoningItem: { id: o.item.id, encrypted: o.item.encrypted_content } };
185
+ }
140
186
  if (o.type === "response.function_call_arguments.delta" && typeof o.delta === "string") {
141
187
  return { toolCallArgsDelta: o.delta, toolCallIndex: o.output_index };
142
188
  }
@@ -185,10 +231,20 @@ function emptyCompletionError(reason: string | undefined): Error {
185
231
  return new Error(`OpenAI Codex returned no content${reason ? ` (${reason})` : ""}${hint}.`);
186
232
  }
187
233
 
234
/**
 * POSTs to the Responses endpoint through fetchWithArtifactFailSafe.
 *
 * The attempt callback rebuilds the request with codexResponsesRequest, passing along the
 * strip flag it is given, and forwards `options.signal` to fetch. The predicate passed to
 * the fail-safe is true for an HTTP 400 whose body mentions "reasoning" or
 * "encrypted_content" (case-insensitive).
 * NOTE(review): how the fail-safe reacts to a matching response is defined in
 * fetchWithArtifactFailSafe, which is not visible here.
 */
function fetchResponses(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
  const attempt = (strip: boolean): Promise<Response> => {
    const request = codexResponsesRequest(messages, options, credential, strip);
    return fetch(request.url, { method: "POST", headers: request.headers, body: request.body, signal: options.signal });
  };
  const isArtifactRejection = (status: number, body: string): boolean =>
    status === 400 && /reasoning|encrypted_content/i.test(body);
  return fetchWithArtifactFailSafe(attempt, isArtifactRejection);
}
244
+
188
245
  /** Non-streaming call over the Codex backend (collects the streamed output). */
189
246
  export async function codexResponsesCall(messages: Message[], options: CallOptions, credential: Credential): Promise<string> {
190
- const { url, headers, body } = codexResponsesRequest(messages, options, credential);
191
- const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
247
+ const response = await fetchResponses(messages, options, credential);
192
248
  if (!response.ok) throw await providerHttpError("OpenAI", response);
193
249
  if (!response.body) return "";
194
250
  let out = "";
@@ -198,6 +254,7 @@ export async function codexResponsesCall(messages: Message[], options: CallOptio
198
254
  const ev = parseResponsesEvent(data);
199
255
  if (ev.delta) out += ev.delta;
200
256
  if (ev.reasoningDelta) options.onReasoning?.(ev.reasoningDelta);
257
+ if (ev.reasoningItem) options.onReasoningArtifact?.({ provider: "openai", model: options.model, itemId: ev.reasoningItem.id, encrypted: ev.reasoningItem.encrypted });
201
258
  accumulateResponsesToolCall(toolAcc, ev);
202
259
  if (ev.usage) options.onUsage?.(ev.usage);
203
260
  if (ev.incompleteReason) incompleteReason = ev.incompleteReason;
@@ -216,8 +273,7 @@ export async function* codexResponsesStream(
216
273
  options: CallOptions,
217
274
  credential: Credential,
218
275
  ): AsyncGenerator<string> {
219
- const { url, headers, body } = codexResponsesRequest(messages, options, credential);
220
- const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
276
+ const response = await fetchResponses(messages, options, credential);
221
277
  if (!response.ok) throw await providerHttpError("OpenAI", response, "(stream)");
222
278
  if (!response.body) return;
223
279
  let yieldedAny = false;
@@ -226,6 +282,7 @@ export async function* codexResponsesStream(
226
282
  for await (const data of readSse(response.body)) {
227
283
  const ev = parseResponsesEvent(data);
228
284
  if (ev.reasoningDelta) options.onReasoning?.(ev.reasoningDelta);
285
+ if (ev.reasoningItem) options.onReasoningArtifact?.({ provider: "openai", model: options.model, itemId: ev.reasoningItem.id, encrypted: ev.reasoningItem.encrypted });
229
286
  if (ev.delta) {
230
287
  yieldedAny = true;
231
288
  yield ev.delta;
package/src/ai/types.ts CHANGED
@@ -19,9 +19,58 @@ export interface Message {
19
19
  images?: ImageAttachment[];
20
20
  /** Persisted reasoning/thinking text for an assistant turn (the thought before the
21
21
  * answer). Survives /resume + export so the durable record shows "think → answer".
22
- * Display-only: NOT replayed to providers (anthropic/gemini thinking replay needs
23
- * the original signed block, which the streaming path does not capture). */
22
+ * Display channel; the REPLAY channel is `reasoningArtifacts`. */
24
23
  reasoning?: string;
24
+ /** Provider-native, opaque reasoning artifacts captured during streaming (Anthropic
25
+ * thinking signature, Gemini thoughtSignature, OpenAI Responses reasoning items).
26
+ * Replayed to the SAME provider+model to preserve multi-step reasoning continuity;
27
+ * dropped on cross-model replay. Display-agnostic, not written to markdown export. */
28
+ reasoningArtifacts?: ReasoningArtifact[];
29
+ /** Structured native tool calls this assistant turn made (with stable ids). `content`
30
+ * keeps the canonical JSON envelope for display/compaction/fallback adapters; capable
31
+ * adapters replay these as native tool_use / functionCall / function_call blocks. */
32
+ toolUse?: ToolUseRecord[];
33
+ /** Structured native tool results for a tool-feedback user turn (ids match the prior
34
+ * assistant's `toolUse`). Capable adapters replay these as native tool_result /
35
+ * functionResponse / function_call_output blocks. */
36
+ toolResults?: ToolResultRecord[];
37
+ /** Non-tool trailing text on a tool-feedback user turn (e.g. post-turn hook
38
+ * diagnostics) — replayed as a trailing text block after the native tool results. */
39
+ toolResultExtra?: string;
40
+ }
41
+
42
/**
 * One opaque, provider-native reasoning artifact captured from a response.
 *
 * It is replayed only when both `provider` and `model` match the active call. Which of
 * the optional fields are set depends on the provider that produced it.
 */
export interface ReasoningArtifact {
  /** Provider that produced the artifact. */
  provider: ProviderName;
  /** Model id stamped by the capturing adapter. */
  model: string;
  /** Thought text, kept for fidelity; display is handled by `Message.reasoning`. */
  text?: string;
  /** Anthropic: signature of a thinking block. */
  signature?: string;
  /** Anthropic: opaque data of a redacted_thinking block. */
  redacted?: string;
  /** Gemini: per-part thoughtSignature, bound to the matching functionCall part. */
  thoughtSignature?: string;
  /** OpenAI Responses: id of the reasoning item. */
  itemId?: string;
  /** OpenAI Responses: encrypted_content of the reasoning item. */
  encrypted?: string;
}
60
+
61
/**
 * One structured native tool call made by an assistant turn.
 */
export interface ToolUseRecord {
  /** Stable synthetic id assigned by the engine; a tool result refers back to it, which
   *  keeps call ↔ result correlation intact across replay. */
  id: string;
  /** Name of the tool that was called. */
  tool: string;
  /** Arguments of the call. */
  arguments: Record<string, unknown>;
}
68
+
69
/**
 * One structured native tool result carried by a tool-feedback user turn.
 */
export interface ToolResultRecord {
  /** Id of the `ToolUseRecord` this result answers. */
  id: string;
  /** Output text of the tool run. */
  output: string;
  /** Error flag for this result. NOTE(review): exact semantics are not documented at the
   *  declaration — confirm against the code that produces these records. */
  isError: boolean;
}
26
75
 
27
76
  export interface Usage {
@@ -67,6 +116,10 @@ export interface CallOptions {
67
116
  * answer text). Surfaced as a transient dimmed view; absent for models that emit no
68
117
  * thought text. */
69
118
  onReasoning?: (delta: string) => void;
119
+ /** Sink for provider-native reasoning ARTIFACTS captured during streaming (signature /
120
+ * thoughtSignature / reasoning item id+encrypted). Separate from `onReasoning` (display
121
+ * text) because these arrive on different SSE events and are opaque replay data. */
122
+ onReasoningArtifact?: (artifact: ReasoningArtifact) => void;
70
123
  /** NATIVE tool-calling: function declarations the model may call. Present only on the
71
124
  * main agent step (never the prose wrap-up). Adapters with `supportsNativeTools` send
72
125
  * these on the wire and re-serialize the structured tool call back into the engine's