jeo-code 0.6.26 → 0.6.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.ja.md +2 -6
- package/README.ko.md +2 -6
- package/README.md +2 -6
- package/README.zh.md +2 -6
- package/package.json +1 -1
- package/src/agent/compaction.ts +10 -1
- package/src/agent/engine.ts +62 -16
- package/src/agent/loop.ts +3 -0
- package/src/ai/model-manager.ts +6 -8
- package/src/ai/providers/anthropic.ts +114 -21
- package/src/ai/providers/antigravity.ts +6 -0
- package/src/ai/providers/errors.ts +18 -0
- package/src/ai/providers/gemini.ts +84 -28
- package/src/ai/providers/openai-compatible-catalog.ts +10 -4
- package/src/ai/providers/openai-responses.ts +76 -19
- package/src/ai/types.ts +55 -2
- package/src/commands/launch/flags.ts +5 -2
- package/src/commands/launch.ts +119 -25
- package/src/tui/app.ts +38 -6
- package/src/tui/components/ascii-art.ts +38 -45
|
@@ -108,6 +108,12 @@ export async function resolveAntigravityProjectId(
|
|
|
108
108
|
|
|
109
109
|
type CcaPart = { text: string } | { inlineData: { mimeType: string; data: string } };
|
|
110
110
|
|
|
111
|
+
// Reasoning-artifact replay (signed thinking / thoughtSignature / encrypted reasoning) is
|
|
112
|
+
// deliberately OUT OF SCOPE for antigravity: it serves Gemini- and Claude-shaped models over
|
|
113
|
+
// the CCA wire (neither the native Anthropic messages nor the public Gemini shape), so it
|
|
114
|
+
// captures no artifacts and replays none — Message.toolUse/toolResults/reasoningArtifacts are
|
|
115
|
+
// ignored here. The provider-keyed match guard (D3) keeps "anthropic"/"gemini" artifacts from
|
|
116
|
+
// ever being re-injected by this adapter, so there is no cross-adapter leakage.
|
|
111
117
|
function antigravityContents(messages: Message[]): { role: "user" | "model"; parts: CcaPart[] }[] {
|
|
112
118
|
const contents: { role: "user" | "model"; parts: CcaPart[] }[] = [];
|
|
113
119
|
for (const m of messages) {
|
|
@@ -54,6 +54,24 @@ export function parseRetryFromBody(detail: string | null | undefined): number |
|
|
|
54
54
|
* and any `Retry-After`. Use at every adapter's `!response.ok` site so the retry
|
|
55
55
|
* layer sees a uniform, status-carrying, backoff-aware error.
|
|
56
56
|
*/
|
|
57
|
+
/**
 * One-shot reasoning-artifact fail-safe around a provider request.
 *
 * The request is first sent with replay artifacts intact (`send(false)`). If that response
 * is not OK and `isArtifactError` recognises its status/body as a rejected replayed reasoning
 * artifact (signature / thoughtSignature / encrypted reasoning item — expired signature,
 * edited history, toggled thinking), the request is retried ONCE with artifacts stripped
 * (`send(true)`, i.e. plain history). There is never more than one retry.
 *
 * The error body is read from a clone, so a response that is handed back to the caller is
 * still unread. When a retry happens the rejected response is discarded, so its body is
 * cancelled instead of being left buffered and unconsumed.
 *
 * ponytail: heuristic error-body string match — tighten to structured error codes if/when
 * the providers expose them.
 *
 * @param send Rebuilds and issues the request; `stripArtifacts` selects the plain-history form.
 * @param isArtifactError Decides from the HTTP status and body text whether to retry stripped.
 * @returns The first response, or the retry's response when a retry was made.
 */
export async function fetchWithArtifactFailSafe(
  send: (stripArtifacts: boolean) => Promise<Response>,
  isArtifactError: (status: number, body: string) => boolean,
): Promise<Response> {
  const res = await send(false);
  if (res.ok) return res;
  // Inspect a clone so `res` itself stays readable if it is returned to the caller.
  const body = await res.clone().text().catch(() => "");
  if (!isArtifactError(res.status, body)) return res;
  // Nobody will read the rejected response once we retry — release its body (best effort).
  await res.body?.cancel().catch(() => {});
  return send(true);
}
|
|
74
|
+
|
|
57
75
|
export async function providerHttpError(provider: string, response: Response, context?: string): Promise<ProviderHttpError> {
|
|
58
76
|
const detail = await response.text().catch(() => "");
|
|
59
77
|
const retryAfterMs = parseRetryAfter(response.headers.get("retry-after")) ?? parseRetryFromBody(detail);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { Credential } from "../../auth";
|
|
2
2
|
import type { CallOptions, Message, ProviderAdapter } from "../types";
|
|
3
3
|
import { readSse } from "../sse";
|
|
4
|
-
import { providerHttpError } from "./errors";
|
|
4
|
+
import { providerHttpError, fetchWithArtifactFailSafe } from "./errors";
|
|
5
5
|
import { jeoEnv } from "../../util/env";
|
|
6
6
|
import { serializeToolCalls } from "../../agent/tool-schemas";
|
|
7
7
|
|
|
@@ -37,35 +37,62 @@ export function geminiThinkingBudget(model: string, effort?: CallOptions["reason
|
|
|
37
37
|
return budget;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
|
|
41
|
+
/**
 * Decides whether an assistant turn may be replayed in Gemini's native shape
 * (functionCall parts carrying a thoughtSignature).
 *
 * All three must hold: thinking is enabled, the turn has at least one structured `toolUse`
 * entry, and it carries at least one reasoning artifact whose provider is "gemini", whose
 * model equals `modelKey`, and whose `thoughtSignature` is non-empty.
 */
export function geminiNativizable(m: Message, modelKey: string, thinkingEnabled: boolean): boolean {
  if (!thinkingEnabled) return false;
  if (!m.toolUse?.length) return false;
  const artifacts = m.reasoningArtifacts ?? [];
  return artifacts.some(
    artifact => artifact.provider === "gemini" && artifact.model === modelKey && !!artifact.thoughtSignature,
  );
}
|
|
40
48
|
/** Shared Gemini request payload (contents + generationConfig + systemInstruction)
|
|
41
49
|
* used by BOTH the public generativelanguage path (API key) and the Cloud Code
|
|
42
50
|
* Assist path (OAuth) — only the envelope/endpoint differs. */
|
|
43
|
-
export function buildGeminiPayload(messages: Message[], options: CallOptions): { geminiModel: string; payload: Record<string, unknown> } {
|
|
51
|
+
export function buildGeminiPayload(messages: Message[], options: CallOptions, stripArtifacts = false): { geminiModel: string; payload: Record<string, unknown> } {
|
|
44
52
|
const resolvedModel = options.model.replace(/^(google|gemini)\//, "");
|
|
45
53
|
let geminiModel = resolvedModel;
|
|
46
54
|
if (!geminiModel || geminiModel === "claude-3-5-sonnet") geminiModel = "gemini-2.0-flash";
|
|
47
55
|
|
|
48
56
|
const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
|
|
57
|
+
const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
|
|
58
|
+
const thinkingEnabled = thinkingBudget !== undefined && !stripArtifacts;
|
|
49
59
|
// Gemini requires strictly ALTERNATING user/model turns. jeo histories can carry
|
|
50
60
|
// consecutive same-role messages (a compaction summary prepended before a tool-result,
|
|
51
61
|
// back-to-back tool results, etc.), so coalesce adjacent same-role turns into one
|
|
52
|
-
// content block — otherwise the API rejects the request mid-session.
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
62
|
+
// content block — otherwise the API rejects the request mid-session. Native
|
|
63
|
+
// functionCall/functionResponse parts (with thoughtSignature) are reconstructed for
|
|
64
|
+
// same-model turns to preserve cross-step thought context; else plain text.
|
|
65
|
+
type GeminiPart = Record<string, unknown>;
|
|
66
|
+
const nonSystem = messages.filter(m => m.role !== "system");
|
|
67
|
+
const contents: { role: string; parts: GeminiPart[] }[] = [];
|
|
68
|
+
nonSystem.forEach((m, i) => {
|
|
56
69
|
const role = m.role === "assistant" ? "model" : "user";
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
70
|
+
let parts: GeminiPart[];
|
|
71
|
+
if (m.role === "assistant" && geminiNativizable(m, options.model, thinkingEnabled)) {
|
|
72
|
+
const sig = m.reasoningArtifacts!.find(a => a.provider === "gemini" && a.model === options.model && a.thoughtSignature)?.thoughtSignature;
|
|
73
|
+
parts = m.toolUse!.map((tu, idx) => {
|
|
74
|
+
const p: GeminiPart = { functionCall: { name: tu.tool, args: tu.arguments } };
|
|
75
|
+
if (idx === 0 && sig) p.thoughtSignature = sig; // bind the turn signature to the first call
|
|
76
|
+
return p;
|
|
77
|
+
});
|
|
78
|
+
} else if (m.role === "user" && m.toolResults?.length && i > 0
|
|
79
|
+
&& nonSystem[i - 1].role === "assistant"
|
|
80
|
+
&& geminiNativizable(nonSystem[i - 1], options.model, thinkingEnabled)) {
|
|
81
|
+
const prevToolUse = nonSystem[i - 1].toolUse ?? [];
|
|
82
|
+
parts = m.toolResults.map(tr => ({
|
|
83
|
+
functionResponse: { name: prevToolUse.find(tu => tu.id === tr.id)?.tool ?? "tool", response: { output: tr.output } },
|
|
84
|
+
}));
|
|
85
|
+
if (m.toolResultExtra) parts.push({ text: m.toolResultExtra });
|
|
65
86
|
} else {
|
|
66
|
-
|
|
87
|
+
parts = [
|
|
88
|
+
...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
|
|
89
|
+
{ text: m.content },
|
|
90
|
+
];
|
|
67
91
|
}
|
|
68
|
-
|
|
92
|
+
const prev = contents[contents.length - 1];
|
|
93
|
+
if (prev && prev.role === role) prev.parts.push(...parts);
|
|
94
|
+
else contents.push({ role, parts });
|
|
95
|
+
});
|
|
69
96
|
|
|
70
97
|
const generationConfig: Record<string, unknown> = {
|
|
71
98
|
temperature: options.temperature ?? 0.2,
|
|
@@ -74,7 +101,7 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
|
|
|
74
101
|
// Function-calling and responseMimeType:json are mutually exclusive in the Gemini
|
|
75
102
|
// API — when native tools are declared, the functionCall parts replace JSON-in-prose.
|
|
76
103
|
if (options.jsonMode && !options.tools?.length) generationConfig.responseMimeType = "application/json";
|
|
77
|
-
|
|
104
|
+
|
|
78
105
|
// includeThoughts: required for Gemini to STREAM thought summaries (the `thought:true`
|
|
79
106
|
// parts thoughtOf() routes to onReasoning) — without it the model thinks silently.
|
|
80
107
|
if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { includeThoughts: true, thinkingBudget };
|
|
@@ -91,8 +118,8 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
|
|
|
91
118
|
return { geminiModel, payload };
|
|
92
119
|
}
|
|
93
120
|
|
|
94
|
-
export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent"): { url: string; headers: Record<string, string>; body: string } {
|
|
95
|
-
const { geminiModel, payload } = buildGeminiPayload(messages, options);
|
|
121
|
+
export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent", stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
|
|
122
|
+
const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
|
|
96
123
|
const oauth = credential.kind === "oauth" ? credential.token : undefined;
|
|
97
124
|
const apiKey = credential.kind === "api_key" ? credential.token : undefined;
|
|
98
125
|
let url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(geminiModel)}:${action}`;
|
|
@@ -123,8 +150,8 @@ export function getGeminiCliHeaders(modelId?: string): Record<string, string> {
|
|
|
123
150
|
* plain `jeo auth login gemini` works without any GEMINI_API_KEY. The body
|
|
124
151
|
* wraps the standard payload as `{ project, model, request }`.
|
|
125
152
|
*/
|
|
126
|
-
export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string): { url: string; headers: Record<string, string>; body: string } {
|
|
127
|
-
const { geminiModel, payload } = buildGeminiPayload(messages, options);
|
|
153
|
+
export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string, stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
|
|
154
|
+
const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
|
|
128
155
|
return {
|
|
129
156
|
url: `${CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse`,
|
|
130
157
|
headers: {
|
|
@@ -137,8 +164,22 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
|
|
|
137
164
|
};
|
|
138
165
|
}
|
|
139
166
|
|
|
167
|
+
/**
 * POSTs a Gemini request through fetchWithArtifactFailSafe.
 *
 * `make(stripArtifacts)` builds the url/headers/body for one attempt. The retry predicate
 * accepts only HTTP 400 responses whose body mentions a thought signature or function call
 * (case-insensitive string match).
 *
 * @param make Request builder invoked once per attempt.
 * @param signal Optional abort signal forwarded to every `fetch`.
 */
function geminiFetchFailSafe(
  make: (stripArtifacts: boolean) => { url: string; headers: Record<string, string>; body: string },
  signal?: AbortSignal,
): Promise<Response> {
  const post = (stripArtifacts: boolean): Promise<Response> => {
    const { url, headers, body } = make(stripArtifacts);
    return fetch(url, { method: "POST", headers, body, signal });
  };
  const rejectedArtifact = (status: number, body: string): boolean => {
    if (status !== 400) return false;
    return /thoughtsignature|thought_signature|functioncall|function_call|signature/i.test(body);
  };
  return fetchWithArtifactFailSafe(post, rejectedArtifact);
}
|
|
180
|
+
|
|
140
181
|
interface GeminiChunk {
|
|
141
|
-
candidates?: { content?: { parts?: { text?: string; thought?: boolean; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
|
|
182
|
+
candidates?: { content?: { parts?: { text?: string; thought?: boolean; thoughtSignature?: string; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
|
|
142
183
|
promptFeedback?: { blockReason?: string };
|
|
143
184
|
usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
|
|
144
185
|
}
|
|
@@ -157,6 +198,19 @@ function textOf(chunk: GeminiChunk): string {
|
|
|
157
198
|
function thoughtOf(chunk: GeminiChunk): string {
|
|
158
199
|
return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
159
200
|
}
|
|
201
|
+
|
|
202
|
+
/**
 * Reports every not-yet-seen thoughtSignature found on the parts of the chunk's first
 * candidate. Each new signature is recorded in `seen` (which dedups across the streamed
 * chunks of one turn) and passed to `options.onReasoningArtifact`, when that callback is
 * set, as a "gemini" artifact keyed by `options.model`.
 */
function captureGeminiSignatures(chunk: GeminiChunk, options: CallOptions, seen: Set<string>): void {
  const parts = chunk.candidates?.[0]?.content?.parts ?? [];
  for (const part of parts) {
    const signature = part.thoughtSignature;
    // Skip parts without a signature and signatures already reported this turn.
    if (!signature || seen.has(signature)) continue;
    seen.add(signature);
    options.onReasoningArtifact?.({ provider: "gemini", model: options.model, thoughtSignature: signature });
  }
}
|
|
160
214
|
/** Native Gemini functionCall parts → {tool, arguments} (gjc/antigravity parity). Kept
|
|
161
215
|
* separate from textOf so the re-serialized canonical JSON envelope drives the loop. */
|
|
162
216
|
function geminiFunctionCallsOf(chunk: GeminiChunk): { tool: string; arguments: Record<string, unknown> }[] {
|
|
@@ -197,14 +251,14 @@ function blockedReason(chunk: GeminiChunk): string | undefined {
|
|
|
197
251
|
async function* ccaTurn(messages: Message[], options: CallOptions, credential: Credential & { kind: "oauth" }): AsyncGenerator<string> {
|
|
198
252
|
const { resolveAntigravityProjectId } = await import("./antigravity");
|
|
199
253
|
const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
|
|
200
|
-
const
|
|
201
|
-
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
254
|
+
const response = await geminiFetchFailSafe(strip => geminiCliRequest(messages, options, credential.token, projectId, strip), options.signal);
|
|
202
255
|
if (!response.ok) throw await providerHttpError("Gemini (Cloud Code Assist)", response);
|
|
203
256
|
if (!response.body) return;
|
|
204
257
|
let lastUsage: GeminiChunk["usageMetadata"];
|
|
205
258
|
let yieldedAny = false;
|
|
206
259
|
let lastEmptyReason: string | undefined;
|
|
207
260
|
const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
|
|
261
|
+
const seenSigs = new Set<string>();
|
|
208
262
|
for await (const data of readSse(response.body)) {
|
|
209
263
|
let chunk: CcaChunk;
|
|
210
264
|
try {
|
|
@@ -216,6 +270,7 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
|
|
|
216
270
|
if (!inner) continue;
|
|
217
271
|
const thought = thoughtOf(inner);
|
|
218
272
|
if (thought) options.onReasoning?.(thought);
|
|
273
|
+
captureGeminiSignatures(inner, options, seenSigs);
|
|
219
274
|
const delta = textOf(inner);
|
|
220
275
|
if (delta) {
|
|
221
276
|
yieldedAny = true;
|
|
@@ -249,10 +304,10 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
249
304
|
for await (const delta of ccaTurn(messages, options, credential)) out += delta;
|
|
250
305
|
return out;
|
|
251
306
|
}
|
|
252
|
-
const
|
|
253
|
-
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
307
|
+
const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "generateContent", strip), options.signal);
|
|
254
308
|
if (!response.ok) throw await providerHttpError("Gemini", response);
|
|
255
309
|
const result = (await response.json()) as GeminiChunk;
|
|
310
|
+
captureGeminiSignatures(result, options, new Set());
|
|
256
311
|
if (result.usageMetadata) {
|
|
257
312
|
options.onUsage?.({ inputTokens: result.usageMetadata.promptTokenCount, outputTokens: result.usageMetadata.candidatesTokenCount });
|
|
258
313
|
}
|
|
@@ -271,14 +326,14 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
271
326
|
yield* ccaTurn(messages, options, credential);
|
|
272
327
|
return;
|
|
273
328
|
}
|
|
274
|
-
const
|
|
275
|
-
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
329
|
+
const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "streamGenerateContent", strip), options.signal);
|
|
276
330
|
if (!response.ok) throw await providerHttpError("Gemini", response, "(stream)");
|
|
277
331
|
if (!response.body) return;
|
|
278
332
|
let lastUsage: GeminiChunk["usageMetadata"];
|
|
279
333
|
let yieldedAny = false;
|
|
280
334
|
let lastEmptyReason: string | undefined;
|
|
281
335
|
const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
|
|
336
|
+
const seenSigs = new Set<string>();
|
|
282
337
|
for await (const data of readSse(response.body)) {
|
|
283
338
|
let chunk: GeminiChunk;
|
|
284
339
|
try {
|
|
@@ -288,6 +343,7 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
288
343
|
}
|
|
289
344
|
const thought = thoughtOf(chunk);
|
|
290
345
|
if (thought) options.onReasoning?.(thought);
|
|
346
|
+
captureGeminiSignatures(chunk, options, seenSigs);
|
|
291
347
|
const delta = textOf(chunk);
|
|
292
348
|
if (delta) {
|
|
293
349
|
yieldedAny = true;
|
|
@@ -23,6 +23,12 @@ export interface OpenAICompatProviderDef {
|
|
|
23
23
|
readonly apiKeyEnv: string;
|
|
24
24
|
/** Default model id (provider-prefixed) used by `--provider <name>`. */
|
|
25
25
|
readonly defaultModel: string;
|
|
26
|
+
/** Extra well-known model ids (BARE, not provider-prefixed) for the OFFLINE
|
|
27
|
+
* pick-list fallback shown by `/agents <role> provider <name>` and `--provider`.
|
|
28
|
+
* Live `/models` discovery supersedes this once the provider is logged in, so
|
|
29
|
+
* keep only stable/alias-style ids here (a stale id would 404 at inference).
|
|
30
|
+
* `defaultModel` is always surfaced first regardless of this list. */
|
|
31
|
+
readonly knownModels?: readonly string[];
|
|
26
32
|
/** Wire protocol: "openai" (/chat/completions, default) or "anthropic" (/v1/messages). */
|
|
27
33
|
readonly protocol?: "openai" | "anthropic";
|
|
28
34
|
/** True for subscription/plan products (coding-plan, portal, token-plan, code) rather than
|
|
@@ -35,12 +41,12 @@ export interface OpenAICompatProviderDef {
|
|
|
35
41
|
}
|
|
36
42
|
|
|
37
43
|
export const OPENAI_COMPAT_PROVIDERS: readonly OpenAICompatProviderDef[] = [
|
|
38
|
-
{ name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile" },
|
|
39
|
-
{ name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat" },
|
|
40
|
-
{ name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest" },
|
|
44
|
+
{ name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile", knownModels: ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "openai/gpt-oss-120b", "openai/gpt-oss-20b"] },
|
|
45
|
+
{ name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat", knownModels: ["deepseek-chat", "deepseek-reasoner"] },
|
|
46
|
+
{ name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest", knownModels: ["mistral-large-latest", "mistral-small-latest", "codestral-latest", "ministral-8b-latest"] },
|
|
41
47
|
{ name: "openrouter", label: "OpenRouter", baseUrl: "https://openrouter.ai/api/v1", apiKeyEnv: "OPENROUTER_API_KEY", defaultModel: "openrouter/openai/gpt-4o-mini", thinkingFormat: "openrouter" },
|
|
42
48
|
{ name: "together", label: "Together", baseUrl: "https://api.together.xyz/v1", apiKeyEnv: "TOGETHER_API_KEY", defaultModel: "together/meta-llama/Llama-3.3-70B-Instruct-Turbo" },
|
|
43
|
-
{ name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b" },
|
|
49
|
+
{ name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b", knownModels: ["llama-3.3-70b", "llama3.1-8b", "qwen-3-235b-a22b-instruct-2507"] },
|
|
44
50
|
{ name: "fireworks", label: "Fireworks", baseUrl: "https://api.fireworks.ai/inference/v1", apiKeyEnv: "FIREWORKS_API_KEY", defaultModel: "fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct" },
|
|
45
51
|
{ name: "nvidia", label: "NVIDIA", baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", defaultModel: "nvidia/meta/llama-3.3-70b-instruct" },
|
|
46
52
|
// Additional gjc-parity OpenAI-compatible clouds (authoritative base URLs + env vars).
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
import type { Credential } from "../../auth";
|
|
14
14
|
import type { CallOptions, Message } from "../types";
|
|
15
15
|
import { readSse } from "../sse";
|
|
16
|
-
import { providerHttpError } from "./errors";
|
|
16
|
+
import { providerHttpError, fetchWithArtifactFailSafe } from "./errors";
|
|
17
17
|
import { serializeAccumulatedToolCalls } from "../../agent/tool-schemas";
|
|
18
18
|
|
|
19
19
|
export const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses";
|
|
@@ -35,28 +35,64 @@ export function extractChatgptAccountId(token: string): string | undefined {
|
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
|
|
39
|
+
type ResponsesInputItem = Record<string, unknown>;
|
|
40
|
+
|
|
41
|
+
/**
 * Decides whether an assistant turn may be replayed as native Responses items (stateless
 * reasoning replay). It must have at least one structured `toolUse` entry and at least one
 * reasoning artifact whose provider is "openai", whose model equals `modelKey`, and which
 * carries both an `itemId` and `encrypted` content.
 */
export function responsesNativizable(m: Message, modelKey: string): boolean {
  if (!m.toolUse?.length) return false;
  const artifacts = m.reasoningArtifacts ?? [];
  return artifacts.some(
    artifact => artifact.provider === "openai" && artifact.model === modelKey && !!artifact.itemId && !!artifact.encrypted,
  );
}
|
|
47
|
+
|
|
48
|
+
/**
 * Builds the Responses `input` array from the non-system messages.
 *
 * - An assistant turn that qualifies per `responsesNativizable` is emitted as its matching
 *   `reasoning` items (id + encrypted_content, empty summary) followed by one
 *   `function_call` item per `toolUse` entry.
 * - A user turn with tool results that directly follows such an assistant turn is emitted as
 *   one `function_call_output` per result, plus a trailing user text item when
 *   `toolResultExtra` is set.
 * - Every other turn, and every turn when `stripArtifacts` is true (fail-safe), uses the
 *   plain shape: `output_text` for assistant turns, `input_text` otherwise, with images
 *   attached as `input_image` data URLs on non-assistant turns only.
 */
export function buildResponsesInput(messages: Message[], modelKey: string, stripArtifacts = false): ResponsesInputItem[] {
  const turns = messages.filter(msg => msg.role !== "system");
  const input: ResponsesInputItem[] = [];

  // True when `msg` is an assistant turn that should be replayed as native items.
  const replaysNatively = (msg: Message | undefined): boolean =>
    !stripArtifacts && msg !== undefined && msg.role === "assistant" && responsesNativizable(msg, modelKey);

  // Plain text (+ images on non-assistant turns) rendering of a turn.
  const toPlainItem = (msg: Message): ResponsesInputItem => {
    const fromAssistant = msg.role === "assistant";
    const content: Record<string, unknown>[] = [
      { type: fromAssistant ? "output_text" : "input_text", text: msg.content },
    ];
    if (!fromAssistant && msg.images?.length) {
      for (const img of msg.images) {
        content.push({ type: "input_image", image_url: `data:${img.mediaType};base64,${img.data}` });
      }
    }
    return { role: msg.role, content };
  };

  for (const [idx, msg] of turns.entries()) {
    if (replaysNatively(msg)) {
      // Reasoning items go first, then the calls they led to.
      const reasoningItems = msg.reasoningArtifacts!
        .filter(a => a.provider === "openai" && a.model === modelKey && a.itemId && a.encrypted)
        .map(a => ({ type: "reasoning", id: a.itemId, encrypted_content: a.encrypted, summary: [] }));
      const callItems = msg.toolUse!.map(tu => ({
        type: "function_call",
        call_id: tu.id,
        name: tu.tool,
        arguments: JSON.stringify(tu.arguments),
      }));
      input.push(...reasoningItems, ...callItems);
      continue;
    }

    const previous = idx > 0 ? turns[idx - 1] : undefined;
    if (msg.role === "user" && msg.toolResults?.length && replaysNatively(previous)) {
      for (const tr of msg.toolResults) {
        input.push({ type: "function_call_output", call_id: tr.id, output: tr.output });
      }
      if (msg.toolResultExtra) {
        input.push({ role: "user", content: [{ type: "input_text", text: msg.toolResultExtra }] });
      }
      continue;
    }

    input.push(toPlainItem(msg));
  }
  return input;
}
|
|
38
85
|
/** Build the Codex Responses request (url + headers + body) for an OAuth credential. */
|
|
39
86
|
export function codexResponsesRequest(
|
|
40
87
|
messages: Message[],
|
|
41
88
|
options: CallOptions,
|
|
42
89
|
credential: Credential,
|
|
90
|
+
stripArtifacts = false,
|
|
43
91
|
): { url: string; headers: Record<string, string>; body: string } {
|
|
44
92
|
const model = options.model.startsWith("openai/") ? options.model.slice(7) : options.model;
|
|
45
93
|
const token = credential.kind === "none" ? "" : credential.token;
|
|
46
94
|
const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
|
|
47
|
-
const input = messages
|
|
48
|
-
.filter(m => m.role !== "system")
|
|
49
|
-
.map(m => ({
|
|
50
|
-
role: m.role,
|
|
51
|
-
content: [
|
|
52
|
-
{ type: m.role === "assistant" ? "output_text" : "input_text", text: m.content },
|
|
53
|
-
// Clipboard-pasted images ride along as input_image data URLs (user turns only —
|
|
54
|
-
// assistant history is always text in jeo).
|
|
55
|
-
...(m.role !== "assistant" && m.images?.length
|
|
56
|
-
? m.images.map(img => ({ type: "input_image", image_url: `data:${img.mediaType};base64,${img.data}` }))
|
|
57
|
-
: []),
|
|
58
|
-
],
|
|
59
|
-
}));
|
|
95
|
+
const input = buildResponsesInput(messages, options.model, stripArtifacts);
|
|
60
96
|
const payload: Record<string, unknown> = {
|
|
61
97
|
model,
|
|
62
98
|
instructions: systemPrompt ?? "You are a helpful coding assistant.",
|
|
@@ -81,6 +117,9 @@ export function codexResponsesRequest(
|
|
|
81
117
|
// Both speak the same Responses schema (the body above), so only url+headers differ.
|
|
82
118
|
if (credential.kind === "api_key") {
|
|
83
119
|
const base = (options.baseUrl ?? "https://api.openai.com/v1").replace(/\/$/, "");
|
|
120
|
+
// Stateless reasoning replay (public Responses API): ask for encrypted reasoning content
|
|
121
|
+
// so it can be captured and threaded back into a later `input` (store stays false).
|
|
122
|
+
payload.include = ["reasoning.encrypted_content"];
|
|
84
123
|
return {
|
|
85
124
|
url: `${base}/responses`,
|
|
86
125
|
headers: { "content-type": "application/json", authorization: `Bearer ${token}`, accept: "text/event-stream" },
|
|
@@ -113,6 +152,8 @@ export interface ResponsesEvent {
|
|
|
113
152
|
toolCallName?: string;
|
|
114
153
|
toolCallArgsDelta?: string;
|
|
115
154
|
toolCallIndex?: number;
|
|
155
|
+
/** A completed reasoning item carrying its id + encrypted_content (stateless replay capture). */
|
|
156
|
+
reasoningItem?: { id: string; encrypted: string };
|
|
116
157
|
}
|
|
117
158
|
|
|
118
159
|
/** Parse one Responses SSE `data:` payload into a delta / usage / error. */
|
|
@@ -120,7 +161,7 @@ export function parseResponsesEvent(data: string): ResponsesEvent {
|
|
|
120
161
|
let o: {
|
|
121
162
|
type?: string;
|
|
122
163
|
delta?: unknown;
|
|
123
|
-
item?: { type?: string; name?: string };
|
|
164
|
+
item?: { type?: string; name?: string; id?: string; encrypted_content?: string };
|
|
124
165
|
output_index?: number;
|
|
125
166
|
response?: {
|
|
126
167
|
usage?: { input_tokens?: number; output_tokens?: number };
|
|
@@ -137,6 +178,11 @@ export function parseResponsesEvent(data: string): ResponsesEvent {
|
|
|
137
178
|
if (o.type === "response.output_item.added" && o.item?.type === "function_call") {
|
|
138
179
|
return { toolCallName: o.item.name, toolCallIndex: o.output_index };
|
|
139
180
|
}
|
|
181
|
+
// A completed reasoning item carries the encrypted_content we replay later (needs the
|
|
182
|
+
// request's `include: ["reasoning.encrypted_content"]`). Captured on output_item.done.
|
|
183
|
+
if (o.type === "response.output_item.done" && o.item?.type === "reasoning" && o.item.id && o.item.encrypted_content) {
|
|
184
|
+
return { reasoningItem: { id: o.item.id, encrypted: o.item.encrypted_content } };
|
|
185
|
+
}
|
|
140
186
|
if (o.type === "response.function_call_arguments.delta" && typeof o.delta === "string") {
|
|
141
187
|
return { toolCallArgsDelta: o.delta, toolCallIndex: o.output_index };
|
|
142
188
|
}
|
|
@@ -185,10 +231,20 @@ function emptyCompletionError(reason: string | undefined): Error {
|
|
|
185
231
|
return new Error(`OpenAI Codex returned no content${reason ? ` (${reason})` : ""}${hint}.`);
|
|
186
232
|
}
|
|
187
233
|
|
|
234
|
+
/**
 * POSTs to the Responses endpoint through fetchWithArtifactFailSafe.
 *
 * Each attempt rebuilds the request with `codexResponsesRequest`, passing the strip flag
 * through. The retry predicate accepts only HTTP 400 responses whose body mentions
 * "reasoning" or "encrypted_content" (case-insensitive string match).
 */
function fetchResponses(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
  const post = (stripArtifacts: boolean): Promise<Response> => {
    const request = codexResponsesRequest(messages, options, credential, stripArtifacts);
    return fetch(request.url, { method: "POST", headers: request.headers, body: request.body, signal: options.signal });
  };
  const rejectedArtifact = (status: number, body: string): boolean => {
    if (status !== 400) return false;
    return /reasoning|encrypted_content/i.test(body);
  };
  return fetchWithArtifactFailSafe(post, rejectedArtifact);
}
|
|
244
|
+
|
|
188
245
|
/** Non-streaming call over the Codex backend (collects the streamed output). */
|
|
189
246
|
export async function codexResponsesCall(messages: Message[], options: CallOptions, credential: Credential): Promise<string> {
|
|
190
|
-
const
|
|
191
|
-
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
247
|
+
const response = await fetchResponses(messages, options, credential);
|
|
192
248
|
if (!response.ok) throw await providerHttpError("OpenAI", response);
|
|
193
249
|
if (!response.body) return "";
|
|
194
250
|
let out = "";
|
|
@@ -198,6 +254,7 @@ export async function codexResponsesCall(messages: Message[], options: CallOptio
|
|
|
198
254
|
const ev = parseResponsesEvent(data);
|
|
199
255
|
if (ev.delta) out += ev.delta;
|
|
200
256
|
if (ev.reasoningDelta) options.onReasoning?.(ev.reasoningDelta);
|
|
257
|
+
if (ev.reasoningItem) options.onReasoningArtifact?.({ provider: "openai", model: options.model, itemId: ev.reasoningItem.id, encrypted: ev.reasoningItem.encrypted });
|
|
201
258
|
accumulateResponsesToolCall(toolAcc, ev);
|
|
202
259
|
if (ev.usage) options.onUsage?.(ev.usage);
|
|
203
260
|
if (ev.incompleteReason) incompleteReason = ev.incompleteReason;
|
|
@@ -216,8 +273,7 @@ export async function* codexResponsesStream(
|
|
|
216
273
|
options: CallOptions,
|
|
217
274
|
credential: Credential,
|
|
218
275
|
): AsyncGenerator<string> {
|
|
219
|
-
const
|
|
220
|
-
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
276
|
+
const response = await fetchResponses(messages, options, credential);
|
|
221
277
|
if (!response.ok) throw await providerHttpError("OpenAI", response, "(stream)");
|
|
222
278
|
if (!response.body) return;
|
|
223
279
|
let yieldedAny = false;
|
|
@@ -226,6 +282,7 @@ export async function* codexResponsesStream(
|
|
|
226
282
|
for await (const data of readSse(response.body)) {
|
|
227
283
|
const ev = parseResponsesEvent(data);
|
|
228
284
|
if (ev.reasoningDelta) options.onReasoning?.(ev.reasoningDelta);
|
|
285
|
+
if (ev.reasoningItem) options.onReasoningArtifact?.({ provider: "openai", model: options.model, itemId: ev.reasoningItem.id, encrypted: ev.reasoningItem.encrypted });
|
|
229
286
|
if (ev.delta) {
|
|
230
287
|
yieldedAny = true;
|
|
231
288
|
yield ev.delta;
|
package/src/ai/types.ts
CHANGED
|
@@ -19,9 +19,58 @@ export interface Message {
|
|
|
19
19
|
images?: ImageAttachment[];
|
|
20
20
|
/** Persisted reasoning/thinking text for an assistant turn (the thought before the
|
|
21
21
|
* answer). Survives /resume + export so the durable record shows "think → answer".
|
|
22
|
-
* Display
|
|
23
|
-
* the original signed block, which the streaming path does not capture). */
|
|
22
|
+
* Display channel; the REPLAY channel is `reasoningArtifacts`. */
|
|
24
23
|
reasoning?: string;
|
|
24
|
+
/** Provider-native, opaque reasoning artifacts captured during streaming (Anthropic
|
|
25
|
+
* thinking signature, Gemini thoughtSignature, OpenAI Responses reasoning items).
|
|
26
|
+
* Replayed to the SAME provider+model to preserve multi-step reasoning continuity;
|
|
27
|
+
* dropped on cross-model replay. Display-agnostic, not written to markdown export. */
|
|
28
|
+
reasoningArtifacts?: ReasoningArtifact[];
|
|
29
|
+
/** Structured native tool calls this assistant turn made (with stable ids). `content`
|
|
30
|
+
* keeps the canonical JSON envelope for display/compaction/fallback adapters; capable
|
|
31
|
+
* adapters replay these as native tool_use / functionCall / function_call blocks. */
|
|
32
|
+
toolUse?: ToolUseRecord[];
|
|
33
|
+
/** Structured native tool results for a tool-feedback user turn (ids match the prior
|
|
34
|
+
* assistant's `toolUse`). Capable adapters replay these as native tool_result /
|
|
35
|
+
* functionResponse / function_call_output blocks. */
|
|
36
|
+
toolResults?: ToolResultRecord[];
|
|
37
|
+
/** Non-tool trailing text on a tool-feedback user turn (e.g. post-turn hook
|
|
38
|
+
* diagnostics) — replayed as a trailing text block after the native tool results. */
|
|
39
|
+
toolResultExtra?: string;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** A provider-native opaque reasoning artifact. Only replayed when `provider` AND
|
|
43
|
+
* `model` match the active call (the adapter stamps the exact wire model id). */
|
|
44
|
+
export interface ReasoningArtifact {
|
|
45
|
+
provider: ProviderName;
|
|
46
|
+
model: string;
|
|
47
|
+
/** Thought text (display is covered by Message.reasoning; kept here for fidelity). */
|
|
48
|
+
text?: string;
|
|
49
|
+
/** Anthropic: thinking block signature. */
|
|
50
|
+
signature?: string;
|
|
51
|
+
/** Anthropic: redacted_thinking opaque data. */
|
|
52
|
+
redacted?: string;
|
|
53
|
+
/** Gemini: per-part thoughtSignature (binds to the matching functionCall part). */
|
|
54
|
+
thoughtSignature?: string;
|
|
55
|
+
/** OpenAI Responses: reasoning item id. */
|
|
56
|
+
itemId?: string;
|
|
57
|
+
/** OpenAI Responses: reasoning item encrypted_content. */
|
|
58
|
+
encrypted?: string;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** A structured native tool call (assistant turn). `id` is a stable synthetic id the
|
|
62
|
+
* engine assigns so tool_use ↔ tool_result correlation survives replay. */
|
|
63
|
+
export interface ToolUseRecord {
|
|
64
|
+
id: string;
|
|
65
|
+
tool: string;
|
|
66
|
+
arguments: Record<string, unknown>;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/** A structured native tool result (user turn). `id` matches a prior `ToolUseRecord`. */
|
|
70
|
+
export interface ToolResultRecord {
|
|
71
|
+
id: string;
|
|
72
|
+
output: string;
|
|
73
|
+
isError: boolean;
|
|
25
74
|
}
|
|
26
75
|
|
|
27
76
|
export interface Usage {
|
|
@@ -67,6 +116,10 @@ export interface CallOptions {
|
|
|
67
116
|
* answer text). Surfaced as a transient dimmed view; absent for models that emit no
|
|
68
117
|
* thought text. */
|
|
69
118
|
onReasoning?: (delta: string) => void;
|
|
119
|
+
/** Sink for provider-native reasoning ARTIFACTS captured during streaming (signature /
|
|
120
|
+
* thoughtSignature / reasoning item id+encrypted). Separate from `onReasoning` (display
|
|
121
|
+
* text) because these arrive on different SSE events and are opaque replay data. */
|
|
122
|
+
onReasoningArtifact?: (artifact: ReasoningArtifact) => void;
|
|
70
123
|
/** NATIVE tool-calling: function declarations the model may call. Present only on the
|
|
71
124
|
* main agent step (never the prose wrap-up). Adapters with `supportsNativeTools` send
|
|
72
125
|
* these on the wire and re-serialize the structured tool call back into the engine's
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
|
-
import { type ProviderName, type ModelRole, type ThinkLevel, catalogMetadata } from "../../ai";
|
|
3
|
+
import { type ProviderName, type ModelRole, type ThinkLevel, catalogMetadata, PROVIDER_NAMES } from "../../ai";
|
|
4
4
|
|
|
5
5
|
export interface LaunchFlags {
|
|
6
6
|
list: boolean;
|
|
@@ -39,7 +39,10 @@ function takeValue(args: string[], index: number, inlinePrefix: string): { value
|
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
export function isProviderName(input: string | undefined): input is ProviderName {
|
|
42
|
-
|
|
42
|
+
// Validate against the canonical registry, not a hand-maintained subset — the
|
|
43
|
+
// old 5-name list silently rejected every OpenAI-compat provider (groq,
|
|
44
|
+
// deepseek, openrouter, …) at `/agents <role> provider <name>`.
|
|
45
|
+
return input !== undefined && (PROVIDER_NAMES as readonly string[]).includes(input);
|
|
43
46
|
}
|
|
44
47
|
|
|
45
48
|
export function isThinkingLevel(input: string | undefined): input is ThinkLevel {
|