jeo-code 0.6.27 → 0.6.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/README.ja.md +2 -6
- package/README.ko.md +2 -6
- package/README.md +2 -6
- package/README.zh.md +2 -6
- package/package.json +1 -1
- package/src/agent/compaction.ts +10 -1
- package/src/agent/engine.ts +62 -16
- package/src/agent/loop.ts +3 -0
- package/src/ai/model-catalog.ts +12 -5
- package/src/ai/model-manager.ts +1 -0
- package/src/ai/providers/anthropic.ts +121 -21
- package/src/ai/providers/antigravity.ts +6 -0
- package/src/ai/providers/errors.ts +18 -0
- package/src/ai/providers/gemini.ts +84 -28
- package/src/ai/providers/openai-compatible-catalog.ts +10 -4
- package/src/ai/providers/openai-responses.ts +76 -19
- package/src/ai/types.ts +55 -2
- package/src/commands/launch.ts +90 -22
- package/src/tui/app.ts +38 -6
- package/src/tui/components/ascii-art.ts +27 -31
|
@@ -11,6 +11,12 @@ const DEPRECATED_TEMPERATURE = "`temperature` is deprecated for this model.";
|
|
|
11
11
|
const CLAUDE_CODE_VERSION = "2.1.63";
|
|
12
12
|
const CLAUDE_CODE_SYSTEM_INSTRUCTION = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
|
|
13
13
|
const CLAUDE_BILLING_HEADER_PREFIX = "x-anthropic-billing-header:";
|
|
14
|
+
/**
 * Comma-separated beta flags for API-key requests.
 * - `interleaved-thinking-2025-05-14` enables thinking together with tools.
 * - `prompt-caching-scope-2026-01-05` gives scoped cache breakpoints.
 */
const ANTHROPIC_API_KEY_BETA = ["interleaved-thinking-2025-05-14", "prompt-caching-scope-2026-01-05"].join(",");
|
|
14
20
|
const ANTHROPIC_OAUTH_BETA = [
|
|
15
21
|
"claude-code-20250219",
|
|
16
22
|
"oauth-2025-04-20",
|
|
@@ -88,28 +94,76 @@ function anthropicThinkingBudget(effort: CallOptions["reasoningEffort"], maxToke
|
|
|
88
94
|
return Math.min(budget, Math.max(1024, maxTokens - 1024));
|
|
89
95
|
}
|
|
90
96
|
|
|
97
|
+
/** One Anthropic wire content block (text, image, thinking, tool_use, tool_result, ...). */
type AnthropicContentBlock = Record<string, unknown>;
/** One Anthropic wire message: plain string content or an array of content blocks. */
type AnthropicMessage = { role: string; content: string | AnthropicContentBlock[] };

/**
 * True when an assistant turn can be replayed as native tool_use + thinking blocks.
 *
 * All of the following must hold:
 *  - thinking is enabled for this call,
 *  - the turn carries structured `toolUse`,
 *  - it has at least one reasoning artifact recorded for provider "anthropic" and the
 *    same `model` that yields a valid block (a `signature` or a `redacted` payload).
 *
 * Native tool_use -> tool_result is what makes Claude keep the prior thinking blocks
 * (plain-text tool feedback gets them stripped on most models), so this is the core of
 * cross-step reasoning continuity.
 *
 * @param m               The message to test.
 * @param model           Model key the artifacts must have been recorded under.
 * @param thinkingEnabled Whether thinking is on for this call.
 */
export function anthropicNativizable(m: Message, model: string, thinkingEnabled: boolean): boolean {
  return thinkingEnabled
    && !!m.toolUse?.length
    && !!m.reasoningArtifacts?.some(a => a.provider === "anthropic" && a.model === model && (!!a.signature || !!a.redacted));
}

/**
 * Build Anthropic wire messages, reconstructing native tool_use / tool_result / thinking
 * blocks for matching turns. System messages are dropped here.
 *
 * Nativization is decided per PAIR: an assistant turn is emitted as native blocks only when
 * it is nativizable AND the immediately following message is a user turn whose `toolResults`
 * carry exactly the ids of its `toolUse` entries. The tool-result turn is nativized under the
 * same condition. This guarantees a native tool_use always travels with its native
 * tool_result (Anthropic errors on a mismatch); a nativizable assistant turn with no matching
 * reply (last in history, followed by something else, or with mismatched ids) falls back to
 * the plain shape instead of producing a request the API would reject.
 *
 * When `thinkingEnabled` is false (or stripped on a fail-safe retry) everything falls back to
 * the plain string/image content, which is the always-valid shape.
 *
 * @param messages        Conversation history (system messages are filtered out).
 * @param model           Model key used to match reasoning artifacts.
 * @param thinkingEnabled Whether native thinking/tool blocks may be reconstructed.
 * @returns Messages in Anthropic wire shape, one per non-system input message.
 */
export function buildAnthropicMessages(messages: Message[], model: string, thinkingEnabled: boolean): AnthropicMessage[] {
  const nonSystem = messages.filter(m => m.role !== "system");

  // Plain shape: string content, or image blocks followed by an optional text block.
  const plain = (m: Message): AnthropicMessage => ({
    role: m.role,
    content: m.images?.length
      ? [
          ...m.images.map((img): AnthropicContentBlock => ({ type: "image", source: { type: "base64", media_type: img.mediaType, data: img.data } })),
          ...(m.content ? [{ type: "text", text: m.content } as AnthropicContentBlock] : []),
        ]
      : m.content,
  });

  // True when nonSystem[i] is a nativizable assistant turn whose tool calls are all answered
  // (and only those) by the tool results of the user turn right after it.
  const startsNativePair = (i: number): boolean => {
    const assistant = nonSystem[i];
    const reply = nonSystem[i + 1];
    if (!assistant || assistant.role !== "assistant" || !anthropicNativizable(assistant, model, thinkingEnabled)) return false;
    if (!reply || reply.role !== "user" || !reply.toolResults?.length) return false;
    const issued = new Set((assistant.toolUse ?? []).map(tu => tu.id));
    const answered = new Set(reply.toolResults.map(tr => tr.id));
    return issued.size === answered.size && [...issued].every(id => answered.has(id));
  };

  return nonSystem.map((m, i) => {
    if (m.role === "assistant" && startsNativePair(i)) {
      const blocks: AnthropicContentBlock[] = [];
      // Thinking blocks first, then the tool calls, mirroring the order the model produced.
      for (const a of m.reasoningArtifacts ?? []) {
        if (a.provider !== "anthropic" || a.model !== model) continue;
        if (a.signature) blocks.push({ type: "thinking", thinking: a.text ?? "", signature: a.signature });
        else if (a.redacted) blocks.push({ type: "redacted_thinking", data: a.redacted });
      }
      for (const tu of m.toolUse ?? []) blocks.push({ type: "tool_use", id: tu.id, name: tu.tool, input: tu.arguments });
      return { role: "assistant", content: blocks };
    }
    // A tool-result user turn is nativized iff its preceding assistant turn was.
    if (m.role === "user" && i > 0 && startsNativePair(i - 1)) {
      const blocks: AnthropicContentBlock[] = (m.toolResults ?? []).map(tr => ({
        type: "tool_result", tool_use_id: tr.id, content: tr.output, is_error: tr.isError,
      }));
      if (m.toolResultExtra) blocks.push({ type: "text", text: m.toolResultExtra });
      return { role: "user", content: blocks };
    }
    return plain(m);
  });
}
|
|
150
|
+
|
|
91
151
|
export function anthropicPayload(
|
|
92
152
|
messages: Message[],
|
|
93
153
|
options: CallOptions,
|
|
94
154
|
stream: boolean,
|
|
95
155
|
includeTemperature: boolean,
|
|
96
156
|
credential: Credential = { kind: "none", provider: "anthropic" },
|
|
157
|
+
stripArtifacts = false,
|
|
97
158
|
): string {
|
|
98
159
|
const model = stripAnthropicPrefix(options.model);
|
|
99
160
|
const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
|
|
100
|
-
// Image attachments
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
content: m.images?.length
|
|
107
|
-
? [
|
|
108
|
-
...m.images.map((img): ContentBlock => ({ type: "image", source: { type: "base64", media_type: img.mediaType, data: img.data } })),
|
|
109
|
-
...(m.content ? [{ type: "text", text: m.content } as ContentBlock] : []),
|
|
110
|
-
]
|
|
111
|
-
: m.content,
|
|
112
|
-
}));
|
|
161
|
+
// Image attachments + native tool/thinking-block reconstruction live in buildAnthropicMessages.
|
|
162
|
+
const maxTokens = options.maxTokens ?? 4000;
|
|
163
|
+
const thinkingBudget = anthropicThinkingBudget(options.reasoningEffort, maxTokens);
|
|
164
|
+
// Reconstruct native tool_use / tool_result / thinking blocks for same-model turns when
|
|
165
|
+
// thinking is enabled (and not stripped by a fail-safe retry); else plain string/image.
|
|
166
|
+
const anthropicMessages = buildAnthropicMessages(messages, options.model, thinkingBudget !== undefined && !stripArtifacts);
|
|
113
167
|
// Conversation prompt caching (gjc parity — the main same-model latency gap):
|
|
114
168
|
// one breakpoint on the LAST message caches the entire conversation prefix, so
|
|
115
169
|
// each agent-loop step only pays input processing for the new tail instead of
|
|
@@ -125,8 +179,7 @@ export function anthropicPayload(
|
|
|
125
179
|
last.content[last.content.length - 1] = { ...tail, cache_control: { type: "ephemeral" } };
|
|
126
180
|
}
|
|
127
181
|
}
|
|
128
|
-
|
|
129
|
-
const thinkingBudget = anthropicThinkingBudget(options.reasoningEffort, maxTokens);
|
|
182
|
+
|
|
130
183
|
const payload: Record<string, unknown> = {
|
|
131
184
|
model,
|
|
132
185
|
messages: anthropicMessages,
|
|
@@ -162,13 +215,14 @@ export function anthropicRequest(
|
|
|
162
215
|
credential: Credential,
|
|
163
216
|
stream: boolean,
|
|
164
217
|
includeTemperature: boolean,
|
|
218
|
+
stripArtifacts = false,
|
|
165
219
|
): { url: string; headers: Record<string, string>; body: string } {
|
|
166
220
|
return {
|
|
167
221
|
// Anthropic-compatible providers (z.ai, MiniMax, …) accept the Messages wire
|
|
168
222
|
// format at their own host; an explicit baseUrl pins `${base}/v1/messages`.
|
|
169
223
|
url: options.baseUrl ? `${options.baseUrl.replace(/\/$/, "")}/v1/messages` : ANTHROPIC_URL,
|
|
170
224
|
headers: headersFor(credential, stream),
|
|
171
|
-
body: anthropicPayload(messages, options, stream, includeTemperature, credential),
|
|
225
|
+
body: anthropicPayload(messages, options, stream, includeTemperature, credential, stripArtifacts),
|
|
172
226
|
};
|
|
173
227
|
}
|
|
174
228
|
|
|
@@ -176,14 +230,21 @@ function isDeprecatedTemperatureError(status: number, detail: string): boolean {
|
|
|
176
230
|
return status === 400 && detail.includes(DEPRECATED_TEMPERATURE);
|
|
177
231
|
}
|
|
178
232
|
|
|
233
|
+
/**
 * Heuristic: does this error look like a rejected replayed reasoning artifact?
 *
 * A 400 whose body names thinking / signature / redacted_thinking is taken to mean a
 * replayed artifact was refused (expired signature, edited history, thinking toggled).
 * The caller uses this to retry once with artifacts stripped (plain string history) so the
 * turn survives.
 *
 * @param status HTTP status of the failed response.
 * @param detail Response body text of the failed response.
 * @returns true only for status 400 with a matching body (case-insensitive).
 */
function isReasoningArtifactError(status: number, detail: string): boolean {
  if (status !== 400) return false;
  return /thinking|signature|redacted_thinking/i.test(detail);
}
|
|
239
|
+
|
|
179
240
|
async function postAnthropic(
|
|
180
241
|
messages: Message[],
|
|
181
242
|
options: CallOptions,
|
|
182
243
|
credential: Credential,
|
|
183
244
|
stream: boolean,
|
|
184
245
|
): Promise<Response> {
|
|
185
|
-
const send = (includeTemperature: boolean) => {
|
|
186
|
-
const { url, headers, body } = anthropicRequest(messages, options, credential, stream, includeTemperature);
|
|
246
|
+
const send = (includeTemperature: boolean, stripArtifacts = false) => {
|
|
247
|
+
const { url, headers, body } = anthropicRequest(messages, options, credential, stream, includeTemperature, stripArtifacts);
|
|
187
248
|
return fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
188
249
|
};
|
|
189
250
|
|
|
@@ -196,6 +257,12 @@ async function postAnthropic(
|
|
|
196
257
|
if (response.ok) return response;
|
|
197
258
|
throw await providerHttpError("Anthropic", response, stream ? "(stream)" : undefined);
|
|
198
259
|
}
|
|
260
|
+
// Fail-safe: a rejected replay artifact → retry once with artifacts stripped (plain history).
|
|
261
|
+
if (isReasoningArtifactError(response.status, detail)) {
|
|
262
|
+
response = await send(true, true);
|
|
263
|
+
if (response.ok) return response;
|
|
264
|
+
throw await providerHttpError("Anthropic", response, stream ? "(stream)" : undefined);
|
|
265
|
+
}
|
|
199
266
|
|
|
200
267
|
throw new ProviderHttpError(
|
|
201
268
|
"Anthropic",
|
|
@@ -233,8 +300,16 @@ export const anthropicAdapter: ProviderAdapter = {
|
|
|
233
300
|
supportsNativeTools: true,
|
|
234
301
|
async call(messages, options, credential) {
|
|
235
302
|
const response = await postAnthropic(messages, options, credential, false);
|
|
236
|
-
const result = (await response.json()) as { content: { type: string; text?: string; name?: string; input?: unknown }[]; stop_reason?: string; usage?: AnthropicUsage };
|
|
303
|
+
const result = (await response.json()) as { content: { type: string; text?: string; name?: string; input?: unknown; thinking?: string; signature?: string; data?: string }[]; stop_reason?: string; usage?: AnthropicUsage };
|
|
237
304
|
if (result.usage) options.onUsage?.({ inputTokens: totalInputTokens(result.usage), outputTokens: result.usage.output_tokens });
|
|
305
|
+
// Capture thinking/redacted blocks as replay artifacts (parity with the stream path).
|
|
306
|
+
for (const c of result.content) {
|
|
307
|
+
if (c.type === "thinking" && (c.thinking || c.signature)) {
|
|
308
|
+
options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, text: c.thinking || undefined, signature: c.signature });
|
|
309
|
+
} else if (c.type === "redacted_thinking" && c.data) {
|
|
310
|
+
options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, redacted: c.data });
|
|
311
|
+
}
|
|
312
|
+
}
|
|
238
313
|
// Prefer a native tool call (re-serialized to canonical JSON) over any stray text.
|
|
239
314
|
const toolCall = serializeToolCalls(
|
|
240
315
|
result.content
|
|
@@ -256,12 +331,16 @@ export const anthropicAdapter: ProviderAdapter = {
|
|
|
256
331
|
// never as text_delta — accumulate per block index, then re-serialize to canonical
|
|
257
332
|
// JSON and yield it once at the end (concatenation still equals call()).
|
|
258
333
|
const toolBlocks = new Map<number, { name: string; args: string }>();
|
|
334
|
+
// Thinking blocks stream as content_block_start(type:thinking) + thinking_delta(text)
|
|
335
|
+
// + signature_delta(signature). Accumulate per index and emit one ReasoningArtifact per
|
|
336
|
+
// block on stream end so the signed thought can be replayed (gajae continuity).
|
|
337
|
+
const thinkBlocks = new Map<number, { text: string; signature?: string }>();
|
|
259
338
|
for await (const data of readSse(response.body)) {
|
|
260
339
|
let evt: {
|
|
261
340
|
type?: string;
|
|
262
341
|
index?: number;
|
|
263
|
-
content_block?: { type?: string; name?: string };
|
|
264
|
-
delta?: { type?: string; text?: string; partial_json?: string; thinking?: string; stop_reason?: string };
|
|
342
|
+
content_block?: { type?: string; name?: string; data?: string };
|
|
343
|
+
delta?: { type?: string; text?: string; partial_json?: string; thinking?: string; signature?: string; stop_reason?: string };
|
|
265
344
|
message?: { usage?: AnthropicUsage };
|
|
266
345
|
usage?: { output_tokens?: number };
|
|
267
346
|
};
|
|
@@ -272,6 +351,11 @@ export const anthropicAdapter: ProviderAdapter = {
|
|
|
272
351
|
}
|
|
273
352
|
if (evt.type === "content_block_start" && evt.content_block?.type === "tool_use" && typeof evt.index === "number") {
|
|
274
353
|
toolBlocks.set(evt.index, { name: evt.content_block.name ?? "", args: "" });
|
|
354
|
+
} else if (evt.type === "content_block_start" && evt.content_block?.type === "thinking" && typeof evt.index === "number") {
|
|
355
|
+
thinkBlocks.set(evt.index, { text: "" });
|
|
356
|
+
} else if (evt.type === "content_block_start" && evt.content_block?.type === "redacted_thinking" && evt.content_block.data) {
|
|
357
|
+
// Redacted thinking carries opaque `data` directly (no deltas) — emit immediately.
|
|
358
|
+
options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, redacted: evt.content_block.data });
|
|
275
359
|
} else if (evt.type === "content_block_delta" && evt.delta?.type === "input_json_delta" && typeof evt.index === "number") {
|
|
276
360
|
const b = toolBlocks.get(evt.index);
|
|
277
361
|
if (b) b.args += evt.delta.partial_json ?? "";
|
|
@@ -280,6 +364,15 @@ export const anthropicAdapter: ProviderAdapter = {
|
|
|
280
364
|
yield evt.delta.text;
|
|
281
365
|
} else if (evt.type === "content_block_delta" && evt.delta?.type === "thinking_delta" && evt.delta.thinking) {
|
|
282
366
|
options.onReasoning?.(evt.delta.thinking);
|
|
367
|
+
if (typeof evt.index === "number") {
|
|
368
|
+
const tb = thinkBlocks.get(evt.index) ?? { text: "" };
|
|
369
|
+
tb.text += evt.delta.thinking;
|
|
370
|
+
thinkBlocks.set(evt.index, tb);
|
|
371
|
+
}
|
|
372
|
+
} else if (evt.type === "content_block_delta" && evt.delta?.type === "signature_delta" && evt.delta.signature && typeof evt.index === "number") {
|
|
373
|
+
const tb = thinkBlocks.get(evt.index) ?? { text: "" };
|
|
374
|
+
tb.signature = (tb.signature ?? "") + evt.delta.signature;
|
|
375
|
+
thinkBlocks.set(evt.index, tb);
|
|
283
376
|
} else if (evt.type === "message_start" && evt.message?.usage) {
|
|
284
377
|
// Cache only — usage is reported ONCE at message_delta so an accumulating
|
|
285
378
|
// sink can't double-count input (and a pre-first-chunk retry that replays
|
|
@@ -290,6 +383,12 @@ export const anthropicAdapter: ProviderAdapter = {
|
|
|
290
383
|
if (evt.usage) options.onUsage?.({ inputTokens: cachedInput, outputTokens: evt.usage.output_tokens });
|
|
291
384
|
}
|
|
292
385
|
}
|
|
386
|
+
// Emit captured thinking blocks as replay artifacts (signed thought + signature).
|
|
387
|
+
for (const tb of thinkBlocks.values()) {
|
|
388
|
+
if (tb.text || tb.signature) {
|
|
389
|
+
options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, text: tb.text || undefined, signature: tb.signature });
|
|
390
|
+
}
|
|
391
|
+
}
|
|
293
392
|
const envelope = serializeAccumulatedToolCalls(toolBlocks);
|
|
294
393
|
if (envelope) { yieldedAny = true; yield envelope; }
|
|
295
394
|
if (!yieldedAny) throw emptyCompletionError(stopReason);
|
|
@@ -361,6 +460,7 @@ function headersFor(credential: Credential, stream: boolean): Record<string, str
|
|
|
361
460
|
"content-type": "application/json",
|
|
362
461
|
"x-api-key": credential.token,
|
|
363
462
|
"anthropic-version": "2023-06-01",
|
|
463
|
+
"anthropic-beta": ANTHROPIC_API_KEY_BETA,
|
|
364
464
|
};
|
|
365
465
|
}
|
|
366
466
|
throw new Error("anthropic adapter requires a credential");
|
|
@@ -108,6 +108,12 @@ export async function resolveAntigravityProjectId(
|
|
|
108
108
|
|
|
109
109
|
type CcaPart = { text: string } | { inlineData: { mimeType: string; data: string } };
|
|
110
110
|
|
|
111
|
+
// Reasoning-artifact replay (signed thinking / thoughtSignature / encrypted reasoning) is
|
|
112
|
+
// deliberately OUT OF SCOPE for antigravity: it serves Gemini- and Claude-shaped models over
|
|
113
|
+
// the CCA wire (neither the native Anthropic messages nor the public Gemini shape), so it
|
|
114
|
+
// captures no artifacts and replays none — Message.toolUse/toolResults/reasoningArtifacts are
|
|
115
|
+
// ignored here. The provider-keyed match guard (D3) keeps "anthropic"/"gemini" artifacts from
|
|
116
|
+
// ever being re-injected by this adapter, so there is no cross-adapter leakage.
|
|
111
117
|
function antigravityContents(messages: Message[]): { role: "user" | "model"; parts: CcaPart[] }[] {
|
|
112
118
|
const contents: { role: "user" | "model"; parts: CcaPart[] }[] = [];
|
|
113
119
|
for (const m of messages) {
|
|
@@ -54,6 +54,24 @@ export function parseRetryFromBody(detail: string | null | undefined): number |
|
|
|
54
54
|
* and any `Retry-After`. Use at every adapter's `!response.ok` site so the retry
|
|
55
55
|
* layer sees a uniform, status-carrying, backoff-aware error.
|
|
56
56
|
*/
|
|
57
|
+
/**
 * One-shot reasoning-artifact fail-safe.
 *
 * Sends the request with artifacts; if it fails and `isArtifactError` recognises the failure
 * as a rejected replayed reasoning artifact (signature / thoughtSignature / encrypted
 * reasoning item: expired signature, edited history, toggled thinking), retries ONCE with
 * artifacts stripped (plain history).
 *
 * NOTE: the match is a heuristic on the error body text; tighten it to structured error
 * codes if/when the providers expose them.
 *
 * @param send            Rebuilds and fetches the request; `stripArtifacts` is false for the
 *                        first attempt and true for the retry.
 * @param isArtifactError Decides from status + body text whether the failure warrants a retry.
 * @returns The first response when it is ok or not an artifact error (its body is still
 *          unread, so the caller can report it); otherwise the retry's response, whatever
 *          its status.
 */
export async function fetchWithArtifactFailSafe(
  send: (stripArtifacts: boolean) => Promise<Response>,
  isArtifactError: (status: number, body: string) => boolean,
): Promise<Response> {
  const first = await send(false);
  if (first.ok) return first;
  // Read a clone so `first` itself stays readable for the caller's own error reporting.
  const body = await first.clone().text().catch(() => "");
  if (!isArtifactError(first.status, body)) return first;
  // `first` is being discarded in favour of the retry: cancel its unread body so the
  // buffered copy is released promptly instead of lingering until garbage collection.
  void first.body?.cancel().catch(() => {});
  return send(true);
}
|
|
74
|
+
|
|
57
75
|
export async function providerHttpError(provider: string, response: Response, context?: string): Promise<ProviderHttpError> {
|
|
58
76
|
const detail = await response.text().catch(() => "");
|
|
59
77
|
const retryAfterMs = parseRetryAfter(response.headers.get("retry-after")) ?? parseRetryFromBody(detail);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { Credential } from "../../auth";
|
|
2
2
|
import type { CallOptions, Message, ProviderAdapter } from "../types";
|
|
3
3
|
import { readSse } from "../sse";
|
|
4
|
-
import { providerHttpError } from "./errors";
|
|
4
|
+
import { providerHttpError, fetchWithArtifactFailSafe } from "./errors";
|
|
5
5
|
import { jeoEnv } from "../../util/env";
|
|
6
6
|
import { serializeToolCalls } from "../../agent/tool-schemas";
|
|
7
7
|
|
|
@@ -37,35 +37,62 @@ export function geminiThinkingBudget(model: string, effort?: CallOptions["reason
|
|
|
37
37
|
return budget;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
|
|
41
|
+
/**
 * Whether an assistant turn can be replayed as native functionCall + thoughtSignature.
 *
 * Requires thinking to be on, structured `toolUse` on the turn, and at least one reasoning
 * artifact recorded for provider "gemini" under the same `modelKey` that carries a
 * `thoughtSignature`.
 *
 * @param m               The message to test.
 * @param modelKey        Model key the artifact must have been recorded under.
 * @param thinkingEnabled Whether thinking is on for this call.
 */
export function geminiNativizable(m: Message, modelKey: string, thinkingEnabled: boolean): boolean {
  if (!thinkingEnabled) return false;
  if (!m.toolUse?.length) return false;
  const artifacts = m.reasoningArtifacts ?? [];
  return artifacts.some(a => a.provider === "gemini" && a.model === modelKey && !!a.thoughtSignature);
}
|
|
40
48
|
/** Shared Gemini request payload (contents + generationConfig + systemInstruction)
|
|
41
49
|
* used by BOTH the public generativelanguage path (API key) and the Cloud Code
|
|
42
50
|
* Assist path (OAuth) — only the envelope/endpoint differs. */
|
|
43
|
-
export function buildGeminiPayload(messages: Message[], options: CallOptions): { geminiModel: string; payload: Record<string, unknown> } {
|
|
51
|
+
export function buildGeminiPayload(messages: Message[], options: CallOptions, stripArtifacts = false): { geminiModel: string; payload: Record<string, unknown> } {
|
|
44
52
|
const resolvedModel = options.model.replace(/^(google|gemini)\//, "");
|
|
45
53
|
let geminiModel = resolvedModel;
|
|
46
54
|
if (!geminiModel || geminiModel === "claude-3-5-sonnet") geminiModel = "gemini-2.0-flash";
|
|
47
55
|
|
|
48
56
|
const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
|
|
57
|
+
const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
|
|
58
|
+
const thinkingEnabled = thinkingBudget !== undefined && !stripArtifacts;
|
|
49
59
|
// Gemini requires strictly ALTERNATING user/model turns. jeo histories can carry
|
|
50
60
|
// consecutive same-role messages (a compaction summary prepended before a tool-result,
|
|
51
61
|
// back-to-back tool results, etc.), so coalesce adjacent same-role turns into one
|
|
52
|
-
// content block — otherwise the API rejects the request mid-session.
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
62
|
+
// content block — otherwise the API rejects the request mid-session. Native
|
|
63
|
+
// functionCall/functionResponse parts (with thoughtSignature) are reconstructed for
|
|
64
|
+
// same-model turns to preserve cross-step thought context; else plain text.
|
|
65
|
+
type GeminiPart = Record<string, unknown>;
|
|
66
|
+
const nonSystem = messages.filter(m => m.role !== "system");
|
|
67
|
+
const contents: { role: string; parts: GeminiPart[] }[] = [];
|
|
68
|
+
nonSystem.forEach((m, i) => {
|
|
56
69
|
const role = m.role === "assistant" ? "model" : "user";
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
70
|
+
let parts: GeminiPart[];
|
|
71
|
+
if (m.role === "assistant" && geminiNativizable(m, options.model, thinkingEnabled)) {
|
|
72
|
+
const sig = m.reasoningArtifacts!.find(a => a.provider === "gemini" && a.model === options.model && a.thoughtSignature)?.thoughtSignature;
|
|
73
|
+
parts = m.toolUse!.map((tu, idx) => {
|
|
74
|
+
const p: GeminiPart = { functionCall: { name: tu.tool, args: tu.arguments } };
|
|
75
|
+
if (idx === 0 && sig) p.thoughtSignature = sig; // bind the turn signature to the first call
|
|
76
|
+
return p;
|
|
77
|
+
});
|
|
78
|
+
} else if (m.role === "user" && m.toolResults?.length && i > 0
|
|
79
|
+
&& nonSystem[i - 1].role === "assistant"
|
|
80
|
+
&& geminiNativizable(nonSystem[i - 1], options.model, thinkingEnabled)) {
|
|
81
|
+
const prevToolUse = nonSystem[i - 1].toolUse ?? [];
|
|
82
|
+
parts = m.toolResults.map(tr => ({
|
|
83
|
+
functionResponse: { name: prevToolUse.find(tu => tu.id === tr.id)?.tool ?? "tool", response: { output: tr.output } },
|
|
84
|
+
}));
|
|
85
|
+
if (m.toolResultExtra) parts.push({ text: m.toolResultExtra });
|
|
65
86
|
} else {
|
|
66
|
-
|
|
87
|
+
parts = [
|
|
88
|
+
...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
|
|
89
|
+
{ text: m.content },
|
|
90
|
+
];
|
|
67
91
|
}
|
|
68
|
-
|
|
92
|
+
const prev = contents[contents.length - 1];
|
|
93
|
+
if (prev && prev.role === role) prev.parts.push(...parts);
|
|
94
|
+
else contents.push({ role, parts });
|
|
95
|
+
});
|
|
69
96
|
|
|
70
97
|
const generationConfig: Record<string, unknown> = {
|
|
71
98
|
temperature: options.temperature ?? 0.2,
|
|
@@ -74,7 +101,7 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
|
|
|
74
101
|
// Function-calling and responseMimeType:json are mutually exclusive in the Gemini
|
|
75
102
|
// API — when native tools are declared, the functionCall parts replace JSON-in-prose.
|
|
76
103
|
if (options.jsonMode && !options.tools?.length) generationConfig.responseMimeType = "application/json";
|
|
77
|
-
|
|
104
|
+
|
|
78
105
|
// includeThoughts: required for Gemini to STREAM thought summaries (the `thought:true`
|
|
79
106
|
// parts thoughtOf() routes to onReasoning) — without it the model thinks silently.
|
|
80
107
|
if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { includeThoughts: true, thinkingBudget };
|
|
@@ -91,8 +118,8 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
|
|
|
91
118
|
return { geminiModel, payload };
|
|
92
119
|
}
|
|
93
120
|
|
|
94
|
-
export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent"): { url: string; headers: Record<string, string>; body: string } {
|
|
95
|
-
const { geminiModel, payload } = buildGeminiPayload(messages, options);
|
|
121
|
+
export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent", stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
|
|
122
|
+
const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
|
|
96
123
|
const oauth = credential.kind === "oauth" ? credential.token : undefined;
|
|
97
124
|
const apiKey = credential.kind === "api_key" ? credential.token : undefined;
|
|
98
125
|
let url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(geminiModel)}:${action}`;
|
|
@@ -123,8 +150,8 @@ export function getGeminiCliHeaders(modelId?: string): Record<string, string> {
|
|
|
123
150
|
* plain `jeo auth login gemini` works without any GEMINI_API_KEY. The body
|
|
124
151
|
* wraps the standard payload as `{ project, model, request }`.
|
|
125
152
|
*/
|
|
126
|
-
export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string): { url: string; headers: Record<string, string>; body: string } {
|
|
127
|
-
const { geminiModel, payload } = buildGeminiPayload(messages, options);
|
|
153
|
+
export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string, stripArtifacts = false): { url: string; headers: Record<string, string>; body: string } {
|
|
154
|
+
const { geminiModel, payload } = buildGeminiPayload(messages, options, stripArtifacts);
|
|
128
155
|
return {
|
|
129
156
|
url: `${CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse`,
|
|
130
157
|
headers: {
|
|
@@ -137,8 +164,22 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
|
|
|
137
164
|
};
|
|
138
165
|
}
|
|
139
166
|
|
|
167
|
+
/**
 * POST a Gemini request through the shared reasoning-artifact fail-safe
 * (`fetchWithArtifactFailSafe`).
 *
 * @param make   Builds the request (url, headers, body); receives `true` when the request
 *               must be built with artifacts stripped.
 * @param signal Optional abort signal forwarded to `fetch`.
 * @returns The response chosen by the fail-safe helper.
 */
function geminiFetchFailSafe(
  make: (stripArtifacts: boolean) => { url: string; headers: Record<string, string>; body: string },
  signal?: AbortSignal,
): Promise<Response> {
  // Build and send one attempt.
  const post = (stripArtifacts: boolean): Promise<Response> => {
    const { url, headers, body } = make(stripArtifacts);
    return fetch(url, { method: "POST", headers, body, signal });
  };
  // A 400 whose body names a thought signature or function call counts as a rejected replay.
  const rejectedReplay = (status: number, body: string): boolean =>
    status === 400 && /thoughtsignature|thought_signature|functioncall|function_call|signature/i.test(body);
  return fetchWithArtifactFailSafe(post, rejectedReplay);
}
|
|
180
|
+
|
|
140
181
|
interface GeminiChunk {
|
|
141
|
-
candidates?: { content?: { parts?: { text?: string; thought?: boolean; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
|
|
182
|
+
candidates?: { content?: { parts?: { text?: string; thought?: boolean; thoughtSignature?: string; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
|
|
142
183
|
promptFeedback?: { blockReason?: string };
|
|
143
184
|
usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
|
|
144
185
|
}
|
|
@@ -157,6 +198,19 @@ function textOf(chunk: GeminiChunk): string {
|
|
|
157
198
|
/** Concatenated text of the `thought`-flagged parts of the chunk's first candidate ("" when there are none). */
function thoughtOf(chunk: GeminiChunk): string {
  const parts = chunk.candidates?.[0]?.content?.parts;
  if (!parts) return "";
  let thought = "";
  for (const part of parts) {
    if (part.thought) thought += part.text ?? "";
  }
  return thought;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Report every not-yet-seen `thoughtSignature` on the parts of the chunk's first candidate
 * as a replay artifact via `options.onReasoningArtifact`.
 *
 * Gemini binds the signature to the functionCall part; it is replayed to keep cross-step
 * thought context.
 *
 * @param chunk   Parsed Gemini response chunk.
 * @param options Call options; `model` tags the artifact, `onReasoningArtifact` receives it.
 * @param seen    Signatures already reported this turn; mutated to dedup across the
 *                streamed chunks of one turn.
 */
function captureGeminiSignatures(chunk: GeminiChunk, options: CallOptions, seen: Set<string>): void {
  const parts = chunk.candidates?.[0]?.content?.parts ?? [];
  parts.forEach(part => {
    const signature = part.thoughtSignature;
    if (!signature || seen.has(signature)) return;
    seen.add(signature);
    options.onReasoningArtifact?.({ provider: "gemini", model: options.model, thoughtSignature: signature });
  });
}
|
|
160
214
|
/** Native Gemini functionCall parts → {tool, arguments} (gjc/antigravity parity). Kept
|
|
161
215
|
* separate from textOf so the re-serialized canonical JSON envelope drives the loop. */
|
|
162
216
|
function geminiFunctionCallsOf(chunk: GeminiChunk): { tool: string; arguments: Record<string, unknown> }[] {
|
|
@@ -197,14 +251,14 @@ function blockedReason(chunk: GeminiChunk): string | undefined {
|
|
|
197
251
|
async function* ccaTurn(messages: Message[], options: CallOptions, credential: Credential & { kind: "oauth" }): AsyncGenerator<string> {
|
|
198
252
|
const { resolveAntigravityProjectId } = await import("./antigravity");
|
|
199
253
|
const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
|
|
200
|
-
const
|
|
201
|
-
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
254
|
+
const response = await geminiFetchFailSafe(strip => geminiCliRequest(messages, options, credential.token, projectId, strip), options.signal);
|
|
202
255
|
if (!response.ok) throw await providerHttpError("Gemini (Cloud Code Assist)", response);
|
|
203
256
|
if (!response.body) return;
|
|
204
257
|
let lastUsage: GeminiChunk["usageMetadata"];
|
|
205
258
|
let yieldedAny = false;
|
|
206
259
|
let lastEmptyReason: string | undefined;
|
|
207
260
|
const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
|
|
261
|
+
const seenSigs = new Set<string>();
|
|
208
262
|
for await (const data of readSse(response.body)) {
|
|
209
263
|
let chunk: CcaChunk;
|
|
210
264
|
try {
|
|
@@ -216,6 +270,7 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
|
|
|
216
270
|
if (!inner) continue;
|
|
217
271
|
const thought = thoughtOf(inner);
|
|
218
272
|
if (thought) options.onReasoning?.(thought);
|
|
273
|
+
captureGeminiSignatures(inner, options, seenSigs);
|
|
219
274
|
const delta = textOf(inner);
|
|
220
275
|
if (delta) {
|
|
221
276
|
yieldedAny = true;
|
|
@@ -249,10 +304,10 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
249
304
|
for await (const delta of ccaTurn(messages, options, credential)) out += delta;
|
|
250
305
|
return out;
|
|
251
306
|
}
|
|
252
|
-
const
|
|
253
|
-
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
307
|
+
const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "generateContent", strip), options.signal);
|
|
254
308
|
if (!response.ok) throw await providerHttpError("Gemini", response);
|
|
255
309
|
const result = (await response.json()) as GeminiChunk;
|
|
310
|
+
captureGeminiSignatures(result, options, new Set());
|
|
256
311
|
if (result.usageMetadata) {
|
|
257
312
|
options.onUsage?.({ inputTokens: result.usageMetadata.promptTokenCount, outputTokens: result.usageMetadata.candidatesTokenCount });
|
|
258
313
|
}
|
|
@@ -271,14 +326,14 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
271
326
|
yield* ccaTurn(messages, options, credential);
|
|
272
327
|
return;
|
|
273
328
|
}
|
|
274
|
-
const
|
|
275
|
-
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
329
|
+
const response = await geminiFetchFailSafe(strip => geminiRequest(messages, options, credential, "streamGenerateContent", strip), options.signal);
|
|
276
330
|
if (!response.ok) throw await providerHttpError("Gemini", response, "(stream)");
|
|
277
331
|
if (!response.body) return;
|
|
278
332
|
let lastUsage: GeminiChunk["usageMetadata"];
|
|
279
333
|
let yieldedAny = false;
|
|
280
334
|
let lastEmptyReason: string | undefined;
|
|
281
335
|
const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
|
|
336
|
+
const seenSigs = new Set<string>();
|
|
282
337
|
for await (const data of readSse(response.body)) {
|
|
283
338
|
let chunk: GeminiChunk;
|
|
284
339
|
try {
|
|
@@ -288,6 +343,7 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
288
343
|
}
|
|
289
344
|
const thought = thoughtOf(chunk);
|
|
290
345
|
if (thought) options.onReasoning?.(thought);
|
|
346
|
+
captureGeminiSignatures(chunk, options, seenSigs);
|
|
291
347
|
const delta = textOf(chunk);
|
|
292
348
|
if (delta) {
|
|
293
349
|
yieldedAny = true;
|
|
@@ -23,6 +23,12 @@ export interface OpenAICompatProviderDef {
|
|
|
23
23
|
readonly apiKeyEnv: string;
|
|
24
24
|
/** Default model id (provider-prefixed) used by `--provider <name>`. */
|
|
25
25
|
readonly defaultModel: string;
|
|
26
|
+
/** Extra well-known model ids (BARE, not provider-prefixed) for the OFFLINE
|
|
27
|
+
* pick-list fallback shown by `/agents <role> provider <name>` and `--provider`.
|
|
28
|
+
* Live `/models` discovery supersedes this once the provider is logged in, so
|
|
29
|
+
* keep only stable/alias-style ids here (a stale id would 404 at inference).
|
|
30
|
+
* `defaultModel` is always surfaced first regardless of this list. */
|
|
31
|
+
readonly knownModels?: readonly string[];
|
|
26
32
|
/** Wire protocol: "openai" (/chat/completions, default) or "anthropic" (/v1/messages). */
|
|
27
33
|
readonly protocol?: "openai" | "anthropic";
|
|
28
34
|
/** True for subscription/plan products (coding-plan, portal, token-plan, code) rather than
|
|
@@ -35,12 +41,12 @@ export interface OpenAICompatProviderDef {
|
|
|
35
41
|
}
|
|
36
42
|
|
|
37
43
|
export const OPENAI_COMPAT_PROVIDERS: readonly OpenAICompatProviderDef[] = [
|
|
38
|
-
{ name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile" },
|
|
39
|
-
{ name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat" },
|
|
40
|
-
{ name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest" },
|
|
44
|
+
{ name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile", knownModels: ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "openai/gpt-oss-120b", "openai/gpt-oss-20b"] },
|
|
45
|
+
{ name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat", knownModels: ["deepseek-chat", "deepseek-reasoner"] },
|
|
46
|
+
{ name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest", knownModels: ["mistral-large-latest", "mistral-small-latest", "codestral-latest", "ministral-8b-latest"] },
|
|
41
47
|
{ name: "openrouter", label: "OpenRouter", baseUrl: "https://openrouter.ai/api/v1", apiKeyEnv: "OPENROUTER_API_KEY", defaultModel: "openrouter/openai/gpt-4o-mini", thinkingFormat: "openrouter" },
|
|
42
48
|
{ name: "together", label: "Together", baseUrl: "https://api.together.xyz/v1", apiKeyEnv: "TOGETHER_API_KEY", defaultModel: "together/meta-llama/Llama-3.3-70B-Instruct-Turbo" },
|
|
43
|
-
{ name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b" },
|
|
49
|
+
{ name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b", knownModels: ["llama-3.3-70b", "llama3.1-8b", "qwen-3-235b-a22b-instruct-2507"] },
|
|
44
50
|
{ name: "fireworks", label: "Fireworks", baseUrl: "https://api.fireworks.ai/inference/v1", apiKeyEnv: "FIREWORKS_API_KEY", defaultModel: "fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct" },
|
|
45
51
|
{ name: "nvidia", label: "NVIDIA", baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", defaultModel: "nvidia/meta/llama-3.3-70b-instruct" },
|
|
46
52
|
// Additional gjc-parity OpenAI-compatible clouds (authoritative base URLs + env vars).
|