jeo-code 0.1.0 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +160 -0
- package/README.ko.md +160 -0
- package/README.md +115 -297
- package/README.zh.md +160 -0
- package/package.json +11 -6
- package/scripts/install.sh +28 -28
- package/scripts/uninstall.sh +17 -15
- package/src/AGENTS.md +50 -0
- package/src/agent/AGENTS.md +49 -0
- package/src/agent/bash-fixups.ts +103 -0
- package/src/agent/compaction.ts +410 -19
- package/src/agent/config-schema.ts +119 -5
- package/src/agent/context-files.ts +314 -17
- package/src/agent/dev/AGENTS.md +36 -0
- package/src/agent/dev/advanced-analyzer.ts +12 -0
- package/src/agent/dev/evolution-bridge.ts +82 -0
- package/src/agent/dev/evolution-logger.ts +41 -0
- package/src/agent/dev/self-analysis.ts +64 -0
- package/src/agent/dev/self-improve.ts +24 -0
- package/src/agent/dev/spec-automation.ts +49 -0
- package/src/agent/engine.ts +804 -54
- package/src/agent/hooks.ts +273 -0
- package/src/agent/loop.ts +21 -1
- package/src/agent/memory.ts +201 -0
- package/src/agent/model-recency.ts +32 -0
- package/src/agent/output-minimizer.ts +108 -0
- package/src/agent/output-util.ts +64 -0
- package/src/agent/plan.ts +187 -0
- package/src/agent/seed.ts +52 -0
- package/src/agent/session.ts +235 -21
- package/src/agent/state.ts +286 -39
- package/src/agent/step-budget.ts +232 -0
- package/src/agent/subagents.ts +223 -26
- package/src/agent/task-tool.ts +272 -0
- package/src/agent/todo-tool.ts +87 -0
- package/src/agent/tokenizer.ts +117 -0
- package/src/agent/tool-registry.ts +54 -0
- package/src/agent/tools.ts +562 -103
- package/src/agent/web-search.ts +538 -0
- package/src/ai/AGENTS.md +44 -0
- package/src/ai/index.ts +1 -0
- package/src/ai/model-catalog-compat.ts +3 -1
- package/src/ai/model-catalog.ts +74 -9
- package/src/ai/model-discovery.ts +215 -17
- package/src/ai/model-manager.ts +346 -32
- package/src/ai/model-picker.ts +1 -1
- package/src/ai/model-registry.ts +4 -2
- package/src/ai/pricing.ts +84 -0
- package/src/ai/provider-registry.ts +23 -0
- package/src/ai/provider-status.ts +60 -16
- package/src/ai/providers/AGENTS.md +42 -0
- package/src/ai/providers/anthropic.ts +250 -31
- package/src/ai/providers/antigravity.ts +219 -0
- package/src/ai/providers/errors.ts +15 -1
- package/src/ai/providers/gemini.ts +196 -13
- package/src/ai/providers/ollama.ts +37 -7
- package/src/ai/providers/openai-responses.ts +173 -0
- package/src/ai/providers/openai.ts +64 -12
- package/src/ai/sse.ts +4 -1
- package/src/ai/types.ts +18 -1
- package/src/auth/AGENTS.md +41 -0
- package/src/auth/callback-server.ts +6 -1
- package/src/auth/flows/AGENTS.md +32 -0
- package/src/auth/flows/antigravity.ts +151 -0
- package/src/auth/flows/google-project.ts +190 -0
- package/src/auth/flows/google.ts +39 -18
- package/src/auth/flows/index.ts +15 -5
- package/src/auth/flows/openai.ts +2 -2
- package/src/auth/oauth.ts +8 -0
- package/src/auth/refresh.ts +44 -27
- package/src/auth/storage.ts +149 -26
- package/src/auth/types.ts +1 -1
- package/src/autopilot.ts +362 -0
- package/src/bun-imports.d.ts +4 -0
- package/src/cli/AGENTS.md +39 -0
- package/src/cli/runner.ts +148 -14
- package/src/cli.ts +13 -4
- package/src/commands/AGENTS.md +40 -0
- package/src/commands/approve.ts +62 -3
- package/src/commands/auth.ts +167 -25
- package/src/commands/chat.ts +37 -8
- package/src/commands/deep-interview.ts +633 -175
- package/src/commands/doctor.ts +84 -37
- package/src/commands/evolve-core.ts +18 -0
- package/src/commands/evolve.ts +2 -1
- package/src/commands/export.ts +176 -0
- package/src/commands/gjc.ts +52 -0
- package/src/commands/launch.ts +3549 -240
- package/src/commands/mcp.ts +3 -3
- package/src/commands/ooo-seed.ts +19 -0
- package/src/commands/ralplan.ts +253 -35
- package/src/commands/resume.ts +1 -1
- package/src/commands/session.ts +183 -0
- package/src/commands/setup-helpers.ts +10 -3
- package/src/commands/setup.ts +57 -16
- package/src/commands/skills.ts +78 -18
- package/src/commands/state.ts +198 -0
- package/src/commands/status.ts +84 -0
- package/src/commands/team.ts +340 -212
- package/src/commands/ultragoal.ts +122 -61
- package/src/commands/update.ts +244 -0
- package/src/ledger.ts +270 -0
- package/src/mcp/AGENTS.md +38 -0
- package/src/mcp/server.ts +115 -14
- package/src/mcp/tools.ts +42 -22
- package/src/md-modules.d.ts +4 -0
- package/src/prompts/AGENTS.md +41 -0
- package/src/prompts/agents/AGENTS.md +35 -0
- package/src/prompts/agents/architect.md +35 -0
- package/src/prompts/agents/critic.md +37 -0
- package/src/prompts/agents/executor.md +36 -0
- package/src/prompts/agents/planner.md +37 -0
- package/src/prompts/skills/AGENTS.md +36 -0
- package/src/prompts/skills/deep-dive/AGENTS.md +31 -0
- package/src/prompts/skills/deep-dive/SKILL.md +13 -0
- package/src/prompts/skills/deep-interview/AGENTS.md +31 -0
- package/src/prompts/skills/deep-interview/SKILL.md +12 -0
- package/src/prompts/skills/gjc/AGENTS.md +31 -0
- package/src/prompts/skills/gjc/SKILL.md +15 -0
- package/src/prompts/skills/ralplan/AGENTS.md +31 -0
- package/src/prompts/skills/ralplan/SKILL.md +11 -0
- package/src/prompts/skills/team/AGENTS.md +31 -0
- package/src/prompts/skills/team/SKILL.md +11 -0
- package/src/prompts/skills/ultragoal/AGENTS.md +31 -0
- package/src/prompts/skills/ultragoal/SKILL.md +11 -0
- package/src/skills/AGENTS.md +38 -0
- package/src/skills/catalog.ts +565 -31
- package/src/tui/AGENTS.md +43 -0
- package/src/tui/app.ts +1181 -92
- package/src/tui/components/AGENTS.md +42 -0
- package/src/tui/components/ascii-art.ts +257 -15
- package/src/tui/components/autocomplete.ts +98 -16
- package/src/tui/components/autopilot-status.ts +65 -0
- package/src/tui/components/category-index.ts +49 -0
- package/src/tui/components/code-view.ts +54 -11
- package/src/tui/components/color.ts +171 -2
- package/src/tui/components/config-panel.ts +82 -15
- package/src/tui/components/duration.ts +38 -0
- package/src/tui/components/evolution.ts +3 -3
- package/src/tui/components/footer.ts +91 -42
- package/src/tui/components/forge.ts +426 -31
- package/src/tui/components/hints.ts +54 -0
- package/src/tui/components/hud.ts +73 -0
- package/src/tui/components/index.ts +4 -0
- package/src/tui/components/input-box.ts +150 -0
- package/src/tui/components/layout.ts +11 -3
- package/src/tui/components/live-model-picker.ts +108 -0
- package/src/tui/components/markdown-table.ts +140 -0
- package/src/tui/components/markdown-text.ts +97 -0
- package/src/tui/components/meter.ts +4 -1
- package/src/tui/components/model-picker.ts +3 -2
- package/src/tui/components/provider-picker.ts +3 -2
- package/src/tui/components/section.ts +70 -0
- package/src/tui/components/select-list.ts +40 -10
- package/src/tui/components/skill-picker.ts +25 -0
- package/src/tui/components/slash.ts +244 -21
- package/src/tui/components/status.ts +272 -11
- package/src/tui/components/step-timeline.ts +218 -0
- package/src/tui/components/stream.ts +26 -9
- package/src/tui/components/themes.ts +212 -6
- package/src/tui/components/todo-card.ts +47 -0
- package/src/tui/components/tool-list.ts +58 -12
- package/src/tui/components/transcript.ts +120 -0
- package/src/tui/components/update-box.ts +31 -0
- package/src/tui/components/welcome.ts +162 -0
- package/src/tui/components/width.ts +163 -0
- package/src/tui/monitoring/AGENTS.md +31 -0
- package/src/tui/monitoring/hud-view.ts +55 -0
- package/src/tui/renderer.ts +112 -3
- package/src/tui/terminal.ts +40 -33
- package/src/util/AGENTS.md +39 -0
- package/src/util/clipboard-image.ts +118 -0
- package/src/util/env.ts +12 -0
- package/src/util/provider-error.ts +78 -0
- package/src/util/retry.ts +91 -6
- package/src/util/update-check.ts +64 -0
- package/src/commands/models.ts +0 -104
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import type { Credential } from "../../auth";
|
|
3
|
+
import type { CallOptions, Message, ProviderAdapter } from "../types";
|
|
4
|
+
import { readSse } from "../sse";
|
|
5
|
+
import { providerHttpError } from "./errors";
|
|
6
|
+
|
|
7
|
+
// Default Cloud Code Assist host for Antigravity requests (also the default
// `endpoint` argument of antigravityRequest).
const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
// Second host, used only as a fallback after the daily host.
const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
// Hosts in the order fetchAntigravity attempts them.
const ENDPOINTS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
|
|
10
|
+
|
|
11
|
+
/**
 * Build the `User-Agent` value sent with Antigravity requests, shaped as
 * `antigravity/<version> <os>/<arch>`.
 *
 * The version is taken from `PI_AI_ANTIGRAVITY_VERSION` when that variable holds
 * a non-empty value, otherwise "1.104.0". Node's `win32`, `x64` and `ia32`
 * identifiers are renamed to `windows`, `amd64` and `386`; any other
 * platform/arch string is passed through unchanged.
 */
export function getAntigravityUserAgent(): string {
  // `||` (not `??`) on purpose: an empty env value also falls back to the default.
  const clientVersion = process.env.PI_AI_ANTIGRAVITY_VERSION || "1.104.0";

  let osName: string = process.platform;
  if (osName === "win32") osName = "windows";

  let archName: string = process.arch;
  if (archName === "x64") archName = "amd64";
  else if (archName === "ia32") archName = "386";

  return `antigravity/${clientVersion} ${osName}/${archName}`;
}
|
|
17
|
+
|
|
18
|
+
/** Drop a single leading `antigravity/` namespace from a model id, if present. */
function antigravityModelId(model: string): string {
  const namespace = "antigravity/";
  if (!model.startsWith(namespace)) return model;
  return model.slice(namespace.length);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Look up a project id without any network call: the OAuth credential's own
 * `projectId` wins, then `GOOGLE_CLOUD_PROJECT`, then `GOOGLE_CLOUD_PROJECT_ID`.
 * Empty values are skipped; returns `undefined` when none is set.
 */
function projectIdFor(credential: Credential): string | undefined {
  if (credential.kind === "oauth" && credential.projectId) {
    return credential.projectId;
  }
  const { GOOGLE_CLOUD_PROJECT, GOOGLE_CLOUD_PROJECT_ID } = process.env;
  return GOOGLE_CLOUD_PROJECT || GOOGLE_CLOUD_PROJECT_ID || undefined;
}
|
|
26
|
+
|
|
27
|
+
// In-process cache of lazily discovered project ids, keyed by access token so a
// rotated token re-discovers. A discovered id is also handed to the best-effort
// persist step of resolveAntigravityProjectId (which by default writes it onto
// the stored OAuth record), so the next process start can skip discovery.
const discoveredProjects = new Map<string, string>();
|
|
31
|
+
|
|
32
|
+
/** Optional overrides for resolveAntigravityProjectId. */
export interface ResolveProjectOptions {
  /** Replaces the default discovery step. Called with the credential's access
   * token and must resolve to the project id. */
  discover?: (accessToken: string) => Promise<string>;
  /** Replaces the default persistence step. Called with the discovered project
   * id; a rejection is ignored by the resolver (best-effort). */
  persist?: (projectId: string) => Promise<void>;
  /** Turn abort — threaded into discovery fetches so a stalled loadCodeAssist
   * can never hang the first OAuth turn forever (round-5 #2). */
  signal?: AbortSignal;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Resolve the Cloud Code Assist project id for an Antigravity call:
 * stored credential → env → in-process cache → lazy loadCodeAssist/onboardUser
 * discovery (gjc parity).
 *
 * A discovered id is cached per access token and handed to the persist step so
 * users who logged in before discovery existed are healed without re-login.
 *
 * @param credential Must be an OAuth credential.
 * @param opts Optional `discover` / `persist` overrides and an abort signal.
 * @returns The resolved, non-empty project id.
 * @throws Error when the credential is not OAuth, when discovery fails, or when
 *   discovery yields an empty id. If `opts.signal` is already aborted when
 *   discovery fails, the original error is rethrown unchanged so callers can
 *   still recognise the cancellation.
 */
export async function resolveAntigravityProjectId(
  credential: Credential,
  opts: ResolveProjectOptions = {},
): Promise<string> {
  if (credential.kind !== "oauth") {
    throw new Error("Antigravity provider requires Google/Gemini CLI OAuth credentials. Run `jeo auth login gemini`.");
  }
  const direct = projectIdFor(credential);
  if (direct) return direct;
  const cached = discoveredProjects.get(credential.token);
  if (cached) return cached;

  // Uniform failure message; accepts anything that was thrown.
  const discoveryFailure = (reason: unknown): Error => {
    const message = (reason as { message?: unknown } | null | undefined)?.message;
    return new Error(
      `Antigravity project auto-discovery failed: ${message ?? reason}. ` +
      "Set GOOGLE_CLOUD_PROJECT(_ID) or re-run `jeo auth login gemini`.",
    );
  };

  const discover = opts.discover ?? (async (token: string) => {
    const { discoverGoogleProjectId, ANTIGRAVITY_DISCOVERY_METADATA } = await import("../../auth/flows/google-project");
    // Antigravity-client tokens discover with ANTIGRAVITY metadata; gemini-cli
    // tokens use the default gemini-cli metadata shape.
    return discoverGoogleProjectId(token, credential.provider === "antigravity"
      ? { metadata: { ...ANTIGRAVITY_DISCOVERY_METADATA }, extraHeaders: { "User-Agent": getAntigravityUserAgent() }, signal: opts.signal }
      : { signal: opts.signal });
  });

  let projectId: string;
  try {
    projectId = await discover(credential.token);
  } catch (err) {
    // A cancelled turn is not a discovery failure: keep the original error so
    // the caller sees the abort instead of advice about project configuration.
    if (opts.signal?.aborted) throw err;
    throw discoveryFailure(err);
  }
  // An empty id would be cached and returned, only to be rejected later by the
  // request builder with a misleading message — fail here with the real cause.
  if (!projectId) throw discoveryFailure("discovery returned an empty project id");
  discoveredProjects.set(credential.token, projectId);

  const persist = opts.persist ?? (async (id: string) => {
    const { getStoredOAuth, setOauthCredential } = await import("../../auth/storage");
    const owner = credential.provider === "antigravity" ? "antigravity" : "gemini";
    const stored = await getStoredOAuth(owner);
    // Never overwrite a project id that is already stored.
    if (stored && !stored.projectId) await setOauthCredential(owner, { ...stored, projectId: id });
  });
  try {
    await persist(projectId);
  } catch {
    // Persistence is best-effort; the in-process cache still serves this session.
  }
  return projectId;
}
|
|
90
|
+
|
|
91
|
+
/** One part of a Cloud Code Assist content turn: plain text or inline binary data. */
type CcaPart = { text: string } | { inlineData: { mimeType: string; data: string } };

/**
 * Convert chat messages into Cloud Code Assist `contents`.
 *
 * System messages are skipped, `assistant` maps to `model` and every other role
 * to `user`. Each message contributes its images (as inlineData parts) followed
 * by its text part; consecutive messages that map to the same role are merged
 * into a single turn.
 */
function antigravityContents(messages: Message[]): { role: "user" | "model"; parts: CcaPart[] }[] {
  type Turn = { role: "user" | "model"; parts: CcaPart[] };
  const turns: Turn[] = [];

  for (const message of messages) {
    if (message.role === "system") continue;

    const role: Turn["role"] = message.role === "assistant" ? "model" : "user";

    // Clipboard-pasted images go first, then the text part.
    const parts: CcaPart[] = [];
    for (const image of message.images ?? []) {
      parts.push({ inlineData: { mimeType: image.mediaType, data: image.data } });
    }
    parts.push({ text: message.content });

    const tail = turns.length > 0 ? turns[turns.length - 1] : undefined;
    if (tail && tail.role === role) {
      tail.parts.push(...parts);
    } else {
      turns.push({ role, parts });
    }
  }

  return turns;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Derive a session id from the first user message: a base-131 polynomial hash
 * over the UTF-8 bytes, masked to 63 bits and rendered as `-<decimal>`.
 * When there is no user message (or it has no content) the current timestamp
 * string is hashed instead.
 */
function sessionId(messages: Message[]): string {
  const MASK_63_BITS = (1n << 63n) - 1n;
  const firstUser = messages.find(m => m.role === "user");
  const seed = firstUser?.content ?? `${Date.now()}`;

  const bytes = Array.from(new TextEncoder().encode(seed));
  const digest = bytes.reduce((acc, byte) => (acc * 131n + BigInt(byte)) & MASK_63_BITS, 0n);

  return `-${digest.toString()}`;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Build the URL, headers and JSON body of an Antigravity
 * `streamGenerateContent` request.
 *
 * @param messages Conversation; system messages only feed the system instruction.
 * @param options Model, optional system prompt, temperature and max tokens.
 * @param credential Must be an OAuth credential (its token is the bearer token).
 * @param endpoint Host to target; defaults to the daily endpoint.
 * @param projectId Pre-resolved project id; falls back to credential/env lookup.
 * @throws Error when the credential is not OAuth or no project id is available.
 */
export function antigravityRequest(messages: Message[], options: CallOptions, credential: Credential, endpoint = ANTIGRAVITY_DAILY_ENDPOINT, projectId?: string): { url: string; headers: Record<string, string>; body: string } {
  if (credential.kind !== "oauth") throw new Error("Antigravity provider requires Google/Gemini CLI OAuth credentials.");

  const project = projectId ?? projectIdFor(credential);
  if (!project) {
    throw new Error(
      "Antigravity needs a Google Cloud projectId and auto-discovery has not run yet. " +
      "Set GOOGLE_CLOUD_PROJECT(_ID) or re-run `jeo auth login gemini`.",
    );
  }

  const model = antigravityModelId(options.model);
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;

  // Upstream Antigravity strips maxOutputTokens for non-Claude models; do the same.
  const isClaude = model.toLowerCase().includes("claude");
  const generationConfig: Record<string, unknown> = {
    ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
    ...(isClaude ? { maxOutputTokens: options.maxTokens ?? 4000 } : {}),
  };

  // Optional keys are only present when they carry a value.
  const request: Record<string, unknown> = {
    contents: antigravityContents(messages),
    sessionId: sessionId(messages),
    ...(systemPrompt ? { systemInstruction: { role: "user", parts: [{ text: systemPrompt }] } } : {}),
    ...(Object.keys(generationConfig).length > 0 ? { generationConfig } : {}),
  };

  const headers: Record<string, string> = {
    authorization: `Bearer ${credential.token}`,
    "content-type": "application/json",
    accept: "text/event-stream",
    "User-Agent": getAntigravityUserAgent(),
  };

  return {
    url: `${endpoint}/v1internal:streamGenerateContent?alt=sse`,
    headers,
    body: JSON.stringify({
      project,
      model,
      request,
      requestType: "agent",
      userAgent: "antigravity",
      requestId: `agent-${randomUUID()}`,
    }),
  };
}
|
|
159
|
+
|
|
160
|
+
/** Token counters reported in a chunk's `usageMetadata`. */
type CcaUsage = { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };

/** One SSE payload from Cloud Code Assist; the actual chunk sits under `response`. */
interface CcaChunk {
  response?: {
    candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
    usageMetadata?: CcaUsage;
  };
}

/**
 * Concatenate the text of every part of the first candidate.
 * Returns "" when any level of the chunk is missing; parts without text
 * contribute nothing.
 */
function textOf(chunk: CcaChunk): string {
  const parts = chunk.response?.candidates?.[0]?.content?.parts;
  if (!parts) return "";

  let text = "";
  for (const part of parts) text += part.text ?? "";
  return text;
}
|
|
171
|
+
|
|
172
|
+
/**
 * POST an Antigravity request, trying each endpoint in `ENDPOINTS` order.
 *
 * The project id is resolved once up front (stored credential → env → lazy
 * loadCodeAssist/onboardUser discovery). The first `ok` response is returned.
 * A 404 or 503 moves on to the next endpoint; any other status stops the loop.
 *
 * @returns The first successful response (body not yet consumed).
 * @throws The provider HTTP error built from the last failed response, or
 *   whatever project resolution / `fetch` itself throws.
 */
async function fetchAntigravity(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
  const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
  let last: Response | undefined;
  for (const endpoint of ENDPOINTS) {
    // We are falling back: release the previous failed response's body so its
    // connection is not held open until garbage collection. Only the final
    // failure keeps its body, because the error builder below reads it.
    if (last) await last.body?.cancel().catch(() => {});

    const { url, headers, body } = antigravityRequest(messages, options, credential, endpoint, projectId);
    const res = await fetch(url, { method: "POST", headers, body, signal: options.signal });
    if (res.ok) return res;
    last = res;
    if (res.status !== 404 && res.status !== 503) break;
  }
  // Unreachable while ENDPOINTS is non-empty; an explicit guard replaces `last!`.
  if (!last) throw new Error("Antigravity: no endpoint is configured.");
  throw await providerHttpError("Antigravity", last);
}
|
|
186
|
+
|
|
187
|
+
/**
 * Run one Antigravity turn and yield its text deltas.
 *
 * Yields nothing when the response has no body. Unparseable SSE payloads are
 * skipped. After the stream ends, the last seen usage metadata is reported once
 * through `options.onUsage` (thought tokens are added to output tokens), and an
 * error is thrown if no text was produced at all.
 */
async function* antigravityDeltas(messages: Message[], options: CallOptions, credential: Credential): AsyncGenerator<string> {
  const response = await fetchAntigravity(messages, options, credential);
  if (!response.body) return;

  let sawText = false;
  let usage: CcaUsage | undefined;
  for await (const data of readSse(response.body)) {
    let chunk: CcaChunk;
    try {
      chunk = JSON.parse(data);
    } catch {
      continue;
    }
    const delta = textOf(chunk);
    if (delta) {
      sawText = true;
      yield delta;
    }
    if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
  }

  if (usage) options.onUsage?.({ inputTokens: usage.promptTokenCount, outputTokens: (usage.candidatesTokenCount ?? 0) + (usage.thoughtsTokenCount ?? 0) });
  if (!sawText) throw new Error("Antigravity Cloud Code Assist returned an empty response.");
}

/**
 * Provider adapter for Antigravity. `call` returns the concatenated text of a
 * turn ("" when the response has no body); `stream` yields the same text as
 * deltas. Both share the single SSE loop above.
 */
export const antigravityAdapter: ProviderAdapter = {
  name: "antigravity",
  async call(messages, options, credential) {
    let out = "";
    for await (const delta of antigravityDeltas(messages, options, credential)) out += delta;
    return out;
  },
  async *stream(messages, options, credential) {
    yield* antigravityDeltas(messages, options, credential);
  },
};
|
|
@@ -36,6 +36,19 @@ export function parseRetryAfter(value: string | null | undefined): number | unde
|
|
|
36
36
|
return undefined;
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
+
/**
 * Extract a server-directed retry delay from a 429/503 response *body* (in ms).
 * Some providers (notably Google/Gemini) omit the `Retry-After` header and instead
 * put the hint in the JSON, e.g. `"retryDelay": "8s"` or `"Please retry in 8.6s"`.
 *
 * Both the seconds (`s`) and milliseconds (`ms`) units are understood. The
 * structured `"retryDelay"` field is preferred over the free-text hint.
 *
 * @param detail Raw response body text; may be empty, `null` or `undefined`.
 * @returns The delay in milliseconds (never negative), or `undefined` when no
 *   hint is present or its number cannot be parsed.
 */
export function parseRetryFromBody(detail: string | null | undefined): number | undefined {
  if (!detail) return undefined;
  // `ms` is listed before `s` so "350ms" is read as milliseconds. Without it the
  // hint would be missed entirely and the caller's generic backoff used instead.
  const m =
    detail.match(/"retryDelay"\s*:\s*"?([\d.]+)(ms|s)/i) ||
    detail.match(/retry in ([\d.]+)\s*(ms|s)/i);
  if (!m) return undefined;
  const amount = Number(m[1]);
  // Malformed numbers such as "1.2.3" parse to NaN and are treated as "no hint".
  if (!Number.isFinite(amount)) return undefined;
  const scale = m[2].toLowerCase() === "ms" ? 1 : 1000;
  return Math.max(0, amount * scale);
}
|
|
51
|
+
|
|
39
52
|
/**
|
|
40
53
|
* Build a {@link ProviderHttpError} from a non-ok `Response`, capturing the body
|
|
41
54
|
* and any `Retry-After`. Use at every adapter's `!response.ok` site so the retry
|
|
@@ -43,5 +56,6 @@ export function parseRetryAfter(value: string | null | undefined): number | unde
|
|
|
43
56
|
*/
|
|
44
57
|
export async function providerHttpError(provider: string, response: Response, context?: string): Promise<ProviderHttpError> {
  // A body that cannot be read is treated as empty rather than failing here.
  const detail = await response.text().catch(() => "");

  // The `Retry-After` header wins; the body hint is only a fallback.
  const headerDelayMs = parseRetryAfter(response.headers.get("retry-after"));
  const retryAfterMs = headerDelayMs ?? parseRetryFromBody(detail);

  return new ProviderHttpError(provider, response.status, detail, context, retryAfterMs);
}
|
|
@@ -2,31 +2,83 @@ import type { Credential } from "../../auth";
|
|
|
2
2
|
import type { CallOptions, Message, ProviderAdapter } from "../types";
|
|
3
3
|
import { readSse } from "../sse";
|
|
4
4
|
import { providerHttpError } from "./errors";
|
|
5
|
+
import { jeoEnv } from "../../util/env";
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
/**
 * Pick an explicit `thinkingBudget` for a Gemini model.
 *
 * Gemini 2.5+/latest models think by default and bill thought tokens against
 * `maxOutputTokens`, so a small-budget call can spend everything on thoughts and
 * return MAX_TOKENS with zero text. The budget is therefore pinned: off unless
 * reasoning was requested. Pro-class models cannot disable thinking (API minimum
 * 128), so they keep that floor instead of 0.
 *
 * @param model Model id; matched case-insensitively.
 * @param effort `low` → 1024, `medium` → 4096, `high` → 8192; anything else
 *   (including `minimal` and omitted) → the model's floor.
 * @param maxTokens When given, the budget is capped at `maxTokens - 1024` (but
 *   never below the floor) so roughly 1K of output stays available for text.
 * @returns The budget, or `undefined` for models that are not thinking-capable
 *   (older 1.5/2.0 models reject `thinkingConfig`, so it must be omitted).
 */
export function geminiThinkingBudget(model: string, effort?: CallOptions["reasoningEffort"], maxTokens?: number): number | undefined {
  const id = model.toLowerCase();
  if (!/gemini-(2\.5|[3-9])|flash-latest|pro-latest/.test(id)) return undefined;

  // pro-class cannot fully disable thinking
  const floor = id.includes("pro") ? 128 : 0;

  const requested =
    effort === "low" ? 1024
    : effort === "medium" ? 4096
    : effort === "high" ? 8192
    : floor;

  if (typeof maxTokens !== "number") return requested;

  // Thought tokens bill against maxOutputTokens: leave ~1K for visible text.
  const ceiling = Math.max(floor, maxTokens - 1024);
  return Math.min(requested, ceiling);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Build the Gemini request payload (contents + generationConfig + optional
 * systemInstruction) shared by the public generativelanguage path (API key) and
 * the Cloud Code Assist path (OAuth) — only the envelope/endpoint differs.
 *
 * @returns The resolved model id (provider prefix removed, with a
 *   `gemini-2.0-flash` fallback) together with the payload object.
 */
export function buildGeminiPayload(messages: Message[], options: CallOptions): { geminiModel: string; payload: Record<string, unknown> } {
  type Part = { text: string } | { inlineData: { mimeType: string; data: string } };

  const stripped = options.model.replace(/^(google|gemini)\//, "");
  const geminiModel = !stripped || stripped === "claude-3-5-sonnet" ? "gemini-2.0-flash" : stripped;

  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;

  // Gemini requires strictly ALTERNATING user/model turns. jeo histories can carry
  // consecutive same-role messages (a compaction summary before a tool result,
  // back-to-back tool results, etc.), so adjacent same-role turns are merged into
  // one content block — otherwise the API rejects the request mid-session.
  const contents: { role: string; parts: Part[] }[] = [];
  for (const message of messages) {
    if (message.role === "system") continue;
    const role = message.role === "assistant" ? "model" : "user";

    // Clipboard-pasted images become inlineData parts ahead of the text part.
    const parts: Part[] = [];
    for (const image of message.images ?? []) {
      parts.push({ inlineData: { mimeType: image.mediaType, data: image.data } });
    }
    parts.push({ text: message.content });

    const tail = contents.length > 0 ? contents[contents.length - 1] : undefined;
    if (tail && tail.role === role) tail.parts.push(...parts);
    else contents.push({ role, parts });
  }

  const generationConfig: Record<string, unknown> = {
    temperature: options.temperature ?? 0.2,
    maxOutputTokens: options.maxTokens ?? 4000,
    ...(options.jsonMode ? { responseMimeType: "application/json" } : {}),
  };
  const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
  if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { thinkingBudget };

  const payload: Record<string, unknown> = {
    contents,
    generationConfig,
    ...(systemPrompt ? { systemInstruction: { parts: [{ text: systemPrompt }] } } : {}),
  };
  return { geminiModel, payload };
}
|
|
24
74
|
|
|
75
|
+
export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent"): { url: string; headers: Record<string, string>; body: string } {
|
|
76
|
+
const { geminiModel, payload } = buildGeminiPayload(messages, options);
|
|
25
77
|
const oauth = credential.kind === "oauth" ? credential.token : undefined;
|
|
26
78
|
const apiKey = credential.kind === "api_key" ? credential.token : undefined;
|
|
27
|
-
let url = `https://generativelanguage.googleapis.com/v1beta/models/${geminiModel}:${action}`;
|
|
79
|
+
let url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(geminiModel)}:${action}`;
|
|
28
80
|
const query = action === "streamGenerateContent" ? "alt=sse" : "";
|
|
29
|
-
if (!oauth) url += `?${query ? query + "&" : ""}key=${apiKey ?? ""}`;
|
|
81
|
+
if (!oauth) url += `?${query ? query + "&" : ""}key=${encodeURIComponent(apiKey ?? "")}`;
|
|
30
82
|
else if (query) url += `?${query}`;
|
|
31
83
|
const headers: Record<string, string> = oauth
|
|
32
84
|
? { "content-type": "application/json", authorization: `Bearer ${oauth}` }
|
|
@@ -34,18 +86,125 @@ function geminiRequest(messages: Message[], options: CallOptions, credential: Cr
|
|
|
34
86
|
return { url, headers, body: JSON.stringify(payload) };
|
|
35
87
|
}
|
|
36
88
|
|
|
89
|
+
/** Host of the Cloud Code Assist API used for gemini-cli OAuth credentials. */
const CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com";

/**
 * gemini-cli identification headers Cloud Code Assist expects (gjc parity).
 *
 * @param modelId Model named in the User-Agent; defaults to `gemini-2.5-flash`.
 * @returns `User-Agent` and `Client-Metadata` headers. The CLI version comes
 *   from `jeoEnv("GEMINI_CLI_VERSION")`, falling back to "0.45.2" when unset or empty.
 */
export function getGeminiCliHeaders(modelId?: string): Record<string, string> {
  const cliVersion = jeoEnv("GEMINI_CLI_VERSION") || "0.45.2";
  const model = modelId ?? "gemini-2.5-flash";
  const userAgent = `GeminiCLI/${cliVersion}/${model} (${process.platform}; ${process.arch}; terminal)`;

  const headers: Record<string, string> = {
    "User-Agent": userAgent,
    "Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI",
  };
  return headers;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Build the Cloud Code Assist request for a Google OAuth (gemini-cli)
 * credential — the gemini-cli/gjc call path.
 *
 * OAuth tokens carry cloud-platform scope and target cloudcode-pa.googleapis.com,
 * NOT the public generativelanguage API, so a plain `jeo auth login gemini`
 * works without any GEMINI_API_KEY. The body wraps the standard payload as
 * `{ project, model, request }`.
 *
 * @param accessToken OAuth access token sent as the bearer token.
 * @param projectId Cloud Code Assist project id placed in the envelope.
 */
export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string): { url: string; headers: Record<string, string>; body: string } {
  const built = buildGeminiPayload(messages, options);

  const headers: Record<string, string> = {
    authorization: `Bearer ${accessToken}`,
    "content-type": "application/json",
    accept: "text/event-stream",
    ...getGeminiCliHeaders(built.geminiModel),
  };
  const envelope = { project: projectId, model: built.geminiModel, request: built.payload };

  return {
    url: `${CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse`,
    headers,
    body: JSON.stringify(envelope),
  };
}
|
|
120
|
+
|
|
37
121
|
/** One streamed (or whole) Gemini response object. */
interface GeminiChunk {
  candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
  promptFeedback?: { blockReason?: string };
  usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
}

/** Cloud Code Assist wraps each standard chunk under `response`. */
interface CcaChunk {
  response?: GeminiChunk;
}

/**
 * Concatenate the text of every part of the first candidate.
 * Returns "" when the chunk has no candidates, content or parts.
 */
function textOf(chunk: GeminiChunk): string {
  const parts = chunk.candidates?.[0]?.content?.parts;
  if (!parts) return "";

  let text = "";
  for (const part of parts) text += part.text ?? "";
  return text;
}
|
|
45
135
|
|
|
136
|
+
/**
 * Explain why a chunk carries no text, so an HTTP 200 with an empty reply
 * surfaces its real cause (safety block / RECITATION / MAX_TOKENS) instead of a
 * silent empty string that downstream JSON parsing would misreport as
 * "couldn't parse tool call".
 *
 * @returns A `blockReason=…` or `finishReason=…` description, or `undefined`
 *   when the chunk is neither blocked nor finished abnormally.
 */
function blockedReason(chunk: GeminiChunk): string | undefined {
  const blocked = chunk.promptFeedback?.blockReason;
  if (blocked) return `blockReason=${blocked}`;

  const finish = chunk.candidates?.[0]?.finishReason;
  if (!finish || finish === "STOP") return undefined;

  if (finish !== "MAX_TOKENS") return `finishReason=${finish}`;

  // Callers only consult this for chunks without text, so MAX_TOKENS here means
  // the output budget ran out before any visible text — typically thinking
  // tokens on a 2.5+/latest model.
  return "finishReason=MAX_TOKENS — output budget exhausted before any text; raise maxTokens or lower the thinking level";
}
|
|
152
|
+
|
|
153
|
+
/**
 * Cloud Code Assist SSE turn for a Google OAuth credential: resolves the
 * projectId (stored → env → lazy loadCodeAssist/onboardUser discovery), POSTs
 * the gemini-cli request, and yields text deltas. Shared by both `call`
 * (concatenates) and `stream` (yields through).
 *
 * Usage is reported ONCE after the stream (thought tokens count as output, gjc
 * parity). It is reported even when the turn produced no text, because an empty
 * MAX_TOKENS turn has still consumed tokens.
 *
 * @throws The provider HTTP error for a non-ok response, or an Error naming the
 *   block/finish reason when the stream ends without any text.
 */
async function* ccaTurn(messages: Message[], options: CallOptions, credential: Credential & { kind: "oauth" }): AsyncGenerator<string> {
  const { resolveAntigravityProjectId } = await import("./antigravity");
  const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
  const { url, headers, body } = geminiCliRequest(messages, options, credential.token, projectId);
  const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
  if (!response.ok) throw await providerHttpError("Gemini (Cloud Code Assist)", response);
  if (!response.body) return;

  let lastUsage: GeminiChunk["usageMetadata"];
  let yieldedAny = false;
  let lastEmptyReason: string | undefined;
  for await (const data of readSse(response.body)) {
    let chunk: CcaChunk;
    try {
      chunk = JSON.parse(data);
    } catch {
      // Skip payloads that are not valid JSON.
      continue;
    }
    const inner = chunk.response;
    if (!inner) continue;
    const delta = textOf(inner);
    if (delta) {
      yieldedAny = true;
      yield delta;
    } else {
      // Keep the most recent explanation for a text-less chunk.
      lastEmptyReason = blockedReason(inner) ?? lastEmptyReason;
    }
    if (inner.usageMetadata) lastUsage = inner.usageMetadata;
  }

  // Report usage BEFORE the empty-response check so the tokens of a failed turn
  // are not lost (same order as the Antigravity adapter).
  if (lastUsage) {
    options.onUsage?.({
      inputTokens: lastUsage.promptTokenCount,
      outputTokens: (lastUsage.candidatesTokenCount ?? 0) + (lastUsage.thoughtsTokenCount ?? 0),
    });
  }
  if (!yieldedAny) {
    throw new Error(`Gemini (Cloud Code Assist) returned no content${lastEmptyReason ? ` (${lastEmptyReason})` : ""}.`);
  }
}
|
|
198
|
+
|
|
46
199
|
export const geminiAdapter: ProviderAdapter = {
|
|
47
200
|
name: "gemini",
|
|
48
201
|
async call(messages, options, credential) {
|
|
202
|
+
// OAuth (gemini-cli login) → Cloud Code Assist; no GEMINI_API_KEY required.
|
|
203
|
+
if (credential.kind === "oauth") {
|
|
204
|
+
let out = "";
|
|
205
|
+
for await (const delta of ccaTurn(messages, options, credential)) out += delta;
|
|
206
|
+
return out;
|
|
207
|
+
}
|
|
49
208
|
const { url, headers, body } = geminiRequest(messages, options, credential, "generateContent");
|
|
50
209
|
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
51
210
|
if (!response.ok) throw await providerHttpError("Gemini", response);
|
|
@@ -53,13 +212,26 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
53
212
|
if (result.usageMetadata) {
|
|
54
213
|
options.onUsage?.({ inputTokens: result.usageMetadata.promptTokenCount, outputTokens: result.usageMetadata.candidatesTokenCount });
|
|
55
214
|
}
|
|
56
|
-
|
|
215
|
+
const text = textOf(result);
|
|
216
|
+
if (!text) {
|
|
217
|
+
const reason = blockedReason(result);
|
|
218
|
+
if (reason) throw new Error(`Gemini returned no content (${reason}).`);
|
|
219
|
+
}
|
|
220
|
+
return text;
|
|
57
221
|
},
|
|
58
222
|
async *stream(messages, options, credential) {
|
|
223
|
+
// OAuth (gemini-cli login) → Cloud Code Assist; no GEMINI_API_KEY required.
|
|
224
|
+
if (credential.kind === "oauth") {
|
|
225
|
+
yield* ccaTurn(messages, options, credential);
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
59
228
|
const { url, headers, body } = geminiRequest(messages, options, credential, "streamGenerateContent");
|
|
60
229
|
const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
|
|
61
230
|
if (!response.ok) throw await providerHttpError("Gemini", response, "(stream)");
|
|
62
231
|
if (!response.body) return;
|
|
232
|
+
let lastUsage: GeminiChunk["usageMetadata"];
|
|
233
|
+
let yieldedAny = false;
|
|
234
|
+
let lastEmptyReason: string | undefined;
|
|
63
235
|
for await (const data of readSse(response.body)) {
|
|
64
236
|
let chunk: GeminiChunk;
|
|
65
237
|
try {
|
|
@@ -68,10 +240,21 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
68
240
|
continue;
|
|
69
241
|
}
|
|
70
242
|
const delta = textOf(chunk);
|
|
71
|
-
if (delta)
|
|
72
|
-
|
|
73
|
-
|
|
243
|
+
if (delta) {
|
|
244
|
+
yieldedAny = true;
|
|
245
|
+
yield delta;
|
|
246
|
+
} else {
|
|
247
|
+
lastEmptyReason = blockedReason(chunk) ?? lastEmptyReason;
|
|
74
248
|
}
|
|
249
|
+
// Gemini emits cumulative usageMetadata on most chunks; capture the last and
|
|
250
|
+
// report ONCE after the stream so an accumulating sink can't over-count.
|
|
251
|
+
if (chunk.usageMetadata) lastUsage = chunk.usageMetadata;
|
|
252
|
+
}
|
|
253
|
+
if (!yieldedAny && lastEmptyReason) {
|
|
254
|
+
throw new Error(`Gemini returned no content (${lastEmptyReason}).`);
|
|
255
|
+
}
|
|
256
|
+
if (lastUsage) {
|
|
257
|
+
options.onUsage?.({ inputTokens: lastUsage.promptTokenCount, outputTokens: lastUsage.candidatesTokenCount });
|
|
75
258
|
}
|
|
76
259
|
},
|
|
77
260
|
};
|
|
@@ -2,13 +2,26 @@ import type { CallOptions, Message, ProviderAdapter } from "../types";
|
|
|
2
2
|
import { readLines } from "../sse";
|
|
3
3
|
import { providerHttpError } from "./errors";
|
|
4
4
|
|
|
5
|
+
/**
 * Resolve the Ollama base URL.
 *
 * Precedence: explicit `baseUrl` → `OLLAMA_HOST` → `http://localhost:11434`.
 * Blank values (empty or whitespace-only) count as "not set", so an exported
 * but empty `OLLAMA_HOST` falls back to the default instead of producing the
 * unusable URL `http://`.
 *
 * `OLLAMA_HOST` is documented as a bare host:port (e.g. `127.0.0.1:11434`), but
 * `fetch` needs a scheme: `http://` is prepended when none is present, else
 * `fetch("127.0.0.1:11434/api/chat")` throws "Failed to parse URL".
 *
 * @param baseUrl - Optional explicit base URL or bare host:port.
 * @returns The base URL with a scheme and without trailing slashes.
 */
export function normalizeOllamaBaseUrl(baseUrl?: string): string {
  // `||` (not `??`) is deliberate: empty strings must fall through as well.
  const raw = baseUrl?.trim() || process.env.OLLAMA_HOST?.trim() || "http://localhost:11434";
  const withScheme = /^https?:\/\//i.test(raw) ? raw : `http://${raw}`;
  // Drop every trailing slash so `${base}/api/chat` never contains `//`.
  return withScheme.replace(/\/+$/, "");
}
|
|
14
|
+
|
|
5
15
|
function ollamaRequest(messages: Message[], options: CallOptions, stream: boolean): { url: string; body: string } {
|
|
6
16
|
const model = options.model.startsWith("ollama/") ? options.model.slice(7) : options.model;
|
|
7
17
|
const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
|
|
8
|
-
const chatMessages: { role: string; content: string }[] = [];
|
|
18
|
+
const chatMessages: { role: string; content: string; images?: string[] }[] = [];
|
|
9
19
|
if (systemPrompt) chatMessages.push({ role: "system", content: systemPrompt });
|
|
10
20
|
for (const msg of messages) {
|
|
11
|
-
if (msg.role
|
|
21
|
+
if (msg.role === "system") continue;
|
|
22
|
+
// Ollama multimodal models take raw base64 strings in a sibling `images` array.
|
|
23
|
+
if (msg.images?.length) chatMessages.push({ role: msg.role, content: msg.content, images: msg.images.map(i => i.data) });
|
|
24
|
+
else chatMessages.push({ role: msg.role, content: msg.content });
|
|
12
25
|
}
|
|
13
26
|
const payload: Record<string, unknown> = {
|
|
14
27
|
model,
|
|
@@ -17,38 +30,55 @@ function ollamaRequest(messages: Message[], options: CallOptions, stream: boolea
|
|
|
17
30
|
options: { temperature: options.temperature ?? 0.2, num_predict: options.maxTokens ?? 4000 },
|
|
18
31
|
};
|
|
19
32
|
if (options.jsonMode) payload.format = "json";
|
|
20
|
-
const base = (options.baseUrl
|
|
33
|
+
const base = normalizeOllamaBaseUrl(options.baseUrl);
|
|
21
34
|
return { url: `${base}/api/chat`, body: JSON.stringify(payload) };
|
|
22
35
|
}
|
|
23
36
|
|
|
37
|
+
/**
 * Build the error raised when Ollama answers 200 but carries no text.
 *
 * @param doneReason - The `done_reason` reported by Ollama, if any.
 * @returns An Error whose message names `done_reason` when present and, for
 *   `"length"`, adds a hint that the output budget ran out.
 */
function emptyCompletionError(doneReason: string | undefined): Error {
  let message = "Ollama returned no content";
  if (doneReason) message += ` (done_reason=${doneReason})`;
  if (doneReason === "length") message += " — output budget exhausted before any text; raise maxTokens";
  return new Error(`${message}.`);
}
|
|
44
|
+
|
|
24
45
|
/**
 * Provider adapter for Ollama's `/api/chat` endpoint.
 *
 * Both entry points follow the same contract: a non-OK response throws the
 * provider HTTP error, and a 200 that carries no text throws an
 * empty-completion error naming `done_reason` when one was reported.
 */
export const ollamaAdapter: ProviderAdapter = {
  name: "ollama",

  /**
   * Non-streaming turn: returns the full message text.
   * Usage is reported before the emptiness check, so an empty completion is
   * still accounted for.
   */
  async call(messages, options) {
    const { url, body } = ollamaRequest(messages, options, false);
    const response = await fetch(url, { method: "POST", headers: { "content-type": "application/json" }, body, signal: options.signal });
    if (!response.ok) throw await providerHttpError("Ollama", response, `at ${url}`);
    const result = (await response.json()) as { message?: { content?: string }; done_reason?: string; prompt_eval_count?: number; eval_count?: number; total_duration?: number };
    // `total_duration` is divided by 1e6 to produce `durationMs`.
    options.onUsage?.({ inputTokens: result.prompt_eval_count, outputTokens: result.eval_count, durationMs: result.total_duration ? Math.round(result.total_duration / 1e6) : undefined });
    const text = result.message?.content ?? "";
    if (!text) throw emptyCompletionError(result.done_reason);
    return text;
  },

  /**
   * Streaming turn: yields each non-empty content delta from the
   * newline-delimited JSON stream and reports usage from the final (`done`) chunk.
   *
   * @throws Error when a streamed object carries an `error` field, so a
   *   failure is neither mistaken for "no content" nor silently truncates
   *   the text already yielded.
   */
  async *stream(messages, options) {
    const { url, body } = ollamaRequest(messages, options, true);
    const response = await fetch(url, { method: "POST", headers: { "content-type": "application/json" }, body, signal: options.signal });
    if (!response.ok) throw await providerHttpError("Ollama", response, `(stream) at ${url}`);
    // A 200 without a body carries no text either: same contract as below.
    if (!response.body) throw emptyCompletionError(undefined);
    let yieldedAny = false;
    let doneReason: string | undefined;
    for await (const line of readLines(response.body)) {
      let chunk: { message?: { content?: string }; done?: boolean; done_reason?: string; error?: string; prompt_eval_count?: number; eval_count?: number; total_duration?: number };
      try {
        chunk = JSON.parse(line);
      } catch {
        // Lines that are not JSON are skipped.
        continue;
      }
      // Ollama reports failures that happen after the 200 as an `{ "error": ... }` line.
      if (typeof chunk.error === "string" && chunk.error) {
        throw new Error(`Ollama stream failed: ${chunk.error}`);
      }
      const delta = chunk.message?.content;
      if (delta) {
        yieldedAny = true;
        yield delta;
      }
      if (chunk.done) {
        if (chunk.done_reason) doneReason = chunk.done_reason;
        options.onUsage?.({ inputTokens: chunk.prompt_eval_count, outputTokens: chunk.eval_count, durationMs: chunk.total_duration ? Math.round(chunk.total_duration / 1e6) : undefined });
        break;
      }
    }
    if (!yieldedAny) throw emptyCompletionError(doneReason);
  },
};
|