jeo-code 0.1.0 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/README.ja.md +160 -0
  2. package/README.ko.md +160 -0
  3. package/README.md +115 -297
  4. package/README.zh.md +160 -0
  5. package/package.json +11 -6
  6. package/scripts/install.sh +28 -28
  7. package/scripts/uninstall.sh +17 -15
  8. package/src/AGENTS.md +50 -0
  9. package/src/agent/AGENTS.md +49 -0
  10. package/src/agent/bash-fixups.ts +103 -0
  11. package/src/agent/compaction.ts +410 -19
  12. package/src/agent/config-schema.ts +119 -5
  13. package/src/agent/context-files.ts +314 -17
  14. package/src/agent/dev/AGENTS.md +36 -0
  15. package/src/agent/dev/advanced-analyzer.ts +12 -0
  16. package/src/agent/dev/evolution-bridge.ts +82 -0
  17. package/src/agent/dev/evolution-logger.ts +41 -0
  18. package/src/agent/dev/self-analysis.ts +64 -0
  19. package/src/agent/dev/self-improve.ts +24 -0
  20. package/src/agent/dev/spec-automation.ts +49 -0
  21. package/src/agent/engine.ts +808 -54
  22. package/src/agent/hooks.ts +273 -0
  23. package/src/agent/loop.ts +21 -1
  24. package/src/agent/memory.ts +201 -0
  25. package/src/agent/model-recency.ts +32 -0
  26. package/src/agent/output-minimizer.ts +108 -0
  27. package/src/agent/output-util.ts +64 -0
  28. package/src/agent/plan.ts +187 -0
  29. package/src/agent/seed.ts +52 -0
  30. package/src/agent/session.ts +235 -21
  31. package/src/agent/state.ts +286 -39
  32. package/src/agent/step-budget.ts +232 -0
  33. package/src/agent/subagents.ts +223 -26
  34. package/src/agent/task-tool.ts +272 -0
  35. package/src/agent/todo-tool.ts +87 -0
  36. package/src/agent/tokenizer.ts +117 -0
  37. package/src/agent/tool-registry.ts +54 -0
  38. package/src/agent/tools.ts +624 -103
  39. package/src/agent/web-search.ts +538 -0
  40. package/src/ai/AGENTS.md +44 -0
  41. package/src/ai/index.ts +1 -0
  42. package/src/ai/model-catalog-compat.ts +3 -1
  43. package/src/ai/model-catalog.ts +74 -9
  44. package/src/ai/model-discovery.ts +215 -17
  45. package/src/ai/model-manager.ts +346 -32
  46. package/src/ai/model-picker.ts +1 -1
  47. package/src/ai/model-registry.ts +4 -2
  48. package/src/ai/pricing.ts +84 -0
  49. package/src/ai/provider-registry.ts +23 -0
  50. package/src/ai/provider-status.ts +60 -16
  51. package/src/ai/providers/AGENTS.md +42 -0
  52. package/src/ai/providers/anthropic.ts +250 -31
  53. package/src/ai/providers/antigravity.ts +219 -0
  54. package/src/ai/providers/errors.ts +15 -1
  55. package/src/ai/providers/gemini.ts +196 -13
  56. package/src/ai/providers/ollama.ts +37 -7
  57. package/src/ai/providers/openai-responses.ts +173 -0
  58. package/src/ai/providers/openai.ts +64 -12
  59. package/src/ai/sse.ts +4 -1
  60. package/src/ai/types.ts +18 -1
  61. package/src/auth/AGENTS.md +41 -0
  62. package/src/auth/callback-server.ts +6 -1
  63. package/src/auth/flows/AGENTS.md +32 -0
  64. package/src/auth/flows/antigravity.ts +151 -0
  65. package/src/auth/flows/google-project.ts +190 -0
  66. package/src/auth/flows/google.ts +39 -18
  67. package/src/auth/flows/index.ts +15 -5
  68. package/src/auth/flows/openai.ts +2 -2
  69. package/src/auth/oauth.ts +8 -0
  70. package/src/auth/refresh.ts +44 -27
  71. package/src/auth/storage.ts +149 -26
  72. package/src/auth/types.ts +1 -1
  73. package/src/autopilot.ts +362 -0
  74. package/src/bun-imports.d.ts +4 -0
  75. package/src/cli/AGENTS.md +39 -0
  76. package/src/cli/runner.ts +148 -14
  77. package/src/cli.ts +13 -4
  78. package/src/commands/AGENTS.md +40 -0
  79. package/src/commands/approve.ts +62 -3
  80. package/src/commands/auth.ts +167 -25
  81. package/src/commands/chat.ts +37 -8
  82. package/src/commands/deep-interview.ts +633 -175
  83. package/src/commands/doctor.ts +84 -37
  84. package/src/commands/evolve-core.ts +18 -0
  85. package/src/commands/evolve.ts +2 -1
  86. package/src/commands/export.ts +176 -0
  87. package/src/commands/gjc.ts +52 -0
  88. package/src/commands/launch.ts +3549 -240
  89. package/src/commands/mcp.ts +3 -3
  90. package/src/commands/ooo-seed.ts +19 -0
  91. package/src/commands/ralplan.ts +253 -35
  92. package/src/commands/resume.ts +1 -1
  93. package/src/commands/session.ts +183 -0
  94. package/src/commands/setup-helpers.ts +10 -3
  95. package/src/commands/setup.ts +57 -16
  96. package/src/commands/skills.ts +78 -18
  97. package/src/commands/state.ts +198 -0
  98. package/src/commands/status.ts +84 -0
  99. package/src/commands/team.ts +340 -212
  100. package/src/commands/ultragoal.ts +122 -61
  101. package/src/commands/update.ts +244 -0
  102. package/src/ledger.ts +270 -0
  103. package/src/mcp/AGENTS.md +38 -0
  104. package/src/mcp/server.ts +115 -14
  105. package/src/mcp/tools.ts +42 -22
  106. package/src/md-modules.d.ts +4 -0
  107. package/src/prompts/AGENTS.md +41 -0
  108. package/src/prompts/agents/AGENTS.md +35 -0
  109. package/src/prompts/agents/architect.md +35 -0
  110. package/src/prompts/agents/critic.md +37 -0
  111. package/src/prompts/agents/executor.md +36 -0
  112. package/src/prompts/agents/planner.md +37 -0
  113. package/src/prompts/skills/AGENTS.md +36 -0
  114. package/src/prompts/skills/deep-dive/AGENTS.md +31 -0
  115. package/src/prompts/skills/deep-dive/SKILL.md +13 -0
  116. package/src/prompts/skills/deep-interview/AGENTS.md +31 -0
  117. package/src/prompts/skills/deep-interview/SKILL.md +12 -0
  118. package/src/prompts/skills/gjc/AGENTS.md +31 -0
  119. package/src/prompts/skills/gjc/SKILL.md +15 -0
  120. package/src/prompts/skills/ralplan/AGENTS.md +31 -0
  121. package/src/prompts/skills/ralplan/SKILL.md +11 -0
  122. package/src/prompts/skills/team/AGENTS.md +31 -0
  123. package/src/prompts/skills/team/SKILL.md +11 -0
  124. package/src/prompts/skills/ultragoal/AGENTS.md +31 -0
  125. package/src/prompts/skills/ultragoal/SKILL.md +11 -0
  126. package/src/skills/AGENTS.md +38 -0
  127. package/src/skills/catalog.ts +565 -31
  128. package/src/tui/AGENTS.md +43 -0
  129. package/src/tui/app.ts +1181 -92
  130. package/src/tui/components/AGENTS.md +42 -0
  131. package/src/tui/components/ascii-art.ts +257 -15
  132. package/src/tui/components/autocomplete.ts +98 -16
  133. package/src/tui/components/autopilot-status.ts +65 -0
  134. package/src/tui/components/category-index.ts +49 -0
  135. package/src/tui/components/code-view.ts +54 -11
  136. package/src/tui/components/color.ts +171 -2
  137. package/src/tui/components/config-panel.ts +82 -15
  138. package/src/tui/components/duration.ts +38 -0
  139. package/src/tui/components/evolution.ts +3 -3
  140. package/src/tui/components/footer.ts +91 -42
  141. package/src/tui/components/forge.ts +426 -31
  142. package/src/tui/components/hints.ts +54 -0
  143. package/src/tui/components/hud.ts +73 -0
  144. package/src/tui/components/index.ts +4 -0
  145. package/src/tui/components/input-box.ts +150 -0
  146. package/src/tui/components/layout.ts +11 -3
  147. package/src/tui/components/live-model-picker.ts +108 -0
  148. package/src/tui/components/markdown-table.ts +140 -0
  149. package/src/tui/components/markdown-text.ts +97 -0
  150. package/src/tui/components/meter.ts +4 -1
  151. package/src/tui/components/model-picker.ts +3 -2
  152. package/src/tui/components/provider-picker.ts +3 -2
  153. package/src/tui/components/section.ts +70 -0
  154. package/src/tui/components/select-list.ts +40 -10
  155. package/src/tui/components/skill-picker.ts +25 -0
  156. package/src/tui/components/slash.ts +244 -21
  157. package/src/tui/components/status.ts +272 -11
  158. package/src/tui/components/step-timeline.ts +218 -0
  159. package/src/tui/components/stream.ts +26 -9
  160. package/src/tui/components/themes.ts +212 -6
  161. package/src/tui/components/todo-card.ts +47 -0
  162. package/src/tui/components/tool-list.ts +58 -12
  163. package/src/tui/components/transcript.ts +120 -0
  164. package/src/tui/components/update-box.ts +31 -0
  165. package/src/tui/components/welcome.ts +162 -0
  166. package/src/tui/components/width.ts +163 -0
  167. package/src/tui/monitoring/AGENTS.md +31 -0
  168. package/src/tui/monitoring/hud-view.ts +55 -0
  169. package/src/tui/renderer.ts +112 -3
  170. package/src/tui/terminal.ts +40 -33
  171. package/src/util/AGENTS.md +39 -0
  172. package/src/util/clipboard-image.ts +118 -0
  173. package/src/util/env.ts +12 -0
  174. package/src/util/provider-error.ts +78 -0
  175. package/src/util/retry.ts +91 -6
  176. package/src/util/update-check.ts +64 -0
  177. package/src/commands/models.ts +0 -104
@@ -0,0 +1,219 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import type { Credential } from "../../auth";
3
+ import type { CallOptions, Message, ProviderAdapter } from "../types";
4
+ import { readSse } from "../sse";
5
+ import { providerHttpError } from "./errors";
6
+
7
+ const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
8
+ const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
9
+ const ENDPOINTS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
10
+
11
/**
 * Build the `User-Agent` value sent with Antigravity requests.
 *
 * Shape: `antigravity/<version> <os>/<arch>`. The version comes from the
 * `PI_AI_ANTIGRAVITY_VERSION` environment variable (default "1.104.0" when it
 * is unset or empty). `win32` is reported as "windows", `x64` as "amd64" and
 * `ia32` as "386"; every other platform/arch value is passed through as-is.
 */
export function getAntigravityUserAgent(): string {
  const archAliases = new Map<string, string>([
    ["x64", "amd64"],
    ["ia32", "386"],
  ]);
  const osName = process.platform === "win32" ? "windows" : process.platform;
  const archName = archAliases.get(process.arch) ?? process.arch;
  const release = process.env.PI_AI_ANTIGRAVITY_VERSION || "1.104.0";
  return `antigravity/${release} ${osName}/${archName}`;
}
17
+
18
/** Drop a single leading `antigravity/` provider prefix from a model id, if present. */
function antigravityModelId(model: string): string {
  const prefix = "antigravity/";
  return model.startsWith(prefix) ? model.slice(prefix.length) : model;
}
21
+
22
/**
 * Look up a project id without any network call.
 *
 * Precedence: the `projectId` stored on an OAuth credential, then the
 * `GOOGLE_CLOUD_PROJECT` environment variable, then `GOOGLE_CLOUD_PROJECT_ID`.
 * Empty values are skipped; returns `undefined` when none is set.
 */
function projectIdFor(credential: Credential): string | undefined {
  const stored = credential.kind === "oauth" ? credential.projectId : undefined;
  if (stored) return stored;
  for (const name of ["GOOGLE_CLOUD_PROJECT", "GOOGLE_CLOUD_PROJECT_ID"]) {
    const value = process.env[name];
    if (value) return value;
  }
  return undefined;
}
26
+
27
// In-process cache of lazily discovered project ids, keyed by access token so a
// rotated token re-discovers.
const discoveredProjects = new Map<string, string>();

/** Optional hooks and cancellation for {@link resolveAntigravityProjectId}. */
export interface ResolveProjectOptions {
  /** Replaces the default discovery call; receives the OAuth access token. */
  discover?: (accessToken: string) => Promise<string>;
  /** Replaces the default write-back of a freshly discovered project id. */
  persist?: (projectId: string) => Promise<void>;
  /** Turn abort — threaded into the default discovery call so a stalled
   *  discovery request cannot hang the first OAuth turn forever. */
  signal?: AbortSignal;
}

/**
 * Resolve the Cloud Code Assist project id for an Antigravity call.
 *
 * Resolution order: project id stored on the credential → environment
 * (`GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_PROJECT_ID`) → in-process cache →
 * lazy discovery. A discovered id is cached per access token and then
 * persisted (best-effort) onto the stored OAuth record of the owning provider
 * ("antigravity" or "gemini") when that record has no project id yet.
 *
 * @param credential Credential for the call; must be an OAuth credential.
 * @param opts Optional discovery/persistence overrides and an abort signal.
 * @returns The resolved, non-empty project id.
 * @throws Error when the credential is not OAuth, when discovery fails, or when
 *   discovery yields an empty project id.
 */
export async function resolveAntigravityProjectId(
  credential: Credential,
  opts: ResolveProjectOptions = {},
): Promise<string> {
  if (credential.kind !== "oauth") {
    throw new Error("Antigravity provider requires Google/Gemini CLI OAuth credentials. Run `jeo auth login gemini`.");
  }
  const direct = projectIdFor(credential);
  if (direct) return direct;
  const cached = discoveredProjects.get(credential.token);
  if (cached) return cached;

  const discover = opts.discover ?? (async (token: string) => {
    const { discoverGoogleProjectId, ANTIGRAVITY_DISCOVERY_METADATA } = await import("../../auth/flows/google-project");
    // Antigravity-client tokens discover with ANTIGRAVITY metadata; gemini-cli
    // tokens use the default gemini-cli metadata shape.
    return discoverGoogleProjectId(token, credential.provider === "antigravity"
      ? { metadata: { ...ANTIGRAVITY_DISCOVERY_METADATA }, extraHeaders: { "User-Agent": getAntigravityUserAgent() }, signal: opts.signal }
      : { signal: opts.signal });
  });
  let projectId: string;
  try {
    projectId = await discover(credential.token);
  } catch (err) {
    throw new Error(
      `Antigravity project auto-discovery failed: ${(err as Error)?.message ?? err}. ` +
      "Set GOOGLE_CLOUD_PROJECT(_ID) or re-run `jeo auth login gemini`.",
    );
  }
  // Never cache, persist, or return an unusable id: an empty value would be
  // re-discovered on every call (the cache lookup is truthiness-based) and
  // would only fail later, at request-build time, with a misleading message.
  if (typeof projectId !== "string" || projectId.trim() === "") {
    throw new Error(
      "Antigravity project auto-discovery returned an empty project id. " +
      "Set GOOGLE_CLOUD_PROJECT(_ID) or re-run `jeo auth login gemini`.",
    );
  }
  discoveredProjects.set(credential.token, projectId);

  const persist = opts.persist ?? (async (id: string) => {
    const { getStoredOAuth, setOauthCredential } = await import("../../auth/storage");
    const owner = credential.provider === "antigravity" ? "antigravity" : "gemini";
    const stored = await getStoredOAuth(owner);
    // Only fill a missing project id; never overwrite one that is already stored.
    if (stored && !stored.projectId) await setOauthCredential(owner, { ...stored, projectId: id });
  });
  try {
    await persist(projectId);
  } catch {
    // Persistence is best-effort; the in-process cache still serves this session.
  }
  return projectId;
}
90
+
91
/** One content part of a request turn: plain text or an inline data blob. */
type CcaPart = { text: string } | { inlineData: { mimeType: string; data: string } };

/**
 * Convert chat messages into the `contents` array of the request.
 *
 * System messages are dropped, `assistant` maps to the "model" role and every
 * other role maps to "user". Each message contributes its images (as
 * `inlineData` parts) followed by one text part. Adjacent messages that map to
 * the same role are merged into a single turn.
 */
function antigravityContents(messages: Message[]): { role: "user" | "model"; parts: CcaPart[] }[] {
  type Turn = { role: "user" | "model"; parts: CcaPart[] };
  return messages
    .filter(msg => msg.role !== "system")
    .reduce<Turn[]>((turns, msg) => {
      const role: Turn["role"] = msg.role === "assistant" ? "model" : "user";
      const imageParts: CcaPart[] = (msg.images ?? []).map(img => ({
        inlineData: { mimeType: img.mediaType, data: img.data },
      }));
      const parts: CcaPart[] = [...imageParts, { text: msg.content }];
      const tail = turns[turns.length - 1];
      if (tail !== undefined && tail.role === role) {
        tail.parts.push(...parts);
      } else {
        turns.push({ role, parts });
      }
      return turns;
    }, []);
}
109
+
110
/**
 * Derive a session id from the conversation.
 *
 * The seed is the content of the first `user` message (or the current
 * timestamp when there is none). Its UTF-8 bytes are folded with a
 * multiply-by-131 rolling hash kept within 63 bits; the result is the decimal
 * hash prefixed with "-".
 */
function sessionId(messages: Message[]): string {
  const seedText = messages.find(m => m.role === "user")?.content ?? `${Date.now()}`;
  const mask = (1n << 63n) - 1n;
  const digest = Array.from(new TextEncoder().encode(seedText)).reduce(
    (acc, byte) => (acc * 131n + BigInt(byte)) & mask,
    0n,
  );
  return `-${digest.toString()}`;
}
116
+
117
/**
 * Build the HTTP request (URL, headers, JSON body) for one Antigravity
 * `streamGenerateContent` call.
 *
 * @param messages Conversation; system messages are used only as a fallback
 *   source for the system prompt.
 * @param options Call options (model, systemPrompt, temperature, maxTokens).
 * @param credential Must be an OAuth credential; its token becomes the bearer token.
 * @param endpoint Base URL to target; defaults to the daily endpoint.
 * @param projectId Explicit project id; when omitted the credential/environment
 *   lookup is used.
 * @throws Error when the credential is not OAuth or no project id is available.
 */
export function antigravityRequest(messages: Message[], options: CallOptions, credential: Credential, endpoint = ANTIGRAVITY_DAILY_ENDPOINT, projectId?: string): { url: string; headers: Record<string, string>; body: string } {
  if (credential.kind !== "oauth") throw new Error("Antigravity provider requires Google/Gemini CLI OAuth credentials.");

  const project = projectId ?? projectIdFor(credential);
  if (!project) {
    throw new Error(
      "Antigravity needs a Google Cloud projectId and auto-discovery has not run yet. " +
      "Set GOOGLE_CLOUD_PROJECT(_ID) or re-run `jeo auth login gemini`.",
    );
  }

  const model = antigravityModelId(options.model);
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;

  // Upstream Antigravity strips maxOutputTokens for non-Claude models; do the same.
  const isClaude = model.toLowerCase().includes("claude");
  const generationConfig: Record<string, unknown> = {
    ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
    ...(isClaude ? { maxOutputTokens: options.maxTokens ?? 4000 } : {}),
  };

  // Optional sections are only present when they carry something.
  const request: Record<string, unknown> = {
    contents: antigravityContents(messages),
    sessionId: sessionId(messages),
    ...(systemPrompt ? { systemInstruction: { role: "user", parts: [{ text: systemPrompt }] } } : {}),
    ...(Object.keys(generationConfig).length > 0 ? { generationConfig } : {}),
  };

  const envelope = {
    project,
    model,
    request,
    requestType: "agent",
    userAgent: "antigravity",
    requestId: `agent-${randomUUID()}`,
  };

  const headers: Record<string, string> = {
    authorization: `Bearer ${credential.token}`,
    "content-type": "application/json",
    accept: "text/event-stream",
    "User-Agent": getAntigravityUserAgent(),
  };

  return {
    url: `${endpoint}/v1internal:streamGenerateContent?alt=sse`,
    headers,
    body: JSON.stringify(envelope),
  };
}
159
+
160
/** Token counters carried in a chunk's `usageMetadata`. */
type CcaUsage = { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };

/** One parsed SSE payload: the chunk data sits under `response`. */
interface CcaChunk {
  response?: {
    candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
    usageMetadata?: CcaUsage;
  };
}

/** Concatenate the text of every part of the first candidate; "" when there is none. */
function textOf(chunk: CcaChunk): string {
  const parts = chunk.response?.candidates?.[0]?.content?.parts;
  if (parts == null) return "";
  let text = "";
  for (const part of parts) text += part.text ?? "";
  return text;
}
171
+
172
/**
 * POST the Antigravity request, trying each configured endpoint in order.
 *
 * The project id is resolved once up front (stored credential → env → lazy
 * discovery). An endpoint that answers 404 or 503 falls through to the next
 * one; any other non-ok status stops the loop immediately.
 *
 * @returns The first ok `Response` (body unread, ready for SSE parsing).
 * @throws The provider HTTP error built from the last non-ok response, or
 *   whatever `fetch` / project resolution throws.
 */
async function fetchAntigravity(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
  const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
  let last: Response | undefined;
  for (const endpoint of ENDPOINTS) {
    // A response we are abandoning in favour of the next endpoint must have its
    // body released, otherwise the underlying connection stays checked out.
    // (The final failed response is kept intact: its body feeds the error.)
    if (last) await last.body?.cancel().catch(() => {});
    const { url, headers, body } = antigravityRequest(messages, options, credential, endpoint, projectId);
    const res = await fetch(url, { method: "POST", headers, body, signal: options.signal });
    if (res.ok) return res;
    last = res;
    if (res.status !== 404 && res.status !== 503) break;
  }
  if (!last) throw new Error("Antigravity has no endpoints configured.");
  throw await providerHttpError("Antigravity", last);
}
186
+
187
/**
 * Shared SSE turn for the adapter: fetches the response and yields each
 * non-empty text delta. Unparseable SSE payloads are skipped. After the stream
 * ends, the last seen usage block is reported once (thought tokens are added
 * to the output count), then an error is raised if no text was produced.
 * A response without a body yields nothing and raises nothing.
 */
async function* antigravityDeltas(messages: Message[], options: CallOptions, credential: Credential): AsyncGenerator<string> {
  const response = await fetchAntigravity(messages, options, credential);
  if (!response.body) return;

  let sawText = false;
  let finalUsage: CcaUsage | undefined;
  for await (const payload of readSse(response.body)) {
    let parsed: CcaChunk;
    try {
      parsed = JSON.parse(payload);
    } catch {
      continue;
    }
    const piece = textOf(parsed);
    if (piece) {
      sawText = true;
      yield piece;
    }
    finalUsage = parsed.response?.usageMetadata ?? finalUsage;
  }

  if (finalUsage) {
    options.onUsage?.({
      inputTokens: finalUsage.promptTokenCount,
      outputTokens: (finalUsage.candidatesTokenCount ?? 0) + (finalUsage.thoughtsTokenCount ?? 0),
    });
  }
  if (!sawText) throw new Error("Antigravity Cloud Code Assist returned an empty response.");
}

/** Provider adapter for Antigravity: `call` returns the full text, `stream` yields deltas. */
export const antigravityAdapter: ProviderAdapter = {
  name: "antigravity",
  async call(messages, options, credential) {
    let text = "";
    for await (const piece of antigravityDeltas(messages, options, credential)) text += piece;
    return text;
  },
  async *stream(messages, options, credential) {
    yield* antigravityDeltas(messages, options, credential);
  },
};
@@ -36,6 +36,19 @@ export function parseRetryAfter(value: string | null | undefined): number | unde
36
36
  return undefined;
37
37
  }
38
38
 
39
/**
 * Pull a server-suggested retry delay (in milliseconds) out of an error
 * response *body*.
 *
 * Two textual shapes are recognised, in this order: a JSON-style
 * `"retryDelay": "8s"` field and a sentence such as `Please retry in 8.6s`.
 * Only the first shape that matches is considered.
 *
 * @param detail Raw response body text; may be empty, `null` or `undefined`.
 * @returns The delay in ms (never negative), or `undefined` when no hint is
 *   found or the captured number is not finite.
 */
export function parseRetryFromBody(detail: string | null | undefined): number | undefined {
  if (!detail) return undefined;
  const patterns = [/"retryDelay"\s*:\s*"?([\d.]+)s/i, /retry in ([\d.]+)\s*s/i];
  for (const pattern of patterns) {
    const hit = pattern.exec(detail);
    if (hit === null) continue;
    const seconds = Number(hit[1]);
    return Number.isFinite(seconds) ? Math.max(0, seconds * 1000) : undefined;
  }
  return undefined;
}
51
+
39
52
  /**
40
53
  * Build a {@link ProviderHttpError} from a non-ok `Response`, capturing the body
41
54
  * and any `Retry-After`. Use at every adapter's `!response.ok` site so the retry
@@ -43,5 +56,6 @@ export function parseRetryAfter(value: string | null | undefined): number | unde
43
56
  */
44
57
  export async function providerHttpError(provider: string, response: Response, context?: string): Promise<ProviderHttpError> {
45
58
  const detail = await response.text().catch(() => "");
46
- return new ProviderHttpError(provider, response.status, detail, context, parseRetryAfter(response.headers.get("retry-after")));
59
+ const retryAfterMs = parseRetryAfter(response.headers.get("retry-after")) ?? parseRetryFromBody(detail);
60
+ return new ProviderHttpError(provider, response.status, detail, context, retryAfterMs);
47
61
  }
@@ -2,31 +2,83 @@ import type { Credential } from "../../auth";
2
2
  import type { CallOptions, Message, ProviderAdapter } from "../types";
3
3
  import { readSse } from "../sse";
4
4
  import { providerHttpError } from "./errors";
5
+ import { jeoEnv } from "../../util/env";
5
6
 
6
- function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent"): { url: string; headers: Record<string, string>; body: string } {
7
- const resolvedModel = options.model.startsWith("google/") ? options.model.slice(7) : options.model;
7
/**
 * Choose the `thinkingBudget` to pin for a Gemini model.
 *
 * Thought tokens are billed against `maxOutputTokens`, so a small-budget call
 * on a thinking model can spend everything on thoughts and return MAX_TOKENS
 * with no text. The budget is therefore explicit: off unless reasoning was
 * requested, and capped so roughly 1K of the output budget stays available
 * for visible text.
 *
 * @param model Model id (matched case-insensitively).
 * @param effort Requested reasoning effort; "low" / "medium" / "high" map to
 *   1024 / 4096 / 8192, anything else (including "minimal") uses the floor.
 * @param maxTokens Output token limit of the call, when known.
 * @returns `undefined` for models not recognised as thinking-capable (the
 *   config must then be omitted); otherwise the budget. Ids containing "pro"
 *   use a floor of 128 instead of 0 because they cannot fully disable thinking.
 */
export function geminiThinkingBudget(model: string, effort?: CallOptions["reasoningEffort"], maxTokens?: number): number | undefined {
  const id = model.toLowerCase();
  if (!/gemini-(2\.5|[3-9])|flash-latest|pro-latest/.test(id)) return undefined;

  const minimum = id.includes("pro") ? 128 : 0;
  const byEffort = new Map<string, number>([
    ["low", 1024],
    ["medium", 4096],
    ["high", 8192],
  ]);
  const requested = (typeof effort === "string" ? byEffort.get(effort) : undefined) ?? minimum;

  if (typeof maxTokens !== "number") return requested;
  // Leave ~1K of the output budget for the reply, but never go below the floor.
  return Math.min(requested, Math.max(minimum, maxTokens - 1024));
}
31
+
32
+ /** Shared Gemini request payload (contents + generationConfig + systemInstruction)
33
+ * used by BOTH the public generativelanguage path (API key) and the Cloud Code
34
+ * Assist path (OAuth) — only the envelope/endpoint differs. */
35
+ export function buildGeminiPayload(messages: Message[], options: CallOptions): { geminiModel: string; payload: Record<string, unknown> } {
36
+ const resolvedModel = options.model.replace(/^(google|gemini)\//, "");
8
37
  let geminiModel = resolvedModel;
9
38
  if (!geminiModel || geminiModel === "claude-3-5-sonnet") geminiModel = "gemini-2.0-flash";
10
39
 
11
40
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
12
- const contents = messages
13
- .filter(m => m.role !== "system")
14
- .map(m => ({ role: m.role === "assistant" ? "model" : "user", parts: [{ text: m.content }] }));
41
+ // Gemini requires strictly ALTERNATING user/model turns. jeo histories can carry
42
+ // consecutive same-role messages (a compaction summary prepended before a tool-result,
43
+ // back-to-back tool results, etc.), so coalesce adjacent same-role turns into one
44
+ // content block — otherwise the API rejects the request mid-session.
45
+ const contents: { role: string; parts: ({ text: string } | { inlineData: { mimeType: string; data: string } })[] }[] = [];
46
+ for (const m of messages) {
47
+ if (m.role === "system") continue;
48
+ const role = m.role === "assistant" ? "model" : "user";
49
+ // Clipboard-pasted images become inlineData parts alongside the text part.
50
+ const parts: ({ text: string } | { inlineData: { mimeType: string; data: string } })[] = [
51
+ ...(m.images?.map(img => ({ inlineData: { mimeType: img.mediaType, data: img.data } })) ?? []),
52
+ { text: m.content },
53
+ ];
54
+ const prev = contents[contents.length - 1];
55
+ if (prev && prev.role === role) {
56
+ prev.parts.push(...parts);
57
+ } else {
58
+ contents.push({ role, parts });
59
+ }
60
+ }
15
61
 
16
62
  const generationConfig: Record<string, unknown> = {
17
63
  temperature: options.temperature ?? 0.2,
18
64
  maxOutputTokens: options.maxTokens ?? 4000,
19
65
  };
20
66
  if (options.jsonMode) generationConfig.responseMimeType = "application/json";
67
+ const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
68
+ if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { thinkingBudget };
21
69
 
22
70
  const payload: Record<string, unknown> = { contents, generationConfig };
23
71
  if (systemPrompt) payload.systemInstruction = { parts: [{ text: systemPrompt }] };
72
+ return { geminiModel, payload };
73
+ }
24
74
 
75
+ export function geminiRequest(messages: Message[], options: CallOptions, credential: Credential, action: "generateContent" | "streamGenerateContent"): { url: string; headers: Record<string, string>; body: string } {
76
+ const { geminiModel, payload } = buildGeminiPayload(messages, options);
25
77
  const oauth = credential.kind === "oauth" ? credential.token : undefined;
26
78
  const apiKey = credential.kind === "api_key" ? credential.token : undefined;
27
- let url = `https://generativelanguage.googleapis.com/v1beta/models/${geminiModel}:${action}`;
79
+ let url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(geminiModel)}:${action}`;
28
80
  const query = action === "streamGenerateContent" ? "alt=sse" : "";
29
- if (!oauth) url += `?${query ? query + "&" : ""}key=${apiKey ?? ""}`;
81
+ if (!oauth) url += `?${query ? query + "&" : ""}key=${encodeURIComponent(apiKey ?? "")}`;
30
82
  else if (query) url += `?${query}`;
31
83
  const headers: Record<string, string> = oauth
32
84
  ? { "content-type": "application/json", authorization: `Bearer ${oauth}` }
@@ -34,18 +86,125 @@ function geminiRequest(messages: Message[], options: CallOptions, credential: Cr
34
86
  return { url, headers, body: JSON.stringify(payload) };
35
87
  }
36
88
 
89
+ const CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com";
90
+
91
/**
 * Identification headers sent with Cloud Code Assist requests.
 *
 * @param modelId Model id embedded in the User-Agent; defaults to
 *   "gemini-2.5-flash" when omitted.
 * @returns `User-Agent` (`GeminiCLI/<version>/<model> (<platform>; <arch>; terminal)`)
 *   and a fixed `Client-Metadata` header. The version is looked up via
 *   `jeoEnv("GEMINI_CLI_VERSION")`, falling back to "0.45.2".
 */
export function getGeminiCliHeaders(modelId?: string): Record<string, string> {
  const cliVersion = jeoEnv("GEMINI_CLI_VERSION") || "0.45.2";
  const model = modelId ?? "gemini-2.5-flash";
  const headers: Record<string, string> = {};
  headers["User-Agent"] = `GeminiCLI/${cliVersion}/${model} (${process.platform}; ${process.arch}; terminal)`;
  headers["Client-Metadata"] = "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI";
  return headers;
}
99
+
100
/**
 * Build the Cloud Code Assist request for a Google OAuth credential.
 *
 * The standard Gemini payload is wrapped in a `{ project, model, request }`
 * envelope and sent to the Code Assist `streamGenerateContent` endpoint as an
 * SSE request, authenticated with the OAuth access token as a bearer token.
 *
 * @param messages Conversation to send.
 * @param options Call options used to build the payload and pick the model.
 * @param accessToken OAuth access token.
 * @param projectId Project id placed in the envelope.
 * @returns URL, headers and serialized JSON body for the POST.
 */
export function geminiCliRequest(messages: Message[], options: CallOptions, accessToken: string, projectId: string): { url: string; headers: Record<string, string>; body: string } {
  const { geminiModel, payload } = buildGeminiPayload(messages, options);
  const envelope = { project: projectId, model: geminiModel, request: payload };
  const headers: Record<string, string> = {
    authorization: `Bearer ${accessToken}`,
    "content-type": "application/json",
    accept: "text/event-stream",
    ...getGeminiCliHeaders(geminiModel),
  };
  const url = `${CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse`;
  return { url, headers, body: JSON.stringify(envelope) };
}
120
+
37
121
  interface GeminiChunk {
38
- candidates?: { content?: { parts?: { text?: string }[] } }[];
39
- usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number };
122
+ candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
123
+ promptFeedback?: { blockReason?: string };
124
+ usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
125
+ }
126
+
127
+ /** Cloud Code Assist wraps each standard chunk under `response`. */
128
+ interface CcaChunk {
129
+ response?: GeminiChunk;
40
130
  }
41
131
 
42
132
/** Concatenate the text of every part of the first candidate; "" when there is none. */
function textOf(chunk: GeminiChunk): string {
  const parts = chunk.candidates?.[0]?.content?.parts;
  if (parts == null) return "";
  let text = "";
  for (const part of parts) text += part.text ?? "";
  return text;
}
45
135
 
136
/**
 * Explain why a chunk carried no text, so an HTTP 200 with empty content can
 * be reported with its real cause instead of a silent empty string.
 *
 * A prompt-level `blockReason` takes precedence; otherwise the first
 * candidate's `finishReason` is used. A missing reason or a normal `STOP`
 * yields `undefined`.
 */
function blockedReason(chunk: GeminiChunk): string | undefined {
  const promptBlock = chunk.promptFeedback?.blockReason;
  if (promptBlock) return `blockReason=${promptBlock}`;

  const finish = chunk.candidates?.[0]?.finishReason;
  if (!finish || finish === "STOP") return undefined;

  // Callers only ask when no text was produced, so MAX_TOKENS here means the
  // output budget ran out before any visible text was emitted.
  return finish === "MAX_TOKENS"
    ? "finishReason=MAX_TOKENS — output budget exhausted before any text; raise maxTokens or lower the thinking level"
    : `finishReason=${finish}`;
}
152
+
153
/**
 * Run one Cloud Code Assist SSE turn for a Google OAuth credential and yield
 * the text deltas as they arrive. Shared by `call` (which concatenates the
 * deltas) and `stream` (which yields them through).
 *
 * Steps: resolve the project id, POST the gemini-cli request, then parse each
 * SSE payload; payloads that are not valid JSON or lack a `response` wrapper
 * are skipped. Usage is reported ONCE after the stream ends, from the last
 * usage block seen, with thought tokens counted as output. It is reported even
 * when the turn produced no text, so tokens consumed by an empty turn (for
 * example MAX_TOKENS spent on thinking) are still accounted for.
 *
 * @throws The provider HTTP error for a non-ok response, or an Error naming
 *   the block/finish reason (when known) if the stream produced no text.
 */
async function* ccaTurn(messages: Message[], options: CallOptions, credential: Credential & { kind: "oauth" }): AsyncGenerator<string> {
  const { resolveAntigravityProjectId } = await import("./antigravity");
  const projectId = await resolveAntigravityProjectId(credential, { signal: options.signal });
  const { url, headers, body } = geminiCliRequest(messages, options, credential.token, projectId);
  const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
  if (!response.ok) throw await providerHttpError("Gemini (Cloud Code Assist)", response);
  if (!response.body) return;

  let lastUsage: GeminiChunk["usageMetadata"];
  let yieldedAny = false;
  let lastEmptyReason: string | undefined;
  for await (const data of readSse(response.body)) {
    let chunk: CcaChunk;
    try {
      chunk = JSON.parse(data);
    } catch {
      continue;
    }
    const inner = chunk.response;
    if (!inner) continue;
    const delta = textOf(inner);
    if (delta) {
      yieldedAny = true;
      yield delta;
    } else {
      // Remember the most recent explanation for an empty chunk.
      lastEmptyReason = blockedReason(inner) ?? lastEmptyReason;
    }
    if (inner.usageMetadata) lastUsage = inner.usageMetadata;
  }

  // Report usage before the emptiness check: a turn that returns no text has
  // still consumed tokens, and the other call paths report usage before raising.
  if (lastUsage) {
    options.onUsage?.({
      inputTokens: lastUsage.promptTokenCount,
      outputTokens: (lastUsage.candidatesTokenCount ?? 0) + (lastUsage.thoughtsTokenCount ?? 0),
    });
  }
  if (!yieldedAny) {
    throw new Error(`Gemini (Cloud Code Assist) returned no content${lastEmptyReason ? ` (${lastEmptyReason})` : ""}.`);
  }
}
198
+
46
199
  export const geminiAdapter: ProviderAdapter = {
47
200
  name: "gemini",
48
201
  async call(messages, options, credential) {
202
+ // OAuth (gemini-cli login) → Cloud Code Assist; no GEMINI_API_KEY required.
203
+ if (credential.kind === "oauth") {
204
+ let out = "";
205
+ for await (const delta of ccaTurn(messages, options, credential)) out += delta;
206
+ return out;
207
+ }
49
208
  const { url, headers, body } = geminiRequest(messages, options, credential, "generateContent");
50
209
  const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
51
210
  if (!response.ok) throw await providerHttpError("Gemini", response);
@@ -53,13 +212,26 @@ export const geminiAdapter: ProviderAdapter = {
53
212
  if (result.usageMetadata) {
54
213
  options.onUsage?.({ inputTokens: result.usageMetadata.promptTokenCount, outputTokens: result.usageMetadata.candidatesTokenCount });
55
214
  }
56
- return textOf(result);
215
+ const text = textOf(result);
216
+ if (!text) {
217
+ const reason = blockedReason(result);
218
+ if (reason) throw new Error(`Gemini returned no content (${reason}).`);
219
+ }
220
+ return text;
57
221
  },
58
222
  async *stream(messages, options, credential) {
223
+ // OAuth (gemini-cli login) → Cloud Code Assist; no GEMINI_API_KEY required.
224
+ if (credential.kind === "oauth") {
225
+ yield* ccaTurn(messages, options, credential);
226
+ return;
227
+ }
59
228
  const { url, headers, body } = geminiRequest(messages, options, credential, "streamGenerateContent");
60
229
  const response = await fetch(url, { method: "POST", headers, body, signal: options.signal });
61
230
  if (!response.ok) throw await providerHttpError("Gemini", response, "(stream)");
62
231
  if (!response.body) return;
232
+ let lastUsage: GeminiChunk["usageMetadata"];
233
+ let yieldedAny = false;
234
+ let lastEmptyReason: string | undefined;
63
235
  for await (const data of readSse(response.body)) {
64
236
  let chunk: GeminiChunk;
65
237
  try {
@@ -68,10 +240,21 @@ export const geminiAdapter: ProviderAdapter = {
68
240
  continue;
69
241
  }
70
242
  const delta = textOf(chunk);
71
- if (delta) yield delta;
72
- if (chunk.usageMetadata) {
73
- options.onUsage?.({ inputTokens: chunk.usageMetadata.promptTokenCount, outputTokens: chunk.usageMetadata.candidatesTokenCount });
243
+ if (delta) {
244
+ yieldedAny = true;
245
+ yield delta;
246
+ } else {
247
+ lastEmptyReason = blockedReason(chunk) ?? lastEmptyReason;
74
248
  }
249
+ // Gemini emits cumulative usageMetadata on most chunks; capture the last and
250
+ // report ONCE after the stream so an accumulating sink can't over-count.
251
+ if (chunk.usageMetadata) lastUsage = chunk.usageMetadata;
252
+ }
253
+ if (!yieldedAny && lastEmptyReason) {
254
+ throw new Error(`Gemini returned no content (${lastEmptyReason}).`);
255
+ }
256
+ if (lastUsage) {
257
+ options.onUsage?.({ inputTokens: lastUsage.promptTokenCount, outputTokens: lastUsage.candidatesTokenCount });
75
258
  }
76
259
  },
77
260
  };
@@ -2,13 +2,26 @@ import type { CallOptions, Message, ProviderAdapter } from "../types";
2
2
  import { readLines } from "../sse";
3
3
  import { providerHttpError } from "./errors";
4
4
 
5
/**
 * Resolve the Ollama base URL used to build API endpoints.
 *
 * Candidates are tried in order: the explicit `baseUrl`, then the
 * `OLLAMA_HOST` environment variable, then `http://localhost:11434`.
 * A candidate that is missing, empty, or whitespace-only is skipped, so an
 * exported-but-blank `OLLAMA_HOST` falls back to the default instead of
 * producing a malformed URL.
 *
 * `OLLAMA_HOST` is documented as a bare host:port (e.g. `127.0.0.1:11434`),
 * but `fetch` needs a scheme — `http://` is prepended when the value does not
 * already start with `http://` or `https://` (case-insensitive), otherwise
 * `fetch("127.0.0.1:11434/api/chat")` throws "Failed to parse URL".
 *
 * @param baseUrl Optional explicit base URL or bare host:port.
 * @returns The base URL with a scheme and without trailing slashes.
 */
export function normalizeOllamaBaseUrl(baseUrl?: string): string {
  const fallback = "http://localhost:11434";
  // First non-blank candidate wins; blank strings are treated as "unset".
  const candidate = [baseUrl, process.env.OLLAMA_HOST].map(v => v?.trim()).find(v => v) ?? fallback;
  const withScheme = /^https?:\/\//i.test(candidate) ? candidate : `http://${candidate}`;
  // Strip every trailing slash so `${base}/api/chat` never contains `//api`.
  return withScheme.replace(/\/+$/, "");
}
14
+
5
15
  function ollamaRequest(messages: Message[], options: CallOptions, stream: boolean): { url: string; body: string } {
6
16
  const model = options.model.startsWith("ollama/") ? options.model.slice(7) : options.model;
7
17
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
8
- const chatMessages: { role: string; content: string }[] = [];
18
+ const chatMessages: { role: string; content: string; images?: string[] }[] = [];
9
19
  if (systemPrompt) chatMessages.push({ role: "system", content: systemPrompt });
10
20
  for (const msg of messages) {
11
- if (msg.role !== "system") chatMessages.push({ role: msg.role, content: msg.content });
21
+ if (msg.role === "system") continue;
22
+ // Ollama multimodal models take raw base64 strings in a sibling `images` array.
23
+ if (msg.images?.length) chatMessages.push({ role: msg.role, content: msg.content, images: msg.images.map(i => i.data) });
24
+ else chatMessages.push({ role: msg.role, content: msg.content });
12
25
  }
13
26
  const payload: Record<string, unknown> = {
14
27
  model,
@@ -17,38 +30,55 @@ function ollamaRequest(messages: Message[], options: CallOptions, stream: boolea
17
30
  options: { temperature: options.temperature ?? 0.2, num_predict: options.maxTokens ?? 4000 },
18
31
  };
19
32
  if (options.jsonMode) payload.format = "json";
20
- const base = (options.baseUrl ?? process.env.OLLAMA_HOST ?? "http://localhost:11434").replace(/\/$/, "");
33
+ const base = normalizeOllamaBaseUrl(options.baseUrl);
21
34
  return { url: `${base}/api/chat`, body: JSON.stringify(payload) };
22
35
  }
23
36
 
37
/**
 * Build the error thrown when Ollama answers successfully but supplies no text.
 *
 * The message includes `done_reason` when one was reported, and adds a hint to
 * raise `maxTokens` when that reason is `"length"`.
 *
 * @param doneReason The `done_reason` reported by Ollama, if any.
 * @returns An `Error` describing the empty completion.
 */
function emptyCompletionError(doneReason: string | undefined): Error {
  let message = "Ollama returned no content";
  if (doneReason) message += ` (done_reason=${doneReason})`;
  if (doneReason === "length") message += " — output budget exhausted before any text; raise maxTokens";
  return new Error(`${message}.`);
}
44
+
24
45
/**
 * Provider adapter that talks to an Ollama server through the URL and body
 * produced by `ollamaRequest` (the `/api/chat` endpoint).
 *
 * Both entry points share one contract: a successful HTTP response that
 * carries no text is an error (`emptyCompletionError`), never an empty result.
 * Token counts are reported through `options.onUsage` when it is supplied.
 */
export const ollamaAdapter: ProviderAdapter = {
  name: "ollama",

  /**
   * Run one non-streaming chat completion and return the full reply text.
   *
   * @throws The error built by `providerHttpError` on a non-2xx response, or
   *   `emptyCompletionError` when the reply contains no text.
   */
  async call(messages, options) {
    const { url, body } = ollamaRequest(messages, options, false);
    const response = await fetch(url, { method: "POST", headers: { "content-type": "application/json" }, body, signal: options.signal });
    if (!response.ok) throw await providerHttpError("Ollama", response, `at ${url}`);
    const result = (await response.json()) as { message?: { content?: string }; done_reason?: string; prompt_eval_count?: number; eval_count?: number; total_duration?: number };
    // Usage is reported before the empty check so an empty reply is still accounted for.
    // total_duration is divided by 1e6 — presumably nanoseconds → milliseconds; confirm against the Ollama API.
    options.onUsage?.({ inputTokens: result.prompt_eval_count, outputTokens: result.eval_count, durationMs: result.total_duration ? Math.round(result.total_duration / 1e6) : undefined });
    const text = result.message?.content ?? "";
    if (!text) throw emptyCompletionError(result.done_reason);
    return text;
  },

  /**
   * Stream one chat completion, yielding each non-empty text delta as it arrives.
   *
   * @throws The error built by `providerHttpError` on a non-2xx response, an
   *   `Error` carrying the server's message when a streamed line reports
   *   `error`, or `emptyCompletionError` when the stream ends without any text.
   */
  async *stream(messages, options) {
    const { url, body } = ollamaRequest(messages, options, true);
    const response = await fetch(url, { method: "POST", headers: { "content-type": "application/json" }, body, signal: options.signal });
    if (!response.ok) throw await providerHttpError("Ollama", response, `(stream) at ${url}`);
    if (!response.body) return;
    let yieldedAny = false;
    let doneReason: string | undefined;
    for await (const line of readLines(response.body)) {
      let chunk: { message?: { content?: string }; done?: boolean; done_reason?: string; error?: string; prompt_eval_count?: number; eval_count?: number; total_duration?: number };
      try {
        chunk = JSON.parse(line);
      } catch {
        // Lines that are not valid JSON are skipped rather than aborting the stream.
        continue;
      }
      // A failure after the 200 status line arrives as an `{ "error": … }` object in the
      // stream; surface it instead of reporting a misleading "no content" or a truncated reply.
      if (chunk.error) throw new Error(`Ollama stream failed: ${chunk.error}`);
      const delta = chunk.message?.content;
      if (delta) {
        yieldedAny = true;
        yield delta;
      }
      if (chunk.done) {
        if (chunk.done_reason) doneReason = chunk.done_reason;
        options.onUsage?.({ inputTokens: chunk.prompt_eval_count, outputTokens: chunk.eval_count, durationMs: chunk.total_duration ? Math.round(chunk.total_duration / 1e6) : undefined });
        break;
      }
    }
    if (!yieldedAny) throw emptyCompletionError(doneReason);
  },
};