@nghyane/arcane 0.1.16 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. package/CHANGELOG.md +21 -0
  2. package/package.json +7 -15
  3. package/src/cli/setup-cli.ts +2 -62
  4. package/src/commands/setup.ts +1 -1
  5. package/src/config/keybindings.ts +1 -4
  6. package/src/config/settings-schema.ts +23 -98
  7. package/src/config/settings.ts +0 -1
  8. package/src/exa/mcp-client.ts +57 -2
  9. package/src/extensibility/custom-tools/types.ts +2 -2
  10. package/src/extensibility/custom-tools/wrapper.ts +1 -1
  11. package/src/extensibility/extensions/wrapper.ts +1 -1
  12. package/src/extensibility/hooks/tool-wrapper.ts +1 -1
  13. package/src/internal-urls/docs-index.generated.ts +1 -2
  14. package/src/internal-urls/index.ts +2 -4
  15. package/src/internal-urls/router.ts +2 -2
  16. package/src/internal-urls/types.ts +2 -2
  17. package/src/mcp/oauth-flow.ts +1 -1
  18. package/src/modes/components/custom-editor.ts +6 -2
  19. package/src/modes/controllers/command-controller.ts +4 -46
  20. package/src/modes/controllers/input-controller.ts +123 -6
  21. package/src/modes/interactive-mode.ts +1 -84
  22. package/src/modes/types.ts +0 -1
  23. package/src/patch/edit-tool.ts +2 -11
  24. package/src/patch/hashline.ts +42 -0
  25. package/src/prompts/agents/explore.md +4 -2
  26. package/src/prompts/agents/librarian.md +4 -6
  27. package/src/prompts/agents/reviewer.md +1 -1
  28. package/src/prompts/agents/task.md +5 -1
  29. package/src/prompts/system/system-prompt.md +29 -18
  30. package/src/prompts/thread-extract.md +16 -0
  31. package/src/prompts/tools/render-mermaid.md +9 -0
  32. package/src/sdk.ts +12 -37
  33. package/src/session/agent-session.ts +5 -10
  34. package/src/session/retry-utils.ts +1 -1
  35. package/src/session/session-index.ts +329 -0
  36. package/src/session/session-manager.ts +0 -30
  37. package/src/session/streaming-edit.ts +1 -36
  38. package/src/slash-commands/builtin-registry.ts +0 -16
  39. package/src/task/index.ts +1 -1
  40. package/src/tools/ask.ts +9 -6
  41. package/src/tools/bash-skill-urls.ts +3 -3
  42. package/src/tools/bash.ts +2 -1
  43. package/src/tools/create-tools.ts +28 -33
  44. package/src/tools/fetch.ts +1 -1
  45. package/src/tools/find-thread.ts +120 -0
  46. package/src/tools/grep.ts +2 -1
  47. package/src/tools/index.ts +5 -0
  48. package/src/tools/python.ts +53 -1
  49. package/src/tools/read-thread.ts +409 -0
  50. package/src/tools/read.ts +4 -3
  51. package/src/tools/render-mermaid.ts +68 -0
  52. package/src/tools/save-memory.ts +182 -0
  53. package/src/tools/write.ts +1 -1
  54. package/src/web/search/index.ts +4 -1
  55. package/src/web/search/provider.ts +3 -0
  56. package/src/web/search/providers/anthropic.ts +1 -0
  57. package/src/web/search/providers/gemini.ts +122 -37
  58. package/src/web/search/providers/kagi.ts +163 -0
  59. package/src/web/search/types.ts +1 -0
  60. package/src/internal-urls/memory-protocol.ts +0 -133
  61. package/src/memories/index.ts +0 -1099
  62. package/src/memories/storage.ts +0 -563
  63. package/src/patch/normative.ts +0 -72
  64. package/src/prompts/memories/consolidation.md +0 -30
  65. package/src/prompts/memories/read_path.md +0 -11
  66. package/src/prompts/memories/stage_one_input.md +0 -6
  67. package/src/prompts/memories/stage_one_system.md +0 -21
  68. package/src/stt/downloader.ts +0 -68
  69. package/src/stt/index.ts +0 -3
  70. package/src/stt/recorder.ts +0 -351
  71. package/src/stt/setup.ts +0 -50
  72. package/src/stt/stt-controller.ts +0 -160
  73. package/src/stt/transcribe.py +0 -70
  74. package/src/stt/transcriber.ts +0 -91
@@ -0,0 +1,68 @@
1
+ import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@nghyane/arcane-agent";
2
+ import { type AsciiRenderOptions, renderMermaidAscii } from "@nghyane/arcane-utils";
3
+ import { type Static, Type } from "@sinclair/typebox";
4
+ import { renderPromptTemplate } from "../config/prompt-templates";
5
+ import renderMermaidDescription from "../prompts/tools/render-mermaid.md" with { type: "text" };
6
+ import type { ToolSession } from "./index";
7
+ import { allocateOutputArtifact } from "./output-utils";
8
+
9
/** Optional layout overrides accepted under the `config` key of the tool input. */
const renderConfigSchema = Type.Object({
	useAscii: Type.Optional(Type.Boolean()),
	paddingX: Type.Optional(Type.Number()),
	paddingY: Type.Optional(Type.Number()),
	boxBorderPadding: Type.Optional(Type.Number()),
});

/** Input schema of the `render_mermaid` tool: the diagram source plus optional render settings. */
const renderMermaidSchema = Type.Object({
	mermaid: Type.String({ description: "Mermaid graph source text" }),
	config: Type.Optional(renderConfigSchema),
});

/** Static parameter type derived from `renderMermaidSchema`. */
type RenderMermaidParams = Static<typeof renderMermaidSchema>;
22
+
23
/**
 * Normalizes caller-supplied render options before they are handed to the renderer.
 *
 * Each padding value is floored to an integer and clamped to be non-negative.
 * Non-finite values (`NaN`, `Infinity`, `-Infinity`) are treated as unset, because
 * `Math.max(0, Math.floor(NaN))` is `NaN` and `Infinity` would otherwise pass through
 * unchanged (JSON such as `1e999` parses to `Infinity`).
 *
 * @param config - Raw options from the tool input, or `undefined` when none were given.
 * @returns A sanitized copy of the options, or `undefined` when `config` is absent.
 */
function sanitizeRenderConfig(config: AsciiRenderOptions | undefined): AsciiRenderOptions | undefined {
	if (!config) return undefined;

	const toPadding = (value: number | undefined): number | undefined => {
		if (value === undefined || !Number.isFinite(value)) return undefined;
		return Math.max(0, Math.floor(value));
	};

	return {
		useAscii: config.useAscii,
		boxBorderPadding: toPadding(config.boxBorderPadding),
		paddingX: toPadding(config.paddingX),
		paddingY: toPadding(config.paddingY),
	};
}
33
+
34
/** Extra metadata attached to a `render_mermaid` tool result. */
export interface RenderMermaidToolDetails {
	/** Identifier of the output artifact, when one was allocated for this call. */
	artifactId?: string;
}

/**
 * Agent tool that renders Mermaid source to ASCII text via `renderMermaidAscii`.
 *
 * The rendered text is returned to the caller and, when an artifact path is
 * allocated for the session, also written to that path.
 */
export class RenderMermaidTool implements AgentTool<typeof renderMermaidSchema, RenderMermaidToolDetails> {
	readonly name = "render_mermaid";
	readonly label = "RenderMermaid";
	readonly description: string;
	readonly parameters = renderMermaidSchema;
	readonly strict = true;

	constructor(private readonly session: ToolSession) {
		this.description = renderPromptTemplate(renderMermaidDescription);
	}

	/**
	 * Renders `params.mermaid` and returns the ASCII output as a single text part.
	 *
	 * @param params - Diagram source and optional render configuration.
	 * @returns The rendered text (followed by an artifact reference line when an
	 *   artifact id exists) and the artifact id in `details`.
	 */
	async execute(
		_toolCallId: string,
		params: RenderMermaidParams,
		_signal?: AbortSignal,
		_onUpdate?: AgentToolUpdateCallback<RenderMermaidToolDetails>,
		_context?: AgentToolContext,
	): Promise<AgentToolResult<RenderMermaidToolDetails>> {
		const renderOptions = sanitizeRenderConfig(params.config);
		const rendered = renderMermaidAscii(params.mermaid, renderOptions);

		const artifact = await allocateOutputArtifact(this.session, "render_mermaid");
		if (artifact.artifactPath) {
			await Bun.write(artifact.artifactPath, rendered);
		}

		let text = rendered;
		if (artifact.artifactId) {
			text += `\n\nSaved artifact: artifact://${artifact.artifactId}`;
		}

		return {
			content: [{ type: "text", text }],
			details: { artifactId: artifact.artifactId },
		};
	}
}
@@ -0,0 +1,182 @@
1
+ import * as path from "node:path";
2
+ import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@nghyane/arcane-agent";
3
+ import type { Component } from "@nghyane/arcane-tui";
4
+ import { Text } from "@nghyane/arcane-tui";
5
+ import { isEnoent, logger } from "@nghyane/arcane-utils";
6
+ import { type Static, Type } from "@sinclair/typebox";
7
+ import type { RenderResultOptions } from "../extensibility/custom-tools/types";
8
+ import type { Theme } from "../theme/theme";
9
+ import { renderStatusLine } from "../tui";
10
+ import { shortenPath, TRUNCATE_LENGTHS, truncateToWidth } from "../ui/render-utils";
11
+ import type { ToolSession } from ".";
12
+
13
/** Description shown to the model for the `fact` parameter. */
const FACT_DESCRIPTION = "A clear, self-contained statement to remember across sessions";

/** Input schema of the `save_memory` tool: a single free-text fact. */
const saveMemorySchema = Type.Object({
	fact: Type.String({ description: FACT_DESCRIPTION }),
});

/** Static parameter type derived from `saveMemorySchema`. */
type SaveMemoryParams = Static<typeof saveMemorySchema>;

/** Metadata attached to a `save_memory` tool result. */
export interface SaveMemoryToolDetails {
	/** The trimmed fact, or an empty string when the input was rejected as empty. */
	fact: string;
	/** Path of the AGENTS.md file that was targeted, or an empty string when the input was rejected. */
	filePath: string;
	/** Set to `true` when the fact was already present and nothing was written. */
	duplicate?: boolean;
}

/** Arguments available while rendering the tool call. */
interface SaveMemoryRenderArgs {
	fact?: string;
}
28
+
29
/** Heading line written when a memories section has to be created. */
const MEMORIES_HEADING = "## Memories";
/** Matches the memories heading line, tolerating trailing whitespace. */
const MEMORIES_HEADING_RE = /^## Memories\s*$/;
/** Matches any level-2 markdown heading; used to find where the memories section ends. */
const NEXT_HEADING_RE = /^## /;

/**
 * Walks upward from `startDir` looking for the closest `AGENTS.md`.
 *
 * Each candidate is probed by reading it; a not-found error moves the search to
 * the parent directory, any other error is rethrown.
 *
 * @param startDir - Directory to start from (resolved to an absolute path).
 * @returns Path of the first `AGENTS.md` that could be read, or `null` when the
 *   filesystem root is reached without finding one.
 */
async function findNearestAgentsMd(startDir: string): Promise<string | null> {
	let dir = path.resolve(startDir);
	const root = path.parse(dir).root;
	while (true) {
		const candidate = path.join(dir, "AGENTS.md");
		try {
			await Bun.file(candidate).text();
			return candidate;
		} catch (err) {
			if (!isEnoent(err)) throw err;
		}
		const parent = path.dirname(dir);
		if (parent === dir || dir === root) return null;
		dir = parent;
	}
}

/**
 * Adds `fact` as a bullet to the `## Memories` section of a markdown document.
 *
 * - When no memories section exists, one is appended at the end of the document
 *   (or becomes the whole document when `content` is blank).
 * - When an existing bullet in the section contains the fact, or the fact contains
 *   an existing bullet (case-insensitive substring match in either direction), the
 *   document is returned unchanged with `duplicate: true`.
 * - Otherwise the bullet is inserted directly after the last non-blank line of the
 *   section, so blank lines that separate the section from the next heading or
 *   the end of the file stay where they are.
 *
 * @param content - Current document text.
 * @param fact - Fact to store; line breaks are collapsed to single spaces so the
 *   fact always occupies exactly one bullet line.
 * @returns The resulting document text and whether the fact was already present.
 */
function insertMemory(content: string, fact: string): { content: string; duplicate: boolean } {
	// A raw line break inside the fact would end the bullet early and could even
	// start a new "## " heading, so fold the fact onto one line first.
	const singleLineFact = fact.replace(/\s*[\r\n]+\s*/g, " ").trim();
	const bullet = `- ${singleLineFact}`;
	const lines = content.split("\n");

	const sectionStart = lines.findIndex((line) => MEMORIES_HEADING_RE.test(line));
	if (sectionStart === -1) {
		// No section yet: append one, without leading blank lines for an empty document.
		const existingText = content.trimEnd();
		const prefix = existingText === "" ? "" : `${existingText}\n\n`;
		return { content: `${prefix}${MEMORIES_HEADING}\n${bullet}\n`, duplicate: false };
	}

	// The section runs until the next level-2 heading or the end of the document.
	const nextHeadingOffset = lines.slice(sectionStart + 1).findIndex((line) => NEXT_HEADING_RE.test(line));
	const sectionEnd = nextHeadingOffset === -1 ? lines.length : sectionStart + 1 + nextHeadingOffset;

	const factLower = singleLineFact.toLowerCase();
	const alreadySaved = lines.slice(sectionStart + 1, sectionEnd).some((rawLine) => {
		const line = rawLine.trim();
		if (!line.startsWith("- ")) return false;
		const existing = line.slice(2).trim().toLowerCase();
		return existing.includes(factLower) || factLower.includes(existing);
	});
	if (alreadySaved) {
		return { content, duplicate: true };
	}

	// Step back over trailing blank lines so the new bullet joins the existing list
	// instead of landing after the blank line that ends the section or the file.
	let insertAt = sectionEnd;
	while (insertAt > sectionStart + 1 && lines[insertAt - 1]?.trim() === "") {
		insertAt--;
	}
	lines.splice(insertAt, 0, bullet);
	return { content: lines.join("\n"), duplicate: false };
}
95
+
96
/** Prefix of the confirmation text emitted by `SaveMemoryTool.execute` after a successful write. */
const SAVED_MESSAGE_PREFIX = "Saved to ";

/**
 * Agent tool that stores a fact as a bullet under the `## Memories` section of the
 * nearest `AGENTS.md` (searching upward from the session's working directory), or
 * of a new `AGENTS.md` in the working directory when none is found.
 */
export class SaveMemoryTool implements AgentTool<typeof saveMemorySchema, SaveMemoryToolDetails, Theme> {
	readonly name = "save_memory";
	readonly label = "Save Memory";
	description =
		'Save a fact or preference to long-term memory that persists across sessions. Use when the user explicitly asks to remember something or states a clear preference. Facts should be short, self-contained: "Prefers tabs over spaces", "Project uses pnpm". Do not save transient conversation context. If unsure, ask the user.';
	readonly parameters = saveMemorySchema;
	readonly concurrency = "exclusive";

	constructor(private readonly session: ToolSession) {}

	/**
	 * Saves `params.fact` to AGENTS.md.
	 *
	 * @param params - Tool input; the fact is trimmed before use.
	 * @returns A text result: a rejection for an empty fact, a notice when the fact
	 *   is already present, a failure notice when the write throws, or a
	 *   confirmation naming the file that was written.
	 */
	async execute(
		_toolCallId: string,
		params: SaveMemoryParams,
		_signal?: AbortSignal,
		_onUpdate?: AgentToolUpdateCallback<SaveMemoryToolDetails>,
		_context?: AgentToolContext,
	): Promise<AgentToolResult<SaveMemoryToolDetails>> {
		const fact = params.fact.trim();
		if (!fact) {
			return {
				content: [{ type: "text", text: "Fact cannot be empty." }],
				details: { fact: "", filePath: "" },
			};
		}

		// Use the nearest existing AGENTS.md; otherwise start a new one in the cwd.
		const existingPath = await findNearestAgentsMd(this.session.cwd);
		const filePath = existingPath ?? path.join(this.session.cwd, "AGENTS.md");
		const content = existingPath ? await Bun.file(existingPath).text() : "";

		const result = insertMemory(content, fact);
		if (result.duplicate) {
			return {
				content: [{ type: "text", text: "This fact is already saved." }],
				details: { fact, filePath, duplicate: true },
			};
		}

		try {
			await Bun.write(filePath, result.content);
		} catch (err) {
			logger.error("Failed to write AGENTS.md", { path: filePath, error: String(err) });
			return {
				content: [{ type: "text", text: "Failed to save memory." }],
				details: { fact, filePath },
			};
		}

		return {
			content: [{ type: "text", text: `${SAVED_MESSAGE_PREFIX}${filePath}` }],
			details: { fact, filePath },
		};
	}

	/** Renders the pending tool call as a status line with a truncated preview of the fact. */
	renderCall(args: SaveMemoryRenderArgs, _options: RenderResultOptions, uiTheme: Theme): Component {
		const preview = args.fact ? truncateToWidth(args.fact, TRUNCATE_LENGTHS.CONTENT) : "";
		const meta = preview ? [preview] : [];
		const text = renderStatusLine({ icon: "pending", title: "Save Memory", meta }, uiTheme);
		return new Text(text, 0, 0);
	}

	/**
	 * Renders the tool result as a status line followed by a dimmed message.
	 *
	 * The outcome is derived from what `execute` returned: a duplicate is shown as
	 * info, a result whose text is the save confirmation is shown as success with
	 * the saved fact, and anything else (empty fact, failed write) is shown as an
	 * error with the result text, so a failed save is not presented as a success.
	 */
	renderResult(
		result: { content: Array<{ type: string; text?: string }>; details?: SaveMemoryToolDetails },
		_options: RenderResultOptions,
		uiTheme: Theme,
		_args?: SaveMemoryRenderArgs,
	): Component {
		const details = result.details;
		const resultText = result.content.find((part) => part.type === "text")?.text ?? "";

		let icon: "info" | "success" | "error";
		let body: string;
		if (details?.duplicate === true) {
			icon = "info";
			body = "This fact is already saved.";
		} else if (resultText.startsWith(SAVED_MESSAGE_PREFIX)) {
			icon = "success";
			body = details?.fact ?? "";
		} else {
			icon = "error";
			body = resultText;
		}

		const filePath = details?.filePath ? shortenPath(details.filePath) : "";
		const meta = filePath ? [filePath] : [];
		const header = renderStatusLine({ icon, title: "Save Memory", meta }, uiTheme);
		const message = uiTheme.fg("dim", body);

		return new Text(`${header}\n${message}`, 0, 0);
	}
}
@@ -71,7 +71,7 @@ type WriteParams = WriteToolInput;
71
71
  export class WriteTool implements AgentTool<typeof writeSchema, WriteToolDetails, Theme> {
72
72
  readonly name = "write";
73
73
  readonly label = "Write";
74
- description = "Create a new file";
74
+ description = "Create a new file. For existing files, prefer edit instead — even for extensive changes.";
75
75
  readonly parameters = writeSchema;
76
76
  readonly nonAbortable = true;
77
77
  readonly concurrency = "exclusive";
@@ -35,6 +35,7 @@ export const webSearchSchema = Type.Object({
35
35
  "exa",
36
36
  "brave",
37
37
  "jina",
38
+ "kagi",
38
39
  "kimi",
39
40
  "zai",
40
41
  "anthropic",
@@ -55,6 +56,7 @@ export type SearchParams = {
55
56
  | "exa"
56
57
  | "brave"
57
58
  | "jina"
59
+ | "kagi"
58
60
  | "kimi"
59
61
  | "zai"
60
62
  | "anthropic"
@@ -263,7 +265,8 @@ export async function runSearchQuery(params: SearchParams): Promise<{
263
265
  export class SearchTool implements AgentTool<typeof webSearchSchema, SearchRenderDetails, Theme> {
264
266
  readonly name = "web_search";
265
267
  readonly label = "Web Search";
266
- readonly description = "Search the web";
268
+ readonly description =
269
+ "Search the web for up-to-date information. Use fetch to read full content from a specific URL.";
267
270
  readonly parameters = webSearchSchema;
268
271
  readonly renderCall = renderSearchCall;
269
272
  readonly renderResult = renderSearchResult;
@@ -5,6 +5,7 @@ import { CodexProvider } from "./providers/codex";
5
5
  import { ExaProvider } from "./providers/exa";
6
6
  import { GeminiProvider } from "./providers/gemini";
7
7
  import { JinaProvider } from "./providers/jina";
8
+ import { KagiProvider } from "./providers/kagi";
8
9
  import { KimiProvider } from "./providers/kimi";
9
10
  import { PerplexityProvider } from "./providers/perplexity";
10
11
  import { SyntheticProvider } from "./providers/synthetic";
@@ -18,6 +19,7 @@ const SEARCH_PROVIDERS: Record<SearchProviderId, SearchProvider> = {
18
19
  exa: new ExaProvider(),
19
20
  brave: new BraveProvider(),
20
21
  jina: new JinaProvider(),
22
+ kagi: new KagiProvider(),
21
23
  perplexity: new PerplexityProvider(),
22
24
  kimi: new KimiProvider(),
23
25
  zai: new ZaiProvider(),
@@ -32,6 +34,7 @@ export const SEARCH_PROVIDER_ORDER: SearchProviderId[] = [
32
34
  "exa",
33
35
  "brave",
34
36
  "jina",
37
+ "kagi",
35
38
  "kimi",
36
39
  "anthropic",
37
40
  "gemini",
@@ -65,6 +65,7 @@ function buildSystemBlocks(
65
65
  return buildAnthropicSystemBlocks(systemPrompt, {
66
66
  includeClaudeCodeInstruction: includeClaudeCode,
67
67
  extraInstructions,
68
+ cacheControl: { type: "ephemeral" },
68
69
  });
69
70
  }
70
71
 
@@ -5,7 +5,13 @@
5
5
  * Requires OAuth credentials stored in agent.db for provider "google-gemini-cli" or "google-antigravity".
6
6
  * Returns synthesized answers with citations and source metadata from grounding chunks.
7
7
  */
8
- import { getAntigravityHeaders, getGeminiCliHeaders, refreshGoogleCloudToken } from "@nghyane/arcane-ai";
8
+ import {
9
+ ANTIGRAVITY_SYSTEM_INSTRUCTION,
10
+ extractRetryDelay,
11
+ getAntigravityHeaders,
12
+ getGeminiCliHeaders,
13
+ refreshGoogleCloudToken,
14
+ } from "@nghyane/arcane-ai";
9
15
  import { getAgentDbPath } from "@nghyane/arcane-utils/dirs";
10
16
  import { AgentStorage } from "../../../session/agent-storage";
11
17
  import type { SearchCitation, SearchResponse, SearchSource } from "../../../web/search/types";
@@ -14,10 +20,32 @@ import type { SearchParams } from "./base";
14
20
  import { SearchProvider } from "./base";
15
21
 
16
22
  const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
17
- const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
23
+ const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
24
+ const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
25
+ const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
18
26
  const DEFAULT_MODEL = "gemini-2.5-flash";
27
+ const MAX_RETRIES = 3;
28
+ const BASE_DELAY_MS = 1000;
29
+ const RATE_LIMIT_BUDGET_MS = 5 * 60 * 1000;
30
+
31
+ interface GeminiToolParams {
32
+ google_search?: Record<string, unknown>;
33
+ code_execution?: Record<string, unknown>;
34
+ url_context?: Record<string, unknown>;
35
+ }
19
36
 
20
- export interface GeminiSearchParams {
37
/**
 * Builds the `tools` array for a Gemini request.
 *
 * A `googleSearch` entry is always present (using `params.google_search`, or an
 * empty object when it is absent). `codeExecution` and `urlContext` entries are
 * appended, in that order, only when the corresponding parameter is defined.
 *
 * @param params - Optional per-tool configuration objects.
 * @returns The list of single-key tool descriptors.
 */
export function buildGeminiRequestTools(params: GeminiToolParams): Array<Record<string, Record<string, unknown>>> {
	const requestTools: Array<Record<string, Record<string, unknown>>> = [
		{ googleSearch: params.google_search ?? {} },
	];

	const optionalTools: Array<[string, Record<string, unknown> | undefined]> = [
		["codeExecution", params.code_execution],
		["urlContext", params.url_context],
	];
	for (const [toolName, toolConfig] of optionalTools) {
		if (toolConfig !== undefined) {
			requestTools.push({ [toolName]: toolConfig });
		}
	}

	return requestTools;
}
47
+
48
+ export interface GeminiSearchParams extends GeminiToolParams {
21
49
  query: string;
22
50
  system_prompt?: string;
23
51
  num_results?: number;
@@ -55,8 +83,8 @@ export async function findGeminiAuth(): Promise<GeminiAuth | null> {
55
83
  const expiryBuffer = 5 * 60 * 1000; // 5 minutes
56
84
  const now = Date.now();
57
85
 
58
- // Try providers in order: antigravity first (more quota), then gemini-cli
59
- const providers = ["google-antigravity", "google-gemini-cli"] as const;
86
+ // Try providers in order: gemini-cli first (deterministic), then antigravity
87
+ const providers = ["google-gemini-cli", "google-antigravity"] as const;
60
88
 
61
89
  try {
62
90
  const storage = await AgentStorage.open(getAgentDbPath());
@@ -180,6 +208,7 @@ async function callGeminiSearch(
180
208
  systemPrompt?: string,
181
209
  maxOutputTokens?: number,
182
210
  temperature?: number,
211
+ toolParams: GeminiToolParams = {},
183
212
  ): Promise<{
184
213
  answer: string;
185
214
  sources: SearchSource[];
@@ -188,10 +217,20 @@ async function callGeminiSearch(
188
217
  model: string;
189
218
  usage?: { inputTokens: number; outputTokens: number; totalTokens: number };
190
219
  }> {
191
- const endpoint = auth.isAntigravity ? ANTIGRAVITY_ENDPOINT : DEFAULT_ENDPOINT;
192
- const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
220
+ const endpoints = auth.isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
193
221
  const headers = auth.isAntigravity ? getAntigravityHeaders() : getGeminiCliHeaders();
194
222
 
223
+ const normalizedSystemPrompt = systemPrompt?.toWellFormed();
224
+ const systemInstructionParts: Array<{ text: string }> = [
225
+ ...(auth.isAntigravity
226
+ ? [
227
+ { text: ANTIGRAVITY_SYSTEM_INSTRUCTION },
228
+ { text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },
229
+ ]
230
+ : []),
231
+ ...(normalizedSystemPrompt ? [{ text: normalizedSystemPrompt }] : []),
232
+ ];
233
+
195
234
  const requestBody: Record<string, unknown> = {
196
235
  project: auth.projectId,
197
236
  model: DEFAULT_MODEL,
@@ -202,11 +241,11 @@ async function callGeminiSearch(
202
241
  parts: [{ text: query }],
203
242
  },
204
243
  ],
205
- // Add googleSearch tool for grounding
206
- tools: [{ googleSearch: {} }],
207
- ...(systemPrompt && {
244
+ tools: buildGeminiRequestTools(toolParams),
245
+ ...(systemInstructionParts.length > 0 && {
208
246
  systemInstruction: {
209
- parts: [{ text: systemPrompt }],
247
+ ...(auth.isAntigravity ? { role: "user" } : {}),
248
+ parts: systemInstructionParts,
210
249
  },
211
250
  }),
212
251
  },
@@ -225,31 +264,83 @@ async function callGeminiSearch(
225
264
  (requestBody.request as Record<string, unknown>).generationConfig = generationConfig;
226
265
  }
227
266
 
228
- const response = await fetch(url, {
229
- method: "POST",
230
- headers: {
231
- Authorization: `Bearer ${auth.accessToken}`,
232
- "Content-Type": "application/json",
233
- Accept: "text/event-stream",
234
- ...headers,
235
- },
236
- body: JSON.stringify(requestBody),
237
- });
238
-
239
- if (!response.ok) {
240
- const errorText = await response.text();
241
- throw new SearchProviderError(
242
- "gemini",
243
- `Gemini Cloud Code API error (${response.status}): ${errorText}`,
244
- response.status,
245
- );
267
+ // Retry loop with endpoint fallback and rate limit budgeting
268
+ let lastError: Error | undefined;
269
+ let totalDelayMs = 0;
270
+
271
+ for (const endpoint of endpoints) {
272
+ const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
273
+
274
+ for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
275
+ try {
276
+ const response = await fetch(url, {
277
+ method: "POST",
278
+ headers: {
279
+ Authorization: `Bearer ${auth.accessToken}`,
280
+ "Content-Type": "application/json",
281
+ Accept: "text/event-stream",
282
+ ...headers,
283
+ },
284
+ body: JSON.stringify(requestBody),
285
+ });
286
+
287
+ if (response.ok) {
288
+ return await parseGeminiSSEResponse(response);
289
+ }
290
+
291
+ const errorText = await response.text();
292
+
293
+ // Non-retryable status codes
294
+ if (response.status >= 400 && response.status < 429) {
295
+ throw new SearchProviderError(
296
+ "gemini",
297
+ `Gemini Cloud Code API error (${response.status}): ${errorText}`,
298
+ response.status,
299
+ );
300
+ }
301
+
302
+ // Rate limit or server error — retry with backoff
303
+ const serverDelay = extractRetryDelay(errorText, response);
304
+ const delay = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
305
+ totalDelayMs += delay;
306
+
307
+ if (totalDelayMs > RATE_LIMIT_BUDGET_MS) {
308
+ throw new SearchProviderError(
309
+ "gemini",
310
+ `Rate limit budget exhausted after ${Math.round(totalDelayMs / 1000)}s of delays`,
311
+ 429,
312
+ );
313
+ }
314
+
315
+ lastError = new SearchProviderError(
316
+ "gemini",
317
+ `Gemini Cloud Code API error (${response.status}): ${errorText}`,
318
+ response.status,
319
+ );
320
+ await Bun.sleep(delay);
321
+ } catch (err) {
322
+ if (err instanceof SearchProviderError) throw err;
323
+ lastError = err as Error;
324
+ break; // Network error — try next endpoint
325
+ }
326
+ }
246
327
  }
247
328
 
329
+ throw lastError ?? new SearchProviderError("gemini", "All Gemini endpoints failed", 500);
330
+ }
331
+
332
+ async function parseGeminiSSEResponse(response: Response): Promise<{
333
+ answer: string;
334
+ sources: SearchSource[];
335
+ citations: SearchCitation[];
336
+ searchQueries: string[];
337
+ model: string;
338
+ usage?: { inputTokens: number; outputTokens: number; totalTokens: number };
339
+ }> {
248
340
  if (!response.body) {
249
341
  throw new SearchProviderError("gemini", "Gemini API returned no response body", 500);
250
342
  }
251
343
 
252
- // Parse SSE stream
253
344
  const answerParts: string[] = [];
254
345
  const sources: SearchSource[] = [];
255
346
  const citations: SearchCitation[] = [];
@@ -289,7 +380,6 @@ async function callGeminiSearch(
289
380
 
290
381
  const candidate = responseData.candidates?.[0];
291
382
 
292
- // Extract text content
293
383
  if (candidate?.content?.parts) {
294
384
  for (const part of candidate.content.parts) {
295
385
  if (part.text) {
@@ -298,10 +388,8 @@ async function callGeminiSearch(
298
388
  }
299
389
  }
300
390
 
301
- // Extract grounding metadata
302
391
  const groundingMetadata = candidate?.groundingMetadata;
303
392
  if (groundingMetadata) {
304
- // Extract sources from grounding chunks
305
393
  if (groundingMetadata.groundingChunks) {
306
394
  for (const grChunk of groundingMetadata.groundingChunks) {
307
395
  if (grChunk.web?.uri) {
@@ -317,7 +405,6 @@ async function callGeminiSearch(
317
405
  }
318
406
  }
319
407
 
320
- // Extract citations from grounding supports
321
408
  if (groundingMetadata.groundingSupports && groundingMetadata.groundingChunks) {
322
409
  for (const support of groundingMetadata.groundingSupports) {
323
410
  const citedText = support.segment?.text;
@@ -336,7 +423,6 @@ async function callGeminiSearch(
336
423
  }
337
424
  }
338
425
 
339
- // Extract search queries
340
426
  if (groundingMetadata.webSearchQueries) {
341
427
  for (const q of groundingMetadata.webSearchQueries) {
342
428
  if (!searchQueries.includes(q)) {
@@ -346,7 +432,6 @@ async function callGeminiSearch(
346
432
  }
347
433
  }
348
434
 
349
- // Extract usage metadata
350
435
  if (responseData.usageMetadata) {
351
436
  usage = {
352
437
  inputTokens: responseData.usageMetadata.promptTokenCount ?? 0,
@@ -355,7 +440,6 @@ async function callGeminiSearch(
355
440
  };
356
441
  }
357
442
 
358
- // Extract model version
359
443
  if (responseData.modelVersion) {
360
444
  model = responseData.modelVersion;
361
445
  }
@@ -396,6 +480,7 @@ export async function searchGemini(params: GeminiSearchParams): Promise<SearchRe
396
480
  params.system_prompt,
397
481
  params.max_output_tokens,
398
482
  params.temperature,
483
+ params,
399
484
  );
400
485
 
401
486
  let sources = result.sources;