ex-brain 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -65,6 +65,13 @@ ebrain timeline extract companies/river-ai
65
65
  ebrain search "some topic"
66
66
  ebrain query "some question"
67
67
 
68
+ # AI-powered Q&A with LLM (RAG)
69
+ ebrain query --llm "What is the main idea of River AI's product?"
70
+ ebrain query --llm "What are Mario Zechner's main views on game development?"
71
+
72
+ # Smart ingest: compile + timeline + entity links in one command
73
+ ebrain smart-ingest companies/river-ai --file article.md
74
+
68
75
  # Start MCP Server (for AI tool integration)
69
76
  ebrain serve
70
77
  ```
@@ -82,12 +89,53 @@ Edit `~/.ebrain/settings.json`:
82
89
  "model": "...",
83
90
  "dimensions": 1024,
84
91
  "apiKey": "sk-..."
92
+ },
93
+ "llm": {
94
+ "baseURL": "https://dashscope.aliyuncs.com/compatible-mode/v1",
95
+ "model": "qwen-plus",
96
+ "apiKey": "sk-..."
97
+ },
98
+ "extraction": {
99
+ "confidenceThreshold": 0.7 // Entity extraction confidence (0~1)
85
100
  }
86
101
  }
87
102
  ```
88
103
 
89
104
  Run `ebrain config` to view active configuration. See [docs/ebrain-cli.md](docs/ebrain-cli.md) for details.
90
105
 
106
+ ## AI Q&A (RAG)
107
+
108
+ Ask natural language questions and get answers based on your knowledge base:
109
+
110
+ ```bash
111
+ # Basic Q&A
112
+ ebrain query --llm "What is the main idea of River AI's product?"
113
+
114
+ # Control context depth
115
+ ebrain query --llm "What happened in Q4?" --context-limit 3
116
+ ```
117
+
118
+ How it works:
119
+
120
+ 1. **Semantic Search** — Finds top matching pages for your question
121
+ 2. **Multi-Layer Context Collection** — Builds rich context from:
122
+ - **Page Content** — Compiled truth + timeline for each matched page
123
+ - **Raw Documents** — Original imported documents (via `raw set`)
124
+ - **Linked Pages** — Incoming and outgoing linked pages, filtered by semantic relevance to the question
125
+ 3. **LLM Synthesis** — Generates a sourced answer with `[[slug|title]]` citations
126
+
127
+ Configure LLM in `~/.ebrain/settings.json`:
128
+
129
+ ```json
130
+ {
131
+ "llm": {
132
+ "baseURL": "https://dashscope.aliyuncs.com/compatible-mode/v1",
133
+ "model": "qwen-plus",
134
+ "apiKey": "sk-..."
135
+ }
136
+ }
137
+ ```
138
+
91
139
  ## Development
92
140
 
93
141
  ```bash
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ex-brain",
3
- "version": "0.1.1",
3
+ "version": "0.2.0",
4
4
  "description": "CLI personal knowledge base powered by seekdb",
5
5
  "module": "src/cli.ts",
6
6
  "type": "module",
@@ -29,6 +29,7 @@
29
29
  "@seekdb/openai": "1.2.0",
30
30
  "commander": "^14.0.3",
31
31
  "gray-matter": "^4.0.3",
32
+ "jsonrepair": "^3.13.3",
32
33
  "pinyin-pro": "^3.28.0",
33
34
  "seekdb": "^1.2.0",
34
35
  "yaml": "^2.8.3",
@@ -1,5 +1,7 @@
1
1
  import type { ResolvedLLM } from "../settings";
2
2
  import type { TimelineEntry } from "../types";
3
+ import { callLLM, resolveApiKey } from "./llm-client";
4
+ import { jsonrepair } from "jsonrepair";
3
5
 
4
6
  // ---------------------------------------------------------------------------
5
7
  // Types
@@ -116,8 +118,7 @@ async function analyzeNewInfo(
116
118
  llm: ResolvedLLM,
117
119
  ): Promise<FactAnalysis> {
118
120
  const prompt = buildAnalysisPrompt(input);
119
-
120
- const resp = await callLLM(llm, prompt, 2048);
121
+ const resp = await callLLM(llm, prompt, 2048, COMPILER_SYSTEM_PROMPT);
121
122
  const parsed = parseAnalysisResponse(resp);
122
123
 
123
124
  return parsed;
@@ -174,8 +175,7 @@ async function smartMergeTruth(
174
175
  llm: ResolvedLLM,
175
176
  ): Promise<{ compiledTruth: string; changed: boolean; changeType: CompileResult["changeType"]; changeSummary: string }> {
176
177
  const prompt = buildMergePrompt(input, analysis);
177
-
178
- const resp = await callLLM(llm, prompt, 4096);
178
+ const resp = await callLLM(llm, prompt, 4096, COMPILER_SYSTEM_PROMPT);
179
179
  const result = parseMergeResponse(resp);
180
180
 
181
181
  return result;
@@ -192,7 +192,7 @@ async function extractTimelineFromInfo(
192
192
  // Only extract timeline for significant events
193
193
  if (analysis.infoType === "status_update" || analysis.infoType === "new_event") {
194
194
  const prompt = buildTimelinePrompt(input, analysis);
195
- const resp = await callLLM(llm, prompt, 1024);
195
+ const resp = await callLLM(llm, prompt, 1024, COMPILER_SYSTEM_PROMPT);
196
196
  return parseTimelineResponse(resp, input.pageContext?.slug ?? "");
197
197
  }
198
198
 
@@ -276,7 +276,7 @@ Rewrite the compiled truth. Output ONLY JSON with this schema:
276
276
  {
277
277
  "compiledTruth": "the full rewritten compiled truth content (markdown format)",
278
278
  "changed": true|false,
279
- "changeType": "update|replace|conflict|none",
279
+ "changeType": "append|update|replace|conflict|none",
280
280
  "changeSummary": "human-readable summary of what changed"
281
281
  }
282
282
 
@@ -338,45 +338,8 @@ Rules:
338
338
  // LLM Call
339
339
  // ---------------------------------------------------------------------------
340
340
 
341
- async function callLLM(llm: ResolvedLLM, prompt: string, maxTokens: number): Promise<string> {
342
- const apiKey = resolveApiKey(llm);
343
- if (!apiKey) return "";
344
-
345
- const body = {
346
- model: llm.model,
347
- messages: [
348
- { role: "system", content: "You are a knowledge compilation assistant. You analyze information, extract facts, and maintain structured compiled truth. Always output valid JSON. Be precise and factual." },
349
- { role: "user", content: prompt },
350
- ],
351
- temperature: 0.1,
352
- max_tokens: maxTokens,
353
- enable_thinking: false,
354
- };
355
-
356
- try {
357
- const resp = await fetch(
358
- llm.baseURL.endsWith("/") ? llm.baseURL + "chat/completions" : llm.baseURL + "/chat/completions",
359
- {
360
- method: "POST",
361
- headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
362
- body: JSON.stringify(body),
363
- },
364
- );
365
-
366
- if (!resp.ok) {
367
- const text = await resp.text();
368
- console.warn(`[compiler] LLM call failed (${resp.status}): ${text.slice(0, 200)}`);
369
- return "";
370
- }
371
-
372
- const data = await resp.json();
373
- return data.choices?.[0]?.message?.content?.trim() ?? "";
374
- } catch (error) {
375
- const msg = error instanceof Error ? error.message : String(error);
376
- console.warn(`[compiler] LLM call error: ${msg}`);
377
- return "";
378
- }
379
- }
341
+ // Use callLLM from llm-client module with custom system prompt
342
+ const COMPILER_SYSTEM_PROMPT = "You are a knowledge compilation assistant. You analyze information, extract facts, and maintain structured compiled truth. Always output valid JSON. Be precise and factual.";
380
343
 
381
344
  // ---------------------------------------------------------------------------
382
345
  // Response Parsing
@@ -389,7 +352,9 @@ function parseAnalysisResponse(resp: string): FactAnalysis {
389
352
  }
390
353
 
391
354
  try {
392
- const parsed = JSON.parse(match[0]) as Record<string, unknown>;
355
+ // Use jsonrepair to fix common LLM JSON issues
356
+ const repaired = jsonrepair(match[0]);
357
+ const parsed = JSON.parse(repaired) as Record<string, unknown>;
393
358
 
394
359
  const facts: ExtractedFact[] = [];
395
360
  const rawFacts = parsed.facts as unknown[] ?? [];
@@ -429,7 +394,9 @@ function parseMergeResponse(resp: string): { compiledTruth: string; changed: boo
429
394
  }
430
395
 
431
396
  try {
432
- const parsed = JSON.parse(match[0]) as Record<string, unknown>;
397
+ // Use jsonrepair to fix common LLM JSON issues
398
+ const repaired = jsonrepair(match[0]);
399
+ const parsed = JSON.parse(repaired) as Record<string, unknown>;
433
400
  return {
434
401
  compiledTruth: String(parsed.compiledTruth ?? ""),
435
402
  changed: Boolean(parsed.changed),
@@ -451,7 +418,9 @@ function parseTimelineResponse(resp: string, pageSlug: string): TimelineEntry[]
451
418
  if (!match) return [];
452
419
 
453
420
  try {
454
- const parsed = JSON.parse(match[0]) as unknown[];
421
+ // Use jsonrepair to fix common LLM JSON issues
422
+ const repaired = jsonrepair(match[0]);
423
+ const parsed = JSON.parse(repaired) as unknown[];
455
424
  const entries: TimelineEntry[] = [];
456
425
 
457
426
  for (const e of parsed) {
@@ -490,11 +459,7 @@ function normalizeChangeType(raw: string): CompileResult["changeType"] {
490
459
  return "none";
491
460
  }
492
461
 
493
- function resolveApiKey(llm: ResolvedLLM): string {
494
- if (llm.apiKey) return llm.apiKey;
495
- if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv] ?? "";
496
- return "";
497
- }
462
+ // resolveApiKey is now imported from llm-client module
498
463
 
499
464
  function appendFact(current: string, newInfo: string, source: string): string {
500
465
  const timestamp = new Date().toISOString().slice(0, 10);
@@ -1,4 +1,6 @@
1
- import { ResolvedLLM } from "../settings";
1
+ import type { ResolvedLLM } from "../settings";
2
+ import { callLLM, resolveApiKey, isLLMConfigured } from "./llm-client";
3
+ import { jsonrepair } from "jsonrepair";
2
4
 
3
5
  // ---------------------------------------------------------------------------
4
6
  // Types
@@ -75,10 +77,15 @@ const RELATION_TYPES = [
75
77
  /**
76
78
  * Use the configured LLM to extract entity relationships from text.
77
79
  * Returns a list of relations with relation type, confidence, and context.
80
+ * Filters out relations with confidence below the threshold (default: 0.7).
78
81
  */
79
82
  export async function extractRelations(
80
83
  content: string,
81
84
  llm: ResolvedLLM,
85
+ options?: {
86
+ /** Minimum confidence threshold (0-1). Relations below this are filtered out. Default: 0.7 */
87
+ confidenceThreshold?: number;
88
+ },
82
89
  ): Promise<ExtractionResult> {
83
90
  const trimmed = content.trim();
84
91
  if (!trimmed) return [];
@@ -91,65 +98,29 @@ export async function extractRelations(
91
98
  context = trimmed.slice(0, 4000) + "\n\n...\n\n" + trimmed.slice(-1000);
92
99
  }
93
100
 
94
- const apiKey = resolveApiKey(llm);
95
- if (!apiKey) return [];
96
-
97
- const body = {
98
- model: llm.model,
99
- messages: [
100
- {
101
- role: "system",
102
- content:
103
- "You are a knowledge graph extraction assistant. " +
104
- "Identify relationships between named entities. " +
105
- "For each relationship, provide: from entity, to entity, relation type, confidence score, and exact context sentence. " +
106
- `Allowed relation types: ${RELATION_TYPES}. ` +
107
- "Output ONLY a JSON array. Schema: " +
108
- '{ "type": "relation", "from": {"name": "...", "type": "..."}, ' +
109
- '"to": {"name": "...", "type": "..."}, "relation": "...", "context": "...", "confidence": 0.9 }. ' +
110
- "Output ONLY the JSON array. /no_think",
111
- },
112
- {
113
- role: "user",
114
- content: `Extract relationships from:\n\n${context}`,
115
- },
116
- ],
117
- temperature: 0.1,
118
- max_tokens: 1024,
119
- enable_thinking: false,
120
- };
101
+ if (!isLLMConfigured(llm)) return [];
121
102
 
122
- try {
123
- const resp = await fetch(
124
- llm.baseURL.endsWith("/")
125
- ? llm.baseURL + "chat/completions"
126
- : llm.baseURL + "/chat/completions",
127
- {
128
- method: "POST",
129
- headers: {
130
- "Content-Type": "application/json",
131
- Authorization: `Bearer ${apiKey}`,
132
- },
133
- body: JSON.stringify(body),
134
- },
135
- );
136
-
137
- if (!resp.ok) {
138
- const text = await resp.text();
139
- console.warn(
140
- `[ebrain] Entity extraction failed (${resp.status}): ${text.slice(0, 200)}`,
141
- );
142
- return [];
143
- }
103
+ const systemPrompt =
104
+ "You are a knowledge graph extraction assistant. " +
105
+ "Identify relationships between named entities. " +
106
+ "For each relationship, provide: from entity, to entity, relation type, confidence score, and exact context sentence. " +
107
+ `Allowed relation types: ${RELATION_TYPES}. ` +
108
+ "Output ONLY a JSON array. Schema: " +
109
+ '{ "type": "relation", "from": {"name": "...", "type": "..."}, ' +
110
+ '"to": {"name": "...", "type": "..."}, "relation": "...", "context": "...", "confidence": 0.9 }. ' +
111
+ "Output ONLY the JSON array. /no_think";
144
112
 
145
- const data = await resp.json();
146
- const raw = data.choices?.[0]?.message?.content?.trim();
147
- if (!raw) return [];
113
+ const resp = await callLLM(llm, `Extract relationships from:\n\n${context}`, 1024, systemPrompt);
114
+ if (!resp) return [];
148
115
 
149
- const match = raw.match(/\[[\s\S]*\]/);
150
- if (!match) return [];
116
+ // Extract JSON array from response
117
+ const match = resp.match(/\[[\s\S]*\]/);
118
+ if (!match) return [];
151
119
 
152
- const parsed = JSON.parse(match[0]) as unknown[];
120
+ try {
121
+ // Use jsonrepair to fix common LLM JSON issues (unterminated strings, etc.)
122
+ const repaired = jsonrepair(match[0]);
123
+ const parsed = JSON.parse(repaired) as unknown[];
153
124
  const relations: ExtractionResult = [];
154
125
 
155
126
  for (const item of parsed) {
@@ -175,7 +146,9 @@ export async function extractRelations(
175
146
  });
176
147
  }
177
148
 
178
- return relations;
149
+ // Filter by confidence threshold (default 0.7)
150
+ const threshold = options?.confidenceThreshold ?? 0.7;
151
+ return relations.filter((r) => r.confidence >= threshold);
179
152
  } catch (error) {
180
153
  const msg = error instanceof Error ? error.message : String(error);
181
154
  console.warn(`[ebrain] Entity extraction error: ${msg}`);
@@ -219,8 +192,4 @@ export function normalizeRelationType(raw: string): RelationType {
219
192
  return "related_to";
220
193
  }
221
194
 
222
- function resolveApiKey(llm: ResolvedLLM): string {
223
- if (llm.apiKey) return llm.apiKey;
224
- if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv] ?? "";
225
- return "";
226
- }
195
+
@@ -0,0 +1,291 @@
1
+ /**
2
+ * Unified LLM Client Module
3
+ *
4
+ * Provides centralized LLM calling functionality with:
5
+ * - Retry mechanism (exponential backoff, max 3 retries)
6
+ * - Error classification (APIError, TimeoutError, RateLimitError)
7
+ * - Timeout control
8
+ * - Unified API key resolution
9
+ */
10
+
11
+ import type { ResolvedLLM } from "../settings";
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // Error Classes
15
+ // ---------------------------------------------------------------------------
16
+
17
+ export class LLMError extends Error {
18
+ constructor(
19
+ message: string,
20
+ public readonly code: string,
21
+ public readonly statusCode?: number,
22
+ public readonly retryable: boolean = false,
23
+ ) {
24
+ super(message);
25
+ this.name = "LLMError";
26
+ }
27
+ }
28
+
29
+ export class APIError extends LLMError {
30
+ constructor(message: string, statusCode?: number) {
31
+ super(message, "API_ERROR", statusCode, false);
32
+ this.name = "APIError";
33
+ }
34
+ }
35
+
36
+ export class TimeoutError extends LLMError {
37
+ constructor(message: string = "LLM request timed out") {
38
+ super(message, "TIMEOUT_ERROR", undefined, true);
39
+ this.name = "TimeoutError";
40
+ }
41
+ }
42
+
43
+ export class RateLimitError extends LLMError {
44
+ constructor(message: string = "Rate limit exceeded", retryAfter?: number) {
45
+ super(message, "RATE_LIMIT_ERROR", 429, true);
46
+ this.name = "RateLimitError";
47
+ this.retryAfter = retryAfter;
48
+ }
49
+ readonly retryAfter?: number;
50
+ }
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Configuration
54
+ // ---------------------------------------------------------------------------
55
+
56
+ export interface LLMClientConfig {
57
+ /** Maximum number of retry attempts (default: 3) */
58
+ maxRetries?: number;
59
+ /** Base delay for exponential backoff in ms (default: 1000) */
60
+ baseDelay?: number;
61
+ /** Maximum delay cap in ms (default: 10000) */
62
+ maxDelay?: number;
63
+ /** Request timeout in ms (default: 60000) */
64
+ timeout?: number;
65
+ }
66
+
67
+ const DEFAULT_CONFIG: Required<LLMClientConfig> = {
68
+ maxRetries: 3,
69
+ baseDelay: 1000,
70
+ maxDelay: 10000,
71
+ timeout: 60000,
72
+ };
73
+
74
+ // ---------------------------------------------------------------------------
75
+ // API Key Resolution
76
+ // ---------------------------------------------------------------------------
77
+
78
+ /**
79
+ * Resolve API key from LLM configuration.
80
+ * Checks direct apiKey first, then falls back to environment variable.
81
+ */
82
+ export function resolveApiKey(llm: ResolvedLLM): string {
83
+ if (llm.apiKey) return llm.apiKey;
84
+ if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv] ?? "";
85
+ return "";
86
+ }
87
+
88
+ /**
89
+ * Check if LLM is properly configured with an API key.
90
+ */
91
+ export function isLLMConfigured(llm: ResolvedLLM): boolean {
92
+ return !!resolveApiKey(llm);
93
+ }
94
+
95
+ // ---------------------------------------------------------------------------
96
+ // LLM Call with Retry
97
+ // ---------------------------------------------------------------------------
98
+
99
+ /**
100
+ * Call LLM with unified fetch, retry mechanism, error handling, and timeout.
101
+ *
102
+ * @param llm - Resolved LLM configuration
103
+ * @param prompt - Prompt to send to the LLM
104
+ * @param maxTokens - Maximum tokens in response
105
+ * @param systemPrompt - Optional system prompt (default provided)
106
+ * @param config - Optional client configuration
107
+ * @returns Raw response text from LLM, or empty string on failure
108
+ */
109
+ export async function callLLM(
110
+ llm: ResolvedLLM,
111
+ prompt: string,
112
+ maxTokens: number,
113
+ systemPrompt: string = "You are a helpful assistant. Always output valid JSON.",
114
+ config: LLMClientConfig = {},
115
+ ): Promise<string> {
116
+ const apiKey = resolveApiKey(llm);
117
+ if (!apiKey) {
118
+ return "";
119
+ }
120
+
121
+ const cfg = { ...DEFAULT_CONFIG, ...config };
122
+ const url = llm.baseURL.endsWith("/")
123
+ ? llm.baseURL + "chat/completions"
124
+ : llm.baseURL + "/chat/completions";
125
+
126
+ const body = {
127
+ model: llm.model,
128
+ messages: [
129
+ { role: "system", content: systemPrompt },
130
+ { role: "user", content: prompt },
131
+ ],
132
+ temperature: 0.1,
133
+ max_tokens: maxTokens,
134
+ enable_thinking: false,
135
+ };
136
+
137
+ let lastError: LLMError | null = null;
138
+
139
+ for (let attempt = 0; attempt <= cfg.maxRetries; attempt++) {
140
+ try {
141
+ const response = await callWithTimeout(
142
+ fetch(url, {
143
+ method: "POST",
144
+ headers: {
145
+ "Content-Type": "application/json",
146
+ Authorization: `Bearer ${apiKey}`,
147
+ },
148
+ body: JSON.stringify(body),
149
+ }),
150
+ cfg.timeout,
151
+ );
152
+
153
+ if (!response.ok) {
154
+ const text = await response.text().catch(() => "");
155
+ lastError = classifyError(response.status, text, response.statusText);
156
+
157
+ // Give up on non-retryable errors, or once the final attempt has been used
158
+ if (!lastError.retryable || attempt === cfg.maxRetries) {
159
+ console.warn(`[llm-client] LLM call failed after ${attempt + 1} attempt(s): ${lastError.message}`);
160
+ return "";
161
+ }
162
+
163
+ const delay = calculateBackoff(attempt, cfg.baseDelay, cfg.maxDelay, (lastError as RateLimitError).retryAfter);
164
+ console.warn(`[llm-client] Retrying after ${delay}ms (attempt ${attempt + 1}/${cfg.maxRetries})`);
165
+ await sleep(delay);
166
+ continue;
167
+ }
168
+
169
+ const data = await response.json() as { choices?: Array<{ message?: { content?: string } }> };
170
+ return data.choices?.[0]?.message?.content?.trim() ?? "";
171
+
172
+ } catch (error) {
173
+ // Classify the error
174
+ if (error instanceof TimeoutError) {
175
+ lastError = error;
176
+ } else if (error instanceof LLMError) {
177
+ lastError = error;
178
+ } else {
179
+ // Unknown error - wrap it
180
+ const msg = error instanceof Error ? error.message : String(error);
181
+ lastError = new APIError(`Unexpected error: ${msg}`);
182
+ }
183
+
184
+ // Don't retry if we've exhausted attempts
185
+ if (attempt === cfg.maxRetries) {
186
+ console.warn(`[llm-client] LLM call failed after ${attempt + 1} attempt(s): ${lastError.message}`);
187
+ return "";
188
+ }
189
+
190
+ // Check if error is retryable
191
+ if (!lastError.retryable) {
192
+ console.warn(`[llm-client] Non-retryable error: ${lastError.message}`);
193
+ return "";
194
+ }
195
+
196
+ const delay = calculateBackoff(attempt, cfg.baseDelay, cfg.maxDelay);
197
+ console.warn(`[llm-client] Retrying after ${delay}ms (attempt ${attempt + 1}/${cfg.maxRetries}): ${lastError.message}`);
198
+ await sleep(delay);
199
+ }
200
+ }
201
+
202
+ return "";
203
+ }
204
+
205
+ /**
206
+ * Classify HTTP error into appropriate error type.
207
+ */
208
+ function classifyError(status: number, responseText: string, statusText: string): LLMError {
209
+ const truncatedText = responseText.slice(0, 200);
210
+
211
+ switch (status) {
212
+ case 429:
213
+ // Try to extract a retry-after value from the response body text
214
+ const retryAfterMatch = responseText.match(/retry[- ]?after["']?\s*[:=]\s*(\d+)/i);
215
+ const retryAfter = retryAfterMatch?.[1] ? parseInt(retryAfterMatch[1], 10) : undefined;
216
+ return new RateLimitError(`Rate limited: ${statusText} - ${truncatedText}`, retryAfter);
217
+
218
+ case 408:
219
+ case 504:
220
+ return new TimeoutError(`Request timeout: ${statusText}`);
221
+
222
+ case 500:
223
+ case 502:
224
+ case 503:
225
+ return new APIError(`Server error (${status}): ${truncatedText}`, status);
226
+
227
+ default:
228
+ if (status >= 500) {
229
+ return new APIError(`Server error (${status}): ${truncatedText}`, status);
230
+ }
231
+ if (status >= 400) {
232
+ return new APIError(`Client error (${status}): ${truncatedText}`, status);
233
+ }
234
+ return new APIError(`HTTP error (${status}): ${truncatedText}`, status);
235
+ }
236
+ }
237
+
238
+ /**
239
+ * Calculate exponential backoff delay with jitter.
240
+ */
241
+ function calculateBackoff(
242
+ attempt: number,
243
+ baseDelay: number,
244
+ maxDelay: number,
245
+ retryAfter?: number,
246
+ ): number {
247
+ // If the server specified retry-after (seconds), use that, capped at maxDelay
248
+ if (retryAfter && retryAfter > 0) {
249
+ return Math.min(retryAfter * 1000, maxDelay);
250
+ }
251
+
252
+ // Exponential backoff: baseDelay * 2^attempt
253
+ const exponentialDelay = baseDelay * Math.pow(2, attempt);
254
+
255
+ // Add jitter (±25%)
256
+ const jitter = exponentialDelay * 0.25 * (Math.random() * 2 - 1);
257
+
258
+ return Math.min(Math.round(exponentialDelay + jitter), maxDelay);
259
+ }
260
+
261
+ /**
262
+ * Sleep for specified milliseconds.
263
+ */
264
+ function sleep(ms: number): Promise<void> {
265
+ return new Promise((resolve) => setTimeout(resolve, ms));
266
+ }
267
+
268
+ /**
269
+ * Wrap fetch with timeout using Promise.race. Note: the pending request itself is not aborted when the timeout fires.
270
+ */
271
+ async function callWithTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
272
+ let timeoutId: NodeJS.Timeout;
273
+
274
+ const timeoutPromise = new Promise<never>((_, reject) => {
275
+ timeoutId = setTimeout(() => {
276
+ reject(new TimeoutError(`Request timed out after ${timeoutMs}ms`));
277
+ }, timeoutMs);
278
+ });
279
+
280
+ try {
281
+ return await Promise.race([promise, timeoutPromise]);
282
+ } finally {
283
+ clearTimeout(timeoutId!);
284
+ }
285
+ }
286
+
287
+ // ---------------------------------------------------------------------------
288
+ // Re-export settings type for convenience
289
+ // ---------------------------------------------------------------------------
290
+
291
+ export type { ResolvedLLM } from "../settings";