ex-brain 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -544,10 +544,14 @@ Examples:
544
544
  }
545
545
 
546
546
  // Collect multi-layer context (primary + raw data + linked pages scored by relevance)
547
- progress.update(`Loading pages, raw documents, and linked content...`);
548
547
  // ~100KB char budget ≈ 25K tokens, safe for most models
549
548
  const MAX_CONTEXT_CHARS = 100_000;
550
- const { sections, totalChars, stats } = await collectContextForLLM(repo, topHits, question, MAX_CONTEXT_CHARS);
549
+ const ctxStart = Date.now();
550
+ progress.update(`Loading page content...`);
551
+ const { sections, totalChars, stats } = await collectContextForLLM(repo, topHits, question, MAX_CONTEXT_CHARS, (stage) => {
552
+ progress.update(`Loading ${stage}...`);
553
+ });
554
+ const ctxDuration = formatDuration(Date.now() - ctxStart);
551
555
 
552
556
  if (sections.length === 0) {
553
557
  progress.stop();
@@ -556,16 +560,18 @@ Examples:
556
560
  return;
557
561
  }
558
562
 
559
- progress.update(`Generating answer from ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)...`);
563
+ progress.succeed(`Loaded ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s) (${ctxDuration})`);
560
564
  const startTime = Date.now();
561
565
 
562
- const answer = await generateAnswerWithContext(question, sections, stats, settings.llm);
566
+ const { answer, ok } = await generateAnswerWithStream(question, sections, stats, settings.llm);
563
567
 
564
- const duration = formatDuration(Date.now() - startTime);
565
- progress.succeed(`Answer generated (${duration}, context: ${(totalChars / 1024).toFixed(1)}KB)`);
568
+ if (!ok) {
569
+ // If streaming failed, answer contains the error message
570
+ console.log(answer);
571
+ return;
572
+ }
566
573
 
567
- // Output answer as markdown
568
- console.log("\n" + answer);
574
+ const duration = formatDuration(Date.now() - startTime);
569
575
 
570
576
  // Show sources breakdown
571
577
  console.log("\n---\n**Sources:**\n");
@@ -1093,7 +1099,7 @@ Examples:
1093
1099
  }
1094
1100
 
1095
1101
  for (let i = 0; i < fileData.length; i += BATCH_SIZE) {
1096
- const batch = fileData.slice(i, i + BATCH_SIZE).filter(d => d.tags.length === 0);
1102
+ const batch = fileData.slice(i, i + BATCH_SIZE);
1097
1103
  if (!jsonOut) {
1098
1104
  spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, fileData.length)}/${fileData.length}`);
1099
1105
  }
@@ -1668,6 +1674,7 @@ async function collectContextForLLM(
1668
1674
  hits: Array<{ slug: string; title: string; score: number }>,
1669
1675
  question: string,
1670
1676
  maxChars: number,
1677
+ onProgress?: (stage: string) => void,
1671
1678
  ): Promise<{ sections: ContextSection[]; totalChars: number; stats: ContextStats }> {
1672
1679
  const sections: ContextSection[] = [];
1673
1680
  let totalChars = 0;
@@ -1699,10 +1706,15 @@ async function collectContextForLLM(
1699
1706
  return false;
1700
1707
  }
1701
1708
 
1709
+ // Cache pages fetched in Layer 1 to avoid redundant DB calls in Layer 3
1710
+ const pageCache = new Map<string, NonNullable<Awaited<ReturnType<typeof repo.getPage>>>>();
1711
+
1702
1712
  // Layer 1: Primary pages (compiledTruth + timeline)
1713
+ onProgress?.('page content');
1703
1714
  for (const hit of hits) {
1704
1715
  const page = await repo.getPage(hit.slug);
1705
1716
  if (!page) continue;
1717
+ pageCache.set(hit.slug, page);
1706
1718
 
1707
1719
  const parts: string[] = [];
1708
1720
  if (page.compiledTruth?.trim()) {
@@ -1726,6 +1738,7 @@ async function collectContextForLLM(
1726
1738
  }
1727
1739
 
1728
1740
  // Layer 2: Raw data (original documents)
1741
+ onProgress?.('raw documents');
1729
1742
  for (const hit of hits) {
1730
1743
  try {
1731
1744
  const rawRows = await repo.readRaw(hit.slug) as Array<{ source: string; data: unknown; fetchedAt?: string }>;
@@ -1752,8 +1765,9 @@ async function collectContextForLLM(
1752
1765
  }
1753
1766
  }
1754
1767
 
1755
- // Layer 3: Linked pages — SEMANTICALLY SCORED against the question
1756
- // Only include linked pages that are actually relevant to what the user asked.
1768
+ // Layer 3: Linked pages — score using cached data + keyword matching
1769
+ // No second repo.query() call needed — reuse hits scores + keyword fallback
1770
+ onProgress?.('linked pages');
1757
1771
  const allLinkedSlugs = new Set<string>();
1758
1772
  for (const hit of hits) {
1759
1773
  try {
@@ -1767,26 +1781,27 @@ async function collectContextForLLM(
1767
1781
  }
1768
1782
 
1769
1783
  if (allLinkedSlugs.size > 0) {
1770
- // Score linked pages using broad semantic search.
1771
- // Query a wide set of pages, then intersect with linked slugs.
1772
- const broadLimit = Math.min(200, Math.max(50, allLinkedSlugs.size));
1773
- const broadResults = await repo.query(question, broadLimit);
1774
- const semanticScoreMap = new Map(broadResults.map(h => [h.slug, h.score]));
1775
-
1776
- // Keyword-based fallback scoring for linked pages without embedding scores
1784
+ // Score: use semantic scores from initial hits (already cached), keyword for rest
1785
+ const semanticScoreMap = new Map(hits.map(h => [h.slug, h.score]));
1777
1786
  const keywordScores = new Map<string, number>();
1778
1787
  for (const linkedSlug of allLinkedSlugs) {
1779
1788
  if (semanticScoreMap.has(linkedSlug)) continue;
1780
- try {
1789
+ // Use cached page if available, only fetch if not in cache
1790
+ const cached = pageCache.get(linkedSlug);
1791
+ if (cached) {
1792
+ const text = `${cached.title} ${cached.compiledTruth}`.slice(0, 2000);
1793
+ keywordScores.set(linkedSlug, computeKeywordRelevance(text, question));
1794
+ } else {
1781
1795
  const page = await repo.getPage(linkedSlug);
1782
1796
  if (page) {
1797
+ pageCache.set(linkedSlug, page);
1783
1798
  const text = `${page.title} ${page.compiledTruth}`.slice(0, 2000);
1784
1799
  keywordScores.set(linkedSlug, computeKeywordRelevance(text, question));
1785
1800
  }
1786
- } catch { /* ignore */ }
1801
+ }
1787
1802
  }
1788
1803
 
1789
- // Combine scores: semantic first, then keyword fallback
1804
+ // Combine scores
1790
1805
  const scoredLinked = [...allLinkedSlugs].map(slug => ({
1791
1806
  slug,
1792
1807
  score: semanticScoreMap.get(slug) ?? keywordScores.get(slug) ?? 0,
@@ -1798,11 +1813,11 @@ async function collectContextForLLM(
1798
1813
  .filter(s => s.score >= MIN_LINKED_SCORE)
1799
1814
  .sort((a, b) => b.score - a.score);
1800
1815
 
1801
- // Fetch content for relevant linked pages (respecting budget)
1816
+ // Add linked pages (already cached in pageCache, no extra fetch needed)
1802
1817
  for (const linked of relevantLinked) {
1803
1818
  if (totalChars >= maxChars) break;
1804
1819
 
1805
- const linkedPage = await repo.getPage(linked.slug);
1820
+ const linkedPage = pageCache.get(linked.slug);
1806
1821
  if (!linkedPage || !linkedPage.compiledTruth?.trim()) continue;
1807
1822
 
1808
1823
  const remaining = maxChars - totalChars;
@@ -1879,6 +1894,175 @@ interface ContextStats {
1879
1894
  /**
1880
1895
  * Build LLM prompt from collected context sections and generate answer.
1881
1896
  */
1897
+ async function generateAnswerWithStream(
1898
+ question: string,
1899
+ sections: ContextSection[],
1900
+ stats: ContextStats,
1901
+ llm: ResolvedLLM,
1902
+ ): Promise<{ answer: string; ok: boolean }> {
1903
+ const apiKey = llm.apiKey || process.env[llm.apiKeyEnv] || "";
1904
+ if (!apiKey) {
1905
+ return { answer: "Error: LLM API key not configured.", ok: false };
1906
+ }
1907
+
1908
+ if (sections.length === 0) {
1909
+ return { answer: "知识库中没有找到相关内容。", ok: true };
1910
+ }
1911
+
1912
+ // Build context sections with clear labels
1913
+ const contextParts: string[] = [];
1914
+ let sectionIndex = 0;
1915
+
1916
+ // Group by type for cleaner output
1917
+ const primarySections = sections.filter(s => s.type === 'primary');
1918
+ const rawSections = sections.filter(s => s.type === 'raw_data');
1919
+ const linkedSections = sections.filter(s => s.type === 'linked');
1920
+
1921
+ function renderSections(group: ContextSection[], header: string) {
1922
+ if (group.length === 0) return;
1923
+ contextParts.push(`## ${header}\n`);
1924
+ for (const s of group) {
1925
+ sectionIndex++;
1926
+ contextParts.push(`### [${sectionIndex}] ${s.title} — ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
1927
+ }
1928
+ contextParts.push('');
1929
+ }
1930
+
1931
+ renderSections(primarySections, '页面正文');
1932
+ renderSections(rawSections, '原始文档');
1933
+ renderSections(linkedSections, '关联页面');
1934
+
1935
+ const context = contextParts.join('\n');
1936
+
1937
+ const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。
1938
+
1939
+ ## 问题
1940
+ ${question}
1941
+
1942
+ ## 知识库内容
1943
+
1944
+ ${context}
1945
+
1946
+ ## 回答要求
1947
+ - 仅基于提供的知识库内容回答,不要编造信息
1948
+ - 如果知识库中没有相关信息,请明确说明
1949
+ - 引用来源时使用 [[slug|标题]] 的格式
1950
+ - 使用清晰的 markdown 格式
1951
+ - 如果涉及时间线信息,请在回答中体现
1952
+ - 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
1953
+ - 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)
1954
+
1955
+ ## 回答`;
1956
+
1957
+ // Disable thinking/reasoning mode to reduce latency
1958
+ const disableThinking: Record<string, unknown> = {};
1959
+ // OpenAI/compatible: extra_body for thinking disable
1960
+ // DeepSeek: use extra_body to disable thinking
1961
+ // Many providers ignore unknown fields, so this is safe to always include
1962
+ const extraBody: Record<string, unknown> = {
1963
+ thinking: { type: "disabled" },
1964
+ };
1965
+
1966
+ try {
1967
+ const url = llm.baseURL.endsWith("/") ? llm.baseURL + "chat/completions" : llm.baseURL + "/chat/completions";
1968
+
1969
+ // Show thinking indicator while waiting for first token
1970
+ process.stderr.write(`\x1b[35m💭\x1b[0m \x1b[2mConnecting to ${llm.model}...\x1b[0m\n`);
1971
+
1972
+ const resp = await fetch(
1973
+ url,
1974
+ {
1975
+ method: "POST",
1976
+ headers: {
1977
+ "Content-Type": "application/json",
1978
+ Authorization: `Bearer ${apiKey}`,
1979
+ },
1980
+ body: JSON.stringify({
1981
+ model: llm.model,
1982
+ stream: true,
1983
+ messages: [
1984
+ {
1985
+ role: "system",
1986
+ content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
1987
+ },
1988
+ { role: "user", content: prompt },
1989
+ ],
1990
+ temperature: 0.3,
1991
+ max_tokens: 4096,
1992
+ ...disableThinking,
1993
+ extra_body: extraBody,
1994
+ // Also send thinking disable as top-level for providers that support it
1995
+ thinking: { type: "disabled" },
1996
+ }),
1997
+ // Abort if no response within 30s
1998
+ signal: AbortSignal.timeout(30_000),
1999
+ },
2000
+ );
2001
+
2002
+ if (!resp.ok) {
2003
+ const text = await resp.text();
2004
+ // Clear the thinking indicator line
2005
+ process.stderr.write("\r\x1b[K");
2006
+ return { answer: `Error: LLM API failed (${resp.status}): ${text.slice(0, 200)}`, ok: false };
2007
+ }
2008
+
2009
+ if (!resp.body) {
2010
+ process.stderr.write("\r\x1b[K");
2011
+ return { answer: "Error: No response body from LLM API.", ok: false };
2012
+ }
2013
+
2014
+ // Clear thinking indicator, show streaming status
2015
+ process.stderr.write("\r\x1b[K");
2016
+ process.stderr.write(`\x1b[32m✦\x1b[0m \x1b[2mStreaming response...\x1b[0m\n`);
2017
+
2018
+ // Stream the response
2019
+ const reader = resp.body.getReader();
2020
+ const decoder = new TextDecoder();
2021
+ let fullAnswer = "";
2022
+ let buffer = "";
2023
+ let tokenCount = 0;
2024
+
2025
+ while (true) {
2026
+ const { done, value } = await reader.read();
2027
+ if (done) break;
2028
+
2029
+ buffer += decoder.decode(value, { stream: true });
2030
+ const lines = buffer.split("\n");
2031
+ // Keep the last incomplete line in buffer
2032
+ buffer = lines.pop() || "";
2033
+
2034
+ for (const line of lines) {
2035
+ const trimmed = line.trim();
2036
+ if (!trimmed || trimmed === "data: [DONE]") continue;
2037
+ if (!trimmed.startsWith("data: ")) continue;
2038
+
2039
+ try {
2040
+ const json = JSON.parse(trimmed.slice(6));
2041
+ const content = json.choices?.[0]?.delta?.content;
2042
+ if (content) {
2043
+ process.stdout.write(content);
2044
+ fullAnswer += content;
2045
+ tokenCount++;
2046
+ }
2047
+ } catch {
2048
+ // Skip malformed SSE data
2049
+ }
2050
+ }
2051
+ }
2052
+
2053
+ // Add a newline after streaming completes
2054
+ process.stdout.write("\n");
2055
+
2056
+ return { answer: fullAnswer || "(No answer generated)", ok: true };
2057
+ } catch (error) {
2058
+ const msg = error instanceof Error ? error.message : String(error);
2059
+ return { answer: `Error: ${msg}`, ok: false };
2060
+ }
2061
+ }
2062
+
2063
+ /**
2064
+ * @deprecated Use generateAnswerWithStream instead
2065
+ */
1882
2066
  async function generateAnswerWithContext(
1883
2067
  question: string,
1884
2068
  sections: ContextSection[],
@@ -1,291 +0,0 @@
1
- /**
2
- * Unified LLM Client Module
3
- *
4
- * Provides centralized LLM calling functionality with:
5
- * - Retry mechanism (exponential backoff, max 3 retries)
6
- * - Error classification (APIError, TimeoutError, RateLimitError)
7
- * - Timeout control
8
- * - Unified API key resolution
9
- */
10
-
11
- import type { ResolvedLLM } from "../settings";
12
-
13
- // ---------------------------------------------------------------------------
14
- // Error Classes
15
- // ---------------------------------------------------------------------------
16
-
17
- export class LLMError extends Error {
18
- constructor(
19
- message: string,
20
- public readonly code: string,
21
- public readonly statusCode?: number,
22
- public readonly retryable: boolean = false,
23
- ) {
24
- super(message);
25
- this.name = "LLMError";
26
- }
27
- }
28
-
29
- export class APIError extends LLMError {
30
- constructor(message: string, statusCode?: number) {
31
- super(message, "API_ERROR", statusCode, false);
32
- this.name = "APIError";
33
- }
34
- }
35
-
36
- export class TimeoutError extends LLMError {
37
- constructor(message: string = "LLM request timed out") {
38
- super(message, "TIMEOUT_ERROR", undefined, true);
39
- this.name = "TimeoutError";
40
- }
41
- }
42
-
43
- export class RateLimitError extends LLMError {
44
- constructor(message: string = "Rate limit exceeded", retryAfter?: number) {
45
- super(message, "RATE_LIMIT_ERROR", 429, true);
46
- this.name = "RateLimitError";
47
- this.retryAfter = retryAfter;
48
- }
49
- readonly retryAfter?: number;
50
- }
51
-
52
- // ---------------------------------------------------------------------------
53
- // Configuration
54
- // ---------------------------------------------------------------------------
55
-
56
- export interface LLMClientConfig {
57
- /** Maximum number of retry attempts (default: 3) */
58
- maxRetries?: number;
59
- /** Base delay for exponential backoff in ms (default: 1000) */
60
- baseDelay?: number;
61
- /** Maximum delay cap in ms (default: 10000) */
62
- maxDelay?: number;
63
- /** Request timeout in ms (default: 60000) */
64
- timeout?: number;
65
- }
66
-
67
- const DEFAULT_CONFIG: Required<LLMClientConfig> = {
68
- maxRetries: 3,
69
- baseDelay: 1000,
70
- maxDelay: 10000,
71
- timeout: 60000,
72
- };
73
-
74
- // ---------------------------------------------------------------------------
75
- // API Key Resolution
76
- // ---------------------------------------------------------------------------
77
-
78
- /**
79
- * Resolve API key from LLM configuration.
80
- * Checks direct apiKey first, then falls back to environment variable.
81
- */
82
- export function resolveApiKey(llm: ResolvedLLM): string {
83
- if (llm.apiKey) return llm.apiKey;
84
- if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv] ?? "";
85
- return "";
86
- }
87
-
88
- /**
89
- * Check if LLM is properly configured with an API key.
90
- */
91
- export function isLLMConfigured(llm: ResolvedLLM): boolean {
92
- return !!resolveApiKey(llm);
93
- }
94
-
95
- // ---------------------------------------------------------------------------
96
- // LLM Call with Retry
97
- // ---------------------------------------------------------------------------
98
-
99
- /**
100
- * Call LLM with unified fetch, retry mechanism, error handling, and timeout.
101
- *
102
- * @param llm - Resolved LLM configuration
103
- * @param prompt - Prompt to send to the LLM
104
- * @param maxTokens - Maximum tokens in response
105
- * @param systemPrompt - Optional system prompt (default provided)
106
- * @param config - Optional client configuration
107
- * @returns Raw response text from LLM, or empty string on failure
108
- */
109
- export async function callLLM(
110
- llm: ResolvedLLM,
111
- prompt: string,
112
- maxTokens: number,
113
- systemPrompt: string = "You are a helpful assistant. Always output valid JSON.",
114
- config: LLMClientConfig = {},
115
- ): Promise<string> {
116
- const apiKey = resolveApiKey(llm);
117
- if (!apiKey) {
118
- return "";
119
- }
120
-
121
- const cfg = { ...DEFAULT_CONFIG, ...config };
122
- const url = llm.baseURL.endsWith("/")
123
- ? llm.baseURL + "chat/completions"
124
- : llm.baseURL + "/chat/completions";
125
-
126
- const body = {
127
- model: llm.model,
128
- messages: [
129
- { role: "system", content: systemPrompt },
130
- { role: "user", content: prompt },
131
- ],
132
- temperature: 0.1,
133
- max_tokens: maxTokens,
134
- enable_thinking: false,
135
- };
136
-
137
- let lastError: LLMError | null = null;
138
-
139
- for (let attempt = 0; attempt <= cfg.maxRetries; attempt++) {
140
- try {
141
- const response = await callWithTimeout(
142
- fetch(url, {
143
- method: "POST",
144
- headers: {
145
- "Content-Type": "application/json",
146
- Authorization: `Bearer ${apiKey}`,
147
- },
148
- body: JSON.stringify(body),
149
- }),
150
- cfg.timeout,
151
- );
152
-
153
- if (!response.ok) {
154
- const text = await response.text().catch(() => "");
155
- lastError = classifyError(response.status, text, response.statusText);
156
-
157
- // Don't retry for non-retryable errors
158
- if (!lastError.retryable || attempt === cfg.maxRetries) {
159
- console.warn(`[llm-client] LLM call failed after ${attempt + 1} attempt(s): ${lastError.message}`);
160
- return "";
161
- }
162
-
163
- const delay = calculateBackoff(attempt, cfg.baseDelay, cfg.maxDelay, (lastError as RateLimitError).retryAfter);
164
- console.warn(`[llm-client] Retrying after ${delay}ms (attempt ${attempt + 1}/${cfg.maxRetries})`);
165
- await sleep(delay);
166
- continue;
167
- }
168
-
169
- const data = await response.json() as { choices?: Array<{ message?: { content?: string } }> };
170
- return data.choices?.[0]?.message?.content?.trim() ?? "";
171
-
172
- } catch (error) {
173
- // Classify the error
174
- if (error instanceof TimeoutError) {
175
- lastError = error;
176
- } else if (error instanceof LLMError) {
177
- lastError = error;
178
- } else {
179
- // Unknown error - wrap it
180
- const msg = error instanceof Error ? error.message : String(error);
181
- lastError = new APIError(`Unexpected error: ${msg}`);
182
- }
183
-
184
- // Don't retry if we've exhausted attempts
185
- if (attempt === cfg.maxRetries) {
186
- console.warn(`[llm-client] LLM call failed after ${attempt + 1} attempt(s): ${lastError.message}`);
187
- return "";
188
- }
189
-
190
- // Check if error is retryable
191
- if (!lastError.retryable) {
192
- console.warn(`[llm-client] Non-retryable error: ${lastError.message}`);
193
- return "";
194
- }
195
-
196
- const delay = calculateBackoff(attempt, cfg.baseDelay, cfg.maxDelay);
197
- console.warn(`[llm-client] Retrying after ${delay}ms (attempt ${attempt + 1}/${cfg.maxRetries}): ${lastError.message}`);
198
- await sleep(delay);
199
- }
200
- }
201
-
202
- return "";
203
- }
204
-
205
- /**
206
- * Classify HTTP error into appropriate error type.
207
- */
208
- function classifyError(status: number, responseText: string, statusText: string): LLMError {
209
- const truncatedText = responseText.slice(0, 200);
210
-
211
- switch (status) {
212
- case 429:
213
- // Try to extract retry-after from response
214
- const retryAfterMatch = responseText.match(/retry[- ]?after["']?\s*[:=]\s*(\d+)/i);
215
- const retryAfter = retryAfterMatch?.[1] ? parseInt(retryAfterMatch[1], 10) : undefined;
216
- return new RateLimitError(`Rate limited: ${statusText} - ${truncatedText}`, retryAfter);
217
-
218
- case 408:
219
- case 504:
220
- return new TimeoutError(`Request timeout: ${statusText}`);
221
-
222
- case 500:
223
- case 502:
224
- case 503:
225
- return new APIError(`Server error (${status}): ${truncatedText}`, status);
226
-
227
- default:
228
- if (status >= 500) {
229
- return new APIError(`Server error (${status}): ${truncatedText}`, status);
230
- }
231
- if (status >= 400) {
232
- return new APIError(`Client error (${status}): ${truncatedText}`, status);
233
- }
234
- return new APIError(`HTTP error (${status}): ${truncatedText}`, status);
235
- }
236
- }
237
-
238
- /**
239
- * Calculate exponential backoff delay with jitter.
240
- */
241
- function calculateBackoff(
242
- attempt: number,
243
- baseDelay: number,
244
- maxDelay: number,
245
- retryAfter?: number,
246
- ): number {
247
- // If server specified retry-after, use that
248
- if (retryAfter && retryAfter > 0) {
249
- return Math.min(retryAfter * 1000, maxDelay);
250
- }
251
-
252
- // Exponential backoff: baseDelay * 2^attempt
253
- const exponentialDelay = baseDelay * Math.pow(2, attempt);
254
-
255
- // Add jitter (±25%)
256
- const jitter = exponentialDelay * 0.25 * (Math.random() * 2 - 1);
257
-
258
- return Math.min(Math.round(exponentialDelay + jitter), maxDelay);
259
- }
260
-
261
- /**
262
- * Sleep for specified milliseconds.
263
- */
264
- function sleep(ms: number): Promise<void> {
265
- return new Promise((resolve) => setTimeout(resolve, ms));
266
- }
267
-
268
- /**
269
- * Wrap fetch with timeout using Promise.race.
270
- */
271
- async function callWithTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
272
- let timeoutId: NodeJS.Timeout;
273
-
274
- const timeoutPromise = new Promise<never>((_, reject) => {
275
- timeoutId = setTimeout(() => {
276
- reject(new TimeoutError(`Request timed out after ${timeoutMs}ms`));
277
- }, timeoutMs);
278
- });
279
-
280
- try {
281
- return await Promise.race([promise, timeoutPromise]);
282
- } finally {
283
- clearTimeout(timeoutId!);
284
- }
285
- }
286
-
287
- // ---------------------------------------------------------------------------
288
- // Re-export settings type for convenience
289
- // ---------------------------------------------------------------------------
290
-
291
- export type { ResolvedLLM } from "../settings";