ex-brain 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -0
- package/package.json +2 -1
- package/src/ai/compiler.ts +18 -53
- package/src/ai/entity-link.ts +31 -62
- package/src/ai/llm-client.ts +291 -0
- package/src/ai/timeline-extractor.ts +29 -62
- package/src/commands/index.ts +612 -86
- package/src/db/client.ts +121 -15
- package/src/db/errors.ts +178 -0
- package/src/db/schema.ts +1 -0
- package/src/mcp/server.ts +400 -237
- package/src/repositories/brain-repo.ts +576 -358
- package/src/settings.ts +23 -2
- package/src/types/index.ts +1 -0
- package/src/utils/cli-output.ts +569 -0
- package/src/utils/query-sanitizer.ts +63 -0
package/README.md
CHANGED
|
@@ -65,6 +65,13 @@ ebrain timeline extract companies/river-ai
|
|
|
65
65
|
ebrain search "some topic"
|
|
66
66
|
ebrain query "some question"
|
|
67
67
|
|
|
68
|
+
# AI-powered Q&A with LLM (RAG)
|
|
69
|
+
ebrain query --llm "What is the main idea of River AI's product?"
|
|
70
|
+
ebrain query --llm "What are Mario Zechner's main views on game development?"
|
|
71
|
+
|
|
72
|
+
# Smart ingest: compile + timeline + entity links in one command
|
|
73
|
+
ebrain smart-ingest companies/river-ai --file article.md
|
|
74
|
+
|
|
68
75
|
# Start MCP Server (for AI tool integration)
|
|
69
76
|
ebrain serve
|
|
70
77
|
```
|
|
@@ -82,12 +89,53 @@ Edit `~/.ebrain/settings.json`:
|
|
|
82
89
|
"model": "...",
|
|
83
90
|
"dimensions": 1024,
|
|
84
91
|
"apiKey": "sk-..."
|
|
92
|
+
},
|
|
93
|
+
"llm": {
|
|
94
|
+
"baseURL": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
|
95
|
+
"model": "qwen-plus",
|
|
96
|
+
"apiKey": "sk-..."
|
|
97
|
+
},
|
|
98
|
+
"extraction": {
|
|
99
|
+
"confidenceThreshold": 0.7 // Entity extraction confidence (0~1)
|
|
85
100
|
}
|
|
86
101
|
}
|
|
87
102
|
```
|
|
88
103
|
|
|
89
104
|
Run `ebrain config` to view active configuration. See [docs/ebrain-cli.md](docs/ebrain-cli.md) for details.
|
|
90
105
|
|
|
106
|
+
## AI Q&A (RAG)
|
|
107
|
+
|
|
108
|
+
Ask natural language questions and get answers based on your knowledge base:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# Basic Q&A
|
|
112
|
+
ebrain query --llm "What is the main idea of River AI's product?"
|
|
113
|
+
|
|
114
|
+
# Control context depth
|
|
115
|
+
ebrain query --llm "What happened in Q4?" --context-limit 3
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
How it works:
|
|
119
|
+
|
|
120
|
+
1. **Semantic Search** — Finds top matching pages for your question
|
|
121
|
+
2. **Multi-Layer Context Collection** — Builds rich context from:
|
|
122
|
+
- **Page Content** — Compiled truth + timeline for each matched page
|
|
123
|
+
- **Raw Documents** — Original imported documents (via `raw set`)
|
|
124
|
+
- **Linked Pages** — Incoming and outgoing linked pages, filtered by semantic relevance to the question
|
|
125
|
+
3. **LLM Synthesis** — Generates a sourced answer with `[[slug|title]]` citations
|
|
126
|
+
|
|
127
|
+
Configure LLM in `~/.ebrain/settings.json`:
|
|
128
|
+
|
|
129
|
+
```json
|
|
130
|
+
{
|
|
131
|
+
"llm": {
|
|
132
|
+
"baseURL": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
|
133
|
+
"model": "qwen-plus",
|
|
134
|
+
"apiKey": "sk-..."
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
91
139
|
## Development
|
|
92
140
|
|
|
93
141
|
```bash
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ex-brain",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "CLI personal knowledge base powered by seekdb",
|
|
5
5
|
"module": "src/cli.ts",
|
|
6
6
|
"type": "module",
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
"@seekdb/openai": "1.2.0",
|
|
30
30
|
"commander": "^14.0.3",
|
|
31
31
|
"gray-matter": "^4.0.3",
|
|
32
|
+
"jsonrepair": "^3.13.3",
|
|
32
33
|
"pinyin-pro": "^3.28.0",
|
|
33
34
|
"seekdb": "^1.2.0",
|
|
34
35
|
"yaml": "^2.8.3",
|
package/src/ai/compiler.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import type { ResolvedLLM } from "../settings";
|
|
2
2
|
import type { TimelineEntry } from "../types";
|
|
3
|
+
import { callLLM, resolveApiKey } from "./llm-client";
|
|
4
|
+
import { jsonrepair } from "jsonrepair";
|
|
3
5
|
|
|
4
6
|
// ---------------------------------------------------------------------------
|
|
5
7
|
// Types
|
|
@@ -116,8 +118,7 @@ async function analyzeNewInfo(
|
|
|
116
118
|
llm: ResolvedLLM,
|
|
117
119
|
): Promise<FactAnalysis> {
|
|
118
120
|
const prompt = buildAnalysisPrompt(input);
|
|
119
|
-
|
|
120
|
-
const resp = await callLLM(llm, prompt, 2048);
|
|
121
|
+
const resp = await callLLM(llm, prompt, 2048, COMPILER_SYSTEM_PROMPT);
|
|
121
122
|
const parsed = parseAnalysisResponse(resp);
|
|
122
123
|
|
|
123
124
|
return parsed;
|
|
@@ -174,8 +175,7 @@ async function smartMergeTruth(
|
|
|
174
175
|
llm: ResolvedLLM,
|
|
175
176
|
): Promise<{ compiledTruth: string; changed: boolean; changeType: CompileResult["changeType"]; changeSummary: string }> {
|
|
176
177
|
const prompt = buildMergePrompt(input, analysis);
|
|
177
|
-
|
|
178
|
-
const resp = await callLLM(llm, prompt, 4096);
|
|
178
|
+
const resp = await callLLM(llm, prompt, 4096, COMPILER_SYSTEM_PROMPT);
|
|
179
179
|
const result = parseMergeResponse(resp);
|
|
180
180
|
|
|
181
181
|
return result;
|
|
@@ -192,7 +192,7 @@ async function extractTimelineFromInfo(
|
|
|
192
192
|
// Only extract timeline for significant events
|
|
193
193
|
if (analysis.infoType === "status_update" || analysis.infoType === "new_event") {
|
|
194
194
|
const prompt = buildTimelinePrompt(input, analysis);
|
|
195
|
-
const resp = await callLLM(llm, prompt, 1024);
|
|
195
|
+
const resp = await callLLM(llm, prompt, 1024, COMPILER_SYSTEM_PROMPT);
|
|
196
196
|
return parseTimelineResponse(resp, input.pageContext?.slug ?? "");
|
|
197
197
|
}
|
|
198
198
|
|
|
@@ -276,7 +276,7 @@ Rewrite the compiled truth. Output ONLY JSON with this schema:
|
|
|
276
276
|
{
|
|
277
277
|
"compiledTruth": "the full rewritten compiled truth content (markdown format)",
|
|
278
278
|
"changed": true|false,
|
|
279
|
-
"changeType": "update|replace|conflict|none",
|
|
279
|
+
"changeType": "append|update|replace|conflict|none",
|
|
280
280
|
"changeSummary": "human-readable summary of what changed"
|
|
281
281
|
}
|
|
282
282
|
|
|
@@ -338,45 +338,8 @@ Rules:
|
|
|
338
338
|
// LLM Call
|
|
339
339
|
// ---------------------------------------------------------------------------
|
|
340
340
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
if (!apiKey) return "";
|
|
344
|
-
|
|
345
|
-
const body = {
|
|
346
|
-
model: llm.model,
|
|
347
|
-
messages: [
|
|
348
|
-
{ role: "system", content: "You are a knowledge compilation assistant. You analyze information, extract facts, and maintain structured compiled truth. Always output valid JSON. Be precise and factual." },
|
|
349
|
-
{ role: "user", content: prompt },
|
|
350
|
-
],
|
|
351
|
-
temperature: 0.1,
|
|
352
|
-
max_tokens: maxTokens,
|
|
353
|
-
enable_thinking: false,
|
|
354
|
-
};
|
|
355
|
-
|
|
356
|
-
try {
|
|
357
|
-
const resp = await fetch(
|
|
358
|
-
llm.baseURL.endsWith("/") ? llm.baseURL + "chat/completions" : llm.baseURL + "/chat/completions",
|
|
359
|
-
{
|
|
360
|
-
method: "POST",
|
|
361
|
-
headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
|
|
362
|
-
body: JSON.stringify(body),
|
|
363
|
-
},
|
|
364
|
-
);
|
|
365
|
-
|
|
366
|
-
if (!resp.ok) {
|
|
367
|
-
const text = await resp.text();
|
|
368
|
-
console.warn(`[compiler] LLM call failed (${resp.status}): ${text.slice(0, 200)}`);
|
|
369
|
-
return "";
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
const data = await resp.json();
|
|
373
|
-
return data.choices?.[0]?.message?.content?.trim() ?? "";
|
|
374
|
-
} catch (error) {
|
|
375
|
-
const msg = error instanceof Error ? error.message : String(error);
|
|
376
|
-
console.warn(`[compiler] LLM call error: ${msg}`);
|
|
377
|
-
return "";
|
|
378
|
-
}
|
|
379
|
-
}
|
|
341
|
+
// Use callLLM from llm-client module with custom system prompt
|
|
342
|
+
const COMPILER_SYSTEM_PROMPT = "You are a knowledge compilation assistant. You analyze information, extract facts, and maintain structured compiled truth. Always output valid JSON. Be precise and factual.";
|
|
380
343
|
|
|
381
344
|
// ---------------------------------------------------------------------------
|
|
382
345
|
// Response Parsing
|
|
@@ -389,7 +352,9 @@ function parseAnalysisResponse(resp: string): FactAnalysis {
|
|
|
389
352
|
}
|
|
390
353
|
|
|
391
354
|
try {
|
|
392
|
-
|
|
355
|
+
// Use jsonrepair to fix common LLM JSON issues
|
|
356
|
+
const repaired = jsonrepair(match[0]);
|
|
357
|
+
const parsed = JSON.parse(repaired) as Record<string, unknown>;
|
|
393
358
|
|
|
394
359
|
const facts: ExtractedFact[] = [];
|
|
395
360
|
const rawFacts = parsed.facts as unknown[] ?? [];
|
|
@@ -429,7 +394,9 @@ function parseMergeResponse(resp: string): { compiledTruth: string; changed: boo
|
|
|
429
394
|
}
|
|
430
395
|
|
|
431
396
|
try {
|
|
432
|
-
|
|
397
|
+
// Use jsonrepair to fix common LLM JSON issues
|
|
398
|
+
const repaired = jsonrepair(match[0]);
|
|
399
|
+
const parsed = JSON.parse(repaired) as Record<string, unknown>;
|
|
433
400
|
return {
|
|
434
401
|
compiledTruth: String(parsed.compiledTruth ?? ""),
|
|
435
402
|
changed: Boolean(parsed.changed),
|
|
@@ -451,7 +418,9 @@ function parseTimelineResponse(resp: string, pageSlug: string): TimelineEntry[]
|
|
|
451
418
|
if (!match) return [];
|
|
452
419
|
|
|
453
420
|
try {
|
|
454
|
-
|
|
421
|
+
// Use jsonrepair to fix common LLM JSON issues
|
|
422
|
+
const repaired = jsonrepair(match[0]);
|
|
423
|
+
const parsed = JSON.parse(repaired) as unknown[];
|
|
455
424
|
const entries: TimelineEntry[] = [];
|
|
456
425
|
|
|
457
426
|
for (const e of parsed) {
|
|
@@ -490,11 +459,7 @@ function normalizeChangeType(raw: string): CompileResult["changeType"] {
|
|
|
490
459
|
return "none";
|
|
491
460
|
}
|
|
492
461
|
|
|
493
|
-
|
|
494
|
-
if (llm.apiKey) return llm.apiKey;
|
|
495
|
-
if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv] ?? "";
|
|
496
|
-
return "";
|
|
497
|
-
}
|
|
462
|
+
// resolveApiKey is now imported from llm-client module
|
|
498
463
|
|
|
499
464
|
function appendFact(current: string, newInfo: string, source: string): string {
|
|
500
465
|
const timestamp = new Date().toISOString().slice(0, 10);
|
package/src/ai/entity-link.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
import { ResolvedLLM } from "../settings";
|
|
1
|
+
import type { ResolvedLLM } from "../settings";
|
|
2
|
+
import { callLLM, resolveApiKey, isLLMConfigured } from "./llm-client";
|
|
3
|
+
import { jsonrepair } from "jsonrepair";
|
|
2
4
|
|
|
3
5
|
// ---------------------------------------------------------------------------
|
|
4
6
|
// Types
|
|
@@ -75,10 +77,15 @@ const RELATION_TYPES = [
|
|
|
75
77
|
/**
|
|
76
78
|
* Use the configured LLM to extract entity relationships from text.
|
|
77
79
|
* Returns a list of relations with relation type, confidence, and context.
|
|
80
|
+
* Filters out relations with confidence below the threshold (default: 0.7).
|
|
78
81
|
*/
|
|
79
82
|
export async function extractRelations(
|
|
80
83
|
content: string,
|
|
81
84
|
llm: ResolvedLLM,
|
|
85
|
+
options?: {
|
|
86
|
+
/** Minimum confidence threshold (0-1). Relations below this are filtered out. Default: 0.7 */
|
|
87
|
+
confidenceThreshold?: number;
|
|
88
|
+
},
|
|
82
89
|
): Promise<ExtractionResult> {
|
|
83
90
|
const trimmed = content.trim();
|
|
84
91
|
if (!trimmed) return [];
|
|
@@ -91,65 +98,29 @@ export async function extractRelations(
|
|
|
91
98
|
context = trimmed.slice(0, 4000) + "\n\n...\n\n" + trimmed.slice(-1000);
|
|
92
99
|
}
|
|
93
100
|
|
|
94
|
-
|
|
95
|
-
if (!apiKey) return [];
|
|
96
|
-
|
|
97
|
-
const body = {
|
|
98
|
-
model: llm.model,
|
|
99
|
-
messages: [
|
|
100
|
-
{
|
|
101
|
-
role: "system",
|
|
102
|
-
content:
|
|
103
|
-
"You are a knowledge graph extraction assistant. " +
|
|
104
|
-
"Identify relationships between named entities. " +
|
|
105
|
-
"For each relationship, provide: from entity, to entity, relation type, confidence score, and exact context sentence. " +
|
|
106
|
-
`Allowed relation types: ${RELATION_TYPES}. ` +
|
|
107
|
-
"Output ONLY a JSON array. Schema: " +
|
|
108
|
-
'{ "type": "relation", "from": {"name": "...", "type": "..."}, ' +
|
|
109
|
-
'"to": {"name": "...", "type": "..."}, "relation": "...", "context": "...", "confidence": 0.9 }. ' +
|
|
110
|
-
"Output ONLY the JSON array. /no_think",
|
|
111
|
-
},
|
|
112
|
-
{
|
|
113
|
-
role: "user",
|
|
114
|
-
content: `Extract relationships from:\n\n${context}`,
|
|
115
|
-
},
|
|
116
|
-
],
|
|
117
|
-
temperature: 0.1,
|
|
118
|
-
max_tokens: 1024,
|
|
119
|
-
enable_thinking: false,
|
|
120
|
-
};
|
|
101
|
+
if (!isLLMConfigured(llm)) return [];
|
|
121
102
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
Authorization: `Bearer ${apiKey}`,
|
|
132
|
-
},
|
|
133
|
-
body: JSON.stringify(body),
|
|
134
|
-
},
|
|
135
|
-
);
|
|
136
|
-
|
|
137
|
-
if (!resp.ok) {
|
|
138
|
-
const text = await resp.text();
|
|
139
|
-
console.warn(
|
|
140
|
-
`[ebrain] Entity extraction failed (${resp.status}): ${text.slice(0, 200)}`,
|
|
141
|
-
);
|
|
142
|
-
return [];
|
|
143
|
-
}
|
|
103
|
+
const systemPrompt =
|
|
104
|
+
"You are a knowledge graph extraction assistant. " +
|
|
105
|
+
"Identify relationships between named entities. " +
|
|
106
|
+
"For each relationship, provide: from entity, to entity, relation type, confidence score, and exact context sentence. " +
|
|
107
|
+
`Allowed relation types: ${RELATION_TYPES}. ` +
|
|
108
|
+
"Output ONLY a JSON array. Schema: " +
|
|
109
|
+
'{ "type": "relation", "from": {"name": "...", "type": "..."}, ' +
|
|
110
|
+
'"to": {"name": "...", "type": "..."}, "relation": "...", "context": "...", "confidence": 0.9 }. ' +
|
|
111
|
+
"Output ONLY the JSON array. /no_think";
|
|
144
112
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
if (!raw) return [];
|
|
113
|
+
const resp = await callLLM(llm, `Extract relationships from:\n\n${context}`, 1024, systemPrompt);
|
|
114
|
+
if (!resp) return [];
|
|
148
115
|
|
|
149
|
-
|
|
150
|
-
|
|
116
|
+
// Extract JSON array from response
|
|
117
|
+
const match = resp.match(/\[[\s\S]*\]/);
|
|
118
|
+
if (!match) return [];
|
|
151
119
|
|
|
152
|
-
|
|
120
|
+
try {
|
|
121
|
+
// Use jsonrepair to fix common LLM JSON issues (unterminated strings, etc.)
|
|
122
|
+
const repaired = jsonrepair(match[0]);
|
|
123
|
+
const parsed = JSON.parse(repaired) as unknown[];
|
|
153
124
|
const relations: ExtractionResult = [];
|
|
154
125
|
|
|
155
126
|
for (const item of parsed) {
|
|
@@ -175,7 +146,9 @@ export async function extractRelations(
|
|
|
175
146
|
});
|
|
176
147
|
}
|
|
177
148
|
|
|
178
|
-
|
|
149
|
+
// Filter by confidence threshold (default 0.7)
|
|
150
|
+
const threshold = options?.confidenceThreshold ?? 0.7;
|
|
151
|
+
return relations.filter((r) => r.confidence >= threshold);
|
|
179
152
|
} catch (error) {
|
|
180
153
|
const msg = error instanceof Error ? error.message : String(error);
|
|
181
154
|
console.warn(`[ebrain] Entity extraction error: ${msg}`);
|
|
@@ -219,8 +192,4 @@ export function normalizeRelationType(raw: string): RelationType {
|
|
|
219
192
|
return "related_to";
|
|
220
193
|
}
|
|
221
194
|
|
|
222
|
-
|
|
223
|
-
if (llm.apiKey) return llm.apiKey;
|
|
224
|
-
if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv] ?? "";
|
|
225
|
-
return "";
|
|
226
|
-
}
|
|
195
|
+
|
|
package/src/ai/llm-client.ts
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified LLM Client Module
|
|
3
|
+
*
|
|
4
|
+
* Provides centralized LLM calling functionality with:
|
|
5
|
+
* - Retry mechanism (exponential backoff, max 3 retries)
|
|
6
|
+
* - Error classification (APIError, TimeoutError, RateLimitError)
|
|
7
|
+
* - Timeout control
|
|
8
|
+
* - Unified API key resolution
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { ResolvedLLM } from "../settings";
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Error Classes
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
export class LLMError extends Error {
|
|
18
|
+
constructor(
|
|
19
|
+
message: string,
|
|
20
|
+
public readonly code: string,
|
|
21
|
+
public readonly statusCode?: number,
|
|
22
|
+
public readonly retryable: boolean = false,
|
|
23
|
+
) {
|
|
24
|
+
super(message);
|
|
25
|
+
this.name = "LLMError";
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export class APIError extends LLMError {
|
|
30
|
+
constructor(message: string, statusCode?: number) {
|
|
31
|
+
super(message, "API_ERROR", statusCode, false);
|
|
32
|
+
this.name = "APIError";
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export class TimeoutError extends LLMError {
|
|
37
|
+
constructor(message: string = "LLM request timed out") {
|
|
38
|
+
super(message, "TIMEOUT_ERROR", undefined, true);
|
|
39
|
+
this.name = "TimeoutError";
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
export class RateLimitError extends LLMError {
|
|
44
|
+
constructor(message: string = "Rate limit exceeded", retryAfter?: number) {
|
|
45
|
+
super(message, "RATE_LIMIT_ERROR", 429, true);
|
|
46
|
+
this.name = "RateLimitError";
|
|
47
|
+
this.retryAfter = retryAfter;
|
|
48
|
+
}
|
|
49
|
+
readonly retryAfter?: number;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Configuration
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
export interface LLMClientConfig {
|
|
57
|
+
/** Maximum number of retry attempts (default: 3) */
|
|
58
|
+
maxRetries?: number;
|
|
59
|
+
/** Base delay for exponential backoff in ms (default: 1000) */
|
|
60
|
+
baseDelay?: number;
|
|
61
|
+
/** Maximum delay cap in ms (default: 10000) */
|
|
62
|
+
maxDelay?: number;
|
|
63
|
+
/** Request timeout in ms (default: 60000) */
|
|
64
|
+
timeout?: number;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
const DEFAULT_CONFIG: Required<LLMClientConfig> = {
|
|
68
|
+
maxRetries: 3,
|
|
69
|
+
baseDelay: 1000,
|
|
70
|
+
maxDelay: 10000,
|
|
71
|
+
timeout: 60000,
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// API Key Resolution
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Resolve API key from LLM configuration.
|
|
80
|
+
* Checks direct apiKey first, then falls back to environment variable.
|
|
81
|
+
*/
|
|
82
|
+
export function resolveApiKey(llm: ResolvedLLM): string {
|
|
83
|
+
if (llm.apiKey) return llm.apiKey;
|
|
84
|
+
if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv] ?? "";
|
|
85
|
+
return "";
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Check if LLM is properly configured with an API key.
|
|
90
|
+
*/
|
|
91
|
+
export function isLLMConfigured(llm: ResolvedLLM): boolean {
|
|
92
|
+
return !!resolveApiKey(llm);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
// LLM Call with Retry
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Call LLM with unified fetch, retry mechanism, error handling, and timeout.
|
|
101
|
+
*
|
|
102
|
+
* @param llm - Resolved LLM configuration
|
|
103
|
+
* @param prompt - Prompt to send to the LLM
|
|
104
|
+
* @param maxTokens - Maximum tokens in response
|
|
105
|
+
* @param systemPrompt - Optional system prompt (default provided)
|
|
106
|
+
* @param config - Optional client configuration
|
|
107
|
+
* @returns Raw response text from LLM, or empty string on failure
|
|
108
|
+
*/
|
|
109
|
+
export async function callLLM(
|
|
110
|
+
llm: ResolvedLLM,
|
|
111
|
+
prompt: string,
|
|
112
|
+
maxTokens: number,
|
|
113
|
+
systemPrompt: string = "You are a helpful assistant. Always output valid JSON.",
|
|
114
|
+
config: LLMClientConfig = {},
|
|
115
|
+
): Promise<string> {
|
|
116
|
+
const apiKey = resolveApiKey(llm);
|
|
117
|
+
if (!apiKey) {
|
|
118
|
+
return "";
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
const cfg = { ...DEFAULT_CONFIG, ...config };
|
|
122
|
+
const url = llm.baseURL.endsWith("/")
|
|
123
|
+
? llm.baseURL + "chat/completions"
|
|
124
|
+
: llm.baseURL + "/chat/completions";
|
|
125
|
+
|
|
126
|
+
const body = {
|
|
127
|
+
model: llm.model,
|
|
128
|
+
messages: [
|
|
129
|
+
{ role: "system", content: systemPrompt },
|
|
130
|
+
{ role: "user", content: prompt },
|
|
131
|
+
],
|
|
132
|
+
temperature: 0.1,
|
|
133
|
+
max_tokens: maxTokens,
|
|
134
|
+
enable_thinking: false,
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
let lastError: LLMError | null = null;
|
|
138
|
+
|
|
139
|
+
for (let attempt = 0; attempt <= cfg.maxRetries; attempt++) {
|
|
140
|
+
try {
|
|
141
|
+
const response = await callWithTimeout(
|
|
142
|
+
fetch(url, {
|
|
143
|
+
method: "POST",
|
|
144
|
+
headers: {
|
|
145
|
+
"Content-Type": "application/json",
|
|
146
|
+
Authorization: `Bearer ${apiKey}`,
|
|
147
|
+
},
|
|
148
|
+
body: JSON.stringify(body),
|
|
149
|
+
}),
|
|
150
|
+
cfg.timeout,
|
|
151
|
+
);
|
|
152
|
+
|
|
153
|
+
if (!response.ok) {
|
|
154
|
+
const text = await response.text().catch(() => "");
|
|
155
|
+
lastError = classifyError(response.status, text, response.statusText);
|
|
156
|
+
|
|
157
|
+
// Don't retry for non-retryable errors
|
|
158
|
+
if (!lastError.retryable || attempt === cfg.maxRetries) {
|
|
159
|
+
console.warn(`[llm-client] LLM call failed after ${attempt + 1} attempt(s): ${lastError.message}`);
|
|
160
|
+
return "";
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
const delay = calculateBackoff(attempt, cfg.baseDelay, cfg.maxDelay, (lastError as RateLimitError).retryAfter);
|
|
164
|
+
console.warn(`[llm-client] Retrying after ${delay}ms (attempt ${attempt + 1}/${cfg.maxRetries})`);
|
|
165
|
+
await sleep(delay);
|
|
166
|
+
continue;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
const data = await response.json() as { choices?: Array<{ message?: { content?: string } }> };
|
|
170
|
+
return data.choices?.[0]?.message?.content?.trim() ?? "";
|
|
171
|
+
|
|
172
|
+
} catch (error) {
|
|
173
|
+
// Classify the error
|
|
174
|
+
if (error instanceof TimeoutError) {
|
|
175
|
+
lastError = error;
|
|
176
|
+
} else if (error instanceof LLMError) {
|
|
177
|
+
lastError = error;
|
|
178
|
+
} else {
|
|
179
|
+
// Unknown error - wrap it
|
|
180
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
181
|
+
lastError = new APIError(`Unexpected error: ${msg}`);
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
// Don't retry if we've exhausted attempts
|
|
185
|
+
if (attempt === cfg.maxRetries) {
|
|
186
|
+
console.warn(`[llm-client] LLM call failed after ${attempt + 1} attempt(s): ${lastError.message}`);
|
|
187
|
+
return "";
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
// Check if error is retryable
|
|
191
|
+
if (!lastError.retryable) {
|
|
192
|
+
console.warn(`[llm-client] Non-retryable error: ${lastError.message}`);
|
|
193
|
+
return "";
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
const delay = calculateBackoff(attempt, cfg.baseDelay, cfg.maxDelay);
|
|
197
|
+
console.warn(`[llm-client] Retrying after ${delay}ms (attempt ${attempt + 1}/${cfg.maxRetries}): ${lastError.message}`);
|
|
198
|
+
await sleep(delay);
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
return "";
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
/**
|
|
206
|
+
* Classify HTTP error into appropriate error type.
|
|
207
|
+
*/
|
|
208
|
+
function classifyError(status: number, responseText: string, statusText: string): LLMError {
|
|
209
|
+
const truncatedText = responseText.slice(0, 200);
|
|
210
|
+
|
|
211
|
+
switch (status) {
|
|
212
|
+
case 429:
|
|
213
|
+
// Try to extract retry-after from response
|
|
214
|
+
const retryAfterMatch = responseText.match(/retry[- ]?after["']?\s*[:=]\s*(\d+)/i);
|
|
215
|
+
const retryAfter = retryAfterMatch?.[1] ? parseInt(retryAfterMatch[1], 10) : undefined;
|
|
216
|
+
return new RateLimitError(`Rate limited: ${statusText} - ${truncatedText}`, retryAfter);
|
|
217
|
+
|
|
218
|
+
case 408:
|
|
219
|
+
case 504:
|
|
220
|
+
return new TimeoutError(`Request timeout: ${statusText}`);
|
|
221
|
+
|
|
222
|
+
case 500:
|
|
223
|
+
case 502:
|
|
224
|
+
case 503:
|
|
225
|
+
return new APIError(`Server error (${status}): ${truncatedText}`, status);
|
|
226
|
+
|
|
227
|
+
default:
|
|
228
|
+
if (status >= 500) {
|
|
229
|
+
return new APIError(`Server error (${status}): ${truncatedText}`, status);
|
|
230
|
+
}
|
|
231
|
+
if (status >= 400) {
|
|
232
|
+
return new APIError(`Client error (${status}): ${truncatedText}`, status);
|
|
233
|
+
}
|
|
234
|
+
return new APIError(`HTTP error (${status}): ${truncatedText}`, status);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
/**
|
|
239
|
+
* Calculate exponential backoff delay with jitter.
|
|
240
|
+
*/
|
|
241
|
+
function calculateBackoff(
|
|
242
|
+
attempt: number,
|
|
243
|
+
baseDelay: number,
|
|
244
|
+
maxDelay: number,
|
|
245
|
+
retryAfter?: number,
|
|
246
|
+
): number {
|
|
247
|
+
// If server specified retry-after, use that
|
|
248
|
+
if (retryAfter && retryAfter > 0) {
|
|
249
|
+
return Math.min(retryAfter * 1000, maxDelay);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// Exponential backoff: baseDelay * 2^attempt
|
|
253
|
+
const exponentialDelay = baseDelay * Math.pow(2, attempt);
|
|
254
|
+
|
|
255
|
+
// Add jitter (±25%)
|
|
256
|
+
const jitter = exponentialDelay * 0.25 * (Math.random() * 2 - 1);
|
|
257
|
+
|
|
258
|
+
return Math.min(Math.round(exponentialDelay + jitter), maxDelay);
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
/**
|
|
262
|
+
* Sleep for specified milliseconds.
|
|
263
|
+
*/
|
|
264
|
+
function sleep(ms: number): Promise<void> {
|
|
265
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* Wrap fetch with timeout using Promise.race.
|
|
270
|
+
*/
|
|
271
|
+
async function callWithTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
|
|
272
|
+
let timeoutId: NodeJS.Timeout;
|
|
273
|
+
|
|
274
|
+
const timeoutPromise = new Promise<never>((_, reject) => {
|
|
275
|
+
timeoutId = setTimeout(() => {
|
|
276
|
+
reject(new TimeoutError(`Request timed out after ${timeoutMs}ms`));
|
|
277
|
+
}, timeoutMs);
|
|
278
|
+
});
|
|
279
|
+
|
|
280
|
+
try {
|
|
281
|
+
return await Promise.race([promise, timeoutPromise]);
|
|
282
|
+
} finally {
|
|
283
|
+
clearTimeout(timeoutId!);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
// ---------------------------------------------------------------------------
|
|
288
|
+
// Re-export settings type for convenience
|
|
289
|
+
// ---------------------------------------------------------------------------
|
|
290
|
+
|
|
291
|
+
export type { ResolvedLLM } from "../settings";
|