ex-brain 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -37
- package/package.json +6 -5
- package/src/ai/compiler.ts +494 -0
- package/src/ai/embed-factory.ts +116 -0
- package/src/ai/entity-link.ts +195 -0
- package/src/ai/hash-embed.ts +30 -0
- package/src/ai/llm-client.ts +291 -0
- package/src/ai/timeline-extractor.ts +403 -0
- package/src/cli.ts +16 -0
- package/src/commands/compile-cmd.ts +208 -0
- package/src/commands/graph-cmd.ts +1070 -0
- package/src/commands/index.ts +1973 -0
- package/src/config.ts +80 -0
- package/src/db/client.ts +207 -0
- package/src/db/errors.ts +178 -0
- package/src/db/schema.ts +50 -0
- package/src/markdown/io.ts +61 -0
- package/src/markdown/parser.ts +72 -0
- package/src/mcp/server.ts +703 -0
- package/src/repositories/brain-repo.ts +990 -0
- package/src/settings.ts +235 -0
- package/src/types/index.ts +56 -0
- package/src/utils/cli-output.ts +569 -0
- package/src/utils/progress.ts +171 -0
- package/src/utils/query-sanitizer.ts +63 -0
- package/dist/cli.js +0 -93543
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
import type { ResolvedLLM } from "../settings";
|
|
2
|
+
import type { TimelineEntry } from "../types";
|
|
3
|
+
import { callLLM, resolveApiKey } from "./llm-client";
|
|
4
|
+
import { jsonrepair } from "jsonrepair";
|
|
5
|
+
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Types
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
/**
 * Everything the compiler needs in order to fold one piece of new
 * information into a page's compiled truth.
 */
export interface CompileInput {
  /** Compiled truth content as it stands before this compilation. */
  currentTruth: string;

  /** Existing timeline entries, supplied as context. */
  timeline: Array<TimelineEntry>;

  /** The new information to process. */
  newInfo: string;

  /** Where the new information came from. */
  source: string;

  /** Date of the new information (ISO or YYYY-MM-DD). */
  date: string;

  /** Optional metadata about the page, used as context. */
  pageContext?: {
    slug: string;
    type: string;
    title: string;
  };
}
|
|
28
|
+
|
|
29
|
+
/** Outcome of a compilation run. */
export interface CompileResult {
  /** The compiled truth after the update. */
  compiledTruth: string;

  /** True when an update was made. */
  changed: boolean;

  /** Kind of change that was applied. */
  changeType:
    | "append"
    | "update"
    | "replace"
    | "none"
    | "conflict";

  /** Human-readable description of what changed. */
  changeSummary: string;

  /** Timeline entries to add, extracted from the new information. */
  timelineEntries: Array<TimelineEntry>;

  /** Confidence score. */
  confidence: number;
}
|
|
43
|
+
|
|
44
|
+
/** Structured reading of a piece of new information. */
export interface FactAnalysis {
  /** Key facts that were extracted. */
  facts: Array<ExtractedFact>;

  /** Classification of the information. */
  infoType:
    | "status_update"
    | "new_event"
    | "correction"
    | "confirmation"
    | "new_entity";

  /** Entities mentioned in the information. */
  entities: Array<string>;

  /** Temporal context of the information. */
  temporalContext: string;
}
|
|
54
|
+
|
|
55
|
+
/** A single fact pulled out of new information. */
export interface ExtractedFact {
  /** Category of the fact, e.g. "funding_stage", "valuation", "ceo". */
  category: string;

  /** Value being superseded, present only when the fact is an update. */
  oldValue?: string;

  /** The new value. */
  newValue: string;

  /** Whether the fact replaces an existing value or adds a new one. */
  action: "replace" | "add";

  /** Sentence the fact was taken from. */
  sourceSentence: string;

  /** Confidence for this fact. */
  confidence: number;
}
|
|
69
|
+
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
// Compile Logic
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
/**
 * Intelligent compilation: analyze new info, then merge/update the compiled truth.
 *
 * When no API key can be resolved for `llm`, the new information is appended
 * as a plain sourced bullet and no LLM call is made.
 *
 * Otherwise the new information is analyzed first; the truth rewrite and the
 * timeline extraction both depend only on that analysis, so they are started
 * together and awaited with `Promise.all` instead of one after the other.
 *
 * @param input - Current truth, timeline context and the new information.
 * @param llm - Resolved LLM settings used for every model call.
 * @returns The updated truth, change metadata, extracted timeline entries and
 *   the mean confidence of the extracted facts (0 when no facts were extracted).
 */
export async function compileTruth(
  input: CompileInput,
  llm: ResolvedLLM,
): Promise<CompileResult> {
  if (!resolveApiKey(llm)) {
    return {
      compiledTruth: appendFact(input.currentTruth, input.newInfo, input.source),
      changed: true,
      changeType: "append",
      changeSummary: "LLM not configured, appended as simple fact",
      timelineEntries: [],
      confidence: 0.5,
    };
  }

  // Step 1: analyze the new information.
  const analysis = await analyzeNewInfo(input, llm);

  // Steps 2 and 3 are independent of each other, so run them concurrently.
  const [updateResult, timelineEntries] = await Promise.all([
    generateUpdatedTruth(input, analysis, llm),
    extractTimelineFromInfo(input, analysis, llm),
  ]);

  // Mean fact confidence; the Math.max guard avoids dividing by zero.
  const confidenceSum = analysis.facts.reduce((sum, f) => sum + f.confidence, 0);
  const confidence = confidenceSum / Math.max(analysis.facts.length, 1);

  return {
    compiledTruth: updateResult.compiledTruth,
    changed: updateResult.changed,
    changeType: updateResult.changeType,
    changeSummary: updateResult.changeSummary,
    timelineEntries,
    confidence,
  };
}
|
|
112
|
+
|
|
113
|
+
/**
 * Step 1: send the analysis prompt to the LLM and parse its reply into a
 * FactAnalysis.
 */
async function analyzeNewInfo(
  input: CompileInput,
  llm: ResolvedLLM,
): Promise<FactAnalysis> {
  const rawReply = await callLLM(
    llm,
    buildAnalysisPrompt(input),
    2048,
    COMPILER_SYSTEM_PROMPT,
  );
  return parseAnalysisResponse(rawReply);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Step 2: produce the updated compiled truth from the analysis.
 *
 * Routing by `analysis.infoType`:
 * - no facts at all: nothing to do, current truth is returned unchanged;
 * - `status_update` / `correction`: delegated to the LLM-backed smart merge;
 * - `new_event` / `new_entity`: facts are appended as structured bullets;
 * - `confirmation`: the information confirms what is already recorded, so the
 *   truth is left untouched rather than appending a duplicate of it;
 * - anything else: the raw text is appended with source attribution.
 *
 * @param input - Current truth plus the new information and its source.
 * @param analysis - Result of the analysis step.
 * @param llm - Resolved LLM settings, used only by the smart merge.
 * @returns The resulting truth and a description of the change.
 */
async function generateUpdatedTruth(
  input: CompileInput,
  analysis: FactAnalysis,
  llm: ResolvedLLM,
): Promise<{ compiledTruth: string; changed: boolean; changeType: CompileResult["changeType"]; changeSummary: string }> {
  // If no facts were extracted, no change is needed.
  if (analysis.facts.length === 0) {
    return {
      compiledTruth: input.currentTruth,
      changed: false,
      changeType: "none",
      changeSummary: "No actionable facts extracted",
    };
  }

  switch (analysis.infoType) {
    // Status updates and corrections rewrite existing content via the LLM.
    case "status_update":
    case "correction":
      return smartMergeTruth(input, analysis, llm);

    // New events/entities are additive.
    case "new_event":
    case "new_entity":
      return {
        compiledTruth: appendStructuredFacts(input.currentTruth, analysis.facts, input.source),
        changed: true,
        changeType: "append",
        changeSummary: `Added ${analysis.facts.length} new facts`,
      };

    // A confirmation by definition changes nothing; appending it would only
    // duplicate facts that are already present.
    case "confirmation":
      return {
        compiledTruth: input.currentTruth,
        changed: false,
        changeType: "none",
        changeSummary: "Confirmation of existing information; compiled truth left unchanged",
      };

    // Defensive fallback for any unexpected value: append with attribution.
    default:
      return {
        compiledTruth: appendFact(input.currentTruth, input.newInfo, input.source),
        changed: true,
        changeType: "append",
        changeSummary: "Appended new information with source attribution",
      };
  }
}
|
|
168
|
+
|
|
169
|
+
/**
 * Smart merge: asks the LLM to rewrite the compiled truth so that it
 * incorporates the analyzed changes.
 *
 * If the reply cannot be used (the parsed result is not marked as changed, or
 * its compiled truth is blank), the existing truth is returned unchanged.
 * This prevents an unparsable or empty reply from replacing the page content
 * with an empty string.
 *
 * @param input - Current truth plus the new information and its source.
 * @param analysis - Facts and classification to apply.
 * @param llm - Resolved LLM settings.
 * @returns The merged truth with change metadata, or the untouched current
 *   truth with `changed: false` when the merge produced nothing usable.
 */
async function smartMergeTruth(
  input: CompileInput,
  analysis: FactAnalysis,
  llm: ResolvedLLM,
): Promise<{ compiledTruth: string; changed: boolean; changeType: CompileResult["changeType"]; changeSummary: string }> {
  const prompt = buildMergePrompt(input, analysis);
  const resp = await callLLM(llm, prompt, 4096, COMPILER_SYSTEM_PROMPT);
  const merged = parseMergeResponse(resp);

  // Never hand back an empty or "unchanged" rewrite as the new truth.
  if (!merged.changed || merged.compiledTruth.trim() === "") {
    return {
      compiledTruth: input.currentTruth,
      changed: false,
      changeType: "none",
      changeSummary:
        merged.changeSummary || "LLM merge produced no usable result; existing compiled truth kept",
    };
  }

  return merged;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Step 3: derive timeline entries from the new information.
 *
 * Only status updates and new events are sent to the LLM; every other
 * information type yields an empty list without any model call.
 */
async function extractTimelineFromInfo(
  input: CompileInput,
  analysis: FactAnalysis,
  llm: ResolvedLLM,
): Promise<TimelineEntry[]> {
  const isSignificant =
    analysis.infoType === "status_update" || analysis.infoType === "new_event";
  if (!isSignificant) {
    return [];
  }

  const rawReply = await callLLM(
    llm,
    buildTimelinePrompt(input, analysis),
    1024,
    COMPILER_SYSTEM_PROMPT,
  );
  const slug = input.pageContext?.slug ?? "";
  return parseTimelineResponse(rawReply, slug);
}
|
|
201
|
+
|
|
202
|
+
// ---------------------------------------------------------------------------
|
|
203
|
+
// Prompt Building
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
|
|
206
|
+
/**
 * Builds the prompt for the analysis step: page context, current truth, up to
 * the first ten timeline entries, the new information, and the JSON schema
 * and classification rules the model must follow.
 */
function buildAnalysisPrompt(input: CompileInput): string {
  const pageTitle = input.pageContext?.title ?? "Unknown";
  const pageType = input.pageContext?.type ?? "unknown";
  const truthBlock = input.currentTruth || "(empty)";
  const timelineBlock =
    input.timeline
      .slice(0, 10)
      .map((entry) => `- ${entry.date} | ${entry.source}: ${entry.summary}`)
      .join("\n") || "(no timeline)";

  const promptLines = [
    "Analyze the new information and classify what type of update this represents.",
    "",
    "## Context",
    `Page: ${pageTitle} (${pageType})`,
    "Current Compiled Truth:",
    truthBlock,
    "",
    "Recent Timeline (for temporal context):",
    timelineBlock,
    "",
    "## New Information",
    `Source: ${input.source}`,
    `Date: ${input.date}`,
    `Content: ${input.newInfo}`,
    "",
    "## Task",
    "Classify this information and extract key facts. Output ONLY JSON.",
    "",
    "Schema:",
    "{",
    '"facts": [',
    "{",
    '"category": "funding_stage|valuation|ceo|employee_count|product_status|partnership|...",',
    '"oldValue": "previous value if this updates something (null if new)",',
    '"newValue": "the new value",',
    '"action": "replace|add",',
    '"sourceSentence": "exact sentence from new info",',
    '"confidence": 0.0-1.0',
    "}",
    "],",
    '"infoType": "status_update|new_event|correction|confirmation|new_entity",',
    '"entities": ["list of entities mentioned"],',
    '"temporalContext": "when this happened or is valid for"',
    "}",
    "",
    "Rules:",
    '1. "status_update" = information that changes/updates existing state (e.g., funding stage change)',
    '2. "new_event" = discrete event that happened (e.g., product launch)',
    '3. "correction" = explicitly correcting previous information',
    '4. "confirmation" = confirming existing information without change',
    '5. "new_entity" = introducing new entity/aspect not previously tracked',
    "6. Extract ALL actionable facts, not just the most prominent one",
    "7. Use high confidence (0.8+) for clear, explicit statements; lower for ambiguous ones",
    "",
    "/no_think",
  ];

  return promptLines.join("\n");
}
|
|
253
|
+
|
|
254
|
+
/**
 * Builds the prompt that asks the model to rewrite the compiled truth so it
 * incorporates the analyzed facts.
 *
 * The worked example at the end of the prompt is meant to be a valid JSON
 * document. Because the prompt is a template literal, a bare `\n` would be
 * turned into a real line break and make the example's string value invalid
 * JSON; the example therefore uses `\\n` so the model sees the two-character
 * escape sequence it is expected to emit.
 *
 * @param input - Supplies the current truth, source and date.
 * @param analysis - Supplies the facts to apply and the information type.
 * @returns The complete merge prompt.
 */
function buildMergePrompt(input: CompileInput, analysis: FactAnalysis): string {
  // One bullet per fact: `old → new` for updates, just the new value otherwise.
  const factSummaries = analysis.facts.map(f =>
    `- ${f.category}: ${f.oldValue ? `"${f.oldValue}" → "${f.newValue}"` : `"${f.newValue}"`} (${f.action}, confidence: ${f.confidence})`
  ).join("\n");

  return `Rewrite the compiled truth to incorporate the analyzed changes.

## Current Compiled Truth
${input.currentTruth || "(empty)"}

## Changes to Apply
${factSummaries}

## Source Attribution
Source: ${input.source}
Date: ${input.date}

## Change Type
${analysis.infoType}

## Task
Rewrite the compiled truth. Output ONLY JSON with this schema:
{
"compiledTruth": "the full rewritten compiled truth content (markdown format)",
"changed": true|false,
"changeType": "append|update|replace|conflict|none",
"changeSummary": "human-readable summary of what changed"
}

Rules:
1. For "replace" actions: remove the old value, add the new value
2. For "add" actions: append the new fact in appropriate section
3. Preserve the overall structure and style of existing content
4. Add source attribution: append " (Source: ${input.source}, ${input.date})" to updated facts
5. If structure doesn't exist, create appropriate sections (## Status, ## Facts, etc.)
6. "update" = modified existing content; "replace" = replaced entire section; "conflict" = contradictory info (keep both with notes)
7. Do NOT remove historical context - keep timeline references
8. Format as clean markdown

Example output for funding stage update:
{
"compiledTruth": "## Status\\n\\n- **Funding Stage**: Series A (Source: meeting_notes, 2024-05-20)\\n- **Valuation**: ~$50M (estimated)\\n\\n## History\\n\\n- Previously: Seed stage (until 2024-05-20)\\n\\n## Facts\\n\\n- ...",
"changed": true,
"changeType": "update",
"changeSummary": "Updated funding stage from Seed to Series A"
}

/no_think`;
}
|
|
303
|
+
|
|
304
|
+
/**
 * Builds the prompt that asks the model for timeline entries describing the
 * new information.
 *
 * The source is embedded in the JSON example through `JSON.stringify`, so a
 * source containing quotes or backslashes still yields a well-formed example.
 * For ordinary sources the output is identical to plain quoting.
 *
 * @param input - Supplies the date, source and content of the new information.
 * @param analysis - Supplies the information type and the extracted facts.
 * @returns The complete timeline-extraction prompt.
 */
function buildTimelinePrompt(input: CompileInput, analysis: FactAnalysis): string {
  const keyFacts = analysis.facts.map(f => f.newValue).join(", ");
  const sourceLiteral = JSON.stringify(input.source);

  return `Extract timeline entries from this information.

## New Information
Date: ${input.date}
Source: ${input.source}
Content: ${input.newInfo}

## Analysis
Type: ${analysis.infoType}
Key Facts: ${keyFacts}

## Task
Create timeline entries. Output ONLY JSON array:
[
{
"date": "YYYY-MM-DD",
"source": ${sourceLiteral},
"summary": "one-line summary (max 80 chars)",
"detail": "optional additional detail (markdown)"
}
]

Rules:
1. Use the provided date, or extract exact date from content if mentioned
2. Summary should be concise and factual
3. Only create entries for significant events worth tracking
4. Max 2 entries per input
5. Empty array if nothing significant

/no_think`;
}
|
|
336
|
+
|
|
337
|
+
// ---------------------------------------------------------------------------
|
|
338
|
+
// LLM Call
|
|
339
|
+
// ---------------------------------------------------------------------------
|
|
340
|
+
|
|
341
|
+
// Use callLLM from llm-client module with custom system prompt
|
|
342
|
+
/**
 * System prompt passed as the final argument of the `callLLM` calls in this
 * module (analysis, merge and timeline extraction). It instructs the model to
 * act as a knowledge compilation assistant and to always answer in valid JSON.
 */
const COMPILER_SYSTEM_PROMPT = "You are a knowledge compilation assistant. You analyze information, extract facts, and maintain structured compiled truth. Always output valid JSON. Be precise and factual.";
|
|
343
|
+
|
|
344
|
+
// ---------------------------------------------------------------------------
|
|
345
|
+
// Response Parsing
|
|
346
|
+
// ---------------------------------------------------------------------------
|
|
347
|
+
|
|
348
|
+
/**
 * Parses the model's reply to the analysis prompt into a FactAnalysis.
 *
 * The first `{ ... }` span (greedy) is taken from the reply, passed through
 * `jsonrepair` to fix common LLM JSON issues, and parsed. Every field is
 * validated individually so one malformed field does not discard the rest:
 * - `facts` / `entities` that are not arrays are treated as empty;
 * - fact entries that are not objects are skipped;
 * - `oldValue` is dropped when falsy or the literal string "null" (the prompt
 *   tells the model to use null for new facts);
 * - `action` is "replace" only when it is exactly that, otherwise "add";
 * - `confidence` defaults to 0.8 when it is not a finite number and is
 *   clamped to the range 0..1.
 *
 * @param resp - Raw text returned by the LLM.
 * @returns The parsed analysis, or an empty analysis typed "new_entity" when
 *   no JSON object can be recovered.
 */
function parseAnalysisResponse(resp: string): FactAnalysis {
  const emptyAnalysis = (): FactAnalysis => ({
    facts: [],
    infoType: "new_entity",
    entities: [],
    temporalContext: "",
  });

  const match = resp.match(/\{[\s\S]*\}/);
  if (!match) {
    return emptyAnalysis();
  }

  try {
    // Use jsonrepair to fix common LLM JSON issues
    const parsed: unknown = JSON.parse(jsonrepair(match[0]));
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      return emptyAnalysis();
    }
    const root = parsed as Record<string, unknown>;

    const rawFacts: unknown[] = Array.isArray(root.facts) ? root.facts : [];
    const facts: ExtractedFact[] = [];
    for (const item of rawFacts) {
      if (typeof item !== "object" || item === null) continue;
      const fact = item as Record<string, unknown>;

      const oldValueText = fact.oldValue ? String(fact.oldValue) : "";
      const hasOldValue = oldValueText !== "" && oldValueText.trim().toLowerCase() !== "null";

      const rawConfidence =
        typeof fact.confidence === "number" && Number.isFinite(fact.confidence)
          ? fact.confidence
          : 0.8;

      facts.push({
        category: String(fact.category ?? "other"),
        oldValue: hasOldValue ? oldValueText : undefined,
        newValue: String(fact.newValue ?? ""),
        action: fact.action === "replace" ? "replace" : "add",
        sourceSentence: String(fact.sourceSentence ?? ""),
        confidence: Math.min(1, Math.max(0, rawConfidence)),
      });
    }

    const rawEntities: unknown[] = Array.isArray(root.entities) ? root.entities : [];

    return {
      facts,
      infoType: normalizeInfoType(String(root.infoType ?? "new_entity")),
      entities: rawEntities.map(String),
      temporalContext: String(root.temporalContext ?? ""),
    };
  } catch {
    // Unrepairable JSON: fall back to an empty analysis rather than throwing.
    return emptyAnalysis();
  }
}
|
|
384
|
+
|
|
385
|
+
/**
 * Parses the model's reply to the merge prompt.
 *
 * The first `{ ... }` span (greedy) is repaired with `jsonrepair` and parsed.
 * A reply is only accepted when `compiledTruth` is a non-blank string;
 * otherwise the result is reported as unchanged with an empty truth, so that
 * a missing or non-string field can never be passed on as new page content.
 * `changed` is true only for boolean `true` or the string "true" (a plain
 * `Boolean(...)` would turn the string "false" into `true`).
 *
 * @param resp - Raw text returned by the LLM.
 * @returns The parsed merge result; on failure `compiledTruth` is "",
 *   `changed` is false and `changeType` is "none".
 */
function parseMergeResponse(resp: string): { compiledTruth: string; changed: boolean; changeType: CompileResult["changeType"]; changeSummary: string } {
  const failed = (summary = "Failed to parse LLM response") => ({
    compiledTruth: "",
    changed: false,
    changeType: "none" as const,
    changeSummary: summary,
  });

  const match = resp.match(/\{[\s\S]*\}/);
  if (!match) {
    return failed();
  }

  try {
    // Use jsonrepair to fix common LLM JSON issues
    const parsed: unknown = JSON.parse(jsonrepair(match[0]));
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      return failed();
    }
    const root = parsed as Record<string, unknown>;

    const compiledTruth = typeof root.compiledTruth === "string" ? root.compiledTruth : "";
    const changeSummary = String(root.changeSummary ?? "");

    // Without usable content there is nothing to apply; keep the model's
    // summary when it gave one.
    if (compiledTruth.trim() === "") {
      return failed(changeSummary || undefined);
    }

    const changed =
      root.changed === true ||
      (typeof root.changed === "string" && root.changed.trim().toLowerCase() === "true");

    return {
      compiledTruth,
      changed,
      changeType: normalizeChangeType(String(root.changeType ?? "none")),
      changeSummary,
    };
  } catch {
    return failed();
  }
}
|
|
415
|
+
|
|
416
|
+
/**
 * Parses the model's reply to the timeline prompt into timeline entries.
 *
 * The first `[ ... ]` span (greedy) is repaired with `jsonrepair` and parsed.
 * Non-object items are ignored; each remaining item is converted field by
 * field, with the summary cut to 120 characters and every entry tagged with
 * `pageSlug`. Any failure while repairing, parsing or converting yields an
 * empty list.
 */
function parseTimelineResponse(resp: string, pageSlug: string): TimelineEntry[] {
  const arraySpan = resp.match(/\[[\s\S]*\]/);
  if (arraySpan === null) {
    return [];
  }

  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  try {
    // Use jsonrepair to fix common LLM JSON issues
    const items = JSON.parse(jsonrepair(arraySpan[0])) as unknown[];
    return items.filter(isRecord).map((item) => ({
      pageSlug,
      date: String(item.date ?? ""),
      source: String(item.source ?? "manual"),
      summary: String(item.summary ?? "").slice(0, 120),
      detail: String(item.detail ?? ""),
    }));
  } catch {
    return [];
  }
}
|
|
443
|
+
|
|
444
|
+
// ---------------------------------------------------------------------------
|
|
445
|
+
// Helpers
|
|
446
|
+
// ---------------------------------------------------------------------------
|
|
447
|
+
|
|
448
|
+
/**
 * Maps a raw info-type string from the model onto the FactAnalysis union.
 *
 * Matching ignores case and surrounding whitespace and treats runs of spaces
 * or hyphens as underscores, so "Status Update" and "new-event" are
 * recognized instead of silently falling back (the info type decides whether
 * content is merged or appended).
 *
 * @param raw - Info type as produced by the model.
 * @returns The matching info type, or "new_entity" when nothing matches.
 */
function normalizeInfoType(raw: string): FactAnalysis["infoType"] {
  const valid = ["status_update", "new_event", "correction", "confirmation", "new_entity"] as const;
  const canonical = raw.trim().toLowerCase().replace(/[\s-]+/g, "_");
  return valid.find((candidate) => candidate === canonical) ?? "new_entity";
}
|
|
454
|
+
|
|
455
|
+
/**
 * Maps a raw change-type string onto the CompileResult union, ignoring case
 * and surrounding whitespace. Unrecognized values become "none".
 */
function normalizeChangeType(raw: string): CompileResult["changeType"] {
  const candidate = raw.toLowerCase().trim();
  switch (candidate) {
    case "append":
    case "update":
    case "replace":
    case "none":
    case "conflict":
      return candidate;
    default:
      return "none";
  }
}
|
|
461
|
+
|
|
462
|
+
// resolveApiKey is now imported from llm-client module
|
|
463
|
+
|
|
464
|
+
/**
 * Places already-formatted bullet text at the end of the document's
 * `## Facts` section.
 *
 * - Blank document: the result is a new `## Facts` section with the bullets.
 * - No `## Facts` heading line: a new section is appended to the document.
 * - Otherwise the bullets go after the last non-blank line of the section,
 *   i.e. before the next level-1/level-2 heading, so they stay under
 *   `## Facts` even when other sections follow it.
 *
 * The heading must be a line of its own (`## Facts`, optional trailing
 * whitespace); text that merely contains "## Facts" does not count.
 */
function insertIntoFactsSection(current: string, bullets: string): string {
  if (!current.trim()) {
    return `## Facts\n\n${bullets}`;
  }

  const docLines = current.split("\n");
  const headingIndex = docLines.findIndex((line) => /^##\s+Facts\s*$/.test(line));
  if (headingIndex === -1) {
    return `${current}\n\n## Facts\n\n${bullets}`;
  }

  // The section ends at the next level-1 or level-2 heading, or at EOF.
  let sectionEnd = docLines.findIndex(
    (line, index) => index > headingIndex && /^#{1,2}\s/.test(line),
  );
  if (sectionEnd === -1) {
    sectionEnd = docLines.length;
  }

  // Step back over trailing blank lines so the bullet joins the existing list.
  let insertAt = sectionEnd;
  while (insertAt > headingIndex + 1 && (docLines[insertAt - 1] ?? "").trim() === "") {
    insertAt--;
  }

  docLines.splice(insertAt, 0, bullets);
  return docLines.join("\n");
}

/**
 * Adds the raw new information to the compiled truth as one bullet with
 * source attribution, dated with today's date (UTC, YYYY-MM-DD).
 *
 * @param current - Existing compiled truth (may be empty).
 * @param newInfo - Text to record; surrounding whitespace is trimmed.
 * @param source - Source label used in the attribution.
 * @returns The compiled truth with the bullet placed in its `## Facts` section.
 */
function appendFact(current: string, newInfo: string, source: string): string {
  const timestamp = new Date().toISOString().slice(0, 10);
  const newLine = `- ${newInfo.trim()} (Source: ${source}, ${timestamp})`;
  return insertIntoFactsSection(current, newLine);
}

/**
 * Adds one `**category**: value` bullet per extracted fact to the compiled
 * truth, each with source attribution dated today (UTC, YYYY-MM-DD).
 *
 * @param current - Existing compiled truth (may be empty).
 * @param facts - Facts to record; an empty list leaves `current` unchanged.
 * @param source - Source label used in the attribution.
 * @returns The compiled truth with the bullets placed in its `## Facts` section.
 */
function appendStructuredFacts(current: string, facts: ExtractedFact[], source: string): string {
  if (facts.length === 0) {
    return current;
  }

  const timestamp = new Date().toISOString().slice(0, 10);
  const newLines = facts
    .map((f) => `- **${f.category}**: ${f.newValue} (Source: ${source}, ${timestamp})`)
    .join("\n");

  return insertIntoFactsSection(current, newLines);
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { OpenAIEmbeddingFunction } from "@seekdb/openai";
|
|
2
|
+
import type { EmbeddingFunction } from "seekdb";
|
|
3
|
+
import type { ResolvedEmbed } from "../settings";
|
|
4
|
+
import { LocalHashEmbeddingFunction } from "./hash-embed";
|
|
5
|
+
|
|
6
|
+
/**
 * Embedding service: kept separate from the seekdb business database
 * (EBRAIN_SEEKDB_*) and controlled only by EBRAIN_EMBED_*.
 * - `hash` (default): local deterministic vectors, no network.
 * - `openai_compatible`: OpenAI-compatible HTTP endpoint (e.g. DashScope
 *   compatible-mode).
 *
 * When `cfg` is omitted the configuration is read from environment variables.
 * When the provider is `openai_compatible` but no API key is available,
 * a warning is logged and the hash embedding is used instead.
 */
export function createBrainEmbeddingFunction(cfg?: ResolvedEmbed): EmbeddingFunction {
  // No resolved settings passed: fall back to env vars.
  if (!cfg) {
    return createFromEnv();
  }

  if (cfg.provider !== "openai_compatible") {
    return new LocalHashEmbeddingFunction();
  }

  const endpoint = {
    baseURL: cfg.baseURL,
    modelName: cfg.model,
    dimensions: cfg.dimensions,
  };

  // Workaround: seekdb's Schema.fromJSON loads stored embedding function config
  // (e.g. { name: "openai", properties: {} }) and instantiates it WITHOUT
  // the API key. Setting OPENAI_API_KEY ensures seekdb can instantiate it.
  if (cfg.apiKey) {
    process.env.OPENAI_API_KEY = cfg.apiKey;
    return new OpenAIEmbeddingFunction({ ...endpoint, apiKey: cfg.apiKey });
  }

  // No direct key: look it up in the environment variable named by the config.
  const keyFromEnv = process.env[cfg.apiKeyEnv]?.trim();
  if (!keyFromEnv) {
    console.warn(
      `[ebrain] embed provider=openai_compatible but no API key; falling back to hash.`,
    );
    return new LocalHashEmbeddingFunction();
  }

  process.env.OPENAI_API_KEY = keyFromEnv;
  return new OpenAIEmbeddingFunction({ ...endpoint, apiKeyEnvVar: cfg.apiKeyEnv });
}
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Legacy fallback: read directly from env vars (backward compatible)
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
/** Base URL used when EBRAIN_EMBED_BASE_URL is unset or blank. */
const DEFAULT_DASHSCOPE_COMPAT_URL =
  "https://dashscope.aliyuncs.com/compatible-mode/v1";
/** Model name used when EBRAIN_EMBED_MODEL is unset or blank. */
const DEFAULT_EMBED_MODEL = "text-embedding-v4";
/** Vector size used when EBRAIN_EMBED_DIMENSIONS is unset or blank. */
const DEFAULT_EMBED_DIMENSIONS = 1024;
/** Name of the env var holding the API key when EBRAIN_EMBED_API_KEY_ENV is unset or blank. */
const DEFAULT_KEY_ENV = "DASHSCOPE_API_KEY";

/**
 * Builds the embedding function purely from environment variables.
 *
 * - EBRAIN_EMBED_PROVIDER other than `openai_compatible` (default `hash`)
 *   selects the local hash embedding.
 * - EBRAIN_EMBED_BASE_URL, EBRAIN_EMBED_MODEL and EBRAIN_EMBED_DIMENSIONS
 *   override the defaults above.
 * - The API key comes from EBRAIN_EMBED_API_KEY, or else from the variable
 *   named by EBRAIN_EMBED_API_KEY_ENV. Without a key a warning is logged and
 *   the hash embedding is used.
 *
 * Side effect: when a key is found it is also written to OPENAI_API_KEY.
 *
 * @returns The configured embedding function.
 * @throws Error when EBRAIN_EMBED_DIMENSIONS is not a positive integer
 *   (fractional values such as "1.5" are rejected, since a vector cannot
 *   have a fractional number of dimensions).
 */
function createFromEnv(): EmbeddingFunction {
  const provider = (process.env.EBRAIN_EMBED_PROVIDER ?? "hash")
    .trim()
    .toLowerCase();
  if (provider !== "openai_compatible") {
    return new LocalHashEmbeddingFunction();
  }

  const baseURL =
    process.env.EBRAIN_EMBED_BASE_URL?.trim() || DEFAULT_DASHSCOPE_COMPAT_URL;
  const modelName =
    process.env.EBRAIN_EMBED_MODEL?.trim() || DEFAULT_EMBED_MODEL;
  const dimensionsRaw = process.env.EBRAIN_EMBED_DIMENSIONS?.trim();
  const dimensions = dimensionsRaw
    ? Number(dimensionsRaw)
    : DEFAULT_EMBED_DIMENSIONS;
  // Number.isInteger is false for NaN, Infinity and fractional values.
  if (!Number.isInteger(dimensions) || dimensions <= 0) {
    throw new Error(
      `[ebrain] EBRAIN_EMBED_DIMENSIONS must be a positive integer, got: ${dimensionsRaw}`,
    );
  }

  const directKey = process.env.EBRAIN_EMBED_API_KEY?.trim();
  const keyEnv =
    process.env.EBRAIN_EMBED_API_KEY_ENV?.trim() || DEFAULT_KEY_ENV;
  const fromNamedEnv = process.env[keyEnv]?.trim();
  const resolvedKey = directKey || fromNamedEnv;

  if (!resolvedKey) {
    console.warn(
      `[ebrain] EBRAIN_EMBED_PROVIDER=openai_compatible but no API key (set EBRAIN_EMBED_API_KEY or ${keyEnv}); falling back to hash embedding.`,
    );
    return new LocalHashEmbeddingFunction();
  }

  // Set OPENAI_API_KEY for seekdb's Schema.fromJSON fallback
  process.env.OPENAI_API_KEY = resolvedKey;

  // A directly supplied key is passed as-is; otherwise the embedding function
  // is told which env var to read the key from.
  if (directKey) {
    return new OpenAIEmbeddingFunction({
      baseURL,
      modelName,
      dimensions,
      apiKey: directKey,
    });
  }

  return new OpenAIEmbeddingFunction({
    baseURL,
    modelName,
    dimensions,
    apiKeyEnvVar: keyEnv,
  });
}
|