ex-brain 0.2.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/ai/ax-pipeline.ts +114 -0
- package/src/ai/compiler.ts +118 -113
- package/src/ai/entity-link.ts +96 -78
- package/src/ai/timeline-extractor.ts +110 -99
- package/src/commands/compile-cmd.ts +1 -1
- package/src/commands/entity-links.ts +105 -0
- package/src/commands/import-cmd.ts +219 -0
- package/src/commands/import-put.ts +180 -0
- package/src/commands/index.ts +30 -2314
- package/src/commands/misc-cmds.ts +190 -0
- package/src/commands/misc-commands.ts +252 -0
- package/src/commands/put-cmd.ts +525 -0
- package/src/commands/query-cmd.ts +486 -0
- package/src/commands/shared.ts +109 -0
- package/src/commands/timeline-cmd.ts +159 -0
- package/src/config/index.ts +53 -0
- package/src/config/init.ts +50 -0
- package/src/config/paths.ts +21 -0
- package/src/config/schema.ts +121 -0
- package/src/config/settings.ts +168 -0
- package/src/db/client.ts +1 -1
- package/src/markdown/document-loader.ts +30 -2
- package/src/repositories/brain-repo.ts +43 -1
- package/src/settings.ts +27 -282
- /package/src/{config.ts → slug-utils.ts} +0 -0
package/src/commands/index.ts
CHANGED
|
@@ -1,179 +1,21 @@
|
|
|
1
|
-
import { basename, extname, resolve } from "node:path";
|
|
2
1
|
import { readFileSync } from "node:fs";
|
|
3
|
-
import { createHash } from "node:crypto";
|
|
4
2
|
import { Command } from "commander";
|
|
5
|
-
import { DEFAULT_DB_NAME
|
|
6
|
-
import { BrainDb } from "../db/client";
|
|
7
|
-
import {
|
|
8
|
-
collectMarkdownFiles,
|
|
9
|
-
ensureDir,
|
|
10
|
-
fileExists,
|
|
11
|
-
pathToSlug,
|
|
12
|
-
readMaybeStdin,
|
|
13
|
-
readTextFile,
|
|
14
|
-
slugToPath,
|
|
15
|
-
writeTextFile,
|
|
16
|
-
} from "../markdown/io";
|
|
17
|
-
import { loadDocument, isRemoteUrl, type DocumentKind } from "../markdown/document-loader";
|
|
18
|
-
import {
|
|
19
|
-
extractTimelineLines,
|
|
20
|
-
extractWikiStyleLinks,
|
|
21
|
-
parsePageMarkdown,
|
|
22
|
-
renderPageMarkdown,
|
|
23
|
-
} from "../markdown/parser";
|
|
24
|
-
import { BrainRepository } from "../repositories/brain-repo";
|
|
25
|
-
import { loadSettings, SETTINGS_PATH, DEFAULT_DB_PATH, type ResolvedLLM } from "../settings";
|
|
26
|
-
import { extractRelations, entityToSlug, type EntityType } from "../ai/entity-link";
|
|
3
|
+
import { DEFAULT_DB_NAME } from "../slug-utils";
|
|
27
4
|
import { registerCompileCommands } from "./compile-cmd";
|
|
28
5
|
import { registerGraphCommand } from "./graph-cmd";
|
|
29
|
-
import {
|
|
6
|
+
import { registerPutCommand } from "./put-cmd";
|
|
7
|
+
import { registerQueryCommand } from "./query-cmd";
|
|
8
|
+
import { registerImportCommand } from "./import-cmd";
|
|
9
|
+
import { registerTimelineCommand } from "./timeline-cmd";
|
|
10
|
+
import { registerTagCommand, registerRawCommand, registerLinkCommand } from "./misc-cmds";
|
|
30
11
|
import {
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
header,
|
|
39
|
-
separator,
|
|
40
|
-
createSpinner,
|
|
41
|
-
formatCount,
|
|
42
|
-
type ProgressSpinner,
|
|
43
|
-
} from "../utils/cli-output";
|
|
44
|
-
|
|
45
|
-
// ---------------------------------------------------------------------------
|
|
46
|
-
// Helpers
|
|
47
|
-
// ---------------------------------------------------------------------------
|
|
48
|
-
|
|
49
|
-
function addDryRun(cmd: Command): Command {
|
|
50
|
-
return cmd.option("--dry-run", "preview changes without executing", false);
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
function isDryRun(opts: Record<string, unknown>): boolean {
|
|
54
|
-
return Boolean(opts.dryRun);
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
* Compute a short SHA-256 hex hash of a string (first 16 chars).
|
|
59
|
-
* Used for detecting duplicate document ingestion.
|
|
60
|
-
*/
|
|
61
|
-
function contentHash(text: string): string {
|
|
62
|
-
return createHash("sha256").update(text, "utf8").digest("hex").slice(0, 16);
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
// Simple progress output to stderr (won't interfere with --json stdout).
|
|
66
|
-
// e.g. "[3/42] import docs/api"
|
|
67
|
-
function progress(label: string, current: number, total: number, json: boolean): void {
|
|
68
|
-
if (json) return;
|
|
69
|
-
process.stderr.write(`[${current}/${total}] ${label}\n`);
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
/**
|
|
73
|
-
* Extract entities and create entity pages + links.
|
|
74
|
-
* Non-blocking: failures produce warnings, not errors.
|
|
75
|
-
*/
|
|
76
|
-
async function applyEntityLinks(
|
|
77
|
-
repo: BrainRepository,
|
|
78
|
-
sourceSlug: string,
|
|
79
|
-
content: string,
|
|
80
|
-
json: boolean,
|
|
81
|
-
): Promise<{ created: number; linked: number }> {
|
|
82
|
-
if (!content.trim()) return { created: 0, linked: 0 };
|
|
83
|
-
|
|
84
|
-
const settings = await loadSettings();
|
|
85
|
-
if (!settings.llm.baseURL) {
|
|
86
|
-
if (!json) {
|
|
87
|
-
warning(`LLM not configured, skipping entity extraction for ${sourceSlug}`);
|
|
88
|
-
}
|
|
89
|
-
return { created: 0, linked: 0 };
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
const spinner = createSpinner();
|
|
93
|
-
if (!json) {
|
|
94
|
-
spinner.start(`Extracting entities from ${sourceSlug}...`);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
const startTime = Date.now();
|
|
98
|
-
let relations;
|
|
99
|
-
try {
|
|
100
|
-
relations = await extractRelations(content, settings.llm);
|
|
101
|
-
} catch (err) {
|
|
102
|
-
if (!json) {
|
|
103
|
-
spinner.fail(`Entity extraction failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
104
|
-
}
|
|
105
|
-
return { created: 0, linked: 0 };
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Filter by confidence
|
|
109
|
-
const confidenceThreshold = settings.extraction.confidenceThreshold;
|
|
110
|
-
const highConfidence = relations.filter((r) => r.confidence >= confidenceThreshold);
|
|
111
|
-
const ignoredCount = relations.length - highConfidence.length;
|
|
112
|
-
|
|
113
|
-
if (highConfidence.length === 0) {
|
|
114
|
-
if (!json) {
|
|
115
|
-
if (relations.length > 0) {
|
|
116
|
-
spinner.warn(`Found ${relations.length} entities but all below confidence threshold (${confidenceThreshold})`);
|
|
117
|
-
} else {
|
|
118
|
-
spinner.warn(`No entities found in content`);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
return { created: 0, linked: 0 };
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
let created = 0;
|
|
125
|
-
let linked = 0;
|
|
126
|
-
const details: string[] = [];
|
|
127
|
-
|
|
128
|
-
for (const r of highConfidence) {
|
|
129
|
-
// 1. Resolve entity slugs (disambiguation)
|
|
130
|
-
const fromCandidate = entityToSlug(r.from.name, r.from.type);
|
|
131
|
-
const toCandidate = entityToSlug(r.to.name, r.to.type);
|
|
132
|
-
|
|
133
|
-
const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
|
|
134
|
-
const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
|
|
135
|
-
|
|
136
|
-
// 2. Ensure entity pages exist
|
|
137
|
-
const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, sourceSlug);
|
|
138
|
-
const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, sourceSlug);
|
|
139
|
-
if (c1) { created += 1; details.push(`Created: ${r.from.name} (${r.from.type})`); }
|
|
140
|
-
if (c2) { created += 1; details.push(`Created: ${r.to.name} (${r.to.type})`); }
|
|
141
|
-
|
|
142
|
-
// 3. Link between entities (context includes relation type)
|
|
143
|
-
await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
|
|
144
|
-
linked += 1;
|
|
145
|
-
|
|
146
|
-
// 4. Link from source document to entities (for backlinks tracing)
|
|
147
|
-
await repo.link(sourceSlug, fromSlug, `Mentions ${r.from.name}`);
|
|
148
|
-
linked += 1;
|
|
149
|
-
await repo.link(sourceSlug, toSlug, `Mentions ${r.to.name}`);
|
|
150
|
-
linked += 1;
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
if (!json) {
|
|
154
|
-
const duration = formatDuration(Date.now() - startTime);
|
|
155
|
-
const entityNames = [...new Set(highConfidence.flatMap((r) => [r.from.name, r.to.name]))];
|
|
156
|
-
spinner.succeed(`Extracted ${entityNames.length} entities: ${entityNames.join(", ")}`);
|
|
157
|
-
|
|
158
|
-
// Print detailed info
|
|
159
|
-
subItem(`${created} entity pages created`);
|
|
160
|
-
subItem(`${linked} links added`);
|
|
161
|
-
if (ignoredCount > 0) {
|
|
162
|
-
subItem(`${ignoredCount} low-confidence relations ignored`);
|
|
163
|
-
}
|
|
164
|
-
subItem(`Completed in ${duration}`);
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
return { created, linked };
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
async function resolveInput(
|
|
171
|
-
fileOpt: string | undefined,
|
|
172
|
-
stdin: boolean,
|
|
173
|
-
): Promise<string> {
|
|
174
|
-
if (fileOpt) return readTextFile(resolve(fileOpt));
|
|
175
|
-
return readMaybeStdin().then((s) => s ?? "");
|
|
176
|
-
}
|
|
12
|
+
registerExportCommand,
|
|
13
|
+
registerEmbedCommand,
|
|
14
|
+
registerInitCommand,
|
|
15
|
+
registerStatsCommand,
|
|
16
|
+
registerConfigCommand,
|
|
17
|
+
registerServeCommand,
|
|
18
|
+
} from "./misc-commands";
|
|
177
19
|
|
|
178
20
|
// ---------------------------------------------------------------------------
|
|
179
21
|
// Build
|
|
@@ -200,1540 +42,20 @@ Examples:
|
|
|
200
42
|
.option("--db <path>", "database path (overrides settings.json)")
|
|
201
43
|
.option("--json", "output as JSON", false);
|
|
202
44
|
|
|
203
|
-
//
|
|
204
|
-
|
|
205
|
-
program
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
embed: {
|
|
218
|
-
provider: settings.embed.provider,
|
|
219
|
-
baseURL: settings.embed.baseURL,
|
|
220
|
-
model: settings.embed.model,
|
|
221
|
-
dimensions: settings.embed.dimensions,
|
|
222
|
-
hasApiKey:
|
|
223
|
-
!!settings.embed.apiKey ||
|
|
224
|
-
!!process.env[settings.embed.apiKeyEnv],
|
|
225
|
-
},
|
|
226
|
-
llm: {
|
|
227
|
-
baseURL: settings.llm.baseURL || "(not configured)",
|
|
228
|
-
model: settings.llm.model,
|
|
229
|
-
hasApiKey:
|
|
230
|
-
!!settings.llm.apiKey ||
|
|
231
|
-
!!process.env[settings.llm.apiKeyEnv],
|
|
232
|
-
},
|
|
233
|
-
});
|
|
234
|
-
});
|
|
235
|
-
|
|
236
|
-
// -- page CRUD ------------------------------------------------------------
|
|
237
|
-
|
|
238
|
-
// -- put ------------------------------------------------------------------
|
|
239
|
-
// Auto-detects file type: markdown goes through parsePageMarkdown,
|
|
240
|
-
// other formats (pdf, docx, html, txt, json) go through loadDocument.
|
|
241
|
-
|
|
242
|
-
/** Non-markdown extensions that should use the document ingestion path. */
|
|
243
|
-
const DOC_EXTENSIONS = new Set([
|
|
244
|
-
"pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
|
|
245
|
-
]);
|
|
246
|
-
|
|
247
|
-
/** Whether a file path should be treated as a document (not markdown). */
|
|
248
|
-
function isDocumentFile(filePath: string, forceKind?: string): boolean {
|
|
249
|
-
if (forceKind && forceKind !== "markdown") return true;
|
|
250
|
-
const ext = extname(filePath).toLowerCase().replace(/^\./, "");
|
|
251
|
-
return DOC_EXTENSIONS.has(ext);
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
addDryRun(
|
|
255
|
-
program
|
|
256
|
-
.command("put")
|
|
257
|
-
.argument("[slug]", "page slug (optional; auto-generated if omitted)")
|
|
258
|
-
.option("--file <path>", "read content from file (markdown, pdf, docx, html, txt, json)")
|
|
259
|
-
.option("--stdin", "read markdown from stdin", false)
|
|
260
|
-
.option("--type <type>", "page type override")
|
|
261
|
-
.option("--title <title>", "page title override")
|
|
262
|
-
.option("--format <kind>", "force document kind (pdf|docx|html|json|markdown|text) — only needed for --file with non-md files when auto-detect fails")
|
|
263
|
-
.option("--max-bytes <number>", "max bytes for URL/file ingest", "52428800")
|
|
264
|
-
.option("--timeout <ms>", "fetch timeout for URLs in ms", "30000")
|
|
265
|
-
.description(
|
|
266
|
-
"create or update a page (idempotent; upserts by slug). Auto-detects file type: markdown is parsed normally, PDF/DOCX/HTML/TXT/JSON are extracted and ingested.",
|
|
267
|
-
)
|
|
268
|
-
.addHelpText(
|
|
269
|
-
"after",
|
|
270
|
-
`
|
|
271
|
-
Examples:
|
|
272
|
-
ebrain put --file api.md # markdown → parsePageMarkdown
|
|
273
|
-
ebrain put docs/api --file api.md # explicit slug
|
|
274
|
-
ebrain put --file report.pdf # pdf → auto-extract text
|
|
275
|
-
ebrain put docs/report --file report.pdf # explicit slug for pdf
|
|
276
|
-
ebrain put --file article.docx # docx → auto-extract text
|
|
277
|
-
ebrain put --file https://example.com/a.pdf # URL → download + extract
|
|
278
|
-
cat note.md | ebrain put --stdin # auto-generate slug from title/timestamp
|
|
279
|
-
ebrain put --title "My Note" --stdin # auto-generate slug from title
|
|
280
|
-
ebrain put people/john --type person --title "John Doe"
|
|
281
|
-
ebrain put docs/api --file api.md --dry-run
|
|
282
|
-
`,
|
|
283
|
-
),
|
|
284
|
-
).action(
|
|
285
|
-
async (
|
|
286
|
-
slug: string | undefined,
|
|
287
|
-
opts: {
|
|
288
|
-
file?: string;
|
|
289
|
-
stdin?: boolean;
|
|
290
|
-
type?: string;
|
|
291
|
-
title?: string;
|
|
292
|
-
format?: string;
|
|
293
|
-
maxBytes?: string;
|
|
294
|
-
timeout?: string;
|
|
295
|
-
dryRun?: boolean;
|
|
296
|
-
},
|
|
297
|
-
) => {
|
|
298
|
-
// ── Branch 1: document file (pdf/docx/html/txt/json or URL) ──
|
|
299
|
-
const forceKind = opts.format as DocumentKind | undefined;
|
|
300
|
-
if (opts.file && isDocumentFile(opts.file, opts.format)) {
|
|
301
|
-
const loaded = await loadDocument(opts.file, {
|
|
302
|
-
forceKind,
|
|
303
|
-
fetchTimeoutMs: opts.timeout ? Number(opts.timeout) : undefined,
|
|
304
|
-
maxBytes: opts.maxBytes ? Number(opts.maxBytes) : undefined,
|
|
305
|
-
});
|
|
306
|
-
const content = loaded.text;
|
|
307
|
-
const fileName = loaded.fileName;
|
|
308
|
-
const kind = loaded.kind;
|
|
309
|
-
const sourceRef = loaded.source;
|
|
310
|
-
const sourceType = loaded.sourceType;
|
|
311
|
-
const mimeType = loaded.mimeType;
|
|
312
|
-
const bytes = loaded.bytes;
|
|
313
|
-
const metadata = loaded.metadata;
|
|
314
|
-
|
|
315
|
-
let finalSlug = slug;
|
|
316
|
-
if (!finalSlug) {
|
|
317
|
-
const nameNoExt = fileName.replace(/\.[^.]+$/, "");
|
|
318
|
-
const slugBase = normalizeLongSlug(slugify(nameNoExt));
|
|
319
|
-
finalSlug = `ingest/${slugBase}`;
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
const type = opts.type ?? kind;
|
|
323
|
-
const title =
|
|
324
|
-
opts.title ??
|
|
325
|
-
String(slugToTitle(finalSlug));
|
|
326
|
-
const hash = contentHash(content);
|
|
327
|
-
const frontmatter: Record<string, unknown> = {
|
|
328
|
-
sourceFile: sourceRef,
|
|
329
|
-
sourceType,
|
|
330
|
-
sourceKind: kind,
|
|
331
|
-
sourceMimeType: mimeType,
|
|
332
|
-
sourceBytes: bytes,
|
|
333
|
-
sourceFileName: fileName,
|
|
334
|
-
_contentHash: hash,
|
|
335
|
-
...metadata,
|
|
336
|
-
};
|
|
337
|
-
|
|
338
|
-
if (isDryRun(opts)) {
|
|
339
|
-
print(program, {
|
|
340
|
-
dryRun: true,
|
|
341
|
-
action: "put",
|
|
342
|
-
slug: finalSlug,
|
|
343
|
-
type,
|
|
344
|
-
title,
|
|
345
|
-
kind,
|
|
346
|
-
sourceType,
|
|
347
|
-
sourceRef,
|
|
348
|
-
mimeType,
|
|
349
|
-
bytes,
|
|
350
|
-
contentLength: content.length,
|
|
351
|
-
contentHash: hash,
|
|
352
|
-
metadata,
|
|
353
|
-
});
|
|
354
|
-
return;
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
await withRepo(program, async (repo) => {
|
|
358
|
-
const jsonOut = isJson(program);
|
|
359
|
-
const spinner = createSpinner();
|
|
360
|
-
const startTime = Date.now();
|
|
361
|
-
|
|
362
|
-
// Check if content has already been ingested (idempotency)
|
|
363
|
-
const existingPage = await repo.getPage(finalSlug);
|
|
364
|
-
const existingHash = existingPage?.frontmatter._contentHash as string | undefined;
|
|
365
|
-
|
|
366
|
-
if (existingHash === hash) {
|
|
367
|
-
if (!jsonOut) {
|
|
368
|
-
header(`Put: ${fileName}`);
|
|
369
|
-
success(`Content unchanged — skipped (hash: ${hash})`);
|
|
370
|
-
}
|
|
371
|
-
print(program, {
|
|
372
|
-
ok: true,
|
|
373
|
-
action: "put",
|
|
374
|
-
slug: finalSlug,
|
|
375
|
-
unchanged: true,
|
|
376
|
-
contentHash: hash,
|
|
377
|
-
});
|
|
378
|
-
return;
|
|
379
|
-
}
|
|
380
|
-
|
|
381
|
-
if (!jsonOut) {
|
|
382
|
-
header(`Put: ${fileName}`);
|
|
383
|
-
keyValue("Kind", kind);
|
|
384
|
-
keyValue("Source", sourceRef);
|
|
385
|
-
if (mimeType) keyValue("Content-Type", mimeType);
|
|
386
|
-
keyValue("Bytes", String(bytes));
|
|
387
|
-
if (existingPage) {
|
|
388
|
-
keyValue("Previous hash", existingHash ?? "none");
|
|
389
|
-
keyValue("New hash", hash);
|
|
390
|
-
}
|
|
391
|
-
spinner.start(`Creating page from ${kind}...`);
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
await repo.putPage({
|
|
395
|
-
slug: finalSlug,
|
|
396
|
-
type,
|
|
397
|
-
title,
|
|
398
|
-
compiledTruth: content,
|
|
399
|
-
timeline: "",
|
|
400
|
-
frontmatter,
|
|
401
|
-
});
|
|
402
|
-
|
|
403
|
-
if (!jsonOut) {
|
|
404
|
-
spinner.succeed(`Page created: ${finalSlug}`);
|
|
405
|
-
keyValue("Type", type);
|
|
406
|
-
keyValue("Content length", `${content.length} chars`);
|
|
407
|
-
}
|
|
408
|
-
|
|
409
|
-
// ── Side-effect operations (only on new/changed content) ──
|
|
410
|
-
await repo.timelineAdd({
|
|
411
|
-
pageSlug: finalSlug,
|
|
412
|
-
date: new Date().toISOString().slice(0, 10),
|
|
413
|
-
source: type,
|
|
414
|
-
summary: `Ingested ${kind} ${fileName}`,
|
|
415
|
-
detail: sourceType === "url" ? `Source URL: ${sourceRef}` : "",
|
|
416
|
-
});
|
|
417
|
-
|
|
418
|
-
try {
|
|
419
|
-
await repo.writeRaw(finalSlug, sourceType, {
|
|
420
|
-
fileName,
|
|
421
|
-
sourceRef,
|
|
422
|
-
kind,
|
|
423
|
-
mimeType,
|
|
424
|
-
bytes,
|
|
425
|
-
metadata,
|
|
426
|
-
ingestedAt: new Date().toISOString(),
|
|
427
|
-
});
|
|
428
|
-
} catch (err) {
|
|
429
|
-
if (!jsonOut) {
|
|
430
|
-
warning(
|
|
431
|
-
`failed to record raw_data: ${err instanceof Error ? err.message : String(err)}`,
|
|
432
|
-
);
|
|
433
|
-
}
|
|
434
|
-
}
|
|
435
|
-
|
|
436
|
-
await applyEntityLinks(repo, finalSlug, content, jsonOut);
|
|
437
|
-
|
|
438
|
-
if (!jsonOut) {
|
|
439
|
-
const duration = formatDuration(Date.now() - startTime);
|
|
440
|
-
success(`Operation completed in ${duration}`);
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
print(program, {
|
|
444
|
-
ok: true,
|
|
445
|
-
action: "put",
|
|
446
|
-
slug: finalSlug,
|
|
447
|
-
kind,
|
|
448
|
-
sourceType,
|
|
449
|
-
sourceRef,
|
|
450
|
-
bytes,
|
|
451
|
-
contentLength: content.length,
|
|
452
|
-
contentHash: hash,
|
|
453
|
-
});
|
|
454
|
-
});
|
|
455
|
-
return;
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
// ── Branch 2: markdown (stdin or .md file) ──
|
|
459
|
-
const input = await resolveInput(opts.file, opts.stdin ?? false);
|
|
460
|
-
if (!input.trim()) {
|
|
461
|
-
throw new Error(
|
|
462
|
-
"empty input — provide --file <path>, --stdin, or pipe markdown",
|
|
463
|
-
);
|
|
464
|
-
}
|
|
465
|
-
const parsed = parsePageMarkdown(input);
|
|
466
|
-
|
|
467
|
-
// Auto-generate slug if not provided
|
|
468
|
-
let finalSlug = slug;
|
|
469
|
-
if (!finalSlug) {
|
|
470
|
-
// Priority: file name > title option > frontmatter title > timestamp
|
|
471
|
-
if (opts.file) {
|
|
472
|
-
const fileName = basename(opts.file).replace(/\.md$/i, "");
|
|
473
|
-
finalSlug = normalizeLongSlug(slugify(fileName));
|
|
474
|
-
} else if (opts.title) {
|
|
475
|
-
finalSlug = normalizeLongSlug(slugify(opts.title));
|
|
476
|
-
} else if (parsed.frontmatter.title) {
|
|
477
|
-
finalSlug = normalizeLongSlug(slugify(String(parsed.frontmatter.title)));
|
|
478
|
-
} else {
|
|
479
|
-
// Use timestamp as fallback
|
|
480
|
-
const timestamp = new Date().toISOString().slice(0, 19).replace(/[-:T]/g, "");
|
|
481
|
-
finalSlug = `notes/${timestamp}`;
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
const type =
|
|
486
|
-
opts.type ??
|
|
487
|
-
String(parsed.frontmatter.type ?? inferTypeFromSlug(finalSlug));
|
|
488
|
-
const title =
|
|
489
|
-
opts.title ??
|
|
490
|
-
String(parsed.frontmatter.title ?? slugToTitle(finalSlug));
|
|
491
|
-
|
|
492
|
-
// Compute content hash and embed in frontmatter for idempotency
|
|
493
|
-
const hash = contentHash(parsed.compiledTruth);
|
|
494
|
-
parsed.frontmatter._contentHash = hash;
|
|
495
|
-
|
|
496
|
-
if (isDryRun(opts)) {
|
|
497
|
-
print(program, {
|
|
498
|
-
dryRun: true,
|
|
499
|
-
action: "put",
|
|
500
|
-
slug: finalSlug,
|
|
501
|
-
type,
|
|
502
|
-
title,
|
|
503
|
-
contentLength: parsed.compiledTruth.length,
|
|
504
|
-
contentHash: hash,
|
|
505
|
-
hasTimeline: !!parsed.timeline,
|
|
506
|
-
frontmatterKeys: Object.keys(parsed.frontmatter),
|
|
507
|
-
});
|
|
508
|
-
return;
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
await withRepo(program, async (repo) => {
|
|
512
|
-
const jsonOut = isJson(program);
|
|
513
|
-
const spinner = createSpinner();
|
|
514
|
-
const startTime = Date.now();
|
|
515
|
-
|
|
516
|
-
// Check if content is unchanged (idempotency)
|
|
517
|
-
const existingPage = await repo.getPage(finalSlug);
|
|
518
|
-
const existingHash = existingPage?.frontmatter._contentHash as string | undefined;
|
|
519
|
-
|
|
520
|
-
if (existingHash === hash) {
|
|
521
|
-
if (!jsonOut) {
|
|
522
|
-
header(`Put: ${finalSlug}`);
|
|
523
|
-
success(`Content unchanged — skipped (hash: ${hash})`);
|
|
524
|
-
}
|
|
525
|
-
print(program, {
|
|
526
|
-
ok: true,
|
|
527
|
-
action: "put",
|
|
528
|
-
slug: finalSlug,
|
|
529
|
-
unchanged: true,
|
|
530
|
-
contentHash: hash,
|
|
531
|
-
});
|
|
532
|
-
return;
|
|
533
|
-
}
|
|
534
|
-
|
|
535
|
-
if (!jsonOut) {
|
|
536
|
-
header(`Put: ${finalSlug}`);
|
|
537
|
-
if (existingPage) {
|
|
538
|
-
keyValue("Previous hash", existingHash ?? "none");
|
|
539
|
-
keyValue("New hash", hash);
|
|
540
|
-
}
|
|
541
|
-
spinner.start(`Creating/updating page...`);
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
const page = await repo.putPage({
|
|
545
|
-
slug: finalSlug,
|
|
546
|
-
type,
|
|
547
|
-
title,
|
|
548
|
-
compiledTruth: parsed.compiledTruth,
|
|
549
|
-
timeline: parsed.timeline,
|
|
550
|
-
frontmatter: parsed.frontmatter,
|
|
551
|
-
});
|
|
552
|
-
|
|
553
|
-
if (!jsonOut) {
|
|
554
|
-
spinner.succeed(`Page saved: ${page.slug}`);
|
|
555
|
-
keyValue("Title", title);
|
|
556
|
-
keyValue("Type", type);
|
|
557
|
-
keyValue("Content length", `${parsed.compiledTruth.length} chars`);
|
|
558
|
-
}
|
|
559
|
-
|
|
560
|
-
await applyEntityLinks(
|
|
561
|
-
repo,
|
|
562
|
-
finalSlug,
|
|
563
|
-
parsed.compiledTruth,
|
|
564
|
-
jsonOut,
|
|
565
|
-
);
|
|
566
|
-
|
|
567
|
-
if (!jsonOut) {
|
|
568
|
-
const duration = formatDuration(Date.now() - startTime);
|
|
569
|
-
success(`Operation completed in ${duration}`);
|
|
570
|
-
}
|
|
571
|
-
|
|
572
|
-
print(program, {
|
|
573
|
-
ok: true,
|
|
574
|
-
slug: page.slug,
|
|
575
|
-
updatedAt: page.updatedAt,
|
|
576
|
-
contentHash: hash,
|
|
577
|
-
});
|
|
578
|
-
});
|
|
579
|
-
},
|
|
580
|
-
);
|
|
581
|
-
|
|
582
|
-
program
|
|
583
|
-
.command("get")
|
|
584
|
-
.argument("<slug>", "page slug")
|
|
585
|
-
.option("--json", "output as JSON (overrides global --json)")
|
|
586
|
-
.description("read a page and render it as markdown")
|
|
587
|
-
.addHelpText(
|
|
588
|
-
"after",
|
|
589
|
-
`
|
|
590
|
-
Examples:
|
|
591
|
-
ebrain get docs/api
|
|
592
|
-
ebrain get docs/api --json
|
|
593
|
-
`,
|
|
594
|
-
)
|
|
595
|
-
.action(async (slug: string, opts: { json?: boolean }) => {
|
|
596
|
-
const localJson = opts.json !== undefined ? opts.json : isJson(program);
|
|
597
|
-
await withRepo(program, async (repo) => {
|
|
598
|
-
const page = await repo.getPage(slug);
|
|
599
|
-
if (!page) {
|
|
600
|
-
throw new Error(`page not found: ${slug}`);
|
|
601
|
-
}
|
|
602
|
-
if (localJson) {
|
|
603
|
-
console.log(JSON.stringify(page, null, 2));
|
|
604
|
-
return;
|
|
605
|
-
}
|
|
606
|
-
console.log(
|
|
607
|
-
renderPageMarkdown(
|
|
608
|
-
page.frontmatter,
|
|
609
|
-
page.compiledTruth,
|
|
610
|
-
page.timeline,
|
|
611
|
-
),
|
|
612
|
-
);
|
|
613
|
-
});
|
|
614
|
-
});
|
|
615
|
-
|
|
616
|
-
addDryRun(
|
|
617
|
-
program
|
|
618
|
-
.command("delete")
|
|
619
|
-
.argument("<slug>", "page slug to delete")
|
|
620
|
-
.description("delete a page and its related data (links, tags, timeline, raw)")
|
|
621
|
-
.addHelpText(
|
|
622
|
-
"after",
|
|
623
|
-
`
|
|
624
|
-
Examples:
|
|
625
|
-
ebrain delete notes/old-draft
|
|
626
|
-
ebrain delete notes/old-draft --dry-run
|
|
627
|
-
`,
|
|
628
|
-
),
|
|
629
|
-
).action(async (slug: string, opts: { dryRun?: boolean }) => {
|
|
630
|
-
if (isDryRun(opts)) {
|
|
631
|
-
await withRepo(program, async (repo) => {
|
|
632
|
-
const page = await repo.getPage(slug);
|
|
633
|
-
if (!page) {
|
|
634
|
-
throw new Error(`page not found: ${slug}`);
|
|
635
|
-
}
|
|
636
|
-
print(program, {
|
|
637
|
-
dryRun: true,
|
|
638
|
-
action: "delete",
|
|
639
|
-
slug,
|
|
640
|
-
title: page.title,
|
|
641
|
-
});
|
|
642
|
-
});
|
|
643
|
-
return;
|
|
644
|
-
}
|
|
645
|
-
await withRepo(program, async (repo) => {
|
|
646
|
-
const jsonOut = isJson(program);
|
|
647
|
-
const spinner = createSpinner();
|
|
648
|
-
|
|
649
|
-
if (!jsonOut) {
|
|
650
|
-
header(`Delete: ${slug}`);
|
|
651
|
-
spinner.start(`Deleting page and related data...`);
|
|
652
|
-
}
|
|
653
|
-
|
|
654
|
-
await repo.deletePage(slug);
|
|
655
|
-
|
|
656
|
-
if (!jsonOut) {
|
|
657
|
-
spinner.succeed(`Page deleted: ${slug}`);
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
print(program, { ok: true, action: "delete", slug });
|
|
661
|
-
});
|
|
662
|
-
});
|
|
663
|
-
|
|
664
|
-
program
|
|
665
|
-
.command("list")
|
|
666
|
-
.option("--type <type>", "filter by page type")
|
|
667
|
-
.option("--tag <tag>", "filter by tag")
|
|
668
|
-
.option("-f, --fields <fields>", "comma-separated fields to display (slug,type,title,createdAt,updatedAt)")
|
|
669
|
-
.option("--limit <number>", "max results", "50")
|
|
670
|
-
.description("list pages")
|
|
671
|
-
.addHelpText(
|
|
672
|
-
"after",
|
|
673
|
-
`
|
|
674
|
-
Examples:
|
|
675
|
-
ebrain list
|
|
676
|
-
ebrain list --type person
|
|
677
|
-
ebrain list -f slug
|
|
678
|
-
ebrain list -f slug,title,type
|
|
679
|
-
`,
|
|
680
|
-
)
|
|
681
|
-
.action(async (opts: Record<string, string | undefined>) => {
|
|
682
|
-
await withRepo(program, async (repo) => {
|
|
683
|
-
const rows = await repo.listPages({
|
|
684
|
-
type: opts.type,
|
|
685
|
-
tag: opts.tag,
|
|
686
|
-
limit: Number(opts.limit),
|
|
687
|
-
});
|
|
688
|
-
|
|
689
|
-
// When --fields is set, show one page per line with tab-separated values
|
|
690
|
-
if (opts.fields) {
|
|
691
|
-
const fields = opts.fields.split(",").map((f) => f.trim());
|
|
692
|
-
for (const row of rows) {
|
|
693
|
-
const vals = fields.map((field) => {
|
|
694
|
-
const val = (row as Record<string, unknown>)[field];
|
|
695
|
-
if (val === undefined || val === null) return "";
|
|
696
|
-
if (typeof val === "object") return JSON.stringify(val);
|
|
697
|
-
return String(val);
|
|
698
|
-
});
|
|
699
|
-
console.log(vals.join("\t"));
|
|
700
|
-
}
|
|
701
|
-
return;
|
|
702
|
-
}
|
|
703
|
-
|
|
704
|
-
print(program, rows);
|
|
705
|
-
});
|
|
706
|
-
});
|
|
707
|
-
|
|
708
|
-
// -- search / query -------------------------------------------------------
|
|
709
|
-
|
|
710
|
-
program
|
|
711
|
-
.command("search")
|
|
712
|
-
.argument("<query>", "full-text search query")
|
|
713
|
-
.option("--type <type>", "filter by page type")
|
|
714
|
-
.option("--limit <number>", "max results", "10")
|
|
715
|
-
.description("full-text / hybrid search")
|
|
716
|
-
.addHelpText(
|
|
717
|
-
"after",
|
|
718
|
-
`
|
|
719
|
-
Examples:
|
|
720
|
-
ebrain search "machine learning"
|
|
721
|
-
ebrain search "quarterly revenue" --type deal --limit 5
|
|
722
|
-
`,
|
|
723
|
-
)
|
|
724
|
-
.action(async (query: string, opts: Record<string, string>) => {
|
|
725
|
-
await withRepo(program, async (repo) => {
|
|
726
|
-
const hits = await repo.search(
|
|
727
|
-
query,
|
|
728
|
-
Number(opts.limit ?? 10),
|
|
729
|
-
opts.type,
|
|
730
|
-
);
|
|
731
|
-
print(program, hits);
|
|
732
|
-
});
|
|
733
|
-
});
|
|
734
|
-
|
|
735
|
-
program
|
|
736
|
-
.command("query")
|
|
737
|
-
.argument("<question>", "natural language question")
|
|
738
|
-
.option("--limit <number>", "max results", "10")
|
|
739
|
-
.option("--llm", "use LLM to answer based on retrieved context", false)
|
|
740
|
-
.option("--context-limit <number>", "max pages to use as context", "5")
|
|
741
|
-
.description("semantic / vector search")
|
|
742
|
-
.addHelpText(
|
|
743
|
-
"after",
|
|
744
|
-
`
|
|
745
|
-
Examples:
|
|
746
|
-
ebrain query "What projects did we ship in Q4?"
|
|
747
|
-
ebrain query "Who leads the ML team?" --limit 5
|
|
748
|
-
ebrain query "What are the key findings?" --llm
|
|
749
|
-
`,
|
|
750
|
-
)
|
|
751
|
-
.action(async (question: string, opts: Record<string, string>) => {
|
|
752
|
-
await withRepo(program, async (repo) => {
|
|
753
|
-
const limit = Number(opts.limit ?? 10);
|
|
754
|
-
const hits = await repo.query(question, limit);
|
|
755
|
-
|
|
756
|
-
// If --llm flag, generate answer based on multi-layer context
|
|
757
|
-
if (opts.llm) {
|
|
758
|
-
const settings = await loadSettings();
|
|
759
|
-
if (!settings.llm.baseURL) {
|
|
760
|
-
print(program, { error: "LLM not configured. Set llm.baseURL in settings." });
|
|
761
|
-
return;
|
|
762
|
-
}
|
|
763
|
-
|
|
764
|
-
const progress = createProgress();
|
|
765
|
-
progress.start("Searching knowledge base...");
|
|
766
|
-
|
|
767
|
-
const contextLimit = Number(opts.contextLimit ?? 5);
|
|
768
|
-
const topHits = hits.slice(0, contextLimit);
|
|
769
|
-
|
|
770
|
-
if (topHits.length === 0) {
|
|
771
|
-
progress.stop();
|
|
772
|
-
process.stderr.write("No relevant pages found.\n");
|
|
773
|
-
print(program, { answer: "No relevant information found in the knowledge base.", sources: [] });
|
|
774
|
-
return;
|
|
775
|
-
}
|
|
776
|
-
|
|
777
|
-
// Collect multi-layer context (primary + raw data + linked pages scored by relevance)
|
|
778
|
-
// ~100KB char budget ≈ 25K tokens, safe for most models
|
|
779
|
-
const MAX_CONTEXT_CHARS = 100_000;
|
|
780
|
-
const ctxStart = Date.now();
|
|
781
|
-
progress.update(`Loading page content...`);
|
|
782
|
-
const { sections, totalChars, stats } = await collectContextForLLM(repo, topHits, question, MAX_CONTEXT_CHARS, (stage) => {
|
|
783
|
-
progress.update(`Loading ${stage}...`);
|
|
784
|
-
});
|
|
785
|
-
const ctxDuration = formatDuration(Date.now() - ctxStart);
|
|
786
|
-
|
|
787
|
-
if (sections.length === 0) {
|
|
788
|
-
progress.stop();
|
|
789
|
-
process.stderr.write("No content could be loaded.\n");
|
|
790
|
-
print(program, { answer: "Failed to load page content.", sources: [] });
|
|
791
|
-
return;
|
|
792
|
-
}
|
|
793
|
-
|
|
794
|
-
progress.succeed(`Loaded ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s) (${ctxDuration})`);
|
|
795
|
-
const startTime = Date.now();
|
|
796
|
-
|
|
797
|
-
const { answer, ok } = await generateAnswerWithStream(question, sections, stats, settings.llm);
|
|
798
|
-
|
|
799
|
-
if (!ok) {
|
|
800
|
-
// If streaming failed, answer contains the error message
|
|
801
|
-
console.log(answer);
|
|
802
|
-
return;
|
|
803
|
-
}
|
|
804
|
-
|
|
805
|
-
const duration = formatDuration(Date.now() - startTime);
|
|
806
|
-
|
|
807
|
-
// Show sources breakdown
|
|
808
|
-
console.log("\n---\n**Sources:**\n");
|
|
809
|
-
for (let i = 0; i < sections.length; i++) {
|
|
810
|
-
const s = sections[i];
|
|
811
|
-
const icon = s.type === 'primary' ? '📄' : s.type === 'raw_data' ? '📎' : '🔗';
|
|
812
|
-
console.log(`${icon} ${i + 1}. [[${s.slug}|${s.title}]] - ${s.label} (${(s.content.length / 1024).toFixed(1)}KB)`);
|
|
813
|
-
}
|
|
814
|
-
console.log(`\n*Context: ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)*`);
|
|
815
|
-
} else {
|
|
816
|
-
print(program, hits);
|
|
817
|
-
}
|
|
818
|
-
});
|
|
819
|
-
});
|
|
820
|
-
|
|
821
|
-
// -- link -----------------------------------------------------------------
|
|
822
|
-
|
|
823
|
-
addDryRun(
|
|
824
|
-
program
|
|
825
|
-
.command("link")
|
|
826
|
-
.argument("<from>", "source page slug")
|
|
827
|
-
.argument("<to>", "target page slug")
|
|
828
|
-
.option("--context <text>", "link context", "")
|
|
829
|
-
.description("create a cross-link between pages (idempotent)")
|
|
830
|
-
.addHelpText(
|
|
831
|
-
"after",
|
|
832
|
-
`
|
|
833
|
-
Examples:
|
|
834
|
-
ebrain link docs/api docs/getting-started
|
|
835
|
-
ebrain link people/john projects/alpha --context "lead"
|
|
836
|
-
ebrain link docs/api docs/getting-started --dry-run
|
|
837
|
-
`,
|
|
838
|
-
),
|
|
839
|
-
).action(
|
|
840
|
-
async (
|
|
841
|
-
from: string,
|
|
842
|
-
to: string,
|
|
843
|
-
opts: { context?: string; dryRun?: boolean },
|
|
844
|
-
) => {
|
|
845
|
-
if (isDryRun(opts)) {
|
|
846
|
-
print(program, {
|
|
847
|
-
dryRun: true,
|
|
848
|
-
action: "link",
|
|
849
|
-
from,
|
|
850
|
-
to,
|
|
851
|
-
context: opts.context ?? "",
|
|
852
|
-
});
|
|
853
|
-
return;
|
|
854
|
-
}
|
|
855
|
-
await withRepo(program, async (repo) => {
|
|
856
|
-
await repo.link(from, to, opts.context ?? "");
|
|
857
|
-
print(program, { ok: true, from, to });
|
|
858
|
-
});
|
|
859
|
-
},
|
|
860
|
-
);
|
|
861
|
-
|
|
862
|
-
program
|
|
863
|
-
.command("backlinks")
|
|
864
|
-
.argument("<slug>", "target page slug")
|
|
865
|
-
.description("list pages that link to this page")
|
|
866
|
-
.addHelpText(
|
|
867
|
-
"after",
|
|
868
|
-
`
|
|
869
|
-
Examples:
|
|
870
|
-
ebrain backlinks docs/api
|
|
871
|
-
`,
|
|
872
|
-
)
|
|
873
|
-
.action(async (slug: string) => {
|
|
874
|
-
await withRepo(program, async (repo) => {
|
|
875
|
-
const links = await repo.backlinks(slug);
|
|
876
|
-
print(program, links);
|
|
877
|
-
});
|
|
878
|
-
});
|
|
879
|
-
|
|
880
|
-
// -- timeline (subcommands) -----------------------------------------------
|
|
881
|
-
|
|
882
|
-
const timelineCmd = program
|
|
883
|
-
.command("timeline")
|
|
884
|
-
.description("manage timeline entries");
|
|
885
|
-
|
|
886
|
-
timelineCmd
|
|
887
|
-
.command("list")
|
|
888
|
-
.argument("<slug>", "page slug")
|
|
889
|
-
.option("--limit <number>", "max results", "50")
|
|
890
|
-
.description("list timeline entries for a page")
|
|
891
|
-
.addHelpText(
|
|
892
|
-
"after",
|
|
893
|
-
`
|
|
894
|
-
Examples:
|
|
895
|
-
ebrain timeline list projects/alpha
|
|
896
|
-
ebrain timeline list projects/alpha --limit 10
|
|
897
|
-
`,
|
|
898
|
-
)
|
|
899
|
-
.action(async (slug: string, opts: Record<string, string>) => {
|
|
900
|
-
await withRepo(program, async (repo) => {
|
|
901
|
-
const rows = await repo.timeline(slug, Number(opts.limit ?? 50));
|
|
902
|
-
print(program, rows);
|
|
903
|
-
});
|
|
904
|
-
});
|
|
905
|
-
|
|
906
|
-
addDryRun(
|
|
907
|
-
timelineCmd
|
|
908
|
-
.command("add")
|
|
909
|
-
.argument("<slug>", "page slug")
|
|
910
|
-
.requiredOption("--date <date>", "date (YYYY-MM-DD or ISO)")
|
|
911
|
-
.requiredOption("--summary <summary>", "one-line summary")
|
|
912
|
-
.option("--source <source>", "event source", "manual")
|
|
913
|
-
.option("--detail <detail>", "detail markdown", "")
|
|
914
|
-
.description("add a timeline entry")
|
|
915
|
-
.addHelpText(
|
|
916
|
-
"after",
|
|
917
|
-
`
|
|
918
|
-
Examples:
|
|
919
|
-
ebrain timeline add projects/alpha --date 2025-03-15 --summary "v1.0 shipped"
|
|
920
|
-
ebrain timeline add projects/alpha --date 2025-03-15 --summary "launch" --source release
|
|
921
|
-
ebrain timeline add projects/alpha --date 2025-03-15 --summary "launch" --dry-run
|
|
922
|
-
`,
|
|
923
|
-
),
|
|
924
|
-
).action(
|
|
925
|
-
async (
|
|
926
|
-
slug: string,
|
|
927
|
-
opts: {
|
|
928
|
-
date: string;
|
|
929
|
-
summary: string;
|
|
930
|
-
source?: string;
|
|
931
|
-
detail?: string;
|
|
932
|
-
dryRun?: boolean;
|
|
933
|
-
},
|
|
934
|
-
) => {
|
|
935
|
-
if (isDryRun(opts)) {
|
|
936
|
-
print(program, {
|
|
937
|
-
dryRun: true,
|
|
938
|
-
action: "timeline-add",
|
|
939
|
-
slug,
|
|
940
|
-
date: opts.date,
|
|
941
|
-
summary: opts.summary,
|
|
942
|
-
source: opts.source ?? "manual",
|
|
943
|
-
});
|
|
944
|
-
return;
|
|
945
|
-
}
|
|
946
|
-
await withRepo(program, async (repo) => {
|
|
947
|
-
await repo.timelineAdd({
|
|
948
|
-
pageSlug: slug,
|
|
949
|
-
date: opts.date,
|
|
950
|
-
source: opts.source ?? "manual",
|
|
951
|
-
summary: opts.summary,
|
|
952
|
-
detail: opts.detail ?? "",
|
|
953
|
-
});
|
|
954
|
-
print(program, {
|
|
955
|
-
ok: true,
|
|
956
|
-
action: "timeline-add",
|
|
957
|
-
slug,
|
|
958
|
-
date: opts.date,
|
|
959
|
-
});
|
|
960
|
-
});
|
|
961
|
-
},
|
|
962
|
-
);
|
|
963
|
-
|
|
964
|
-
addDryRun(
|
|
965
|
-
timelineCmd
|
|
966
|
-
.command("extract")
|
|
967
|
-
.argument("<slug>", "page slug")
|
|
968
|
-
.option("--source <source>", "source identifier", "extracted")
|
|
969
|
-
.option("--default-date <date>", "default date (YYYY-MM-DD)")
|
|
970
|
-
.description("extract timeline events from page content using AI")
|
|
971
|
-
.addHelpText(
|
|
972
|
-
"after",
|
|
973
|
-
`
|
|
974
|
-
Examples:
|
|
975
|
-
ebrain timeline extract companies/river-ai
|
|
976
|
-
ebrain timeline extract docs/meeting --source meeting_notes --default-date 2024-03-15
|
|
977
|
-
`,
|
|
978
|
-
),
|
|
979
|
-
).action(async (slug: string, opts: { source?: string; defaultDate?: string; dryRun?: boolean }) => {
|
|
980
|
-
if (isDryRun(opts)) {
|
|
981
|
-
print(program, {
|
|
982
|
-
dryRun: true,
|
|
983
|
-
action: "timeline-extract",
|
|
984
|
-
slug,
|
|
985
|
-
source: opts.source ?? "extracted",
|
|
986
|
-
defaultDate: opts.defaultDate ?? new Date().toISOString().slice(0, 10),
|
|
987
|
-
});
|
|
988
|
-
return;
|
|
989
|
-
}
|
|
990
|
-
await withRepo(program, async (repo) => {
|
|
991
|
-
const page = await repo.getPage(slug);
|
|
992
|
-
if (!page) {
|
|
993
|
-
throw new Error(`page not found: ${slug}`);
|
|
994
|
-
}
|
|
995
|
-
const settings = await loadSettings();
|
|
996
|
-
|
|
997
|
-
const progress = createProgress();
|
|
998
|
-
progress.start(`Extracting timeline from ${slug}...`);
|
|
999
|
-
const startTime = Date.now();
|
|
1000
|
-
|
|
1001
|
-
const result = await repo.extractAndAddTimeline(
|
|
1002
|
-
slug,
|
|
1003
|
-
page.compiledTruth,
|
|
1004
|
-
opts.source ?? "extracted",
|
|
1005
|
-
opts.defaultDate ?? new Date().toISOString().slice(0, 10),
|
|
1006
|
-
settings.llm,
|
|
1007
|
-
);
|
|
1008
|
-
|
|
1009
|
-
const duration = formatDuration(Date.now() - startTime);
|
|
1010
|
-
|
|
1011
|
-
if (result.entries.length > 0) {
|
|
1012
|
-
progress.succeed(`${result.entries.length} events extracted (${duration})`);
|
|
1013
|
-
} else {
|
|
1014
|
-
progress.stop();
|
|
1015
|
-
process.stderr.write(`No events found (${duration})\n`);
|
|
1016
|
-
}
|
|
1017
|
-
|
|
1018
|
-
print(program, {
|
|
1019
|
-
ok: true,
|
|
1020
|
-
action: "timeline-extract",
|
|
1021
|
-
slug,
|
|
1022
|
-
entriesAdded: result.entries.length,
|
|
1023
|
-
entries: result.entries,
|
|
1024
|
-
confidence: result.confidence,
|
|
1025
|
-
});
|
|
1026
|
-
});
|
|
1027
|
-
});
|
|
1028
|
-
|
|
1029
|
-
timelineCmd
|
|
1030
|
-
.command("global")
|
|
1031
|
-
.option("--limit <number>", "max results", "100")
|
|
1032
|
-
.description("list timeline entries across all pages")
|
|
1033
|
-
.addHelpText(
|
|
1034
|
-
"after",
|
|
1035
|
-
`
|
|
1036
|
-
Examples:
|
|
1037
|
-
ebrain timeline global
|
|
1038
|
-
ebrain timeline global --limit 20
|
|
1039
|
-
`,
|
|
1040
|
-
)
|
|
1041
|
-
.action(async (opts: Record<string, string>) => {
|
|
1042
|
-
await withRepo(program, async (repo) => {
|
|
1043
|
-
const entries = await repo.timelineGlobal(Number(opts.limit ?? 100));
|
|
1044
|
-
print(program, entries);
|
|
1045
|
-
});
|
|
1046
|
-
});
|
|
1047
|
-
|
|
1048
|
-
// -- tag (subcommands) ----------------------------------------------------
|
|
1049
|
-
|
|
1050
|
-
const tagCmd = program
|
|
1051
|
-
.command("tag")
|
|
1052
|
-
.description("manage tags on a page");
|
|
1053
|
-
|
|
1054
|
-
tagCmd
|
|
1055
|
-
.command("list")
|
|
1056
|
-
.argument("<slug>", "page slug")
|
|
1057
|
-
.description("list tags on a page")
|
|
1058
|
-
.addHelpText(
|
|
1059
|
-
"after",
|
|
1060
|
-
`
|
|
1061
|
-
Examples:
|
|
1062
|
-
ebrain tag list docs/api
|
|
1063
|
-
`,
|
|
1064
|
-
)
|
|
1065
|
-
.action(async (slug: string) => {
|
|
1066
|
-
await withRepo(program, async (repo) => {
|
|
1067
|
-
const tags = await repo.tags(slug);
|
|
1068
|
-
print(program, tags);
|
|
1069
|
-
});
|
|
1070
|
-
});
|
|
1071
|
-
|
|
1072
|
-
addDryRun(
|
|
1073
|
-
tagCmd
|
|
1074
|
-
.command("add")
|
|
1075
|
-
.argument("<slug>", "page slug")
|
|
1076
|
-
.argument("<tag>", "tag to add")
|
|
1077
|
-
.description("add a tag to a page (idempotent)")
|
|
1078
|
-
.addHelpText(
|
|
1079
|
-
"after",
|
|
1080
|
-
`
|
|
1081
|
-
Examples:
|
|
1082
|
-
ebrain tag add docs/api rest
|
|
1083
|
-
ebrain tag add docs/api rest --dry-run
|
|
1084
|
-
`,
|
|
1085
|
-
),
|
|
1086
|
-
).action(async (slug: string, tag: string, opts: { dryRun?: boolean }) => {
|
|
1087
|
-
if (isDryRun(opts)) {
|
|
1088
|
-
print(program, { dryRun: true, action: "tag-add", slug, tag });
|
|
1089
|
-
return;
|
|
1090
|
-
}
|
|
1091
|
-
await withRepo(program, async (repo) => {
|
|
1092
|
-
await repo.tag(slug, tag);
|
|
1093
|
-
print(program, { ok: true, action: "tag-add", slug, tag });
|
|
1094
|
-
});
|
|
1095
|
-
});
|
|
1096
|
-
|
|
1097
|
-
addDryRun(
|
|
1098
|
-
tagCmd
|
|
1099
|
-
.command("remove")
|
|
1100
|
-
.argument("<slug>", "page slug")
|
|
1101
|
-
.argument("<tag>", "tag to remove")
|
|
1102
|
-
.description("remove a tag from a page")
|
|
1103
|
-
.addHelpText(
|
|
1104
|
-
"after",
|
|
1105
|
-
`
|
|
1106
|
-
Examples:
|
|
1107
|
-
ebrain tag remove docs/api outdated
|
|
1108
|
-
ebrain tag remove docs/api outdated --dry-run
|
|
1109
|
-
`,
|
|
1110
|
-
),
|
|
1111
|
-
).action(async (slug: string, tag: string, opts: { dryRun?: boolean }) => {
|
|
1112
|
-
if (isDryRun(opts)) {
|
|
1113
|
-
print(program, { dryRun: true, action: "tag-remove", slug, tag });
|
|
1114
|
-
return;
|
|
1115
|
-
}
|
|
1116
|
-
await withRepo(program, async (repo) => {
|
|
1117
|
-
await repo.untag(slug, tag);
|
|
1118
|
-
print(program, { ok: true, action: "tag-remove", slug, tag });
|
|
1119
|
-
});
|
|
1120
|
-
});
|
|
1121
|
-
|
|
1122
|
-
// -- raw (subcommands) ----------------------------------------------------
|
|
1123
|
-
|
|
1124
|
-
const rawCmd = program
|
|
1125
|
-
.command("raw")
|
|
1126
|
-
.description("manage raw source data for a page");
|
|
1127
|
-
|
|
1128
|
-
rawCmd
|
|
1129
|
-
.command("get")
|
|
1130
|
-
.argument("<slug>", "page slug")
|
|
1131
|
-
.option("--source <source>", "filter by source name")
|
|
1132
|
-
.description("read raw source data for a page")
|
|
1133
|
-
.addHelpText(
|
|
1134
|
-
"after",
|
|
1135
|
-
`
|
|
1136
|
-
Examples:
|
|
1137
|
-
ebrain raw get ingest/report
|
|
1138
|
-
ebrain raw get ingest/report --source crm
|
|
1139
|
-
`,
|
|
1140
|
-
)
|
|
1141
|
-
.action(async (slug: string, opts: { source?: string }) => {
|
|
1142
|
-
await withRepo(program, async (repo) => {
|
|
1143
|
-
const rows = await repo.readRaw(slug, opts.source);
|
|
1144
|
-
print(program, rows);
|
|
1145
|
-
});
|
|
1146
|
-
});
|
|
1147
|
-
|
|
1148
|
-
addDryRun(
|
|
1149
|
-
rawCmd
|
|
1150
|
-
.command("set")
|
|
1151
|
-
.argument("<slug>", "page slug")
|
|
1152
|
-
.requiredOption("--source <source>", "source name")
|
|
1153
|
-
.option("--data <json>", "JSON string")
|
|
1154
|
-
.option("--stdin", "read JSON from stdin", false)
|
|
1155
|
-
.description("write raw source data for a page")
|
|
1156
|
-
.addHelpText(
|
|
1157
|
-
"after",
|
|
1158
|
-
`
|
|
1159
|
-
Examples:
|
|
1160
|
-
ebrain raw set ingest/report --source crm --data '{"rev": 1000}'
|
|
1161
|
-
echo '{"rev": 1000}' | ebrain raw set ingest/report --source crm --stdin
|
|
1162
|
-
ebrain raw set ingest/report --source crm --data '{"rev": 1000}' --dry-run
|
|
1163
|
-
`,
|
|
1164
|
-
),
|
|
1165
|
-
).action(
|
|
1166
|
-
async (
|
|
1167
|
-
slug: string,
|
|
1168
|
-
opts: {
|
|
1169
|
-
source: string;
|
|
1170
|
-
data?: string;
|
|
1171
|
-
stdin?: boolean;
|
|
1172
|
-
dryRun?: boolean;
|
|
1173
|
-
},
|
|
1174
|
-
) => {
|
|
1175
|
-
let data: unknown;
|
|
1176
|
-
if (opts.data) {
|
|
1177
|
-
data = JSON.parse(opts.data);
|
|
1178
|
-
} else if (opts.stdin) {
|
|
1179
|
-
const raw = await readMaybeStdin();
|
|
1180
|
-
if (!raw?.trim()) throw new Error("empty stdin - pipe JSON");
|
|
1181
|
-
data = JSON.parse(raw);
|
|
1182
|
-
} else {
|
|
1183
|
-
throw new Error("provide --data <json> or --stdin");
|
|
1184
|
-
}
|
|
1185
|
-
|
|
1186
|
-
if (isDryRun(opts)) {
|
|
1187
|
-
print(program, {
|
|
1188
|
-
dryRun: true,
|
|
1189
|
-
action: "raw-set",
|
|
1190
|
-
slug,
|
|
1191
|
-
source: opts.source,
|
|
1192
|
-
});
|
|
1193
|
-
return;
|
|
1194
|
-
}
|
|
1195
|
-
|
|
1196
|
-
await withRepo(program, async (repo) => {
|
|
1197
|
-
await repo.writeRaw(slug, opts.source, data);
|
|
1198
|
-
print(program, {
|
|
1199
|
-
ok: true,
|
|
1200
|
-
action: "raw-set",
|
|
1201
|
-
slug,
|
|
1202
|
-
source: opts.source,
|
|
1203
|
-
});
|
|
1204
|
-
});
|
|
1205
|
-
},
|
|
1206
|
-
);
|
|
1207
|
-
|
|
1208
|
-
// -- import / export ------------------------------------------------------
|
|
1209
|
-
|
|
1210
|
-
addDryRun(
|
|
1211
|
-
program
|
|
1212
|
-
.command("import")
|
|
1213
|
-
.argument("<dir>", "directory of markdown files")
|
|
1214
|
-
.description("import a directory of markdown files")
|
|
1215
|
-
.option("--skip-index", "skip vector indexing (useful if seekdb crashes)")
|
|
1216
|
-
.addHelpText(
|
|
1217
|
-
"after",
|
|
1218
|
-
`
|
|
1219
|
-
Examples:
|
|
1220
|
-
ebrain import ./docs
|
|
1221
|
-
ebrain import ./docs --dry-run
|
|
1222
|
-
ebrain import ./docs --skip-index # skip vector indexing
|
|
1223
|
-
`,
|
|
1224
|
-
),
|
|
1225
|
-
).action(async (dir: string, opts: { dryRun?: boolean; skipIndex?: boolean }) => {
|
|
1226
|
-
await withRepo(program, async (repo) => {
|
|
1227
|
-
const root = resolve(dir);
|
|
1228
|
-
const files = await collectMarkdownFiles(root);
|
|
1229
|
-
|
|
1230
|
-
if (isDryRun(opts)) {
|
|
1231
|
-
print(program, {
|
|
1232
|
-
dryRun: true,
|
|
1233
|
-
action: "import",
|
|
1234
|
-
dir: root,
|
|
1235
|
-
filesFound: files.length,
|
|
1236
|
-
slugs: files.map((f) => pathToSlug(f, root)),
|
|
1237
|
-
});
|
|
1238
|
-
return;
|
|
1239
|
-
}
|
|
1240
|
-
|
|
1241
|
-
const jsonOut = isJson(program);
|
|
1242
|
-
const settings = await loadSettings();
|
|
1243
|
-
const spinner = createSpinner();
|
|
1244
|
-
const startTime = Date.now();
|
|
1245
|
-
|
|
1246
|
-
if (!jsonOut) {
|
|
1247
|
-
header(`Import: ${root}`);
|
|
1248
|
-
}
|
|
1249
|
-
|
|
1250
|
-
// Phase 1: Parse all files and collect data
|
|
1251
|
-
if (!jsonOut) {
|
|
1252
|
-
spinner.start(`Scanning ${files.length} files...`);
|
|
1253
|
-
}
|
|
1254
|
-
|
|
1255
|
-
const fileData: Array<{
|
|
1256
|
-
file: string;
|
|
1257
|
-
slug: string;
|
|
1258
|
-
parsed: ReturnType<typeof parsePageMarkdown>;
|
|
1259
|
-
content: string;
|
|
1260
|
-
wikiLinks: string[];
|
|
1261
|
-
timelineEntries: ReturnType<typeof extractTimelineLines>;
|
|
1262
|
-
tags: string[];
|
|
1263
|
-
}> = [];
|
|
1264
|
-
|
|
1265
|
-
for (const file of files) {
|
|
1266
|
-
const rawSlug = pathToSlug(file, root);
|
|
1267
|
-
const slug = normalizeLongSlug(rawSlug);
|
|
1268
|
-
const content = await readTextFile(file);
|
|
1269
|
-
const parsed = parsePageMarkdown(content);
|
|
1270
|
-
const wikiLinks = extractWikiStyleLinks(content).map(normalizeLinkSlug);
|
|
1271
|
-
const timelineEntries = extractTimelineLines(parsed.timeline);
|
|
1272
|
-
const tags = Array.isArray(parsed.frontmatter.tags)
|
|
1273
|
-
? parsed.frontmatter.tags.filter((t): t is string => typeof t === "string")
|
|
1274
|
-
: [];
|
|
1275
|
-
fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
|
|
1276
|
-
}
|
|
1277
|
-
|
|
1278
|
-
if (!jsonOut) {
|
|
1279
|
-
spinner.succeed(`Found ${files.length} markdown files`);
|
|
1280
|
-
}
|
|
1281
|
-
|
|
1282
|
-
// Phase 2: Write all pages first (skip embed for performance)
|
|
1283
|
-
if (!jsonOut) {
|
|
1284
|
-
spinner.start(`Writing ${fileData.length} pages to database...`);
|
|
1285
|
-
}
|
|
1286
|
-
|
|
1287
|
-
const allSlugs: string[] = [];
|
|
1288
|
-
const writeErrors: string[] = [];
|
|
1289
|
-
|
|
1290
|
-
for (let i = 0; i < fileData.length; i++) {
|
|
1291
|
-
const { slug, parsed } = fileData[i]!;
|
|
1292
|
-
if (!jsonOut && i % 20 === 0) {
|
|
1293
|
-
spinner.update(`Writing pages... ${i + 1}/${fileData.length}`);
|
|
1294
|
-
}
|
|
1295
|
-
try {
|
|
1296
|
-
await repo.putPage({
|
|
1297
|
-
slug,
|
|
1298
|
-
type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
|
|
1299
|
-
title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
|
|
1300
|
-
compiledTruth: parsed.compiledTruth,
|
|
1301
|
-
timeline: parsed.timeline,
|
|
1302
|
-
frontmatter: parsed.frontmatter,
|
|
1303
|
-
}, true); // skipEmbed: true for performance
|
|
1304
|
-
allSlugs.push(slug);
|
|
1305
|
-
} catch (err) {
|
|
1306
|
-
writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
|
|
1307
|
-
}
|
|
1308
|
-
}
|
|
1309
|
-
|
|
1310
|
-
if (!jsonOut) {
|
|
1311
|
-
spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
|
|
1312
|
-
if (writeErrors.length > 0) {
|
|
1313
|
-
warning(`${writeErrors.length} pages failed to write`);
|
|
1314
|
-
for (const e of writeErrors.slice(0, 3)) {
|
|
1315
|
-
subItem(e);
|
|
1316
|
-
}
|
|
1317
|
-
if (writeErrors.length > 3) {
|
|
1318
|
-
subItem(`... and ${writeErrors.length - 3} more`);
|
|
1319
|
-
}
|
|
1320
|
-
}
|
|
1321
|
-
}
|
|
1322
|
-
|
|
1323
|
-
// Phase 3: Parallel entity extraction (main optimization)
|
|
1324
|
-
const BATCH_SIZE = 10;
|
|
1325
|
-
const entityResults = new Map<string, Awaited<ReturnType<typeof extractRelations>>>();
|
|
1326
|
-
|
|
1327
|
-
if (settings.llm.baseURL) {
|
|
1328
|
-
if (!jsonOut) {
|
|
1329
|
-
spinner.start(`Extracting entities with LLM...`);
|
|
1330
|
-
}
|
|
1331
|
-
|
|
1332
|
-
for (let i = 0; i < fileData.length; i += BATCH_SIZE) {
|
|
1333
|
-
const batch = fileData.slice(i, i + BATCH_SIZE);
|
|
1334
|
-
if (!jsonOut) {
|
|
1335
|
-
spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, fileData.length)}/${fileData.length}`);
|
|
1336
|
-
}
|
|
1337
|
-
const batchPromises = batch.map(async ({ slug, content }) => {
|
|
1338
|
-
const relations = await extractRelations(content, settings.llm);
|
|
1339
|
-
return { slug, relations };
|
|
1340
|
-
});
|
|
1341
|
-
const results = await Promise.all(batchPromises);
|
|
1342
|
-
for (const { slug, relations } of results) {
|
|
1343
|
-
entityResults.set(slug, relations);
|
|
1344
|
-
}
|
|
1345
|
-
}
|
|
1346
|
-
|
|
1347
|
-
if (!jsonOut) {
|
|
1348
|
-
spinner.succeed(`Entity extraction complete`);
|
|
1349
|
-
}
|
|
1350
|
-
} else {
|
|
1351
|
-
if (!jsonOut) {
|
|
1352
|
-
warning(`LLM not configured, skipping entity extraction`);
|
|
1353
|
-
}
|
|
1354
|
-
}
|
|
1355
|
-
|
|
1356
|
-
// Phase 4: Write links, tags, timeline, and entity pages
|
|
1357
|
-
if (!jsonOut) {
|
|
1358
|
-
spinner.start(`Creating links, tags, and timeline entries...`);
|
|
1359
|
-
}
|
|
1360
|
-
|
|
1361
|
-
let linkCount = 0;
|
|
1362
|
-
let timelineCount = 0;
|
|
1363
|
-
let entityCount = 0;
|
|
1364
|
-
let tagCount = 0;
|
|
1365
|
-
|
|
1366
|
-
// Collect timeline entries for batch insert
|
|
1367
|
-
const allTimelineEntries: Array<{
|
|
1368
|
-
pageSlug: string;
|
|
1369
|
-
date: string;
|
|
1370
|
-
source: string;
|
|
1371
|
-
summary: string;
|
|
1372
|
-
detail: string;
|
|
1373
|
-
}> = [];
|
|
1374
|
-
|
|
1375
|
-
for (const { slug, wikiLinks, timelineEntries, tags, content } of fileData) {
|
|
1376
|
-
// Wiki links
|
|
1377
|
-
for (const link of wikiLinks) {
|
|
1378
|
-
await repo.link(slug, link, "import");
|
|
1379
|
-
linkCount++;
|
|
1380
|
-
}
|
|
1381
|
-
|
|
1382
|
-
// Collect timeline entries for batch insert
|
|
1383
|
-
for (const entry of timelineEntries) {
|
|
1384
|
-
allTimelineEntries.push({
|
|
1385
|
-
pageSlug: slug,
|
|
1386
|
-
date: entry.date,
|
|
1387
|
-
source: entry.source,
|
|
1388
|
-
summary: entry.summary,
|
|
1389
|
-
detail: "",
|
|
1390
|
-
});
|
|
1391
|
-
timelineCount++;
|
|
1392
|
-
}
|
|
1393
|
-
|
|
1394
|
-
// Tags
|
|
1395
|
-
for (const tag of tags) {
|
|
1396
|
-
await repo.tag(slug, tag);
|
|
1397
|
-
tagCount++;
|
|
1398
|
-
}
|
|
1399
|
-
|
|
1400
|
-
// Entity links from parallel extraction
|
|
1401
|
-
const relations = entityResults.get(slug);
|
|
1402
|
-
if (relations && relations.length > 0) {
|
|
1403
|
-
const highConfidence = relations.filter(r => r.confidence >= 0.6);
|
|
1404
|
-
for (const r of highConfidence) {
|
|
1405
|
-
const fromCandidate = entityToSlug(r.from.name, r.from.type);
|
|
1406
|
-
const toCandidate = entityToSlug(r.to.name, r.to.type);
|
|
1407
|
-
const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
|
|
1408
|
-
const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
|
|
1409
|
-
|
|
1410
|
-
const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
|
|
1411
|
-
const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
|
|
1412
|
-
if (c1) entityCount++;
|
|
1413
|
-
if (c2) entityCount++;
|
|
1414
|
-
|
|
1415
|
-
await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
|
|
1416
|
-
await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
|
|
1417
|
-
await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
|
|
1418
|
-
linkCount += 3;
|
|
1419
|
-
}
|
|
1420
|
-
}
|
|
1421
|
-
}
|
|
1422
|
-
|
|
1423
|
-
// Batch insert all timeline entries
|
|
1424
|
-
if (allTimelineEntries.length > 0) {
|
|
1425
|
-
await repo.timelineAddBatch(allTimelineEntries);
|
|
1426
|
-
}
|
|
1427
|
-
|
|
1428
|
-
if (!jsonOut) {
|
|
1429
|
-
spinner.succeed(`Created links, tags, and timeline`);
|
|
1430
|
-
}
|
|
1431
|
-
|
|
1432
|
-
// Phase 5: Batch sync all pages to search index
|
|
1433
|
-
if (opts.skipIndex) {
|
|
1434
|
-
if (!jsonOut) {
|
|
1435
|
-
info(`Skipping vector indexing (--skip-index)`);
|
|
1436
|
-
}
|
|
1437
|
-
} else {
|
|
1438
|
-
if (!jsonOut) {
|
|
1439
|
-
spinner.start(`Indexing ${allSlugs.length} pages for search...`);
|
|
1440
|
-
}
|
|
1441
|
-
await repo.embedAll();
|
|
1442
|
-
|
|
1443
|
-
if (!jsonOut) {
|
|
1444
|
-
spinner.succeed(`Search indexing complete`);
|
|
1445
|
-
}
|
|
1446
|
-
}
|
|
1447
|
-
|
|
1448
|
-
const duration = formatDuration(Date.now() - startTime);
|
|
1449
|
-
|
|
1450
|
-
if (!jsonOut) {
|
|
1451
|
-
// Print summary
|
|
1452
|
-
header("Import Summary");
|
|
1453
|
-
keyValue("Files imported", String(files.length));
|
|
1454
|
-
keyValue("Pages created", String(allSlugs.length));
|
|
1455
|
-
keyValue("Entities extracted", String(entityCount));
|
|
1456
|
-
keyValue("Links created", String(linkCount));
|
|
1457
|
-
keyValue("Timeline entries", String(timelineCount));
|
|
1458
|
-
keyValue("Tags added", String(tagCount));
|
|
1459
|
-
keyValue("Duration", duration);
|
|
1460
|
-
|
|
1461
|
-
if (writeErrors.length > 0) {
|
|
1462
|
-
warning(`${writeErrors.length} pages had errors`);
|
|
1463
|
-
}
|
|
1464
|
-
}
|
|
1465
|
-
|
|
1466
|
-
print(program, {
|
|
1467
|
-
ok: true,
|
|
1468
|
-
importedFiles: files.length,
|
|
1469
|
-
pages: allSlugs.length,
|
|
1470
|
-
links: linkCount,
|
|
1471
|
-
timelineEntries: timelineCount,
|
|
1472
|
-
entities: entityCount,
|
|
1473
|
-
});
|
|
1474
|
-
});
|
|
1475
|
-
});
|
|
1476
|
-
|
|
1477
|
-
program
|
|
1478
|
-
.command("export")
|
|
1479
|
-
.option("--dir <dir>", "output directory", resolve(process.cwd(), "export"))
|
|
1480
|
-
.description("export all pages as markdown files")
|
|
1481
|
-
.addHelpText(
|
|
1482
|
-
"after",
|
|
1483
|
-
`
|
|
1484
|
-
Examples:
|
|
1485
|
-
ebrain export
|
|
1486
|
-
ebrain export --dir ./backup
|
|
1487
|
-
`,
|
|
1488
|
-
)
|
|
1489
|
-
.action(async (opts: { dir: string }) => {
|
|
1490
|
-
await withRepo(program, async (repo) => {
|
|
1491
|
-
const dir = resolve(opts.dir);
|
|
1492
|
-
await ensureDir(dir);
|
|
1493
|
-
const pages = await repo.listPages({ limit: 100000 });
|
|
1494
|
-
const jsonOut = isJson(program);
|
|
1495
|
-
for (let i = 0; i < pages.length; i += 1) {
|
|
1496
|
-
const page = pages[i]!;
|
|
1497
|
-
progress("export " + page.slug, i + 1, pages.length, jsonOut);
|
|
1498
|
-
const tags = await repo.tags(page.slug);
|
|
1499
|
-
const fm = {
|
|
1500
|
-
...page.frontmatter,
|
|
1501
|
-
type: page.type,
|
|
1502
|
-
title: page.title,
|
|
1503
|
-
};
|
|
1504
|
-
if (tags.length > 0)
|
|
1505
|
-
(fm as Record<string, unknown>).tags = tags;
|
|
1506
|
-
const md = renderPageMarkdown(fm, page.compiledTruth, page.timeline);
|
|
1507
|
-
await writeTextFile(slugToPath(page.slug, dir), md);
|
|
1508
|
-
}
|
|
1509
|
-
print(program, { exported: pages.length, dir });
|
|
1510
|
-
});
|
|
1511
|
-
});
|
|
1512
|
-
|
|
1513
|
-
// -- embed ----------------------------------------------------------------
|
|
1514
|
-
|
|
1515
|
-
addDryRun(
|
|
1516
|
-
program
|
|
1517
|
-
.command("embed")
|
|
1518
|
-
.argument("[slug]", "page slug (omit with --all)")
|
|
1519
|
-
.option("--all", "embed all pages")
|
|
1520
|
-
.description("refresh page embedding(s)")
|
|
1521
|
-
.addHelpText(
|
|
1522
|
-
"after",
|
|
1523
|
-
`
|
|
1524
|
-
Examples:
|
|
1525
|
-
ebrain embed docs/api
|
|
1526
|
-
ebrain embed --all
|
|
1527
|
-
ebrain embed --all --dry-run
|
|
1528
|
-
`,
|
|
1529
|
-
),
|
|
1530
|
-
).action(
|
|
1531
|
-
async (
|
|
1532
|
-
slug: string | undefined,
|
|
1533
|
-
opts: { all?: boolean; dryRun?: boolean },
|
|
1534
|
-
) => {
|
|
1535
|
-
if (opts.all) {
|
|
1536
|
-
if (isDryRun(opts)) {
|
|
1537
|
-
await withRepo(program, async (repo) => {
|
|
1538
|
-
const pages = await repo.listPages({ limit: 100000 });
|
|
1539
|
-
print(program, {
|
|
1540
|
-
dryRun: true,
|
|
1541
|
-
action: "embed",
|
|
1542
|
-
mode: "all",
|
|
1543
|
-
pagesFound: pages.length,
|
|
1544
|
-
});
|
|
1545
|
-
});
|
|
1546
|
-
return;
|
|
1547
|
-
}
|
|
1548
|
-
await withRepo(program, async (repo) => {
|
|
1549
|
-
const jsonOut = isJson(program);
|
|
1550
|
-
const spinner = createSpinner();
|
|
1551
|
-
const startTime = Date.now();
|
|
1552
|
-
|
|
1553
|
-
if (!jsonOut) {
|
|
1554
|
-
header("Embed All Pages");
|
|
1555
|
-
spinner.start(`Loading pages...`);
|
|
1556
|
-
}
|
|
1557
|
-
|
|
1558
|
-
const pages = await repo.listPages({ limit: 100000 });
|
|
1559
|
-
|
|
1560
|
-
if (!jsonOut) {
|
|
1561
|
-
spinner.update(`Embedding ${pages.length} pages...`);
|
|
1562
|
-
}
|
|
1563
|
-
|
|
1564
|
-
const count = await repo.embedAll();
|
|
1565
|
-
|
|
1566
|
-
if (!jsonOut) {
|
|
1567
|
-
const duration = formatDuration(Date.now() - startTime);
|
|
1568
|
-
spinner.succeed(`Embedded ${count} pages`);
|
|
1569
|
-
keyValue("Duration", duration);
|
|
1570
|
-
}
|
|
1571
|
-
|
|
1572
|
-
print(program, { embedded: count, mode: "all" });
|
|
1573
|
-
});
|
|
1574
|
-
return;
|
|
1575
|
-
}
|
|
1576
|
-
if (!slug) {
|
|
1577
|
-
throw new Error("provide <slug> or --all");
|
|
1578
|
-
}
|
|
1579
|
-
if (isDryRun(opts)) {
|
|
1580
|
-
print(program, { dryRun: true, action: "embed", slug });
|
|
1581
|
-
return;
|
|
1582
|
-
}
|
|
1583
|
-
await withRepo(program, async (repo) => {
|
|
1584
|
-
const jsonOut = isJson(program);
|
|
1585
|
-
const spinner = createSpinner();
|
|
1586
|
-
|
|
1587
|
-
if (!jsonOut) {
|
|
1588
|
-
header(`Embed: ${slug}`);
|
|
1589
|
-
spinner.start(`Generating embedding for page...`);
|
|
1590
|
-
}
|
|
1591
|
-
|
|
1592
|
-
await repo.syncPageToSearch(slug);
|
|
1593
|
-
|
|
1594
|
-
if (!jsonOut) {
|
|
1595
|
-
spinner.succeed(`Page embedded: ${slug}`);
|
|
1596
|
-
}
|
|
1597
|
-
|
|
1598
|
-
print(program, { embedded: 1, slug });
|
|
1599
|
-
});
|
|
1600
|
-
},
|
|
1601
|
-
);
|
|
1602
|
-
|
|
1603
|
-
// -- init / stats ---------------------------------------------------------
|
|
1604
|
-
|
|
1605
|
-
program
|
|
1606
|
-
.command("init")
|
|
1607
|
-
.description("initialize ebrain: create config, database, and show setup guide")
|
|
1608
|
-
.addHelpText(
|
|
1609
|
-
"after",
|
|
1610
|
-
`
|
|
1611
|
-
Examples:
|
|
1612
|
-
ebrain init
|
|
1613
|
-
ebrain init --db ./my.db
|
|
1614
|
-
`,
|
|
1615
|
-
)
|
|
1616
|
-
.action(async () => {
|
|
1617
|
-
const jsonOut = isJson(program);
|
|
1618
|
-
const settings = await loadSettings();
|
|
1619
|
-
const cliDb = program.opts().db;
|
|
1620
|
-
const dbPath = cliDb ?? settings.dbPath;
|
|
1621
|
-
|
|
1622
|
-
if (!jsonOut) {
|
|
1623
|
-
header("ebrain init");
|
|
1624
|
-
}
|
|
1625
|
-
|
|
1626
|
-
// Step 1: Create settings.json if it doesn't exist
|
|
1627
|
-
const { createDefaultSettings } = await import("../settings");
|
|
1628
|
-
const settingsCreated = await createDefaultSettings();
|
|
1629
|
-
|
|
1630
|
-
if (!jsonOut) {
|
|
1631
|
-
if (settingsCreated) {
|
|
1632
|
-
success(`Created config: ${SETTINGS_PATH}`);
|
|
1633
|
-
} else {
|
|
1634
|
-
success(`Config already exists: ${SETTINGS_PATH}`);
|
|
1635
|
-
}
|
|
1636
|
-
}
|
|
1637
|
-
|
|
1638
|
-
// Step 2: Check or initialize database
|
|
1639
|
-
const dbExists = await fileExists(dbPath);
|
|
1640
|
-
let dbInitialized = false;
|
|
1641
|
-
|
|
1642
|
-
if (dbExists) {
|
|
1643
|
-
// Database already exists, skip connection attempt to avoid
|
|
1644
|
-
// noisy errors (e.g. embedding function key mismatch)
|
|
1645
|
-
if (!jsonOut) {
|
|
1646
|
-
success(`Database already exists: ${dbPath}`);
|
|
1647
|
-
}
|
|
1648
|
-
dbInitialized = true;
|
|
1649
|
-
} else {
|
|
1650
|
-
// Try to create it without collection - embedding config may not be ready
|
|
1651
|
-
try {
|
|
1652
|
-
const db = await BrainDb.connect(dbPath, settings, { skipCollection: true });
|
|
1653
|
-
await db.close();
|
|
1654
|
-
await new Promise((r) => setTimeout(r, 200));
|
|
1655
|
-
dbInitialized = true;
|
|
1656
|
-
if (!jsonOut) {
|
|
1657
|
-
success(`Database initialized: ${dbPath}`);
|
|
1658
|
-
}
|
|
1659
|
-
} catch {
|
|
1660
|
-
if (!jsonOut) {
|
|
1661
|
-
warning(`Database will be auto-created on first use`);
|
|
1662
|
-
}
|
|
1663
|
-
}
|
|
1664
|
-
}
|
|
1665
|
-
|
|
1666
|
-
// Step 3: Show setup guide
|
|
1667
|
-
if (!jsonOut) {
|
|
1668
|
-
console.log("");
|
|
1669
|
-
separator();
|
|
1670
|
-
info("Quick Start Guide");
|
|
1671
|
-
console.log("");
|
|
1672
|
-
|
|
1673
|
-
subItem("1. Configure LLM (for AI queries):", 0);
|
|
1674
|
-
subItem(` Edit ${SETTINGS_PATH}`, 4);
|
|
1675
|
-
subItem(` Set llm.baseURL to your OpenAI-compatible API endpoint`, 4);
|
|
1676
|
-
subItem(` Set llm.apiKey or export DASHSCOPE_API_KEY`, 4);
|
|
1677
|
-
console.log("");
|
|
1678
|
-
|
|
1679
|
-
subItem("2. Add your first page:", 0);
|
|
1680
|
-
subItem(" echo '# Hello' | ebrain put hello --stdin", 4);
|
|
1681
|
-
console.log("");
|
|
1682
|
-
|
|
1683
|
-
subItem("3. Import a directory of markdown files:", 0);
|
|
1684
|
-
subItem(" ebrain import ./docs", 4);
|
|
1685
|
-
console.log("");
|
|
1686
|
-
|
|
1687
|
-
subItem("4. Query with AI:", 0);
|
|
1688
|
-
subItem(' ebrain query "What did we ship in Q4?" --llm', 4);
|
|
1689
|
-
console.log("");
|
|
1690
|
-
|
|
1691
|
-
subItem("5. Visualize your knowledge graph:", 0);
|
|
1692
|
-
subItem(" ebrain graph", 4);
|
|
1693
|
-
console.log("");
|
|
1694
|
-
|
|
1695
|
-
separator();
|
|
1696
|
-
}
|
|
1697
|
-
|
|
1698
|
-
print(program, {
|
|
1699
|
-
ok: true,
|
|
1700
|
-
settingsPath: SETTINGS_PATH,
|
|
1701
|
-
settingsCreated,
|
|
1702
|
-
dbPath,
|
|
1703
|
-
dbInitialized,
|
|
1704
|
-
});
|
|
1705
|
-
|
|
1706
|
-
process.exit(0);
|
|
1707
|
-
});
|
|
1708
|
-
|
|
1709
|
-
program
|
|
1710
|
-
.command("stats")
|
|
1711
|
-
.description("show knowledge base statistics")
|
|
1712
|
-
.addHelpText(
|
|
1713
|
-
"after",
|
|
1714
|
-
`
|
|
1715
|
-
Examples:
|
|
1716
|
-
ebrain stats
|
|
1717
|
-
ebrain stats --json
|
|
1718
|
-
`,
|
|
1719
|
-
)
|
|
1720
|
-
.action(async () => {
|
|
1721
|
-
await withRepo(program, async (repo) => {
|
|
1722
|
-
const jsonOut = isJson(program);
|
|
1723
|
-
const stats = await repo.stats();
|
|
1724
|
-
|
|
1725
|
-
if (!jsonOut) {
|
|
1726
|
-
header("Knowledge Base Statistics");
|
|
1727
|
-
keyValue("Pages", String(stats.pages));
|
|
1728
|
-
keyValue("Links", String(stats.links));
|
|
1729
|
-
keyValue("Tags", String(stats.tags));
|
|
1730
|
-
keyValue("Timeline entries", String(stats.timelineEntries));
|
|
1731
|
-
keyValue("Raw data rows", String(stats.rawRows));
|
|
1732
|
-
}
|
|
1733
|
-
|
|
1734
|
-
print(program, stats);
|
|
1735
|
-
});
|
|
1736
|
-
});
|
|
45
|
+
// Register commands
|
|
46
|
+
registerConfigCommand(program);
|
|
47
|
+
registerPutCommand(program);
|
|
48
|
+
registerQueryCommand(program);
|
|
49
|
+
registerLinkCommand(program);
|
|
50
|
+
registerTimelineCommand(program);
|
|
51
|
+
registerTagCommand(program);
|
|
52
|
+
registerRawCommand(program);
|
|
53
|
+
registerImportCommand(program);
|
|
54
|
+
registerExportCommand(program);
|
|
55
|
+
registerEmbedCommand(program);
|
|
56
|
+
registerInitCommand(program);
|
|
57
|
+
registerStatsCommand(program);
|
|
58
|
+
registerServeCommand(program);
|
|
1737
59
|
|
|
1738
60
|
// Register compile and smart-ingest commands
|
|
1739
61
|
registerCompileCommands(program);
|
|
@@ -1741,621 +63,15 @@ Examples:
|
|
|
1741
63
|
// Register graph command
|
|
1742
64
|
registerGraphCommand(program);
|
|
1743
65
|
|
|
1744
|
-
// --
|
|
1745
|
-
|
|
66
|
+
// -- legacy aliases (backward compat) -------------------------------------
|
|
1746
67
|
program
|
|
1747
|
-
.command("
|
|
1748
|
-
.description("
|
|
1749
|
-
.addHelpText(
|
|
1750
|
-
"after",
|
|
1751
|
-
`
|
|
1752
|
-
Examples:
|
|
1753
|
-
ebrain serve
|
|
1754
|
-
`,
|
|
1755
|
-
)
|
|
1756
|
-
.action(async () => {
|
|
1757
|
-
const { startMcpServer } = await import("../mcp/server");
|
|
1758
|
-
const dbPath = String(program.opts().db);
|
|
1759
|
-
await startMcpServer(dbPath);
|
|
1760
|
-
});
|
|
1761
|
-
|
|
1762
|
-
program
|
|
1763
|
-
.command("tools-json")
|
|
1764
|
-
.description("print MCP tools discovery JSON")
|
|
68
|
+
.command("tools")
|
|
69
|
+
.description("alias for tools-json (deprecated)")
|
|
1765
70
|
.action(() => {
|
|
1766
71
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
1767
72
|
const { TOOL_MANIFEST } = require("../mcp/server");
|
|
1768
73
|
console.log(JSON.stringify({ tools: TOOL_MANIFEST }, null, 2));
|
|
1769
74
|
});
|
|
1770
75
|
|
|
1771
|
-
// -- legacy aliases (backward compat, hidden) -----------------------------
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
76
|
return program;
|
|
1778
77
|
}
|
|
1779
|
-
|
|
1780
|
-
// ---------------------------------------------------------------------------
|
|
1781
|
-
// Repo / output helpers
|
|
1782
|
-
// ---------------------------------------------------------------------------
|
|
1783
|
-
|
|
1784
|
-
/**
 * Open the configured database, run `callback` against a repository bound to
 * it, close the database, and terminate the process.
 *
 * The database path is taken from the `--db` CLI option when present and from
 * the loaded settings otherwise.
 *
 * @param program  Commander program whose global options are read.
 * @param callback Work to perform with the repository.
 * @returns Does not return on success: the process exits with code 0 once the
 *          callback has finished and the database has been closed. If the
 *          callback throws, the database is still closed (best effort) and the
 *          error propagates to the caller without forcing a zero exit code.
 */
async function withRepo(
  program: Command,
  callback: (repo: BrainRepository) => Promise<void>,
): Promise<void> {
  const settings = await loadSettings();
  const cliDb = program.opts().db;
  const dbPath = cliDb ?? settings.dbPath;
  const db = await BrainDb.connect(dbPath, settings);

  try {
    await callback(new BrainRepository(db));
  } finally {
    // Gracefully close the database on success AND on failure, so a throwing
    // command does not leave the handle open.
    // Note: seekdb SDK's InternalEmbeddedClient.close() is empty in embedded mode.
    // Data may not flush properly. Use remote seekdb server for reliability.
    try {
      await db.close();
    } catch {
      // Close may fail due to seekdb native bug; nothing useful can be done here.
    }
  }

  // Give seekdb extra time after close
  await new Promise((r) => setTimeout(r, 500));

  // CLI: force exit to bypass seekdb native cleanup segfault
  process.exit(0);
}
|
|
1810
|
-
|
|
1811
|
-
/**
 * Write a command result to stdout.
 *
 * In JSON mode the payload is always serialized as pretty-printed JSON.
 * Otherwise strings are printed verbatim and every other value goes through
 * `formatHuman`.
 *
 * @param program Commander program; its options decide whether JSON mode is on.
 * @param payload Value to print.
 */
function print(program: Command, payload: unknown): void {
  let rendered: string;
  if (isJson(program)) {
    rendered = JSON.stringify(payload, null, 2);
  } else if (typeof payload === "string") {
    rendered = payload;
  } else {
    rendered = formatHuman(payload);
  }
  console.log(rendered);
}
|
|
1822
|
-
|
|
1823
|
-
/**
 * Report whether the program's `json` option is set to a truthy value.
 *
 * @param program Commander program whose parsed options are inspected.
 * @returns `true` when the `json` option is truthy, `false` otherwise.
 */
function isJson(program: Command): boolean {
  const { json } = program.opts();
  return !!json;
}
|
|
1826
|
-
|
|
1827
|
-
/**
 * Render a payload for human (non-JSON) output.
 *
 * Arrays become a dash-prefixed list with one item per line: string items are
 * shown as-is, everything else as compact JSON. Any non-array payload is
 * rendered as pretty-printed JSON with two-space indentation.
 *
 * @param payload Value to render.
 * @returns The rendered text.
 */
function formatHuman(payload: unknown): string {
  if (!Array.isArray(payload)) {
    return JSON.stringify(payload, null, 2);
  }

  const toBullet = (entry: unknown): string =>
    "- " + (typeof entry === "string" ? entry : JSON.stringify(entry));

  return payload.map((entry) => toBullet(entry)).join("\n");
}
|
|
1839
|
-
|
|
1840
|
-
/**
 * Turn a relative markdown link target into a page slug.
 *
 * Steps, in order:
 * 1. Backslashes are converted to forward slashes.
 * 2. Every leading `./` and `../` segment is removed. The previous pattern was
 *    anchored with `^`, so it only ever removed one `../` and left
 *    `../../a.md` as `../a`.
 * 3. A trailing `.md` extension is removed.
 *
 * Only leading relative segments are stripped; `..` in the middle of the path
 * is left untouched.
 *
 * @param path Link target as written in the document.
 * @returns The normalized slug.
 */
function normalizeLinkSlug(path: string): string {
  return path
    .replace(/\\/g, "/")
    .replace(/^(?:\.{1,2}\/)+/, "")
    .replace(/\.md$/, "");
}
|
|
1847
|
-
|
|
1848
|
-
// ---------------------------------------------------------------------------
|
|
1849
|
-
// LLM Answer Generation - Multi-layer Context Collection
|
|
1850
|
-
// ---------------------------------------------------------------------------
|
|
1851
|
-
|
|
1852
|
-
/**
 * A single section of context for the LLM prompt.
 *
 * Sections are produced by collectContextForLLM and rendered into the prompt
 * grouped by `type`, each under a heading built from `title`, `label` and
 * `slug`.
 */
interface ContextSection {
  /**
   * Which layer the section came from: a hit page's own content ('primary'),
   * a stored raw document ('raw_data'), or a page linked to/from a hit
   * ('linked').
   */
  type: 'primary' | 'raw_data' | 'linked';
  /** Slug of the page the section belongs to. */
  slug: string;
  /** Title of the page the section belongs to. */
  title: string;
  /** Text placed in the prompt; may end with a truncation marker. */
  content: string;
  /**
   * Human-readable label like "原始文档 (crm)" (raw document from source "crm")
   * or "关联页面: projects/alpha" (linked page projects/alpha).
   */
  label: string;
}
|
|
1861
|
-
|
|
1862
|
-
/**
 * Collect multi-layer context for LLM answer generation.
 *
 * Layers (in priority order):
 * 1. Primary: compiledTruth + timeline of each hit page
 * 2. Raw data: original documents stored via raw.set
 * 3. Linked pages: compiledTruth of pages linked to/from hit pages
 *
 * Budget is enforced via a total character limit. The very first section is
 * always kept whole, even if it alone exceeds `maxChars`, so the prompt is
 * never empty when there is content. Every later section is truncated to the
 * remaining budget, or dropped when nothing meaningful would fit.
 *
 * @param repo       Repository used to read pages, raw rows and links.
 * @param hits       Search hits to build context for, with their scores.
 * @param question   User question; used for keyword scoring of linked pages.
 * @param maxChars   Character budget for the combined section contents.
 * @param onProgress Optional callback invoked with a stage name before each layer.
 * @returns The collected sections, the sum of their content lengths, and
 *          counters describing what was collected. Counters only include
 *          sections that were actually added; `skippedChars` is the number of
 *          characters dropped by truncation or by rejecting a section for
 *          lack of room.
 */
async function collectContextForLLM(
  repo: BrainRepository,
  hits: Array<{ slug: string; title: string; score: number }>,
  question: string,
  maxChars: number,
  onProgress?: (stage: string) => void,
): Promise<{ sections: ContextSection[]; totalChars: number; stats: ContextStats }> {
  // Marker appended to any content that had to be cut.
  const TRUNCATION_MARKER = '\n...[truncated]';
  // Characters held back when addSection truncates (leaves room for the marker).
  const SECTION_RESERVE = 20;
  // Characters held back when Layer 3 pre-truncates linked content.
  const LINKED_RESERVE = 100;
  // Linked pages scoring below this are not considered relevant.
  const MIN_LINKED_SCORE = 0.02;

  const sections: ContextSection[] = [];
  let totalChars = 0;
  const stats: ContextStats = {
    primaryPages: 0,
    rawDocs: 0,
    linkedPages: 0,
    skippedChars: 0,
  };

  const seenKeys = new Set<string>();

  /**
   * Cut `content` down to `keep` characters plus the truncation marker and
   * record the dropped characters. Returns null when `keep` is not positive;
   * a negative slice end would otherwise keep almost the whole string.
   */
  function truncateTo(content: string, keep: number): string | null {
    if (content.length <= keep) return content;
    if (keep <= 0) {
      stats.skippedChars += content.length;
      return null;
    }
    stats.skippedChars += content.length - keep;
    return content.slice(0, keep) + TRUNCATION_MARKER;
  }

  /** True while another section could still be accepted. */
  function hasRoom(): boolean {
    return sections.length === 0 || totalChars < maxChars;
  }

  /**
   * Add a section unless an identical (type, slug, label) one exists or it
   * cannot fit. Returns whether the section was added.
   */
  function addSection(section: ContextSection): boolean {
    const key = `${section.type}:${section.slug}:${section.label}`;
    if (seenKeys.has(key)) {
      return false;
    }

    let content = section.content;
    const budget = maxChars - totalChars;
    // The first section is deliberately exempt from truncation.
    if (content.length > budget && sections.length > 0) {
      const cut = truncateTo(content, budget - SECTION_RESERVE);
      if (cut === null) return false;
      content = cut;
    }
    if (content.length === 0) {
      return false;
    }

    sections.push({ ...section, content });
    totalChars += content.length;
    seenKeys.add(key);
    return true;
  }

  // Cache pages fetched in Layer 1 to avoid redundant DB calls in Layer 3
  const pageCache = new Map<string, NonNullable<Awaited<ReturnType<typeof repo.getPage>>>>();

  // Layer 1: Primary pages (compiledTruth + timeline).
  // Every hit is fetched even when the budget is gone, because Layer 3 reads
  // these pages from the cache.
  onProgress?.('page content');
  for (const hit of hits) {
    const page = await repo.getPage(hit.slug);
    if (!page) continue;
    pageCache.set(hit.slug, page);

    const parts: string[] = [];
    const truth = page.compiledTruth?.trim();
    if (truth) {
      parts.push(truth);
    }
    const tl = page.timeline?.trim();
    if (tl) {
      parts.push(`## 时间线\n${tl}`);
    }
    if (parts.length === 0) continue;

    const added = addSection({
      type: 'primary',
      slug: page.slug,
      title: page.title,
      content: parts.join('\n\n'),
      label: `页面正文`,
    });
    if (added) stats.primaryPages++;
  }

  // Layer 2: Raw data (original documents)
  onProgress?.('raw documents');
  for (const hit of hits) {
    // Once the budget is used up nothing more can be added; skip the DB reads.
    if (!hasRoom()) break;
    try {
      const rawRows = await repo.readRaw(hit.slug) as Array<{ source: string; data: unknown; fetchedAt?: string }>;
      for (const row of rawRows) {
        let rawContent = '';
        if (typeof row.data === 'string') {
          rawContent = row.data;
        } else if (typeof row.data === 'object' && row.data !== null) {
          rawContent = JSON.stringify(row.data, null, 2);
        }
        if (!rawContent.trim()) continue;

        const added = addSection({
          type: 'raw_data',
          slug: hit.slug,
          title: hit.title,
          content: rawContent,
          label: `原始文档 (${row.source})`,
        });
        if (added) stats.rawDocs++;
      }
    } catch {
      // Raw data fetch failure is non-fatal
    }
  }

  // Layer 3: Linked pages - score using cached data + keyword matching.
  // No second repo.query() call needed - reuse hits scores + keyword fallback.
  onProgress?.('linked pages');
  if (maxChars - totalChars > LINKED_RESERVE) {
    const allLinkedSlugs = new Set<string>();
    for (const hit of hits) {
      try {
        const outLinks = await repo.outgoingLinks(hit.slug);
        for (const link of outLinks) allLinkedSlugs.add(link.slug);
      } catch { /* link lookup failure is non-fatal */ }
      try {
        const backlinkSlugs = await repo.backlinks(hit.slug);
        for (const slug of backlinkSlugs) allLinkedSlugs.add(slug);
      } catch { /* link lookup failure is non-fatal */ }
    }

    // Score: semantic scores from the initial hits, keyword overlap for the rest.
    // NOTE(review): a hit that is also linked from another hit is added again
    // here as a 'linked' section; confirm whether that duplication is wanted.
    const semanticScoreMap = new Map(hits.map(h => [h.slug, h.score]));
    const keywordScores = new Map<string, number>();
    for (const linkedSlug of allLinkedSlugs) {
      if (semanticScoreMap.has(linkedSlug)) continue;

      // Use cached page if available, only fetch if not in cache
      let page = pageCache.get(linkedSlug);
      if (!page) {
        const fetched = await repo.getPage(linkedSlug);
        if (!fetched) continue;
        pageCache.set(linkedSlug, fetched);
        page = fetched;
      }
      const text = `${page.title} ${page.compiledTruth}`.slice(0, 2000);
      keywordScores.set(linkedSlug, computeKeywordRelevance(text, question));
    }

    // Filter: only include linked pages with meaningful relevance, best first.
    const relevantLinked = [...allLinkedSlugs]
      .map(slug => ({
        slug,
        score: semanticScoreMap.get(slug) ?? keywordScores.get(slug) ?? 0,
      }))
      .filter(s => s.score >= MIN_LINKED_SCORE)
      .sort((a, b) => b.score - a.score);

    // Add linked pages (already cached in pageCache, no extra fetch needed)
    for (const linked of relevantLinked) {
      const remaining = maxChars - totalChars;
      if (remaining <= LINKED_RESERVE) break;

      const linkedPage = pageCache.get(linked.slug);
      const truth = linkedPage?.compiledTruth?.trim();
      if (!linkedPage || !truth) continue;

      const content = truncateTo(truth, remaining - LINKED_RESERVE);
      if (content === null) continue;

      const added = addSection({
        type: 'linked',
        slug: linkedPage.slug,
        title: linkedPage.title,
        content,
        label: `关联页面: ${linkedPage.slug} (相关度: ${(linked.score * 100).toFixed(1)}%)`,
      });
      if (added) stats.linkedPages++;

      // Also fetch raw data for highly relevant linked pages
      if (linked.score > 0.1) {
        try {
          const rawRows = await repo.readRaw(linked.slug) as Array<{ source: string; data: unknown }>;
          for (const row of rawRows) {
            // JSON.stringify yields undefined for undefined input; treat as empty.
            const rawText = typeof row.data === 'string'
              ? row.data
              : (JSON.stringify(row.data) ?? '');
            if (rawText.trim().length <= 100) continue;

            const room = maxChars - totalChars - LINKED_RESERVE;
            if (room <= 0) break;
            const rawContent = truncateTo(rawText, room);
            if (rawContent === null) break;

            const rawAdded = addSection({
              type: 'raw_data',
              slug: linked.slug,
              title: linkedPage.title,
              content: rawContent,
              label: `原始文档 (关联: ${row.source})`,
            });
            if (rawAdded) stats.rawDocs++;
          }
        } catch { /* raw data fetch failure is non-fatal */ }
      }
    }
  }

  return { sections, totalChars, stats };
}
|
|
2066
|
-
|
|
2067
|
-
/**
 * Character-overlap relevance score, used as a fallback when no semantic
 * score is available for a page.
 *
 * The question is reduced to its set of distinct characters, ignoring
 * whitespace, the listed punctuation and a fixed set of common Chinese
 * characters. The score is the share of those characters that occur in
 * `text`, compared case-insensitively.
 *
 * @param text     Text to score (e.g. a page title plus body excerpt).
 * @param question Question whose characters are looked up in `text`.
 * @returns A value in [0, 1]; 0 when the question has no usable characters.
 */
function computeKeywordRelevance(text: string, question: string): number {
  const STOP_CHARS = new Set('的是了在和我有你就这不人都说上个大国为到以们年会生地要主中子自实家小对多能好可很所把当');
  const ignored = /\s|[,,。!?、;::""''()()【】\[\]{}<>\/\\|~`@#$%^&*+=_-]/;

  // Distinct meaningful characters of the question.
  const wanted = new Set<string>();
  for (const ch of question) {
    if (ignored.test(ch) || STOP_CHARS.has(ch)) continue;
    wanted.add(ch);
  }
  if (wanted.size === 0) return 0;

  const haystack = text.toLowerCase();
  const found = [...wanted].filter((ch) => haystack.includes(ch.toLowerCase())).length;
  return found / wanted.size;
}
|
|
2086
|
-
|
|
2087
|
-
/** Counters filled in by collectContextForLLM while it gathers context. */
interface ContextStats {
  /** Incremented for each hit page whose own content was collected. */
  primaryPages: number;
  /** Incremented for each raw document row collected (for hits and for linked pages). */
  rawDocs: number;
  /** Incremented for each linked page collected. */
  linkedPages: number;
  /** Adjusted whenever a section is truncated to fit the character budget. */
  skippedChars: number;
}
|
|
2093
|
-
|
|
2094
|
-
/** System message sent with every answer request. */
const ANSWER_SYSTEM_PROMPT =
  "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。";

/** How long to wait for the LLM API to start responding. */
const ANSWER_CONNECT_TIMEOUT_MS = 30_000;

/** Build the chat-completions endpoint from a base URL with or without a trailing slash. */
function answerEndpoint(baseURL: string): string {
  return baseURL.endsWith("/") ? baseURL + "chat/completions" : baseURL + "/chat/completions";
}

/**
 * Build the user prompt: the question, then the context sections grouped by
 * type (primary, raw data, linked) and numbered consecutively across groups,
 * then the answering rules.
 */
function buildAnswerPrompt(question: string, sections: ContextSection[]): string {
  const contextParts: string[] = [];
  let sectionIndex = 0;

  const renderSections = (type: ContextSection['type'], header: string): void => {
    const group = sections.filter(s => s.type === type);
    if (group.length === 0) return;
    contextParts.push(`## ${header}\n`);
    for (const s of group) {
      sectionIndex++;
      contextParts.push(`### [${sectionIndex}] ${s.title} - ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
    }
    contextParts.push('');
  };

  renderSections('primary', '页面正文');
  renderSections('raw_data', '原始文档');
  renderSections('linked', '关联页面');

  const context = contextParts.join('\n');

  return `你是一个知识库助手,请根据提供的知识库内容回答问题。

## 问题
${question}

## 知识库内容

${context}

## 回答要求
- 仅基于提供的知识库内容回答,不要编造信息
- 如果知识库中没有相关信息,请明确说明
- 引用来源时使用 [[slug|标题]] 的格式
- 使用清晰的 markdown 格式
- 如果涉及时间线信息,请在回答中体现
- 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
- 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)

## 回答`;
}

/**
 * Build the LLM prompt from collected context sections and stream the answer.
 *
 * Answer text is written to stdout as it arrives; status lines go to stderr.
 * Failures are reported through the return value, never thrown.
 *
 * @param question User question.
 * @param sections Context sections to include in the prompt.
 * @param stats    Collection counters; currently not used by this function.
 * @param llm      Resolved LLM settings (base URL, model, API key or the name
 *                 of the environment variable holding it).
 * @returns `ok: true` with the full answer text, or `ok: false` with an
 *          `Error: ...` message in `answer`.
 */
async function generateAnswerWithStream(
  question: string,
  sections: ContextSection[],
  stats: ContextStats,
  llm: ResolvedLLM,
): Promise<{ answer: string; ok: boolean }> {
  const apiKey = llm.apiKey || process.env[llm.apiKeyEnv] || "";
  if (!apiKey) {
    return { answer: "Error: LLM API key not configured.", ok: false };
  }

  if (sections.length === 0) {
    return { answer: "知识库中没有找到相关内容。", ok: true };
  }

  const prompt = buildAnswerPrompt(question, sections);

  try {
    // Show thinking indicator while waiting for first token
    process.stderr.write(`\x1b[35m💭\x1b[0m \x1b[2mConnecting to ${llm.model}...\x1b[0m\n`);

    // The timeout only guards the wait for response headers. A signal from
    // AbortSignal.timeout() would stay armed while the body is being read and
    // cut off any answer that takes longer than the timeout to stream.
    const controller = new AbortController();
    const connectTimer = setTimeout(
      () => controller.abort(new Error(`LLM API did not respond within ${ANSWER_CONNECT_TIMEOUT_MS / 1000}s`)),
      ANSWER_CONNECT_TIMEOUT_MS,
    );

    let resp: Response;
    try {
      resp = await fetch(answerEndpoint(llm.baseURL), {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify({
          model: llm.model,
          stream: true,
          messages: [
            { role: "system", content: ANSWER_SYSTEM_PROMPT },
            { role: "user", content: prompt },
          ],
          temperature: 0.3,
          max_tokens: 4096,
          // Disable thinking/reasoning mode to reduce latency. Sent both nested
          // and top-level; many providers ignore unknown fields.
          extra_body: { thinking: { type: "disabled" } },
          thinking: { type: "disabled" },
        }),
        signal: controller.signal,
      });
    } finally {
      clearTimeout(connectTimer);
    }

    if (!resp.ok) {
      const text = await resp.text();
      // Clear the thinking indicator line
      process.stderr.write("\r\x1b[K");
      return { answer: `Error: LLM API failed (${resp.status}): ${text.slice(0, 200)}`, ok: false };
    }

    if (!resp.body) {
      process.stderr.write("\r\x1b[K");
      return { answer: "Error: No response body from LLM API.", ok: false };
    }

    // Clear thinking indicator, show streaming status
    process.stderr.write("\r\x1b[K");
    process.stderr.write(`\x1b[32m✦\x1b[0m \x1b[2mStreaming response...\x1b[0m\n`);

    let fullAnswer = "";

    // Handle one SSE line: echo and accumulate the delta text it carries.
    const consumeLine = (line: string): void => {
      const trimmed = line.trim();
      // The space after "data:" is optional in the SSE format.
      if (!trimmed.startsWith("data:")) return;
      const payload = trimmed.slice(5).trim();
      if (!payload || payload === "[DONE]") return;

      try {
        const json = JSON.parse(payload);
        const content = json.choices?.[0]?.delta?.content;
        if (typeof content === "string" && content.length > 0) {
          process.stdout.write(content);
          fullAnswer += content;
        }
      } catch {
        // Skip malformed SSE data
      }
    };

    // Stream the response
    const reader = resp.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // Keep the last incomplete line in buffer
      buffer = lines.pop() ?? "";
      for (const line of lines) consumeLine(line);
    }

    // Flush the decoder and handle a final line that had no trailing newline.
    buffer += decoder.decode();
    if (buffer) consumeLine(buffer);

    // Add a newline after streaming completes
    process.stdout.write("\n");

    return { answer: fullAnswer || "(No answer generated)", ok: true };
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    return { answer: `Error: ${msg}`, ok: false };
  }
}

/**
 * Build the LLM prompt from collected context sections and return the answer
 * from a single non-streaming request.
 *
 * @deprecated Use generateAnswerWithStream instead
 *
 * @param question User question.
 * @param sections Context sections to include in the prompt.
 * @param stats    Collection counters; currently not used by this function.
 * @param llm      Resolved LLM settings.
 * @returns The answer text, or an `Error: ...` message on failure.
 */
async function generateAnswerWithContext(
  question: string,
  sections: ContextSection[],
  stats: ContextStats,
  llm: ResolvedLLM,
): Promise<string> {
  const apiKey = llm.apiKey || process.env[llm.apiKeyEnv] || "";
  if (!apiKey) {
    return "Error: LLM API key not configured.";
  }

  if (sections.length === 0) {
    return "知识库中没有找到相关内容。";
  }

  const prompt = buildAnswerPrompt(question, sections);

  try {
    const resp = await fetch(answerEndpoint(llm.baseURL), {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: llm.model,
        messages: [
          { role: "system", content: ANSWER_SYSTEM_PROMPT },
          { role: "user", content: prompt },
        ],
        temperature: 0.3,
        max_tokens: 4096,
      }),
    });

    if (!resp.ok) {
      const text = await resp.text();
      return `Error: LLM API failed (${resp.status}): ${text.slice(0, 200)}`;
    }

    const data = await resp.json();
    return data.choices?.[0]?.message?.content || "(No answer generated)";
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    return `Error: ${msg}`;
  }
}
|