ex-brain 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -1
- package/src/commands/index.ts +431 -231
- package/src/db/client.ts +14 -1
- package/src/markdown/document-loader.ts +486 -0
- package/src/mcp/server.ts +148 -0
- package/src/repositories/brain-repo.ts +10 -2
- package/src/settings.ts +51 -2
package/src/commands/index.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { basename, resolve } from "node:path";
|
|
1
|
+
import { basename, extname, resolve } from "node:path";
|
|
2
2
|
import { readFileSync } from "node:fs";
|
|
3
|
+
import { createHash } from "node:crypto";
|
|
3
4
|
import { Command } from "commander";
|
|
4
5
|
import { DEFAULT_DB_NAME, inferTypeFromSlug, slugToTitle, normalizeLongSlug, slugify } from "../config";
|
|
5
6
|
import { BrainDb } from "../db/client";
|
|
@@ -13,6 +14,7 @@ import {
|
|
|
13
14
|
slugToPath,
|
|
14
15
|
writeTextFile,
|
|
15
16
|
} from "../markdown/io";
|
|
17
|
+
import { loadDocument, isRemoteUrl, type DocumentKind } from "../markdown/document-loader";
|
|
16
18
|
import {
|
|
17
19
|
extractTimelineLines,
|
|
18
20
|
extractWikiStyleLinks,
|
|
@@ -34,6 +36,7 @@ import {
|
|
|
34
36
|
subItem,
|
|
35
37
|
keyValue,
|
|
36
38
|
header,
|
|
39
|
+
separator,
|
|
37
40
|
createSpinner,
|
|
38
41
|
formatCount,
|
|
39
42
|
type ProgressSpinner,
|
|
@@ -51,6 +54,14 @@ function isDryRun(opts: Record<string, unknown>): boolean {
|
|
|
51
54
|
return Boolean(opts.dryRun);
|
|
52
55
|
}
|
|
53
56
|
|
|
57
|
+
/**
|
|
58
|
+
* Compute a short SHA-256 hex hash of a string (first 16 chars).
|
|
59
|
+
* Used for detecting duplicate document ingestion.
|
|
60
|
+
*/
|
|
61
|
+
function contentHash(text: string): string {
|
|
62
|
+
return createHash("sha256").update(text, "utf8").digest("hex").slice(0, 16);
|
|
63
|
+
}
|
|
64
|
+
|
|
54
65
|
// Simple progress output to stderr (won't interfere with --json stdout).
|
|
55
66
|
// e.g. "[3/42] import docs/api"
|
|
56
67
|
function progress(label: string, current: number, total: number, json: boolean): void {
|
|
@@ -93,12 +104,12 @@ async function applyEntityLinks(
|
|
|
93
104
|
}
|
|
94
105
|
return { created: 0, linked: 0 };
|
|
95
106
|
}
|
|
96
|
-
|
|
107
|
+
|
|
97
108
|
// Filter by confidence
|
|
98
109
|
const confidenceThreshold = settings.extraction.confidenceThreshold;
|
|
99
110
|
const highConfidence = relations.filter((r) => r.confidence >= confidenceThreshold);
|
|
100
111
|
const ignoredCount = relations.length - highConfidence.length;
|
|
101
|
-
|
|
112
|
+
|
|
102
113
|
if (highConfidence.length === 0) {
|
|
103
114
|
if (!json) {
|
|
104
115
|
if (relations.length > 0) {
|
|
@@ -118,7 +129,7 @@ async function applyEntityLinks(
|
|
|
118
129
|
// 1. Resolve entity slugs (disambiguation)
|
|
119
130
|
const fromCandidate = entityToSlug(r.from.name, r.from.type);
|
|
120
131
|
const toCandidate = entityToSlug(r.to.name, r.to.type);
|
|
121
|
-
|
|
132
|
+
|
|
122
133
|
const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
|
|
123
134
|
const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
|
|
124
135
|
|
|
@@ -143,7 +154,7 @@ async function applyEntityLinks(
|
|
|
143
154
|
const duration = formatDuration(Date.now() - startTime);
|
|
144
155
|
const entityNames = [...new Set(highConfidence.flatMap((r) => [r.from.name, r.to.name]))];
|
|
145
156
|
spinner.succeed(`Extracted ${entityNames.length} entities: ${entityNames.join(", ")}`);
|
|
146
|
-
|
|
157
|
+
|
|
147
158
|
// Print detailed info
|
|
148
159
|
subItem(`${created} entity pages created`);
|
|
149
160
|
subItem(`${linked} links added`);
|
|
@@ -224,23 +235,46 @@ Examples:
|
|
|
224
235
|
|
|
225
236
|
// -- page CRUD ------------------------------------------------------------
|
|
226
237
|
|
|
238
|
+
// -- put ------------------------------------------------------------------
|
|
239
|
+
// Auto-detects file type: markdown goes through parsePageMarkdown,
|
|
240
|
+
// other formats (pdf, docx, html, txt, json) go through loadDocument.
|
|
241
|
+
|
|
242
|
+
/** Non-markdown extensions that should use the document ingestion path. */
|
|
243
|
+
const DOC_EXTENSIONS = new Set([
|
|
244
|
+
"pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
|
|
245
|
+
]);
|
|
246
|
+
|
|
247
|
+
/** Whether a file path should be treated as a document (not markdown). */
|
|
248
|
+
function isDocumentFile(filePath: string, forceKind?: string): boolean {
|
|
249
|
+
if (forceKind && forceKind !== "markdown") return true;
|
|
250
|
+
const ext = extname(filePath).toLowerCase().replace(/^\./, "");
|
|
251
|
+
return DOC_EXTENSIONS.has(ext);
|
|
252
|
+
}
|
|
253
|
+
|
|
227
254
|
addDryRun(
|
|
228
255
|
program
|
|
229
256
|
.command("put")
|
|
230
257
|
.argument("[slug]", "page slug (optional; auto-generated if omitted)")
|
|
231
|
-
.option("--file <path>", "read
|
|
258
|
+
.option("--file <path>", "read content from file (markdown, pdf, docx, html, txt, json)")
|
|
232
259
|
.option("--stdin", "read markdown from stdin", false)
|
|
233
|
-
.option("--type <type>", "page type")
|
|
234
|
-
.option("--title <title>", "page title")
|
|
260
|
+
.option("--type <type>", "page type override")
|
|
261
|
+
.option("--title <title>", "page title override")
|
|
262
|
+
.option("--format <kind>", "force document kind (pdf|docx|html|json|markdown|text) — only needed for --file with non-md files when auto-detect fails")
|
|
263
|
+
.option("--max-bytes <number>", "max bytes for URL/file ingest", "52428800")
|
|
264
|
+
.option("--timeout <ms>", "fetch timeout for URLs in ms", "30000")
|
|
235
265
|
.description(
|
|
236
|
-
"create or update a page (idempotent; upserts by slug).
|
|
266
|
+
"create or update a page (idempotent; upserts by slug). Auto-detects file type: markdown is parsed normally, PDF/DOCX/HTML/TXT/JSON are extracted and ingested.",
|
|
237
267
|
)
|
|
238
268
|
.addHelpText(
|
|
239
269
|
"after",
|
|
240
270
|
`
|
|
241
271
|
Examples:
|
|
242
|
-
ebrain put --file api.md #
|
|
272
|
+
ebrain put --file api.md # markdown → parsePageMarkdown
|
|
243
273
|
ebrain put docs/api --file api.md # explicit slug
|
|
274
|
+
ebrain put --file report.pdf # pdf → auto-extract text
|
|
275
|
+
ebrain put docs/report --file report.pdf # explicit slug for pdf
|
|
276
|
+
ebrain put --file article.docx # docx → auto-extract text
|
|
277
|
+
ebrain put --file https://example.com/a.pdf # URL → download + extract
|
|
244
278
|
cat note.md | ebrain put --stdin # auto-generate slug from title/timestamp
|
|
245
279
|
ebrain put --title "My Note" --stdin # auto-generate slug from title
|
|
246
280
|
ebrain put people/john --type person --title "John Doe"
|
|
@@ -255,9 +289,173 @@ Examples:
|
|
|
255
289
|
stdin?: boolean;
|
|
256
290
|
type?: string;
|
|
257
291
|
title?: string;
|
|
292
|
+
format?: string;
|
|
293
|
+
maxBytes?: string;
|
|
294
|
+
timeout?: string;
|
|
258
295
|
dryRun?: boolean;
|
|
259
296
|
},
|
|
260
297
|
) => {
|
|
298
|
+
// ── Branch 1: document file (pdf/docx/html/txt/json or URL) ──
|
|
299
|
+
const forceKind = opts.format as DocumentKind | undefined;
|
|
300
|
+
if (opts.file && isDocumentFile(opts.file, opts.format)) {
|
|
301
|
+
const loaded = await loadDocument(opts.file, {
|
|
302
|
+
forceKind,
|
|
303
|
+
fetchTimeoutMs: opts.timeout ? Number(opts.timeout) : undefined,
|
|
304
|
+
maxBytes: opts.maxBytes ? Number(opts.maxBytes) : undefined,
|
|
305
|
+
});
|
|
306
|
+
const content = loaded.text;
|
|
307
|
+
const fileName = loaded.fileName;
|
|
308
|
+
const kind = loaded.kind;
|
|
309
|
+
const sourceRef = loaded.source;
|
|
310
|
+
const sourceType = loaded.sourceType;
|
|
311
|
+
const mimeType = loaded.mimeType;
|
|
312
|
+
const bytes = loaded.bytes;
|
|
313
|
+
const metadata = loaded.metadata;
|
|
314
|
+
|
|
315
|
+
let finalSlug = slug;
|
|
316
|
+
if (!finalSlug) {
|
|
317
|
+
const nameNoExt = fileName.replace(/\.[^.]+$/, "");
|
|
318
|
+
const slugBase = normalizeLongSlug(slugify(nameNoExt));
|
|
319
|
+
finalSlug = `ingest/${slugBase}`;
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
const type = opts.type ?? kind;
|
|
323
|
+
const title =
|
|
324
|
+
opts.title ??
|
|
325
|
+
String(slugToTitle(finalSlug));
|
|
326
|
+
const hash = contentHash(content);
|
|
327
|
+
const frontmatter: Record<string, unknown> = {
|
|
328
|
+
sourceFile: sourceRef,
|
|
329
|
+
sourceType,
|
|
330
|
+
sourceKind: kind,
|
|
331
|
+
sourceMimeType: mimeType,
|
|
332
|
+
sourceBytes: bytes,
|
|
333
|
+
sourceFileName: fileName,
|
|
334
|
+
_contentHash: hash,
|
|
335
|
+
...metadata,
|
|
336
|
+
};
|
|
337
|
+
|
|
338
|
+
if (isDryRun(opts)) {
|
|
339
|
+
print(program, {
|
|
340
|
+
dryRun: true,
|
|
341
|
+
action: "put",
|
|
342
|
+
slug: finalSlug,
|
|
343
|
+
type,
|
|
344
|
+
title,
|
|
345
|
+
kind,
|
|
346
|
+
sourceType,
|
|
347
|
+
sourceRef,
|
|
348
|
+
mimeType,
|
|
349
|
+
bytes,
|
|
350
|
+
contentLength: content.length,
|
|
351
|
+
contentHash: hash,
|
|
352
|
+
metadata,
|
|
353
|
+
});
|
|
354
|
+
return;
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
await withRepo(program, async (repo) => {
|
|
358
|
+
const jsonOut = isJson(program);
|
|
359
|
+
const spinner = createSpinner();
|
|
360
|
+
const startTime = Date.now();
|
|
361
|
+
|
|
362
|
+
// Check if content has already been ingested (idempotency)
|
|
363
|
+
const existingPage = await repo.getPage(finalSlug);
|
|
364
|
+
const existingHash = existingPage?.frontmatter._contentHash as string | undefined;
|
|
365
|
+
|
|
366
|
+
if (existingHash === hash) {
|
|
367
|
+
if (!jsonOut) {
|
|
368
|
+
header(`Put: ${fileName}`);
|
|
369
|
+
success(`Content unchanged — skipped (hash: ${hash})`);
|
|
370
|
+
}
|
|
371
|
+
print(program, {
|
|
372
|
+
ok: true,
|
|
373
|
+
action: "put",
|
|
374
|
+
slug: finalSlug,
|
|
375
|
+
unchanged: true,
|
|
376
|
+
contentHash: hash,
|
|
377
|
+
});
|
|
378
|
+
return;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
if (!jsonOut) {
|
|
382
|
+
header(`Put: ${fileName}`);
|
|
383
|
+
keyValue("Kind", kind);
|
|
384
|
+
keyValue("Source", sourceRef);
|
|
385
|
+
if (mimeType) keyValue("Content-Type", mimeType);
|
|
386
|
+
keyValue("Bytes", String(bytes));
|
|
387
|
+
if (existingPage) {
|
|
388
|
+
keyValue("Previous hash", existingHash ?? "none");
|
|
389
|
+
keyValue("New hash", hash);
|
|
390
|
+
}
|
|
391
|
+
spinner.start(`Creating page from ${kind}...`);
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
await repo.putPage({
|
|
395
|
+
slug: finalSlug,
|
|
396
|
+
type,
|
|
397
|
+
title,
|
|
398
|
+
compiledTruth: content,
|
|
399
|
+
timeline: "",
|
|
400
|
+
frontmatter,
|
|
401
|
+
});
|
|
402
|
+
|
|
403
|
+
if (!jsonOut) {
|
|
404
|
+
spinner.succeed(`Page created: ${finalSlug}`);
|
|
405
|
+
keyValue("Type", type);
|
|
406
|
+
keyValue("Content length", `${content.length} chars`);
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// ── Side-effect operations (only on new/changed content) ──
|
|
410
|
+
await repo.timelineAdd({
|
|
411
|
+
pageSlug: finalSlug,
|
|
412
|
+
date: new Date().toISOString().slice(0, 10),
|
|
413
|
+
source: type,
|
|
414
|
+
summary: `Ingested ${kind} ${fileName}`,
|
|
415
|
+
detail: sourceType === "url" ? `Source URL: ${sourceRef}` : "",
|
|
416
|
+
});
|
|
417
|
+
|
|
418
|
+
try {
|
|
419
|
+
await repo.writeRaw(finalSlug, sourceType, {
|
|
420
|
+
fileName,
|
|
421
|
+
sourceRef,
|
|
422
|
+
kind,
|
|
423
|
+
mimeType,
|
|
424
|
+
bytes,
|
|
425
|
+
metadata,
|
|
426
|
+
ingestedAt: new Date().toISOString(),
|
|
427
|
+
});
|
|
428
|
+
} catch (err) {
|
|
429
|
+
if (!jsonOut) {
|
|
430
|
+
warning(
|
|
431
|
+
`failed to record raw_data: ${err instanceof Error ? err.message : String(err)}`,
|
|
432
|
+
);
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
await applyEntityLinks(repo, finalSlug, content, jsonOut);
|
|
437
|
+
|
|
438
|
+
if (!jsonOut) {
|
|
439
|
+
const duration = formatDuration(Date.now() - startTime);
|
|
440
|
+
success(`Operation completed in ${duration}`);
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
print(program, {
|
|
444
|
+
ok: true,
|
|
445
|
+
action: "put",
|
|
446
|
+
slug: finalSlug,
|
|
447
|
+
kind,
|
|
448
|
+
sourceType,
|
|
449
|
+
sourceRef,
|
|
450
|
+
bytes,
|
|
451
|
+
contentLength: content.length,
|
|
452
|
+
contentHash: hash,
|
|
453
|
+
});
|
|
454
|
+
});
|
|
455
|
+
return;
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
// ── Branch 2: markdown (stdin or .md file) ──
|
|
261
459
|
const input = await resolveInput(opts.file, opts.stdin ?? false);
|
|
262
460
|
if (!input.trim()) {
|
|
263
461
|
throw new Error(
|
|
@@ -265,7 +463,7 @@ Examples:
|
|
|
265
463
|
);
|
|
266
464
|
}
|
|
267
465
|
const parsed = parsePageMarkdown(input);
|
|
268
|
-
|
|
466
|
+
|
|
269
467
|
// Auto-generate slug if not provided
|
|
270
468
|
let finalSlug = slug;
|
|
271
469
|
if (!finalSlug) {
|
|
@@ -283,7 +481,7 @@ Examples:
|
|
|
283
481
|
finalSlug = `notes/${timestamp}`;
|
|
284
482
|
}
|
|
285
483
|
}
|
|
286
|
-
|
|
484
|
+
|
|
287
485
|
const type =
|
|
288
486
|
opts.type ??
|
|
289
487
|
String(parsed.frontmatter.type ?? inferTypeFromSlug(finalSlug));
|
|
@@ -291,6 +489,10 @@ Examples:
|
|
|
291
489
|
opts.title ??
|
|
292
490
|
String(parsed.frontmatter.title ?? slugToTitle(finalSlug));
|
|
293
491
|
|
|
492
|
+
// Compute content hash and embed in frontmatter for idempotency
|
|
493
|
+
const hash = contentHash(parsed.compiledTruth);
|
|
494
|
+
parsed.frontmatter._contentHash = hash;
|
|
495
|
+
|
|
294
496
|
if (isDryRun(opts)) {
|
|
295
497
|
print(program, {
|
|
296
498
|
dryRun: true,
|
|
@@ -299,6 +501,7 @@ Examples:
|
|
|
299
501
|
type,
|
|
300
502
|
title,
|
|
301
503
|
contentLength: parsed.compiledTruth.length,
|
|
504
|
+
contentHash: hash,
|
|
302
505
|
hasTimeline: !!parsed.timeline,
|
|
303
506
|
frontmatterKeys: Object.keys(parsed.frontmatter),
|
|
304
507
|
});
|
|
@@ -309,12 +512,35 @@ Examples:
|
|
|
309
512
|
const jsonOut = isJson(program);
|
|
310
513
|
const spinner = createSpinner();
|
|
311
514
|
const startTime = Date.now();
|
|
312
|
-
|
|
515
|
+
|
|
516
|
+
// Check if content is unchanged (idempotency)
|
|
517
|
+
const existingPage = await repo.getPage(finalSlug);
|
|
518
|
+
const existingHash = existingPage?.frontmatter._contentHash as string | undefined;
|
|
519
|
+
|
|
520
|
+
if (existingHash === hash) {
|
|
521
|
+
if (!jsonOut) {
|
|
522
|
+
header(`Put: ${finalSlug}`);
|
|
523
|
+
success(`Content unchanged — skipped (hash: ${hash})`);
|
|
524
|
+
}
|
|
525
|
+
print(program, {
|
|
526
|
+
ok: true,
|
|
527
|
+
action: "put",
|
|
528
|
+
slug: finalSlug,
|
|
529
|
+
unchanged: true,
|
|
530
|
+
contentHash: hash,
|
|
531
|
+
});
|
|
532
|
+
return;
|
|
533
|
+
}
|
|
534
|
+
|
|
313
535
|
if (!jsonOut) {
|
|
314
536
|
header(`Put: ${finalSlug}`);
|
|
537
|
+
if (existingPage) {
|
|
538
|
+
keyValue("Previous hash", existingHash ?? "none");
|
|
539
|
+
keyValue("New hash", hash);
|
|
540
|
+
}
|
|
315
541
|
spinner.start(`Creating/updating page...`);
|
|
316
542
|
}
|
|
317
|
-
|
|
543
|
+
|
|
318
544
|
const page = await repo.putPage({
|
|
319
545
|
slug: finalSlug,
|
|
320
546
|
type,
|
|
@@ -323,27 +549,32 @@ Examples:
|
|
|
323
549
|
timeline: parsed.timeline,
|
|
324
550
|
frontmatter: parsed.frontmatter,
|
|
325
551
|
});
|
|
326
|
-
|
|
552
|
+
|
|
327
553
|
if (!jsonOut) {
|
|
328
554
|
spinner.succeed(`Page saved: ${page.slug}`);
|
|
329
555
|
keyValue("Title", title);
|
|
330
556
|
keyValue("Type", type);
|
|
331
557
|
keyValue("Content length", `${parsed.compiledTruth.length} chars`);
|
|
332
558
|
}
|
|
333
|
-
|
|
559
|
+
|
|
334
560
|
await applyEntityLinks(
|
|
335
561
|
repo,
|
|
336
562
|
finalSlug,
|
|
337
563
|
parsed.compiledTruth,
|
|
338
564
|
jsonOut,
|
|
339
565
|
);
|
|
340
|
-
|
|
566
|
+
|
|
341
567
|
if (!jsonOut) {
|
|
342
568
|
const duration = formatDuration(Date.now() - startTime);
|
|
343
569
|
success(`Operation completed in ${duration}`);
|
|
344
570
|
}
|
|
345
|
-
|
|
346
|
-
print(program, {
|
|
571
|
+
|
|
572
|
+
print(program, {
|
|
573
|
+
ok: true,
|
|
574
|
+
slug: page.slug,
|
|
575
|
+
updatedAt: page.updatedAt,
|
|
576
|
+
contentHash: hash,
|
|
577
|
+
});
|
|
347
578
|
});
|
|
348
579
|
},
|
|
349
580
|
);
|
|
@@ -414,18 +645,18 @@ Examples:
|
|
|
414
645
|
await withRepo(program, async (repo) => {
|
|
415
646
|
const jsonOut = isJson(program);
|
|
416
647
|
const spinner = createSpinner();
|
|
417
|
-
|
|
648
|
+
|
|
418
649
|
if (!jsonOut) {
|
|
419
650
|
header(`Delete: ${slug}`);
|
|
420
651
|
spinner.start(`Deleting page and related data...`);
|
|
421
652
|
}
|
|
422
|
-
|
|
653
|
+
|
|
423
654
|
await repo.deletePage(slug);
|
|
424
|
-
|
|
655
|
+
|
|
425
656
|
if (!jsonOut) {
|
|
426
657
|
spinner.succeed(`Page deleted: ${slug}`);
|
|
427
658
|
}
|
|
428
|
-
|
|
659
|
+
|
|
429
660
|
print(program, { ok: true, action: "delete", slug });
|
|
430
661
|
});
|
|
431
662
|
});
|
|
@@ -521,7 +752,7 @@ Examples:
|
|
|
521
752
|
await withRepo(program, async (repo) => {
|
|
522
753
|
const limit = Number(opts.limit ?? 10);
|
|
523
754
|
const hits = await repo.query(question, limit);
|
|
524
|
-
|
|
755
|
+
|
|
525
756
|
// If --llm flag, generate answer based on multi-layer context
|
|
526
757
|
if (opts.llm) {
|
|
527
758
|
const settings = await loadSettings();
|
|
@@ -529,20 +760,20 @@ Examples:
|
|
|
529
760
|
print(program, { error: "LLM not configured. Set llm.baseURL in settings." });
|
|
530
761
|
return;
|
|
531
762
|
}
|
|
532
|
-
|
|
763
|
+
|
|
533
764
|
const progress = createProgress();
|
|
534
765
|
progress.start("Searching knowledge base...");
|
|
535
|
-
|
|
766
|
+
|
|
536
767
|
const contextLimit = Number(opts.contextLimit ?? 5);
|
|
537
768
|
const topHits = hits.slice(0, contextLimit);
|
|
538
|
-
|
|
769
|
+
|
|
539
770
|
if (topHits.length === 0) {
|
|
540
771
|
progress.stop();
|
|
541
772
|
process.stderr.write("No relevant pages found.\n");
|
|
542
773
|
print(program, { answer: "No relevant information found in the knowledge base.", sources: [] });
|
|
543
774
|
return;
|
|
544
775
|
}
|
|
545
|
-
|
|
776
|
+
|
|
546
777
|
// Collect multi-layer context (primary + raw data + linked pages scored by relevance)
|
|
547
778
|
// ~100KB char budget ≈ 25K tokens, safe for most models
|
|
548
779
|
const MAX_CONTEXT_CHARS = 100_000;
|
|
@@ -552,33 +783,33 @@ Examples:
|
|
|
552
783
|
progress.update(`Loading ${stage}...`);
|
|
553
784
|
});
|
|
554
785
|
const ctxDuration = formatDuration(Date.now() - ctxStart);
|
|
555
|
-
|
|
786
|
+
|
|
556
787
|
if (sections.length === 0) {
|
|
557
788
|
progress.stop();
|
|
558
789
|
process.stderr.write("No content could be loaded.\n");
|
|
559
790
|
print(program, { answer: "Failed to load page content.", sources: [] });
|
|
560
791
|
return;
|
|
561
792
|
}
|
|
562
|
-
|
|
793
|
+
|
|
563
794
|
progress.succeed(`Loaded ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s) (${ctxDuration})`);
|
|
564
795
|
const startTime = Date.now();
|
|
565
|
-
|
|
796
|
+
|
|
566
797
|
const { answer, ok } = await generateAnswerWithStream(question, sections, stats, settings.llm);
|
|
567
|
-
|
|
798
|
+
|
|
568
799
|
if (!ok) {
|
|
569
800
|
// If streaming failed, answer contains the error message
|
|
570
801
|
console.log(answer);
|
|
571
802
|
return;
|
|
572
803
|
}
|
|
573
|
-
|
|
804
|
+
|
|
574
805
|
const duration = formatDuration(Date.now() - startTime);
|
|
575
|
-
|
|
806
|
+
|
|
576
807
|
// Show sources breakdown
|
|
577
808
|
console.log("\n---\n**Sources:**\n");
|
|
578
809
|
for (let i = 0; i < sections.length; i++) {
|
|
579
810
|
const s = sections[i];
|
|
580
811
|
const icon = s.type === 'primary' ? '📄' : s.type === 'raw_data' ? '📎' : '🔗';
|
|
581
|
-
console.log(`${icon} ${i + 1}. [[${s.slug}|${s.title}]]
|
|
812
|
+
console.log(`${icon} ${i + 1}. [[${s.slug}|${s.title}]] - ${s.label} (${(s.content.length / 1024).toFixed(1)}KB)`);
|
|
582
813
|
}
|
|
583
814
|
console.log(`\n*Context: ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)*`);
|
|
584
815
|
} else {
|
|
@@ -762,11 +993,11 @@ Examples:
|
|
|
762
993
|
throw new Error(`page not found: ${slug}`);
|
|
763
994
|
}
|
|
764
995
|
const settings = await loadSettings();
|
|
765
|
-
|
|
996
|
+
|
|
766
997
|
const progress = createProgress();
|
|
767
998
|
progress.start(`Extracting timeline from ${slug}...`);
|
|
768
999
|
const startTime = Date.now();
|
|
769
|
-
|
|
1000
|
+
|
|
770
1001
|
const result = await repo.extractAndAddTimeline(
|
|
771
1002
|
slug,
|
|
772
1003
|
page.compiledTruth,
|
|
@@ -774,16 +1005,16 @@ Examples:
|
|
|
774
1005
|
opts.defaultDate ?? new Date().toISOString().slice(0, 10),
|
|
775
1006
|
settings.llm,
|
|
776
1007
|
);
|
|
777
|
-
|
|
1008
|
+
|
|
778
1009
|
const duration = formatDuration(Date.now() - startTime);
|
|
779
|
-
|
|
1010
|
+
|
|
780
1011
|
if (result.entries.length > 0) {
|
|
781
1012
|
progress.succeed(`${result.entries.length} events extracted (${duration})`);
|
|
782
1013
|
} else {
|
|
783
1014
|
progress.stop();
|
|
784
1015
|
process.stderr.write(`No events found (${duration})\n`);
|
|
785
1016
|
}
|
|
786
|
-
|
|
1017
|
+
|
|
787
1018
|
print(program, {
|
|
788
1019
|
ok: true,
|
|
789
1020
|
action: "timeline-extract",
|
|
@@ -946,7 +1177,7 @@ Examples:
|
|
|
946
1177
|
data = JSON.parse(opts.data);
|
|
947
1178
|
} else if (opts.stdin) {
|
|
948
1179
|
const raw = await readMaybeStdin();
|
|
949
|
-
if (!raw?.trim()) throw new Error("empty stdin
|
|
1180
|
+
if (!raw?.trim()) throw new Error("empty stdin - pipe JSON");
|
|
950
1181
|
data = JSON.parse(raw);
|
|
951
1182
|
} else {
|
|
952
1183
|
throw new Error("provide --data <json> or --stdin");
|
|
@@ -995,7 +1226,7 @@ Examples:
|
|
|
995
1226
|
await withRepo(program, async (repo) => {
|
|
996
1227
|
const root = resolve(dir);
|
|
997
1228
|
const files = await collectMarkdownFiles(root);
|
|
998
|
-
|
|
1229
|
+
|
|
999
1230
|
if (isDryRun(opts)) {
|
|
1000
1231
|
print(program, {
|
|
1001
1232
|
dryRun: true,
|
|
@@ -1011,16 +1242,16 @@ Examples:
|
|
|
1011
1242
|
const settings = await loadSettings();
|
|
1012
1243
|
const spinner = createSpinner();
|
|
1013
1244
|
const startTime = Date.now();
|
|
1014
|
-
|
|
1245
|
+
|
|
1015
1246
|
if (!jsonOut) {
|
|
1016
1247
|
header(`Import: ${root}`);
|
|
1017
1248
|
}
|
|
1018
|
-
|
|
1249
|
+
|
|
1019
1250
|
// Phase 1: Parse all files and collect data
|
|
1020
1251
|
if (!jsonOut) {
|
|
1021
1252
|
spinner.start(`Scanning ${files.length} files...`);
|
|
1022
1253
|
}
|
|
1023
|
-
|
|
1254
|
+
|
|
1024
1255
|
const fileData: Array<{
|
|
1025
1256
|
file: string;
|
|
1026
1257
|
slug: string;
|
|
@@ -1030,7 +1261,7 @@ Examples:
|
|
|
1030
1261
|
timelineEntries: ReturnType<typeof extractTimelineLines>;
|
|
1031
1262
|
tags: string[];
|
|
1032
1263
|
}> = [];
|
|
1033
|
-
|
|
1264
|
+
|
|
1034
1265
|
for (const file of files) {
|
|
1035
1266
|
const rawSlug = pathToSlug(file, root);
|
|
1036
1267
|
const slug = normalizeLongSlug(rawSlug);
|
|
@@ -1043,19 +1274,19 @@ Examples:
|
|
|
1043
1274
|
: [];
|
|
1044
1275
|
fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
|
|
1045
1276
|
}
|
|
1046
|
-
|
|
1277
|
+
|
|
1047
1278
|
if (!jsonOut) {
|
|
1048
1279
|
spinner.succeed(`Found ${files.length} markdown files`);
|
|
1049
1280
|
}
|
|
1050
|
-
|
|
1281
|
+
|
|
1051
1282
|
// Phase 2: Write all pages first (skip embed for performance)
|
|
1052
1283
|
if (!jsonOut) {
|
|
1053
1284
|
spinner.start(`Writing ${fileData.length} pages to database...`);
|
|
1054
1285
|
}
|
|
1055
|
-
|
|
1286
|
+
|
|
1056
1287
|
const allSlugs: string[] = [];
|
|
1057
1288
|
const writeErrors: string[] = [];
|
|
1058
|
-
|
|
1289
|
+
|
|
1059
1290
|
for (let i = 0; i < fileData.length; i++) {
|
|
1060
1291
|
const { slug, parsed } = fileData[i]!;
|
|
1061
1292
|
if (!jsonOut && i % 20 === 0) {
|
|
@@ -1075,7 +1306,7 @@ Examples:
|
|
|
1075
1306
|
writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
|
|
1076
1307
|
}
|
|
1077
1308
|
}
|
|
1078
|
-
|
|
1309
|
+
|
|
1079
1310
|
if (!jsonOut) {
|
|
1080
1311
|
spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
|
|
1081
1312
|
if (writeErrors.length > 0) {
|
|
@@ -1088,16 +1319,16 @@ Examples:
|
|
|
1088
1319
|
}
|
|
1089
1320
|
}
|
|
1090
1321
|
}
|
|
1091
|
-
|
|
1322
|
+
|
|
1092
1323
|
// Phase 3: Parallel entity extraction (main optimization)
|
|
1093
1324
|
const BATCH_SIZE = 10;
|
|
1094
1325
|
const entityResults = new Map<string, Awaited<ReturnType<typeof extractRelations>>>();
|
|
1095
|
-
|
|
1326
|
+
|
|
1096
1327
|
if (settings.llm.baseURL) {
|
|
1097
1328
|
if (!jsonOut) {
|
|
1098
1329
|
spinner.start(`Extracting entities with LLM...`);
|
|
1099
1330
|
}
|
|
1100
|
-
|
|
1331
|
+
|
|
1101
1332
|
for (let i = 0; i < fileData.length; i += BATCH_SIZE) {
|
|
1102
1333
|
const batch = fileData.slice(i, i + BATCH_SIZE);
|
|
1103
1334
|
if (!jsonOut) {
|
|
@@ -1112,7 +1343,7 @@ Examples:
|
|
|
1112
1343
|
entityResults.set(slug, relations);
|
|
1113
1344
|
}
|
|
1114
1345
|
}
|
|
1115
|
-
|
|
1346
|
+
|
|
1116
1347
|
if (!jsonOut) {
|
|
1117
1348
|
spinner.succeed(`Entity extraction complete`);
|
|
1118
1349
|
}
|
|
@@ -1121,17 +1352,17 @@ Examples:
|
|
|
1121
1352
|
warning(`LLM not configured, skipping entity extraction`);
|
|
1122
1353
|
}
|
|
1123
1354
|
}
|
|
1124
|
-
|
|
1355
|
+
|
|
1125
1356
|
// Phase 4: Write links, tags, timeline, and entity pages
|
|
1126
1357
|
if (!jsonOut) {
|
|
1127
1358
|
spinner.start(`Creating links, tags, and timeline entries...`);
|
|
1128
1359
|
}
|
|
1129
|
-
|
|
1360
|
+
|
|
1130
1361
|
let linkCount = 0;
|
|
1131
1362
|
let timelineCount = 0;
|
|
1132
1363
|
let entityCount = 0;
|
|
1133
1364
|
let tagCount = 0;
|
|
1134
|
-
|
|
1365
|
+
|
|
1135
1366
|
// Collect timeline entries for batch insert
|
|
1136
1367
|
const allTimelineEntries: Array<{
|
|
1137
1368
|
pageSlug: string;
|
|
@@ -1140,14 +1371,14 @@ Examples:
|
|
|
1140
1371
|
summary: string;
|
|
1141
1372
|
detail: string;
|
|
1142
1373
|
}> = [];
|
|
1143
|
-
|
|
1374
|
+
|
|
1144
1375
|
for (const { slug, wikiLinks, timelineEntries, tags, content } of fileData) {
|
|
1145
1376
|
// Wiki links
|
|
1146
1377
|
for (const link of wikiLinks) {
|
|
1147
1378
|
await repo.link(slug, link, "import");
|
|
1148
1379
|
linkCount++;
|
|
1149
1380
|
}
|
|
1150
|
-
|
|
1381
|
+
|
|
1151
1382
|
// Collect timeline entries for batch insert
|
|
1152
1383
|
for (const entry of timelineEntries) {
|
|
1153
1384
|
allTimelineEntries.push({
|
|
@@ -1159,13 +1390,13 @@ Examples:
|
|
|
1159
1390
|
});
|
|
1160
1391
|
timelineCount++;
|
|
1161
1392
|
}
|
|
1162
|
-
|
|
1393
|
+
|
|
1163
1394
|
// Tags
|
|
1164
1395
|
for (const tag of tags) {
|
|
1165
1396
|
await repo.tag(slug, tag);
|
|
1166
1397
|
tagCount++;
|
|
1167
1398
|
}
|
|
1168
|
-
|
|
1399
|
+
|
|
1169
1400
|
// Entity links from parallel extraction
|
|
1170
1401
|
const relations = entityResults.get(slug);
|
|
1171
1402
|
if (relations && relations.length > 0) {
|
|
@@ -1175,12 +1406,12 @@ Examples:
|
|
|
1175
1406
|
const toCandidate = entityToSlug(r.to.name, r.to.type);
|
|
1176
1407
|
const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
|
|
1177
1408
|
const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
|
|
1178
|
-
|
|
1409
|
+
|
|
1179
1410
|
const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
|
|
1180
1411
|
const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
|
|
1181
1412
|
if (c1) entityCount++;
|
|
1182
1413
|
if (c2) entityCount++;
|
|
1183
|
-
|
|
1414
|
+
|
|
1184
1415
|
await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
|
|
1185
1416
|
await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
|
|
1186
1417
|
await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
|
|
@@ -1188,16 +1419,16 @@ Examples:
|
|
|
1188
1419
|
}
|
|
1189
1420
|
}
|
|
1190
1421
|
}
|
|
1191
|
-
|
|
1422
|
+
|
|
1192
1423
|
// Batch insert all timeline entries
|
|
1193
1424
|
if (allTimelineEntries.length > 0) {
|
|
1194
1425
|
await repo.timelineAddBatch(allTimelineEntries);
|
|
1195
1426
|
}
|
|
1196
|
-
|
|
1427
|
+
|
|
1197
1428
|
if (!jsonOut) {
|
|
1198
1429
|
spinner.succeed(`Created links, tags, and timeline`);
|
|
1199
1430
|
}
|
|
1200
|
-
|
|
1431
|
+
|
|
1201
1432
|
// Phase 5: Batch sync all pages to search index
|
|
1202
1433
|
if (opts.skipIndex) {
|
|
1203
1434
|
if (!jsonOut) {
|
|
@@ -1208,14 +1439,14 @@ Examples:
|
|
|
1208
1439
|
spinner.start(`Indexing ${allSlugs.length} pages for search...`);
|
|
1209
1440
|
}
|
|
1210
1441
|
await repo.embedAll();
|
|
1211
|
-
|
|
1442
|
+
|
|
1212
1443
|
if (!jsonOut) {
|
|
1213
1444
|
spinner.succeed(`Search indexing complete`);
|
|
1214
1445
|
}
|
|
1215
1446
|
}
|
|
1216
|
-
|
|
1447
|
+
|
|
1217
1448
|
const duration = formatDuration(Date.now() - startTime);
|
|
1218
|
-
|
|
1449
|
+
|
|
1219
1450
|
if (!jsonOut) {
|
|
1220
1451
|
// Print summary
|
|
1221
1452
|
header("Import Summary");
|
|
@@ -1226,12 +1457,12 @@ Examples:
|
|
|
1226
1457
|
keyValue("Timeline entries", String(timelineCount));
|
|
1227
1458
|
keyValue("Tags added", String(tagCount));
|
|
1228
1459
|
keyValue("Duration", duration);
|
|
1229
|
-
|
|
1460
|
+
|
|
1230
1461
|
if (writeErrors.length > 0) {
|
|
1231
1462
|
warning(`${writeErrors.length} pages had errors`);
|
|
1232
1463
|
}
|
|
1233
1464
|
}
|
|
1234
|
-
|
|
1465
|
+
|
|
1235
1466
|
print(program, {
|
|
1236
1467
|
ok: true,
|
|
1237
1468
|
importedFiles: files.length,
|
|
@@ -1279,116 +1510,6 @@ Examples:
|
|
|
1279
1510
|
});
|
|
1280
1511
|
});
|
|
1281
1512
|
|
|
1282
|
-
// -- ingest ---------------------------------------------------------------
|
|
1283
|
-
|
|
1284
|
-
addDryRun(
|
|
1285
|
-
program
|
|
1286
|
-
.command("ingest")
|
|
1287
|
-
.argument("[file]", "file path to ingest (omit for stdin)")
|
|
1288
|
-
.option("--type <type>", "source type", "doc")
|
|
1289
|
-
.option("--stdin", "read from stdin", false)
|
|
1290
|
-
.description("ingest a file as a new page (under ingest/<name>)")
|
|
1291
|
-
.addHelpText(
|
|
1292
|
-
"after",
|
|
1293
|
-
`
|
|
1294
|
-
Examples:
|
|
1295
|
-
ebrain ingest report.pdf --type pdf
|
|
1296
|
-
cat article.md | ebrain ingest --stdin --type article
|
|
1297
|
-
ebrain ingest report.pdf --type pdf --dry-run
|
|
1298
|
-
`,
|
|
1299
|
-
),
|
|
1300
|
-
).action(
|
|
1301
|
-
async (
|
|
1302
|
-
file: string | undefined,
|
|
1303
|
-
opts: { type?: string; stdin?: boolean; dryRun?: boolean },
|
|
1304
|
-
) => {
|
|
1305
|
-
let content: string;
|
|
1306
|
-
let fileName: string;
|
|
1307
|
-
|
|
1308
|
-
if (file) {
|
|
1309
|
-
const fullPath = resolve(file);
|
|
1310
|
-
if (!(await fileExists(fullPath))) {
|
|
1311
|
-
throw new Error(`file not found: ${file}`);
|
|
1312
|
-
}
|
|
1313
|
-
content = await readTextFile(fullPath);
|
|
1314
|
-
fileName = basename(fullPath);
|
|
1315
|
-
} else if (opts.stdin) {
|
|
1316
|
-
const raw = await readMaybeStdin();
|
|
1317
|
-
if (!raw?.trim()) throw new Error("empty stdin — pipe content");
|
|
1318
|
-
content = raw;
|
|
1319
|
-
fileName = "stdin";
|
|
1320
|
-
} else {
|
|
1321
|
-
throw new Error("provide <file> or --stdin");
|
|
1322
|
-
}
|
|
1323
|
-
|
|
1324
|
-
const slug = `ingest/${fileName.replace(/\.[^.]+$/, "")}`;
|
|
1325
|
-
const type = opts.type ?? "doc";
|
|
1326
|
-
|
|
1327
|
-
if (isDryRun(opts)) {
|
|
1328
|
-
print(program, {
|
|
1329
|
-
dryRun: true,
|
|
1330
|
-
action: "ingest",
|
|
1331
|
-
slug,
|
|
1332
|
-
type,
|
|
1333
|
-
contentLength: content.length,
|
|
1334
|
-
});
|
|
1335
|
-
return;
|
|
1336
|
-
}
|
|
1337
|
-
|
|
1338
|
-
await withRepo(program, async (repo) => {
|
|
1339
|
-
const jsonOut = isJson(program);
|
|
1340
|
-
const spinner = createSpinner();
|
|
1341
|
-
const startTime = Date.now();
|
|
1342
|
-
|
|
1343
|
-
if (!jsonOut) {
|
|
1344
|
-
header(`Ingest: ${fileName}`);
|
|
1345
|
-
spinner.start(`Creating page from file...`);
|
|
1346
|
-
}
|
|
1347
|
-
|
|
1348
|
-
await repo.putPage({
|
|
1349
|
-
slug,
|
|
1350
|
-
type,
|
|
1351
|
-
title: slugToTitle(slug),
|
|
1352
|
-
compiledTruth: content,
|
|
1353
|
-
timeline: "",
|
|
1354
|
-
frontmatter: {
|
|
1355
|
-
sourceFile: resolve(fileName),
|
|
1356
|
-
sourceType: type,
|
|
1357
|
-
},
|
|
1358
|
-
});
|
|
1359
|
-
|
|
1360
|
-
if (!jsonOut) {
|
|
1361
|
-
spinner.succeed(`Page created: ${slug}`);
|
|
1362
|
-
keyValue("Source file", fileName);
|
|
1363
|
-
keyValue("Type", type);
|
|
1364
|
-
keyValue("Content length", `${content.length} chars`);
|
|
1365
|
-
}
|
|
1366
|
-
|
|
1367
|
-
await repo.timelineAdd({
|
|
1368
|
-
pageSlug: slug,
|
|
1369
|
-
date: new Date().toISOString().slice(0, 10),
|
|
1370
|
-
source: type,
|
|
1371
|
-
summary: `Ingested file ${fileName}`,
|
|
1372
|
-
detail: "",
|
|
1373
|
-
});
|
|
1374
|
-
|
|
1375
|
-
await applyEntityLinks(
|
|
1376
|
-
repo,
|
|
1377
|
-
slug,
|
|
1378
|
-
content,
|
|
1379
|
-
jsonOut,
|
|
1380
|
-
);
|
|
1381
|
-
|
|
1382
|
-
if (!jsonOut) {
|
|
1383
|
-
const duration = formatDuration(Date.now() - startTime);
|
|
1384
|
-
success(`Ingestion completed in ${duration}`);
|
|
1385
|
-
}
|
|
1386
|
-
|
|
1387
|
-
print(program, { ok: true, action: "ingest", slug });
|
|
1388
|
-
});
|
|
1389
|
-
},
|
|
1390
|
-
);
|
|
1391
|
-
|
|
1392
1513
|
// -- embed ----------------------------------------------------------------
|
|
1393
1514
|
|
|
1394
1515
|
addDryRun(
|
|
@@ -1428,26 +1549,26 @@ Examples:
|
|
|
1428
1549
|
const jsonOut = isJson(program);
|
|
1429
1550
|
const spinner = createSpinner();
|
|
1430
1551
|
const startTime = Date.now();
|
|
1431
|
-
|
|
1552
|
+
|
|
1432
1553
|
if (!jsonOut) {
|
|
1433
1554
|
header("Embed All Pages");
|
|
1434
1555
|
spinner.start(`Loading pages...`);
|
|
1435
1556
|
}
|
|
1436
|
-
|
|
1557
|
+
|
|
1437
1558
|
const pages = await repo.listPages({ limit: 100000 });
|
|
1438
|
-
|
|
1559
|
+
|
|
1439
1560
|
if (!jsonOut) {
|
|
1440
1561
|
spinner.update(`Embedding ${pages.length} pages...`);
|
|
1441
1562
|
}
|
|
1442
|
-
|
|
1563
|
+
|
|
1443
1564
|
const count = await repo.embedAll();
|
|
1444
|
-
|
|
1565
|
+
|
|
1445
1566
|
if (!jsonOut) {
|
|
1446
1567
|
const duration = formatDuration(Date.now() - startTime);
|
|
1447
1568
|
spinner.succeed(`Embedded ${count} pages`);
|
|
1448
1569
|
keyValue("Duration", duration);
|
|
1449
1570
|
}
|
|
1450
|
-
|
|
1571
|
+
|
|
1451
1572
|
print(program, { embedded: count, mode: "all" });
|
|
1452
1573
|
});
|
|
1453
1574
|
return;
|
|
@@ -1462,18 +1583,18 @@ Examples:
|
|
|
1462
1583
|
await withRepo(program, async (repo) => {
|
|
1463
1584
|
const jsonOut = isJson(program);
|
|
1464
1585
|
const spinner = createSpinner();
|
|
1465
|
-
|
|
1586
|
+
|
|
1466
1587
|
if (!jsonOut) {
|
|
1467
1588
|
header(`Embed: ${slug}`);
|
|
1468
1589
|
spinner.start(`Generating embedding for page...`);
|
|
1469
1590
|
}
|
|
1470
|
-
|
|
1591
|
+
|
|
1471
1592
|
await repo.syncPageToSearch(slug);
|
|
1472
|
-
|
|
1593
|
+
|
|
1473
1594
|
if (!jsonOut) {
|
|
1474
1595
|
spinner.succeed(`Page embedded: ${slug}`);
|
|
1475
1596
|
}
|
|
1476
|
-
|
|
1597
|
+
|
|
1477
1598
|
print(program, { embedded: 1, slug });
|
|
1478
1599
|
});
|
|
1479
1600
|
},
|
|
@@ -1483,27 +1604,106 @@ Examples:
|
|
|
1483
1604
|
|
|
1484
1605
|
program
|
|
1485
1606
|
.command("init")
|
|
1486
|
-
.description("initialize
|
|
1607
|
+
.description("initialize ebrain: create config, database, and show setup guide")
|
|
1487
1608
|
.addHelpText(
|
|
1488
1609
|
"after",
|
|
1489
1610
|
`
|
|
1490
1611
|
Examples:
|
|
1491
1612
|
ebrain init
|
|
1613
|
+
ebrain init --db ./my.db
|
|
1492
1614
|
`,
|
|
1493
1615
|
)
|
|
1494
1616
|
.action(async () => {
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1617
|
+
const jsonOut = isJson(program);
|
|
1618
|
+
const settings = await loadSettings();
|
|
1619
|
+
const cliDb = program.opts().db;
|
|
1620
|
+
const dbPath = cliDb ?? settings.dbPath;
|
|
1621
|
+
|
|
1622
|
+
if (!jsonOut) {
|
|
1623
|
+
header("ebrain init");
|
|
1624
|
+
}
|
|
1625
|
+
|
|
1626
|
+
// Step 1: Create settings.json if it doesn't exist
|
|
1627
|
+
const { createDefaultSettings } = await import("../settings");
|
|
1628
|
+
const settingsCreated = await createDefaultSettings();
|
|
1629
|
+
|
|
1630
|
+
if (!jsonOut) {
|
|
1631
|
+
if (settingsCreated) {
|
|
1632
|
+
success(`Created config: ${SETTINGS_PATH}`);
|
|
1633
|
+
} else {
|
|
1634
|
+
success(`Config already exists: ${SETTINGS_PATH}`);
|
|
1635
|
+
}
|
|
1636
|
+
}
|
|
1637
|
+
|
|
1638
|
+
// Step 2: Check or initialize database
|
|
1639
|
+
const dbExists = await fileExists(dbPath);
|
|
1640
|
+
let dbInitialized = false;
|
|
1641
|
+
|
|
1642
|
+
if (dbExists) {
|
|
1643
|
+
// Database already exists, skip connection attempt to avoid
|
|
1644
|
+
// noisy errors (e.g. embedding function key mismatch)
|
|
1645
|
+
if (!jsonOut) {
|
|
1646
|
+
success(`Database already exists: ${dbPath}`);
|
|
1647
|
+
}
|
|
1648
|
+
dbInitialized = true;
|
|
1649
|
+
} else {
|
|
1650
|
+
// Try to create it without collection - embedding config may not be ready
|
|
1651
|
+
try {
|
|
1652
|
+
const db = await BrainDb.connect(dbPath, settings, { skipCollection: true });
|
|
1653
|
+
await db.close();
|
|
1654
|
+
await new Promise((r) => setTimeout(r, 200));
|
|
1655
|
+
dbInitialized = true;
|
|
1656
|
+
if (!jsonOut) {
|
|
1657
|
+
success(`Database initialized: ${dbPath}`);
|
|
1658
|
+
}
|
|
1659
|
+
} catch {
|
|
1660
|
+
if (!jsonOut) {
|
|
1661
|
+
warning(`Database will be auto-created on first use`);
|
|
1662
|
+
}
|
|
1663
|
+
}
|
|
1664
|
+
}
|
|
1665
|
+
|
|
1666
|
+
// Step 3: Show setup guide
|
|
1667
|
+
if (!jsonOut) {
|
|
1668
|
+
console.log("");
|
|
1669
|
+
separator();
|
|
1670
|
+
info("Quick Start Guide");
|
|
1671
|
+
console.log("");
|
|
1672
|
+
|
|
1673
|
+
subItem("1. Configure LLM (for AI queries):", 0);
|
|
1674
|
+
subItem(` Edit ${SETTINGS_PATH}`, 4);
|
|
1675
|
+
subItem(` Set llm.baseURL to your OpenAI-compatible API endpoint`, 4);
|
|
1676
|
+
subItem(` Set llm.apiKey or export DASHSCOPE_API_KEY`, 4);
|
|
1677
|
+
console.log("");
|
|
1678
|
+
|
|
1679
|
+
subItem("2. Add your first page:", 0);
|
|
1680
|
+
subItem(" echo '# Hello' | ebrain put hello --stdin", 4);
|
|
1681
|
+
console.log("");
|
|
1682
|
+
|
|
1683
|
+
subItem("3. Import a directory of markdown files:", 0);
|
|
1684
|
+
subItem(" ebrain import ./docs", 4);
|
|
1685
|
+
console.log("");
|
|
1686
|
+
|
|
1687
|
+
subItem("4. Query with AI:", 0);
|
|
1688
|
+
subItem(' ebrain query "What did we ship in Q4?" --llm', 4);
|
|
1689
|
+
console.log("");
|
|
1690
|
+
|
|
1691
|
+
subItem("5. Visualize your knowledge graph:", 0);
|
|
1692
|
+
subItem(" ebrain graph", 4);
|
|
1693
|
+
console.log("");
|
|
1694
|
+
|
|
1695
|
+
separator();
|
|
1696
|
+
}
|
|
1697
|
+
|
|
1698
|
+
print(program, {
|
|
1699
|
+
ok: true,
|
|
1700
|
+
settingsPath: SETTINGS_PATH,
|
|
1701
|
+
settingsCreated,
|
|
1702
|
+
dbPath,
|
|
1703
|
+
dbInitialized,
|
|
1506
1704
|
});
|
|
1705
|
+
|
|
1706
|
+
process.exit(0);
|
|
1507
1707
|
});
|
|
1508
1708
|
|
|
1509
1709
|
program
|
|
@@ -1521,7 +1721,7 @@ Examples:
|
|
|
1521
1721
|
await withRepo(program, async (repo) => {
|
|
1522
1722
|
const jsonOut = isJson(program);
|
|
1523
1723
|
const stats = await repo.stats();
|
|
1524
|
-
|
|
1724
|
+
|
|
1525
1725
|
if (!jsonOut) {
|
|
1526
1726
|
header("Knowledge Base Statistics");
|
|
1527
1727
|
keyValue("Pages", String(stats.pages));
|
|
@@ -1530,7 +1730,7 @@ Examples:
|
|
|
1530
1730
|
keyValue("Timeline entries", String(stats.timelineEntries));
|
|
1531
1731
|
keyValue("Raw data rows", String(stats.rawRows));
|
|
1532
1732
|
}
|
|
1533
|
-
|
|
1733
|
+
|
|
1534
1734
|
print(program, stats);
|
|
1535
1735
|
});
|
|
1536
1736
|
});
|
|
@@ -1591,7 +1791,7 @@ async function withRepo(
|
|
|
1591
1791
|
const db = await BrainDb.connect(dbPath, settings);
|
|
1592
1792
|
const repo = new BrainRepository(db);
|
|
1593
1793
|
await callback(repo);
|
|
1594
|
-
|
|
1794
|
+
|
|
1595
1795
|
// Gracefully close database
|
|
1596
1796
|
// Note: seekdb SDK's InternalEmbeddedClient.close() is empty in embedded mode
|
|
1597
1797
|
// Data may not flush properly. Use remote seekdb server for reliability.
|
|
@@ -1600,10 +1800,10 @@ async function withRepo(
|
|
|
1600
1800
|
} catch (e) {
|
|
1601
1801
|
// Close may fail due to seekdb native bug
|
|
1602
1802
|
}
|
|
1603
|
-
|
|
1803
|
+
|
|
1604
1804
|
// Give seekdb extra time after close
|
|
1605
1805
|
await new Promise((r) => setTimeout(r, 500));
|
|
1606
|
-
|
|
1806
|
+
|
|
1607
1807
|
// CLI: force exit to bypass seekdb native cleanup segfault
|
|
1608
1808
|
process.exit(0);
|
|
1609
1809
|
}
|
|
@@ -1646,7 +1846,7 @@ function normalizeLinkSlug(path: string): string {
|
|
|
1646
1846
|
}
|
|
1647
1847
|
|
|
1648
1848
|
// ---------------------------------------------------------------------------
|
|
1649
|
-
// LLM Answer Generation
|
|
1849
|
+
// LLM Answer Generation - Multi-layer Context Collection
|
|
1650
1850
|
// ---------------------------------------------------------------------------
|
|
1651
1851
|
|
|
1652
1852
|
/** A single section of context for the LLM prompt. */
|
|
@@ -1661,12 +1861,12 @@ interface ContextSection {
|
|
|
1661
1861
|
|
|
1662
1862
|
/**
|
|
1663
1863
|
* Collect multi-layer context for LLM answer generation.
|
|
1664
|
-
*
|
|
1864
|
+
*
|
|
1665
1865
|
* Layers (in priority order):
|
|
1666
1866
|
* 1. Primary: compiledTruth + timeline of each hit page
|
|
1667
1867
|
* 2. Raw data: original documents stored via raw.set
|
|
1668
1868
|
* 3. Linked pages: compiledTruth of pages linked to/from hit pages
|
|
1669
|
-
*
|
|
1869
|
+
*
|
|
1670
1870
|
* Budget is enforced via total character limit.
|
|
1671
1871
|
*/
|
|
1672
1872
|
async function collectContextForLLM(
|
|
@@ -1765,8 +1965,8 @@ async function collectContextForLLM(
|
|
|
1765
1965
|
}
|
|
1766
1966
|
}
|
|
1767
1967
|
|
|
1768
|
-
// Layer 3: Linked pages
|
|
1769
|
-
// No second repo.query() call needed
|
|
1968
|
+
// Layer 3: Linked pages - score using cached data + keyword matching
|
|
1969
|
+
// No second repo.query() call needed - reuse hits scores + keyword fallback
|
|
1770
1970
|
onProgress?.('linked pages');
|
|
1771
1971
|
const allLinkedSlugs = new Set<string>();
|
|
1772
1972
|
for (const hit of hits) {
|
|
@@ -1872,7 +2072,7 @@ async function collectContextForLLM(
|
|
|
1872
2072
|
function computeKeywordRelevance(text: string, question: string): number {
|
|
1873
2073
|
const STOP_CHARS = new Set('的是了在和我有你就这不人都说上个大国为到以们年会生地要主中子自实家小对多能好可很所把当');
|
|
1874
2074
|
const questionChars = [...question]
|
|
1875
|
-
.filter(c => !/\s|[
|
|
2075
|
+
.filter(c => !/\s|[,,。!?、;::""''()()【】\[\]{}<>\/\\|~`@#$%^&*+=_-]/.test(c) && !STOP_CHARS.has(c));
|
|
1876
2076
|
if (questionChars.length === 0) return 0;
|
|
1877
2077
|
|
|
1878
2078
|
const uniqueChars = new Set(questionChars);
|
|
@@ -1923,7 +2123,7 @@ async function generateAnswerWithStream(
|
|
|
1923
2123
|
contextParts.push(`## ${header}\n`);
|
|
1924
2124
|
for (const s of group) {
|
|
1925
2125
|
sectionIndex++;
|
|
1926
|
-
contextParts.push(`### [${sectionIndex}] ${s.title}
|
|
2126
|
+
contextParts.push(`### [${sectionIndex}] ${s.title} - ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
|
|
1927
2127
|
}
|
|
1928
2128
|
contextParts.push('');
|
|
1929
2129
|
}
|
|
@@ -1934,7 +2134,7 @@ async function generateAnswerWithStream(
|
|
|
1934
2134
|
|
|
1935
2135
|
const context = contextParts.join('\n');
|
|
1936
2136
|
|
|
1937
|
-
const prompt =
|
|
2137
|
+
const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。
|
|
1938
2138
|
|
|
1939
2139
|
## 问题
|
|
1940
2140
|
${question}
|
|
@@ -1944,13 +2144,13 @@ ${question}
|
|
|
1944
2144
|
${context}
|
|
1945
2145
|
|
|
1946
2146
|
## 回答要求
|
|
1947
|
-
-
|
|
1948
|
-
-
|
|
2147
|
+
- 仅基于提供的知识库内容回答,不要编造信息
|
|
2148
|
+
- 如果知识库中没有相关信息,请明确说明
|
|
1949
2149
|
- 引用来源时使用 [[slug|标题]] 的格式
|
|
1950
2150
|
- 使用清晰的 markdown 格式
|
|
1951
|
-
-
|
|
2151
|
+
- 如果涉及时间线信息,请在回答中体现
|
|
1952
2152
|
- 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
|
|
1953
|
-
-
|
|
2153
|
+
- 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)
|
|
1954
2154
|
|
|
1955
2155
|
## 回答`;
|
|
1956
2156
|
|
|
@@ -1965,10 +2165,10 @@ ${context}
|
|
|
1965
2165
|
|
|
1966
2166
|
try {
|
|
1967
2167
|
const url = llm.baseURL.endsWith("/") ? llm.baseURL + "chat/completions" : llm.baseURL + "/chat/completions";
|
|
1968
|
-
|
|
2168
|
+
|
|
1969
2169
|
// Show thinking indicator while waiting for first token
|
|
1970
2170
|
process.stderr.write(`\x1b[35m💭\x1b[0m \x1b[2mConnecting to ${llm.model}...\x1b[0m\n`);
|
|
1971
|
-
|
|
2171
|
+
|
|
1972
2172
|
const resp = await fetch(
|
|
1973
2173
|
url,
|
|
1974
2174
|
{
|
|
@@ -1983,7 +2183,7 @@ ${context}
|
|
|
1983
2183
|
messages: [
|
|
1984
2184
|
{
|
|
1985
2185
|
role: "system",
|
|
1986
|
-
content: "
|
|
2186
|
+
content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
|
|
1987
2187
|
},
|
|
1988
2188
|
{ role: "user", content: prompt },
|
|
1989
2189
|
],
|
|
@@ -2092,7 +2292,7 @@ async function generateAnswerWithContext(
|
|
|
2092
2292
|
contextParts.push(`## ${header}\n`);
|
|
2093
2293
|
for (const s of group) {
|
|
2094
2294
|
sectionIndex++;
|
|
2095
|
-
contextParts.push(`### [${sectionIndex}] ${s.title}
|
|
2295
|
+
contextParts.push(`### [${sectionIndex}] ${s.title} - ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
|
|
2096
2296
|
}
|
|
2097
2297
|
contextParts.push('');
|
|
2098
2298
|
}
|
|
@@ -2103,7 +2303,7 @@ async function generateAnswerWithContext(
|
|
|
2103
2303
|
|
|
2104
2304
|
const context = contextParts.join('\n');
|
|
2105
2305
|
|
|
2106
|
-
const prompt =
|
|
2306
|
+
const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。
|
|
2107
2307
|
|
|
2108
2308
|
## 问题
|
|
2109
2309
|
${question}
|
|
@@ -2113,13 +2313,13 @@ ${question}
|
|
|
2113
2313
|
${context}
|
|
2114
2314
|
|
|
2115
2315
|
## 回答要求
|
|
2116
|
-
-
|
|
2117
|
-
-
|
|
2316
|
+
- 仅基于提供的知识库内容回答,不要编造信息
|
|
2317
|
+
- 如果知识库中没有相关信息,请明确说明
|
|
2118
2318
|
- 引用来源时使用 [[slug|标题]] 的格式
|
|
2119
2319
|
- 使用清晰的 markdown 格式
|
|
2120
|
-
-
|
|
2320
|
+
- 如果涉及时间线信息,请在回答中体现
|
|
2121
2321
|
- 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
|
|
2122
|
-
-
|
|
2322
|
+
- 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)
|
|
2123
2323
|
|
|
2124
2324
|
## 回答`;
|
|
2125
2325
|
|
|
@@ -2137,7 +2337,7 @@ ${context}
|
|
|
2137
2337
|
messages: [
|
|
2138
2338
|
{
|
|
2139
2339
|
role: "system",
|
|
2140
|
-
content: "
|
|
2340
|
+
content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
|
|
2141
2341
|
},
|
|
2142
2342
|
{ role: "user", content: prompt },
|
|
2143
2343
|
],
|