ex-brain 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1973 @@
1
+ import { basename, resolve } from "node:path";
2
+ import { Command } from "commander";
3
+ import { DEFAULT_DB_NAME, inferTypeFromSlug, slugToTitle, normalizeLongSlug, slugify } from "../config";
4
+ import { BrainDb } from "../db/client";
5
+ import {
6
+ collectMarkdownFiles,
7
+ ensureDir,
8
+ fileExists,
9
+ pathToSlug,
10
+ readMaybeStdin,
11
+ readTextFile,
12
+ slugToPath,
13
+ writeTextFile,
14
+ } from "../markdown/io";
15
+ import {
16
+ extractTimelineLines,
17
+ extractWikiStyleLinks,
18
+ parsePageMarkdown,
19
+ renderPageMarkdown,
20
+ } from "../markdown/parser";
21
+ import { BrainRepository } from "../repositories/brain-repo";
22
+ import { loadSettings, SETTINGS_PATH, DEFAULT_DB_PATH, type ResolvedLLM } from "../settings";
23
+ import { extractRelations, entityToSlug, type EntityType } from "../ai/entity-link";
24
+ import { registerCompileCommands } from "./compile-cmd";
25
+ import { registerGraphCommand } from "./graph-cmd";
26
+ import { createProgress, formatDuration } from "../utils/progress";
27
+ import {
28
+ success,
29
+ error as cliError,
30
+ warning,
31
+ info,
32
+ step,
33
+ subItem,
34
+ keyValue,
35
+ header,
36
+ createSpinner,
37
+ formatCount,
38
+ type ProgressSpinner,
39
+ } from "../utils/cli-output";
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // Helpers
43
+ // ---------------------------------------------------------------------------
44
+
45
/**
 * Register the shared `--dry-run` flag on a command.
 *
 * The flag defaults to `false`. Whatever `cmd.option(...)` returns is handed
 * back so callers can keep chaining (e.g. `.action(...)`).
 *
 * @param cmd command to decorate
 * @returns the result of `cmd.option(...)`
 */
function addDryRun(cmd: Command): Command {
  const withDryRunFlag = cmd.option(
    "--dry-run",
    "preview changes without executing",
    false,
  );
  return withDryRunFlag;
}
48
+
49
/**
 * Tell whether the parsed options carry a truthy `dryRun` value.
 *
 * @param opts parsed option bag of a command
 * @returns `true` when `opts.dryRun` is truthy, `false` otherwise (including when absent)
 */
function isDryRun(opts: Record<string, unknown>): boolean {
  const flag = opts.dryRun;
  return flag ? true : false;
}
52
+
53
// Minimal progress reporter. Lines are written to stderr so that stdout stays
// clean for --json output. Nothing is written when `json` is true.
// Output shape: "[3/42] import docs/api"
function progress(label: string, current: number, total: number, json: boolean): void {
  if (!json) {
    const line = "[" + current + "/" + total + "] " + label + "\n";
    process.stderr.write(line);
  }
}
59
+
60
/**
 * Extract entity relations from `content` with the configured LLM, then create
 * entity pages and links for every relation at or above the configured
 * confidence threshold.
 *
 * Non-blocking cases (zero counts are returned instead of throwing):
 *   - `content` is blank
 *   - `settings.llm.baseURL` is not set (a warning is printed unless `json`)
 *   - `extractRelations` throws (the spinner is failed unless `json`)
 *   - no relation reaches `settings.extraction.confidenceThreshold`
 *
 * Errors thrown by `loadSettings` or by repository calls are NOT swallowed:
 * they propagate to the caller. When a repository call fails while the spinner
 * is running, the spinner is failed first so it is not left dangling.
 * NOTE(review): the "non-blocking" intent may have been meant to cover
 * repository failures as well — confirm before relaxing this.
 *
 * @param repo       repository used to resolve slugs, create entity pages and add links
 * @param sourceSlug slug of the page the content belongs to; linked to every extracted entity
 * @param content    text handed to the relation extractor
 * @param json       when true, this function emits no spinner, warning or summary output
 * @returns number of entity pages created and number of `repo.link` calls made
 */
async function applyEntityLinks(
  repo: BrainRepository,
  sourceSlug: string,
  content: string,
  json: boolean,
): Promise<{ created: number; linked: number }> {
  if (!content.trim()) return { created: 0, linked: 0 };

  const settings = await loadSettings();
  if (!settings.llm.baseURL) {
    if (!json) {
      warning(`LLM not configured, skipping entity extraction for ${sourceSlug}`);
    }
    return { created: 0, linked: 0 };
  }

  const spinner = createSpinner();
  if (!json) {
    spinner.start(`Extracting entities from ${sourceSlug}...`);
  }

  const startTime = Date.now();
  let relations;
  try {
    relations = await extractRelations(content, settings.llm);
  } catch (err) {
    if (!json) {
      spinner.fail(`Entity extraction failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    return { created: 0, linked: 0 };
  }

  // Keep only relations at or above the configured confidence threshold.
  const confidenceThreshold = settings.extraction.confidenceThreshold;
  const highConfidence = relations.filter((r) => r.confidence >= confidenceThreshold);
  const ignoredCount = relations.length - highConfidence.length;

  if (highConfidence.length === 0) {
    if (!json) {
      if (relations.length > 0) {
        spinner.warn(`Found ${relations.length} entities but all below confidence threshold (${confidenceThreshold})`);
      } else {
        spinner.warn(`No entities found in content`);
      }
    }
    return { created: 0, linked: 0 };
  }

  let created = 0;
  let linked = 0;

  try {
    // Relations are processed strictly one after another: each iteration may
    // create pages that a later slug lookup could match.
    for (const r of highConfidence) {
      // 1. Resolve entity slugs (disambiguation)
      const fromCandidate = entityToSlug(r.from.name, r.from.type);
      const toCandidate = entityToSlug(r.to.name, r.to.type);

      const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
      const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);

      // 2. Ensure entity pages exist; a truthy result is counted as a newly created page
      const fromCreated = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, sourceSlug);
      const toCreated = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, sourceSlug);
      if (fromCreated) created += 1;
      if (toCreated) created += 1;

      // 3. Link between entities (context includes relation type)
      await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
      linked += 1;

      // 4. Link from source document to entities (for backlinks tracing)
      await repo.link(sourceSlug, fromSlug, `Mentions ${r.from.name}`);
      linked += 1;
      await repo.link(sourceSlug, toSlug, `Mentions ${r.to.name}`);
      linked += 1;
    }
  } catch (err) {
    // Resolve the running spinner before the error leaves this function;
    // the error itself is rethrown unchanged.
    if (!json) {
      spinner.fail(`Entity linking failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    throw err;
  }

  if (!json) {
    const duration = formatDuration(Date.now() - startTime);
    // De-duplicate names: the same entity can appear in several relations.
    const entityNames = [...new Set(highConfidence.flatMap((r) => [r.from.name, r.to.name]))];
    spinner.succeed(`Extracted ${entityNames.length} entities: ${entityNames.join(", ")}`);

    // Print detailed info
    subItem(`${created} entity pages created`);
    subItem(`${linked} links added`);
    if (ignoredCount > 0) {
      subItem(`${ignoredCount} low-confidence relations ignored`);
    }
    subItem(`Completed in ${duration}`);
  }

  return { created, linked };
}
157
+
158
/**
 * Load the markdown input for a command.
 *
 * A truthy `fileOpt` takes precedence: it is resolved to an absolute path and
 * read with `readTextFile`. Otherwise the result of `readMaybeStdin()` is used,
 * with a nullish result replaced by the empty string.
 *
 * @param fileOpt optional path supplied by the caller
 * @param stdin   currently not consulted by this function
 * @returns the input text, possibly empty
 */
async function resolveInput(
  fileOpt: string | undefined,
  stdin: boolean,
): Promise<string> {
  if (!fileOpt) {
    const piped = await readMaybeStdin();
    return piped ?? "";
  }
  const absolutePath = resolve(fileOpt);
  return readTextFile(absolutePath);
}
165
+
166
+ // ---------------------------------------------------------------------------
167
+ // Build
168
+ // ---------------------------------------------------------------------------
169
+
170
+ export function buildProgram(): Command {
171
+ const program = new Command("ebrain")
172
+ .description("Personal knowledge base CLI powered by seekdb")
173
+ .addHelpText(
174
+ "after",
175
+ `
176
+ Examples:
177
+ ebrain config
178
+ ebrain put docs/api --file api.md
179
+ ebrain search "machine learning" --limit 5
180
+ ebrain query "What projects did we ship in Q4?"
181
+ cat note.md | ebrain put notes/daily --stdin
182
+ ebrain serve # start MCP server for AI tools
183
+ `,
184
+ )
185
+ .option("--db <path>", "database path (overrides settings.json)")
186
+ .option("--json", "output as JSON", false);
187
+
188
+ // -- config ---------------------------------------------------------------
189
+
190
+ program
191
+ .command("config")
192
+ .description("show resolved configuration")
193
+ .action(async () => {
194
+ const settings = await loadSettings();
195
+ const cliDb = program.opts().db;
196
+ const effectiveDb = cliDb ?? settings.dbPath;
197
+ print(program, {
198
+ settingsFile: SETTINGS_PATH,
199
+ dbPath: effectiveDb,
200
+ mode: settings.remote ? "remote" : "local",
201
+ remote: settings.remote ?? null,
202
+ embed: {
203
+ provider: settings.embed.provider,
204
+ baseURL: settings.embed.baseURL,
205
+ model: settings.embed.model,
206
+ dimensions: settings.embed.dimensions,
207
+ hasApiKey:
208
+ !!settings.embed.apiKey ||
209
+ !!process.env[settings.embed.apiKeyEnv],
210
+ },
211
+ llm: {
212
+ baseURL: settings.llm.baseURL || "(not configured)",
213
+ model: settings.llm.model,
214
+ hasApiKey:
215
+ !!settings.llm.apiKey ||
216
+ !!process.env[settings.llm.apiKeyEnv],
217
+ },
218
+ });
219
+ });
220
+
221
+ // -- page CRUD ------------------------------------------------------------
222
+
223
+ addDryRun(
224
+ program
225
+ .command("put")
226
+ .argument("[slug]", "page slug (optional; auto-generated if omitted)")
227
+ .option("--file <path>", "read markdown from file")
228
+ .option("--stdin", "read markdown from stdin", false)
229
+ .option("--type <type>", "page type")
230
+ .option("--title <title>", "page title")
231
+ .description(
232
+ "create or update a page (idempotent; upserts by slug). If slug is omitted, it is auto-generated from file name, title, or timestamp.",
233
+ )
234
+ .addHelpText(
235
+ "after",
236
+ `
237
+ Examples:
238
+ ebrain put --file api.md # auto-generate slug from file name
239
+ ebrain put docs/api --file api.md # explicit slug
240
+ cat note.md | ebrain put --stdin # auto-generate slug from title/timestamp
241
+ ebrain put --title "My Note" --stdin # auto-generate slug from title
242
+ ebrain put people/john --type person --title "John Doe"
243
+ ebrain put docs/api --file api.md --dry-run
244
+ `,
245
+ ),
246
+ ).action(
247
+ async (
248
+ slug: string | undefined,
249
+ opts: {
250
+ file?: string;
251
+ stdin?: boolean;
252
+ type?: string;
253
+ title?: string;
254
+ dryRun?: boolean;
255
+ },
256
+ ) => {
257
+ const input = await resolveInput(opts.file, opts.stdin ?? false);
258
+ if (!input.trim()) {
259
+ throw new Error(
260
+ "empty input — provide --file <path>, --stdin, or pipe markdown",
261
+ );
262
+ }
263
+ const parsed = parsePageMarkdown(input);
264
+
265
+ // Auto-generate slug if not provided
266
+ let finalSlug = slug;
267
+ if (!finalSlug) {
268
+ // Priority: file name > title option > frontmatter title > timestamp
269
+ if (opts.file) {
270
+ const fileName = basename(opts.file).replace(/\.md$/i, "");
271
+ finalSlug = normalizeLongSlug(slugify(fileName));
272
+ } else if (opts.title) {
273
+ finalSlug = normalizeLongSlug(slugify(opts.title));
274
+ } else if (parsed.frontmatter.title) {
275
+ finalSlug = normalizeLongSlug(slugify(String(parsed.frontmatter.title)));
276
+ } else {
277
+ // Use timestamp as fallback
278
+ const timestamp = new Date().toISOString().slice(0, 19).replace(/[-:T]/g, "");
279
+ finalSlug = `notes/${timestamp}`;
280
+ }
281
+ }
282
+
283
+ const type =
284
+ opts.type ??
285
+ String(parsed.frontmatter.type ?? inferTypeFromSlug(finalSlug));
286
+ const title =
287
+ opts.title ??
288
+ String(parsed.frontmatter.title ?? slugToTitle(finalSlug));
289
+
290
+ if (isDryRun(opts)) {
291
+ print(program, {
292
+ dryRun: true,
293
+ action: "put",
294
+ slug: finalSlug,
295
+ type,
296
+ title,
297
+ contentLength: parsed.compiledTruth.length,
298
+ hasTimeline: !!parsed.timeline,
299
+ frontmatterKeys: Object.keys(parsed.frontmatter),
300
+ });
301
+ return;
302
+ }
303
+
304
+ await withRepo(program, async (repo) => {
305
+ const jsonOut = isJson(program);
306
+ const spinner = createSpinner();
307
+ const startTime = Date.now();
308
+
309
+ if (!jsonOut) {
310
+ header(`Put: ${finalSlug}`);
311
+ spinner.start(`Creating/updating page...`);
312
+ }
313
+
314
+ const page = await repo.putPage({
315
+ slug: finalSlug,
316
+ type,
317
+ title,
318
+ compiledTruth: parsed.compiledTruth,
319
+ timeline: parsed.timeline,
320
+ frontmatter: parsed.frontmatter,
321
+ });
322
+
323
+ if (!jsonOut) {
324
+ spinner.succeed(`Page saved: ${page.slug}`);
325
+ keyValue("Title", title);
326
+ keyValue("Type", type);
327
+ keyValue("Content length", `${parsed.compiledTruth.length} chars`);
328
+ }
329
+
330
+ await applyEntityLinks(
331
+ repo,
332
+ finalSlug,
333
+ parsed.compiledTruth,
334
+ jsonOut,
335
+ );
336
+
337
+ if (!jsonOut) {
338
+ const duration = formatDuration(Date.now() - startTime);
339
+ success(`Operation completed in ${duration}`);
340
+ }
341
+
342
+ print(program, { ok: true, slug: page.slug, updatedAt: page.updatedAt });
343
+ });
344
+ },
345
+ );
346
+
347
+ program
348
+ .command("get")
349
+ .argument("<slug>", "page slug")
350
+ .option("--json", "output as JSON (overrides global --json)")
351
+ .description("read a page and render it as markdown")
352
+ .addHelpText(
353
+ "after",
354
+ `
355
+ Examples:
356
+ ebrain get docs/api
357
+ ebrain get docs/api --json
358
+ `,
359
+ )
360
+ .action(async (slug: string, opts: { json?: boolean }) => {
361
+ const localJson = opts.json !== undefined ? opts.json : isJson(program);
362
+ await withRepo(program, async (repo) => {
363
+ const page = await repo.getPage(slug);
364
+ if (!page) {
365
+ throw new Error(`page not found: ${slug}`);
366
+ }
367
+ if (localJson) {
368
+ console.log(JSON.stringify(page, null, 2));
369
+ return;
370
+ }
371
+ console.log(
372
+ renderPageMarkdown(
373
+ page.frontmatter,
374
+ page.compiledTruth,
375
+ page.timeline,
376
+ ),
377
+ );
378
+ });
379
+ });
380
+
381
+ addDryRun(
382
+ program
383
+ .command("delete")
384
+ .argument("<slug>", "page slug to delete")
385
+ .description("delete a page and its related data (links, tags, timeline, raw)")
386
+ .addHelpText(
387
+ "after",
388
+ `
389
+ Examples:
390
+ ebrain delete notes/old-draft
391
+ ebrain delete notes/old-draft --dry-run
392
+ `,
393
+ ),
394
+ ).action(async (slug: string, opts: { dryRun?: boolean }) => {
395
+ if (isDryRun(opts)) {
396
+ await withRepo(program, async (repo) => {
397
+ const page = await repo.getPage(slug);
398
+ if (!page) {
399
+ throw new Error(`page not found: ${slug}`);
400
+ }
401
+ print(program, {
402
+ dryRun: true,
403
+ action: "delete",
404
+ slug,
405
+ title: page.title,
406
+ });
407
+ });
408
+ return;
409
+ }
410
+ await withRepo(program, async (repo) => {
411
+ const jsonOut = isJson(program);
412
+ const spinner = createSpinner();
413
+
414
+ if (!jsonOut) {
415
+ header(`Delete: ${slug}`);
416
+ spinner.start(`Deleting page and related data...`);
417
+ }
418
+
419
+ await repo.deletePage(slug);
420
+
421
+ if (!jsonOut) {
422
+ spinner.succeed(`Page deleted: ${slug}`);
423
+ }
424
+
425
+ print(program, { ok: true, action: "delete", slug });
426
+ });
427
+ });
428
+
429
+ program
430
+ .command("list")
431
+ .option("--type <type>", "filter by page type")
432
+ .option("--tag <tag>", "filter by tag")
433
+ .option("-f, --fields <fields>", "comma-separated fields to display (slug,type,title,createdAt,updatedAt)")
434
+ .option("--limit <number>", "max results", "50")
435
+ .description("list pages")
436
+ .addHelpText(
437
+ "after",
438
+ `
439
+ Examples:
440
+ ebrain list
441
+ ebrain list --type person
442
+ ebrain list -f slug
443
+ ebrain list -f slug,title,type
444
+ `,
445
+ )
446
+ .action(async (opts: Record<string, string | undefined>) => {
447
+ await withRepo(program, async (repo) => {
448
+ const rows = await repo.listPages({
449
+ type: opts.type,
450
+ tag: opts.tag,
451
+ limit: Number(opts.limit),
452
+ });
453
+
454
+ // When --fields is set, show one page per line with tab-separated values
455
+ if (opts.fields) {
456
+ const fields = opts.fields.split(",").map((f) => f.trim());
457
+ for (const row of rows) {
458
+ const vals = fields.map((field) => {
459
+ const val = (row as Record<string, unknown>)[field];
460
+ if (val === undefined || val === null) return "";
461
+ if (typeof val === "object") return JSON.stringify(val);
462
+ return String(val);
463
+ });
464
+ console.log(vals.join("\t"));
465
+ }
466
+ return;
467
+ }
468
+
469
+ print(program, rows);
470
+ });
471
+ });
472
+
473
+ // -- search / query -------------------------------------------------------
474
+
475
+ program
476
+ .command("search")
477
+ .argument("<query>", "full-text search query")
478
+ .option("--type <type>", "filter by page type")
479
+ .option("--limit <number>", "max results", "10")
480
+ .description("full-text / hybrid search")
481
+ .addHelpText(
482
+ "after",
483
+ `
484
+ Examples:
485
+ ebrain search "machine learning"
486
+ ebrain search "quarterly revenue" --type deal --limit 5
487
+ `,
488
+ )
489
+ .action(async (query: string, opts: Record<string, string>) => {
490
+ await withRepo(program, async (repo) => {
491
+ const hits = await repo.search(
492
+ query,
493
+ Number(opts.limit ?? 10),
494
+ opts.type,
495
+ );
496
+ print(program, hits);
497
+ });
498
+ });
499
+
500
+ program
501
+ .command("query")
502
+ .argument("<question>", "natural language question")
503
+ .option("--limit <number>", "max results", "10")
504
+ .option("--llm", "use LLM to answer based on retrieved context", false)
505
+ .option("--context-limit <number>", "max pages to use as context", "5")
506
+ .description("semantic / vector search")
507
+ .addHelpText(
508
+ "after",
509
+ `
510
+ Examples:
511
+ ebrain query "What projects did we ship in Q4?"
512
+ ebrain query "Who leads the ML team?" --limit 5
513
+ ebrain query "What are the key findings?" --llm
514
+ `,
515
+ )
516
+ .action(async (question: string, opts: Record<string, string>) => {
517
+ await withRepo(program, async (repo) => {
518
+ const limit = Number(opts.limit ?? 10);
519
+ const hits = await repo.query(question, limit);
520
+
521
+ // If --llm flag, generate answer based on multi-layer context
522
+ if (opts.llm) {
523
+ const settings = await loadSettings();
524
+ if (!settings.llm.baseURL) {
525
+ print(program, { error: "LLM not configured. Set llm.baseURL in settings." });
526
+ return;
527
+ }
528
+
529
+ const progress = createProgress();
530
+ progress.start("Searching knowledge base...");
531
+
532
+ const contextLimit = Number(opts.contextLimit ?? 5);
533
+ const topHits = hits.slice(0, contextLimit);
534
+
535
+ if (topHits.length === 0) {
536
+ progress.stop();
537
+ process.stderr.write("No relevant pages found.\n");
538
+ print(program, { answer: "No relevant information found in the knowledge base.", sources: [] });
539
+ return;
540
+ }
541
+
542
+ // Collect multi-layer context (primary + raw data + linked pages scored by relevance)
543
+ progress.update(`Loading pages, raw documents, and linked content...`);
544
+ // ~100KB char budget ≈ 25K tokens, safe for most models
545
+ const MAX_CONTEXT_CHARS = 100_000;
546
+ const { sections, totalChars, stats } = await collectContextForLLM(repo, topHits, question, MAX_CONTEXT_CHARS);
547
+
548
+ if (sections.length === 0) {
549
+ progress.stop();
550
+ process.stderr.write("No content could be loaded.\n");
551
+ print(program, { answer: "Failed to load page content.", sources: [] });
552
+ return;
553
+ }
554
+
555
+ progress.update(`Generating answer from ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)...`);
556
+ const startTime = Date.now();
557
+
558
+ const answer = await generateAnswerWithContext(question, sections, stats, settings.llm);
559
+
560
+ const duration = formatDuration(Date.now() - startTime);
561
+ progress.succeed(`Answer generated (${duration}, context: ${(totalChars / 1024).toFixed(1)}KB)`);
562
+
563
+ // Output answer as markdown
564
+ console.log("\n" + answer);
565
+
566
+ // Show sources breakdown
567
+ console.log("\n---\n**Sources:**\n");
568
+ for (let i = 0; i < sections.length; i++) {
569
+ const s = sections[i];
570
+ const icon = s.type === 'primary' ? '📄' : s.type === 'raw_data' ? '📎' : '🔗';
571
+ console.log(`${icon} ${i + 1}. [[${s.slug}|${s.title}]] — ${s.label} (${(s.content.length / 1024).toFixed(1)}KB)`);
572
+ }
573
+ console.log(`\n*Context: ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)*`);
574
+ } else {
575
+ print(program, hits);
576
+ }
577
+ });
578
+ });
579
+
580
+ // -- link -----------------------------------------------------------------
581
+
582
+ addDryRun(
583
+ program
584
+ .command("link")
585
+ .argument("<from>", "source page slug")
586
+ .argument("<to>", "target page slug")
587
+ .option("--context <text>", "link context", "")
588
+ .description("create a cross-link between pages (idempotent)")
589
+ .addHelpText(
590
+ "after",
591
+ `
592
+ Examples:
593
+ ebrain link docs/api docs/getting-started
594
+ ebrain link people/john projects/alpha --context "lead"
595
+ ebrain link docs/api docs/getting-started --dry-run
596
+ `,
597
+ ),
598
+ ).action(
599
+ async (
600
+ from: string,
601
+ to: string,
602
+ opts: { context?: string; dryRun?: boolean },
603
+ ) => {
604
+ if (isDryRun(opts)) {
605
+ print(program, {
606
+ dryRun: true,
607
+ action: "link",
608
+ from,
609
+ to,
610
+ context: opts.context ?? "",
611
+ });
612
+ return;
613
+ }
614
+ await withRepo(program, async (repo) => {
615
+ await repo.link(from, to, opts.context ?? "");
616
+ print(program, { ok: true, from, to });
617
+ });
618
+ },
619
+ );
620
+
621
+ program
622
+ .command("backlinks")
623
+ .argument("<slug>", "target page slug")
624
+ .description("list pages that link to this page")
625
+ .addHelpText(
626
+ "after",
627
+ `
628
+ Examples:
629
+ ebrain backlinks docs/api
630
+ `,
631
+ )
632
+ .action(async (slug: string) => {
633
+ await withRepo(program, async (repo) => {
634
+ const links = await repo.backlinks(slug);
635
+ print(program, links);
636
+ });
637
+ });
638
+
639
+ // -- timeline (subcommands) -----------------------------------------------
640
+
641
+ const timelineCmd = program
642
+ .command("timeline")
643
+ .description("manage timeline entries");
644
+
645
+ timelineCmd
646
+ .command("list")
647
+ .argument("<slug>", "page slug")
648
+ .option("--limit <number>", "max results", "50")
649
+ .description("list timeline entries for a page")
650
+ .addHelpText(
651
+ "after",
652
+ `
653
+ Examples:
654
+ ebrain timeline list projects/alpha
655
+ ebrain timeline list projects/alpha --limit 10
656
+ `,
657
+ )
658
+ .action(async (slug: string, opts: Record<string, string>) => {
659
+ await withRepo(program, async (repo) => {
660
+ const rows = await repo.timeline(slug, Number(opts.limit ?? 50));
661
+ print(program, rows);
662
+ });
663
+ });
664
+
665
+ addDryRun(
666
+ timelineCmd
667
+ .command("add")
668
+ .argument("<slug>", "page slug")
669
+ .requiredOption("--date <date>", "date (YYYY-MM-DD or ISO)")
670
+ .requiredOption("--summary <summary>", "one-line summary")
671
+ .option("--source <source>", "event source", "manual")
672
+ .option("--detail <detail>", "detail markdown", "")
673
+ .description("add a timeline entry")
674
+ .addHelpText(
675
+ "after",
676
+ `
677
+ Examples:
678
+ ebrain timeline add projects/alpha --date 2025-03-15 --summary "v1.0 shipped"
679
+ ebrain timeline add projects/alpha --date 2025-03-15 --summary "launch" --source release
680
+ ebrain timeline add projects/alpha --date 2025-03-15 --summary "launch" --dry-run
681
+ `,
682
+ ),
683
+ ).action(
684
+ async (
685
+ slug: string,
686
+ opts: {
687
+ date: string;
688
+ summary: string;
689
+ source?: string;
690
+ detail?: string;
691
+ dryRun?: boolean;
692
+ },
693
+ ) => {
694
+ if (isDryRun(opts)) {
695
+ print(program, {
696
+ dryRun: true,
697
+ action: "timeline-add",
698
+ slug,
699
+ date: opts.date,
700
+ summary: opts.summary,
701
+ source: opts.source ?? "manual",
702
+ });
703
+ return;
704
+ }
705
+ await withRepo(program, async (repo) => {
706
+ await repo.timelineAdd({
707
+ pageSlug: slug,
708
+ date: opts.date,
709
+ source: opts.source ?? "manual",
710
+ summary: opts.summary,
711
+ detail: opts.detail ?? "",
712
+ });
713
+ print(program, {
714
+ ok: true,
715
+ action: "timeline-add",
716
+ slug,
717
+ date: opts.date,
718
+ });
719
+ });
720
+ },
721
+ );
722
+
723
+ addDryRun(
724
+ timelineCmd
725
+ .command("extract")
726
+ .argument("<slug>", "page slug")
727
+ .option("--source <source>", "source identifier", "extracted")
728
+ .option("--default-date <date>", "default date (YYYY-MM-DD)")
729
+ .description("extract timeline events from page content using AI")
730
+ .addHelpText(
731
+ "after",
732
+ `
733
+ Examples:
734
+ ebrain timeline extract companies/river-ai
735
+ ebrain timeline extract docs/meeting --source meeting_notes --default-date 2024-03-15
736
+ `,
737
+ ),
738
+ ).action(async (slug: string, opts: { source?: string; defaultDate?: string; dryRun?: boolean }) => {
739
+ if (isDryRun(opts)) {
740
+ print(program, {
741
+ dryRun: true,
742
+ action: "timeline-extract",
743
+ slug,
744
+ source: opts.source ?? "extracted",
745
+ defaultDate: opts.defaultDate ?? new Date().toISOString().slice(0, 10),
746
+ });
747
+ return;
748
+ }
749
+ await withRepo(program, async (repo) => {
750
+ const page = await repo.getPage(slug);
751
+ if (!page) {
752
+ throw new Error(`page not found: ${slug}`);
753
+ }
754
+ const settings = await loadSettings();
755
+
756
+ const progress = createProgress();
757
+ progress.start(`Extracting timeline from ${slug}...`);
758
+ const startTime = Date.now();
759
+
760
+ const result = await repo.extractAndAddTimeline(
761
+ slug,
762
+ page.compiledTruth,
763
+ opts.source ?? "extracted",
764
+ opts.defaultDate ?? new Date().toISOString().slice(0, 10),
765
+ settings.llm,
766
+ );
767
+
768
+ const duration = formatDuration(Date.now() - startTime);
769
+
770
+ if (result.entries.length > 0) {
771
+ progress.succeed(`${result.entries.length} events extracted (${duration})`);
772
+ } else {
773
+ progress.stop();
774
+ process.stderr.write(`No events found (${duration})\n`);
775
+ }
776
+
777
+ print(program, {
778
+ ok: true,
779
+ action: "timeline-extract",
780
+ slug,
781
+ entriesAdded: result.entries.length,
782
+ entries: result.entries,
783
+ confidence: result.confidence,
784
+ });
785
+ });
786
+ });
787
+
788
+ timelineCmd
789
+ .command("global")
790
+ .option("--limit <number>", "max results", "100")
791
+ .description("list timeline entries across all pages")
792
+ .addHelpText(
793
+ "after",
794
+ `
795
+ Examples:
796
+ ebrain timeline global
797
+ ebrain timeline global --limit 20
798
+ `,
799
+ )
800
+ .action(async (opts: Record<string, string>) => {
801
+ await withRepo(program, async (repo) => {
802
+ const entries = await repo.timelineGlobal(Number(opts.limit ?? 100));
803
+ print(program, entries);
804
+ });
805
+ });
806
+
807
+ // -- tag (subcommands) ----------------------------------------------------
808
+
809
+ const tagCmd = program
810
+ .command("tag")
811
+ .description("manage tags on a page");
812
+
813
+ tagCmd
814
+ .command("list")
815
+ .argument("<slug>", "page slug")
816
+ .description("list tags on a page")
817
+ .addHelpText(
818
+ "after",
819
+ `
820
+ Examples:
821
+ ebrain tag list docs/api
822
+ `,
823
+ )
824
+ .action(async (slug: string) => {
825
+ await withRepo(program, async (repo) => {
826
+ const tags = await repo.tags(slug);
827
+ print(program, tags);
828
+ });
829
+ });
830
+
831
+ addDryRun(
832
+ tagCmd
833
+ .command("add")
834
+ .argument("<slug>", "page slug")
835
+ .argument("<tag>", "tag to add")
836
+ .description("add a tag to a page (idempotent)")
837
+ .addHelpText(
838
+ "after",
839
+ `
840
+ Examples:
841
+ ebrain tag add docs/api rest
842
+ ebrain tag add docs/api rest --dry-run
843
+ `,
844
+ ),
845
+ ).action(async (slug: string, tag: string, opts: { dryRun?: boolean }) => {
846
+ if (isDryRun(opts)) {
847
+ print(program, { dryRun: true, action: "tag-add", slug, tag });
848
+ return;
849
+ }
850
+ await withRepo(program, async (repo) => {
851
+ await repo.tag(slug, tag);
852
+ print(program, { ok: true, action: "tag-add", slug, tag });
853
+ });
854
+ });
855
+
856
+ addDryRun(
857
+ tagCmd
858
+ .command("remove")
859
+ .argument("<slug>", "page slug")
860
+ .argument("<tag>", "tag to remove")
861
+ .description("remove a tag from a page")
862
+ .addHelpText(
863
+ "after",
864
+ `
865
+ Examples:
866
+ ebrain tag remove docs/api outdated
867
+ ebrain tag remove docs/api outdated --dry-run
868
+ `,
869
+ ),
870
+ ).action(async (slug: string, tag: string, opts: { dryRun?: boolean }) => {
871
+ if (isDryRun(opts)) {
872
+ print(program, { dryRun: true, action: "tag-remove", slug, tag });
873
+ return;
874
+ }
875
+ await withRepo(program, async (repo) => {
876
+ await repo.untag(slug, tag);
877
+ print(program, { ok: true, action: "tag-remove", slug, tag });
878
+ });
879
+ });
880
+
881
+ // -- raw (subcommands) ----------------------------------------------------
882
+
883
+ const rawCmd = program
884
+ .command("raw")
885
+ .description("manage raw source data for a page");
886
+
887
+ rawCmd
888
+ .command("get")
889
+ .argument("<slug>", "page slug")
890
+ .option("--source <source>", "filter by source name")
891
+ .description("read raw source data for a page")
892
+ .addHelpText(
893
+ "after",
894
+ `
895
+ Examples:
896
+ ebrain raw get ingest/report
897
+ ebrain raw get ingest/report --source crm
898
+ `,
899
+ )
900
+ .action(async (slug: string, opts: { source?: string }) => {
901
+ await withRepo(program, async (repo) => {
902
+ const rows = await repo.readRaw(slug, opts.source);
903
+ print(program, rows);
904
+ });
905
+ });
906
+
907
+ addDryRun(
908
+ rawCmd
909
+ .command("set")
910
+ .argument("<slug>", "page slug")
911
+ .requiredOption("--source <source>", "source name")
912
+ .option("--data <json>", "JSON string")
913
+ .option("--stdin", "read JSON from stdin", false)
914
+ .description("write raw source data for a page")
915
+ .addHelpText(
916
+ "after",
917
+ `
918
+ Examples:
919
+ ebrain raw set ingest/report --source crm --data '{"rev": 1000}'
920
+ echo '{"rev": 1000}' | ebrain raw set ingest/report --source crm --stdin
921
+ ebrain raw set ingest/report --source crm --data '{"rev": 1000}' --dry-run
922
+ `,
923
+ ),
924
+ ).action(
925
+ async (
926
+ slug: string,
927
+ opts: {
928
+ source: string;
929
+ data?: string;
930
+ stdin?: boolean;
931
+ dryRun?: boolean;
932
+ },
933
+ ) => {
934
+ let data: unknown;
935
+ if (opts.data) {
936
+ data = JSON.parse(opts.data);
937
+ } else if (opts.stdin) {
938
+ const raw = await readMaybeStdin();
939
+ if (!raw?.trim()) throw new Error("empty stdin — pipe JSON");
940
+ data = JSON.parse(raw);
941
+ } else {
942
+ throw new Error("provide --data <json> or --stdin");
943
+ }
944
+
945
+ if (isDryRun(opts)) {
946
+ print(program, {
947
+ dryRun: true,
948
+ action: "raw-set",
949
+ slug,
950
+ source: opts.source,
951
+ });
952
+ return;
953
+ }
954
+
955
+ await withRepo(program, async (repo) => {
956
+ await repo.writeRaw(slug, opts.source, data);
957
+ print(program, {
958
+ ok: true,
959
+ action: "raw-set",
960
+ slug,
961
+ source: opts.source,
962
+ });
963
+ });
964
+ },
965
+ );
966
+
967
+ // -- import / export ------------------------------------------------------
968
+
969
// `ebrain import <dir>`: bulk-import a directory of markdown files.
//
// Phases:
//   1. parse every file (frontmatter, wiki links, timeline lines, tags)
//   2. write pages with embedding skipped
//   3. LLM entity extraction in parallel batches (only when an LLM is configured)
//   4. write links, tags, timeline entries and entity pages
//   5. rebuild the search index unless --skip-index is given
//
// Phases 3 and 4 only consider pages that were actually written in phase 2,
// so a failed `putPage` never leaves links/tags pointing at a missing page.
addDryRun(
  program
    .command("import")
    .argument("<dir>", "directory of markdown files")
    .description("import a directory of markdown files")
    .option("--skip-index", "skip vector indexing (useful if seekdb crashes)")
    .addHelpText(
      "after",
      `
Examples:
ebrain import ./docs
ebrain import ./docs --dry-run
ebrain import ./docs --skip-index # skip vector indexing
`,
    ),
).action(async (dir: string, opts: { dryRun?: boolean; skipIndex?: boolean }) => {
  await withRepo(program, async (repo) => {
    const root = resolve(dir);
    const files = await collectMarkdownFiles(root);

    if (isDryRun(opts)) {
      print(program, {
        dryRun: true,
        action: "import",
        dir: root,
        filesFound: files.length,
        // Preview the same normalized slugs the real import writes.
        slugs: files.map((f) => normalizeLongSlug(pathToSlug(f, root))),
      });
      return;
    }

    const jsonOut = isJson(program);
    const settings = await loadSettings();
    const spinner = createSpinner();
    const startTime = Date.now();

    if (!jsonOut) {
      header(`Import: ${root}`);
    }

    // Phase 1: Parse all files and collect data
    if (!jsonOut) {
      spinner.start(`Scanning ${files.length} files...`);
    }

    type ImportedFile = {
      file: string;
      slug: string;
      parsed: ReturnType<typeof parsePageMarkdown>;
      content: string;
      wikiLinks: string[];
      timelineEntries: ReturnType<typeof extractTimelineLines>;
      tags: string[];
    };
    const fileData: ImportedFile[] = [];

    for (const file of files) {
      const rawSlug = pathToSlug(file, root);
      const slug = normalizeLongSlug(rawSlug);
      const content = await readTextFile(file);
      const parsed = parsePageMarkdown(content);
      const wikiLinks = extractWikiStyleLinks(content).map(normalizeLinkSlug);
      const timelineEntries = extractTimelineLines(parsed.timeline);
      const tags = Array.isArray(parsed.frontmatter.tags)
        ? parsed.frontmatter.tags.filter((t): t is string => typeof t === "string")
        : [];
      fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
    }

    if (!jsonOut) {
      spinner.succeed(`Found ${files.length} markdown files`);
    }

    // Phase 2: Write all pages first (skip embed for performance)
    if (!jsonOut) {
      spinner.start(`Writing ${fileData.length} pages to database...`);
    }

    const written: ImportedFile[] = [];
    const allSlugs: string[] = [];
    const writeErrors: string[] = [];

    for (let i = 0; i < fileData.length; i++) {
      const entry = fileData[i]!;
      const { slug, parsed } = entry;
      if (!jsonOut && i % 20 === 0) {
        spinner.update(`Writing pages... ${i + 1}/${fileData.length}`);
      }
      try {
        await repo.putPage({
          slug,
          type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
          title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
          compiledTruth: parsed.compiledTruth,
          timeline: parsed.timeline,
          frontmatter: parsed.frontmatter,
        }, true); // skipEmbed: true for performance
        written.push(entry);
        allSlugs.push(slug);
      } catch (err) {
        writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
      }
    }

    if (!jsonOut) {
      spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
      if (writeErrors.length > 0) {
        warning(`${writeErrors.length} pages failed to write`);
        for (const e of writeErrors.slice(0, 3)) {
          subItem(e);
        }
        if (writeErrors.length > 3) {
          subItem(`... and ${writeErrors.length - 3} more`);
        }
      }
    }

    // Phase 3: Parallel entity extraction (main optimization)
    const BATCH_SIZE = 10;
    const entityResults = new Map<string, Awaited<ReturnType<typeof extractRelations>>>();
    const extractErrors: string[] = [];

    if (settings.llm.baseURL) {
      if (!jsonOut) {
        spinner.start(`Extracting entities with LLM...`);
      }

      for (let i = 0; i < written.length; i += BATCH_SIZE) {
        // NOTE(review): pages that already carry frontmatter tags are skipped
        // here — presumably deliberate; confirm with the author.
        const batch = written.slice(i, i + BATCH_SIZE).filter((d) => d.tags.length === 0);
        if (!jsonOut) {
          spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, written.length)}/${written.length}`);
        }
        // A failure for one page is recorded and must not abort the import:
        // the pages themselves are already stored at this point.
        await Promise.all(
          batch.map(async ({ slug, content }) => {
            try {
              entityResults.set(slug, await extractRelations(content, settings.llm));
            } catch (err) {
              extractErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
            }
          }),
        );
      }

      if (!jsonOut) {
        spinner.succeed(`Entity extraction complete`);
        if (extractErrors.length > 0) {
          warning(`${extractErrors.length} pages failed entity extraction`);
          for (const e of extractErrors.slice(0, 3)) {
            subItem(e);
          }
          if (extractErrors.length > 3) {
            subItem(`... and ${extractErrors.length - 3} more`);
          }
        }
      }
    } else {
      if (!jsonOut) {
        warning(`LLM not configured, skipping entity extraction`);
      }
    }

    // Phase 4: Write links, tags, timeline, and entity pages
    if (!jsonOut) {
      spinner.start(`Creating links, tags, and timeline entries...`);
    }

    let linkCount = 0;
    let timelineCount = 0;
    let entityCount = 0;
    let tagCount = 0;

    // Collect timeline entries for batch insert
    const allTimelineEntries: Array<{
      pageSlug: string;
      date: string;
      source: string;
      summary: string;
      detail: string;
    }> = [];

    for (const { slug, wikiLinks, timelineEntries, tags } of written) {
      // Wiki links
      for (const link of wikiLinks) {
        await repo.link(slug, link, "import");
        linkCount++;
      }

      // Collect timeline entries for batch insert
      for (const entry of timelineEntries) {
        allTimelineEntries.push({
          pageSlug: slug,
          date: entry.date,
          source: entry.source,
          summary: entry.summary,
          detail: "",
        });
        timelineCount++;
      }

      // Tags
      for (const tag of tags) {
        await repo.tag(slug, tag);
        tagCount++;
      }

      // Entity links from parallel extraction
      const relations = entityResults.get(slug);
      if (relations && relations.length > 0) {
        const highConfidence = relations.filter((r) => r.confidence >= 0.6);
        for (const r of highConfidence) {
          const fromCandidate = entityToSlug(r.from.name, r.from.type);
          const toCandidate = entityToSlug(r.to.name, r.to.type);
          const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
          const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);

          const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
          const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
          if (c1) entityCount++;
          if (c2) entityCount++;

          await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
          await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
          await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
          linkCount += 3;
        }
      }
    }

    // Batch insert all timeline entries
    if (allTimelineEntries.length > 0) {
      await repo.timelineAddBatch(allTimelineEntries);
    }

    if (!jsonOut) {
      spinner.succeed(`Created links, tags, and timeline`);
    }

    // Phase 5: Batch sync all pages to search index
    if (opts.skipIndex) {
      if (!jsonOut) {
        info(`Skipping vector indexing (--skip-index)`);
      }
    } else {
      if (!jsonOut) {
        spinner.start(`Indexing ${allSlugs.length} pages for search...`);
      }
      await repo.embedAll();

      if (!jsonOut) {
        spinner.succeed(`Search indexing complete`);
      }
    }

    const duration = formatDuration(Date.now() - startTime);

    if (!jsonOut) {
      // Print summary
      header("Import Summary");
      keyValue("Files imported", String(files.length));
      keyValue("Pages created", String(allSlugs.length));
      keyValue("Entities extracted", String(entityCount));
      keyValue("Links created", String(linkCount));
      keyValue("Timeline entries", String(timelineCount));
      keyValue("Tags added", String(tagCount));
      keyValue("Duration", duration);

      if (writeErrors.length > 0) {
        warning(`${writeErrors.length} pages had errors`);
      }
    }

    // `tags` and `failed` are additive fields so machine consumers see the
    // same counts as the human-readable summary.
    print(program, {
      ok: true,
      importedFiles: files.length,
      pages: allSlugs.length,
      links: linkCount,
      timelineEntries: timelineCount,
      entities: entityCount,
      tags: tagCount,
      failed: writeErrors.length,
    });
  });
});
1235
+
1236
// `ebrain export [--dir <dir>]`: render every stored page back to a markdown
// file under the output directory (default: ./export in the current directory).
program
  .command("export")
  .option("--dir <dir>", "output directory", resolve(process.cwd(), "export"))
  .description("export all pages as markdown files")
  .addHelpText(
    "after",
    `
Examples:
ebrain export
ebrain export --dir ./backup
`,
  )
  .action(async (opts: { dir: string }) => {
    await withRepo(program, async (repo) => {
      const outDir = resolve(opts.dir);
      await ensureDir(outDir);

      const pages = await repo.listPages({ limit: 100000 });
      const quiet = isJson(program);

      let position = 0;
      for (const page of pages) {
        position += 1;
        progress("export " + page.slug, position, pages.length, quiet);

        const pageTags = await repo.tags(page.slug);
        // Stored type/title always win over whatever the frontmatter carried.
        const frontmatter = {
          ...page.frontmatter,
          type: page.type,
          title: page.title,
        };
        if (pageTags.length > 0) {
          (frontmatter as Record<string, unknown>).tags = pageTags;
        }

        const markdown = renderPageMarkdown(frontmatter, page.compiledTruth, page.timeline);
        await writeTextFile(slugToPath(page.slug, outDir), markdown);
      }

      print(program, { exported: pages.length, dir: outDir });
    });
  });
1271
+
1272
+ // -- ingest ---------------------------------------------------------------
1273
+
1274
// `ebrain ingest [file] [--stdin] [--type <type>]`: store one file (or stdin)
// as a page under `ingest/<name>`, add a timeline entry for the ingestion and
// run entity linking on the content.
//
// The page's `sourceFile` frontmatter records the absolute path of the file
// that was actually read ("stdin" for piped input).
addDryRun(
  program
    .command("ingest")
    .argument("[file]", "file path to ingest (omit for stdin)")
    .option("--type <type>", "source type", "doc")
    .option("--stdin", "read from stdin", false)
    .description("ingest a file as a new page (under ingest/<name>)")
    .addHelpText(
      "after",
      `
Examples:
ebrain ingest report.pdf --type pdf
cat article.md | ebrain ingest --stdin --type article
ebrain ingest report.pdf --type pdf --dry-run
`,
    ),
).action(
  async (
    file: string | undefined,
    opts: { type?: string; stdin?: boolean; dryRun?: boolean },
  ) => {
    let content: string;
    let fileName: string;
    let sourceFile: string;

    if (file) {
      const fullPath = resolve(file);
      if (!(await fileExists(fullPath))) {
        throw new Error(`file not found: ${file}`);
      }
      content = await readTextFile(fullPath);
      fileName = basename(fullPath);
      // Keep the real location; resolving the bare basename would point at
      // the current working directory instead of the ingested file.
      sourceFile = fullPath;
    } else if (opts.stdin) {
      const raw = await readMaybeStdin();
      if (!raw?.trim()) throw new Error("empty stdin — pipe content");
      content = raw;
      fileName = "stdin";
      sourceFile = "stdin";
    } else {
      throw new Error("provide <file> or --stdin");
    }

    // Strip the extension; a dotfile such as ".env" would otherwise collapse
    // to an empty name and produce the slug "ingest/".
    const stem = fileName.replace(/\.[^.]+$/, "") || fileName;
    const slug = `ingest/${stem}`;
    const type = opts.type ?? "doc";

    if (isDryRun(opts)) {
      print(program, {
        dryRun: true,
        action: "ingest",
        slug,
        type,
        contentLength: content.length,
      });
      return;
    }

    await withRepo(program, async (repo) => {
      const jsonOut = isJson(program);
      const spinner = createSpinner();
      const startTime = Date.now();

      if (!jsonOut) {
        header(`Ingest: ${fileName}`);
        spinner.start(`Creating page from file...`);
      }

      await repo.putPage({
        slug,
        type,
        title: slugToTitle(slug),
        compiledTruth: content,
        timeline: "",
        frontmatter: {
          sourceFile,
          sourceType: type,
        },
      });

      if (!jsonOut) {
        spinner.succeed(`Page created: ${slug}`);
        keyValue("Source file", fileName);
        keyValue("Type", type);
        keyValue("Content length", `${content.length} chars`);
      }

      await repo.timelineAdd({
        pageSlug: slug,
        date: new Date().toISOString().slice(0, 10),
        source: type,
        summary: `Ingested file ${fileName}`,
        detail: "",
      });

      await applyEntityLinks(
        repo,
        slug,
        content,
        jsonOut,
      );

      if (!jsonOut) {
        const duration = formatDuration(Date.now() - startTime);
        success(`Ingestion completed in ${duration}`);
      }

      print(program, { ok: true, action: "ingest", slug });
    });
  },
);
1381
+
1382
+ // -- embed ----------------------------------------------------------------
1383
+
1384
// `ebrain embed [slug] [--all]`: refresh the search embedding of one page, or
// of every page when --all is given (--all takes precedence over a slug).
addDryRun(
  program
    .command("embed")
    .argument("[slug]", "page slug (omit with --all)")
    .option("--all", "embed all pages")
    .description("refresh page embedding(s)")
    .addHelpText(
      "after",
      `
Examples:
ebrain embed docs/api
ebrain embed --all
ebrain embed --all --dry-run
`,
    ),
).action(
  async (
    slug: string | undefined,
    opts: { all?: boolean; dryRun?: boolean },
  ) => {
    // ---- single-page mode ------------------------------------------------
    if (!opts.all) {
      if (!slug) {
        throw new Error("provide <slug> or --all");
      }
      if (isDryRun(opts)) {
        print(program, { dryRun: true, action: "embed", slug });
        return;
      }
      await withRepo(program, async (repo) => {
        const quiet = isJson(program);
        const spin = createSpinner();

        if (!quiet) {
          header(`Embed: ${slug}`);
          spin.start(`Generating embedding for page...`);
        }

        await repo.syncPageToSearch(slug);

        if (!quiet) {
          spin.succeed(`Page embedded: ${slug}`);
        }

        print(program, { embedded: 1, slug });
      });
      return;
    }

    // ---- all-pages mode: dry run only reports how many pages exist --------
    if (isDryRun(opts)) {
      await withRepo(program, async (repo) => {
        const found = await repo.listPages({ limit: 100000 });
        print(program, {
          dryRun: true,
          action: "embed",
          mode: "all",
          pagesFound: found.length,
        });
      });
      return;
    }

    // ---- all-pages mode ---------------------------------------------------
    await withRepo(program, async (repo) => {
      const quiet = isJson(program);
      const spin = createSpinner();
      const began = Date.now();

      if (!quiet) {
        header("Embed All Pages");
        spin.start(`Loading pages...`);
      }

      // Listed only to show a page count in the progress message.
      const found = await repo.listPages({ limit: 100000 });

      if (!quiet) {
        spin.update(`Embedding ${found.length} pages...`);
      }

      const embeddedCount = await repo.embedAll();

      if (!quiet) {
        const elapsed = formatDuration(Date.now() - began);
        spin.succeed(`Embedded ${embeddedCount} pages`);
        keyValue("Duration", elapsed);
      }

      print(program, { embedded: embeddedCount, mode: "all" });
    });
  },
);
1471
+
1472
+ // -- init / stats ---------------------------------------------------------
1473
+
1474
// `ebrain init`: connecting through `withRepo` is what initializes the
// database; the callback only reports the path that was used.
program
  .command("init")
  .description("initialize the ebrain database")
  .addHelpText(
    "after",
    `
Examples:
ebrain init
`,
  )
  .action(async () => {
    await withRepo(program, async () => {
      const settings = await loadSettings();
      const dbPath = program.opts().db ?? settings.dbPath;

      // Human-readable lines are suppressed in --json mode, like every other
      // command, so the JSON payload below is the only output.
      if (!isJson(program)) {
        success(`Database initialized`);
        keyValue("Path", dbPath);
      }

      print(program, {
        ok: true,
        dbPath,
      });
    });
  });
1498
+
1499
// `ebrain stats`: print the repository counters, as a labelled list in
// human mode followed by the raw stats object via `print`.
program
  .command("stats")
  .description("show knowledge base statistics")
  .addHelpText(
    "after",
    `
Examples:
ebrain stats
ebrain stats --json
`,
  )
  .action(async () => {
    await withRepo(program, async (repo) => {
      const quiet = isJson(program);
      const counters = await repo.stats();

      if (!quiet) {
        header("Knowledge Base Statistics");
        const rows: Array<[string, unknown]> = [
          ["Pages", counters.pages],
          ["Links", counters.links],
          ["Tags", counters.tags],
          ["Timeline entries", counters.timelineEntries],
          ["Raw data rows", counters.rawRows],
        ];
        for (const [label, value] of rows) {
          keyValue(label, String(value));
        }
      }

      print(program, counters);
    });
  });
1527
+
1528
+ // Register compile and smart-ingest commands
1529
+ registerCompileCommands(program);
1530
+
1531
+ // Register graph command
1532
+ registerGraphCommand(program);
1533
+
1534
+ // -- serve / tools-json ---------------------------------------------------
1535
+
1536
// `ebrain serve`: start the MCP server over stdio.
// The server module is loaded lazily so other commands do not pay for it.
program
  .command("serve")
  .description("start MCP server over stdio (for AI tool integration)")
  .addHelpText(
    "after",
    `
Examples:
ebrain serve
`,
  )
  .action(async () => {
    const { startMcpServer } = await import("../mcp/server");
    // Resolve the path the same way `withRepo` does. Stringifying a missing
    // --db option directly would yield the literal path "undefined".
    const settings = await loadSettings();
    const dbPath = String(program.opts().db ?? settings.dbPath);
    await startMcpServer(dbPath);
  });
1551
+
1552
// `ebrain tools-json`: print the MCP tool manifest as JSON.
// Uses a dynamic import (like `serve`) instead of CommonJS `require`, which
// is not defined when this file runs as a native ES module.
program
  .command("tools-json")
  .description("print MCP tools discovery JSON")
  .action(async () => {
    const { TOOL_MANIFEST } = await import("../mcp/server");
    console.log(JSON.stringify({ tools: TOOL_MANIFEST }, null, 2));
  });
1560
+
1561
+ // -- legacy aliases (backward compat, hidden) -----------------------------
1562
+
1563
+
1564
+
1565
+
1566
+
1567
+ return program;
1568
+ }
1569
+
1570
+ // ---------------------------------------------------------------------------
1571
+ // Repo / output helpers
1572
+ // ---------------------------------------------------------------------------
1573
+
1574
/**
 * Open the database, run `callback` with a repository bound to it, then shut
 * the database down.
 *
 * The database path is the `--db` CLI option when given, otherwise
 * `settings.dbPath`.
 *
 * On success the process is terminated with exit code 0 (see the note at the
 * end). If `callback` throws, the database is still closed and the error is
 * re-thrown to the caller, so the exit code stays under the caller's control.
 *
 * @param program  Root commander program; only its global options are read.
 * @param callback Work to perform against the repository.
 */
async function withRepo(
  program: Command,
  callback: (repo: BrainRepository) => Promise<void>,
): Promise<void> {
  const settings = await loadSettings();
  const cliDb = program.opts().db;
  const dbPath = cliDb ?? settings.dbPath;
  const db = await BrainDb.connect(dbPath, settings);

  try {
    await callback(new BrainRepository(db));
  } finally {
    // Gracefully close database — also on the error path, so a failing
    // command does not leave the connection open.
    // Note: seekdb SDK's InternalEmbeddedClient.close() is empty in embedded mode
    // Data may not flush properly. Use remote seekdb server for reliability.
    try {
      await db.close();
    } catch {
      // Close may fail due to seekdb native bug
    }

    // Give seekdb extra time after close
    await new Promise<void>((done) => setTimeout(done, 500));
  }

  // CLI: force exit to bypass seekdb native cleanup segfault
  process.exit(0);
}
1600
+
1601
/**
 * Write a command result to stdout.
 *
 * - `--json` mode: always pretty-printed JSON.
 * - otherwise: strings verbatim, everything else through `formatHuman`.
 */
function print(program: Command, payload: unknown): void {
  let rendered: string;
  if (isJson(program)) {
    rendered = JSON.stringify(payload, null, 2);
  } else if (typeof payload === "string") {
    rendered = payload;
  } else {
    rendered = formatHuman(payload);
  }
  console.log(rendered);
}
1612
+
1613
/** True when the global `json` option on `program` is set to a truthy value. */
function isJson(program: Command): boolean {
  const { json } = program.opts();
  return !!json;
}
1616
+
1617
/**
 * Render a payload for human-readable output.
 *
 * Arrays become a dash-prefixed list (one line per item; non-string items are
 * shown as compact JSON). Anything else is pretty-printed JSON.
 */
function formatHuman(payload: unknown): string {
  if (!Array.isArray(payload)) {
    return JSON.stringify(payload, null, 2);
  }
  const toBullet = (entry: unknown): string =>
    "- " + (typeof entry === "string" ? entry : JSON.stringify(entry));
  return payload.map(toBullet).join("\n");
}
1629
+
1630
/**
 * Turn a wiki-link target into a slug.
 *
 * - backslashes become forward slashes
 * - every leading `./` or `../` segment is removed (an anchored pattern only
 *   ever matches once, so the run is consumed with a single repeated group)
 * - a trailing `.md` extension is removed
 *
 * @param path Raw link target as written in the markdown source.
 * @returns The normalized slug.
 */
function normalizeLinkSlug(path: string): string {
  return path
    .replace(/\\/g, "/")
    .replace(/^(?:\.{1,2}\/)+/, "")
    .replace(/\.md$/, "");
}
1637
+
1638
+ // ---------------------------------------------------------------------------
1639
+ // LLM Answer Generation — Multi-layer Context Collection
1640
+ // ---------------------------------------------------------------------------
1641
+
1642
/**
 * One labelled chunk of knowledge-base text that is placed into the LLM prompt.
 */
interface ContextSection {
  /** Which context layer produced the chunk. */
  type: 'primary' | 'raw_data' | 'linked';
  /** Slug of the page the chunk belongs to. */
  slug: string;
  /** Title of that page. */
  title: string;
  /** The text itself. */
  content: string;
  /** Human-readable label, e.g. "原始文档 (crm)" or "关联页面: projects/alpha". */
  label: string;
}
1651
+
1652
/**
 * Collect multi-layer context for LLM answer generation.
 *
 * Layers (in priority order):
 *   1. Primary: compiledTruth + timeline of each hit page
 *   2. Raw data: original documents stored via raw.set
 *   3. Linked pages: compiledTruth (and, for highly relevant ones, raw data)
 *      of pages linked to/from hit pages, ranked against the question
 *
 * Budget: `maxChars` limits the total characters collected. The very first
 * section is always kept whole, even if it alone exceeds the budget; every
 * later section is truncated to the remaining budget, or dropped when nothing
 * useful would fit. `stats.skippedChars` counts the characters cut or dropped.
 * The per-layer counters in `stats` only count sections that were added.
 *
 * @param repo     Repository used to read pages, raw rows and links.
 * @param hits     Search hits that seed the context.
 * @param question The user's question; used to rank linked pages.
 * @param maxChars Character budget for the collected content.
 * @returns The collected sections, their total length and collection stats.
 */
async function collectContextForLLM(
  repo: BrainRepository,
  hits: Array<{ slug: string; title: string; score: number }>,
  question: string,
  maxChars: number,
): Promise<{ sections: ContextSection[]; totalChars: number; stats: ContextStats }> {
  const TRUNCATION_MARKER = '\n...[truncated]';

  const sections: ContextSection[] = [];
  let totalChars = 0;
  const stats: ContextStats = {
    primaryPages: 0,
    rawDocs: 0,
    linkedPages: 0,
    skippedChars: 0,
  };

  const seenKeys = new Set<string>();

  /** Add a section within the budget; returns whether it was added. */
  function addSection(section: ContextSection): boolean {
    const key = `${section.type}:${section.slug}:${section.label}`;
    if (seenKeys.has(key)) {
      return false;
    }
    let content = section.content;
    if (content.length === 0) {
      return false;
    }
    const budget = maxChars - totalChars;
    if (content.length > budget && sections.length > 0) {
      const keep = budget - TRUNCATION_MARKER.length;
      if (keep <= 0) {
        // Not even the marker fits. A non-positive slice end would count
        // from the END of the string and keep almost everything.
        stats.skippedChars += content.length;
        return false;
      }
      stats.skippedChars += content.length - keep;
      content = content.slice(0, keep) + TRUNCATION_MARKER;
    }
    sections.push({ ...section, content });
    totalChars += content.length;
    seenKeys.add(key);
    return true;
  }

  // Layer 1: Primary pages (compiledTruth + timeline)
  for (const hit of hits) {
    const page = await repo.getPage(hit.slug);
    if (!page) continue;

    const parts: string[] = [];
    if (page.compiledTruth?.trim()) {
      parts.push(page.compiledTruth.trim());
    }
    const tl = page.timeline?.trim();
    if (tl) {
      parts.push(`## 时间线\n${tl}`);
    }
    if (parts.length === 0) continue;

    const added = addSection({
      type: 'primary',
      slug: page.slug,
      title: page.title,
      content: parts.join('\n\n'),
      label: `页面正文`,
    });
    if (added) stats.primaryPages++;
  }

  // Layer 2: Raw data (original documents)
  for (const hit of hits) {
    try {
      const rawRows = await repo.readRaw(hit.slug) as Array<{ source: string; data: unknown; fetchedAt?: string }>;
      for (const row of rawRows) {
        let rawContent = '';
        if (typeof row.data === 'string') {
          rawContent = row.data;
        } else if (typeof row.data === 'object' && row.data !== null) {
          rawContent = JSON.stringify(row.data, null, 2);
        }
        if (!rawContent.trim()) continue;

        const added = addSection({
          type: 'raw_data',
          slug: hit.slug,
          title: hit.title,
          content: rawContent,
          label: `原始文档 (${row.source})`,
        });
        if (added) stats.rawDocs++;
      }
    } catch {
      // Raw data fetch failure is non-fatal
    }
  }

  // Layer 3: Linked pages — SEMANTICALLY SCORED against the question
  // Only include linked pages that are actually relevant to what the user asked.
  const allLinkedSlugs = new Set<string>();
  for (const hit of hits) {
    try {
      const outLinks = await repo.outgoingLinks(hit.slug);
      outLinks.forEach(l => allLinkedSlugs.add(l.slug));
    } catch { /* ignore */ }
    try {
      const backlinkSlugs = await repo.backlinks(hit.slug);
      backlinkSlugs.forEach(s => allLinkedSlugs.add(s));
    } catch { /* ignore */ }
  }
  // Hit pages are already covered by layers 1 and 2; adding them again as
  // "linked" would only duplicate their text and waste budget.
  for (const hit of hits) {
    allLinkedSlugs.delete(hit.slug);
  }

  if (allLinkedSlugs.size > 0) {
    // Score linked pages using broad semantic search.
    // Query a wide set of pages, then intersect with linked slugs.
    const broadLimit = Math.min(200, Math.max(50, allLinkedSlugs.size));
    const broadResults = await repo.query(question, broadLimit);
    const semanticScoreMap = new Map(broadResults.map(h => [h.slug, h.score]));

    // Keyword-based fallback scoring for linked pages without embedding scores
    const keywordScores = new Map<string, number>();
    for (const linkedSlug of allLinkedSlugs) {
      if (semanticScoreMap.has(linkedSlug)) continue;
      try {
        const page = await repo.getPage(linkedSlug);
        if (page) {
          const text = `${page.title} ${page.compiledTruth}`.slice(0, 2000);
          keywordScores.set(linkedSlug, computeKeywordRelevance(text, question));
        }
      } catch { /* ignore */ }
    }

    // Combine scores: semantic first, then keyword fallback
    const scoredLinked = [...allLinkedSlugs].map(slug => ({
      slug,
      score: semanticScoreMap.get(slug) ?? keywordScores.get(slug) ?? 0,
    }));

    // Filter: only include linked pages with meaningful relevance
    const MIN_LINKED_SCORE = 0.02;
    const relevantLinked = scoredLinked
      .filter(s => s.score >= MIN_LINKED_SCORE)
      .sort((a, b) => b.score - a.score);

    // Fetch content for relevant linked pages (respecting budget).
    // Truncation is handled in one place, by addSection.
    for (const linked of relevantLinked) {
      if (totalChars >= maxChars) break;

      const linkedPage = await repo.getPage(linked.slug);
      if (!linkedPage || !linkedPage.compiledTruth?.trim()) continue;

      const addedPage = addSection({
        type: 'linked',
        slug: linkedPage.slug,
        title: linkedPage.title,
        content: linkedPage.compiledTruth.trim(),
        label: `关联页面: ${linkedPage.slug} (相关度: ${(linked.score * 100).toFixed(1)}%)`,
      });
      if (addedPage) stats.linkedPages++;

      // Also fetch raw data for highly relevant linked pages
      if (linked.score > 0.1) {
        try {
          const rawRows = await repo.readRaw(linked.slug) as Array<{ source: string; data: unknown }>;
          for (const row of rawRows) {
            if (totalChars >= maxChars) break;
            // JSON.stringify yields undefined for e.g. `undefined` data.
            const rawContent = typeof row.data === 'string' ? row.data : JSON.stringify(row.data) ?? '';
            if (rawContent.trim().length > 100) {
              const addedRaw = addSection({
                type: 'raw_data',
                slug: linked.slug,
                title: linkedPage.title,
                content: rawContent,
                label: `原始文档 (关联: ${row.source})`,
              });
              if (addedRaw) stats.rawDocs++;
            }
          }
        } catch { /* ignore */ }
      }
    }
  }

  return { sections, totalChars, stats };
}
1847
+
1848
/**
 * Simple keyword-based relevance scoring (fallback for pages without embeddings).
 *
 * Computes the fraction of unique meaningful characters from the question
 * that appear in the text. "Meaningful" excludes whitespace, any Unicode
 * punctuation or symbol (ASCII and full-width alike), and a fixed list of
 * very common Chinese characters. Comparison is case-insensitive, and
 * characters are lower-cased before de-duplication so "A" and "a" count once.
 *
 * NOTE(review): matching is per character, which suits CJK text; for Latin
 * text almost any letter occurs in any page, so scores are presumably inflated.
 *
 * @param text     Page text to score.
 * @param question The user's question.
 * @returns A score in [0, 1]; 0 when the question has no meaningful characters.
 */
function computeKeywordRelevance(text: string, question: string): number {
  const STOP_CHARS = new Set('的是了在和我有你就这不人都说上个大国为到以们年会生地要主中子自实家小对多能好可很所把当');
  // Built once per call rather than once per character.
  const NOISE = /[\s\p{P}\p{S}]/u;

  const meaningful = new Set<string>();
  for (const ch of question.toLowerCase()) {
    if (!NOISE.test(ch) && !STOP_CHARS.has(ch)) {
      meaningful.add(ch);
    }
  }
  if (meaningful.size === 0) return 0;

  const haystack = text.toLowerCase();
  let matched = 0;
  for (const ch of meaningful) {
    if (haystack.includes(ch)) matched++;
  }
  return matched / meaningful.size;
}
1867
+
1868
/** Counters describing what context collection gathered for the LLM prompt. */
interface ContextStats {
  /** Count of primary (hit page) sections. */
  primaryPages: number;
  /** Count of raw-document sections. */
  rawDocs: number;
  /** Count of linked-page sections. */
  linkedPages: number;
  /** Character count attributed to budget truncation. */
  skippedChars: number;
}
1874
+
1875
/**
 * Turn the collected context sections into a prompt, send it to the
 * configured chat-completions endpoint and return the answer text.
 *
 * Never throws: a missing API key, an HTTP failure or a network error is
 * reported as a string starting with "Error:". With no sections at all a
 * fixed "nothing found" message is returned without calling the LLM.
 *
 * @param question The user's question.
 * @param sections Context sections to ground the answer in.
 * @param stats    Collection statistics (not used when building the prompt).
 * @param llm      Resolved LLM settings (base URL, model, API key or env name).
 */
async function generateAnswerWithContext(
  question: string,
  sections: ContextSection[],
  stats: ContextStats,
  llm: ResolvedLLM,
): Promise<string> {
  const apiKey = llm.apiKey || process.env[llm.apiKeyEnv] || "";
  if (!apiKey) {
    return "Error: LLM API key not configured.";
  }
  if (sections.length === 0) {
    return "知识库中没有找到相关内容。";
  }

  // Sections are grouped by layer, in this order, and numbered continuously
  // across groups.
  const layout: Array<[ContextSection['type'], string]> = [
    ['primary', '页面正文'],
    ['raw_data', '原始文档'],
    ['linked', '关联页面'],
  ];

  const blocks: string[] = [];
  let counter = 0;
  for (const [kind, heading] of layout) {
    const members = sections.filter(s => s.type === kind);
    if (members.length === 0) continue;

    blocks.push(`## ${heading}\n`);
    for (const member of members) {
      counter += 1;
      blocks.push(`### [${counter}] ${member.title} — ${member.label}\n**Slug:** ${member.slug}\n\n${member.content}\n`);
    }
    blocks.push('');
  }
  const context = blocks.join('\n');

  const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。

## 问题
${question}

## 知识库内容

${context}

## 回答要求
- 仅基于提供的知识库内容回答,不要编造信息
- 如果知识库中没有相关信息,请明确说明
- 引用来源时使用 [[slug|标题]] 的格式
- 使用清晰的 markdown 格式
- 如果涉及时间线信息,请在回答中体现
- 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
- 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)

## 回答`;

  // Join base URL and path with exactly one slash between them.
  const separator = llm.baseURL.endsWith("/") ? "" : "/";
  const endpoint = llm.baseURL + separator + "chat/completions";

  const requestBody = {
    model: llm.model,
    messages: [
      {
        role: "system",
        content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
      },
      { role: "user", content: prompt },
    ],
    temperature: 0.3,
    max_tokens: 4096,
  };

  try {
    const resp = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(requestBody),
    });

    if (!resp.ok) {
      const failureText = await resp.text();
      return `Error: LLM API failed (${resp.status}): ${failureText.slice(0, 200)}`;
    }

    const data = await resp.json();
    return data.choices?.[0]?.message?.content || "(No answer generated)";
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    return `Error: ${msg}`;
  }
}