ex-brain 0.2.5 → 0.2.7

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
- import { basename, resolve } from "node:path";
1
+ import { basename, extname, resolve } from "node:path";
2
2
  import { readFileSync } from "node:fs";
3
+ import { createHash } from "node:crypto";
3
4
  import { Command } from "commander";
4
5
  import { DEFAULT_DB_NAME, inferTypeFromSlug, slugToTitle, normalizeLongSlug, slugify } from "../config";
5
6
  import { BrainDb } from "../db/client";
@@ -13,6 +14,7 @@ import {
13
14
  slugToPath,
14
15
  writeTextFile,
15
16
  } from "../markdown/io";
17
+ import { loadDocument, isRemoteUrl, type DocumentKind } from "../markdown/document-loader";
16
18
  import {
17
19
  extractTimelineLines,
18
20
  extractWikiStyleLinks,
@@ -34,6 +36,7 @@ import {
34
36
  subItem,
35
37
  keyValue,
36
38
  header,
39
+ separator,
37
40
  createSpinner,
38
41
  formatCount,
39
42
  type ProgressSpinner,
@@ -51,6 +54,14 @@ function isDryRun(opts: Record<string, unknown>): boolean {
51
54
  return Boolean(opts.dryRun);
52
55
  }
53
56
 
57
+ /**
58
+ * Compute a short SHA-256 hex hash of a string (first 16 chars).
59
+ * Used for detecting duplicate document ingestion.
60
+ */
61
+ function contentHash(text: string): string {
62
+ return createHash("sha256").update(text, "utf8").digest("hex").slice(0, 16);
63
+ }
64
+
54
65
  // Simple progress output to stderr (won't interfere with --json stdout).
55
66
  // e.g. "[3/42] import docs/api"
56
67
  function progress(label: string, current: number, total: number, json: boolean): void {
@@ -93,12 +104,12 @@ async function applyEntityLinks(
93
104
  }
94
105
  return { created: 0, linked: 0 };
95
106
  }
96
-
107
+
97
108
  // Filter by confidence
98
109
  const confidenceThreshold = settings.extraction.confidenceThreshold;
99
110
  const highConfidence = relations.filter((r) => r.confidence >= confidenceThreshold);
100
111
  const ignoredCount = relations.length - highConfidence.length;
101
-
112
+
102
113
  if (highConfidence.length === 0) {
103
114
  if (!json) {
104
115
  if (relations.length > 0) {
@@ -118,7 +129,7 @@ async function applyEntityLinks(
118
129
  // 1. Resolve entity slugs (disambiguation)
119
130
  const fromCandidate = entityToSlug(r.from.name, r.from.type);
120
131
  const toCandidate = entityToSlug(r.to.name, r.to.type);
121
-
132
+
122
133
  const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
123
134
  const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
124
135
 
@@ -143,7 +154,7 @@ async function applyEntityLinks(
143
154
  const duration = formatDuration(Date.now() - startTime);
144
155
  const entityNames = [...new Set(highConfidence.flatMap((r) => [r.from.name, r.to.name]))];
145
156
  spinner.succeed(`Extracted ${entityNames.length} entities: ${entityNames.join(", ")}`);
146
-
157
+
147
158
  // Print detailed info
148
159
  subItem(`${created} entity pages created`);
149
160
  subItem(`${linked} links added`);
@@ -224,23 +235,46 @@ Examples:
224
235
 
225
236
  // -- page CRUD ------------------------------------------------------------
226
237
 
238
+ // -- put ------------------------------------------------------------------
239
+ // Auto-detects file type: markdown goes through parsePageMarkdown,
240
+ // other formats (pdf, docx, html, txt, json) go through loadDocument.
241
+
242
+ /** Non-markdown extensions that should use the document ingestion path. */
243
+ const DOC_EXTENSIONS = new Set([
244
+ "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
245
+ ]);
246
+
247
+ /** Whether a file path should be treated as a document (not markdown). */
248
+ function isDocumentFile(filePath: string, forceKind?: string): boolean {
249
+ if (forceKind && forceKind !== "markdown") return true;
250
+ const ext = extname(filePath).toLowerCase().replace(/^\./, "");
251
+ return DOC_EXTENSIONS.has(ext);
252
+ }
253
+
227
254
  addDryRun(
228
255
  program
229
256
  .command("put")
230
257
  .argument("[slug]", "page slug (optional; auto-generated if omitted)")
231
- .option("--file <path>", "read markdown from file")
258
+ .option("--file <path>", "read content from file (markdown, pdf, docx, html, txt, json)")
232
259
  .option("--stdin", "read markdown from stdin", false)
233
- .option("--type <type>", "page type")
234
- .option("--title <title>", "page title")
260
+ .option("--type <type>", "page type override")
261
+ .option("--title <title>", "page title override")
262
+ .option("--format <kind>", "force document kind (pdf|docx|html|json|markdown|text) — only needed for --file with non-md files when auto-detect fails")
263
+ .option("--max-bytes <number>", "max bytes for URL/file ingest", "52428800")
264
+ .option("--timeout <ms>", "fetch timeout for URLs in ms", "30000")
235
265
  .description(
236
- "create or update a page (idempotent; upserts by slug). If slug is omitted, it is auto-generated from file name, title, or timestamp.",
266
+ "create or update a page (idempotent; upserts by slug). Auto-detects file type: markdown is parsed normally, PDF/DOCX/HTML/TXT/JSON are extracted and ingested.",
237
267
  )
238
268
  .addHelpText(
239
269
  "after",
240
270
  `
241
271
  Examples:
242
- ebrain put --file api.md # auto-generate slug from file name
272
+ ebrain put --file api.md # markdown parsePageMarkdown
243
273
  ebrain put docs/api --file api.md # explicit slug
274
+ ebrain put --file report.pdf # pdf → auto-extract text
275
+ ebrain put docs/report --file report.pdf # explicit slug for pdf
276
+ ebrain put --file article.docx # docx → auto-extract text
277
+ ebrain put --file https://example.com/a.pdf # URL → download + extract
244
278
  cat note.md | ebrain put --stdin # auto-generate slug from title/timestamp
245
279
  ebrain put --title "My Note" --stdin # auto-generate slug from title
246
280
  ebrain put people/john --type person --title "John Doe"
@@ -255,9 +289,173 @@ Examples:
255
289
  stdin?: boolean;
256
290
  type?: string;
257
291
  title?: string;
292
+ format?: string;
293
+ maxBytes?: string;
294
+ timeout?: string;
258
295
  dryRun?: boolean;
259
296
  },
260
297
  ) => {
298
+ // ── Branch 1: document file (pdf/docx/html/txt/json or URL) ──
299
+ const forceKind = opts.format as DocumentKind | undefined;
300
+ if (opts.file && isDocumentFile(opts.file, opts.format)) {
301
+ const loaded = await loadDocument(opts.file, {
302
+ forceKind,
303
+ fetchTimeoutMs: opts.timeout ? Number(opts.timeout) : undefined,
304
+ maxBytes: opts.maxBytes ? Number(opts.maxBytes) : undefined,
305
+ });
306
+ const content = loaded.text;
307
+ const fileName = loaded.fileName;
308
+ const kind = loaded.kind;
309
+ const sourceRef = loaded.source;
310
+ const sourceType = loaded.sourceType;
311
+ const mimeType = loaded.mimeType;
312
+ const bytes = loaded.bytes;
313
+ const metadata = loaded.metadata;
314
+
315
+ let finalSlug = slug;
316
+ if (!finalSlug) {
317
+ const nameNoExt = fileName.replace(/\.[^.]+$/, "");
318
+ const slugBase = normalizeLongSlug(slugify(nameNoExt));
319
+ finalSlug = `ingest/${slugBase}`;
320
+ }
321
+
322
+ const type = opts.type ?? kind;
323
+ const title =
324
+ opts.title ??
325
+ String(slugToTitle(finalSlug));
326
+ const hash = contentHash(content);
327
+ const frontmatter: Record<string, unknown> = {
328
+ sourceFile: sourceRef,
329
+ sourceType,
330
+ sourceKind: kind,
331
+ sourceMimeType: mimeType,
332
+ sourceBytes: bytes,
333
+ sourceFileName: fileName,
334
+ _contentHash: hash,
335
+ ...metadata,
336
+ };
337
+
338
+ if (isDryRun(opts)) {
339
+ print(program, {
340
+ dryRun: true,
341
+ action: "put",
342
+ slug: finalSlug,
343
+ type,
344
+ title,
345
+ kind,
346
+ sourceType,
347
+ sourceRef,
348
+ mimeType,
349
+ bytes,
350
+ contentLength: content.length,
351
+ contentHash: hash,
352
+ metadata,
353
+ });
354
+ return;
355
+ }
356
+
357
+ await withRepo(program, async (repo) => {
358
+ const jsonOut = isJson(program);
359
+ const spinner = createSpinner();
360
+ const startTime = Date.now();
361
+
362
+ // Check if content has already been ingested (idempotency)
363
+ const existingPage = await repo.getPage(finalSlug);
364
+ const existingHash = existingPage?.frontmatter._contentHash as string | undefined;
365
+
366
+ if (existingHash === hash) {
367
+ if (!jsonOut) {
368
+ header(`Put: ${fileName}`);
369
+ success(`Content unchanged — skipped (hash: ${hash})`);
370
+ }
371
+ print(program, {
372
+ ok: true,
373
+ action: "put",
374
+ slug: finalSlug,
375
+ unchanged: true,
376
+ contentHash: hash,
377
+ });
378
+ return;
379
+ }
380
+
381
+ if (!jsonOut) {
382
+ header(`Put: ${fileName}`);
383
+ keyValue("Kind", kind);
384
+ keyValue("Source", sourceRef);
385
+ if (mimeType) keyValue("Content-Type", mimeType);
386
+ keyValue("Bytes", String(bytes));
387
+ if (existingPage) {
388
+ keyValue("Previous hash", existingHash ?? "none");
389
+ keyValue("New hash", hash);
390
+ }
391
+ spinner.start(`Creating page from ${kind}...`);
392
+ }
393
+
394
+ await repo.putPage({
395
+ slug: finalSlug,
396
+ type,
397
+ title,
398
+ compiledTruth: content,
399
+ timeline: "",
400
+ frontmatter,
401
+ });
402
+
403
+ if (!jsonOut) {
404
+ spinner.succeed(`Page created: ${finalSlug}`);
405
+ keyValue("Type", type);
406
+ keyValue("Content length", `${content.length} chars`);
407
+ }
408
+
409
+ // ── Side-effect operations (only on new/changed content) ──
410
+ await repo.timelineAdd({
411
+ pageSlug: finalSlug,
412
+ date: new Date().toISOString().slice(0, 10),
413
+ source: type,
414
+ summary: `Ingested ${kind} ${fileName}`,
415
+ detail: sourceType === "url" ? `Source URL: ${sourceRef}` : "",
416
+ });
417
+
418
+ try {
419
+ await repo.writeRaw(finalSlug, sourceType, {
420
+ fileName,
421
+ sourceRef,
422
+ kind,
423
+ mimeType,
424
+ bytes,
425
+ metadata,
426
+ ingestedAt: new Date().toISOString(),
427
+ });
428
+ } catch (err) {
429
+ if (!jsonOut) {
430
+ warning(
431
+ `failed to record raw_data: ${err instanceof Error ? err.message : String(err)}`,
432
+ );
433
+ }
434
+ }
435
+
436
+ await applyEntityLinks(repo, finalSlug, content, jsonOut);
437
+
438
+ if (!jsonOut) {
439
+ const duration = formatDuration(Date.now() - startTime);
440
+ success(`Operation completed in ${duration}`);
441
+ }
442
+
443
+ print(program, {
444
+ ok: true,
445
+ action: "put",
446
+ slug: finalSlug,
447
+ kind,
448
+ sourceType,
449
+ sourceRef,
450
+ bytes,
451
+ contentLength: content.length,
452
+ contentHash: hash,
453
+ });
454
+ });
455
+ return;
456
+ }
457
+
458
+ // ── Branch 2: markdown (stdin or .md file) ──
261
459
  const input = await resolveInput(opts.file, opts.stdin ?? false);
262
460
  if (!input.trim()) {
263
461
  throw new Error(
@@ -265,7 +463,7 @@ Examples:
265
463
  );
266
464
  }
267
465
  const parsed = parsePageMarkdown(input);
268
-
466
+
269
467
  // Auto-generate slug if not provided
270
468
  let finalSlug = slug;
271
469
  if (!finalSlug) {
@@ -283,7 +481,7 @@ Examples:
283
481
  finalSlug = `notes/${timestamp}`;
284
482
  }
285
483
  }
286
-
484
+
287
485
  const type =
288
486
  opts.type ??
289
487
  String(parsed.frontmatter.type ?? inferTypeFromSlug(finalSlug));
@@ -291,6 +489,10 @@ Examples:
291
489
  opts.title ??
292
490
  String(parsed.frontmatter.title ?? slugToTitle(finalSlug));
293
491
 
492
+ // Compute content hash and embed in frontmatter for idempotency
493
+ const hash = contentHash(parsed.compiledTruth);
494
+ parsed.frontmatter._contentHash = hash;
495
+
294
496
  if (isDryRun(opts)) {
295
497
  print(program, {
296
498
  dryRun: true,
@@ -299,6 +501,7 @@ Examples:
299
501
  type,
300
502
  title,
301
503
  contentLength: parsed.compiledTruth.length,
504
+ contentHash: hash,
302
505
  hasTimeline: !!parsed.timeline,
303
506
  frontmatterKeys: Object.keys(parsed.frontmatter),
304
507
  });
@@ -309,12 +512,35 @@ Examples:
309
512
  const jsonOut = isJson(program);
310
513
  const spinner = createSpinner();
311
514
  const startTime = Date.now();
312
-
515
+
516
+ // Check if content is unchanged (idempotency)
517
+ const existingPage = await repo.getPage(finalSlug);
518
+ const existingHash = existingPage?.frontmatter._contentHash as string | undefined;
519
+
520
+ if (existingHash === hash) {
521
+ if (!jsonOut) {
522
+ header(`Put: ${finalSlug}`);
523
+ success(`Content unchanged — skipped (hash: ${hash})`);
524
+ }
525
+ print(program, {
526
+ ok: true,
527
+ action: "put",
528
+ slug: finalSlug,
529
+ unchanged: true,
530
+ contentHash: hash,
531
+ });
532
+ return;
533
+ }
534
+
313
535
  if (!jsonOut) {
314
536
  header(`Put: ${finalSlug}`);
537
+ if (existingPage) {
538
+ keyValue("Previous hash", existingHash ?? "none");
539
+ keyValue("New hash", hash);
540
+ }
315
541
  spinner.start(`Creating/updating page...`);
316
542
  }
317
-
543
+
318
544
  const page = await repo.putPage({
319
545
  slug: finalSlug,
320
546
  type,
@@ -323,27 +549,32 @@ Examples:
323
549
  timeline: parsed.timeline,
324
550
  frontmatter: parsed.frontmatter,
325
551
  });
326
-
552
+
327
553
  if (!jsonOut) {
328
554
  spinner.succeed(`Page saved: ${page.slug}`);
329
555
  keyValue("Title", title);
330
556
  keyValue("Type", type);
331
557
  keyValue("Content length", `${parsed.compiledTruth.length} chars`);
332
558
  }
333
-
559
+
334
560
  await applyEntityLinks(
335
561
  repo,
336
562
  finalSlug,
337
563
  parsed.compiledTruth,
338
564
  jsonOut,
339
565
  );
340
-
566
+
341
567
  if (!jsonOut) {
342
568
  const duration = formatDuration(Date.now() - startTime);
343
569
  success(`Operation completed in ${duration}`);
344
570
  }
345
-
346
- print(program, { ok: true, slug: page.slug, updatedAt: page.updatedAt });
571
+
572
+ print(program, {
573
+ ok: true,
574
+ slug: page.slug,
575
+ updatedAt: page.updatedAt,
576
+ contentHash: hash,
577
+ });
347
578
  });
348
579
  },
349
580
  );
@@ -414,18 +645,18 @@ Examples:
414
645
  await withRepo(program, async (repo) => {
415
646
  const jsonOut = isJson(program);
416
647
  const spinner = createSpinner();
417
-
648
+
418
649
  if (!jsonOut) {
419
650
  header(`Delete: ${slug}`);
420
651
  spinner.start(`Deleting page and related data...`);
421
652
  }
422
-
653
+
423
654
  await repo.deletePage(slug);
424
-
655
+
425
656
  if (!jsonOut) {
426
657
  spinner.succeed(`Page deleted: ${slug}`);
427
658
  }
428
-
659
+
429
660
  print(program, { ok: true, action: "delete", slug });
430
661
  });
431
662
  });
@@ -521,7 +752,7 @@ Examples:
521
752
  await withRepo(program, async (repo) => {
522
753
  const limit = Number(opts.limit ?? 10);
523
754
  const hits = await repo.query(question, limit);
524
-
755
+
525
756
  // If --llm flag, generate answer based on multi-layer context
526
757
  if (opts.llm) {
527
758
  const settings = await loadSettings();
@@ -529,20 +760,20 @@ Examples:
529
760
  print(program, { error: "LLM not configured. Set llm.baseURL in settings." });
530
761
  return;
531
762
  }
532
-
763
+
533
764
  const progress = createProgress();
534
765
  progress.start("Searching knowledge base...");
535
-
766
+
536
767
  const contextLimit = Number(opts.contextLimit ?? 5);
537
768
  const topHits = hits.slice(0, contextLimit);
538
-
769
+
539
770
  if (topHits.length === 0) {
540
771
  progress.stop();
541
772
  process.stderr.write("No relevant pages found.\n");
542
773
  print(program, { answer: "No relevant information found in the knowledge base.", sources: [] });
543
774
  return;
544
775
  }
545
-
776
+
546
777
  // Collect multi-layer context (primary + raw data + linked pages scored by relevance)
547
778
  // ~100KB char budget ≈ 25K tokens, safe for most models
548
779
  const MAX_CONTEXT_CHARS = 100_000;
@@ -552,33 +783,33 @@ Examples:
552
783
  progress.update(`Loading ${stage}...`);
553
784
  });
554
785
  const ctxDuration = formatDuration(Date.now() - ctxStart);
555
-
786
+
556
787
  if (sections.length === 0) {
557
788
  progress.stop();
558
789
  process.stderr.write("No content could be loaded.\n");
559
790
  print(program, { answer: "Failed to load page content.", sources: [] });
560
791
  return;
561
792
  }
562
-
793
+
563
794
  progress.succeed(`Loaded ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s) (${ctxDuration})`);
564
795
  const startTime = Date.now();
565
-
796
+
566
797
  const { answer, ok } = await generateAnswerWithStream(question, sections, stats, settings.llm);
567
-
798
+
568
799
  if (!ok) {
569
800
  // If streaming failed, answer contains the error message
570
801
  console.log(answer);
571
802
  return;
572
803
  }
573
-
804
+
574
805
  const duration = formatDuration(Date.now() - startTime);
575
-
806
+
576
807
  // Show sources breakdown
577
808
  console.log("\n---\n**Sources:**\n");
578
809
  for (let i = 0; i < sections.length; i++) {
579
810
  const s = sections[i];
580
811
  const icon = s.type === 'primary' ? '📄' : s.type === 'raw_data' ? '📎' : '🔗';
581
- console.log(`${icon} ${i + 1}. [[${s.slug}|${s.title}]] ${s.label} (${(s.content.length / 1024).toFixed(1)}KB)`);
812
+ console.log(`${icon} ${i + 1}. [[${s.slug}|${s.title}]] - ${s.label} (${(s.content.length / 1024).toFixed(1)}KB)`);
582
813
  }
583
814
  console.log(`\n*Context: ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)*`);
584
815
  } else {
@@ -762,11 +993,11 @@ Examples:
762
993
  throw new Error(`page not found: ${slug}`);
763
994
  }
764
995
  const settings = await loadSettings();
765
-
996
+
766
997
  const progress = createProgress();
767
998
  progress.start(`Extracting timeline from ${slug}...`);
768
999
  const startTime = Date.now();
769
-
1000
+
770
1001
  const result = await repo.extractAndAddTimeline(
771
1002
  slug,
772
1003
  page.compiledTruth,
@@ -774,16 +1005,16 @@ Examples:
774
1005
  opts.defaultDate ?? new Date().toISOString().slice(0, 10),
775
1006
  settings.llm,
776
1007
  );
777
-
1008
+
778
1009
  const duration = formatDuration(Date.now() - startTime);
779
-
1010
+
780
1011
  if (result.entries.length > 0) {
781
1012
  progress.succeed(`${result.entries.length} events extracted (${duration})`);
782
1013
  } else {
783
1014
  progress.stop();
784
1015
  process.stderr.write(`No events found (${duration})\n`);
785
1016
  }
786
-
1017
+
787
1018
  print(program, {
788
1019
  ok: true,
789
1020
  action: "timeline-extract",
@@ -946,7 +1177,7 @@ Examples:
946
1177
  data = JSON.parse(opts.data);
947
1178
  } else if (opts.stdin) {
948
1179
  const raw = await readMaybeStdin();
949
- if (!raw?.trim()) throw new Error("empty stdin pipe JSON");
1180
+ if (!raw?.trim()) throw new Error("empty stdin - pipe JSON");
950
1181
  data = JSON.parse(raw);
951
1182
  } else {
952
1183
  throw new Error("provide --data <json> or --stdin");
@@ -995,7 +1226,7 @@ Examples:
995
1226
  await withRepo(program, async (repo) => {
996
1227
  const root = resolve(dir);
997
1228
  const files = await collectMarkdownFiles(root);
998
-
1229
+
999
1230
  if (isDryRun(opts)) {
1000
1231
  print(program, {
1001
1232
  dryRun: true,
@@ -1011,16 +1242,16 @@ Examples:
1011
1242
  const settings = await loadSettings();
1012
1243
  const spinner = createSpinner();
1013
1244
  const startTime = Date.now();
1014
-
1245
+
1015
1246
  if (!jsonOut) {
1016
1247
  header(`Import: ${root}`);
1017
1248
  }
1018
-
1249
+
1019
1250
  // Phase 1: Parse all files and collect data
1020
1251
  if (!jsonOut) {
1021
1252
  spinner.start(`Scanning ${files.length} files...`);
1022
1253
  }
1023
-
1254
+
1024
1255
  const fileData: Array<{
1025
1256
  file: string;
1026
1257
  slug: string;
@@ -1030,7 +1261,7 @@ Examples:
1030
1261
  timelineEntries: ReturnType<typeof extractTimelineLines>;
1031
1262
  tags: string[];
1032
1263
  }> = [];
1033
-
1264
+
1034
1265
  for (const file of files) {
1035
1266
  const rawSlug = pathToSlug(file, root);
1036
1267
  const slug = normalizeLongSlug(rawSlug);
@@ -1043,19 +1274,19 @@ Examples:
1043
1274
  : [];
1044
1275
  fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
1045
1276
  }
1046
-
1277
+
1047
1278
  if (!jsonOut) {
1048
1279
  spinner.succeed(`Found ${files.length} markdown files`);
1049
1280
  }
1050
-
1281
+
1051
1282
  // Phase 2: Write all pages first (skip embed for performance)
1052
1283
  if (!jsonOut) {
1053
1284
  spinner.start(`Writing ${fileData.length} pages to database...`);
1054
1285
  }
1055
-
1286
+
1056
1287
  const allSlugs: string[] = [];
1057
1288
  const writeErrors: string[] = [];
1058
-
1289
+
1059
1290
  for (let i = 0; i < fileData.length; i++) {
1060
1291
  const { slug, parsed } = fileData[i]!;
1061
1292
  if (!jsonOut && i % 20 === 0) {
@@ -1075,7 +1306,7 @@ Examples:
1075
1306
  writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
1076
1307
  }
1077
1308
  }
1078
-
1309
+
1079
1310
  if (!jsonOut) {
1080
1311
  spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
1081
1312
  if (writeErrors.length > 0) {
@@ -1088,16 +1319,16 @@ Examples:
1088
1319
  }
1089
1320
  }
1090
1321
  }
1091
-
1322
+
1092
1323
  // Phase 3: Parallel entity extraction (main optimization)
1093
1324
  const BATCH_SIZE = 10;
1094
1325
  const entityResults = new Map<string, Awaited<ReturnType<typeof extractRelations>>>();
1095
-
1326
+
1096
1327
  if (settings.llm.baseURL) {
1097
1328
  if (!jsonOut) {
1098
1329
  spinner.start(`Extracting entities with LLM...`);
1099
1330
  }
1100
-
1331
+
1101
1332
  for (let i = 0; i < fileData.length; i += BATCH_SIZE) {
1102
1333
  const batch = fileData.slice(i, i + BATCH_SIZE);
1103
1334
  if (!jsonOut) {
@@ -1112,7 +1343,7 @@ Examples:
1112
1343
  entityResults.set(slug, relations);
1113
1344
  }
1114
1345
  }
1115
-
1346
+
1116
1347
  if (!jsonOut) {
1117
1348
  spinner.succeed(`Entity extraction complete`);
1118
1349
  }
@@ -1121,17 +1352,17 @@ Examples:
1121
1352
  warning(`LLM not configured, skipping entity extraction`);
1122
1353
  }
1123
1354
  }
1124
-
1355
+
1125
1356
  // Phase 4: Write links, tags, timeline, and entity pages
1126
1357
  if (!jsonOut) {
1127
1358
  spinner.start(`Creating links, tags, and timeline entries...`);
1128
1359
  }
1129
-
1360
+
1130
1361
  let linkCount = 0;
1131
1362
  let timelineCount = 0;
1132
1363
  let entityCount = 0;
1133
1364
  let tagCount = 0;
1134
-
1365
+
1135
1366
  // Collect timeline entries for batch insert
1136
1367
  const allTimelineEntries: Array<{
1137
1368
  pageSlug: string;
@@ -1140,14 +1371,14 @@ Examples:
1140
1371
  summary: string;
1141
1372
  detail: string;
1142
1373
  }> = [];
1143
-
1374
+
1144
1375
  for (const { slug, wikiLinks, timelineEntries, tags, content } of fileData) {
1145
1376
  // Wiki links
1146
1377
  for (const link of wikiLinks) {
1147
1378
  await repo.link(slug, link, "import");
1148
1379
  linkCount++;
1149
1380
  }
1150
-
1381
+
1151
1382
  // Collect timeline entries for batch insert
1152
1383
  for (const entry of timelineEntries) {
1153
1384
  allTimelineEntries.push({
@@ -1159,13 +1390,13 @@ Examples:
1159
1390
  });
1160
1391
  timelineCount++;
1161
1392
  }
1162
-
1393
+
1163
1394
  // Tags
1164
1395
  for (const tag of tags) {
1165
1396
  await repo.tag(slug, tag);
1166
1397
  tagCount++;
1167
1398
  }
1168
-
1399
+
1169
1400
  // Entity links from parallel extraction
1170
1401
  const relations = entityResults.get(slug);
1171
1402
  if (relations && relations.length > 0) {
@@ -1175,12 +1406,12 @@ Examples:
1175
1406
  const toCandidate = entityToSlug(r.to.name, r.to.type);
1176
1407
  const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
1177
1408
  const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
1178
-
1409
+
1179
1410
  const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
1180
1411
  const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
1181
1412
  if (c1) entityCount++;
1182
1413
  if (c2) entityCount++;
1183
-
1414
+
1184
1415
  await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
1185
1416
  await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
1186
1417
  await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
@@ -1188,16 +1419,16 @@ Examples:
1188
1419
  }
1189
1420
  }
1190
1421
  }
1191
-
1422
+
1192
1423
  // Batch insert all timeline entries
1193
1424
  if (allTimelineEntries.length > 0) {
1194
1425
  await repo.timelineAddBatch(allTimelineEntries);
1195
1426
  }
1196
-
1427
+
1197
1428
  if (!jsonOut) {
1198
1429
  spinner.succeed(`Created links, tags, and timeline`);
1199
1430
  }
1200
-
1431
+
1201
1432
  // Phase 5: Batch sync all pages to search index
1202
1433
  if (opts.skipIndex) {
1203
1434
  if (!jsonOut) {
@@ -1208,14 +1439,14 @@ Examples:
1208
1439
  spinner.start(`Indexing ${allSlugs.length} pages for search...`);
1209
1440
  }
1210
1441
  await repo.embedAll();
1211
-
1442
+
1212
1443
  if (!jsonOut) {
1213
1444
  spinner.succeed(`Search indexing complete`);
1214
1445
  }
1215
1446
  }
1216
-
1447
+
1217
1448
  const duration = formatDuration(Date.now() - startTime);
1218
-
1449
+
1219
1450
  if (!jsonOut) {
1220
1451
  // Print summary
1221
1452
  header("Import Summary");
@@ -1226,12 +1457,12 @@ Examples:
1226
1457
  keyValue("Timeline entries", String(timelineCount));
1227
1458
  keyValue("Tags added", String(tagCount));
1228
1459
  keyValue("Duration", duration);
1229
-
1460
+
1230
1461
  if (writeErrors.length > 0) {
1231
1462
  warning(`${writeErrors.length} pages had errors`);
1232
1463
  }
1233
1464
  }
1234
-
1465
+
1235
1466
  print(program, {
1236
1467
  ok: true,
1237
1468
  importedFiles: files.length,
@@ -1279,116 +1510,6 @@ Examples:
1279
1510
  });
1280
1511
  });
1281
1512
 
1282
- // -- ingest ---------------------------------------------------------------
1283
-
1284
- addDryRun(
1285
- program
1286
- .command("ingest")
1287
- .argument("[file]", "file path to ingest (omit for stdin)")
1288
- .option("--type <type>", "source type", "doc")
1289
- .option("--stdin", "read from stdin", false)
1290
- .description("ingest a file as a new page (under ingest/<name>)")
1291
- .addHelpText(
1292
- "after",
1293
- `
1294
- Examples:
1295
- ebrain ingest report.pdf --type pdf
1296
- cat article.md | ebrain ingest --stdin --type article
1297
- ebrain ingest report.pdf --type pdf --dry-run
1298
- `,
1299
- ),
1300
- ).action(
1301
- async (
1302
- file: string | undefined,
1303
- opts: { type?: string; stdin?: boolean; dryRun?: boolean },
1304
- ) => {
1305
- let content: string;
1306
- let fileName: string;
1307
-
1308
- if (file) {
1309
- const fullPath = resolve(file);
1310
- if (!(await fileExists(fullPath))) {
1311
- throw new Error(`file not found: ${file}`);
1312
- }
1313
- content = await readTextFile(fullPath);
1314
- fileName = basename(fullPath);
1315
- } else if (opts.stdin) {
1316
- const raw = await readMaybeStdin();
1317
- if (!raw?.trim()) throw new Error("empty stdin — pipe content");
1318
- content = raw;
1319
- fileName = "stdin";
1320
- } else {
1321
- throw new Error("provide <file> or --stdin");
1322
- }
1323
-
1324
- const slug = `ingest/${fileName.replace(/\.[^.]+$/, "")}`;
1325
- const type = opts.type ?? "doc";
1326
-
1327
- if (isDryRun(opts)) {
1328
- print(program, {
1329
- dryRun: true,
1330
- action: "ingest",
1331
- slug,
1332
- type,
1333
- contentLength: content.length,
1334
- });
1335
- return;
1336
- }
1337
-
1338
- await withRepo(program, async (repo) => {
1339
- const jsonOut = isJson(program);
1340
- const spinner = createSpinner();
1341
- const startTime = Date.now();
1342
-
1343
- if (!jsonOut) {
1344
- header(`Ingest: ${fileName}`);
1345
- spinner.start(`Creating page from file...`);
1346
- }
1347
-
1348
- await repo.putPage({
1349
- slug,
1350
- type,
1351
- title: slugToTitle(slug),
1352
- compiledTruth: content,
1353
- timeline: "",
1354
- frontmatter: {
1355
- sourceFile: resolve(fileName),
1356
- sourceType: type,
1357
- },
1358
- });
1359
-
1360
- if (!jsonOut) {
1361
- spinner.succeed(`Page created: ${slug}`);
1362
- keyValue("Source file", fileName);
1363
- keyValue("Type", type);
1364
- keyValue("Content length", `${content.length} chars`);
1365
- }
1366
-
1367
- await repo.timelineAdd({
1368
- pageSlug: slug,
1369
- date: new Date().toISOString().slice(0, 10),
1370
- source: type,
1371
- summary: `Ingested file ${fileName}`,
1372
- detail: "",
1373
- });
1374
-
1375
- await applyEntityLinks(
1376
- repo,
1377
- slug,
1378
- content,
1379
- jsonOut,
1380
- );
1381
-
1382
- if (!jsonOut) {
1383
- const duration = formatDuration(Date.now() - startTime);
1384
- success(`Ingestion completed in ${duration}`);
1385
- }
1386
-
1387
- print(program, { ok: true, action: "ingest", slug });
1388
- });
1389
- },
1390
- );
1391
-
1392
1513
  // -- embed ----------------------------------------------------------------
1393
1514
 
1394
1515
  addDryRun(
@@ -1428,26 +1549,26 @@ Examples:
1428
1549
  const jsonOut = isJson(program);
1429
1550
  const spinner = createSpinner();
1430
1551
  const startTime = Date.now();
1431
-
1552
+
1432
1553
  if (!jsonOut) {
1433
1554
  header("Embed All Pages");
1434
1555
  spinner.start(`Loading pages...`);
1435
1556
  }
1436
-
1557
+
1437
1558
  const pages = await repo.listPages({ limit: 100000 });
1438
-
1559
+
1439
1560
  if (!jsonOut) {
1440
1561
  spinner.update(`Embedding ${pages.length} pages...`);
1441
1562
  }
1442
-
1563
+
1443
1564
  const count = await repo.embedAll();
1444
-
1565
+
1445
1566
  if (!jsonOut) {
1446
1567
  const duration = formatDuration(Date.now() - startTime);
1447
1568
  spinner.succeed(`Embedded ${count} pages`);
1448
1569
  keyValue("Duration", duration);
1449
1570
  }
1450
-
1571
+
1451
1572
  print(program, { embedded: count, mode: "all" });
1452
1573
  });
1453
1574
  return;
@@ -1462,18 +1583,18 @@ Examples:
1462
1583
  await withRepo(program, async (repo) => {
1463
1584
  const jsonOut = isJson(program);
1464
1585
  const spinner = createSpinner();
1465
-
1586
+
1466
1587
  if (!jsonOut) {
1467
1588
  header(`Embed: ${slug}`);
1468
1589
  spinner.start(`Generating embedding for page...`);
1469
1590
  }
1470
-
1591
+
1471
1592
  await repo.syncPageToSearch(slug);
1472
-
1593
+
1473
1594
  if (!jsonOut) {
1474
1595
  spinner.succeed(`Page embedded: ${slug}`);
1475
1596
  }
1476
-
1597
+
1477
1598
  print(program, { embedded: 1, slug });
1478
1599
  });
1479
1600
  },
@@ -1483,27 +1604,106 @@ Examples:
1483
1604
 
1484
1605
  program
1485
1606
  .command("init")
1486
- .description("initialize the ebrain database")
1607
+ .description("initialize ebrain: create config, database, and show setup guide")
1487
1608
  .addHelpText(
1488
1609
  "after",
1489
1610
  `
1490
1611
  Examples:
1491
1612
  ebrain init
1613
+ ebrain init --db ./my.db
1492
1614
  `,
1493
1615
  )
1494
1616
  .action(async () => {
1495
- await withRepo(program, async () => {
1496
- const settings = await loadSettings();
1497
- const dbPath = program.opts().db ?? settings.dbPath;
1498
-
1499
- success(`Database initialized`);
1500
- keyValue("Path", dbPath);
1501
-
1502
- print(program, {
1503
- ok: true,
1504
- dbPath,
1505
- });
1617
+ const jsonOut = isJson(program);
1618
+ const settings = await loadSettings();
1619
+ const cliDb = program.opts().db;
1620
+ const dbPath = cliDb ?? settings.dbPath;
1621
+
1622
+ if (!jsonOut) {
1623
+ header("ebrain init");
1624
+ }
1625
+
1626
+ // Step 1: Create settings.json if it doesn't exist
1627
+ const { createDefaultSettings } = await import("../settings");
1628
+ const settingsCreated = await createDefaultSettings();
1629
+
1630
+ if (!jsonOut) {
1631
+ if (settingsCreated) {
1632
+ success(`Created config: ${SETTINGS_PATH}`);
1633
+ } else {
1634
+ success(`Config already exists: ${SETTINGS_PATH}`);
1635
+ }
1636
+ }
1637
+
1638
+ // Step 2: Check or initialize database
1639
+ const dbExists = await fileExists(dbPath);
1640
+ let dbInitialized = false;
1641
+
1642
+ if (dbExists) {
1643
+ // Database already exists, skip connection attempt to avoid
1644
+ // noisy errors (e.g. embedding function key mismatch)
1645
+ if (!jsonOut) {
1646
+ success(`Database already exists: ${dbPath}`);
1647
+ }
1648
+ dbInitialized = true;
1649
+ } else {
1650
+ // Try to create it without collection - embedding config may not be ready
1651
+ try {
1652
+ const db = await BrainDb.connect(dbPath, settings, { skipCollection: true });
1653
+ await db.close();
1654
+ await new Promise((r) => setTimeout(r, 200));
1655
+ dbInitialized = true;
1656
+ if (!jsonOut) {
1657
+ success(`Database initialized: ${dbPath}`);
1658
+ }
1659
+ } catch {
1660
+ if (!jsonOut) {
1661
+ warning(`Database will be auto-created on first use`);
1662
+ }
1663
+ }
1664
+ }
1665
+
1666
+ // Step 3: Show setup guide
1667
+ if (!jsonOut) {
1668
+ console.log("");
1669
+ separator();
1670
+ info("Quick Start Guide");
1671
+ console.log("");
1672
+
1673
+ subItem("1. Configure LLM (for AI queries):", 0);
1674
+ subItem(` Edit ${SETTINGS_PATH}`, 4);
1675
+ subItem(` Set llm.baseURL to your OpenAI-compatible API endpoint`, 4);
1676
+ subItem(` Set llm.apiKey or export DASHSCOPE_API_KEY`, 4);
1677
+ console.log("");
1678
+
1679
+ subItem("2. Add your first page:", 0);
1680
+ subItem(" echo '# Hello' | ebrain put hello --stdin", 4);
1681
+ console.log("");
1682
+
1683
+ subItem("3. Import a directory of markdown files:", 0);
1684
+ subItem(" ebrain import ./docs", 4);
1685
+ console.log("");
1686
+
1687
+ subItem("4. Query with AI:", 0);
1688
+ subItem(' ebrain query "What did we ship in Q4?" --llm', 4);
1689
+ console.log("");
1690
+
1691
+ subItem("5. Visualize your knowledge graph:", 0);
1692
+ subItem(" ebrain graph", 4);
1693
+ console.log("");
1694
+
1695
+ separator();
1696
+ }
1697
+
1698
+ print(program, {
1699
+ ok: true,
1700
+ settingsPath: SETTINGS_PATH,
1701
+ settingsCreated,
1702
+ dbPath,
1703
+ dbInitialized,
1506
1704
  });
1705
+
1706
+ process.exit(0);
1507
1707
  });
1508
1708
 
1509
1709
  program
@@ -1521,7 +1721,7 @@ Examples:
1521
1721
  await withRepo(program, async (repo) => {
1522
1722
  const jsonOut = isJson(program);
1523
1723
  const stats = await repo.stats();
1524
-
1724
+
1525
1725
  if (!jsonOut) {
1526
1726
  header("Knowledge Base Statistics");
1527
1727
  keyValue("Pages", String(stats.pages));
@@ -1530,7 +1730,7 @@ Examples:
1530
1730
  keyValue("Timeline entries", String(stats.timelineEntries));
1531
1731
  keyValue("Raw data rows", String(stats.rawRows));
1532
1732
  }
1533
-
1733
+
1534
1734
  print(program, stats);
1535
1735
  });
1536
1736
  });
@@ -1591,7 +1791,7 @@ async function withRepo(
1591
1791
  const db = await BrainDb.connect(dbPath, settings);
1592
1792
  const repo = new BrainRepository(db);
1593
1793
  await callback(repo);
1594
-
1794
+
1595
1795
  // Gracefully close database
1596
1796
  // Note: seekdb SDK's InternalEmbeddedClient.close() is empty in embedded mode
1597
1797
  // Data may not flush properly. Use remote seekdb server for reliability.
@@ -1600,10 +1800,10 @@ async function withRepo(
1600
1800
  } catch (e) {
1601
1801
  // Close may fail due to seekdb native bug
1602
1802
  }
1603
-
1803
+
1604
1804
  // Give seekdb extra time after close
1605
1805
  await new Promise((r) => setTimeout(r, 500));
1606
-
1806
+
1607
1807
  // CLI: force exit to bypass seekdb native cleanup segfault
1608
1808
  process.exit(0);
1609
1809
  }
@@ -1646,7 +1846,7 @@ function normalizeLinkSlug(path: string): string {
1646
1846
  }
1647
1847
 
1648
1848
  // ---------------------------------------------------------------------------
1649
- // LLM Answer Generation Multi-layer Context Collection
1849
+ // LLM Answer Generation - Multi-layer Context Collection
1650
1850
  // ---------------------------------------------------------------------------
1651
1851
 
1652
1852
  /** A single section of context for the LLM prompt. */
@@ -1661,12 +1861,12 @@ interface ContextSection {
1661
1861
 
1662
1862
  /**
1663
1863
  * Collect multi-layer context for LLM answer generation.
1664
- *
1864
+ *
1665
1865
  * Layers (in priority order):
1666
1866
  * 1. Primary: compiledTruth + timeline of each hit page
1667
1867
  * 2. Raw data: original documents stored via raw.set
1668
1868
  * 3. Linked pages: compiledTruth of pages linked to/from hit pages
1669
- *
1869
+ *
1670
1870
  * Budget is enforced via total character limit.
1671
1871
  */
1672
1872
  async function collectContextForLLM(
@@ -1765,8 +1965,8 @@ async function collectContextForLLM(
1765
1965
  }
1766
1966
  }
1767
1967
 
1768
- // Layer 3: Linked pages score using cached data + keyword matching
1769
- // No second repo.query() call needed reuse hits scores + keyword fallback
1968
+ // Layer 3: Linked pages - score using cached data + keyword matching
1969
+ // No second repo.query() call needed - reuse hits scores + keyword fallback
1770
1970
  onProgress?.('linked pages');
1771
1971
  const allLinkedSlugs = new Set<string>();
1772
1972
  for (const hit of hits) {
@@ -1872,7 +2072,7 @@ async function collectContextForLLM(
1872
2072
  function computeKeywordRelevance(text: string, question: string): number {
1873
2073
  const STOP_CHARS = new Set('的是了在和我有你就这不人都说上个大国为到以们年会生地要主中子自实家小对多能好可很所把当');
1874
2074
  const questionChars = [...question]
1875
- .filter(c => !/\s|[,,。!?、;::""''()()【】\[\]{}<>\/\\|~`@#$%^&*+=_-]/.test(c) && !STOP_CHARS.has(c));
2075
+ .filter(c => !/\s|[,,。!?、;::""''()()【】\[\]{}<>\/\\|~`@#$%^&*+=_-]/.test(c) && !STOP_CHARS.has(c));
1876
2076
  if (questionChars.length === 0) return 0;
1877
2077
 
1878
2078
  const uniqueChars = new Set(questionChars);
@@ -1923,7 +2123,7 @@ async function generateAnswerWithStream(
1923
2123
  contextParts.push(`## ${header}\n`);
1924
2124
  for (const s of group) {
1925
2125
  sectionIndex++;
1926
- contextParts.push(`### [${sectionIndex}] ${s.title} ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
2126
+ contextParts.push(`### [${sectionIndex}] ${s.title} - ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
1927
2127
  }
1928
2128
  contextParts.push('');
1929
2129
  }
@@ -1934,7 +2134,7 @@ async function generateAnswerWithStream(
1934
2134
 
1935
2135
  const context = contextParts.join('\n');
1936
2136
 
1937
- const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。
2137
+ const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。
1938
2138
 
1939
2139
  ## 问题
1940
2140
  ${question}
@@ -1944,13 +2144,13 @@ ${question}
1944
2144
  ${context}
1945
2145
 
1946
2146
  ## 回答要求
1947
- - 仅基于提供的知识库内容回答,不要编造信息
1948
- - 如果知识库中没有相关信息,请明确说明
2147
+ - 仅基于提供的知识库内容回答,不要编造信息
2148
+ - 如果知识库中没有相关信息,请明确说明
1949
2149
  - 引用来源时使用 [[slug|标题]] 的格式
1950
2150
  - 使用清晰的 markdown 格式
1951
- - 如果涉及时间线信息,请在回答中体现
2151
+ - 如果涉及时间线信息,请在回答中体现
1952
2152
  - 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
1953
- - 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)
2153
+ - 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)
1954
2154
 
1955
2155
  ## 回答`;
1956
2156
 
@@ -1965,10 +2165,10 @@ ${context}
1965
2165
 
1966
2166
  try {
1967
2167
  const url = llm.baseURL.endsWith("/") ? llm.baseURL + "chat/completions" : llm.baseURL + "/chat/completions";
1968
-
2168
+
1969
2169
  // Show thinking indicator while waiting for first token
1970
2170
  process.stderr.write(`\x1b[35m💭\x1b[0m \x1b[2mConnecting to ${llm.model}...\x1b[0m\n`);
1971
-
2171
+
1972
2172
  const resp = await fetch(
1973
2173
  url,
1974
2174
  {
@@ -1983,7 +2183,7 @@ ${context}
1983
2183
  messages: [
1984
2184
  {
1985
2185
  role: "system",
1986
- content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
2186
+ content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
1987
2187
  },
1988
2188
  { role: "user", content: prompt },
1989
2189
  ],
@@ -2092,7 +2292,7 @@ async function generateAnswerWithContext(
2092
2292
  contextParts.push(`## ${header}\n`);
2093
2293
  for (const s of group) {
2094
2294
  sectionIndex++;
2095
- contextParts.push(`### [${sectionIndex}] ${s.title} ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
2295
+ contextParts.push(`### [${sectionIndex}] ${s.title} - ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
2096
2296
  }
2097
2297
  contextParts.push('');
2098
2298
  }
@@ -2103,7 +2303,7 @@ async function generateAnswerWithContext(
2103
2303
 
2104
2304
  const context = contextParts.join('\n');
2105
2305
 
2106
- const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。
2306
+ const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。
2107
2307
 
2108
2308
  ## 问题
2109
2309
  ${question}
@@ -2113,13 +2313,13 @@ ${question}
2113
2313
  ${context}
2114
2314
 
2115
2315
  ## 回答要求
2116
- - 仅基于提供的知识库内容回答,不要编造信息
2117
- - 如果知识库中没有相关信息,请明确说明
2316
+ - 仅基于提供的知识库内容回答,不要编造信息
2317
+ - 如果知识库中没有相关信息,请明确说明
2118
2318
  - 引用来源时使用 [[slug|标题]] 的格式
2119
2319
  - 使用清晰的 markdown 格式
2120
- - 如果涉及时间线信息,请在回答中体现
2320
+ - 如果涉及时间线信息,请在回答中体现
2121
2321
  - 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
2122
- - 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)
2322
+ - 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)
2123
2323
 
2124
2324
  ## 回答`;
2125
2325
 
@@ -2137,7 +2337,7 @@ ${context}
2137
2337
  messages: [
2138
2338
  {
2139
2339
  role: "system",
2140
- content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
2340
+ content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
2141
2341
  },
2142
2342
  { role: "user", content: prompt },
2143
2343
  ],