ex-brain 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -0
- package/package.json +2 -1
- package/src/ai/compiler.ts +18 -53
- package/src/ai/entity-link.ts +31 -62
- package/src/ai/llm-client.ts +291 -0
- package/src/ai/timeline-extractor.ts +29 -62
- package/src/commands/index.ts +612 -86
- package/src/db/client.ts +121 -15
- package/src/db/errors.ts +178 -0
- package/src/db/schema.ts +1 -0
- package/src/mcp/server.ts +400 -237
- package/src/repositories/brain-repo.ts +576 -358
- package/src/settings.ts +23 -2
- package/src/types/index.ts +1 -0
- package/src/utils/cli-output.ts +569 -0
- package/src/utils/query-sanitizer.ts +63 -0
package/src/commands/index.ts
CHANGED
|
@@ -20,10 +20,23 @@ import {
|
|
|
20
20
|
} from "../markdown/parser";
|
|
21
21
|
import { BrainRepository } from "../repositories/brain-repo";
|
|
22
22
|
import { loadSettings, SETTINGS_PATH, DEFAULT_DB_PATH, type ResolvedLLM } from "../settings";
|
|
23
|
-
import { extractRelations, entityToSlug, EntityType } from "../ai/entity-link";
|
|
23
|
+
import { extractRelations, entityToSlug, type EntityType } from "../ai/entity-link";
|
|
24
24
|
import { registerCompileCommands } from "./compile-cmd";
|
|
25
25
|
import { registerGraphCommand } from "./graph-cmd";
|
|
26
26
|
import { createProgress, formatDuration } from "../utils/progress";
|
|
27
|
+
import {
|
|
28
|
+
success,
|
|
29
|
+
error as cliError,
|
|
30
|
+
warning,
|
|
31
|
+
info,
|
|
32
|
+
step,
|
|
33
|
+
subItem,
|
|
34
|
+
keyValue,
|
|
35
|
+
header,
|
|
36
|
+
createSpinner,
|
|
37
|
+
formatCount,
|
|
38
|
+
type ProgressSpinner,
|
|
39
|
+
} from "../utils/cli-output";
|
|
27
40
|
|
|
28
41
|
// ---------------------------------------------------------------------------
|
|
29
42
|
// Helpers
|
|
@@ -59,32 +72,46 @@ async function applyEntityLinks(
|
|
|
59
72
|
const settings = await loadSettings();
|
|
60
73
|
if (!settings.llm.baseURL) {
|
|
61
74
|
if (!json) {
|
|
62
|
-
|
|
75
|
+
warning(`LLM not configured, skipping entity extraction for ${sourceSlug}`);
|
|
63
76
|
}
|
|
64
77
|
return { created: 0, linked: 0 };
|
|
65
78
|
}
|
|
66
79
|
|
|
67
|
-
const
|
|
80
|
+
const spinner = createSpinner();
|
|
68
81
|
if (!json) {
|
|
69
|
-
|
|
82
|
+
spinner.start(`Extracting entities from ${sourceSlug}...`);
|
|
70
83
|
}
|
|
71
84
|
|
|
72
85
|
const startTime = Date.now();
|
|
73
|
-
|
|
86
|
+
let relations;
|
|
87
|
+
try {
|
|
88
|
+
relations = await extractRelations(content, settings.llm);
|
|
89
|
+
} catch (err) {
|
|
90
|
+
if (!json) {
|
|
91
|
+
spinner.fail(`Entity extraction failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
92
|
+
}
|
|
93
|
+
return { created: 0, linked: 0 };
|
|
94
|
+
}
|
|
74
95
|
|
|
75
96
|
// Filter by confidence
|
|
76
|
-
const
|
|
97
|
+
const confidenceThreshold = settings.extraction.confidenceThreshold;
|
|
98
|
+
const highConfidence = relations.filter((r) => r.confidence >= confidenceThreshold);
|
|
77
99
|
const ignoredCount = relations.length - highConfidence.length;
|
|
78
100
|
|
|
79
101
|
if (highConfidence.length === 0) {
|
|
80
102
|
if (!json) {
|
|
81
|
-
|
|
103
|
+
if (relations.length > 0) {
|
|
104
|
+
spinner.warn(`Found ${relations.length} entities but all below confidence threshold (${confidenceThreshold})`);
|
|
105
|
+
} else {
|
|
106
|
+
spinner.warn(`No entities found in content`);
|
|
107
|
+
}
|
|
82
108
|
}
|
|
83
109
|
return { created: 0, linked: 0 };
|
|
84
110
|
}
|
|
85
111
|
|
|
86
112
|
let created = 0;
|
|
87
113
|
let linked = 0;
|
|
114
|
+
const details: string[] = [];
|
|
88
115
|
|
|
89
116
|
for (const r of highConfidence) {
|
|
90
117
|
// 1. Resolve entity slugs (disambiguation)
|
|
@@ -97,8 +124,8 @@ async function applyEntityLinks(
|
|
|
97
124
|
// 2. Ensure entity pages exist
|
|
98
125
|
const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, sourceSlug);
|
|
99
126
|
const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, sourceSlug);
|
|
100
|
-
if (c1) created += 1;
|
|
101
|
-
if (c2) created += 1;
|
|
127
|
+
if (c1) { created += 1; details.push(`Created: ${r.from.name} (${r.from.type})`); }
|
|
128
|
+
if (c2) { created += 1; details.push(`Created: ${r.to.name} (${r.to.type})`); }
|
|
102
129
|
|
|
103
130
|
// 3. Link between entities (context includes relation type)
|
|
104
131
|
await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
|
|
@@ -113,8 +140,16 @@ async function applyEntityLinks(
|
|
|
113
140
|
|
|
114
141
|
if (!json) {
|
|
115
142
|
const duration = formatDuration(Date.now() - startTime);
|
|
116
|
-
const entityNames = highConfidence.flatMap((r) => [r.from.name, r.to.name]);
|
|
117
|
-
|
|
143
|
+
const entityNames = [...new Set(highConfidence.flatMap((r) => [r.from.name, r.to.name]))];
|
|
144
|
+
spinner.succeed(`Extracted ${entityNames.length} entities: ${entityNames.join(", ")}`);
|
|
145
|
+
|
|
146
|
+
// Print detailed info
|
|
147
|
+
subItem(`${created} entity pages created`);
|
|
148
|
+
subItem(`${linked} links added`);
|
|
149
|
+
if (ignoredCount > 0) {
|
|
150
|
+
subItem(`${ignoredCount} low-confidence relations ignored`);
|
|
151
|
+
}
|
|
152
|
+
subItem(`Completed in ${duration}`);
|
|
118
153
|
}
|
|
119
154
|
|
|
120
155
|
return { created, linked };
|
|
@@ -267,6 +302,15 @@ Examples:
|
|
|
267
302
|
}
|
|
268
303
|
|
|
269
304
|
await withRepo(program, async (repo) => {
|
|
305
|
+
const jsonOut = isJson(program);
|
|
306
|
+
const spinner = createSpinner();
|
|
307
|
+
const startTime = Date.now();
|
|
308
|
+
|
|
309
|
+
if (!jsonOut) {
|
|
310
|
+
header(`Put: ${finalSlug}`);
|
|
311
|
+
spinner.start(`Creating/updating page...`);
|
|
312
|
+
}
|
|
313
|
+
|
|
270
314
|
const page = await repo.putPage({
|
|
271
315
|
slug: finalSlug,
|
|
272
316
|
type,
|
|
@@ -275,12 +319,26 @@ Examples:
|
|
|
275
319
|
timeline: parsed.timeline,
|
|
276
320
|
frontmatter: parsed.frontmatter,
|
|
277
321
|
});
|
|
322
|
+
|
|
323
|
+
if (!jsonOut) {
|
|
324
|
+
spinner.succeed(`Page saved: ${page.slug}`);
|
|
325
|
+
keyValue("Title", title);
|
|
326
|
+
keyValue("Type", type);
|
|
327
|
+
keyValue("Content length", `${parsed.compiledTruth.length} chars`);
|
|
328
|
+
}
|
|
329
|
+
|
|
278
330
|
await applyEntityLinks(
|
|
279
331
|
repo,
|
|
280
332
|
finalSlug,
|
|
281
333
|
parsed.compiledTruth,
|
|
282
|
-
|
|
334
|
+
jsonOut,
|
|
283
335
|
);
|
|
336
|
+
|
|
337
|
+
if (!jsonOut) {
|
|
338
|
+
const duration = formatDuration(Date.now() - startTime);
|
|
339
|
+
success(`Operation completed in ${duration}`);
|
|
340
|
+
}
|
|
341
|
+
|
|
284
342
|
print(program, { ok: true, slug: page.slug, updatedAt: page.updatedAt });
|
|
285
343
|
});
|
|
286
344
|
},
|
|
@@ -350,7 +408,20 @@ Examples:
|
|
|
350
408
|
return;
|
|
351
409
|
}
|
|
352
410
|
await withRepo(program, async (repo) => {
|
|
411
|
+
const jsonOut = isJson(program);
|
|
412
|
+
const spinner = createSpinner();
|
|
413
|
+
|
|
414
|
+
if (!jsonOut) {
|
|
415
|
+
header(`Delete: ${slug}`);
|
|
416
|
+
spinner.start(`Deleting page and related data...`);
|
|
417
|
+
}
|
|
418
|
+
|
|
353
419
|
await repo.deletePage(slug);
|
|
420
|
+
|
|
421
|
+
if (!jsonOut) {
|
|
422
|
+
spinner.succeed(`Page deleted: ${slug}`);
|
|
423
|
+
}
|
|
424
|
+
|
|
354
425
|
print(program, { ok: true, action: "delete", slug });
|
|
355
426
|
});
|
|
356
427
|
});
|
|
@@ -447,7 +518,7 @@ Examples:
|
|
|
447
518
|
const limit = Number(opts.limit ?? 10);
|
|
448
519
|
const hits = await repo.query(question, limit);
|
|
449
520
|
|
|
450
|
-
// If --llm flag, generate answer based on context
|
|
521
|
+
// If --llm flag, generate answer based on multi-layer context
|
|
451
522
|
if (opts.llm) {
|
|
452
523
|
const settings = await loadSettings();
|
|
453
524
|
if (!settings.llm.baseURL) {
|
|
@@ -458,35 +529,48 @@ Examples:
|
|
|
458
529
|
const progress = createProgress();
|
|
459
530
|
progress.start("Searching knowledge base...");
|
|
460
531
|
|
|
461
|
-
// Use excerpts from hits as context (avoids extra DB queries that cause segfault)
|
|
462
532
|
const contextLimit = Number(opts.contextLimit ?? 5);
|
|
463
533
|
const topHits = hits.slice(0, contextLimit);
|
|
464
534
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
}
|
|
535
|
+
if (topHits.length === 0) {
|
|
536
|
+
progress.stop();
|
|
537
|
+
process.stderr.write("No relevant pages found.\n");
|
|
538
|
+
print(program, { answer: "No relevant information found in the knowledge base.", sources: [] });
|
|
539
|
+
return;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
// Collect multi-layer context (primary + raw data + linked pages scored by relevance)
|
|
543
|
+
progress.update(`Loading pages, raw documents, and linked content...`);
|
|
544
|
+
// ~100KB char budget ≈ 25K tokens, safe for most models
|
|
545
|
+
const MAX_CONTEXT_CHARS = 100_000;
|
|
546
|
+
const { sections, totalChars, stats } = await collectContextForLLM(repo, topHits, question, MAX_CONTEXT_CHARS);
|
|
547
|
+
|
|
548
|
+
if (sections.length === 0) {
|
|
549
|
+
progress.stop();
|
|
550
|
+
process.stderr.write("No content could be loaded.\n");
|
|
551
|
+
print(program, { answer: "Failed to load page content.", sources: [] });
|
|
552
|
+
return;
|
|
553
|
+
}
|
|
471
554
|
|
|
472
|
-
progress.update(
|
|
555
|
+
progress.update(`Generating answer from ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)...`);
|
|
473
556
|
const startTime = Date.now();
|
|
474
557
|
|
|
475
|
-
const answer = await
|
|
558
|
+
const answer = await generateAnswerWithContext(question, sections, stats, settings.llm);
|
|
476
559
|
|
|
477
560
|
const duration = formatDuration(Date.now() - startTime);
|
|
478
|
-
progress.succeed(`Answer generated (${duration})`);
|
|
561
|
+
progress.succeed(`Answer generated (${duration}, context: ${(totalChars / 1024).toFixed(1)}KB)`);
|
|
479
562
|
|
|
480
|
-
// Output markdown
|
|
563
|
+
// Output answer as markdown
|
|
481
564
|
console.log("\n" + answer);
|
|
482
565
|
|
|
483
|
-
// Show sources
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
});
|
|
566
|
+
// Show sources breakdown
|
|
567
|
+
console.log("\n---\n**Sources:**\n");
|
|
568
|
+
for (let i = 0; i < sections.length; i++) {
|
|
569
|
+
const s = sections[i];
|
|
570
|
+
const icon = s.type === 'primary' ? '📄' : s.type === 'raw_data' ? '📎' : '🔗';
|
|
571
|
+
console.log(`${icon} ${i + 1}. [[${s.slug}|${s.title}]] — ${s.label} (${(s.content.length / 1024).toFixed(1)}KB)`);
|
|
489
572
|
}
|
|
573
|
+
console.log(`\n*Context: ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)*`);
|
|
490
574
|
} else {
|
|
491
575
|
print(program, hits);
|
|
492
576
|
}
|
|
@@ -887,18 +971,21 @@ Examples:
|
|
|
887
971
|
.command("import")
|
|
888
972
|
.argument("<dir>", "directory of markdown files")
|
|
889
973
|
.description("import a directory of markdown files")
|
|
974
|
+
.option("--skip-index", "skip vector indexing (useful if seekdb crashes)")
|
|
890
975
|
.addHelpText(
|
|
891
976
|
"after",
|
|
892
977
|
`
|
|
893
978
|
Examples:
|
|
894
979
|
ebrain import ./docs
|
|
895
980
|
ebrain import ./docs --dry-run
|
|
981
|
+
ebrain import ./docs --skip-index # skip vector indexing
|
|
896
982
|
`,
|
|
897
983
|
),
|
|
898
|
-
).action(async (dir: string, opts: { dryRun?: boolean }) => {
|
|
984
|
+
).action(async (dir: string, opts: { dryRun?: boolean; skipIndex?: boolean }) => {
|
|
899
985
|
await withRepo(program, async (repo) => {
|
|
900
986
|
const root = resolve(dir);
|
|
901
987
|
const files = await collectMarkdownFiles(root);
|
|
988
|
+
|
|
902
989
|
if (isDryRun(opts)) {
|
|
903
990
|
print(program, {
|
|
904
991
|
dryRun: true,
|
|
@@ -912,11 +999,18 @@ Examples:
|
|
|
912
999
|
|
|
913
1000
|
const jsonOut = isJson(program);
|
|
914
1001
|
const settings = await loadSettings();
|
|
915
|
-
const
|
|
1002
|
+
const spinner = createSpinner();
|
|
916
1003
|
const startTime = Date.now();
|
|
917
1004
|
|
|
1005
|
+
if (!jsonOut) {
|
|
1006
|
+
header(`Import: ${root}`);
|
|
1007
|
+
}
|
|
1008
|
+
|
|
918
1009
|
// Phase 1: Parse all files and collect data
|
|
919
|
-
|
|
1010
|
+
if (!jsonOut) {
|
|
1011
|
+
spinner.start(`Scanning ${files.length} files...`);
|
|
1012
|
+
}
|
|
1013
|
+
|
|
920
1014
|
const fileData: Array<{
|
|
921
1015
|
file: string;
|
|
922
1016
|
slug: string;
|
|
@@ -940,33 +1034,64 @@ Examples:
|
|
|
940
1034
|
fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
|
|
941
1035
|
}
|
|
942
1036
|
|
|
943
|
-
|
|
944
|
-
|
|
1037
|
+
if (!jsonOut) {
|
|
1038
|
+
spinner.succeed(`Found ${files.length} markdown files`);
|
|
1039
|
+
}
|
|
1040
|
+
|
|
1041
|
+
// Phase 2: Write all pages first (skip embed for performance)
|
|
1042
|
+
if (!jsonOut) {
|
|
1043
|
+
spinner.start(`Writing ${fileData.length} pages to database...`);
|
|
1044
|
+
}
|
|
1045
|
+
|
|
1046
|
+
const allSlugs: string[] = [];
|
|
1047
|
+
const writeErrors: string[] = [];
|
|
1048
|
+
|
|
945
1049
|
for (let i = 0; i < fileData.length; i++) {
|
|
946
1050
|
const { slug, parsed } = fileData[i]!;
|
|
947
|
-
if (!jsonOut && i %
|
|
948
|
-
|
|
1051
|
+
if (!jsonOut && i % 20 === 0) {
|
|
1052
|
+
spinner.update(`Writing pages... ${i + 1}/${fileData.length}`);
|
|
1053
|
+
}
|
|
1054
|
+
try {
|
|
1055
|
+
await repo.putPage({
|
|
1056
|
+
slug,
|
|
1057
|
+
type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
|
|
1058
|
+
title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
|
|
1059
|
+
compiledTruth: parsed.compiledTruth,
|
|
1060
|
+
timeline: parsed.timeline,
|
|
1061
|
+
frontmatter: parsed.frontmatter,
|
|
1062
|
+
}, true); // skipEmbed: true for performance
|
|
1063
|
+
allSlugs.push(slug);
|
|
1064
|
+
} catch (err) {
|
|
1065
|
+
writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
|
|
1066
|
+
}
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
if (!jsonOut) {
|
|
1070
|
+
spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
|
|
1071
|
+
if (writeErrors.length > 0) {
|
|
1072
|
+
warning(`${writeErrors.length} pages failed to write`);
|
|
1073
|
+
for (const e of writeErrors.slice(0, 3)) {
|
|
1074
|
+
subItem(e);
|
|
1075
|
+
}
|
|
1076
|
+
if (writeErrors.length > 3) {
|
|
1077
|
+
subItem(`... and ${writeErrors.length - 3} more`);
|
|
1078
|
+
}
|
|
949
1079
|
}
|
|
950
|
-
await repo.putPage({
|
|
951
|
-
slug,
|
|
952
|
-
type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
|
|
953
|
-
title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
|
|
954
|
-
compiledTruth: parsed.compiledTruth,
|
|
955
|
-
timeline: parsed.timeline,
|
|
956
|
-
frontmatter: parsed.frontmatter,
|
|
957
|
-
});
|
|
958
1080
|
}
|
|
959
1081
|
|
|
960
1082
|
// Phase 3: Parallel entity extraction (main optimization)
|
|
961
|
-
progress.update("Extracting entities...");
|
|
962
1083
|
const BATCH_SIZE = 10;
|
|
963
1084
|
const entityResults = new Map<string, Awaited<ReturnType<typeof extractRelations>>>();
|
|
964
1085
|
|
|
965
1086
|
if (settings.llm.baseURL) {
|
|
1087
|
+
if (!jsonOut) {
|
|
1088
|
+
spinner.start(`Extracting entities with LLM...`);
|
|
1089
|
+
}
|
|
1090
|
+
|
|
966
1091
|
for (let i = 0; i < fileData.length; i += BATCH_SIZE) {
|
|
967
1092
|
const batch = fileData.slice(i, i + BATCH_SIZE).filter(d => d.tags.length === 0);
|
|
968
1093
|
if (!jsonOut) {
|
|
969
|
-
|
|
1094
|
+
spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, fileData.length)}/${fileData.length}`);
|
|
970
1095
|
}
|
|
971
1096
|
const batchPromises = batch.map(async ({ slug, content }) => {
|
|
972
1097
|
const relations = await extractRelations(content, settings.llm);
|
|
@@ -977,13 +1102,34 @@ Examples:
|
|
|
977
1102
|
entityResults.set(slug, relations);
|
|
978
1103
|
}
|
|
979
1104
|
}
|
|
1105
|
+
|
|
1106
|
+
if (!jsonOut) {
|
|
1107
|
+
spinner.succeed(`Entity extraction complete`);
|
|
1108
|
+
}
|
|
1109
|
+
} else {
|
|
1110
|
+
if (!jsonOut) {
|
|
1111
|
+
warning(`LLM not configured, skipping entity extraction`);
|
|
1112
|
+
}
|
|
980
1113
|
}
|
|
981
1114
|
|
|
982
1115
|
// Phase 4: Write links, tags, timeline, and entity pages
|
|
983
|
-
|
|
1116
|
+
if (!jsonOut) {
|
|
1117
|
+
spinner.start(`Creating links, tags, and timeline entries...`);
|
|
1118
|
+
}
|
|
1119
|
+
|
|
984
1120
|
let linkCount = 0;
|
|
985
1121
|
let timelineCount = 0;
|
|
986
1122
|
let entityCount = 0;
|
|
1123
|
+
let tagCount = 0;
|
|
1124
|
+
|
|
1125
|
+
// Collect timeline entries for batch insert
|
|
1126
|
+
const allTimelineEntries: Array<{
|
|
1127
|
+
pageSlug: string;
|
|
1128
|
+
date: string;
|
|
1129
|
+
source: string;
|
|
1130
|
+
summary: string;
|
|
1131
|
+
detail: string;
|
|
1132
|
+
}> = [];
|
|
987
1133
|
|
|
988
1134
|
for (const { slug, wikiLinks, timelineEntries, tags, content } of fileData) {
|
|
989
1135
|
// Wiki links
|
|
@@ -992,9 +1138,9 @@ Examples:
|
|
|
992
1138
|
linkCount++;
|
|
993
1139
|
}
|
|
994
1140
|
|
|
995
|
-
//
|
|
1141
|
+
// Collect timeline entries for batch insert
|
|
996
1142
|
for (const entry of timelineEntries) {
|
|
997
|
-
|
|
1143
|
+
allTimelineEntries.push({
|
|
998
1144
|
pageSlug: slug,
|
|
999
1145
|
date: entry.date,
|
|
1000
1146
|
source: entry.source,
|
|
@@ -1007,6 +1153,7 @@ Examples:
|
|
|
1007
1153
|
// Tags
|
|
1008
1154
|
for (const tag of tags) {
|
|
1009
1155
|
await repo.tag(slug, tag);
|
|
1156
|
+
tagCount++;
|
|
1010
1157
|
}
|
|
1011
1158
|
|
|
1012
1159
|
// Entity links from parallel extraction
|
|
@@ -1032,12 +1179,53 @@ Examples:
|
|
|
1032
1179
|
}
|
|
1033
1180
|
}
|
|
1034
1181
|
|
|
1182
|
+
// Batch insert all timeline entries
|
|
1183
|
+
if (allTimelineEntries.length > 0) {
|
|
1184
|
+
await repo.timelineAddBatch(allTimelineEntries);
|
|
1185
|
+
}
|
|
1186
|
+
|
|
1187
|
+
if (!jsonOut) {
|
|
1188
|
+
spinner.succeed(`Created links, tags, and timeline`);
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
// Phase 5: Batch sync all pages to search index
|
|
1192
|
+
if (opts.skipIndex) {
|
|
1193
|
+
if (!jsonOut) {
|
|
1194
|
+
info(`Skipping vector indexing (--skip-index)`);
|
|
1195
|
+
}
|
|
1196
|
+
} else {
|
|
1197
|
+
if (!jsonOut) {
|
|
1198
|
+
spinner.start(`Indexing ${allSlugs.length} pages for search...`);
|
|
1199
|
+
}
|
|
1200
|
+
await repo.embedAll();
|
|
1201
|
+
|
|
1202
|
+
if (!jsonOut) {
|
|
1203
|
+
spinner.succeed(`Search indexing complete`);
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
|
|
1035
1207
|
const duration = formatDuration(Date.now() - startTime);
|
|
1036
|
-
|
|
1208
|
+
|
|
1209
|
+
if (!jsonOut) {
|
|
1210
|
+
// Print summary
|
|
1211
|
+
header("Import Summary");
|
|
1212
|
+
keyValue("Files imported", String(files.length));
|
|
1213
|
+
keyValue("Pages created", String(allSlugs.length));
|
|
1214
|
+
keyValue("Entities extracted", String(entityCount));
|
|
1215
|
+
keyValue("Links created", String(linkCount));
|
|
1216
|
+
keyValue("Timeline entries", String(timelineCount));
|
|
1217
|
+
keyValue("Tags added", String(tagCount));
|
|
1218
|
+
keyValue("Duration", duration);
|
|
1219
|
+
|
|
1220
|
+
if (writeErrors.length > 0) {
|
|
1221
|
+
warning(`${writeErrors.length} pages had errors`);
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1037
1224
|
|
|
1038
1225
|
print(program, {
|
|
1226
|
+
ok: true,
|
|
1039
1227
|
importedFiles: files.length,
|
|
1040
|
-
pages:
|
|
1228
|
+
pages: allSlugs.length,
|
|
1041
1229
|
links: linkCount,
|
|
1042
1230
|
timelineEntries: timelineCount,
|
|
1043
1231
|
entities: entityCount,
|
|
@@ -1138,6 +1326,15 @@ Examples:
|
|
|
1138
1326
|
}
|
|
1139
1327
|
|
|
1140
1328
|
await withRepo(program, async (repo) => {
|
|
1329
|
+
const jsonOut = isJson(program);
|
|
1330
|
+
const spinner = createSpinner();
|
|
1331
|
+
const startTime = Date.now();
|
|
1332
|
+
|
|
1333
|
+
if (!jsonOut) {
|
|
1334
|
+
header(`Ingest: ${fileName}`);
|
|
1335
|
+
spinner.start(`Creating page from file...`);
|
|
1336
|
+
}
|
|
1337
|
+
|
|
1141
1338
|
await repo.putPage({
|
|
1142
1339
|
slug,
|
|
1143
1340
|
type,
|
|
@@ -1149,6 +1346,14 @@ Examples:
|
|
|
1149
1346
|
sourceType: type,
|
|
1150
1347
|
},
|
|
1151
1348
|
});
|
|
1349
|
+
|
|
1350
|
+
if (!jsonOut) {
|
|
1351
|
+
spinner.succeed(`Page created: ${slug}`);
|
|
1352
|
+
keyValue("Source file", fileName);
|
|
1353
|
+
keyValue("Type", type);
|
|
1354
|
+
keyValue("Content length", `${content.length} chars`);
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1152
1357
|
await repo.timelineAdd({
|
|
1153
1358
|
pageSlug: slug,
|
|
1154
1359
|
date: new Date().toISOString().slice(0, 10),
|
|
@@ -1156,12 +1361,19 @@ Examples:
|
|
|
1156
1361
|
summary: `Ingested file ${fileName}`,
|
|
1157
1362
|
detail: "",
|
|
1158
1363
|
});
|
|
1364
|
+
|
|
1159
1365
|
await applyEntityLinks(
|
|
1160
1366
|
repo,
|
|
1161
1367
|
slug,
|
|
1162
1368
|
content,
|
|
1163
|
-
|
|
1369
|
+
jsonOut,
|
|
1164
1370
|
);
|
|
1371
|
+
|
|
1372
|
+
if (!jsonOut) {
|
|
1373
|
+
const duration = formatDuration(Date.now() - startTime);
|
|
1374
|
+
success(`Ingestion completed in ${duration}`);
|
|
1375
|
+
}
|
|
1376
|
+
|
|
1165
1377
|
print(program, { ok: true, action: "ingest", slug });
|
|
1166
1378
|
});
|
|
1167
1379
|
},
|
|
@@ -1204,13 +1416,28 @@ Examples:
|
|
|
1204
1416
|
}
|
|
1205
1417
|
await withRepo(program, async (repo) => {
|
|
1206
1418
|
const jsonOut = isJson(program);
|
|
1419
|
+
const spinner = createSpinner();
|
|
1420
|
+
const startTime = Date.now();
|
|
1421
|
+
|
|
1422
|
+
if (!jsonOut) {
|
|
1423
|
+
header("Embed All Pages");
|
|
1424
|
+
spinner.start(`Loading pages...`);
|
|
1425
|
+
}
|
|
1426
|
+
|
|
1207
1427
|
const pages = await repo.listPages({ limit: 100000 });
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1428
|
+
|
|
1429
|
+
if (!jsonOut) {
|
|
1430
|
+
spinner.update(`Embedding ${pages.length} pages...`);
|
|
1431
|
+
}
|
|
1432
|
+
|
|
1433
|
+
const count = await repo.embedAll();
|
|
1434
|
+
|
|
1435
|
+
if (!jsonOut) {
|
|
1436
|
+
const duration = formatDuration(Date.now() - startTime);
|
|
1437
|
+
spinner.succeed(`Embedded ${count} pages`);
|
|
1438
|
+
keyValue("Duration", duration);
|
|
1213
1439
|
}
|
|
1440
|
+
|
|
1214
1441
|
print(program, { embedded: count, mode: "all" });
|
|
1215
1442
|
});
|
|
1216
1443
|
return;
|
|
@@ -1223,7 +1450,20 @@ Examples:
|
|
|
1223
1450
|
return;
|
|
1224
1451
|
}
|
|
1225
1452
|
await withRepo(program, async (repo) => {
|
|
1453
|
+
const jsonOut = isJson(program);
|
|
1454
|
+
const spinner = createSpinner();
|
|
1455
|
+
|
|
1456
|
+
if (!jsonOut) {
|
|
1457
|
+
header(`Embed: ${slug}`);
|
|
1458
|
+
spinner.start(`Generating embedding for page...`);
|
|
1459
|
+
}
|
|
1460
|
+
|
|
1226
1461
|
await repo.syncPageToSearch(slug);
|
|
1462
|
+
|
|
1463
|
+
if (!jsonOut) {
|
|
1464
|
+
spinner.succeed(`Page embedded: ${slug}`);
|
|
1465
|
+
}
|
|
1466
|
+
|
|
1227
1467
|
print(program, { embedded: 1, slug });
|
|
1228
1468
|
});
|
|
1229
1469
|
},
|
|
@@ -1243,10 +1483,15 @@ Examples:
|
|
|
1243
1483
|
)
|
|
1244
1484
|
.action(async () => {
|
|
1245
1485
|
await withRepo(program, async () => {
|
|
1486
|
+
const settings = await loadSettings();
|
|
1487
|
+
const dbPath = program.opts().db ?? settings.dbPath;
|
|
1488
|
+
|
|
1489
|
+
success(`Database initialized`);
|
|
1490
|
+
keyValue("Path", dbPath);
|
|
1491
|
+
|
|
1246
1492
|
print(program, {
|
|
1247
1493
|
ok: true,
|
|
1248
|
-
dbPath
|
|
1249
|
-
program.opts().db ?? (await loadSettings()).dbPath,
|
|
1494
|
+
dbPath,
|
|
1250
1495
|
});
|
|
1251
1496
|
});
|
|
1252
1497
|
});
|
|
@@ -1264,7 +1509,19 @@ Examples:
|
|
|
1264
1509
|
)
|
|
1265
1510
|
.action(async () => {
|
|
1266
1511
|
await withRepo(program, async (repo) => {
|
|
1267
|
-
|
|
1512
|
+
const jsonOut = isJson(program);
|
|
1513
|
+
const stats = await repo.stats();
|
|
1514
|
+
|
|
1515
|
+
if (!jsonOut) {
|
|
1516
|
+
header("Knowledge Base Statistics");
|
|
1517
|
+
keyValue("Pages", String(stats.pages));
|
|
1518
|
+
keyValue("Links", String(stats.links));
|
|
1519
|
+
keyValue("Tags", String(stats.tags));
|
|
1520
|
+
keyValue("Timeline entries", String(stats.timelineEntries));
|
|
1521
|
+
keyValue("Raw data rows", String(stats.rawRows));
|
|
1522
|
+
}
|
|
1523
|
+
|
|
1524
|
+
print(program, stats);
|
|
1268
1525
|
});
|
|
1269
1526
|
});
|
|
1270
1527
|
|
|
@@ -1324,7 +1581,20 @@ async function withRepo(
|
|
|
1324
1581
|
const db = await BrainDb.connect(dbPath, settings);
|
|
1325
1582
|
const repo = new BrainRepository(db);
|
|
1326
1583
|
await callback(repo);
|
|
1327
|
-
|
|
1584
|
+
|
|
1585
|
+
// Gracefully close database
|
|
1586
|
+
// Note: seekdb SDK's InternalEmbeddedClient.close() is empty in embedded mode
|
|
1587
|
+
// Data may not flush properly. Use remote seekdb server for reliability.
|
|
1588
|
+
try {
|
|
1589
|
+
await db.close();
|
|
1590
|
+
} catch (e) {
|
|
1591
|
+
// Close may fail due to seekdb native bug
|
|
1592
|
+
}
|
|
1593
|
+
|
|
1594
|
+
// Give seekdb extra time after close
|
|
1595
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
1596
|
+
|
|
1597
|
+
// CLI: force exit to bypass seekdb native cleanup segfault
|
|
1328
1598
|
process.exit(0);
|
|
1329
1599
|
}
|
|
1330
1600
|
|
|
@@ -1366,18 +1636,249 @@ function normalizeLinkSlug(path: string): string {
|
|
|
1366
1636
|
}
|
|
1367
1637
|
|
|
1368
1638
|
// ---------------------------------------------------------------------------
|
|
1369
|
-
// LLM Answer Generation
|
|
1639
|
+
// LLM Answer Generation — Multi-layer Context Collection
|
|
1370
1640
|
// ---------------------------------------------------------------------------
|
|
1371
1641
|
|
|
1372
|
-
|
|
1642
|
+
/** A single section of context for the LLM prompt. */
|
|
1643
|
+
interface ContextSection {
|
|
1644
|
+
type: 'primary' | 'raw_data' | 'linked';
|
|
1373
1645
|
slug: string;
|
|
1374
1646
|
title: string;
|
|
1375
|
-
|
|
1647
|
+
content: string;
|
|
1648
|
+
/** Human-readable label like "原始文档 (crm)" or "关联页面: projects/alpha". */
|
|
1649
|
+
label: string;
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1652
|
+
/**
|
|
1653
|
+
* Collect multi-layer context for LLM answer generation.
|
|
1654
|
+
*
|
|
1655
|
+
* Layers (in priority order):
|
|
1656
|
+
* 1. Primary: compiledTruth + timeline of each hit page
|
|
1657
|
+
* 2. Raw data: original documents stored via raw.set
|
|
1658
|
+
* 3. Linked pages: compiledTruth of pages linked to/from hit pages
|
|
1659
|
+
*
|
|
1660
|
+
* Budget is enforced via total character limit.
|
|
1661
|
+
*/
|
|
1662
|
+
async function collectContextForLLM(
|
|
1663
|
+
repo: BrainRepository,
|
|
1664
|
+
hits: Array<{ slug: string; title: string; score: number }>,
|
|
1665
|
+
question: string,
|
|
1666
|
+
maxChars: number,
|
|
1667
|
+
): Promise<{ sections: ContextSection[]; totalChars: number; stats: ContextStats }> {
|
|
1668
|
+
const sections: ContextSection[] = [];
|
|
1669
|
+
let totalChars = 0;
|
|
1670
|
+
const stats: ContextStats = {
|
|
1671
|
+
primaryPages: 0,
|
|
1672
|
+
rawDocs: 0,
|
|
1673
|
+
linkedPages: 0,
|
|
1674
|
+
skippedChars: 0,
|
|
1675
|
+
};
|
|
1676
|
+
|
|
1677
|
+
const seenSlugs = new Set<string>();
|
|
1678
|
+
|
|
1679
|
+
function addSection(section: ContextSection): boolean {
|
|
1680
|
+
if (seenSlugs.has(`${section.type}:${section.slug}:${section.label}`)) {
|
|
1681
|
+
return false;
|
|
1682
|
+
}
|
|
1683
|
+
const budget = maxChars - totalChars;
|
|
1684
|
+
if (section.content.length > budget && sections.length > 0) {
|
|
1685
|
+
// Truncate to fit budget
|
|
1686
|
+
section.content = section.content.slice(0, budget - 20) + '\n...[truncated]';
|
|
1687
|
+
stats.skippedChars += section.content.length - budget;
|
|
1688
|
+
}
|
|
1689
|
+
if (section.content.length > 0) {
|
|
1690
|
+
sections.push(section);
|
|
1691
|
+
totalChars += section.content.length;
|
|
1692
|
+
seenSlugs.add(`${section.type}:${section.slug}:${section.label}`);
|
|
1693
|
+
return true;
|
|
1694
|
+
}
|
|
1695
|
+
return false;
|
|
1696
|
+
}
|
|
1697
|
+
|
|
1698
|
+
// Layer 1: Primary pages (compiledTruth + timeline)
|
|
1699
|
+
for (const hit of hits) {
|
|
1700
|
+
const page = await repo.getPage(hit.slug);
|
|
1701
|
+
if (!page) continue;
|
|
1702
|
+
|
|
1703
|
+
const parts: string[] = [];
|
|
1704
|
+
if (page.compiledTruth?.trim()) {
|
|
1705
|
+
parts.push(page.compiledTruth.trim());
|
|
1706
|
+
}
|
|
1707
|
+
const tl = page.timeline?.trim();
|
|
1708
|
+
if (tl) {
|
|
1709
|
+
parts.push(`## 时间线\n${tl}`);
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
if (parts.length > 0) {
|
|
1713
|
+
addSection({
|
|
1714
|
+
type: 'primary',
|
|
1715
|
+
slug: page.slug,
|
|
1716
|
+
title: page.title,
|
|
1717
|
+
content: parts.join('\n\n'),
|
|
1718
|
+
label: `页面正文`,
|
|
1719
|
+
});
|
|
1720
|
+
stats.primaryPages++;
|
|
1721
|
+
}
|
|
1722
|
+
}
|
|
1723
|
+
|
|
1724
|
+
// Layer 2: Raw data (original documents)
|
|
1725
|
+
for (const hit of hits) {
|
|
1726
|
+
try {
|
|
1727
|
+
const rawRows = await repo.readRaw(hit.slug) as Array<{ source: string; data: unknown; fetchedAt?: string }>;
|
|
1728
|
+
for (const row of rawRows) {
|
|
1729
|
+
let rawContent = '';
|
|
1730
|
+
if (typeof row.data === 'string') {
|
|
1731
|
+
rawContent = row.data;
|
|
1732
|
+
} else if (typeof row.data === 'object' && row.data !== null) {
|
|
1733
|
+
rawContent = JSON.stringify(row.data, null, 2);
|
|
1734
|
+
}
|
|
1735
|
+
if (rawContent.trim()) {
|
|
1736
|
+
addSection({
|
|
1737
|
+
type: 'raw_data',
|
|
1738
|
+
slug: hit.slug,
|
|
1739
|
+
title: hit.title,
|
|
1740
|
+
content: rawContent,
|
|
1741
|
+
label: `原始文档 (${row.source})`,
|
|
1742
|
+
});
|
|
1743
|
+
stats.rawDocs++;
|
|
1744
|
+
}
|
|
1745
|
+
}
|
|
1746
|
+
} catch {
|
|
1747
|
+
// Raw data fetch failure is non-fatal
|
|
1748
|
+
}
|
|
1749
|
+
}
|
|
1750
|
+
|
|
1751
|
+
// Layer 3: Linked pages — SEMANTICALLY SCORED against the question
|
|
1752
|
+
// Only include linked pages that are actually relevant to what the user asked.
|
|
1753
|
+
const allLinkedSlugs = new Set<string>();
|
|
1754
|
+
for (const hit of hits) {
|
|
1755
|
+
try {
|
|
1756
|
+
const outLinks = await repo.outgoingLinks(hit.slug);
|
|
1757
|
+
outLinks.forEach(l => allLinkedSlugs.add(l.slug));
|
|
1758
|
+
} catch { /* ignore */ }
|
|
1759
|
+
try {
|
|
1760
|
+
const backlinkSlugs = await repo.backlinks(hit.slug);
|
|
1761
|
+
backlinkSlugs.forEach(s => allLinkedSlugs.add(s));
|
|
1762
|
+
} catch { /* ignore */ }
|
|
1763
|
+
}
|
|
1764
|
+
|
|
1765
|
+
if (allLinkedSlugs.size > 0) {
|
|
1766
|
+
// Score linked pages using broad semantic search.
|
|
1767
|
+
// Query a wide set of pages, then intersect with linked slugs.
|
|
1768
|
+
const broadLimit = Math.min(200, Math.max(50, allLinkedSlugs.size));
|
|
1769
|
+
const broadResults = await repo.query(question, broadLimit);
|
|
1770
|
+
const semanticScoreMap = new Map(broadResults.map(h => [h.slug, h.score]));
|
|
1771
|
+
|
|
1772
|
+
// Keyword-based fallback scoring for linked pages without embedding scores
|
|
1773
|
+
const keywordScores = new Map<string, number>();
|
|
1774
|
+
for (const linkedSlug of allLinkedSlugs) {
|
|
1775
|
+
if (semanticScoreMap.has(linkedSlug)) continue;
|
|
1776
|
+
try {
|
|
1777
|
+
const page = await repo.getPage(linkedSlug);
|
|
1778
|
+
if (page) {
|
|
1779
|
+
const text = `${page.title} ${page.compiledTruth}`.slice(0, 2000);
|
|
1780
|
+
keywordScores.set(linkedSlug, computeKeywordRelevance(text, question));
|
|
1781
|
+
}
|
|
1782
|
+
} catch { /* ignore */ }
|
|
1783
|
+
}
|
|
1784
|
+
|
|
1785
|
+
// Combine scores: semantic first, then keyword fallback
|
|
1786
|
+
const scoredLinked = [...allLinkedSlugs].map(slug => ({
|
|
1787
|
+
slug,
|
|
1788
|
+
score: semanticScoreMap.get(slug) ?? keywordScores.get(slug) ?? 0,
|
|
1789
|
+
}));
|
|
1790
|
+
|
|
1791
|
+
// Filter: only include linked pages with meaningful relevance
|
|
1792
|
+
const MIN_LINKED_SCORE = 0.02;
|
|
1793
|
+
const relevantLinked = scoredLinked
|
|
1794
|
+
.filter(s => s.score >= MIN_LINKED_SCORE)
|
|
1795
|
+
.sort((a, b) => b.score - a.score);
|
|
1796
|
+
|
|
1797
|
+
// Fetch content for relevant linked pages (respecting budget)
|
|
1798
|
+
for (const linked of relevantLinked) {
|
|
1799
|
+
if (totalChars >= maxChars) break;
|
|
1800
|
+
|
|
1801
|
+
const linkedPage = await repo.getPage(linked.slug);
|
|
1802
|
+
if (!linkedPage || !linkedPage.compiledTruth?.trim()) continue;
|
|
1803
|
+
|
|
1804
|
+
const remaining = maxChars - totalChars;
|
|
1805
|
+
let content = linkedPage.compiledTruth.trim();
|
|
1806
|
+
if (content.length > remaining - 100) {
|
|
1807
|
+
content = content.slice(0, remaining - 100) + '\n...[truncated]';
|
|
1808
|
+
}
|
|
1809
|
+
|
|
1810
|
+
addSection({
|
|
1811
|
+
type: 'linked',
|
|
1812
|
+
slug: linkedPage.slug,
|
|
1813
|
+
title: linkedPage.title,
|
|
1814
|
+
content,
|
|
1815
|
+
label: `关联页面: ${linkedPage.slug} (相关度: ${(linked.score * 100).toFixed(1)}%)`,
|
|
1816
|
+
});
|
|
1817
|
+
stats.linkedPages++;
|
|
1818
|
+
|
|
1819
|
+
// Also fetch raw data for highly relevant linked pages
|
|
1820
|
+
if (linked.score > 0.1) {
|
|
1821
|
+
try {
|
|
1822
|
+
const rawRows = await repo.readRaw(linked.slug) as Array<{ source: string; data: unknown }>;
|
|
1823
|
+
for (const row of rawRows) {
|
|
1824
|
+
let rawContent = typeof row.data === 'string' ? row.data : JSON.stringify(row.data);
|
|
1825
|
+
if (rawContent.trim().length > 100) {
|
|
1826
|
+
const remaining2 = maxChars - totalChars;
|
|
1827
|
+
if (rawContent.length > remaining2 - 100) {
|
|
1828
|
+
rawContent = rawContent.slice(0, remaining2 - 100) + '\n...[truncated]';
|
|
1829
|
+
}
|
|
1830
|
+
addSection({
|
|
1831
|
+
type: 'raw_data',
|
|
1832
|
+
slug: linked.slug,
|
|
1833
|
+
title: linkedPage.title,
|
|
1834
|
+
content: rawContent,
|
|
1835
|
+
label: `原始文档 (关联: ${row.source})`,
|
|
1836
|
+
});
|
|
1837
|
+
stats.rawDocs++;
|
|
1838
|
+
}
|
|
1839
|
+
}
|
|
1840
|
+
} catch { /* ignore */ }
|
|
1841
|
+
}
|
|
1842
|
+
}
|
|
1843
|
+
}
|
|
1844
|
+
|
|
1845
|
+
return { sections, totalChars, stats };
|
|
1846
|
+
}
|
|
1847
|
+
|
|
1848
|
+
/**
 * Simple keyword-based relevance scoring (fallback for linked pages that have
 * no semantic/embedding score).
 *
 * The question is lower-cased and split into Unicode code points. Whitespace,
 * punctuation, symbols and a fixed list of very common Chinese characters are
 * discarded. The result is the fraction of the remaining distinct characters
 * that occur anywhere in `text` (compared case-insensitively).
 *
 * @param text - Page text to score (the caller passes title + compiled body).
 * @param question - The user's question.
 * @returns A ratio in the range [0, 1]; 0 when the question contains no
 *   meaningful characters.
 */
function computeKeywordRelevance(text: string, question: string): number {
  const STOP_CHARS = new Set('的是了在和我有你就这不人都说上个大国为到以们年会生地要主中子自实家小对多能好可很所把当');
  // Unicode-aware filter: covers ASCII *and* full-width punctuation/symbols
  // (e.g. U+FF1F, U+FF0C) that a hand-written character class easily misses.
  // It is a superset of the characters the previous explicit list excluded.
  const IGNORED_CHAR = /[\s\p{P}\p{S}]/u;

  // Fold case BEFORE de-duplicating so "A" and "a" count as one character;
  // otherwise mixed-case questions skew the matched/total ratio.
  const uniqueChars = new Set<string>();
  for (const char of question.toLowerCase()) {
    if (!IGNORED_CHAR.test(char) && !STOP_CHARS.has(char)) {
      uniqueChars.add(char);
    }
  }
  if (uniqueChars.size === 0) return 0;

  const haystack = text.toLowerCase();
  let matched = 0;
  for (const char of uniqueChars) {
    if (haystack.includes(char)) matched++;
  }
  return matched / uniqueChars.size;
}
|
|
1377
1867
|
|
|
1378
|
-
|
|
1868
|
+
/**
 * Counters describing what was gathered while assembling LLM context.
 * Passed to `generateAnswerWithContext` alongside the collected sections.
 */
interface ContextStats {
  /** Number of primary page sections included (presumably one per search hit; confirm at the write site). */
  primaryPages: number;
  /** Number of raw-document sections included. */
  rawDocs: number;
  /** Number of linked-page sections included. */
  linkedPages: number;
  /** Characters left out of the context (presumably due to the size budget; confirm at the write site). */
  skippedChars: number;
}
|
|
1874
|
+
|
|
1875
|
+
/**
|
|
1876
|
+
* Build LLM prompt from collected context sections and generate answer.
|
|
1877
|
+
*/
|
|
1878
|
+
async function generateAnswerWithContext(
|
|
1379
1879
|
question: string,
|
|
1380
|
-
|
|
1880
|
+
sections: ContextSection[],
|
|
1881
|
+
stats: ContextStats,
|
|
1381
1882
|
llm: ResolvedLLM,
|
|
1382
1883
|
): Promise<string> {
|
|
1383
1884
|
const apiKey = llm.apiKey || process.env[llm.apiKeyEnv] || "";
|
|
@@ -1385,29 +1886,54 @@ async function generateAnswerFromExcerpts(
|
|
|
1385
1886
|
return "Error: LLM API key not configured.";
|
|
1386
1887
|
}
|
|
1387
1888
|
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
return `## Source ${i + 1}: ${p.title}\n**Slug:** ${p.slug}\n\n${p.excerpt}`;
|
|
1392
|
-
})
|
|
1393
|
-
.join("\n\n---\n\n");
|
|
1889
|
+
if (sections.length === 0) {
|
|
1890
|
+
return "知识库中没有找到相关内容。";
|
|
1891
|
+
}
|
|
1394
1892
|
|
|
1395
|
-
|
|
1893
|
+
// Build context sections with clear labels
|
|
1894
|
+
const contextParts: string[] = [];
|
|
1895
|
+
let sectionIndex = 0;
|
|
1896
|
+
|
|
1897
|
+
// Group by type for cleaner output
|
|
1898
|
+
const primarySections = sections.filter(s => s.type === 'primary');
|
|
1899
|
+
const rawSections = sections.filter(s => s.type === 'raw_data');
|
|
1900
|
+
const linkedSections = sections.filter(s => s.type === 'linked');
|
|
1901
|
+
|
|
1902
|
+
function renderSections(group: ContextSection[], header: string) {
|
|
1903
|
+
if (group.length === 0) return;
|
|
1904
|
+
contextParts.push(`## ${header}\n`);
|
|
1905
|
+
for (const s of group) {
|
|
1906
|
+
sectionIndex++;
|
|
1907
|
+
contextParts.push(`### [${sectionIndex}] ${s.title} — ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
|
|
1908
|
+
}
|
|
1909
|
+
contextParts.push('');
|
|
1910
|
+
}
|
|
1911
|
+
|
|
1912
|
+
renderSections(primarySections, '页面正文');
|
|
1913
|
+
renderSections(rawSections, '原始文档');
|
|
1914
|
+
renderSections(linkedSections, '关联页面');
|
|
1915
|
+
|
|
1916
|
+
const context = contextParts.join('\n');
|
|
1396
1917
|
|
|
1397
|
-
|
|
1918
|
+
const prompt = `你是一个知识库助手,请根据提供的知识库内容回答问题。
|
|
1919
|
+
|
|
1920
|
+
## 问题
|
|
1398
1921
|
${question}
|
|
1399
1922
|
|
|
1400
|
-
##
|
|
1401
|
-
|
|
1923
|
+
## 知识库内容
|
|
1924
|
+
|
|
1925
|
+
${context}
|
|
1402
1926
|
|
|
1403
|
-
##
|
|
1404
|
-
-
|
|
1405
|
-
-
|
|
1406
|
-
-
|
|
1407
|
-
-
|
|
1408
|
-
-
|
|
1927
|
+
## 回答要求
|
|
1928
|
+
- 仅基于提供的知识库内容回答,不要编造信息
|
|
1929
|
+
- 如果知识库中没有相关信息,请明确说明
|
|
1930
|
+
- 引用来源时使用 [[slug|标题]] 的格式
|
|
1931
|
+
- 使用清晰的 markdown 格式
|
|
1932
|
+
- 如果涉及时间线信息,请在回答中体现
|
|
1933
|
+
- 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
|
|
1934
|
+
- 语言与提问保持一致(中文提问用中文回答,英文提问用英文回答)
|
|
1409
1935
|
|
|
1410
|
-
##
|
|
1936
|
+
## 回答`;
|
|
1411
1937
|
|
|
1412
1938
|
try {
|
|
1413
1939
|
const resp = await fetch(
|
|
@@ -1423,12 +1949,12 @@ ${context || "(No relevant pages found)"}
|
|
|
1423
1949
|
messages: [
|
|
1424
1950
|
{
|
|
1425
1951
|
role: "system",
|
|
1426
|
-
content: "
|
|
1952
|
+
content: "你是一个专业的知识库助手,基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰,区分信息来源。",
|
|
1427
1953
|
},
|
|
1428
1954
|
{ role: "user", content: prompt },
|
|
1429
1955
|
],
|
|
1430
1956
|
temperature: 0.3,
|
|
1431
|
-
max_tokens:
|
|
1957
|
+
max_tokens: 4096,
|
|
1432
1958
|
}),
|
|
1433
1959
|
},
|
|
1434
1960
|
);
|