llm-kb 0.4.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -6
- package/bin/{chunk-DHOXVEIR.js → chunk-3WBSKCCH.js} +96 -119
- package/bin/chunk-EZ7LPPEP.js +218 -0
- package/bin/chunk-Y2764FFH.js +1356 -0
- package/bin/cli.js +385 -874
- package/bin/{indexer-KSYRIVVN.js → indexer-K37QM2HP.js} +2 -1
- package/bin/public/index.html +949 -0
- package/bin/server-QC5SN6T4.js +1069 -0
- package/package.json +4 -3
|
@@ -0,0 +1,1356 @@
|
|
|
1
|
+
import {
|
|
2
|
+
completeSimple,
|
|
3
|
+
continueKBSession,
|
|
4
|
+
createKBSession,
|
|
5
|
+
getApiKeyForProvider,
|
|
6
|
+
getNodeModulesPath,
|
|
7
|
+
resolveModelCandidates
|
|
8
|
+
} from "./chunk-3WBSKCCH.js";
|
|
9
|
+
|
|
10
|
+
// src/query.ts
|
|
11
|
+
import {
|
|
12
|
+
createAgentSession,
|
|
13
|
+
createBashTool,
|
|
14
|
+
createReadTool,
|
|
15
|
+
createWriteTool,
|
|
16
|
+
DefaultResourceLoader,
|
|
17
|
+
SettingsManager
|
|
18
|
+
} from "@mariozechner/pi-coding-agent";
|
|
19
|
+
|
|
20
|
+
// src/retrying-session.ts
|
|
21
|
+
/**
 * Converts any thrown value into a printable message.
 *
 * @param {unknown} error - Value caught from a `catch` clause.
 * @returns {string} `error.message` for Error instances; otherwise the value
 *   stringified, with `null`/`undefined` mapped to "Unknown error".
 */
function getErrorMessage(error) {
  return error instanceof Error ? error.message : String(error ?? "Unknown error");
}
|
|
25
|
+
/**
 * Heuristic check used to decide whether another model candidate should be tried.
 *
 * The error message is lower-cased and searched for each fragment below; any
 * hit makes the error count as retryable.
 *
 * @param {unknown} error - Value thrown by a provider call.
 * @returns {boolean} True when the message contains at least one fragment.
 */
function isRetryableProviderError(error) {
  const retryableFragments = [
    "401",
    "403",
    "429",
    "quota",
    "credit",
    "rate limit",
    "overloaded",
    "overload",
    "unavailable",
    "timeout",
    "timed out",
    "network",
    "connection",
    "provider",
    "api key",
    "authentication",
    "unauthorized",
    "forbidden"
  ];
  const haystack = getErrorMessage(error).toLowerCase();
  for (const fragment of retryableFragments) {
    if (haystack.includes(fragment)) return true;
  }
  return false;
}
|
|
48
|
+
/**
 * Builds a session proxy that moves on to the next model candidate when a
 * prompt fails or its result is rejected by the validator.
 *
 * @param {object} options
 * @param {(candidate: any) => Promise<object>} options.createSession - Creates a session for one candidate.
 * @param {any[]} options.candidates - Candidates in preference order.
 * @param {(session: object, beforeMessageCount: number) => (string|null|undefined)} [options.validatePromptResult]
 *   Returns an error message when a finished prompt must be treated as failed.
 * @param {(info: {from: any, to: any, error: unknown, attempt: number}) => void} [options.onRetry]
 *   Called just before switching from one candidate to the next.
 * @returns {Promise<object>} Proxy exposing `sessionId`, `sessionFile`, `state`,
 *   `subscribe`, `prompt`, `reload`, `setSessionName` and `dispose`.
 * @throws {Error} When `candidates` is empty, or (from `prompt`) when the last
 *   candidate fails or an error is not retryable.
 */
async function createRetryingSession(options) {
  const { createSession, candidates, validatePromptResult, onRetry } = options;
  if (candidates.length === 0) throw new Error("No usable models available");
  let candidateIndex = 0;
  let current = await createSession(candidates[candidateIndex]);
  let sessionName;
  // One record per live subscription: the listener plus whatever the session
  // it is currently attached to returned from subscribe().
  const subscriptions = [];
  const attachSubscribers = (session) => {
    for (const sub of subscriptions) sub.handle = session.subscribe(sub.fn);
  };
  const swapTo = async (nextIndex) => {
    const prev = current;
    // Create the replacement first: if this rejects, `candidateIndex` and
    // `current` still describe the same live session.
    const replacement = await createSession(candidates[nextIndex]);
    candidateIndex = nextIndex;
    current = replacement;
    if (sessionName) current.setSessionName(sessionName);
    attachSubscribers(current);
    prev.dispose();
  };
  const proxy = {
    get sessionId() {
      return current.sessionId;
    },
    get sessionFile() {
      return current.sessionFile;
    },
    get state() {
      return current.state;
    },
    subscribe(fn) {
      const sub = { fn, handle: current.subscribe(fn) };
      subscriptions.push(sub);
      // NOTE(review): assumes session.subscribe() returns an unsubscribe
      // function; any other return value is passed through unchanged.
      if (typeof sub.handle !== "function") return sub.handle;
      return () => {
        // Forget the listener so a later swap does not re-attach it.
        const at = subscriptions.indexOf(sub);
        if (at >= 0) subscriptions.splice(at, 1);
        if (typeof sub.handle === "function") return sub.handle();
      };
    },
    async prompt(text) {
      let lastError;
      for (let i = candidateIndex; i < candidates.length; i++) {
        if (i !== candidateIndex) await swapTo(i);
        const beforeMessageCount = Array.isArray(current.state?.messages) ? current.state.messages.length : 0;
        try {
          const result = await current.prompt(text);
          const validationError = validatePromptResult?.(current, beforeMessageCount);
          if (!validationError) return result;
          const next = candidates[i + 1];
          const error = new Error(validationError);
          lastError = error;
          // With no candidate left this throw lands in the catch below, which rethrows it.
          if (!next) throw error;
          onRetry?.({
            from: candidates[i],
            to: next,
            error,
            attempt: i + 2
          });
          continue;
        } catch (error) {
          lastError = error;
          const next = candidates[i + 1];
          if (!next || !isRetryableProviderError(error)) throw error;
          onRetry?.({
            from: candidates[i],
            to: next,
            error,
            attempt: i + 2
          });
        }
      }
      throw lastError instanceof Error ? lastError : new Error(getErrorMessage(lastError));
    },
    async reload() {
      return current.reload();
    },
    setSessionName(name) {
      // Remembered so replacement sessions created by swapTo get the same name.
      sessionName = name;
      return current.setSessionName(name);
    },
    dispose() {
      return current.dispose();
    }
  };
  return proxy;
}
|
|
127
|
+
|
|
128
|
+
// src/query.ts
|
|
129
|
+
import { readdir as readdir3, mkdir as mkdir3 } from "fs/promises";
|
|
130
|
+
import { existsSync as existsSync4, readdirSync, readFileSync } from "fs";
|
|
131
|
+
|
|
132
|
+
// src/trace-builder.ts
|
|
133
|
+
import { readFile, writeFile, mkdir, readdir } from "fs/promises";
|
|
134
|
+
import { existsSync } from "fs";
|
|
135
|
+
import { join, basename as pathBasename } from "path";
|
|
136
|
+
/**
 * Summarises a JSONL session file into a trace record.
 *
 * @param {string} sessionFile - Path of the session log (one JSON object per line).
 * @param {string} sourcesDir - Directory whose `.md` files count as available sources.
 * @returns {Promise<object|null>} The trace, or null when the file has fewer
 *   than two lines, no `session` header, or no assistant message with
 *   `stopReason === "stop"`.
 * @throws Propagates read errors for `sessionFile`.
 */
async function buildTrace(sessionFile, sourcesDir) {
  const raw = await readFile(sessionFile, "utf-8");
  const lines = raw.trim().split("\n").filter(Boolean);
  if (lines.length < 2) return null;
  const entries = [];
  let header = null;
  for (const line of lines) {
    try {
      const obj = JSON.parse(line);
      if (obj.type === "session") header = obj;
      else entries.push(obj);
    } catch {
      // Best effort: skip lines that are not usable JSON records.
    }
  }
  if (!header) return null;
  const messages = entries.filter((e) => e.type === "message");
  const lastAssistant = [...messages].reverse().find(
    (e) => e.message?.role === "assistant" && e.message?.stopReason === "stop"
  );
  if (!lastAssistant) return null;
  const modelChange = entries.find((e) => e.type === "model_change");
  const model = modelChange?.modelId ?? lastAssistant.message?.model ?? void 0;
  const firstUser = messages.find((e) => e.message?.role === "user");
  const question = extractText(firstUser?.message?.content);
  const sessionInfo = entries.find((e) => e.type === "session_info");
  const sessionName = sessionInfo?.name ?? "";
  const mode = sessionName.startsWith("index:") ? "index" : sessionName.startsWith("query:") || question ? "query" : "unknown";
  const answer = extractText(lastAssistant.message?.content);
  // A Set keeps first-seen order while de-duplicating in O(1) per path.
  const readPaths = new Set();
  for (const entry of messages) {
    if (entry.message?.role !== "assistant") continue;
    for (const block of entry.message?.content ?? []) {
      if (block.type === "toolCall" && block.name === "read") {
        const p = block.arguments?.path;
        if (typeof p === "string" && p) readPaths.add(p);
      }
    }
  }
  const filesRead = [...readPaths];
  let filesAvailable = [];
  try {
    const all = await readdir(sourcesDir);
    filesAvailable = all.filter((f) => f.endsWith(".md"));
  } catch {
    // Missing or unreadable sources directory: report no available files.
  }
  // Compare whole file names. A suffix test would treat "a.md" as read when
  // only "data.md" was opened. Either separator is accepted because the paths
  // come from tool-call arguments.
  const readNames = new Set(filesRead.map((p) => p.split(/[\\/]/).pop()));
  const filesSkipped = filesAvailable.filter((f) => !readNames.has(f));
  const firstMsg = messages[0];
  const lastMsg = messages[messages.length - 1];
  let durationMs;
  if (firstMsg?.timestamp && lastMsg?.timestamp) {
    durationMs = new Date(lastMsg.timestamp).getTime() - new Date(firstMsg.timestamp).getTime();
  }
  return {
    sessionId: header.id,
    sessionFile: pathBasename(sessionFile),
    timestamp: header.timestamp,
    mode,
    question: question || void 0,
    answer: answer || void 0,
    filesRead,
    filesAvailable,
    filesSkipped,
    model,
    durationMs
  };
}
|
|
203
|
+
/**
 * Persists a trace as pretty-printed JSON at
 * `<kbRoot>/.llm-kb/traces/<sessionId>.json`, creating the directory if needed.
 *
 * @param {string} kbRoot - Knowledge-base root directory.
 * @param {object} trace - Trace record; `trace.sessionId` names the file.
 * @returns {Promise<void>}
 */
async function saveTrace(kbRoot, trace) {
  const dir = join(kbRoot, ".llm-kb", "traces");
  await mkdir(dir, { recursive: true });
  const target = join(dir, `${trace.sessionId}.json`);
  const payload = `${JSON.stringify(trace, null, 2)}\n`;
  await writeFile(target, payload, "utf-8");
}
|
|
209
|
+
/**
 * Records a query trace in `<kbRoot>/.llm-kb/wiki/queries.md`, newest entry first.
 *
 * The log title always stays at the top of the file. The newest entry is
 * inserted directly below it, and entries that an earlier write placed above
 * the title are moved below it. A non-empty file that does not contain the
 * standard title is treated as having a custom layout: the entry is simply
 * prepended, as before.
 *
 * @param {string} kbRoot - Knowledge-base root directory.
 * @param {object} trace - Trace record; ignored unless `mode === "query"` and a question is present.
 * @returns {Promise<void>}
 * @throws Propagates filesystem errors other than a missing log file, and the
 *   RangeError from `toISOString()` when `trace.timestamp` is not a valid date.
 */
async function appendToQueryLog(kbRoot, trace) {
  if (trace.mode !== "query" || !trace.question) return;
  const logHeader = `# Query Log

All queries run against this knowledge base.

---

`;
  const wikiDir = join(kbRoot, ".llm-kb", "wiki");
  await mkdir(wikiDir, { recursive: true });
  const logPath = join(wikiDir, "queries.md");
  const date = new Date(trace.timestamp).toISOString().replace("T", " ").slice(0, 19);
  // A duration of 0 ms is a real measurement; only a missing or non-finite value prints "?".
  const durationSec = Number.isFinite(trace.durationMs) ? `${(trace.durationMs / 1e3).toFixed(1)}s` : "?";
  const filesLine = trace.filesRead.length > 0 ? trace.filesRead.map((f) => pathBasename(f)).join(", ") : "_none_";
  const entry = [
    `## ${trace.question}`,
    ``,
    `- **Date:** ${date}`,
    `- **Model:** ${trace.model ?? "unknown"}`,
    `- **Duration:** ${durationSec}`,
    `- **Files read:** ${filesLine}`,
    trace.filesSkipped.length > 0 ? `- **Files skipped:** ${trace.filesSkipped.join(", ")}` : null,
    ``,
    trace.answer ? `### Answer

${trace.answer}` : null,
    ``,
    `---`,
    ``
  ].filter((l) => l !== null).join("\n");
  // Read directly instead of exists-then-read, so the file cannot disappear between the two steps.
  let existing = "";
  try {
    existing = await readFile(logPath, "utf-8");
  } catch (error) {
    if (error?.code !== "ENOENT") throw error;
  }
  const headerAt = existing.indexOf(logHeader);
  let output;
  if (existing === "") {
    output = logHeader + entry;
  } else if (headerAt < 0) {
    output = entry + existing;
  } else {
    const aboveHeader = existing.slice(0, headerAt);
    const belowHeader = existing.slice(headerAt + logHeader.length);
    output = logHeader + entry + aboveHeader + belowHeader;
  }
  await writeFile(logPath, output, "utf-8");
}
|
|
246
|
+
/**
 * Pulls the plain text out of a message `content` value.
 *
 * @param {unknown} content - A string, or an array of blocks with `type` and optional `text`.
 * @returns {string} A string is returned unchanged; for an array, the `text` of
 *   every `type === "text"` block concatenated and trimmed; otherwise "".
 */
function extractText(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  let combined = "";
  for (const block of content) {
    if (block.type === "text") combined += block.text ?? "";
  }
  return combined.trim();
}
|
|
254
|
+
|
|
255
|
+
// src/citations.ts
|
|
256
|
+
import { readFile as readFile2, readdir as readdir2 } from "fs/promises";
|
|
257
|
+
import { existsSync as existsSync2 } from "fs";
|
|
258
|
+
import { join as join2 } from "path";
|
|
259
|
+
/**
 * Splits an agent response into its answer and the structured citations that
 * follow a line containing only `CITATIONS:` (case-insensitive).
 *
 * Recognised citation line (optional `- ` or `[n] ` prefix):
 *   file: "<name>", page: <n>, quote: "<text>"[, bbox: {x, y, width, height}]
 *   file: "<name>", pages: [<n>, ...], quote: "<text>"[, bbox: [{page, x, y, width, height}, ...]]
 *
 * @param {string} agentResponse - Raw response text.
 * @returns {{answer: string, citations: object[]}} Without a CITATIONS marker the
 *   whole response is the answer. Each citation has `file`, `page` (0 when it
 *   cannot be parsed), `quote`, and optionally `bbox` (single page) or `pages`
 *   (multi page).
 */
function parseCitations(agentResponse) {
  const markerAt = agentResponse.search(/^CITATIONS:\s*$/im);
  if (markerAt < 0) {
    return { answer: agentResponse, citations: [] };
  }
  const rectPatterns = {
    x: /x:\s*([\d.]+)/,
    y: /y:\s*([\d.]+)/,
    width: /width:\s*([\d.]+)/,
    height: /height:\s*([\d.]+)/
  };
  // Returns {x, y, width, height}, or null as soon as one field is missing.
  const readRect = (text) => {
    const rect = {};
    for (const [field, pattern] of Object.entries(rectPatterns)) {
      const hit = text.match(pattern);
      if (!hit) return null;
      rect[field] = parseFloat(hit[1]);
    }
    return rect;
  };
  const citationsBlock = agentResponse.slice(markerAt);
  const lineRe = /^\s*(?:-\s*)?(?:\[\d+\]\s*)?file:\s*"([^"]+)"\s*,\s*page(s)?:\s*(\[[^\]]+\]|\d+)\s*,\s*quote:\s*"([^"]+)"(.*)/gm;
  const citations = [];
  for (const found of citationsBlock.matchAll(lineRe)) {
    const [, file, plural, rawPages, quote, tail] = found;
    const pageStr = rawPages.trim();
    const rest = tail || "";
    const citation = { file, page: 0, quote };
    if (plural === "s") {
      const pageNums = pageStr.replace(/[\[\]]/g, "").split(/\s*,\s*/).map(Number).filter((n) => !Number.isNaN(n));
      citation.page = pageNums[0] || 0;
      const boxList = rest.match(/bbox:\s*\[([^\]]+)\]/);
      if (boxList) {
        const pageBBoxes = [];
        for (const chunk of boxList[1].split(/\}\s*,\s*\{/)) {
          const clean = chunk.replace(/[{}]/g, "");
          const pageHit = clean.match(/page:\s*(\d+)/);
          const rect = readRect(clean);
          if (pageHit && rect) pageBBoxes.push({ page: parseInt(pageHit[1], 10), ...rect });
        }
        if (pageBBoxes.length > 0) citation.pages = pageBBoxes;
      }
    } else {
      citation.page = parseInt(pageStr, 10) || 0;
      const single = rest.match(/bbox:\s*\{([^}]+)\}/);
      const rect = single ? readRect(single[1]) : null;
      if (rect) citation.bbox = rect;
    }
    citations.push(citation);
  }
  return { answer: agentResponse.slice(0, markerAt).trimEnd(), citations };
}
|
|
325
|
+
// Items whose y values differ by at most this much are placed on the same line.
const Y_TOLERANCE = 3;
// Horizontal gap above which two neighbouring items are treated as separate columns.
const X_GAP_COLUMN = 15;
/**
 * Flattens positioned text items into one string plus a segment table that
 * maps character ranges of that string back to each item's box.
 *
 * Whitespace-only items are dropped. Items are grouped into lines by y (within
 * Y_TOLERANCE of the line's first item), ordered by x within a line, and lines
 * are joined with "\n".
 *
 * @param {{text: string, x: number, y: number, width: number, height: number}[]} textItems
 * @returns {{text: string, segments: {start: number, end: number, bbox: object}[]}}
 */
function buildTextRun(textItems) {
  const visible = textItems.filter((t) => t.text.trim().length > 0);
  if (visible.length === 0) return { text: "", segments: [] };
  const ordered = [...visible].sort((a, b) => {
    const dy = a.y - b.y;
    return Math.abs(dy) > Y_TOLERANCE ? dy : a.x - b.x;
  });
  const rows = [];
  let rowY;
  for (const item of ordered) {
    const openRow = rows[rows.length - 1];
    if (openRow && Math.abs(item.y - rowY) <= Y_TOLERANCE) {
      openRow.push(item);
    } else {
      rows.push([item]);
      rowY = item.y;
    }
  }
  rows.forEach((row) => row.sort((a, b) => a.x - b.x));
  let text = "";
  const segments = [];
  rows.forEach((row, rowIndex) => {
    row.forEach((item, column) => {
      if (column > 0) {
        const left = row[column - 1];
        const gap = item.x - (left.x + left.width);
        // NOTE(review): as written both branches append the same separator —
        // confirm whether the column-gap case was meant to use a wider one.
        if (gap > X_GAP_COLUMN) {
          text += " ";
        } else if (gap >= 0) {
          text += " ";
        }
      }
      const start = text.length;
      text += item.text;
      segments.push({
        start,
        end: text.length,
        bbox: { x: item.x, y: item.y, width: item.width, height: item.height }
      });
    });
    if (rowIndex < rows.length - 1) text += "\n";
  });
  return { text, segments };
}
|
|
382
|
+
/**
 * Canonical form used for tolerant text comparison: lower-cased, everything
 * that is neither a word character nor whitespace removed, whitespace runs
 * collapsed to one space, and the ends trimmed.
 *
 * @param {string} s
 * @returns {string}
 */
function normalize(s) {
  const lowered = s.toLowerCase();
  const wordsAndSpaces = lowered.replace(/[^\w\s]/g, "");
  return wordsAndSpaces.replace(/\s+/g, " ").trim();
}
|
|
385
|
+
/**
 * Exact substring search.
 *
 * @param {string} haystack
 * @param {string} needle
 * @returns {[number, number]|null} `[start, end)` of the first occurrence, or null.
 */
function findSubstring(haystack, needle) {
  const at = haystack.indexOf(needle);
  return at < 0 ? null : [at, at + needle.length];
}
|
|
390
|
+
/**
 * Finds `needle` in `haystack` while ignoring case, punctuation and whitespace
 * differences, and reports the match as a range in the original `haystack`.
 *
 * The normalized text and its offset table are built in a single pass, so
 * every normalized character (including collapsed whitespace) maps to the
 * exact original position it came from.
 *
 * @param {string} haystack - Original text to search.
 * @param {string} needle - Text to look for.
 * @returns {[number, number]|null} `[start, end)` in `haystack`, or null when
 *   the needle normalizes to an empty string or is not found.
 */
function findNormalized(haystack, needle) {
  // Applies the normalization rules (lowercase, drop non-word characters,
  // collapse whitespace, trim). For each kept character it records the
  // original [start, end) span it came from.
  const indexNormalized = (source) => {
    let text = "";
    const starts = [];
    const ends = [];
    let gapStart = -1;
    let gapEnd = -1;
    for (let i = 0; i < source.length; i++) {
      const ch = source[i];
      if (/\s/.test(ch)) {
        // Leading whitespace is trimmed, so a gap only opens after real text.
        if (text.length > 0) {
          if (gapStart < 0) gapStart = i;
          gapEnd = i + 1;
        }
        continue;
      }
      const kept = ch.toLowerCase().replace(/[^\w]/g, "");
      if (!kept) continue;
      if (gapStart >= 0) {
        // One space stands for the whole whitespace run, even when punctuation split it.
        text += " ";
        starts.push(gapStart);
        ends.push(gapEnd);
        gapStart = -1;
      }
      for (const unit of kept) {
        text += unit;
        starts.push(i);
        ends.push(i + 1);
      }
    }
    return { text, starts, ends };
  };
  const hay = indexNormalized(haystack);
  const normNeedle = indexNormalized(needle).text;
  if (!normNeedle) return null;
  const idx = hay.text.indexOf(normNeedle);
  if (idx < 0) return null;
  // The needle is trimmed, so its first and last characters are word characters.
  const origStart = hay.starts[idx];
  const origEnd = hay.ends[idx + normNeedle.length - 1];
  return origEnd > origStart ? [origStart, origEnd] : null;
}
|
|
418
|
+
/**
 * Edit distance between two strings (insert, delete and substitute each cost 1),
 * computed with two rolling rows.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
function levenshtein(a, b) {
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;
  let above = Array.from({ length: cols + 1 }, (_, j) => j);
  let row = new Array(cols + 1);
  for (let i = 1; i <= rows; i++) {
    row[0] = i;
    for (let j = 1; j <= cols; j++) {
      const substitution = above[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
      row[j] = Math.min(row[j - 1] + 1, above[j] + 1, substitution);
    }
    // The row just filled becomes "above"; the old one is reused as scratch.
    [above, row] = [row, above];
  }
  return above[cols];
}
|
|
436
|
+
/**
 * Approximate search: slides windows over the normalized haystack and keeps
 * the one with the smallest edit distance to the normalized needle.
 *
 * The normalized haystack is built together with an offset table, so the
 * winning window maps back to the exact original range even when the haystack
 * has leading whitespace or punctuation surrounded by spaces.
 *
 * @param {string} haystack - Original text to search.
 * @param {string} needle - Text to look for; ignored when shorter than 5 normalized characters.
 * @param {number} [maxDistRatio=0.2] - Allowed edit distance as a fraction of the needle length.
 * @returns {[number, number, number]|null} `[start, end, confidence]`, where
 *   confidence is `1 - distance / needleLength`; null when nothing is close enough.
 */
function findFuzzy(haystack, needle, maxDistRatio = 0.2) {
  // Applies the normalization rules (lowercase, drop non-word characters,
  // collapse whitespace, trim) and records, for every kept character, the
  // original [start, end) span it came from.
  const indexNormalized = (source) => {
    let text = "";
    const starts = [];
    const ends = [];
    let gapStart = -1;
    let gapEnd = -1;
    for (let i = 0; i < source.length; i++) {
      const ch = source[i];
      if (/\s/.test(ch)) {
        if (text.length > 0) {
          if (gapStart < 0) gapStart = i;
          gapEnd = i + 1;
        }
        continue;
      }
      const kept = ch.toLowerCase().replace(/[^\w]/g, "");
      if (!kept) continue;
      if (gapStart >= 0) {
        text += " ";
        starts.push(gapStart);
        ends.push(gapEnd);
        gapStart = -1;
      }
      for (const unit of kept) {
        text += unit;
        starts.push(i);
        ends.push(i + 1);
      }
    }
    return { text, starts, ends };
  };
  const hay = indexNormalized(haystack);
  const normHay = hay.text;
  const normNeedle = indexNormalized(needle).text;
  if (!normNeedle || normNeedle.length < 5) return null;
  const baseSize = normNeedle.length;
  const maxDist = Math.ceil(baseSize * maxDistRatio);
  let bestDist = maxDist + 1;
  let bestIdx = -1;
  let bestWinSize = baseSize;
  // Window sizes span the needle length plus or minus the allowed edits.
  const minWin = Math.max(5, baseSize - maxDist);
  const maxWin = baseSize + maxDist;
  for (let winSize = minWin; winSize <= maxWin; winSize++) {
    if (winSize > normHay.length) break;
    for (let i = 0; i <= normHay.length - winSize; i++) {
      const dist = levenshtein(normHay.substring(i, i + winSize), normNeedle);
      if (dist < bestDist) {
        bestDist = dist;
        bestIdx = i;
        bestWinSize = winSize;
        if (dist === 0) break;
      }
    }
    if (bestDist === 0) break;
  }
  if (bestIdx < 0 || bestDist > maxDist) return null;
  const origStart = hay.starts[bestIdx];
  const origEnd = hay.ends[bestIdx + bestWinSize - 1];
  const confidence = 1 - bestDist / baseSize;
  return [origStart, origEnd, confidence];
}
|
|
490
|
+
/**
 * Collects the boxes of every segment that overlaps the half-open character
 * range `[start, end)`.
 *
 * @param {{start: number, end: number, bbox: object}[]} segments
 * @param {number} start
 * @param {number} end
 * @returns {object[]} Boxes in segment order.
 */
function getBoxesForRange(segments, start, end) {
  return segments
    .filter((segment) => segment.start < end && segment.end > start)
    .map((segment) => segment.bbox);
}
|
|
499
|
+
/**
 * Computes the smallest rectangle enclosing all given boxes, with every
 * number rounded to two decimals.
 *
 * @param {{x: number, y: number, width: number, height: number}[]} boxes
 * @returns {{x: number, y: number, width: number, height: number}|null} Null for an empty list.
 */
function mergeBoxes(boxes) {
  if (boxes.length === 0) return null;
  const roundTo2 = (value) => Math.round(value * 100) / 100;
  let left = Infinity;
  let top = Infinity;
  let right = -Infinity;
  let bottom = -Infinity;
  for (const { x, y, width, height } of boxes) {
    if (x < left) left = x;
    if (y < top) top = y;
    if (x + width > right) right = x + width;
    if (y + height > bottom) bottom = y + height;
  }
  return {
    x: roundTo2(left),
    y: roundTo2(top),
    width: roundTo2(right - left),
    height: roundTo2(bottom - top)
  };
}
|
|
515
|
+
/**
 * Reduces a file name to a comparison key: strips a trailing `.md`, `.json`
 * or `.pdf` extension (any case), strips a leading ordinal such as "12. ",
 * then lower-cases and trims.
 *
 * @param {string} name
 * @returns {string}
 */
function normalizeFilename(name) {
  const withoutExtension = name.replace(/\.(md|json|pdf)$/i, "");
  const withoutOrdinal = withoutExtension.replace(/^\d+\.\s*/, "");
  return withoutOrdinal.toLowerCase().trim();
}
|
|
518
|
+
/**
 * Locates the `.json` companion of a cited source file inside `sourcesDir`.
 *
 * Resolution order:
 *   1. the exact name with `.md` swapped for `.json`;
 *   2. a file whose normalized name equals the normalized citation name;
 *   3. a file whose normalized name contains, or is contained in, the citation name;
 *   4. the file sharing the most words with the citation name (at least 60%).
 *
 * Names that normalize to an empty string never take part in steps 2-4,
 * because an empty string is "contained" in every other string and would
 * match an unrelated file.
 *
 * @param {string} file - File name as written in the citation.
 * @param {string} sourcesDir - Directory holding the source files.
 * @returns {Promise<string|null>} Path of the JSON file, or null when nothing
 *   matches or the directory cannot be listed.
 */
async function resolveJsonPath(file, sourcesDir) {
  const withExt = file.endsWith(".md") ? file : file + ".md";
  const exact = join2(sourcesDir, withExt.replace(/\.md$/, ".json"));
  if (existsSync2(exact)) return exact;
  try {
    const needle = normalizeFilename(file);
    if (!needle) return null;
    const files = await readdir2(sourcesDir);
    const candidates = files
      .filter((f) => f.endsWith(".json"))
      .map((name) => ({ name, key: normalizeFilename(name) }))
      .filter((candidate) => candidate.key);
    const sameName = candidates.find((candidate) => candidate.key === needle);
    if (sameName) return join2(sourcesDir, sameName.name);
    const partial = candidates.find(
      (candidate) => candidate.key.includes(needle) || needle.includes(candidate.key)
    );
    if (partial) return join2(sourcesDir, partial.name);
    const needleWords = needle.split(/\s+/);
    let bestScore = 0;
    let bestFile = null;
    for (const candidate of candidates) {
      const candidateWords = candidate.key.split(/\s+/);
      const matches = needleWords.filter((w) => candidateWords.some((c) => c.includes(w) || w.includes(c)));
      const score = matches.length / Math.max(needleWords.length, 1);
      if (score > bestScore && score >= 0.6) {
        bestScore = score;
        bestFile = candidate.name;
      }
    }
    if (bestFile) return join2(sourcesDir, bestFile);
  } catch {
    // Best effort: an unreadable directory simply means "not found".
  }
  return null;
}
|
|
552
|
+
/**
 * Tries to locate a citation's quote in the positional JSON of its source
 * file and attaches the matching boxes.
 *
 * Pages tried: the cited page, then the page before and the page after; when
 * the citation has no positive page number, every page. On each page the
 * quote is searched exactly (confidence 1, returned immediately), then
 * normalized (0.9), then fuzzily unless `options.skipFuzzy` is set. The
 * highest-confidence non-exact hit is kept.
 *
 * @param {object} citation - Citation with `file`, `page` and `quote`.
 * @param {string} sourcesDir - Directory holding the source JSON files.
 * @param {{skipFuzzy?: boolean}} [options]
 * @returns {Promise<object>} The citation extended with `matched`,
 *   `confidence`, `boundingBoxes`, `mergedRect` and, on a hit, the page where
 *   the quote was found.
 */
async function matchCitation(citation, sourcesDir, options) {
  const unmatched = {
    ...citation,
    matched: false,
    confidence: 0,
    boundingBoxes: [],
    mergedRect: null
  };
  const jsonPath = await resolveJsonPath(citation.file, sourcesDir);
  if (!jsonPath) return unmatched;
  let bboxData;
  try {
    bboxData = JSON.parse(await readFile2(jsonPath, "utf-8"));
  } catch {
    return unmatched;
  }
  const pagesToTry = [];
  if (citation.page && citation.page > 0) {
    // Cited page first, then its neighbours.
    for (const pageNumber of [citation.page, citation.page - 1, citation.page + 1]) {
      const found = bboxData.pages?.find((p) => p.page === pageNumber);
      if (found) pagesToTry.push(found);
    }
  } else {
    pagesToTry.push(...bboxData.pages ?? []);
  }
  if (pagesToTry.length === 0) return unmatched;
  // Builds the result record for a hit spanning [from, to) of the page's text run.
  const describeHit = (page, segments, from, to, confidence, matched) => {
    const boxes = getBoxesForRange(segments, from, to);
    return {
      ...citation,
      page: page.page,
      matched,
      confidence,
      boundingBoxes: boxes,
      mergedRect: mergeBoxes(boxes)
    };
  };
  let bestMatch = null;
  const consider = (candidate) => {
    if (!bestMatch || candidate.confidence > bestMatch.confidence) bestMatch = candidate;
  };
  for (const page of pagesToTry) {
    const textItems = (page.textItems ?? []).map((t) => ({
      text: t.text ?? "",
      x: t.x ?? 0,
      y: t.y ?? 0,
      width: t.width ?? 0,
      height: t.height ?? 0
    }));
    const run = buildTextRun(textItems);
    if (!run.text) continue;
    const exact = findSubstring(run.text, citation.quote);
    if (exact) return describeHit(page, run.segments, exact[0], exact[1], 1, true);
    const norm = findNormalized(run.text, citation.quote);
    if (norm) {
      consider(describeHit(page, run.segments, norm[0], norm[1], 0.9, true));
      continue;
    }
    if (options?.skipFuzzy) continue;
    const fuzzy = findFuzzy(run.text, citation.quote);
    if (fuzzy) {
      const [start, end, confidence] = fuzzy;
      consider(describeHit(page, run.segments, start, end, confidence, confidence >= 0.5));
    }
  }
  return bestMatch ?? unmatched;
}
|
|
636
|
+
|
|
637
|
+
// src/wiki-updater.ts
|
|
638
|
+
import { existsSync as existsSync3 } from "fs";
|
|
639
|
+
import { readFile as readFile3, writeFile as writeFile2, mkdir as mkdir2 } from "fs/promises";
|
|
640
|
+
import { join as join3 } from "path";
|
|
641
|
+
|
|
642
|
+
// src/complete-with-fallback.ts
|
|
643
|
+
/**
 * Converts any thrown value into a printable message.
 *
 * @param {unknown} error - Value caught from a `catch` clause.
 * @returns {string} `error.message` for Error instances; otherwise the value
 *   stringified, with `null`/`undefined` mapped to "Unknown error".
 */
function getErrorMessage2(error) {
  return error instanceof Error ? error.message : String(error ?? "Unknown error");
}
|
|
647
|
+
/**
 * Heuristic check used to decide whether the next model candidate should be tried.
 *
 * The error message is lower-cased and searched for each fragment below; any
 * hit makes the error count as retryable.
 *
 * @param {unknown} error - Value thrown by a provider call.
 * @returns {boolean} True when the message contains at least one fragment.
 */
function isRetryableProviderError2(error) {
  const retryableFragments = [
    "401",
    "403",
    "429",
    "quota",
    "credit",
    "rate limit",
    "overloaded",
    "overload",
    "unavailable",
    "timeout",
    "timed out",
    "network",
    "connection",
    "provider",
    "api key",
    "authentication",
    "unauthorized",
    "forbidden"
  ];
  const haystack = getErrorMessage2(error).toLowerCase();
  for (const fragment of retryableFragments) {
    if (haystack.includes(fragment)) return true;
  }
  return false;
}
|
|
670
|
+
/**
 * Runs a one-shot completion, moving through the resolved model candidates
 * until one succeeds.
 *
 * Candidates without an API key are skipped. A failure moves on to the next
 * candidate only when the error looks provider-related; any other error, or a
 * failure of the last candidate, is rethrown.
 *
 * @param {string} modelId - Requested model id.
 * @param {object} authStorage - Credential store passed through to the resolvers.
 * @param {string} purpose - Purpose tag passed to `resolveModelCandidates`.
 * @param {object} input - Completion input passed to `completeSimple`.
 * @returns {Promise<object>} Whatever `completeSimple` resolves with.
 * @throws {Error} When no candidate exists, when no candidate has an API key,
 *   or with the error of the failing candidate.
 */
async function completeWithFallback(modelId, authStorage, purpose, input) {
  const candidates = await resolveModelCandidates(modelId, authStorage, purpose);
  if (candidates.length === 0) {
    throw new Error(`No usable model found for '${modelId}'. Configure Anthropic, OpenRouter, or OpenAI credentials.`);
  }
  let lastError;
  let attempted = false;
  for (let i = 0; i < candidates.length; i++) {
    const candidate = candidates[i];
    const apiKey = await getApiKeyForProvider(candidate.provider, authStorage);
    if (!apiKey) continue;
    attempted = true;
    try {
      return await completeSimple(candidate.model, input, { apiKey });
    } catch (error) {
      lastError = error;
      if (i === candidates.length - 1 || !isRetryableProviderError2(error)) throw error;
    }
  }
  if (!attempted) {
    // Every candidate was skipped for lack of credentials: say so instead of "Unknown error".
    throw new Error(`No API key available for any model candidate of '${modelId}'. Configure Anthropic, OpenRouter, or OpenAI credentials.`);
  }
  throw lastError instanceof Error ? lastError : new Error(getErrorMessage2(lastError));
}
|
|
689
|
+
|
|
690
|
+
// src/wiki-updater.ts
|
|
691
|
+
/**
 * Renders citations as a numbered block to append to the answer in the wiki prompt.
 *
 * @param {{file: string, page: number, quote: string}[]} citations
 * @returns {string} "" for an empty list; otherwise two newlines, a heading
 *   line, and one numbered line per citation.
 */
function formatCitationsForWiki(citations) {
  if (citations.length === 0) return "";
  const numbered = [];
  citations.forEach((c, i) => {
    numbered.push(` [${i + 1}] ${c.file}, p.${c.page}: "${c.quote}"`);
  });
  // The two leading empty strings yield the blank line that separates the block from the answer.
  return ["", "", "**Verified citations (preserve page numbers in wiki):**", ...numbered].join("\n");
}
|
|
701
|
+
/**
 * Builds the user prompt that asks the model to create or update the wiki.
 *
 * @param {string} question - The user's question.
 * @param {string} answer - The answer to integrate.
 * @param {string} sources - Comma-separated source file names.
 * @param {string} date - Date string shown in the prompt.
 * @param {string} currentWiki - Existing wiki markdown; blank selects the "create" prompt.
 * @param {object[]} [citations] - Verified citations appended to the answer when non-empty.
 * @returns {string} The prompt text.
 */
function buildPrompt(question, answer, sources, date, currentWiki, citations) {
  const citationsSection = citations && citations.length > 0 ? formatCitationsForWiki(citations) : "";
  // NOTE(review): the "Good:" / "Bad:" example lines carry no indentation here — confirm against the original source.
  const rules = `Rules for wiki structure:
- Use ## for CONCEPTS and TOPICS \u2014 NOT source file names
Good: "## Electronic Evidence", "## Mob Lynching", "## Burden of Proof"
Bad: "## Indian Evidence Act.md", "## indian penal code - new.md"
- Use ### for subtopics within a concept
- A concept can draw from MULTIPLE source files \u2014 synthesize, don't separate by file
- If knowledge from this Q&A fits an existing concept, ADD to it \u2014 never duplicate
- If it's a genuinely new concept, create a new ## section
- Be concise: bullet points for lists, short prose for explanations
- ALWAYS include source citations with page numbers inline: (Source: filename, p.X)
- Every factual claim must have a page-level citation \u2014 this is critical for verification
- Add cross-references where concepts relate: See also: [[Other Concept]]
- End each ## section with: *Sources: file1, file2 \xB7 date*
- Separate ## sections with: ---`;
  // The Q&A section is identical in both prompts; it ends with the "---" rule and a newline.
  const qaSection = `**Question:** ${question}
**Sources used:** ${sources}
**Date:** ${date}

**Answer:**
${answer}${citationsSection}

---
`;
  if (!currentWiki.trim()) {
    return `You are creating a concept-organized knowledge wiki.

## First Q&A to add
${qaSection}
Create a clean wiki from this Q&A.
- Start with: # Knowledge Wiki\\n\\n> Concept-organized knowledge base. Updated after each query.\\n\\n---
${rules}

Return ONLY the wiki markdown. No explanation.`;
  }
  return `You are maintaining a concept-organized knowledge wiki.

## Current wiki
${currentWiki}

## New Q&A to integrate
${qaSection}
Update the wiki to integrate this new knowledge.
${rules}

Return ONLY the complete updated wiki markdown. No explanation.`;
}
|
|
756
|
+
/**
 * Asks a model to fold a finished query (question and answer) into
 * `<kbRoot>/.llm-kb/wiki/wiki.md` and overwrites the file with the result.
 *
 * Does nothing unless the trace is a query with both a question and an
 * answer. The file is left untouched when the model returns no text.
 *
 * @param {string} kbRoot - Knowledge-base root directory.
 * @param {object} trace - Trace record; `answerWithoutCitations` is preferred over `answer` when present.
 * @param {object} authStorage - Credential store passed to `completeWithFallback`.
 * @param {string} [indexModelId="claude-haiku-4-5"] - Model used for the wiki update.
 * @returns {Promise<void>}
 */
async function updateWiki(kbRoot, trace, authStorage, indexModelId = "claude-haiku-4-5") {
  const isAnsweredQuery = trace.mode === "query" && trace.question && trace.answer;
  if (!isAnsweredQuery) return;
  const wikiDir = join3(kbRoot, ".llm-kb", "wiki");
  await mkdir2(wikiDir, { recursive: true });
  const wikiPath = join3(wikiDir, "wiki.md");
  let currentWiki = "";
  if (existsSync3(wikiPath)) {
    // An unreadable wiki is treated as empty, so the model starts a fresh one.
    currentWiki = await readFile3(wikiPath, "utf-8").catch(() => "");
  }
  const sourceNames = trace.filesRead
    .map((f) => f.split(/[\\/]/).pop() ?? f)
    .filter((f) => f.endsWith(".md") && f !== "index.md" && f !== "wiki.md");
  const sources = sourceNames.join(", ") || "unknown";
  const date = new Date(trace.timestamp).toISOString().slice(0, 10);
  const answer = trace.answerWithoutCitations ?? trace.answer;
  const prompt = buildPrompt(trace.question, answer, sources, date, currentWiki, trace.citations);
  const request = {
    systemPrompt: "You are a precise knowledge librarian. Organize information by CONCEPT, not by source file. Synthesize knowledge from multiple sources into unified topic articles. ALWAYS preserve page-level citations (Source: filename, p.X) for every fact. Return only clean markdown.",
    messages: [{ role: "user", content: prompt, timestamp: Date.now() }]
  };
  const result = await completeWithFallback(indexModelId, authStorage, "wiki", request);
  const text = result.content.filter((b) => b.type === "text").map((b) => b.text).join("").trim();
  if (!text) return;
  await writeFile2(wikiPath, text + "\n", "utf-8");
}
|
|
780
|
+
|
|
781
|
+
// src/query.ts
|
|
782
|
+
import { join as join4, basename } from "path";
|
|
783
|
+
import chalk2 from "chalk";
|
|
784
|
+
|
|
785
|
+
// src/md-stream.ts
|
|
786
|
+
import chalk from "chalk";
|
|
787
|
+
/**
 * Incremental markdown-to-ANSI renderer for streamed text.
 *
 * Chunks are buffered until a full line is available; each complete line is
 * then styled with chalk and returned. When `isTTY` is false the stream is a
 * pass-through: `push` returns the chunk untouched and `end` returns "".
 *
 * Text inside inline code spans and fenced code blocks is emitted verbatim
 * (code spans are coloured, but never re-interpreted as emphasis, headers or
 * list items).
 */
var MarkdownStream = class {
  /** Text received so far that does not yet form a complete line. */
  buffer = "";
  isTTY;
  /** Fence character ("`" or "~") of the code block currently open, or "" outside one. */
  fenceChar = "";

  /**
   * @param {boolean} [isTTY=false] - Enable styling; when false the stream passes text through.
   */
  constructor(isTTY = false) {
    this.isTTY = isTTY;
  }

  /** Feed a text_delta chunk. Returns styled string ready for stdout. */
  push(chunk) {
    if (!this.isTTY) return chunk;
    this.buffer += chunk;
    return this.drain(false);
  }

  /** Flush remaining buffer (call on text_end). */
  end() {
    if (!this.isTTY) return "";
    const out = this.drain(true);
    this.buffer = "";
    // A text block that ended inside an unterminated fence must not leak into the next one.
    this.fenceChar = "";
    return out;
  }

  /**
   * Renders every complete line currently buffered.
   *
   * @param {boolean} final - When true, a trailing partial line is rendered too (without a newline).
   * @returns {string} Styled output.
   */
  drain(final) {
    let out = "";
    while (true) {
      const nlIdx = this.buffer.indexOf("\n");
      if (nlIdx === -1) {
        if (final && this.buffer.length > 0) {
          out += this.renderLine(this.buffer);
          this.buffer = "";
        }
        break;
      }
      const line = this.buffer.slice(0, nlIdx);
      this.buffer = this.buffer.slice(nlIdx + 1);
      out += this.renderLine(line) + "\n";
    }
    return out;
  }

  /** Render a single complete line with block + inline styling. */
  renderLine(line) {
    const trimmed = line.trimStart();

    // Fenced code blocks: the fence lines and everything between them pass through untouched,
    // so "# comment" or "- flag" inside code is not mistaken for a header or a bullet.
    // A backtick fence may not contain further backticks ("```x```" is an inline span).
    const fence = trimmed.match(/^(?:(`{3,})[^`]*|(~{3,}).*)$/);
    if (this.fenceChar) {
      if (fence && (fence[1] ?? fence[2])[0] === this.fenceChar) this.fenceChar = "";
      return line;
    }
    if (fence) {
      this.fenceChar = (fence[1] ?? fence[2])[0];
      return line;
    }

    // Horizontal rule: drawn as a dim line capped at 60 columns.
    if (/^-{3,}\s*$/.test(trimmed) || /^\*{3,}\s*$/.test(trimmed)) {
      const cols = process.stdout.columns || 80;
      return chalk.dim("\u2500".repeat(Math.min(cols, 60)));
    }

    const headerMatch = trimmed.match(/^(#{1,6})\s+(.*)$/);
    if (headerMatch) {
      return "\n" + chalk.bold(this.inline(headerMatch[2]));
    }

    const bulletMatch = trimmed.match(/^[-*+]\s+(.*)$/);
    if (bulletMatch) {
      const indent = line.length - trimmed.length;
      return " ".repeat(indent) + chalk.dim("\u2022") + " " + this.inline(bulletMatch[1]);
    }

    const numMatch = trimmed.match(/^(\d+)[.)]\s+(.*)$/);
    if (numMatch) {
      const indent = line.length - trimmed.length;
      return " ".repeat(indent) + chalk.dim(numMatch[1] + ".") + " " + this.inline(numMatch[2]);
    }

    // Table separator row (|---|:--:|) is dimmed; other table rows only get inline styling.
    if (/^\|[\s\-:|]+\|$/.test(trimmed)) {
      return chalk.dim(trimmed);
    }
    if (trimmed.startsWith("|") && trimmed.endsWith("|")) {
      return this.inline(line);
    }

    if (trimmed.startsWith(">")) {
      const content = trimmed.replace(/^>+\s*/, "");
      return chalk.dim("\u2502 ") + chalk.italic(this.inline(content));
    }

    return this.inline(line);
  }

  /** Apply inline markdown styling to text. */
  inline(text) {
    // Park code spans behind NUL-delimited placeholders first, so that "*", "~~" or "[...]"
    // inside code is never treated as markup. They are restored (coloured) at the end.
    const codeSpans = [];
    let styled = text.replace(/`([^`]+)`/g, (_, code) => {
      codeSpans.push(code);
      return `\u0000${codeSpans.length - 1}\u0000`;
    });

    styled = styled.replace(/\*\*\*(.+?)\*\*\*/g, (_, t) => chalk.bold.italic(t));
    styled = styled.replace(/\*\*(.+?)\*\*/g, (_, t) => chalk.bold(t));
    styled = styled.replace(/(?<!\*)\*(.+?)\*(?!\*)/g, (_, t) => chalk.italic(t));
    styled = styled.replace(/~~(.+?)~~/g, (_, t) => chalk.strikethrough(t));
    styled = styled.replace(
      /\[([^\]]+)\]\(([^)]+)\)/g,
      (_, label, url) => `${label} ${chalk.dim(`(${url})`)}`
    );

    return styled.replace(/\u0000(\d+)\u0000/g, (match, idx) => {
      const code = codeSpans[Number(idx)];
      // Leave anything that merely looks like a placeholder (stray NULs in the input) alone.
      return code === undefined ? match : chalk.cyan(code);
    });
  }
};
|
|
871
|
+
|
|
872
|
+
// src/query.ts
|
|
873
|
+
/**
 * Concatenates the text of every "text" block in a message's content and
 * trims the result.
 *
 * @param {Array<{type: string, text?: string}>|null|undefined} content - Message content blocks.
 * @returns {string} Joined text, or "" when there is no content.
 */
function extractAnswerText(content) {
  let combined = "";
  for (const block of content ?? []) {
    if (block.type === "text") {
      combined += block.text ?? "";
    }
  }
  return combined.trim();
}
|
|
876
|
+
/**
 * Collects the distinct `path` arguments of every "read" tool call issued by
 * assistant messages, in first-seen order.
 *
 * @param {Array<object>|null|undefined} messages - Conversation messages; a missing list yields [].
 * @returns {string[]} Unique paths that were read.
 */
function extractFilesRead(messages) {
  // A Set keeps insertion order and makes de-duplication O(1) per path.
  const seen = new Set();
  for (const msg of messages ?? []) {
    if (msg?.role !== "assistant") continue;
    for (const block of msg.content ?? []) {
      if (block?.type !== "toolCall" || block.name !== "read") continue;
      const path = block.arguments?.path ?? "";
      if (path) seen.add(path);
    }
  }
  return [...seen];
}
|
|
889
|
+
/**
 * Checks that a prompt produced a usable assistant answer.
 *
 * Only messages appended after `beforeMessageCount` are inspected; the most
 * recent assistant message among them must contain non-blank text.
 *
 * @param {object} session - Session whose `state.messages` holds the conversation.
 * @param {number} beforeMessageCount - Number of messages that existed before the prompt.
 * @returns {string|undefined} A failure reason, or undefined when the result is valid.
 */
function validateQueryResult(session, beforeMessageCount) {
  const allMessages = session.state?.messages;
  const fresh = Array.isArray(allMessages) ? allMessages.slice(beforeMessageCount) : [];

  // Walk backwards to pick the latest assistant message.
  let assistant;
  for (let i = fresh.length - 1; i >= 0; i--) {
    if (fresh[i].role === "assistant") {
      assistant = fresh[i];
      break;
    }
  }
  if (!assistant) {
    return "no assistant response produced";
  }

  const answer = extractAnswerText(assistant.content ?? []);
  if (answer.trim() === "") {
    return "assistant response was empty";
  }
  return undefined;
}
|
|
897
|
+
/**
 * Produces the short progress label shown for a tool call.
 *
 * File tools (read/write/edit) are labelled with a verb plus the file's base
 * name, but only when that name ends in a 1-6 character alphanumeric
 * extension. A bash call with a command is labelled "Running bash". Anything
 * else gets no label.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {{path?: string, command?: string}|null|undefined} args - Tool arguments.
 * @returns {string|null} The label, or null when nothing should be shown.
 */
function getToolLabel(toolName, args) {
  const fileVerbs = new Map([
    ["read", "Reading"],
    ["write", "Writing"],
    ["edit", "Editing"]
  ]);

  const verb = fileVerbs.get(toolName);
  if (verb !== undefined) {
    const file = basename(args?.path ?? "");
    const looksLikeFile = file !== "" && /\.[a-z0-9]{1,6}$/i.test(file);
    return looksLikeFile ? `${verb} ${file}` : null;
  }

  const isBashWithCommand = toolName === "bash" && Boolean(args?.command);
  return isBashWithCommand ? `Running bash` : null;
}
|
|
909
|
+
/**
 * Builds the AGENTS.md-style instruction text used in query mode.
 *
 * The text covers, in order: the mandatory citation format, the current wiki
 * (when there is one), the steps for answering from sources, the list of
 * parsed sources, recipes for non-PDF files, general rules and the guidelines
 * hint. In save mode a "Research Mode" section is appended.
 *
 * @param {string[]} sourceFiles - Names of the parsed source files to list.
 * @param {boolean} save - Append the research-mode instructions when truthy.
 * @param {string} wikiContent - Current wiki markdown; falsy when no wiki exists.
 * @returns {string} The instruction text, lines joined with "\n".
 */
function buildQueryAgents(sourceFiles, save, wikiContent) {
  const sourceList = sourceFiles.map((f) => ` - ${f}`).join("\n");
  const hasWiki = Boolean(wikiContent);

  // Embedded as ONE element of the final list, so it carries its own trailing blank lines.
  const wikiSection = hasWiki
    ? [
        "## Knowledge Wiki (use this first)",
        "",
        "The wiki below contains knowledge already extracted from this knowledge base.",
        "If the user's question is covered here, answer directly from it \u2014 no need to re-read source files.",
        "Always cite the original source files mentioned in the wiki.",
        "",
        wikiContent,
        "",
        "---",
        "",
        ""
      ].join("\n")
    : "";
  const sourceStep = hasWiki ? "If not covered in the wiki above: read the sources" : "How to answer";

  const citationSection = [
    "# llm-kb Knowledge Base \u2014 Query Mode",
    "",
    "## MANDATORY: Citation Format",
    "Every answer MUST end with a CITATIONS block. No exceptions. Not optional. Even for simple or wiki-sourced answers.",
    "If you do not include a CITATIONS block, the answer is considered INVALID.",
    "",
    "You MUST place [1], [2], etc. inline in your answer text next to every claim or fact.",
    'Example: "Revenue grew 12% [1] driven by gaming segment expansion [2]."',
    "Every factual statement needs an inline citation number. Then ALWAYS end with:",
    "",
    "CITATIONS:",
    '[1] file: "document.pdf", page: 3, quote: "exact text from source"',
    '[2] file: "other.pdf", page: 7, quote: "another exact excerpt"',
    "",
    "Rules:",
    '- Use the ORIGINAL PDF filename (e.g. "report.pdf", not the .md version)',
    "- Quote must be an exact excerpt from the source text",
    "- Include the correct page number",
    "- Bounding boxes are optional but improve the experience. If you include them, use this format:",
    ' [1] file: "document.pdf", page: 3, quote: "exact text", bbox: {x: 142, y: 340, width: 234, height: 14}',
    "- To get bboxes, use a single bash script AFTER writing the full answer:",
    "```",
    'node -e "',
    " const fs = require('fs');",
    " const d = JSON.parse(fs.readFileSync('.llm-kb/wiki/sources/FILE.json','utf8'));",
    " const p = d.pages.find(p=>p.page===PAGE);",
    " const items = p.textItems.filter(t=>t.text.includes('KEYWORD'));",
    " console.log('source:', d.source, 'bbox:', JSON.stringify(items.map(t=>({x:t.x,y:t.y,w:t.width,h:t.height}))));",
    '"',
    "```",
    "- If answering from wiki, cite the original source files mentioned in the wiki entries",
    ""
  ];

  const answeringSection = [
    `## ${sourceStep}`,
    "",
    "1. Read .llm-kb/wiki/index.md to understand all available sources",
    "2. Select the most relevant source files (usually 2-5) and read them in full",
    "3. Answer the question with [1], [2] inline references",
    "4. If you can't find the answer, say so \u2014 don't hallucinate",
    "5. ALWAYS end with a CITATIONS block (see top of this file)",
    "",
    "## Available parsed sources",
    sourceList,
    ""
  ];

  const officeSection = [
    "## Non-PDF files (docx, xlsx, pptx)",
    "Use bash to run Node.js scripts. Libraries are pre-installed via require().",
    "",
    "### Word (.docx) \u2014 structured XML",
    ".docx files are ZIP archives containing word/document.xml.",
    "Read them SELECTIVELY \u2014 extract only what is relevant to the question:",
    "",
    "```javascript",
    "const AdmZip = require('adm-zip');",
    "const zip = new AdmZip('file.docx');",
    "const xml = zip.readAsText('word/document.xml');",
    "// Parse XML to find specific paragraphs, headings, tables",
    "```",
    "",
    "Strategy for large .docx files:",
    "1. First: extract headings/structure to understand the document layout",
    "2. Then: extract only the sections relevant to the user's question",
    "NEVER dump the entire document.",
    "",
    "### Excel (.xlsx) \u2014 use exceljs",
    "Read specific sheets and ranges, not the whole workbook:",
    "",
    "```javascript",
    "const ExcelJS = require('exceljs');",
    "const wb = new ExcelJS.Workbook();",
    "await wb.xlsx.readFile('file.xlsx');",
    "const sheet = wb.getWorksheet(1);",
    "// Read specific rows/columns relevant to the question",
    "```",
    "",
    "### PowerPoint (.pptx) \u2014 use officeparser",
    "",
    "```javascript",
    "const officeparser = require('officeparser');",
    "const text = await officeparser.parseOfficeAsync('file.pptx');",
    "```",
    ""
  ];

  const closingSection = [
    "## Rules",
    "- Always cite sources with filename and page number",
    "- Read the FULL source file, not just the beginning (for .md sources)",
    "- For non-PDF files, extract ONLY relevant sections \u2014 never dump entire files",
    "- Prefer primary sources over previous analyses",
    "",
    "## Guidelines",
    "A guidelines file may exist at .llm-kb/guidelines.md with learned rules from",
    "past evaluations and user preferences. Read it when:",
    "- You're unsure about citation accuracy or format",
    "- You're about to read source files (guidelines may suggest using wiki instead)",
    "- The question touches a topic that may have had issues in past evaluations"
  ];

  const researchSection = save
    ? [
        "",
        "## Research Mode",
        "Save your analysis to .llm-kb/wiki/outputs/ with a descriptive filename.",
        "Include the question at the top and all citations."
      ]
    : [];

  return [
    ...citationSection,
    wikiSection,
    ...answeringSection,
    ...officeSection,
    ...closingSection,
    ...researchSection
  ].join("\n");
}
|
|
1023
|
+
/**
 * Decides when a wiki update should run and serializes the updates so that
 * only one executes at a time.
 *
 * An update is due after every `everyN`-th finished assistant message, or
 * when more than `everyMin` minutes have passed since the previous update.
 * Updates are best-effort: a failing update never blocks the ones queued
 * after it and never makes `flush()` reject.
 */
var WikiUpdateScheduler = class {
  /**
   * @param {number} everyN - Trigger an update on every N-th assistant message that stopped normally.
   * @param {number} everyMin - Trigger an update when this many minutes elapsed since the last one.
   */
  constructor(everyN, everyMin) {
    this.everyN = everyN;
    this.everyMin = everyMin;
  }
  everyN;
  everyMin;
  /** Count of assistant messages seen with stopReason "stop". */
  stopMsgCount = 0;
  /** Epoch ms of the last scheduled update; 0 until the first one. */
  lastUpdateAt = 0;
  /** Tail of the promise chain that runs queued work sequentially. */
  chain = Promise.resolve();

  /** @returns {boolean} Whether an update is due now (count-based or time-based). */
  shouldUpdate() {
    const countDue = this.stopMsgCount > 0 && this.stopMsgCount % this.everyN === 0;
    const timeDue = this.lastUpdateAt > 0 && Date.now() - this.lastUpdateAt > this.everyMin * 6e4;
    return countDue || timeDue;
  }

  /**
   * Appends a unit of work to the sequential chain.
   *
   * @param {() => (Promise<unknown>|unknown)} work - Runs after all previously queued work has settled.
   */
  enqueue(work) {
    this.chain = this.chain.then(async () => {
      try {
        // Awaiting inside try covers both a synchronous throw and an async rejection; either
        // would otherwise leave `chain` rejected and silently skip every later update.
        await work();
      } catch {
        // Deliberately ignored: wiki maintenance is best-effort and must not fail the query.
      }
    });
  }

  /**
   * Call for every finished message; schedules an update when one is due.
   *
   * @param {{role: string, stopReason?: string}} msg - The message that just ended.
   * @param {() => {messages: unknown[]}} snap - Returns the current conversation; evaluated lazily,
   *   when the queued update actually starts.
   * @param {(messages: unknown[]) => Promise<unknown>} doUpdate - Performs the update.
   */
  onMessageEnd(msg, snap, doUpdate) {
    if (msg.role !== "assistant" || msg.stopReason !== "stop") return;
    this.stopMsgCount++;
    if (this.shouldUpdate()) {
      this.lastUpdateAt = Date.now();
      this.enqueue(() => doUpdate(snap().messages));
    }
  }

  /**
   * Call when the agent run ends; always schedules an update.
   *
   * @param {unknown[]} msgs - Messages of the finished run.
   * @param {(messages: unknown[]) => Promise<unknown>} doUpdate - Performs the update.
   */
  onAgentEnd(msgs, doUpdate) {
    this.lastUpdateAt = Date.now();
    this.enqueue(() => doUpdate(msgs));
  }

  /** @returns {Promise<void>} Settles once all work queued so far has finished. */
  flush() {
    return this.chain;
  }
};
|
|
1056
|
+
/**
 * Subscribes to a session's event stream and renders progress, the streamed
 * answer, verified citations and a stats footer, either through the TUI
 * (`opts.tuiDisplay`) or directly to stdout. After each run it also records a
 * trace, appends to the query log and updates the wiki via a
 * WikiUpdateScheduler.
 *
 * @param {object} session - Agent session; uses `subscribe`, `state.messages`, `sessionId`, `sessionFile`.
 * @param {object} opts
 * @param {object} [opts.tuiDisplay] - TUI adapter; when absent, output goes to stdout.
 * @param {string} opts.folder - Knowledge-base root folder.
 * @param {string[]} opts.mdFiles - Names of the available parsed source files.
 * @param {*} [opts.authStorage] - Forwarded to `updateWiki`.
 * @param {string} [opts.modelId] - Model name shown at the start of a response.
 * @returns {{setQuestion: (q: string) => void, flush: () => Promise<void>}}
 *   `setQuestion` records the question stored in traces; `flush` waits for queued updates.
 */
function subscribeDisplay(session, opts) {
  const ui = opts.tuiDisplay;
  const dim = (s) => process.stdout.isTTY ? chalk2.dim(s) : s;
  const thinLine = () => dim("\u2500".repeat(process.stdout.columns || 80));

  // Per-run display state; reset on every agent_start.
  let phase = "idle";
  let filesReadCount = 0;
  let shownToolCalls = /* @__PURE__ */ new Set();
  let startTime = Date.now();
  let md = new MarkdownStream(process.stdout.isTTY ?? false);
  let lastQuestion = "";

  const scheduler = new WikiUpdateScheduler(5, 3);
  const sourcesDir = join4(opts.folder, ".llm-kb", "wiki", "sources");

  /** Resolves citations against the parsed sources; falls back to the raw citations on any failure. */
  const verifyCitations = async (citations) => {
    if (citations.length === 0) return citations;
    try {
      const verified = await Promise.all(
        citations.map((c) => matchCitation(c, sourcesDir))
      );
      // Prefer the merged rectangle when the matcher produced one.
      return verified.map((m) => ({
        file: m.file,
        page: m.page,
        quote: m.quote,
        bbox: m.mergedRect ? m.mergedRect : m.bbox,
        pages: m.pages
      }));
    } catch {
      return citations;
    }
  };

  /** Builds a query trace from the last assistant message that stopped normally, or null if none. */
  const buildTrace2 = (messages) => {
    const last = [...messages].reverse().find((m) => m.role === "assistant" && m.stopReason === "stop");
    if (!last) return null;
    const filesRead = extractFilesRead(messages);
    const fullAnswer = extractAnswerText(last.content);
    const parsed = parseCitations(fullAnswer);
    return {
      sessionId: session.sessionId,
      sessionFile: session.sessionFile ?? "",
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      mode: "query",
      question: lastQuestion,
      answer: fullAnswer,
      answerWithoutCitations: parsed.answer,
      citations: parsed.citations.length > 0 ? parsed.citations : void 0,
      filesRead,
      filesAvailable: opts.mdFiles,
      filesSkipped: opts.mdFiles.filter((f) => !filesRead.some((r) => r.endsWith(f))),
      model: last.model
    };
  };

  /** Persists the trace, appends it to the query log and folds the Q&A into the wiki. */
  const doUpdate = async (messages) => {
    const trace = buildTrace2(messages);
    if (!trace) return;
    if (trace.citations && trace.citations.length > 0) {
      trace.citations = await verifyCitations(trace.citations);
    }
    await saveTrace(opts.folder, trace);
    await appendToQueryLog(opts.folder, trace);
    await updateWiki(opts.folder, trace, opts.authStorage);
  };

  /** Plain-stdout rendering of the verified citation list. */
  const printCitations = (citations) => {
    process.stdout.write(`\n${dim("\u2500\u2500 Citations " + "\u2500".repeat(Math.max(0, (process.stdout.columns || 80) - 14)))}\n`);
    for (let i = 0; i < citations.length; i++) {
      const c = citations[i];
      const multiPage = c.pages && c.pages.length > 0;
      const pageStr = multiPage ? `p.${c.pages.map((p) => p.page).join("-")}` : `p.${c.page}`;
      let bboxDetail;
      if (multiPage) {
        bboxDetail = `\u2705 bbox (${c.pages.length} pages)`;
      } else if (c.bbox) {
        bboxDetail = `\u2705 bbox (${c.bbox.x},${c.bbox.y} \u2192 ${Math.round(c.bbox.x + c.bbox.width)},${Math.round(c.bbox.y + c.bbox.height)})`;
      } else {
        bboxDetail = `\u26A0\uFE0F no bbox`;
      }
      // Long quotes are shortened to 60 characters including the ellipsis.
      const quote = c.quote.length > 60 ? c.quote.slice(0, 57) + "..." : c.quote;
      process.stdout.write(`\n${chalk2.bold(`[${i + 1}]`)} \u{1F4C4} ${c.file}, ${pageStr}\n`);
      process.stdout.write(dim(` "${quote}"`) + "\n");
      process.stdout.write(` ${bboxDetail}\n`);
    }
  };

  /** Plain-stdout footer: elapsed time, where the answer came from, citation count. */
  const printStats = (citations) => {
    const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
    const source = filesReadCount > 0 ? `${filesReadCount} file${filesReadCount !== 1 ? "s" : ""} read` : "wiki";
    const citCount = citations.length > 0 ? ` \xB7 ${citations.length} citation${citations.length !== 1 ? "s" : ""}` : "";
    const stats = `${elapsed}s \xB7 ${source}${citCount}`;
    const cols = process.stdout.columns || 80;
    const pad = Math.max(0, cols - stats.length - 4);
    process.stdout.write(`\n${dim("\u2500\u2500 " + stats + " " + "\u2500".repeat(pad))}\n`);
  };

  /** Handles the streaming sub-events (thinking, tool calls, answer text) of a message_update. */
  const handleAssistantUpdate = (ae) => {
    switch (ae.type) {
      case "thinking_start":
        if (!ui) process.stdout.write(dim("\n\u25B8 Thinking\n"));
        phase = "thinking";
        break;
      case "thinking_delta":
        if (ui) ui.appendThinking(ae.delta);
        else process.stdout.write(dim(` ${ae.delta}`));
        break;
      case "thinking_end":
        if (ui) ui.endThinking();
        else process.stdout.write("\n");
        break;
      case "toolcall_end": {
        const toolCall = ae.toolCall;
        if (!toolCall) break;
        const label = getToolLabel(toolCall.name, toolCall.arguments);
        if (!label) break;
        if (!ui && phase !== "tools") process.stdout.write("\n");
        phase = "tools";
        const bashCommand = toolCall.name === "bash" ? toolCall.arguments?.command : void 0;
        if (ui) {
          ui.addToolCall(toolCall.id, label, toolCall.name);
          if (bashCommand) ui.addCodeBlock(bashCommand);
        } else {
          process.stdout.write(dim(` \u25B8 ${label}`) + "\n");
          if (bashCommand) {
            process.stdout.write(dim(bashCommand.split("\n").map((l) => ` ${l}`).join("\n")) + "\n");
          }
          // Remember the call so tool_execution_start does not print it a second time.
          shownToolCalls.add(toolCall.id);
          if (toolCall.name === "read") filesReadCount++;
        }
        break;
      }
      case "text_start":
        if (phase !== "answer") {
          if (ui) ui.beginAnswer();
          else if (phase === "thinking" || phase === "tools") {
            process.stdout.write(`\n${thinLine()}\n\n`);
          }
          phase = "answer";
        }
        break;
      case "text_delta":
        if (ui) ui.appendAnswer(ae.delta);
        else process.stdout.write(md.push(ae.delta));
        break;
      case "text_end":
        if (!ui) process.stdout.write(md.end());
        break;
      default:
        break;
    }
  };

  session.subscribe((event) => {
    switch (event.type) {
      case "agent_start": {
        phase = "idle";
        filesReadCount = 0;
        shownToolCalls = /* @__PURE__ */ new Set();
        startTime = Date.now();
        md = new MarkdownStream(process.stdout.isTTY ?? false);
        const modelName = opts.modelId ?? "claude-sonnet-4-6";
        if (ui) {
          ui.disableInput();
          ui.beginResponse(modelName);
        } else process.stdout.write(dim(`\u27E1 ${modelName}`) + "\n");
        break;
      }
      case "message_update":
        handleAssistantUpdate(event.assistantMessageEvent);
        break;
      case "tool_execution_start": {
        const { toolCallId, toolName, args } = event;
        if (ui) {
          const label = getToolLabel(toolName, args);
          if (label) ui.addToolCall(toolCallId, label, toolName);
        } else if (!shownToolCalls.has(toolCallId)) {
          const label = getToolLabel(toolName, args);
          if (label) {
            if (phase !== "tools") process.stdout.write("\n");
            phase = "tools";
            process.stdout.write(dim(` \u25B8 ${label}`) + "\n");
            shownToolCalls.add(toolCallId);
            if (toolName === "read") filesReadCount++;
          }
        }
        break;
      }
      case "tool_execution_end": {
        const { toolCallId, isError } = event;
        if (ui) ui.addToolResult(toolCallId, isError);
        break;
      }
      case "agent_end": {
        const rawCitations = buildTrace2(event.messages)?.citations ?? [];
        verifyCitations(rawCitations).then((citations) => {
          if (ui) {
            try {
              ui.showCompletion(citations);
            } finally {
              // Input was disabled on agent_start; always hand it back, even if rendering failed.
              ui.enableInput();
            }
            return;
          }
          if (citations.length > 0) printCitations(citations);
          printStats(citations);
        }).catch(() => {
          // Rendering the completion summary is best-effort; without this handler a rendering
          // error would surface as an unhandled promise rejection.
        });
        // NOTE(review): when onMessageEnd has already scheduled an update for the same final
        // message, doUpdate runs twice for this run — confirm that is intended.
        scheduler.onAgentEnd(event.messages, doUpdate);
        break;
      }
      case "message_end":
        scheduler.onMessageEnd(event.message, () => ({ messages: session.state.messages }), doUpdate);
        break;
      default:
        break;
    }
  });

  return {
    setQuestion(q) {
      lastQuestion = q;
    },
    flush() {
      return scheduler.flush();
    }
  };
}
|
|
1265
|
+
/**
 * Creates an agent session for querying the knowledge base in `folder` and
 * attaches the display/trace subscriber to it.
 *
 * @param {string} folder - Knowledge-base root folder.
 * @param {object} options
 * @param {boolean} [options.save] - Research mode: creates the outputs folder and starts a new
 *   KB session instead of continuing the previous one.
 * @param {string} [options.modelId] - When set, model candidates are resolved for it and the
 *   session is wrapped by `createRetryingSession`.
 * @param {*} [options.authStorage] - Credentials handle passed through to the session and wiki updates.
 * @param {object} [options.tuiDisplay] - TUI adapter forwarded to `subscribeDisplay`.
 * @returns {Promise<{session: object, display: object, reloadSources: () => Promise<void>}>}
 * @throws {Error} When no parsed sources exist, or no usable model is found for `options.modelId`.
 */
async function createChat(folder, options) {
  const sourcesDir = join4(folder, ".llm-kb", "wiki", "sources");

  let files;
  try {
    files = await readdir3(sourcesDir);
  } catch (err) {
    // A missing sources folder simply means nothing has been parsed yet; report that with the
    // same actionable message as an empty folder rather than a raw ENOENT.
    if (err?.code !== "ENOENT") throw err;
    files = [];
  }
  const mdFiles = files.filter((f) => f.endsWith(".md"));
  if (mdFiles.length === 0) throw new Error("No sources found. Run 'llm-kb run' first.");

  if (options.save) await mkdir3(join4(folder, ".llm-kb", "wiki", "outputs"), { recursive: true });
  // NOTE(review): presumably lets Node scripts run by the bash tool require() the bundled
  // libraries — confirm against getNodeModulesPath.
  process.env.NODE_PATH = getNodeModulesPath();

  const wikiPath = join4(folder, ".llm-kb", "wiki", "wiki.md");
  const save = !!options.save;
  const loader = new DefaultResourceLoader({
    cwd: folder,
    // Re-reads the source list and the wiki on every reload so the instructions stay current.
    agentsFilesOverride: (current) => {
      const currentFiles = readdirSync(sourcesDir).filter((f) => f.endsWith(".md"));
      const wiki = existsSync4(wikiPath) ? readFileSync(wikiPath, "utf-8") : "";
      const content = buildQueryAgents(currentFiles, save, wiki);
      return {
        agentsFiles: [...current.agentsFiles, { path: ".llm-kb/AGENTS.md", content }]
      };
    }
  });
  await loader.reload();

  const tools = [
    createReadTool(folder),
    createBashTool(folder),
    createWriteTool(folder)
  ];

  // Options shared by every session this function creates; a session manager is obtained
  // afresh for each session.
  const buildSessionOptions = async () => ({
    cwd: folder,
    resourceLoader: loader,
    tools,
    sessionManager: options.save ? await createKBSession(folder) : await continueKBSession(folder),
    settingsManager: SettingsManager.inMemory({ compaction: { enabled: false } }),
    thinkingLevel: "low",
    ...options.authStorage ? { authStorage: options.authStorage } : {}
  });

  let session;
  if (options.modelId) {
    const candidates = await resolveModelCandidates(options.modelId, options.authStorage, "query");
    if (candidates.length === 0) {
      throw new Error(`No usable model found for '${options.modelId}'. Configure Anthropic, OpenRouter, or OpenAI credentials.`);
    }
    session = await createRetryingSession({
      candidates,
      validatePromptResult: validateQueryResult,
      createSession: async (candidate) => {
        const { session: session2 } = await createAgentSession({
          ...await buildSessionOptions(),
          model: candidate.model
        });
        return session2;
      }
    });
  } else {
    session = (await createAgentSession(await buildSessionOptions())).session;
  }

  const display = subscribeDisplay(session, {
    modelId: options.modelId,
    authStorage: options.authStorage,
    folder,
    mdFiles,
    tuiDisplay: options.tuiDisplay
  });

  /** Rebuilds the agent instructions (source list + wiki) and reloads the session. */
  async function reloadSources() {
    await loader.reload();
    await session.reload();
  }

  return { session, display, reloadSources };
}
|
|
1333
|
+
/**
 * Runs a single question against the knowledge base in `folder`.
 *
 * Creates a chat session, prompts it with the question, waits for the queued
 * trace/wiki updates, and disposes the session. In save mode the source
 * index is rebuilt afterwards.
 *
 * @param {string} folder - Knowledge-base root folder.
 * @param {string} question - The question to ask.
 * @param {object} options - Same options as `createChat` (`save`, `modelId`, `authStorage`, ...).
 * @returns {Promise<void>}
 */
async function query(folder, question, options) {
  const { session, display } = await createChat(folder, options);
  try {
    session.setSessionName(`query: ${question}`);
    display.setQuestion(question);
    await session.prompt(question);
    await display.flush();
  } finally {
    // Release the session even when prompting or flushing fails.
    session.dispose();
  }
  if (options.save) {
    const sourcesDir = join4(folder, ".llm-kb", "wiki", "sources");
    const { buildIndex } = await import("./indexer-K37QM2HP.js");
    await buildIndex(folder, sourcesDir, void 0, options.authStorage);
  }
}
|
|
1346
|
+
|
|
1347
|
+
export {
|
|
1348
|
+
buildTrace,
|
|
1349
|
+
saveTrace,
|
|
1350
|
+
appendToQueryLog,
|
|
1351
|
+
completeWithFallback,
|
|
1352
|
+
updateWiki,
|
|
1353
|
+
parseCitations,
|
|
1354
|
+
createChat,
|
|
1355
|
+
query
|
|
1356
|
+
};
|