agentel 0.2.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -63
- package/agentlog-spec.md +42 -35
- package/bin/agentlog-recall.js +2 -0
- package/bin/agentlog.js +12 -0
- package/docs/code-reference.md +120 -34
- package/docs/history-source-handling.md +236 -81
- package/docs/release.md +8 -8
- package/package.json +5 -4
- package/src/archive.js +279 -20
- package/src/cli.js +3457 -511
- package/src/config.js +42 -1
- package/src/doctor.js +167 -10
- package/src/importers/gemini.js +369 -7
- package/src/importers.js +1893 -133
- package/src/mcp.js +4 -1
- package/src/parser-versions.js +37 -22
- package/src/paths.js +4 -2
- package/src/redaction.js +140 -17
- package/src/search.js +671 -52
- package/src/supervisor.js +206 -57
- package/src/sync.js +459 -12
package/src/search.js
CHANGED
|
@@ -10,10 +10,33 @@ const { loadConfig } = require("./config");
|
|
|
10
10
|
const { paths, ensureDir, readJson, writeJson } = require("./paths");
|
|
11
11
|
const { canonicalRepo } = require("./repo");
|
|
12
12
|
|
|
13
|
+
// Layout version written into the JSON index and the FTS `meta` table;
// loaders compare stored values against it before trusting a file.
const INDEX_VERSION = 3;

// Window (ms) during which a cached index/FTS check is reused as-is.
const INDEX_STALE_CHECK_TTL_MS = 5 * 1000;

// Timeout (ms) passed to every `sqlite3` child process.
const SQLITE_QUERY_TIMEOUT_MS = 5 * 1000;

// Number of documents written per sqlite3 transaction while building.
const SQLITE_BUILD_BATCH_SIZE = 100;

// Timeout (ms) passed to each `rg` invocation.
const RIPGREP_SEARCH_TIMEOUT_MS = 8 * 1000;

// Number of files handed to a single `rg` invocation.
const RIPGREP_BATCH_FILE_COUNT = 200;

// Value passed to ripgrep's `--max-count` flag.
const MARKDOWN_MATCHES_PER_FILE = 3;

// In-process memo of the last JSON index seen, keyed by path + stat data.
const _indexCache = { path: "", mtimeMs: 0, size: 0, checkedAtMs: 0, index: null };

// In-process memo of the last FTS sidecar availability check.
const _ftsCache = { path: "", mtimeMs: 0, size: 0, checkedAtMs: 0, available: false };
|
|
34
|
+
|
|
13
35
|
function buildIndex(env = process.env) {
|
|
14
36
|
const sessions = listSessions(env);
|
|
15
37
|
const docs = [];
|
|
16
|
-
const
|
|
38
|
+
const postings = Object.create(null);
|
|
39
|
+
const df = Object.create(null);
|
|
17
40
|
let totalLength = 0;
|
|
18
41
|
|
|
19
42
|
for (const session of sessions) {
|
|
@@ -28,16 +51,20 @@ function buildIndex(env = process.env) {
|
|
|
28
51
|
for (const chunk of chunkText(indexText)) {
|
|
29
52
|
const tokens = tokenize(chunk);
|
|
30
53
|
if (!tokens.length) continue;
|
|
31
|
-
const tf =
|
|
54
|
+
const tf = Object.create(null);
|
|
32
55
|
for (const token of tokens) tf[token] = (tf[token] || 0) + 1;
|
|
33
|
-
|
|
56
|
+
const docIndex = docs.length;
|
|
57
|
+
for (const token of new Set(tokens)) {
|
|
58
|
+
df[token] = (df[token] || 0) + 1;
|
|
59
|
+
if (!postings[token]) postings[token] = [];
|
|
60
|
+
postings[token].push([docIndex, tf[token]]);
|
|
61
|
+
}
|
|
34
62
|
totalLength += tokens.length;
|
|
35
63
|
docs.push({
|
|
36
64
|
...sourceDoc,
|
|
37
65
|
id: sourceDoc.id || `${session.sessionId}:${sourceDoc.messageIndex ?? docs.length}:${docs.length}`,
|
|
38
66
|
text: chunk,
|
|
39
67
|
matchedText: chunk,
|
|
40
|
-
tf,
|
|
41
68
|
length: tokens.length
|
|
42
69
|
});
|
|
43
70
|
}
|
|
@@ -45,19 +72,360 @@ function buildIndex(env = process.env) {
|
|
|
45
72
|
}
|
|
46
73
|
|
|
47
74
|
const index = {
|
|
48
|
-
version:
|
|
75
|
+
version: INDEX_VERSION,
|
|
49
76
|
builtAt: new Date().toISOString(),
|
|
50
77
|
docCount: docs.length,
|
|
51
78
|
avgDocLength: docs.length ? totalLength / docs.length : 0,
|
|
52
79
|
df,
|
|
80
|
+
postings,
|
|
53
81
|
docs
|
|
54
82
|
};
|
|
55
83
|
const indexPath = paths(env).index;
|
|
56
84
|
ensureDir(path.dirname(indexPath));
|
|
57
|
-
writeJson(indexPath, index);
|
|
85
|
+
writeJson(indexPath, index, { pretty: false });
|
|
86
|
+
buildFtsIndex(index, env);
|
|
87
|
+
rememberIndexCache(indexPath, index);
|
|
58
88
|
return index;
|
|
59
89
|
}
|
|
60
90
|
|
|
91
|
+
/**
 * Rebuild the FTS sidecar via buildFtsIndexSummary and persist the returned
 * summary record (not a full document index) at the JSON index path.
 *
 * @param {object} [env] Environment used to resolve paths.
 * @returns {object} The summary returned by buildFtsIndexSummary.
 */
function buildIndexSummary(env = process.env) {
  const result = buildFtsIndexSummary(env);
  const { index: target } = paths(env);
  ensureDir(path.dirname(target));
  writeJson(target, result, { pretty: false });
  rememberIndexCache(target, result);
  return result;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Reduce an index object to its header fields, tagged with `summaryOnly`.
 *
 * @param {object|null|undefined} index Index (or summary) object.
 * @returns {object|null} Header fields, or null when `index` is falsy.
 */
function summarizeIndex(index) {
  if (!index) return null;
  const { version, builtAt, docCount, avgDocLength } = index;
  return { version, builtAt, docCount, avgDocLength, summaryOnly: true };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Read header fields of the JSON index without parsing the whole file.
 *
 * Only the first 8192 bytes are read; `version`, `builtAt`, `docCount` and
 * `avgDocLength` are pulled out of that prefix with regular expressions.
 *
 * @param {string} indexPath Path of the JSON index file.
 * @returns {object|null} Summary object, or null when the file does not exist
 *   or neither `version` nor `docCount` was found in the prefix.
 * @throws Any filesystem error other than ENOENT.
 */
function readIndexSummary(indexPath) {
  const headerBytes = 8192;
  let handle = null;
  try {
    handle = fs.openSync(indexPath, "r");
    const chunk = Buffer.alloc(headerBytes);
    const length = fs.readSync(handle, chunk, 0, chunk.length, 0);
    const head = chunk.toString("utf8", 0, length);
    const version = readJsonHeaderNumber(head, "version");
    const builtAt = readJsonHeaderString(head, "builtAt");
    const docCount = readJsonHeaderNumber(head, "docCount");
    const avgDocLength = readJsonHeaderNumber(head, "avgDocLength");
    if (version == null && docCount == null) return null;
    return { version, builtAt, docCount, avgDocLength, summaryOnly: true };
  } catch (error) {
    if (error.code !== "ENOENT") throw error;
    return null;
  } finally {
    // Always release the descriptor, including on the error paths above.
    if (handle !== null) fs.closeSync(handle);
  }
}
|
|
133
|
+
|
|
134
|
+
/**
 * Extract the first `"key": <number>` occurrence from a JSON text fragment.
 * Matches an optional sign, digits and an optional decimal part.
 *
 * @param {string} header JSON text to scan.
 * @param {string} key Property name to look for.
 * @returns {number|undefined} Parsed number, or undefined when not found.
 */
function readJsonHeaderNumber(header, key) {
  const pattern = new RegExp(`"${escapeRegExp(key)}"\\s*:\\s*(-?\\d+(?:\\.\\d+)?)`);
  const found = header.match(pattern);
  if (!found) return undefined;
  return Number(found[1]);
}
|
|
138
|
+
|
|
139
|
+
/**
 * Extract the first `"key": "<value>"` occurrence from a JSON text fragment.
 * The value is captured up to the next double quote; escapes are not decoded.
 *
 * @param {string} header JSON text to scan.
 * @param {string} key Property name to look for.
 * @returns {string|undefined} Captured text, or undefined when not found.
 */
function readJsonHeaderString(header, key) {
  const pattern = new RegExp(`"${escapeRegExp(key)}"\\s*:\\s*"([^"]*)"`);
  const found = header.match(pattern);
  if (!found) return undefined;
  return found[1];
}
|
|
143
|
+
|
|
144
|
+
/**
 * Backslash-escape regular-expression metacharacters so the value can be
 * embedded literally in a RegExp source string.
 *
 * @param {*} value Value to escape; coerced with String().
 * @returns {string} Escaped text.
 */
function escapeRegExp(value) {
  const text = String(value);
  return text.replace(/[.*+?^${}()|[\]\\]/g, (character) => `\\${character}`);
}
|
|
147
|
+
|
|
148
|
+
/**
 * Build the SQLite FTS sidecar from an already-built in-memory index.
 *
 * The database is created at a pid-suffixed temporary path and renamed over
 * the final path once every step succeeded. The sidecar is optional: any
 * failure is swallowed, both the temporary and the final file are removed,
 * and false is returned.
 *
 * @param {object} index Index whose `docs`, `builtAt` and `docCount` are stored.
 * @param {object} [env] Environment used to resolve paths.
 * @returns {boolean} true when the sidecar was written, false otherwise.
 */
function buildFtsIndex(index, env = process.env) {
  const ftsPath = paths(env).ftsIndex;
  const tmpPath = `${ftsPath}.${process.pid}.tmp`;

  const removeQuietly = (file) => {
    try {
      fs.rmSync(file, { force: true });
    } catch {
      // Best effort: cleanup failures never abort the optional FTS build.
    }
  };

  try {
    ensureDir(path.dirname(ftsPath));
    // Clear leftovers of a previous attempt made by this same pid.
    [tmpPath, `${tmpPath}-journal`, `${tmpPath}-wal`, `${tmpPath}-shm`].forEach(removeQuietly);

    const schemaScript = [
      "PRAGMA journal_mode=OFF;",
      "PRAGMA synchronous=OFF;",
      "CREATE TABLE meta(key TEXT PRIMARY KEY, value TEXT NOT NULL);",
      "CREATE TABLE docs(",
      " rowid INTEGER PRIMARY KEY,",
      " doc_id TEXT,",
      " session_id TEXT,",
      " provider TEXT,",
      " source_type TEXT,",
      " repo_canonical TEXT,",
      " repo_display TEXT,",
      " scope_canonical TEXT,",
      " cwd TEXT,",
      " title TEXT,",
      " started_at TEXT,",
      " occurred_at TEXT,",
      " role TEXT,",
      " event_id TEXT,",
      " event_kind TEXT,",
      " message_index INTEGER,",
      " path TEXT,",
      " matched_text TEXT",
      ");",
      "CREATE VIRTUAL TABLE docs_fts USING fts5(text, tokenize='unicode61', prefix='2 3 4');",
      `INSERT INTO meta(key, value) VALUES ('version', ${sqliteString(String(INDEX_VERSION))});`,
      `INSERT INTO meta(key, value) VALUES ('builtAt', ${sqliteString(index.builtAt || "")});`,
      `INSERT INTO meta(key, value) VALUES ('docCount', ${sqliteString(String(index.docCount || 0))});`
    ].join("\n");
    runSqliteScript(tmpPath, schemaScript);

    insertFtsDocs(tmpPath, index.docs || [], 1);
    runSqliteScript(tmpPath, "INSERT INTO docs_fts(docs_fts) VALUES('optimize');");
    fs.renameSync(tmpPath, ftsPath);
    rememberFtsCache(ftsPath, true);
    return true;
  } catch {
    // Drop the half-built temp file and any existing sidecar, then record
    // the sidecar as unavailable.
    removeQuietly(tmpPath);
    removeQuietly(ftsPath);
    rememberFtsCache(ftsPath, false);
    return false;
  }
}
|
|
210
|
+
|
|
211
|
+
/**
 * Build the SQLite FTS sidecar directly from the stored sessions, flushing
 * documents to sqlite3 in batches instead of holding them all in memory, and
 * return only summary statistics.
 *
 * The database is created at a pid-suffixed temporary path and renamed over
 * the final path at the end. On failure both files are removed, the sidecar
 * is recorded as unavailable, and the error is rethrown.
 *
 * @param {object} [env] Environment used to resolve paths and list sessions.
 * @returns {object} `{ version, builtAt, docCount, avgDocLength, summaryOnly }`.
 * @throws Whatever error interrupted the build.
 */
function buildFtsIndexSummary(env = process.env) {
  const ftsPath = paths(env).ftsIndex;
  const tmpPath = `${ftsPath}.${process.pid}.tmp`;
  const builtAt = new Date().toISOString();
  let docCount = 0;
  let totalLength = 0;

  const removeQuietly = (file) => {
    try {
      fs.rmSync(file, { force: true });
    } catch {
      // Best effort: cleanup failures are ignored.
    }
  };

  try {
    ensureDir(path.dirname(ftsPath));
    // Clear leftovers of a previous attempt made by this same pid.
    [tmpPath, `${tmpPath}-journal`, `${tmpPath}-wal`, `${tmpPath}-shm`].forEach(removeQuietly);

    const schemaScript = [
      "PRAGMA journal_mode=OFF;",
      "PRAGMA synchronous=OFF;",
      "CREATE TABLE meta(key TEXT PRIMARY KEY, value TEXT NOT NULL);",
      "CREATE TABLE docs(",
      " rowid INTEGER PRIMARY KEY,",
      " doc_id TEXT,",
      " session_id TEXT,",
      " provider TEXT,",
      " source_type TEXT,",
      " repo_canonical TEXT,",
      " repo_display TEXT,",
      " scope_canonical TEXT,",
      " cwd TEXT,",
      " title TEXT,",
      " started_at TEXT,",
      " occurred_at TEXT,",
      " role TEXT,",
      " event_id TEXT,",
      " event_kind TEXT,",
      " message_index INTEGER,",
      " path TEXT,",
      " matched_text TEXT",
      ");",
      "CREATE VIRTUAL TABLE docs_fts USING fts5(text, tokenize='unicode61', prefix='2 3 4');",
      `INSERT INTO meta(key, value) VALUES ('version', ${sqliteString(String(INDEX_VERSION))});`,
      `INSERT INTO meta(key, value) VALUES ('builtAt', ${sqliteString(builtAt)});`
    ].join("\n");
    runSqliteScript(tmpPath, schemaScript);

    let pending = [];
    // Rowids are 1-based and contiguous: the first pending doc has rowid
    // (docCount - pending.length + 1).
    const flush = () => {
      insertFtsDocs(tmpPath, pending, docCount - pending.length + 1);
      pending = [];
    };

    for (const session of listSessions(env)) {
      if (session.conversationPath && !fs.existsSync(session.conversationPath)) {
        ensureConversationMarkdown(session, env);
      }
      const events = readEvents(session);
      let sourceDocs = events.length ? docsForEvents(session, events) : [];
      if (!sourceDocs.length) {
        // No event-derived documents: fall back to the transcript.
        ensureConversationMarkdown(session, env);
        sourceDocs = docsForTranscript(session, readTranscript(session.transcriptPath));
      }

      for (const sourceDoc of sourceDocs) {
        const indexText = normalizeIndexText(sourceDoc.text);
        if (!indexText) continue;
        for (const chunk of chunkText(indexText)) {
          const tokens = tokenize(chunk);
          if (!tokens.length) continue;
          const docIndex = docCount;
          totalLength += tokens.length;
          docCount += 1;
          pending.push({
            ...sourceDoc,
            id: sourceDoc.id || `${session.sessionId}:${sourceDoc.messageIndex ?? docIndex}:${docIndex}`,
            text: chunk,
            matchedText: chunk,
            length: tokens.length
          });
          if (pending.length >= SQLITE_BUILD_BATCH_SIZE) flush();
        }
      }
    }
    if (pending.length) flush();

    runSqliteScript(tmpPath, [
      `INSERT INTO meta(key, value) VALUES ('docCount', ${sqliteString(String(docCount))});`,
      "INSERT INTO docs_fts(docs_fts) VALUES('optimize');"
    ].join("\n"));
    fs.renameSync(tmpPath, ftsPath);
    rememberFtsCache(ftsPath, true);
  } catch (error) {
    removeQuietly(tmpPath);
    removeQuietly(ftsPath);
    rememberFtsCache(ftsPath, false);
    throw error;
  }

  return {
    version: INDEX_VERSION,
    builtAt,
    docCount,
    avgDocLength: docCount ? totalLength / docCount : 0,
    summaryOnly: true
  };
}
|
|
314
|
+
|
|
315
|
+
/**
 * Insert documents into the `docs` and `docs_fts` tables of an FTS database.
 *
 * Documents are written in transactions of SQLITE_BUILD_BATCH_SIZE rows, one
 * sqlite3 invocation per transaction. Rowids are assigned consecutively
 * starting at `rowidStart`, and the same rowid is used in both tables so
 * they can be joined.
 *
 * @param {string} dbPath Path of the SQLite database file.
 * @param {object[]} docs Documents to insert (camelCase fields).
 * @param {number} [rowidStart=1] Rowid given to the first document.
 * @throws Whatever runSqliteScript throws for a failed batch.
 */
function insertFtsDocs(dbPath, docs, rowidStart = 1) {
  for (let start = 0; start < docs.length; start += SQLITE_BUILD_BATCH_SIZE) {
    const statements = ["BEGIN;"];
    const batch = docs.slice(start, start + SQLITE_BUILD_BATCH_SIZE);
    batch.forEach((doc, offset) => {
      const rowid = rowidStart + start + offset;
      // Number(null) and Number("") are both 0, so a bare finiteness check
      // would store a missing message index as 0. Treat nullish/blank values
      // as absent and write SQL NULL instead.
      const rawMessageIndex = doc.messageIndex;
      const hasMessageIndex =
        rawMessageIndex != null &&
        String(rawMessageIndex).trim() !== "" &&
        Number.isFinite(Number(rawMessageIndex));
      const values = [
        rowid,
        sqliteString(doc.id || ""),
        sqliteString(doc.sessionId || ""),
        sqliteString(doc.provider || ""),
        sqliteString(doc.sourceType || ""),
        sqliteString(doc.repoCanonical || ""),
        sqliteString(doc.repoDisplay || ""),
        sqliteString(doc.scopeCanonical || ""),
        sqliteString(doc.cwd || ""),
        sqliteString(doc.title || ""),
        sqliteString(doc.startedAt || ""),
        sqliteString(doc.occurredAt || ""),
        sqliteString(doc.role || ""),
        sqliteString(doc.eventId || ""),
        sqliteString(doc.eventKind || ""),
        hasMessageIndex ? Number(rawMessageIndex) : "NULL",
        sqliteString(doc.path || ""),
        sqliteString(doc.matchedText || "")
      ];
      statements.push(
        `INSERT INTO docs(rowid, doc_id, session_id, provider, source_type, repo_canonical, repo_display, scope_canonical, cwd, title, started_at, occurred_at, role, event_id, event_kind, message_index, path, matched_text) VALUES (` +
          values.join(", ") +
          ");"
      );
      statements.push(`INSERT INTO docs_fts(rowid, text) VALUES (${rowid}, ${sqliteString(doc.text || "")});`);
    });
    statements.push("COMMIT;");
    runSqliteScript(dbPath, statements.join("\n"));
  }
}
|
|
352
|
+
|
|
353
|
+
/**
 * Run a SQL script against a database by piping it to the `sqlite3` CLI.
 *
 * @param {string} dbPath Path of the SQLite database file.
 * @param {string} script SQL text written to the child's stdin.
 * @throws The spawn error (missing binary, timeout, ...) when one occurred,
 *   otherwise an Error carrying stderr/stdout when the exit status is not 0.
 */
function runSqliteScript(dbPath, script) {
  const { error, status, stderr, stdout } = spawnSync("sqlite3", [dbPath], {
    argv0: "agentlog-sqlite",
    input: script,
    encoding: "utf8",
    maxBuffer: 1024 * 1024 * 20,
    timeout: SQLITE_QUERY_TIMEOUT_MS
  });
  if (error) throw error;
  if (status === 0) return;
  throw new Error(String(stderr || stdout || "sqlite3 failed").trim());
}
|
|
364
|
+
|
|
365
|
+
/**
 * Run one query through the `sqlite3` CLI in `-json` mode and parse the rows.
 *
 * @param {string} dbPath Path of the SQLite database file.
 * @param {string} query SQL passed as a command-line argument.
 * @returns {Array|null} Parsed rows, `[]` when sqlite3 printed nothing, or
 *   null when the process failed, exited non-zero, or printed invalid JSON.
 */
function sqliteJson(dbPath, query) {
  const proc = spawnSync("sqlite3", [dbPath, "-json", query], {
    argv0: "agentlog-sqlite",
    encoding: "utf8",
    maxBuffer: 1024 * 1024 * 20,
    timeout: SQLITE_QUERY_TIMEOUT_MS
  });
  const failed = Boolean(proc.error) || proc.status !== 0;
  if (failed) return null;
  try {
    if (!proc.stdout.trim()) return [];
    return JSON.parse(proc.stdout);
  } catch {
    return null;
  }
}
|
|
379
|
+
|
|
380
|
+
/**
 * Render a value as a single-quoted SQL string literal, doubling embedded
 * single quotes. null and undefined become the empty literal.
 *
 * @param {*} value Value to quote; coerced with String().
 * @returns {string} SQL string literal.
 */
function sqliteString(value) {
  const text = value == null ? "" : String(value);
  return "'" + text.split("'").join("''") + "'";
}
|
|
383
|
+
|
|
384
|
+
/**
 * Record the outcome of an FTS availability check in the module-level cache,
 * together with the file's current mtime and size (0 when it cannot be
 * stat'ed). The sidecar is only recorded as available when the stat succeeded.
 *
 * @param {string} ftsPath Path of the FTS database file.
 * @param {boolean} available Whether the caller considers the sidecar usable.
 */
function rememberFtsCache(ftsPath, available) {
  let info;
  try {
    info = fs.statSync(ftsPath);
  } catch {
    info = null;
  }
  Object.assign(_ftsCache, {
    path: ftsPath,
    mtimeMs: info?.mtimeMs || 0,
    size: info?.size || 0,
    checkedAtMs: Date.now(),
    available: Boolean(available && info)
  });
}
|
|
397
|
+
|
|
398
|
+
/**
 * Decide whether the SQLite FTS sidecar can be used for searching.
 *
 * A positive result is reused while the file's mtime/size are unchanged and
 * the last check is younger than INDEX_STALE_CHECK_TTL_MS. Otherwise the
 * `meta` table must report the current INDEX_VERSION and, unless
 * `options.noStaleCheck` is set, indexIsStale must report the file as fresh.
 *
 * @param {object} [env] Environment used to resolve paths.
 * @param {{noStaleCheck?: boolean}} [options] Skip the staleness check.
 * @returns {boolean} true when the sidecar is usable.
 * @throws Any stat error other than ENOENT.
 */
function ftsIndexAvailable(env = process.env, options = {}) {
  const ftsPath = paths(env).ftsIndex;
  let stat;
  try {
    stat = fs.statSync(ftsPath);
  } catch (error) {
    if (error.code === "ENOENT") return false;
    throw error;
  }

  const cachedAndFresh =
    _ftsCache.path === ftsPath &&
    _ftsCache.available &&
    _ftsCache.mtimeMs === stat.mtimeMs &&
    _ftsCache.size === stat.size &&
    Date.now() - _ftsCache.checkedAtMs < INDEX_STALE_CHECK_TTL_MS;
  if (cachedAndFresh) return true;

  const rows = sqliteJson(ftsPath, "SELECT key, value FROM meta WHERE key IN ('version', 'docCount');");
  const versionMatches =
    Boolean(rows) && rows.some((row) => row.key === "version" && Number(row.value) === INDEX_VERSION);
  // The staleness probe only runs when the version matched and the caller
  // did not opt out of it.
  const usable = versionMatches && Boolean(options.noStaleCheck || !indexIsStale(ftsPath, env));
  rememberFtsCache(ftsPath, usable);
  return usable;
}
|
|
428
|
+
|
|
61
429
|
function docsForEvents(session, events) {
|
|
62
430
|
const indexedKinds = new Set([
|
|
63
431
|
EVENT_KINDS.PROMPT_SUBMITTED,
|
|
@@ -120,18 +488,60 @@ function docsForTranscript(session, messages) {
|
|
|
120
488
|
return docs;
|
|
121
489
|
}
|
|
122
490
|
|
|
123
|
-
function loadIndex(env = process.env) {
|
|
491
|
+
/**
 * Return the JSON search index, reusing the in-process cache when possible.
 *
 * Whenever the index is missing, has a different version, or is reported
 * stale by indexIsStale, it is rebuilt with buildIndex — unless
 * `options.noRebuild` is set, in which case null is returned instead.
 *
 * @param {object} [env] Environment used to resolve paths.
 * @param {{noRebuild?: boolean}} [options] Forbid rebuilding.
 * @returns {object|null} The index, or null when unusable and rebuilding is off.
 * @throws Any stat error other than ENOENT.
 */
function loadIndex(env = process.env, options = {}) {
  const allowRebuild = !options.noRebuild;
  const indexPath = paths(env).index;
  const rebuildOrNull = () => (allowRebuild ? buildIndex(env) : null);

  let stat = null;
  try {
    stat = fs.statSync(indexPath);
  } catch (error) {
    if (error.code !== "ENOENT") throw error;
  }
  if (!stat) return rebuildOrNull();

  const cacheMatchesFile =
    _indexCache.path === indexPath &&
    _indexCache.index?.version === INDEX_VERSION &&
    _indexCache.mtimeMs === stat.mtimeMs &&
    _indexCache.size === stat.size;
  if (cacheMatchesFile) {
    const checkedRecently = Date.now() - _indexCache.checkedAtMs < INDEX_STALE_CHECK_TTL_MS;
    if (checkedRecently) return _indexCache.index;
    if (!indexIsStale(indexPath, env)) {
      _indexCache.checkedAtMs = Date.now();
      return _indexCache.index;
    }
    if (!allowRebuild) return null;
  }

  // Check the header first so an outdated or stale file is never parsed in full.
  const summary = readIndexSummary(indexPath);
  if (summary?.version !== INDEX_VERSION) return rebuildOrNull();
  if (indexIsStale(indexPath, env)) return rebuildOrNull();

  const existing = readJson(indexPath, null);
  if (!existing || existing.version !== INDEX_VERSION) return rebuildOrNull();
  rememberIndexCache(indexPath, existing);
  return existing;
}
|
|
524
|
+
|
|
525
|
+
/**
 * Store an index object in the module-level cache together with the index
 * file's current mtime and size (0 when the file cannot be stat'ed).
 *
 * @param {string} indexPath Path of the JSON index file.
 * @param {object} index Index (or summary) object to cache.
 */
function rememberIndexCache(indexPath, index) {
  let info;
  try {
    info = fs.statSync(indexPath);
  } catch {
    info = null;
  }
  Object.assign(_indexCache, {
    path: indexPath,
    mtimeMs: info?.mtimeMs || 0,
    size: info?.size || 0,
    checkedAtMs: Date.now(),
    index
  });
}
|
|
129
538
|
|
|
130
539
|
function searchPastSessions(query, options = {}, env = process.env) {
|
|
131
540
|
try {
|
|
132
541
|
const eventResults = searchIndexedSessions(query, options, env);
|
|
133
|
-
if (eventResults.length) return eventResults;
|
|
542
|
+
if (eventResults.length || options.skipMarkdownFallback) return eventResults;
|
|
134
543
|
} catch {
|
|
544
|
+
if (options.skipMarkdownFallback) return [];
|
|
135
545
|
// Fall through to the legacy markdown path below.
|
|
136
546
|
}
|
|
137
547
|
return searchMarkdownSessions(query, options, env);
|
|
@@ -139,6 +549,7 @@ function searchPastSessions(query, options = {}, env = process.env) {
|
|
|
139
549
|
|
|
140
550
|
function searchMarkdownSessions(query, options = {}, env = process.env) {
|
|
141
551
|
const limit = Math.max(1, Math.min(Number(options.limit || 10), 50));
|
|
552
|
+
const maxMatches = Math.max(limit * 8, 40);
|
|
142
553
|
const includeWebChats = Boolean(options.includeWebChats);
|
|
143
554
|
const filter = normalizeSessionFilter(options);
|
|
144
555
|
const repo = filter.repo || inferCallingRepo(options.cwd || process.cwd());
|
|
@@ -161,16 +572,18 @@ function searchMarkdownSessions(query, options = {}, env = process.env) {
|
|
|
161
572
|
sessionByPath.set(path.resolve(session._searchPath), session);
|
|
162
573
|
}
|
|
163
574
|
|
|
164
|
-
const
|
|
575
|
+
const searchFiles = [...sessionByPath.keys()];
|
|
576
|
+
const matches = ripgrepMatches(queryTokens, searchFiles, { maxMatches }) || jsLineMatches(queryTokens, searchFiles, { maxMatches });
|
|
165
577
|
const ranked = [];
|
|
166
578
|
const seen = new Set();
|
|
579
|
+
const lineCache = new Map();
|
|
167
580
|
for (const match of matches) {
|
|
168
581
|
const session = sessionByPath.get(path.resolve(match.path));
|
|
169
582
|
if (!session) continue;
|
|
170
583
|
const key = `${session.sessionId}:${match.line}`;
|
|
171
584
|
if (seen.has(key)) continue;
|
|
172
585
|
seen.add(key);
|
|
173
|
-
const excerptText = readLineWindow(match.path, match.line, 4);
|
|
586
|
+
const excerptText = readLineWindow(match.path, match.line, 4, lineCache);
|
|
174
587
|
const lower = excerptText.toLowerCase();
|
|
175
588
|
const matchedTokens = queryTokens.filter((token) => lower.includes(token));
|
|
176
589
|
let score = matchedTokens.length * 5 + (phrase && lower.includes(phrase) ? 10 : 0);
|
|
@@ -185,15 +598,20 @@ function searchMarkdownSessions(query, options = {}, env = process.env) {
|
|
|
185
598
|
cwd: session.cwd || undefined,
|
|
186
599
|
title: session.title || undefined,
|
|
187
600
|
started_at: session.startedAt,
|
|
188
|
-
role: inferRoleFromMarkdown(match.path, match.line),
|
|
601
|
+
role: inferRoleFromMarkdown(match.path, match.line, lineCache),
|
|
189
602
|
excerpt: excerptText.replace(/\s+/g, " ").trim(),
|
|
190
603
|
score: Number(score.toFixed(4)),
|
|
191
604
|
session_link: session._searchPath
|
|
192
605
|
});
|
|
606
|
+
if (ranked.length >= maxMatches) break;
|
|
193
607
|
}
|
|
194
608
|
|
|
195
609
|
ranked.sort((a, b) => b.score - a.score || String(b.started_at).localeCompare(String(a.started_at)));
|
|
196
|
-
|
|
610
|
+
const bySession = new Map();
|
|
611
|
+
for (const item of ranked) {
|
|
612
|
+
if (!bySession.has(item.session_id)) bySession.set(item.session_id, item);
|
|
613
|
+
}
|
|
614
|
+
return [...bySession.values()].slice(0, limit);
|
|
197
615
|
}
|
|
198
616
|
|
|
199
617
|
function searchIndexedSessions(query, options = {}, env = process.env) {
|
|
@@ -202,16 +620,21 @@ function searchIndexedSessions(query, options = {}, env = process.env) {
|
|
|
202
620
|
const filter = normalizeSessionFilter(options);
|
|
203
621
|
const repo = filter.repo || inferCallingRepo(options.cwd || process.cwd());
|
|
204
622
|
const since = parseSinceFilter(options.since);
|
|
205
|
-
const index = loadIndex(env);
|
|
206
623
|
const queryTokens = tokenize(query);
|
|
207
624
|
const phrase = String(query || "").trim().toLowerCase();
|
|
208
625
|
if (!queryTokens.length && !phrase) return [];
|
|
626
|
+
const ftsResults = searchFtsSessions(query, queryTokens, { limit, includeWebChats, filter, repo, since, options }, env);
|
|
627
|
+
if (ftsResults) return ftsResults;
|
|
628
|
+
if (options.skipJsonIndex) return [];
|
|
629
|
+
const index = loadIndex(env, { noRebuild: Boolean(options.noRebuild) });
|
|
630
|
+
if (!index) return [];
|
|
209
631
|
|
|
210
632
|
const scored = [];
|
|
211
|
-
|
|
633
|
+
const candidates = candidateDocsForQuery(index, queryTokens, phrase);
|
|
634
|
+
for (const { doc, docIndex } of candidates) {
|
|
212
635
|
if (!matchesSessionFilter(doc, { ...filter, includeWebChats, since })) continue;
|
|
213
636
|
|
|
214
|
-
let score = bm25Score(doc, queryTokens, index);
|
|
637
|
+
let score = bm25Score(doc, queryTokens, index, docIndex, candidates.termFrequencies);
|
|
215
638
|
if (phrase && doc.text.toLowerCase().includes(phrase)) score += 2.5;
|
|
216
639
|
if (!options.repo && repo && doc.repoCanonical === repo) score *= 1.25;
|
|
217
640
|
if (score > 0) scored.push({ doc, score });
|
|
@@ -243,35 +666,167 @@ function searchIndexedSessions(query, options = {}, env = process.env) {
|
|
|
243
666
|
}));
|
|
244
667
|
}
|
|
245
668
|
|
|
246
|
-
function
|
|
669
|
+
/**
 * Search the SQLite FTS sidecar and return at most one hit per session.
 *
 * Rows come back from SQLite ordered by bm25 rank (lower is better). A small
 * bonus is applied to documents from the calling repository when no explicit
 * repo filter was given. Because the bonus is applied after SQL ordering,
 * every filtered candidate is collected first and the list is re-sorted by
 * adjusted rank before truncating, so the bonus can affect ordering and the
 * returned scores are monotonic.
 *
 * @param {string} query Raw user query.
 * @param {string[]} queryTokens Tokenized query, used for excerpts.
 * @param {object} context `{ limit, includeWebChats, filter, repo, since, options }`.
 * @param {object} [env] Environment used to resolve paths.
 * @returns {object[]|null} Result records; `[]` when the query yields no FTS
 *   terms; null when the sidecar is unavailable or the query failed.
 */
function searchFtsSessions(query, queryTokens, context, env = process.env) {
  const ftsPath = paths(env).ftsIndex;
  if (!ftsIndexAvailable(env, { noStaleCheck: Boolean(context.options.noRebuild || context.options.allowStaleFts) })) return null;
  const matchQuery = ftsMatchQuery(query);
  if (!matchQuery) return [];
  // Over-fetch so filtering and per-session de-duplication still leave
  // enough candidates.
  const candidateLimit = Math.max(context.limit * 8, 80);
  const rows = sqliteJson(
    ftsPath,
    [
      "SELECT",
      " d.doc_id, d.session_id, d.provider, d.source_type, d.repo_canonical, d.repo_display,",
      " d.scope_canonical, d.cwd, d.title, d.started_at, d.occurred_at, d.role,",
      " d.event_id, d.event_kind, d.message_index, d.path, d.matched_text,",
      " snippet(docs_fts, 0, '', '', '...', 32) AS excerpt,",
      " bm25(docs_fts) AS rank",
      "FROM docs_fts",
      "JOIN docs d ON d.rowid = docs_fts.rowid",
      `WHERE docs_fts MATCH ${sqliteString(matchQuery)}`,
      "ORDER BY rank ASC, d.occurred_at DESC, d.started_at DESC",
      `LIMIT ${candidateLimit};`
    ].join("\n")
  );
  if (!rows) return null;

  const sessionFilter = { ...context.filter, includeWebChats: context.includeWebChats, since: context.since };
  const boostCallingRepo = !context.options.repo && Boolean(context.repo);
  const bestBySession = new Map();
  for (const row of rows) {
    const doc = ftsRowToDoc(row);
    if (!matchesSessionFilter(doc, sessionFilter)) continue;
    const boosted = boostCallingRepo && doc.repoCanonical === context.repo;
    const rank = Number(row.rank || 0) - (boosted ? 0.05 : 0);
    const current = bestBySession.get(doc.sessionId);
    // Keep the best (lowest) adjusted rank per session; on ties the earlier
    // row wins, preserving SQLite's recency tie-break.
    if (!current || rank < current.rank) bestBySession.set(doc.sessionId, { doc, row, rank });
  }

  // Array.prototype.sort is stable, so equal ranks keep their SQL order.
  const ordered = [...bestBySession.values()].sort((a, b) => a.rank - b.rank);

  return ordered.slice(0, context.limit).map(({ doc, row, rank }) => ({
    session_id: doc.sessionId,
    provider: doc.provider,
    source_type: doc.sourceType || undefined,
    repo: doc.repoCanonical || undefined,
    repo_display: doc.repoDisplay || doc.repoCanonical || undefined,
    scope: doc.scopeCanonical || undefined,
    cwd: doc.cwd || undefined,
    title: doc.title || undefined,
    started_at: doc.startedAt,
    role: doc.role,
    event_id: doc.eventId || undefined,
    event_kind: doc.eventKind || undefined,
    message_index: doc.messageIndex ?? undefined,
    matched_text: doc.matchedText ? excerpt(doc.matchedText, queryTokens) : undefined,
    excerpt: row.excerpt || excerpt(doc.matchedText, queryTokens),
    // bm25 ranks are "lower is better"; negate so higher scores are better.
    score: Number((-rank).toFixed(4)),
    session_link: doc.path
  }));
}
|
|
722
|
+
|
|
723
|
+
/**
 * Convert a snake_case row from the FTS `docs` table into the camelCase
 * document shape. Falsy text columns become "", and a null/undefined
 * `message_index` becomes undefined.
 *
 * @param {object} row Row object as returned by sqliteJson.
 * @returns {object} Document with camelCase fields.
 */
function ftsRowToDoc(row) {
  const text = (value) => value || "";
  const rawMessageIndex = row.message_index;
  return {
    id: text(row.doc_id),
    sessionId: text(row.session_id),
    provider: text(row.provider),
    sourceType: text(row.source_type),
    repoCanonical: text(row.repo_canonical),
    repoDisplay: text(row.repo_display),
    scopeCanonical: text(row.scope_canonical),
    cwd: text(row.cwd),
    title: text(row.title),
    startedAt: text(row.started_at),
    occurredAt: text(row.occurred_at),
    role: text(row.role),
    eventId: text(row.event_id),
    eventKind: text(row.event_kind),
    messageIndex: rawMessageIndex == null ? undefined : Number(rawMessageIndex),
    path: text(row.path),
    matchedText: text(row.matched_text)
  };
}
|
|
744
|
+
|
|
745
|
+
/**
 * Turn a free-text query into an FTS5 MATCH expression.
 *
 * The query is lower-cased and split into runs of at least two characters
 * from [a-z0-9_]. Stop words and duplicates are dropped, at most 12 terms are
 * kept, and each term is emitted as a quoted prefix term (`"term"*`).
 *
 * @param {string} query Raw user query.
 * @returns {string} Space-separated MATCH expression, or "" when no usable
 *   term remains.
 */
function ftsMatchQuery(query) {
  const words = String(query || "")
    .toLowerCase()
    .match(/[a-z0-9_]{2,}/g);
  if (!words?.length) return "";

  const seen = new Set();
  const terms = [];
  for (const word of words) {
    if (terms.length >= 12) break;
    if (STOP_WORDS.has(word) || seen.has(word)) continue;
    seen.add(word);
    terms.push(`"${word.replace(/"/g, '""')}"*`);
  }
  return terms.join(" ");
}
|
|
755
|
+
|
|
756
|
+
/**
 * Select the documents worth scoring for a query, using the index's
 * inverted `postings` map when present.
 *
 * Postings entries may be `[docIndex, tf]` pairs or `{ doc, tf }` objects.
 * Only own, array-valued entries of `postings` are consulted: an index read
 * back from JSON is a plain object, so a token such as "constructor" would
 * otherwise resolve to an inherited member and crash the lookup.
 *
 * @param {object} index Index with `docs` and, optionally, `postings`.
 * @param {string[]} queryTokens Tokenized query.
 * @param {string} phrase Lower-cased raw query.
 * @returns {Array<{doc: object, docIndex: number}>} Candidates. The array
 *   carries a `termFrequencies` property (Map token -> Map docIndex -> tf),
 *   except for the bare `[]` returned when postings exist but nothing matched.
 *   All documents are returned when the index has no postings, or when only
 *   a phrase (no tokens) was supplied.
 */
function candidateDocsForQuery(index, queryTokens, phrase) {
  const docs = index.docs || [];
  const postings = index.postings || null;
  const uniqueTokens = [...new Set(queryTokens)];
  const termFrequencies = new Map();
  const candidateIndexes = new Set();
  const hasOwn = Object.prototype.hasOwnProperty;

  if (postings && uniqueTokens.length) {
    for (const token of uniqueTokens) {
      // Guard against inherited members (constructor, toString, ...).
      const tokenPostings = hasOwn.call(postings, token) ? postings[token] : null;
      if (!Array.isArray(tokenPostings) || !tokenPostings.length) continue;
      const frequencies = new Map();
      for (const posting of tokenPostings) {
        if (!posting) continue;
        const isPair = Array.isArray(posting);
        const docIndex = isPair ? posting[0] : posting.doc;
        const tf = isPair ? posting[1] : posting.tf;
        if (!Number.isInteger(docIndex) || !tf) continue;
        frequencies.set(docIndex, tf);
        candidateIndexes.add(docIndex);
      }
      termFrequencies.set(token, frequencies);
    }
  }

  if (candidateIndexes.size) {
    const candidates = [...candidateIndexes]
      .map((docIndex) => ({ doc: docs[docIndex], docIndex }))
      .filter((item) => item.doc);
    candidates.termFrequencies = termFrequencies;
    return candidates;
  }

  const allDocs = docs.map((doc, docIndex) => ({ doc, docIndex }));
  allDocs.termFrequencies = termFrequencies;
  // Phrase-only queries and indexes without postings need a full scan.
  if (phrase && !uniqueTokens.length) return allDocs;
  if (!postings) return allDocs;
  return [];
}
|
|
793
|
+
|
|
794
|
+
/**
 * Find lines matching any query token by running ripgrep over the files in
 * batches of RIPGREP_BATCH_FILE_COUNT.
 *
 * @param {string[]} queryTokens Tokens OR-ed into one case-insensitive pattern.
 * @param {string[]} files Files to search.
 * @param {{maxMatches?: number}} [options] Cap on collected matches
 *   (default 200, minimum 1).
 * @returns {Array<{path: string, line: number}>|null} Matches collected so
 *   far; null when there is nothing to search or `rg` is not installed.
 *   A batch that fails without producing output ends the search early with
 *   whatever was collected before it.
 */
function ripgrepMatches(queryTokens, files, options = {}) {
  if (!queryTokens.length || !files.length) return null;
  const pattern = queryTokens.map(escapeRegex).join("|");
  const matches = [];
  const maxMatches = Math.max(1, Number(options.maxMatches || 200));

  let start = 0;
  while (start < files.length) {
    const batch = files.slice(start, start + RIPGREP_BATCH_FILE_COUNT);
    start += RIPGREP_BATCH_FILE_COUNT;

    const result = spawnSync(
      "rg",
      ["--json", "--ignore-case", "--line-number", "--max-count", String(MARKDOWN_MATCHES_PER_FILE), "-e", pattern, "--", ...batch],
      { encoding: "utf8", maxBuffer: 1024 * 1024 * 10, timeout: RIPGREP_SEARCH_TIMEOUT_MS }
    );
    if (result.error?.code === "ENOENT") return null;

    const output = String(result.stdout || "");
    const producedOutput = Boolean(output.trim());
    // Exit status 0 and 1 are both accepted as normal outcomes here.
    const abnormalExit = result.status !== 0 && result.status !== 1;
    if ((result.error || abnormalExit) && !producedOutput) return matches;

    for (const line of output.split(/\r?\n/)) {
      if (!line.trim()) continue;
      let event;
      try {
        event = JSON.parse(line);
      } catch {
        // Skip lines that are not valid JSON (e.g. truncated output).
        continue;
      }
      if (event.type !== "match") continue;
      const file = event.data?.path?.text;
      const lineNumber = event.data?.line_number;
      if (file && lineNumber) matches.push({ path: file, line: lineNumber });
      if (matches.length >= maxMatches) return matches;
    }
  }
  return matches;
}
|
|
272
826
|
|
|
273
|
-
function jsLineMatches(queryTokens, files) {
|
|
827
|
+
function jsLineMatches(queryTokens, files, options = {}) {
|
|
274
828
|
const matches = [];
|
|
829
|
+
const maxMatches = Math.max(1, Number(options.maxMatches || 200));
|
|
275
830
|
for (const file of files) {
|
|
276
831
|
let lines;
|
|
277
832
|
try {
|
|
@@ -279,17 +834,22 @@ function jsLineMatches(queryTokens, files) {
|
|
|
279
834
|
} catch {
|
|
280
835
|
continue;
|
|
281
836
|
}
|
|
837
|
+
let fileMatches = 0;
|
|
282
838
|
for (let index = 0; index < lines.length; index++) {
|
|
283
839
|
const lower = lines[index].toLowerCase();
|
|
284
|
-
if (queryTokens.some((token) => lower.includes(token)))
|
|
840
|
+
if (!queryTokens.some((token) => lower.includes(token))) continue;
|
|
841
|
+
matches.push({ path: file, line: index + 1 });
|
|
842
|
+
fileMatches++;
|
|
843
|
+
if (matches.length >= maxMatches) return matches;
|
|
844
|
+
if (fileMatches >= MARKDOWN_MATCHES_PER_FILE) break;
|
|
285
845
|
}
|
|
286
846
|
}
|
|
287
847
|
return matches;
|
|
288
848
|
}
|
|
289
849
|
|
|
290
|
-
function readLineWindow(file, lineNumber, radius = 2) {
|
|
850
|
+
function readLineWindow(file, lineNumber, radius = 2, cache) {
|
|
291
851
|
try {
|
|
292
|
-
const lines =
|
|
852
|
+
const lines = readMarkdownLines(file, cache);
|
|
293
853
|
const start = Math.max(0, lineNumber - 1 - radius);
|
|
294
854
|
const end = Math.min(lines.length, lineNumber + radius);
|
|
295
855
|
return lines.slice(start, end).join("\n").trim();
|
|
@@ -298,9 +858,9 @@ function readLineWindow(file, lineNumber, radius = 2) {
|
|
|
298
858
|
}
|
|
299
859
|
}
|
|
300
860
|
|
|
301
|
-
function inferRoleFromMarkdown(file, lineNumber) {
|
|
861
|
+
function inferRoleFromMarkdown(file, lineNumber, cache) {
|
|
302
862
|
try {
|
|
303
|
-
const lines =
|
|
863
|
+
const lines = readMarkdownLines(file, cache);
|
|
304
864
|
for (let index = Math.min(lineNumber - 1, lines.length - 1); index >= 0; index--) {
|
|
305
865
|
const match = lines[index].match(/^##\s+([A-Za-z_ -]+)\s+-\s+/);
|
|
306
866
|
if (match) return match[1].trim().toLowerCase().replace(/\s+/g, "_");
|
|
@@ -311,18 +871,26 @@ function inferRoleFromMarkdown(file, lineNumber) {
|
|
|
311
871
|
return "unknown";
|
|
312
872
|
}
|
|
313
873
|
|
|
874
|
+
function readMarkdownLines(file, cache) {
|
|
875
|
+
const key = path.resolve(file);
|
|
876
|
+
if (cache?.has(key)) return cache.get(key);
|
|
877
|
+
const lines = fs.readFileSync(file, "utf8").split(/\r?\n/);
|
|
878
|
+
if (cache) cache.set(key, lines);
|
|
879
|
+
return lines;
|
|
880
|
+
}
|
|
881
|
+
|
|
314
882
|
function escapeRegex(value) {
|
|
315
883
|
return String(value).replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
|
|
316
884
|
}
|
|
317
885
|
|
|
318
|
-
function bm25Score(doc, queryTokens, index) {
|
|
886
|
+
function bm25Score(doc, queryTokens, index, docIndex, termFrequencies) {
|
|
319
887
|
const k1 = 1.2;
|
|
320
888
|
const b = 0.75;
|
|
321
889
|
const n = Math.max(1, index.docCount || 1);
|
|
322
890
|
const avgdl = Math.max(1, index.avgDocLength || 1);
|
|
323
891
|
let score = 0;
|
|
324
892
|
for (const token of queryTokens) {
|
|
325
|
-
const tf = doc
|
|
893
|
+
const tf = termFrequencyForDoc(doc, token, docIndex, termFrequencies);
|
|
326
894
|
if (!tf) continue;
|
|
327
895
|
const df = index.df?.[token] || 0;
|
|
328
896
|
const idf = Math.log(1 + (n - df + 0.5) / (df + 0.5));
|
|
@@ -331,6 +899,12 @@ function bm25Score(doc, queryTokens, index) {
|
|
|
331
899
|
return score;
|
|
332
900
|
}
|
|
333
901
|
|
|
902
|
+
function termFrequencyForDoc(doc, token, docIndex, termFrequencies) {
|
|
903
|
+
const mapped = termFrequencies?.get(token)?.get(docIndex);
|
|
904
|
+
if (mapped) return mapped;
|
|
905
|
+
return doc.tf?.[token] || 0;
|
|
906
|
+
}
|
|
907
|
+
|
|
334
908
|
function chunkText(text, maxTokens = 220, overlap = 40) {
|
|
335
909
|
const words = String(text || "").split(/\s+/).filter(Boolean);
|
|
336
910
|
if (words.length <= maxTokens) return [words.join(" ")].filter(Boolean);
|
|
@@ -415,7 +989,9 @@ function historySessionSummary(session) {
|
|
|
415
989
|
messages: session.messageCount,
|
|
416
990
|
user_messages: Number.isFinite(Number(session.userMessageCount)) ? Number(session.userMessageCount) : undefined,
|
|
417
991
|
usage: session.usage || undefined,
|
|
992
|
+
estimatedUsage: session.estimatedUsage || undefined,
|
|
418
993
|
models: session.models || undefined,
|
|
994
|
+
cursorCommandTypeCounts: session.cursorCommandTypeCounts || undefined,
|
|
419
995
|
conversation: session.conversationPath,
|
|
420
996
|
transcript: session.transcriptPath
|
|
421
997
|
};
|
|
@@ -503,7 +1079,7 @@ function normalizeProviderFilter(value) {
|
|
|
503
1079
|
codex_desktop: { provider: "codex", sourceType: "codex-desktop-history", sourceTypes: ["codex-desktop-history"] },
|
|
504
1080
|
cursor: { provider: "cursor" },
|
|
505
1081
|
cline: { provider: "cline", sourceType: "cline-task-history", sourceTypes: ["cline-task-history"] },
|
|
506
|
-
opencode: { provider: "opencode",
|
|
1082
|
+
opencode: { provider: "opencode", sourceTypes: ["opencode-history", "opencode-sqlite-history"] },
|
|
507
1083
|
aider: { provider: "aider", sourceType: "aider-chat-history", sourceTypes: ["aider-chat-history"] },
|
|
508
1084
|
devin: { provider: "devin" },
|
|
509
1085
|
devin_cli: { provider: "devin", sourceType: "devin-cli-history", sourceTypes: ["devin-cli-history"] },
|
|
@@ -541,17 +1117,58 @@ function matchesRepoFilter(session, repoFilter) {
|
|
|
541
1117
|
.some((value) => value === wanted || value.includes(wanted));
|
|
542
1118
|
}
|
|
543
1119
|
|
|
544
|
-
function reindexIfNeeded(env = process.env) {
|
|
1120
|
+
function reindexIfNeeded(env = process.env, options = {}) {
|
|
545
1121
|
const cfg = loadConfig(env);
|
|
546
|
-
if (cfg.index.paused) return { paused: true, index: readJson(paths(env).index, null) };
|
|
547
1122
|
const indexPath = paths(env).index;
|
|
1123
|
+
if (cfg.index.paused) return { paused: true, index: readIndexSummary(indexPath) };
|
|
1124
|
+
let stat = null;
|
|
548
1125
|
try {
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
// Missing index: build below.
|
|
1126
|
+
stat = fs.statSync(indexPath);
|
|
1127
|
+
} catch (error) {
|
|
1128
|
+
if (error.code !== "ENOENT") throw error;
|
|
553
1129
|
}
|
|
554
|
-
|
|
1130
|
+
if (stat) {
|
|
1131
|
+
const summary = readIndexSummary(indexPath);
|
|
1132
|
+
if (summary?.version !== INDEX_VERSION) return { paused: false, index: rebuildIndexSummary(env, options), rebuilt: true };
|
|
1133
|
+
const intervalMs = configuredIndexIntervalMs(cfg);
|
|
1134
|
+
if (Date.now() - stat.mtimeMs < intervalMs) return { paused: false, index: summary };
|
|
1135
|
+
if (!indexIsStale(indexPath, env)) return { paused: false, index: summary };
|
|
1136
|
+
}
|
|
1137
|
+
return { paused: false, index: rebuildIndexSummary(env, options), rebuilt: true };
|
|
1138
|
+
}
|
|
1139
|
+
|
|
1140
|
+
function configuredIndexIntervalMs(config) {
|
|
1141
|
+
const minutes = Number(config?.index?.intervalMinutes ?? 10);
|
|
1142
|
+
return Math.max(1, Number.isFinite(minutes) ? minutes : 10) * 60 * 1000;
|
|
1143
|
+
}
|
|
1144
|
+
|
|
1145
|
+
function buildIndexInChild(env = process.env) {
|
|
1146
|
+
const script = `
|
|
1147
|
+
process.title = "agentlog-index";
|
|
1148
|
+
const { buildIndexSummary } = require(${JSON.stringify(__filename)});
|
|
1149
|
+
const index = buildIndexSummary(process.env);
|
|
1150
|
+
process.stdout.write(JSON.stringify(index));
|
|
1151
|
+
`;
|
|
1152
|
+
const result = spawnSync(process.execPath, ["-e", script], {
|
|
1153
|
+
argv0: "agentlog-index",
|
|
1154
|
+
env,
|
|
1155
|
+
encoding: "utf8",
|
|
1156
|
+
maxBuffer: 1024 * 1024
|
|
1157
|
+
});
|
|
1158
|
+
if (result.error) throw result.error;
|
|
1159
|
+
if (result.status !== 0) {
|
|
1160
|
+
const message = String(result.stderr || result.stdout || `index rebuild exited with status ${result.status}`).trim();
|
|
1161
|
+
throw new Error(message);
|
|
1162
|
+
}
|
|
1163
|
+
try {
|
|
1164
|
+
return JSON.parse(result.stdout || "null") || null;
|
|
1165
|
+
} catch (error) {
|
|
1166
|
+
throw new Error(`index rebuild returned invalid summary: ${error.message}`);
|
|
1167
|
+
}
|
|
1168
|
+
}
|
|
1169
|
+
|
|
1170
|
+
function rebuildIndexSummary(env = process.env, options = {}) {
|
|
1171
|
+
return options.rebuildInProcess ? buildIndexSummary(env) : buildIndexInChild(env);
|
|
555
1172
|
}
|
|
556
1173
|
|
|
557
1174
|
function indexIsStale(indexPath, env = process.env) {
|
|
@@ -608,10 +1225,12 @@ const STOP_WORDS = new Set([
|
|
|
608
1225
|
|
|
609
1226
|
module.exports = {
|
|
610
1227
|
buildIndex,
|
|
1228
|
+
buildIndexSummary,
|
|
611
1229
|
chunkText,
|
|
612
1230
|
listHistorySessions,
|
|
613
1231
|
listRecentSessions,
|
|
614
1232
|
loadIndex,
|
|
1233
|
+
readIndexSummary,
|
|
615
1234
|
reindexIfNeeded,
|
|
616
1235
|
sessionHistoryTime,
|
|
617
1236
|
searchPastSessions,
|