agentel 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/search.js CHANGED
@@ -10,10 +10,33 @@ const { loadConfig } = require("./config");
10
10
  const { paths, ensureDir, readJson, writeJson } = require("./paths");
11
11
  const { canonicalRepo } = require("./repo");
12
12
 
13
// Layout version stamped into the JSON index and the FTS `meta` table; loaders
// compare against it before trusting an on-disk index.
const INDEX_VERSION = 3;
// Minimum gap between staleness re-checks for a cached index handle.
const INDEX_STALE_CHECK_TTL_MS = 5 * 1000;
// Timeout handed to every spawned `sqlite3` process.
const SQLITE_QUERY_TIMEOUT_MS = 5 * 1000;
// Number of documents written per BEGIN/COMMIT batch while building the FTS db.
const SQLITE_BUILD_BATCH_SIZE = 100;
// Timeout handed to each spawned `rg` process.
const RIPGREP_SEARCH_TIMEOUT_MS = 8 * 1000;
// Number of files passed to a single `rg` invocation.
const RIPGREP_BATCH_FILE_COUNT = 200;
// Cap on matches taken from one markdown file (rg `--max-count` and the JS fallback).
const MARKDOWN_MATCHES_PER_FILE = 3;

// In-process memo of the last JSON index that was loaded or built, keyed by the
// file's path, mtime and size.
const _indexCache = { path: "", mtimeMs: 0, size: 0, checkedAtMs: 0, index: null };

// In-process memo of the last FTS sidecar availability check.
const _ftsCache = { path: "", mtimeMs: 0, size: 0, checkedAtMs: 0, available: false };
34
+
13
35
  function buildIndex(env = process.env) {
14
36
  const sessions = listSessions(env);
15
37
  const docs = [];
16
- const df = {};
38
+ const postings = Object.create(null);
39
+ const df = Object.create(null);
17
40
  let totalLength = 0;
18
41
 
19
42
  for (const session of sessions) {
@@ -28,16 +51,20 @@ function buildIndex(env = process.env) {
28
51
  for (const chunk of chunkText(indexText)) {
29
52
  const tokens = tokenize(chunk);
30
53
  if (!tokens.length) continue;
31
- const tf = {};
54
+ const tf = Object.create(null);
32
55
  for (const token of tokens) tf[token] = (tf[token] || 0) + 1;
33
- for (const token of new Set(tokens)) df[token] = (df[token] || 0) + 1;
56
+ const docIndex = docs.length;
57
+ for (const token of new Set(tokens)) {
58
+ df[token] = (df[token] || 0) + 1;
59
+ if (!postings[token]) postings[token] = [];
60
+ postings[token].push([docIndex, tf[token]]);
61
+ }
34
62
  totalLength += tokens.length;
35
63
  docs.push({
36
64
  ...sourceDoc,
37
65
  id: sourceDoc.id || `${session.sessionId}:${sourceDoc.messageIndex ?? docs.length}:${docs.length}`,
38
66
  text: chunk,
39
67
  matchedText: chunk,
40
- tf,
41
68
  length: tokens.length
42
69
  });
43
70
  }
@@ -45,19 +72,360 @@ function buildIndex(env = process.env) {
45
72
  }
46
73
 
47
74
  const index = {
48
- version: 2,
75
+ version: INDEX_VERSION,
49
76
  builtAt: new Date().toISOString(),
50
77
  docCount: docs.length,
51
78
  avgDocLength: docs.length ? totalLength / docs.length : 0,
52
79
  df,
80
+ postings,
53
81
  docs
54
82
  };
55
83
  const indexPath = paths(env).index;
56
84
  ensureDir(path.dirname(indexPath));
57
- writeJson(indexPath, index);
85
+ writeJson(indexPath, index, { pretty: false });
86
+ buildFtsIndex(index, env);
87
+ rememberIndexCache(indexPath, index);
58
88
  return index;
59
89
  }
60
90
 
91
/**
 * Rebuilds the FTS sidecar and writes only its summary record to the JSON
 * index path, then memoizes that summary in the index cache.
 *
 * @param {object} [env=process.env] Environment used to resolve storage paths.
 * @returns {object} The summary returned by `buildFtsIndexSummary`.
 */
function buildIndexSummary(env = process.env) {
  const summary = buildFtsIndexSummary(env);
  const target = paths(env).index;
  const targetDir = path.dirname(target);
  ensureDir(targetDir);
  // Compact output: the file is read by code, not by people.
  writeJson(target, summary, { pretty: false });
  rememberIndexCache(target, summary);
  return summary;
}
99
+
100
/**
 * Reduces an index object to its header fields.
 *
 * @param {object|null|undefined} index Full or summary index.
 * @returns {object|null} `{ version, builtAt, docCount, avgDocLength, summaryOnly: true }`,
 *   or `null` when `index` is falsy.
 */
function summarizeIndex(index) {
  if (!index) return null;
  const { version, builtAt, docCount, avgDocLength } = index;
  return { version, builtAt, docCount, avgDocLength, summaryOnly: true };
}
110
+
111
/**
 * Reads the header fields of the JSON index without parsing the whole file:
 * only the first 8192 bytes are loaded and scanned for the known keys.
 *
 * @param {string} indexPath Path of the JSON index file.
 * @returns {object|null} Summary object, or `null` when the file does not exist
 *   or neither `version` nor `docCount` was found in the header.
 * @throws Any filesystem error other than ENOENT.
 */
function readIndexSummary(indexPath) {
  let fd;
  try {
    fd = fs.openSync(indexPath, "r");
    const head = Buffer.alloc(8192);
    const length = fs.readSync(fd, head, 0, head.length, 0);
    const header = head.toString("utf8", 0, length);
    const version = readJsonHeaderNumber(header, "version");
    const builtAt = readJsonHeaderString(header, "builtAt");
    const docCount = readJsonHeaderNumber(header, "docCount");
    const avgDocLength = readJsonHeaderNumber(header, "avgDocLength");
    if (version == null && docCount == null) return null;
    return { version, builtAt, docCount, avgDocLength, summaryOnly: true };
  } catch (error) {
    if (error.code !== "ENOENT") throw error;
    return null;
  } finally {
    // Runs on every exit path so the descriptor is never leaked.
    if (fd != null) fs.closeSync(fd);
  }
}
133
+
134
/**
 * Extracts the first numeric value stored under `key` from a raw JSON text
 * fragment, without parsing the fragment.
 *
 * The pattern follows the JSON number grammar, including the exponent form
 * (`1.5e-7`, `1e+21`) that `JSON.stringify` emits for very small or very large
 * values; without the exponent part such values would be read truncated.
 *
 * @param {string} header Raw JSON text (possibly cut off mid-document).
 * @param {string} key Property name to look for.
 * @returns {number|undefined} The parsed number, or `undefined` when the key is
 *   absent, its value is not a number, or `header` is null/undefined.
 */
function readJsonHeaderNumber(header, key) {
  const pattern = new RegExp(
    `"${escapeRegExp(key)}"\\s*:\\s*(-?\\d+(?:\\.\\d+)?(?:[eE][+-]?\\d+)?)`
  );
  const match = String(header ?? "").match(pattern);
  return match ? Number(match[1]) : undefined;
}
138
+
139
/**
 * Extracts the first string value stored under `key` from a raw JSON text
 * fragment, without parsing the fragment.
 *
 * The matcher understands backslash escapes, so a value containing `\"` is no
 * longer cut at the escaped quote, and the captured literal is decoded with
 * `JSON.parse` so escapes such as `\n` or `\u00e9` come back as real characters.
 *
 * @param {string} header Raw JSON text (possibly cut off mid-document).
 * @param {string} key Property name to look for.
 * @returns {string|undefined} The decoded string, or `undefined` when the key
 *   is absent, its value is not a complete string literal, or `header` is
 *   null/undefined.
 */
function readJsonHeaderString(header, key) {
  const pattern = new RegExp(`"${escapeRegExp(key)}"\\s*:\\s*("(?:[^"\\\\]|\\\\.)*")`);
  const match = String(header ?? "").match(pattern);
  if (!match) return undefined;
  const literal = match[1];
  try {
    return JSON.parse(literal);
  } catch {
    // Not strictly valid JSON (e.g. a raw control character): fall back to the
    // undecoded text between the quotes rather than dropping the value.
    return literal.slice(1, -1);
  }
}
143
+
144
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source as a literal.
 *
 * @param {*} value Value to escape; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeRegExp(value) {
  const text = String(value);
  const escaped = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return escaped;
}
147
+
148
// Schema used by every FTS sidecar build. Kept in one place so the two builders
// below cannot drift apart.
const FTS_SCHEMA_STATEMENTS = [
  "PRAGMA journal_mode=OFF;",
  "PRAGMA synchronous=OFF;",
  "CREATE TABLE meta(key TEXT PRIMARY KEY, value TEXT NOT NULL);",
  "CREATE TABLE docs(",
  " rowid INTEGER PRIMARY KEY,",
  " doc_id TEXT,",
  " session_id TEXT,",
  " provider TEXT,",
  " source_type TEXT,",
  " repo_canonical TEXT,",
  " repo_display TEXT,",
  " scope_canonical TEXT,",
  " cwd TEXT,",
  " title TEXT,",
  " started_at TEXT,",
  " occurred_at TEXT,",
  " role TEXT,",
  " event_id TEXT,",
  " event_kind TEXT,",
  " message_index INTEGER,",
  " path TEXT,",
  " matched_text TEXT",
  ");",
  "CREATE VIRTUAL TABLE docs_fts USING fts5(text, tokenize='unicode61', prefix='2 3 4');"
];

// Best-effort removal of a temporary database and the sidecar files SQLite may
// create next to it.
function removeFtsTempFiles(tmpPath) {
  for (const file of [tmpPath, `${tmpPath}-journal`, `${tmpPath}-wal`, `${tmpPath}-shm`]) {
    try {
      fs.rmSync(file, { force: true });
    } catch {
      // Best effort cleanup of the temporary sidecar index.
    }
  }
}

// Creates an empty FTS database at tmpPath and seeds `meta` with the given
// [key, value] pairs, in order.
function createFtsDatabase(tmpPath, metaEntries) {
  const metaInserts = metaEntries.map(
    ([key, value]) => `INSERT INTO meta(key, value) VALUES (${sqliteString(key)}, ${sqliteString(value)});`
  );
  runSqliteScript(tmpPath, [...FTS_SCHEMA_STATEMENTS, ...metaInserts].join("\n"));
}

// Failure path shared by both builders: drop the half-built temp database
// (including its sidecars), drop the published FTS file, and record the FTS
// index as unavailable.
function discardFtsIndex(tmpPath, ftsPath) {
  removeFtsTempFiles(tmpPath);
  try {
    fs.rmSync(ftsPath, { force: true });
  } catch {
    // Ignore optional FTS cleanup failure.
  }
  rememberFtsCache(ftsPath, false);
}

/**
 * Builds the SQLite FTS sidecar from an already-built in-memory index.
 *
 * The database is assembled under a per-process temporary name and renamed over
 * the final path only after every insert and the FTS `optimize` step succeeded.
 * Any failure is swallowed on purpose (the FTS index is optional): the temp and
 * published files are removed and `false` is returned.
 *
 * @param {object} index Index whose `docs`, `builtAt` and `docCount` are stored.
 * @param {object} [env=process.env] Environment used to resolve storage paths.
 * @returns {boolean} `true` when the sidecar was written, `false` otherwise.
 */
function buildFtsIndex(index, env = process.env) {
  const ftsPath = paths(env).ftsIndex;
  const tmpPath = `${ftsPath}.${process.pid}.tmp`;
  try {
    ensureDir(path.dirname(ftsPath));
    removeFtsTempFiles(tmpPath);
    createFtsDatabase(tmpPath, [
      ["version", String(INDEX_VERSION)],
      ["builtAt", index.builtAt || ""],
      ["docCount", String(index.docCount || 0)]
    ]);

    insertFtsDocs(tmpPath, index.docs || [], 1);
    runSqliteScript(tmpPath, "INSERT INTO docs_fts(docs_fts) VALUES('optimize');");
    fs.renameSync(tmpPath, ftsPath);
    rememberFtsCache(ftsPath, true);
    return true;
  } catch {
    discardFtsIndex(tmpPath, ftsPath);
    return false;
  }
}

/**
 * Builds the SQLite FTS sidecar straight from the session store, streaming
 * documents into SQLite in batches instead of holding them all in memory, and
 * returns only summary statistics.
 *
 * Unlike `buildFtsIndex`, a failure is rethrown after cleanup because the
 * caller has no other index to fall back on.
 *
 * @param {object} [env=process.env] Environment used to resolve storage paths.
 * @returns {object} `{ version, builtAt, docCount, avgDocLength, summaryOnly: true }`.
 * @throws Whatever error interrupted the build.
 */
function buildFtsIndexSummary(env = process.env) {
  const ftsPath = paths(env).ftsIndex;
  const tmpPath = `${ftsPath}.${process.pid}.tmp`;
  const builtAt = new Date().toISOString();
  let docCount = 0;
  let totalLength = 0;
  try {
    ensureDir(path.dirname(ftsPath));
    removeFtsTempFiles(tmpPath);
    createFtsDatabase(tmpPath, [
      ["version", String(INDEX_VERSION)],
      ["builtAt", builtAt]
    ]);

    let batch = [];
    // Rowids are 1-based and contiguous: the first row of a pending batch is
    // the running document count minus the batch size, plus one.
    const flush = () => {
      insertFtsDocs(tmpPath, batch, docCount - batch.length + 1);
      batch = [];
    };
    for (const session of listSessions(env)) {
      if (session.conversationPath && !fs.existsSync(session.conversationPath)) ensureConversationMarkdown(session, env);
      const events = readEvents(session);
      const eventDocs = events.length ? docsForEvents(session, events) : [];
      if (!eventDocs.length) ensureConversationMarkdown(session, env);
      // Sessions without indexable events fall back to their transcript.
      const sourceDocs = eventDocs.length ? eventDocs : docsForTranscript(session, readTranscript(session.transcriptPath));
      for (const sourceDoc of sourceDocs) {
        const indexText = normalizeIndexText(sourceDoc.text);
        if (!indexText) continue;
        for (const chunk of chunkText(indexText)) {
          const tokens = tokenize(chunk);
          if (!tokens.length) continue;
          const docIndex = docCount;
          totalLength += tokens.length;
          docCount++;
          batch.push({
            ...sourceDoc,
            id: sourceDoc.id || `${session.sessionId}:${sourceDoc.messageIndex ?? docIndex}:${docIndex}`,
            text: chunk,
            matchedText: chunk,
            length: tokens.length
          });
          if (batch.length >= SQLITE_BUILD_BATCH_SIZE) flush();
        }
      }
    }
    if (batch.length) flush();
    runSqliteScript(tmpPath, [
      `INSERT INTO meta(key, value) VALUES ('docCount', ${sqliteString(String(docCount))});`,
      "INSERT INTO docs_fts(docs_fts) VALUES('optimize');"
    ].join("\n"));
    fs.renameSync(tmpPath, ftsPath);
    rememberFtsCache(ftsPath, true);
  } catch (error) {
    discardFtsIndex(tmpPath, ftsPath);
    throw error;
  }
  return {
    version: INDEX_VERSION,
    builtAt,
    docCount,
    avgDocLength: docCount ? totalLength / docCount : 0,
    summaryOnly: true
  };
}
314
+
315
/**
 * Inserts documents into the `docs` and `docs_fts` tables of the SQLite
 * database at `dbPath`, one BEGIN/COMMIT script per `SQLITE_BUILD_BATCH_SIZE`
 * documents.
 *
 * `message_index` is written as NULL unless the document carries a real finite
 * number (or a non-blank numeric string). A bare `Number()` coercion would turn
 * `null` and `""` into 0 and store a bogus index.
 *
 * @param {string} dbPath Database file to write to.
 * @param {object[]} docs Documents to insert.
 * @param {number} [rowidStart=1] Rowid assigned to `docs[0]`; later documents
 *   get consecutive rowids.
 * @returns {void}
 */
function insertFtsDocs(dbPath, docs, rowidStart = 1) {
  const columns =
    "rowid, doc_id, session_id, provider, source_type, repo_canonical, repo_display, scope_canonical, cwd, title, started_at, occurred_at, role, event_id, event_kind, message_index, path, matched_text";
  for (let start = 0; start < docs.length; start += SQLITE_BUILD_BATCH_SIZE) {
    const statements = ["BEGIN;"];
    const batch = docs.slice(start, start + SQLITE_BUILD_BATCH_SIZE);
    batch.forEach((doc, offset) => {
      const rowid = rowidStart + start + offset;
      const rawIndex = doc.messageIndex;
      const hasMessageIndex =
        rawIndex != null &&
        typeof rawIndex !== "boolean" &&
        String(rawIndex).trim() !== "" &&
        Number.isFinite(Number(rawIndex));
      const values = [
        rowid,
        sqliteString(doc.id || ""),
        sqliteString(doc.sessionId || ""),
        sqliteString(doc.provider || ""),
        sqliteString(doc.sourceType || ""),
        sqliteString(doc.repoCanonical || ""),
        sqliteString(doc.repoDisplay || ""),
        sqliteString(doc.scopeCanonical || ""),
        sqliteString(doc.cwd || ""),
        sqliteString(doc.title || ""),
        sqliteString(doc.startedAt || ""),
        sqliteString(doc.occurredAt || ""),
        sqliteString(doc.role || ""),
        sqliteString(doc.eventId || ""),
        sqliteString(doc.eventKind || ""),
        hasMessageIndex ? Number(rawIndex) : "NULL",
        sqliteString(doc.path || ""),
        sqliteString(doc.matchedText || "")
      ];
      statements.push(`INSERT INTO docs(${columns}) VALUES (${values.join(", ")});`);
      // The FTS row shares the rowid so search hits can be joined back to docs.
      statements.push(`INSERT INTO docs_fts(rowid, text) VALUES (${rowid}, ${sqliteString(doc.text || "")});`);
    });
    statements.push("COMMIT;");
    runSqliteScript(dbPath, statements.join("\n"));
  }
}
352
+
353
/**
 * Pipes a SQL script into the `sqlite3` command-line tool for the database at
 * `dbPath` and waits for it to finish.
 *
 * @param {string} dbPath Database file passed to `sqlite3`.
 * @param {string} script SQL text written to the process's stdin.
 * @returns {void}
 * @throws {Error} The spawn error (e.g. missing binary or timeout), or an
 *   `Error` carrying stderr/stdout when the exit status is not 0.
 */
function runSqliteScript(dbPath, script) {
  const spawnOptions = {
    argv0: "agentlog-sqlite",
    input: script,
    encoding: "utf8",
    maxBuffer: 1024 * 1024 * 20,
    timeout: SQLITE_QUERY_TIMEOUT_MS
  };
  const { error, status, stderr, stdout } = spawnSync("sqlite3", [dbPath], spawnOptions);
  if (error) throw error;
  if (status === 0) return;
  throw new Error(String(stderr || stdout || "sqlite3 failed").trim());
}
364
+
365
/**
 * Runs one query through `sqlite3 -json` and returns the decoded rows.
 *
 * Failures are reported as `null` rather than thrown, so callers can treat the
 * database as unavailable.
 *
 * @param {string} dbPath Database file passed to `sqlite3`.
 * @param {string} query SQL text passed as a command-line argument.
 * @returns {Array|null} Parsed rows, `[]` when the tool printed nothing, or
 *   `null` when spawning failed, the exit status was not 0, or the output was
 *   not valid JSON.
 */
function sqliteJson(dbPath, query) {
  const outcome = spawnSync("sqlite3", [dbPath, "-json", query], {
    argv0: "agentlog-sqlite",
    encoding: "utf8",
    maxBuffer: 1024 * 1024 * 20,
    timeout: SQLITE_QUERY_TIMEOUT_MS
  });
  const failed = outcome.error || outcome.status !== 0;
  if (failed) return null;
  try {
    if (!outcome.stdout.trim()) return [];
    return JSON.parse(outcome.stdout);
  } catch {
    return null;
  }
}
379
+
380
/**
 * Renders a value as a single-quoted SQL string literal.
 *
 * Single quotes are doubled. NUL characters are removed: the literals end up in
 * scripts fed to the `sqlite3` command-line tool, which handles its input as
 * C strings, so an embedded NUL would cut the statement short and break the
 * rest of the batch.
 *
 * @param {*} value Value to quote; `null`/`undefined` become the empty string.
 * @returns {string} The quoted literal, e.g. `'it''s'`.
 */
function sqliteString(value) {
  const text = String(value == null ? "" : value);
  return `'${text.replace(/\u0000/g, "").replace(/'/g, "''")}'`;
}
383
+
384
/**
 * Records the outcome of an FTS availability check in `_ftsCache`, together
 * with the file's current mtime and size.
 *
 * @param {string} ftsPath Path of the FTS database file.
 * @param {boolean} available Whether the caller found the index usable; stored
 *   as `false` regardless when the file cannot be stat'ed.
 * @returns {void}
 */
function rememberFtsCache(ftsPath, available) {
  let stat;
  try {
    stat = fs.statSync(ftsPath);
  } catch {
    stat = null;
  }
  Object.assign(_ftsCache, {
    path: ftsPath,
    mtimeMs: stat?.mtimeMs || 0,
    size: stat?.size || 0,
    checkedAtMs: Date.now(),
    available: Boolean(available && stat)
  });
}
397
+
398
/**
 * Decides whether the SQLite FTS sidecar can be used for searching.
 *
 * A positive answer is reused from `_ftsCache` while the file's mtime and size
 * are unchanged and the last check is younger than `INDEX_STALE_CHECK_TTL_MS`.
 * Otherwise the `meta` table is queried for a matching version and, unless
 * `options.noStaleCheck` is set, `indexIsStale` is consulted. The outcome of a
 * full check is recorded via `rememberFtsCache`.
 *
 * @param {object} [env=process.env] Environment used to resolve storage paths.
 * @param {{noStaleCheck?: boolean}} [options] Set `noStaleCheck` to accept an
 *   index without running the staleness check.
 * @returns {boolean} `true` when the FTS index may be queried.
 * @throws Any `stat` error other than ENOENT.
 */
function ftsIndexAvailable(env = process.env, options = {}) {
  const ftsPath = paths(env).ftsIndex;
  let stat;
  try {
    stat = fs.statSync(ftsPath);
  } catch (error) {
    if (error.code === "ENOENT") return false;
    throw error;
  }

  const cacheStillValid =
    _ftsCache.path === ftsPath &&
    _ftsCache.available &&
    _ftsCache.mtimeMs === stat.mtimeMs &&
    _ftsCache.size === stat.size &&
    Date.now() - _ftsCache.checkedAtMs < INDEX_STALE_CHECK_TTL_MS;
  if (cacheStillValid) return true;

  const rows = sqliteJson(ftsPath, "SELECT key, value FROM meta WHERE key IN ('version', 'docCount');");
  const versionMatches =
    Boolean(rows) && rows.some((row) => row.key === "version" && Number(row.value) === INDEX_VERSION);

  let usable = false;
  if (versionMatches) {
    // The staleness check is skipped entirely when the caller opted out.
    usable = options.noStaleCheck ? true : !indexIsStale(ftsPath, env);
  }
  rememberFtsCache(ftsPath, usable);
  return usable;
}
428
+
61
429
  function docsForEvents(session, events) {
62
430
  const indexedKinds = new Set([
63
431
  EVENT_KINDS.PROMPT_SUBMITTED,
@@ -120,18 +488,60 @@ function docsForTranscript(session, messages) {
120
488
  return docs;
121
489
  }
122
490
 
123
/**
 * Returns the JSON search index, rebuilding it when it is missing, outdated or
 * stale (unless rebuilding is disabled).
 *
 * Lookup order:
 *  1. No index file: rebuild.
 *  2. The in-process cache matches the file's path, mtime and size: reuse it,
 *     re-running the staleness check at most once per
 *     `INDEX_STALE_CHECK_TTL_MS`. A cache hit that turns out stale goes
 *     straight to the rebuild; the header is not re-read and the staleness
 *     check is not repeated.
 *  3. Otherwise read just the file header to check the version, check
 *     staleness, and only then parse the whole file.
 *
 * @param {object} [env=process.env] Environment used to resolve storage paths.
 * @param {{noRebuild?: boolean}} [options] With `noRebuild`, every situation
 *   that would trigger a rebuild returns `null` instead.
 * @returns {object|null} The index, or `null` when unusable and rebuilding is
 *   disabled.
 * @throws Any `stat` error other than ENOENT.
 */
function loadIndex(env = process.env, options = {}) {
  const allowRebuild = !options.noRebuild;
  const rebuildOrGiveUp = () => (allowRebuild ? buildIndex(env) : null);
  const indexPath = paths(env).index;

  let stat = null;
  try {
    stat = fs.statSync(indexPath);
  } catch (error) {
    if (error.code !== "ENOENT") throw error;
  }
  if (!stat) return rebuildOrGiveUp();

  const cacheMatchesFile =
    _indexCache.path === indexPath &&
    _indexCache.index?.version === INDEX_VERSION &&
    _indexCache.mtimeMs === stat.mtimeMs &&
    _indexCache.size === stat.size;
  if (cacheMatchesFile) {
    if (Date.now() - _indexCache.checkedAtMs < INDEX_STALE_CHECK_TTL_MS) return _indexCache.index;
    // The file on disk is the cached one, so a stale verdict here is final.
    if (indexIsStale(indexPath, env)) return rebuildOrGiveUp();
    _indexCache.checkedAtMs = Date.now();
    return _indexCache.index;
  }

  // Cheap header probe first so an outdated or stale file is never fully parsed.
  const summary = readIndexSummary(indexPath);
  if (summary?.version !== INDEX_VERSION) return rebuildOrGiveUp();
  if (indexIsStale(indexPath, env)) return rebuildOrGiveUp();

  const existing = readJson(indexPath, null);
  if (existing && existing.version === INDEX_VERSION) {
    rememberIndexCache(indexPath, existing);
    return existing;
  }
  return rebuildOrGiveUp();
}
524
+
525
/**
 * Stores `index` in `_indexCache` together with the index file's current mtime
 * and size, and stamps the time of the check.
 *
 * @param {string} indexPath Path of the JSON index file.
 * @param {object} index Index object to memoize.
 * @returns {void}
 */
function rememberIndexCache(indexPath, index) {
  let stat;
  try {
    stat = fs.statSync(indexPath);
  } catch {
    // An unreadable file is recorded with zeroed mtime and size.
    stat = null;
  }
  Object.assign(_indexCache, {
    path: indexPath,
    mtimeMs: stat?.mtimeMs || 0,
    size: stat?.size || 0,
    checkedAtMs: Date.now(),
    index
  });
}
129
538
 
130
539
  function searchPastSessions(query, options = {}, env = process.env) {
131
540
  try {
132
541
  const eventResults = searchIndexedSessions(query, options, env);
133
- if (eventResults.length) return eventResults;
542
+ if (eventResults.length || options.skipMarkdownFallback) return eventResults;
134
543
  } catch {
544
+ if (options.skipMarkdownFallback) return [];
135
545
  // Fall through to the legacy markdown path below.
136
546
  }
137
547
  return searchMarkdownSessions(query, options, env);
@@ -139,6 +549,7 @@ function searchPastSessions(query, options = {}, env = process.env) {
139
549
 
140
550
  function searchMarkdownSessions(query, options = {}, env = process.env) {
141
551
  const limit = Math.max(1, Math.min(Number(options.limit || 10), 50));
552
+ const maxMatches = Math.max(limit * 8, 40);
142
553
  const includeWebChats = Boolean(options.includeWebChats);
143
554
  const filter = normalizeSessionFilter(options);
144
555
  const repo = filter.repo || inferCallingRepo(options.cwd || process.cwd());
@@ -161,16 +572,18 @@ function searchMarkdownSessions(query, options = {}, env = process.env) {
161
572
  sessionByPath.set(path.resolve(session._searchPath), session);
162
573
  }
163
574
 
164
- const matches = ripgrepMatches(queryTokens, [...sessionByPath.keys()]) || jsLineMatches(queryTokens, [...sessionByPath.keys()]);
575
+ const searchFiles = [...sessionByPath.keys()];
576
+ const matches = ripgrepMatches(queryTokens, searchFiles, { maxMatches }) || jsLineMatches(queryTokens, searchFiles, { maxMatches });
165
577
  const ranked = [];
166
578
  const seen = new Set();
579
+ const lineCache = new Map();
167
580
  for (const match of matches) {
168
581
  const session = sessionByPath.get(path.resolve(match.path));
169
582
  if (!session) continue;
170
583
  const key = `${session.sessionId}:${match.line}`;
171
584
  if (seen.has(key)) continue;
172
585
  seen.add(key);
173
- const excerptText = readLineWindow(match.path, match.line, 4);
586
+ const excerptText = readLineWindow(match.path, match.line, 4, lineCache);
174
587
  const lower = excerptText.toLowerCase();
175
588
  const matchedTokens = queryTokens.filter((token) => lower.includes(token));
176
589
  let score = matchedTokens.length * 5 + (phrase && lower.includes(phrase) ? 10 : 0);
@@ -185,15 +598,20 @@ function searchMarkdownSessions(query, options = {}, env = process.env) {
185
598
  cwd: session.cwd || undefined,
186
599
  title: session.title || undefined,
187
600
  started_at: session.startedAt,
188
- role: inferRoleFromMarkdown(match.path, match.line),
601
+ role: inferRoleFromMarkdown(match.path, match.line, lineCache),
189
602
  excerpt: excerptText.replace(/\s+/g, " ").trim(),
190
603
  score: Number(score.toFixed(4)),
191
604
  session_link: session._searchPath
192
605
  });
606
+ if (ranked.length >= maxMatches) break;
193
607
  }
194
608
 
195
609
  ranked.sort((a, b) => b.score - a.score || String(b.started_at).localeCompare(String(a.started_at)));
196
- return ranked.slice(0, limit);
610
+ const bySession = new Map();
611
+ for (const item of ranked) {
612
+ if (!bySession.has(item.session_id)) bySession.set(item.session_id, item);
613
+ }
614
+ return [...bySession.values()].slice(0, limit);
197
615
  }
198
616
 
199
617
  function searchIndexedSessions(query, options = {}, env = process.env) {
@@ -202,16 +620,21 @@ function searchIndexedSessions(query, options = {}, env = process.env) {
202
620
  const filter = normalizeSessionFilter(options);
203
621
  const repo = filter.repo || inferCallingRepo(options.cwd || process.cwd());
204
622
  const since = parseSinceFilter(options.since);
205
- const index = loadIndex(env);
206
623
  const queryTokens = tokenize(query);
207
624
  const phrase = String(query || "").trim().toLowerCase();
208
625
  if (!queryTokens.length && !phrase) return [];
626
+ const ftsResults = searchFtsSessions(query, queryTokens, { limit, includeWebChats, filter, repo, since, options }, env);
627
+ if (ftsResults) return ftsResults;
628
+ if (options.skipJsonIndex) return [];
629
+ const index = loadIndex(env, { noRebuild: Boolean(options.noRebuild) });
630
+ if (!index) return [];
209
631
 
210
632
  const scored = [];
211
- for (const doc of index.docs || []) {
633
+ const candidates = candidateDocsForQuery(index, queryTokens, phrase);
634
+ for (const { doc, docIndex } of candidates) {
212
635
  if (!matchesSessionFilter(doc, { ...filter, includeWebChats, since })) continue;
213
636
 
214
- let score = bm25Score(doc, queryTokens, index);
637
+ let score = bm25Score(doc, queryTokens, index, docIndex, candidates.termFrequencies);
215
638
  if (phrase && doc.text.toLowerCase().includes(phrase)) score += 2.5;
216
639
  if (!options.repo && repo && doc.repoCanonical === repo) score *= 1.25;
217
640
  if (score > 0) scored.push({ doc, score });
@@ -243,35 +666,167 @@ function searchIndexedSessions(query, options = {}, env = process.env) {
243
666
  }));
244
667
  }
245
668
 
246
- function ripgrepMatches(queryTokens, files) {
669
/**
 * Searches the SQLite FTS sidecar and returns at most one hit per session.
 *
 * SQLite returns candidates ordered by raw bm25 rank. The calling-repo boost is
 * applied in JavaScript afterwards, so the filtered rows are re-sorted by the
 * adjusted rank before the best chunk per session is picked. That way the boost
 * influences the ordering (and which chunk represents a session), not just the
 * reported score. Rows are not mutated.
 *
 * @param {string} query Raw user query.
 * @param {string[]} queryTokens Tokens used when building fallback excerpts.
 * @param {object} context `{ limit, includeWebChats, filter, repo, since, options }`
 *   as prepared by the caller.
 * @param {object} [env=process.env] Environment used to resolve storage paths.
 * @returns {object[]|null} Result records, `[]` when the query has no usable
 *   terms, or `null` when the FTS index is unavailable or the query failed
 *   (telling the caller to fall back to another search path).
 */
function searchFtsSessions(query, queryTokens, context, env = process.env) {
  const ftsPath = paths(env).ftsIndex;
  const noStaleCheck = Boolean(context.options.noRebuild || context.options.allowStaleFts);
  if (!ftsIndexAvailable(env, { noStaleCheck })) return null;
  const matchQuery = ftsMatchQuery(query);
  if (!matchQuery) return [];
  // Over-fetch: filtering and per-session de-duplication discard many rows.
  const candidateLimit = Math.max(context.limit * 8, 80);
  const rows = sqliteJson(
    ftsPath,
    [
      "SELECT",
      " d.doc_id, d.session_id, d.provider, d.source_type, d.repo_canonical, d.repo_display,",
      " d.scope_canonical, d.cwd, d.title, d.started_at, d.occurred_at, d.role,",
      " d.event_id, d.event_kind, d.message_index, d.path, d.matched_text,",
      " snippet(docs_fts, 0, '', '', '...', 32) AS excerpt,",
      " bm25(docs_fts) AS rank",
      "FROM docs_fts",
      "JOIN docs d ON d.rowid = docs_fts.rowid",
      `WHERE docs_fts MATCH ${sqliteString(matchQuery)}`,
      "ORDER BY rank ASC, d.occurred_at DESC, d.started_at DESC",
      `LIMIT ${candidateLimit};`
    ].join("\n")
  );
  if (!rows) return null;

  const boostCallingRepo = !context.options.repo && Boolean(context.repo);
  const ranked = [];
  for (const row of rows) {
    const doc = ftsRowToDoc(row);
    if (!matchesSessionFilter(doc, { ...context.filter, includeWebChats: context.includeWebChats, since: context.since })) continue;
    // bm25() ranks are "lower is better", so the boost subtracts.
    const boost = boostCallingRepo && doc.repoCanonical === context.repo ? 0.05 : 0;
    ranked.push({ doc, row, rank: (Number(row.rank) || 0) - boost });
  }
  // Array.prototype.sort is stable, so ties keep SQLite's recency ordering.
  ranked.sort((a, b) => a.rank - b.rank);

  const bySession = new Map();
  for (const item of ranked) {
    if (!bySession.has(item.doc.sessionId)) bySession.set(item.doc.sessionId, item);
    if (bySession.size >= context.limit) break;
  }
  return [...bySession.values()].slice(0, context.limit).map(({ doc, row, rank }) => ({
    session_id: doc.sessionId,
    provider: doc.provider,
    source_type: doc.sourceType || undefined,
    repo: doc.repoCanonical || undefined,
    repo_display: doc.repoDisplay || doc.repoCanonical || undefined,
    scope: doc.scopeCanonical || undefined,
    cwd: doc.cwd || undefined,
    title: doc.title || undefined,
    started_at: doc.startedAt,
    role: doc.role,
    event_id: doc.eventId || undefined,
    event_kind: doc.eventKind || undefined,
    message_index: doc.messageIndex ?? undefined,
    matched_text: doc.matchedText ? excerpt(doc.matchedText, queryTokens) : undefined,
    excerpt: row.excerpt || excerpt(doc.matchedText, queryTokens),
    // Negated so that, like the other search paths, a higher score is better.
    score: Number((-rank).toFixed(4)),
    session_link: doc.path
  }));
}
722
+
723
/**
 * Converts a snake_case row from the `docs` table into the camelCase document
 * shape used by the rest of the search code.
 *
 * @param {object} row Row object as returned by `sqlite3 -json`.
 * @returns {object} Document with every text field defaulting to `""` and
 *   `messageIndex` set to `undefined` when the column is NULL or missing.
 */
function ftsRowToDoc(row) {
  const text = (value) => value || "";
  const messageIndex = row.message_index == null ? undefined : Number(row.message_index);
  return {
    id: text(row.doc_id),
    sessionId: text(row.session_id),
    provider: text(row.provider),
    sourceType: text(row.source_type),
    repoCanonical: text(row.repo_canonical),
    repoDisplay: text(row.repo_display),
    scopeCanonical: text(row.scope_canonical),
    cwd: text(row.cwd),
    title: text(row.title),
    startedAt: text(row.started_at),
    occurredAt: text(row.occurred_at),
    role: text(row.role),
    eventId: text(row.event_id),
    eventKind: text(row.event_kind),
    messageIndex,
    path: text(row.path),
    matchedText: text(row.matched_text)
  };
}
744
+
745
/**
 * Turns a free-text query into an FTS5 MATCH expression: each distinct
 * non-stop-word term of at least two `[a-z0-9_]` characters becomes a quoted
 * prefix term (`"term"*`), and the first twelve are joined with spaces.
 *
 * @param {string} query Raw user query.
 * @returns {string} The MATCH expression, or `""` when no usable term remains.
 */
function ftsMatchQuery(query) {
  const lowered = String(query || "").toLowerCase();
  const rawTokens = lowered.match(/[a-z0-9_]{2,}/g);
  if (!rawTokens?.length) return "";
  // A Set keeps first-seen order while dropping repeats.
  const terms = new Set();
  for (const token of rawTokens) {
    if (!STOP_WORDS.has(token)) terms.add(token);
  }
  const quoted = [];
  for (const token of [...terms].slice(0, 12)) {
    quoted.push(`"${token.replace(/"/g, '""')}"*`);
  }
  return quoted.join(" ");
}
755
+
756
/**
 * Narrows the documents that need scoring for a query by using the index's
 * postings lists, and collects per-token term frequencies on the way.
 *
 * Postings entries are accepted in two shapes: `[docIndex, tf]` pairs and
 * `{ doc, tf }` objects.
 *
 * An index loaded from JSON has plain-object `postings`, so a query token such
 * as "constructor" or "toString" would resolve to an inherited
 * `Object.prototype` member. Only real arrays are treated as postings lists.
 *
 * @param {object} index Index with `docs` and (optionally) `postings`.
 * @param {string[]} queryTokens Tokenized query.
 * @param {string} phrase Lower-cased raw query, used when there are no tokens.
 * @returns {Array<{doc: object, docIndex: number}>} Candidate list. The array
 *   always carries a `termFrequencies` property: a
 *   `Map<token, Map<docIndex, tf>>`. Every document is returned when the index
 *   has no postings, or when there is a phrase but no tokens; nothing is
 *   returned when postings exist but no token matched.
 */
function candidateDocsForQuery(index, queryTokens, phrase) {
  const docs = index.docs || [];
  const postings = index.postings || null;
  const uniqueTokens = [...new Set(queryTokens)];
  const termFrequencies = new Map();
  const candidateIndexes = new Set();
  // Every return value exposes the same shape to the caller.
  const withFrequencies = (list) => {
    list.termFrequencies = termFrequencies;
    return list;
  };

  if (postings && uniqueTokens.length) {
    for (const token of uniqueTokens) {
      const tokenPostings = postings[token];
      if (!Array.isArray(tokenPostings) || !tokenPostings.length) continue;
      const frequencies = new Map();
      for (const posting of tokenPostings) {
        if (!posting) continue;
        const docIndex = Array.isArray(posting) ? posting[0] : posting.doc;
        const tf = Array.isArray(posting) ? posting[1] : posting.tf;
        if (!Number.isInteger(docIndex) || !tf) continue;
        frequencies.set(docIndex, tf);
        candidateIndexes.add(docIndex);
      }
      termFrequencies.set(token, frequencies);
    }
  }

  if (candidateIndexes.size) {
    const candidates = [...candidateIndexes]
      .map((docIndex) => ({ doc: docs[docIndex], docIndex }))
      // Drop postings that point past the end of the docs array.
      .filter((item) => item.doc);
    return withFrequencies(candidates);
  }

  const scanEverything = !postings || (Boolean(phrase) && !uniqueTokens.length);
  if (scanEverything) return withFrequencies(docs.map((doc, docIndex) => ({ doc, docIndex })));
  return withFrequencies([]);
}
793
+
794
/**
 * Finds lines containing any query token by running ripgrep (`rg --json`) over
 * the given files in batches of `RIPGREP_BATCH_FILE_COUNT`.
 *
 * @param {string[]} queryTokens Tokens to search for (OR-ed, case-insensitive).
 * @param {string[]} files Files to search.
 * @param {{maxMatches?: number}} [options] Overall cap on returned matches
 *   (default 200, minimum 1).
 * @returns {Array<{path: string, line: number}>|null} Matches found so far, or
 *   `null` when there is nothing to search or `rg` is not installed (signalling
 *   the caller to use another matcher). A failing batch that produced no output
 *   ends the search early with the matches collected up to that point.
 */
function ripgrepMatches(queryTokens, files, options = {}) {
  if (!queryTokens.length || !files.length) return null;
  const pattern = queryTokens.map(escapeRegex).join("|");
  const matches = [];
  const maxMatches = Math.max(1, Number(options.maxMatches || 200));
  const spawnOptions = { encoding: "utf8", maxBuffer: 1024 * 1024 * 10, timeout: RIPGREP_SEARCH_TIMEOUT_MS };

  let start = 0;
  while (start < files.length) {
    const batch = files.slice(start, start + RIPGREP_BATCH_FILE_COUNT);
    start += RIPGREP_BATCH_FILE_COUNT;
    const args = [
      "--json",
      "--ignore-case",
      "--line-number",
      "--max-count",
      String(MARKDOWN_MATCHES_PER_FILE),
      "-e",
      pattern,
      "--",
      ...batch
    ];
    const result = spawnSync("rg", args, spawnOptions);
    const stdout = String(result.stdout || "");
    const producedOutput = Boolean(stdout.trim());

    if (result.error && result.error.code === "ENOENT") return null;
    // Partial output (e.g. after a timeout) is still parsed below.
    if (result.error && !producedOutput) return matches;
    // rg exits 0 on matches and 1 on no matches; anything else is a failure.
    const exitedCleanly = result.status === 0 || result.status === 1;
    if (!exitedCleanly && !producedOutput) return matches;

    for (const line of stdout.split(/\r?\n/)) {
      if (!line.trim()) continue;
      let event;
      try {
        event = JSON.parse(line);
      } catch {
        continue;
      }
      if (event.type !== "match") continue;
      const file = event.data?.path?.text;
      const lineNumber = event.data?.line_number;
      if (file && lineNumber) matches.push({ path: file, line: lineNumber });
      if (matches.length >= maxMatches) return matches;
    }
  }
  return matches;
}
272
826
 
273
- function jsLineMatches(queryTokens, files) {
827
+ function jsLineMatches(queryTokens, files, options = {}) {
274
828
  const matches = [];
829
+ const maxMatches = Math.max(1, Number(options.maxMatches || 200));
275
830
  for (const file of files) {
276
831
  let lines;
277
832
  try {
@@ -279,17 +834,22 @@ function jsLineMatches(queryTokens, files) {
279
834
  } catch {
280
835
  continue;
281
836
  }
837
+ let fileMatches = 0;
282
838
  for (let index = 0; index < lines.length; index++) {
283
839
  const lower = lines[index].toLowerCase();
284
- if (queryTokens.some((token) => lower.includes(token))) matches.push({ path: file, line: index + 1 });
840
+ if (!queryTokens.some((token) => lower.includes(token))) continue;
841
+ matches.push({ path: file, line: index + 1 });
842
+ fileMatches++;
843
+ if (matches.length >= maxMatches) return matches;
844
+ if (fileMatches >= MARKDOWN_MATCHES_PER_FILE) break;
285
845
  }
286
846
  }
287
847
  return matches;
288
848
  }
289
849
 
290
- function readLineWindow(file, lineNumber, radius = 2) {
850
+ function readLineWindow(file, lineNumber, radius = 2, cache) {
291
851
  try {
292
- const lines = fs.readFileSync(file, "utf8").split(/\r?\n/);
852
+ const lines = readMarkdownLines(file, cache);
293
853
  const start = Math.max(0, lineNumber - 1 - radius);
294
854
  const end = Math.min(lines.length, lineNumber + radius);
295
855
  return lines.slice(start, end).join("\n").trim();
@@ -298,9 +858,9 @@ function readLineWindow(file, lineNumber, radius = 2) {
298
858
  }
299
859
  }
300
860
 
301
- function inferRoleFromMarkdown(file, lineNumber) {
861
+ function inferRoleFromMarkdown(file, lineNumber, cache) {
302
862
  try {
303
- const lines = fs.readFileSync(file, "utf8").split(/\r?\n/);
863
+ const lines = readMarkdownLines(file, cache);
304
864
  for (let index = Math.min(lineNumber - 1, lines.length - 1); index >= 0; index--) {
305
865
  const match = lines[index].match(/^##\s+([A-Za-z_ -]+)\s+-\s+/);
306
866
  if (match) return match[1].trim().toLowerCase().replace(/\s+/g, "_");
@@ -311,18 +871,26 @@ function inferRoleFromMarkdown(file, lineNumber) {
311
871
  return "unknown";
312
872
  }
313
873
 
874
/**
 * Reads a file as UTF-8 and splits it into lines, optionally memoizing the
 * result per resolved path so repeated lookups in one search read each file
 * only once.
 *
 * @param {string} file Path of the file to read.
 * @param {Map<string, string[]>} [cache] Optional memo keyed by absolute path.
 * @returns {string[]} The file's lines (split on `\n` or `\r\n`).
 * @throws Whatever `fs.readFileSync` throws for an unreadable file.
 */
function readMarkdownLines(file, cache) {
  const cacheKey = path.resolve(file);
  if (cache?.has(cacheKey)) {
    return cache.get(cacheKey);
  }
  const contents = fs.readFileSync(file, "utf8");
  const lines = contents.split(/\r?\n/);
  if (cache) {
    cache.set(cacheKey, lines);
  }
  return lines;
}
881
+
314
882
/**
 * Escapes regular-expression metacharacters in `value` so it can be used as a
 * literal inside a search pattern.
 *
 * @param {*} value Value to escape; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeRegex(value) {
  const literal = String(value);
  const escaped = literal.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
  return escaped;
}
317
885
 
318
- function bm25Score(doc, queryTokens, index) {
886
+ function bm25Score(doc, queryTokens, index, docIndex, termFrequencies) {
319
887
  const k1 = 1.2;
320
888
  const b = 0.75;
321
889
  const n = Math.max(1, index.docCount || 1);
322
890
  const avgdl = Math.max(1, index.avgDocLength || 1);
323
891
  let score = 0;
324
892
  for (const token of queryTokens) {
325
- const tf = doc.tf?.[token] || 0;
893
+ const tf = termFrequencyForDoc(doc, token, docIndex, termFrequencies);
326
894
  if (!tf) continue;
327
895
  const df = index.df?.[token] || 0;
328
896
  const idf = Math.log(1 + (n - df + 0.5) / (df + 0.5));
@@ -331,6 +899,12 @@ function bm25Score(doc, queryTokens, index) {
331
899
  return score;
332
900
  }
333
901
 
902
/**
 * Look up how often a token occurs in a document.
 *
 * The per-token map (token -> Map(docIndex -> count)) is consulted first;
 * when it yields nothing truthy, the count stored on the document itself
 * (doc.tf) is used, defaulting to 0.
 *
 * @param {object} doc - Document record; may carry a `tf` object.
 * @param {string} token - Term to look up.
 * @param {number} docIndex - Key of the document inside the per-token maps.
 * @param {Map<string, Map<number, number>>} [termFrequencies] - Optional lookup.
 * @returns {number} The term frequency, or 0 when unknown.
 */
function termFrequencyForDoc(doc, token, docIndex, termFrequencies) {
  const fromLookup = termFrequencies?.get(token)?.get(docIndex);
  return fromLookup || doc.tf?.[token] || 0;
}
907
+
334
908
  function chunkText(text, maxTokens = 220, overlap = 40) {
335
909
  const words = String(text || "").split(/\s+/).filter(Boolean);
336
910
  if (words.length <= maxTokens) return [words.join(" ")].filter(Boolean);
@@ -415,7 +989,9 @@ function historySessionSummary(session) {
415
989
  messages: session.messageCount,
416
990
  user_messages: Number.isFinite(Number(session.userMessageCount)) ? Number(session.userMessageCount) : undefined,
417
991
  usage: session.usage || undefined,
992
+ estimatedUsage: session.estimatedUsage || undefined,
418
993
  models: session.models || undefined,
994
+ cursorCommandTypeCounts: session.cursorCommandTypeCounts || undefined,
419
995
  conversation: session.conversationPath,
420
996
  transcript: session.transcriptPath
421
997
  };
@@ -503,7 +1079,7 @@ function normalizeProviderFilter(value) {
503
1079
  codex_desktop: { provider: "codex", sourceType: "codex-desktop-history", sourceTypes: ["codex-desktop-history"] },
504
1080
  cursor: { provider: "cursor" },
505
1081
  cline: { provider: "cline", sourceType: "cline-task-history", sourceTypes: ["cline-task-history"] },
506
- opencode: { provider: "opencode", sourceType: "opencode-history", sourceTypes: ["opencode-history"] },
1082
+ opencode: { provider: "opencode", sourceTypes: ["opencode-history", "opencode-sqlite-history"] },
507
1083
  aider: { provider: "aider", sourceType: "aider-chat-history", sourceTypes: ["aider-chat-history"] },
508
1084
  devin: { provider: "devin" },
509
1085
  devin_cli: { provider: "devin", sourceType: "devin-cli-history", sourceTypes: ["devin-cli-history"] },
@@ -541,17 +1117,58 @@ function matchesRepoFilter(session, repoFilter) {
541
1117
  .some((value) => value === wanted || value.includes(wanted));
542
1118
  }
543
1119
 
544
/**
 * Return the current index summary, rebuilding the index first when required.
 *
 * Decision order:
 *  1. Indexing paused in config -> return the stored summary, never rebuild.
 *  2. Index file missing (ENOENT) -> rebuild. Other stat errors are rethrown.
 *  3. Stored summary version differs from INDEX_VERSION -> rebuild.
 *  4. Index file younger than the configured interval -> reuse the summary.
 *  5. Otherwise reuse it only if indexIsStale() reports it is not stale.
 *
 * @param {object} [env=process.env] - Environment used to resolve config and paths.
 * @param {object} [options={}] - Passed through to rebuildIndexSummary().
 * @returns {{paused: boolean, index: *, rebuilt?: boolean}} `rebuilt: true` is
 *   present only when a rebuild was performed.
 */
function reindexIfNeeded(env = process.env, options = {}) {
  const cfg = loadConfig(env);
  const indexPath = paths(env).index;
  if (cfg.index.paused) {
    return { paused: true, index: readIndexSummary(indexPath) };
  }

  let indexStat = null;
  try {
    indexStat = fs.statSync(indexPath);
  } catch (error) {
    // A missing index simply means "build it"; anything else is a real failure.
    if (error.code !== "ENOENT") throw error;
  }

  if (indexStat) {
    const summary = readIndexSummary(indexPath);
    if (summary?.version === INDEX_VERSION) {
      const ageMs = Date.now() - indexStat.mtimeMs;
      // The staleness check only runs once the index is older than the interval.
      const reusable = ageMs < configuredIndexIntervalMs(cfg) || !indexIsStale(indexPath, env);
      if (reusable) {
        return { paused: false, index: summary };
      }
    }
  }

  return { paused: false, index: rebuildIndexSummary(env, options), rebuilt: true };
}
1139
+
1140
/**
 * Convert the configured re-index interval (config.index.intervalMinutes)
 * into milliseconds.
 *
 * A missing (null/undefined) or non-finite setting falls back to 10 minutes,
 * and anything below one minute is raised to one minute.
 *
 * @param {object} [config] - Loaded configuration; may be nullish.
 * @returns {number} Interval in milliseconds (at least 60000).
 */
function configuredIndexIntervalMs(config) {
  const defaultMinutes = 10;
  const raw = config?.index?.intervalMinutes ?? defaultMinutes;
  let minutes = Number(raw);
  if (!Number.isFinite(minutes)) {
    minutes = defaultMinutes;
  }
  const clamped = Math.max(1, minutes);
  return clamped * 60 * 1000;
}
1144
+
1145
/**
 * Rebuild the index in a separate Node.js process and return its summary.
 *
 * The child runs an inline script that requires this module, calls
 * buildIndexSummary(process.env) and prints the result as JSON on stdout;
 * the parent blocks until it exits and parses that output.
 *
 * @param {object} [env=process.env] - Environment handed to the child process.
 * @returns {object|null} The parsed summary, or null when the child printed
 *   nothing or a falsy JSON value.
 * @throws {Error} When the child cannot be spawned (or overflows the 1 MiB
 *   output buffer), exits unsuccessfully, or prints invalid JSON.
 */
function buildIndexInChild(env = process.env) {
  const script = `
    process.title = "agentlog-index";
    const { buildIndexSummary } = require(${JSON.stringify(__filename)});
    const index = buildIndexSummary(process.env);
    process.stdout.write(JSON.stringify(index));
  `;
  const result = spawnSync(process.execPath, ["-e", script], {
    argv0: "agentlog-index",
    env,
    encoding: "utf8",
    maxBuffer: 1024 * 1024
  });
  if (result.error) throw result.error;
  if (result.status !== 0) {
    // spawnSync reports status === null when the child was killed by a signal;
    // name the signal instead of printing "status null".
    const outcome = result.status === null && result.signal
      ? `index rebuild terminated by signal ${result.signal}`
      : `index rebuild exited with status ${result.status}`;
    const message = String(result.stderr || result.stdout || outcome).trim();
    throw new Error(message);
  }
  try {
    return JSON.parse(result.stdout || "null") || null;
  } catch (error) {
    throw new Error(`index rebuild returned invalid summary: ${error.message}`);
  }
}
1169
+
1170
/**
 * Rebuild the index and return its summary, either inside this process or in
 * a child process.
 *
 * @param {object} [env=process.env] - Environment forwarded to the builder.
 * @param {object} [options={}] - A truthy `rebuildInProcess` selects
 *   buildIndexSummary(); otherwise buildIndexInChild() is used.
 * @returns {*} Whatever the selected builder returns.
 */
function rebuildIndexSummary(env = process.env, options = {}) {
  if (options.rebuildInProcess) {
    return buildIndexSummary(env);
  }
  return buildIndexInChild(env);
}
556
1173
 
557
1174
  function indexIsStale(indexPath, env = process.env) {
@@ -608,10 +1225,12 @@ const STOP_WORDS = new Set([
608
1225
 
609
1226
  module.exports = {
610
1227
  buildIndex,
1228
+ buildIndexSummary,
611
1229
  chunkText,
612
1230
  listHistorySessions,
613
1231
  listRecentSessions,
614
1232
  loadIndex,
1233
+ readIndexSummary,
615
1234
  reindexIfNeeded,
616
1235
  sessionHistoryTime,
617
1236
  searchPastSessions,