akm-cli 0.9.0-beta.6 → 0.9.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,12 +11,14 @@
11
11
  * `searchLocal()` and `show.ts`, centralizing the "indexed yet?" gap handling
12
12
  * behind a single entry point.
13
13
  */
14
+ import { spawn } from "node:child_process";
14
15
  import fs from "node:fs";
15
16
  import path from "node:path";
16
17
  import { ASSET_SPECS, TYPE_DIRS } from "../core/asset/asset-spec.js";
17
- import { getDbPath } from "../core/paths.js";
18
+ import { getDataDir, getDbPath } from "../core/paths.js";
18
19
  import { warn } from "../core/warn.js";
19
- import { closeDatabase, getEntryCount, getMeta, openExistingDatabase } from "./db/db.js";
20
+ import { closeDatabase, getEntryCount, getIndexedFilePaths, getMeta, openExistingDatabase } from "./db/db.js";
21
+ import { acquireIndexWriterLease, handoffIndexWriterLeaseToPid } from "./index-writer-lock.js";
20
22
  function getIndexableFiles(root, spec) {
21
23
  if (!fs.existsSync(root))
22
24
  return [];
@@ -52,16 +54,34 @@ function getIndexableFiles(root, spec) {
52
54
  }
53
55
  return files;
54
56
  }
55
- function hasNewerIndexableFiles(stashDir, builtAt) {
56
- if (!builtAt)
57
- return true;
58
- const builtAtMs = new Date(builtAt).getTime();
59
- if (!Number.isFinite(builtAtMs))
60
- return true;
57
+ /**
58
+ * Whether any indexable file under `stashDir` is newer than the last build, or
59
+ * has never been indexed at all.
60
+ *
61
+ * Two independent signals, because neither alone is sufficient:
62
+ * 1. **mtime > builtAt** — catches in-place *edits* of already-indexed files.
63
+ * 2. **path not in `indexedPaths`** — catches *newly added* files. This is
64
+ * clock-independent on purpose: a freshly-written file can have a
65
+ * filesystem mtime that compares as *older* than the wall-clock `builtAt`
66
+ * (the two clocks are not perfectly synchronized and `builtAt` is
67
+ * millisecond-truncated), so the mtime test alone silently misses
68
+ * additions made within ~a millisecond of the previous build.
69
+ *
70
+ * `getIndexableFiles` applies each asset type's own relevance filter, so
71
+ * non-indexed companion files (e.g. `package.json` next to a knowledge doc) are
72
+ * never considered and do not produce false "new file" positives.
73
+ */
74
+ function hasNewerIndexableFiles(stashDir, builtAt, indexedPaths) {
75
+ const builtAtMs = builtAt ? new Date(builtAt).getTime() : Number.NaN;
76
+ const builtAtUsable = Number.isFinite(builtAtMs);
61
77
  for (const [type, spec] of Object.entries(ASSET_SPECS)) {
62
78
  const typeRoot = path.join(stashDir, TYPE_DIRS[type] ?? spec.stashDir);
63
79
  const files = getIndexableFiles(typeRoot, spec);
64
80
  for (const file of files) {
81
+ if (!indexedPaths.has(file))
82
+ return true;
83
+ if (!builtAtUsable)
84
+ return true;
65
85
  try {
66
86
  if (fs.statSync(file).mtimeMs > builtAtMs)
67
87
  return true;
@@ -89,7 +109,7 @@ export function isIndexStale(stashDir) {
89
109
  if (entryCount === 0)
90
110
  return true;
91
111
  const builtAt = getMeta(db, "builtAt");
92
- if (hasNewerIndexableFiles(stashDir, builtAt))
112
+ if (hasNewerIndexableFiles(stashDir, builtAt, getIndexedFilePaths(db)))
93
113
  return true;
94
114
  const storedStashDir = getMeta(db, "stashDir");
95
115
  if (storedStashDir !== stashDir) {
@@ -114,16 +134,84 @@ export function isIndexStale(stashDir) {
114
134
  }
115
135
  }
116
136
  /**
117
- * Run an incremental index when the local index is stale. Best-effort
118
- * failures are logged as warnings but never thrown, so the caller can
119
- * proceed (and surface a proper "not in index" error if the index is
120
- * still unusable).
121
- *
122
- * Returns `true` if an index run was attempted.
137
+ * Whether the existing index can serve queries for `stashDir` *right now*
138
+ * i.e. the DB file exists, the `entries` table holds rows, and those rows were
139
+ * built for this stash (it is the stored primary stash or appears in the
140
+ * stored `stashDirs` set). When this is true the index is at worst
141
+ * content-stale, so the `#607` background-reindex optimization is safe: the
142
+ * caller gets slightly-stale-but-relevant results immediately. When it is
143
+ * false the existing index has nothing relevant to return (no DB, no `entries`
144
+ * table, zero rows, or built for a different stash), so a background reindex
145
+ * would leave the caller empty until the next read — those cases must rebuild
146
+ * inline.
123
147
  */
124
- export async function ensureIndex(stashDir) {
125
- if (!isIndexStale(stashDir))
148
+ function indexCanServeStash(stashDir) {
149
+ const dbPath = getDbPath();
150
+ if (!fs.existsSync(dbPath))
151
+ return false;
152
+ let db;
153
+ try {
154
+ db = openExistingDatabase(dbPath);
155
+ if (getEntryCount(db) === 0)
156
+ return false;
157
+ const storedStashDir = getMeta(db, "stashDir");
158
+ if (storedStashDir === stashDir)
159
+ return true;
160
+ try {
161
+ const storedDirs = JSON.parse(getMeta(db, "stashDirs") ?? "[]");
162
+ return storedDirs.includes(stashDir);
163
+ }
164
+ catch {
165
+ return false;
166
+ }
167
+ }
168
+ catch {
169
+ // No `entries` table (or otherwise unreadable) — cannot serve.
126
170
  return false;
171
+ }
172
+ finally {
173
+ if (db)
174
+ closeDatabase(db);
175
+ }
176
+ }
177
+ /**
178
+ * Spawn a background `akm index` process. Non-blocking — returns immediately.
179
+ * Background callers share the same global index-writer lease as foreground
180
+ * writers, so stale-read-triggered auto-index attempts coalesce safely.
181
+ */
182
+ async function spawnBackgroundReindex(_stashDir) {
183
+ const dataDir = getDataDir();
184
+ const logFile = path.join(dataDir, "logs", "index-background.log");
185
+ fs.mkdirSync(path.dirname(logFile), { recursive: true });
186
+ const lease = await acquireIndexWriterLease({ mode: "try", purpose: "background-reindex-spawn" });
187
+ if (!lease)
188
+ return;
189
+ const akmBin = process.argv[0];
190
+ const akmScript = process.argv[1];
191
+ try {
192
+ const child = spawn(akmBin, [akmScript, "index", "--background"], {
193
+ detached: true,
194
+ stdio: ["ignore", fs.openSync(logFile, "a"), fs.openSync(logFile, "a")],
195
+ env: { ...process.env },
196
+ });
197
+ if (!child.pid) {
198
+ lease.release();
199
+ return;
200
+ }
201
+ handoffIndexWriterLeaseToPid(lease, child.pid, "background-reindex");
202
+ try {
203
+ child.unref();
204
+ }
205
+ catch {
206
+ // ignore
207
+ }
208
+ }
209
+ catch (error) {
210
+ lease.release();
211
+ throw error;
212
+ }
213
+ }
214
+ async function runInlineReindex(stashDir) {
127
215
  try {
128
216
  const { akmIndex } = await import("./indexer.js");
129
217
  await akmIndex({ stashDir });
@@ -134,3 +222,50 @@ export async function ensureIndex(stashDir) {
134
222
  return true;
135
223
  }
136
224
  }
225
+ /**
226
+ * Ensure the local index exists and is fresh enough for the caller's needs.
227
+ *
228
+ * Default mode is `background`, which preserves the low-latency behavior used
229
+ * by read paths (`search`, `show`, `feedback`): when a populated index is
230
+ * merely stale, spawn a detached reindex and proceed against the existing
231
+ * index. When the index is entirely absent (no DB / no `entries` table / zero
232
+ * rows) the rebuild runs inline regardless of mode, since there is nothing to
233
+ * proceed against.
234
+ *
235
+ * `mode: "blocking"` waits for the rebuild to finish before returning. Use
236
+ * this for callers like `improve` whose planning logic depends on a populated
237
+ * `entries` table in the same process.
238
+ *
239
+ * Returns `true` if an index run was attempted.
240
+ */
241
+ export async function ensureIndex(stashDir, options = {}) {
242
+ if (!isIndexStale(stashDir))
243
+ return false;
244
+ // Blocking when explicitly requested, or whenever the existing index cannot
245
+ // serve this stash (absent DB, no `entries` table, zero rows, or built for a
246
+ // different stash): a background reindex returns immediately and would leave
247
+ // a first-time caller (search, curate, wiki, show, feedback) with empty
248
+ // results. Building inline is a one-off cost; a populated index for this
249
+ // stash that is merely content-stale still refreshes in the background.
250
+ if (options.mode === "blocking" || !indexCanServeStash(stashDir)) {
251
+ return runInlineReindex(stashDir);
252
+ }
253
+ // The background path re-invokes the akm CLI as a detached child via
254
+ // `process.argv[1]`. That is only the akm entrypoint when THIS process is the
255
+ // akm CLI itself — which the CLI startup block signals with AKM_CLI_ENTRY=1.
256
+ // In any other host (the in-process test runner, a library embedding akm),
257
+ // argv[1] points at the host (e.g. the test runner), so spawning it would
258
+ // launch the wrong program and orphan it. Build inline there instead — same
259
+ // resulting index, no detached process.
260
+ if (process.env.AKM_CLI_ENTRY !== "1") {
261
+ return runInlineReindex(stashDir);
262
+ }
263
+ try {
264
+ await spawnBackgroundReindex(stashDir);
265
+ return true;
266
+ }
267
+ catch (error) {
268
+ warn("Background reindex spawn failed, proceeding with existing index:", error instanceof Error ? error.message : String(error));
269
+ return true;
270
+ }
271
+ }
@@ -0,0 +1,99 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import fs from "node:fs";
5
+ import path from "node:path";
6
+ import { probeLock, releaseLock, releaseLockIfOwned, tryAcquireLockSync } from "../core/file-lock.js";
7
+ import { getDbPath, getIndexWriterLockPath } from "../core/paths.js";
8
+ const INDEX_WRITER_LOCK_STALE_AFTER_MS = 12 * 60 * 60 * 1000;
9
+ const INDEX_WRITER_WAIT_MS = 100;
10
+ const heldLocks = new Map();
11
+ function buildPayload(purpose, pid = process.pid) {
12
+ return JSON.stringify({
13
+ pid,
14
+ purpose,
15
+ dbPath: getDbPath(),
16
+ startedAt: new Date().toISOString(),
17
+ });
18
+ }
19
+ function delay(ms) {
20
+ return new Promise((resolve) => setTimeout(resolve, ms));
21
+ }
22
+ function throwIfAborted(signal) {
23
+ if (!signal?.aborted)
24
+ return;
25
+ throw signal.reason instanceof Error ? signal.reason : new Error("index writer wait aborted");
26
+ }
27
+ function releaseHeldLock(lockPath) {
28
+ const held = heldLocks.get(lockPath);
29
+ if (!held)
30
+ return;
31
+ held.depth -= 1;
32
+ if (held.depth > 0)
33
+ return;
34
+ heldLocks.delete(lockPath);
35
+ process.off("exit", held.exitHandler);
36
+ releaseLockIfOwned(lockPath, process.pid);
37
+ }
38
+ function retainHeldLock(lockPath) {
39
+ const existing = heldLocks.get(lockPath);
40
+ if (existing) {
41
+ existing.depth += 1;
42
+ return { lockPath, release: () => releaseHeldLock(lockPath) };
43
+ }
44
+ const exitHandler = () => releaseLockIfOwned(lockPath, process.pid);
45
+ process.on("exit", exitHandler);
46
+ heldLocks.set(lockPath, { depth: 1, exitHandler });
47
+ return { lockPath, release: () => releaseHeldLock(lockPath) };
48
+ }
49
+ function detachHeldLock(lockPath) {
50
+ const held = heldLocks.get(lockPath);
51
+ if (!held)
52
+ return;
53
+ heldLocks.delete(lockPath);
54
+ process.off("exit", held.exitHandler);
55
+ }
56
+ export async function acquireIndexWriterLease(options) {
57
+ const mode = options.mode ?? "wait";
58
+ const lockPath = getIndexWriterLockPath();
59
+ fs.mkdirSync(path.dirname(lockPath), { recursive: true });
60
+ if (heldLocks.has(lockPath)) {
61
+ return retainHeldLock(lockPath);
62
+ }
63
+ while (true) {
64
+ throwIfAborted(options.signal);
65
+ if (tryAcquireLockSync(lockPath, buildPayload(options.purpose))) {
66
+ return retainHeldLock(lockPath);
67
+ }
68
+ const probe = probeLock(lockPath, { staleAfterMs: INDEX_WRITER_LOCK_STALE_AFTER_MS });
69
+ if (probe.state === "held" && probe.holderPid === process.pid) {
70
+ return retainHeldLock(lockPath);
71
+ }
72
+ if (probe.state === "stale") {
73
+ releaseLock(lockPath);
74
+ continue;
75
+ }
76
+ if (mode === "try")
77
+ return undefined;
78
+ await delay(INDEX_WRITER_WAIT_MS);
79
+ }
80
+ }
81
+ export async function withIndexWriterLease(options, run) {
82
+ const lease = await acquireIndexWriterLease(options);
83
+ if (!lease) {
84
+ throw new Error(`index writer lease unavailable for ${options.purpose}`);
85
+ }
86
+ try {
87
+ return await run();
88
+ }
89
+ finally {
90
+ lease.release();
91
+ }
92
+ }
93
+ export function handoffIndexWriterLeaseToPid(lease, pid, purpose) {
94
+ fs.writeFileSync(lease.lockPath, buildPayload(purpose, pid), "utf8");
95
+ detachHeldLock(lease.lockPath);
96
+ }
97
+ export function probeIndexWriterLease() {
98
+ return probeLock(getIndexWriterLockPath(), { staleAfterMs: INDEX_WRITER_LOCK_STALE_AFTER_MS });
99
+ }
@@ -12,6 +12,7 @@ import { resolveIndexPassLLM } from "../llm/index-passes.js";
12
12
  import { takeWorkflowDocument } from "../workflows/runtime/document-cache.js";
13
13
  import { clearStaleCacheEntries, closeDatabase, deleteEntriesByDir, deleteEntriesByIds, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getAllEntriesForEmbedding, getEmbeddableEntryCount, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, relinkUsageEvents, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, upsertWorkflowDocument, warnIfVecMissing, } from "./db/db.js";
14
14
  import { deleteStoredGraph } from "./db/graph-db.js";
15
+ import { withIndexWriterLease } from "./index-writer-lock.js";
15
16
  import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isEnrichmentComplete, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./passes/metadata.js";
16
17
  import { buildSearchText } from "./search/search-fields.js";
17
18
  import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./search/semantic-status.js";
@@ -225,119 +226,121 @@ function runCleanPass(db, dryRun) {
225
226
  }
226
227
  // ── Indexer ──────────────────────────────────────────────────────────────────
227
228
  export async function akmIndex(options) {
228
- const stashDir = options?.stashDir || resolveStashDir();
229
- const onProgress = options?.onProgress ?? (() => { });
230
- const signal = options?.signal;
231
- const reEnrich = options?.reEnrich === true;
232
- const full = options?.full === true;
233
- const clean = options?.clean === true;
234
- const dryRun = options?.dryRun === true;
235
- // Load config and resolve all stash sources
236
- const { loadConfig } = await import("../core/config/config.js");
237
- const config = loadConfig();
238
- // One-time, read-only guard: warn if the writable stash still holds an
239
- // un-migrated `vaults/` directory. In 0.9.0 the indexer skips `vaults/`
240
- // entirely, so an unmigrated vault's `.env` data would silently never be
241
- // indexed. Non-destructive: only stats, never reads/writes/deletes.
242
- const { warnOnUnmigratedVaults } = await import("./usage/unmigrated-vaults-guard.js");
243
- warnOnUnmigratedVaults(stashDir);
244
- // Ensure git stash caches are extracted before resolving stash dirs,
245
- // so their content directories exist on disk for the walker to discover.
246
- const { ensureSourceCaches, resolveSourceEntries } = await import("./search/search-source.js");
247
- await ensureSourceCaches(config, { force: full });
248
- const allSourceEntries = resolveSourceEntries(stashDir, config);
249
- const allSourceDirs = allSourceEntries.map((s) => s.path);
250
- const t0 = Date.now();
251
- // Open database — pass embedding dimension from config if available
252
- const dbPath = getDbPath();
253
- const embeddingDim = config.embedding?.dimension;
254
- const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
255
- try {
256
- // Determine incremental vs full mode
257
- const prevStashDir = getMeta(db, "stashDir");
258
- const prevBuiltAt = getMeta(db, "builtAt");
259
- const isIncremental = !full && prevStashDir === stashDir && !!prevBuiltAt;
260
- const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
261
- // Assemble the run context
262
- const ctx = {
263
- db,
264
- config,
265
- sources: allSourceEntries,
266
- sourceDirs: allSourceDirs,
267
- full,
268
- reEnrich,
269
- stashDir,
270
- onProgress,
271
- signal,
272
- timing: {
273
- t0,
274
- tWalkStart: t0,
275
- tWalkEnd: t0,
276
- tLlmEnd: t0,
277
- tFtsEnd: t0,
278
- tEmbedEnd: t0,
279
- },
280
- isIncremental,
281
- builtAtMs,
282
- hadRemovedSources: false,
283
- scannedDirs: 0,
284
- skippedDirs: 0,
285
- generatedCount: 0,
286
- walkWarnings: [],
287
- dirsNeedingLlm: [],
288
- embeddingResult: null,
289
- graphExtractionResult: null,
290
- };
291
- onProgress({
292
- phase: "summary",
293
- message: buildIndexSummaryMessage({
229
+ return withIndexWriterLease({ purpose: "akm-index", signal: options?.signal }, async () => {
230
+ const stashDir = options?.stashDir || resolveStashDir();
231
+ const onProgress = options?.onProgress ?? (() => { });
232
+ const signal = options?.signal;
233
+ const reEnrich = options?.reEnrich === true;
234
+ const full = options?.full === true;
235
+ const clean = options?.clean === true;
236
+ const dryRun = options?.dryRun === true;
237
+ // Load config and resolve all stash sources
238
+ const { loadConfig } = await import("../core/config/config.js");
239
+ const config = loadConfig();
240
+ // One-time, read-only guard: warn if the writable stash still holds an
241
+ // un-migrated `vaults/` directory. In 0.9.0 the indexer skips `vaults/`
242
+ // entirely, so an unmigrated vault's `.env` data would silently never be
243
+ // indexed. Non-destructive: only stats, never reads/writes/deletes.
244
+ const { warnOnUnmigratedVaults } = await import("./usage/unmigrated-vaults-guard.js");
245
+ warnOnUnmigratedVaults(stashDir);
246
+ // Ensure git stash caches are extracted before resolving stash dirs,
247
+ // so their content directories exist on disk for the walker to discover.
248
+ const { ensureSourceCaches, resolveSourceEntries } = await import("./search/search-source.js");
249
+ await ensureSourceCaches(config, { force: full });
250
+ const allSourceEntries = resolveSourceEntries(stashDir, config);
251
+ const allSourceDirs = allSourceEntries.map((s) => s.path);
252
+ const t0 = Date.now();
253
+ // Open database — pass embedding dimension from config if available
254
+ const dbPath = getDbPath();
255
+ const embeddingDim = config.embedding?.dimension;
256
+ const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
257
+ try {
258
+ // Determine incremental vs full mode
259
+ const prevStashDir = getMeta(db, "stashDir");
260
+ const prevBuiltAt = getMeta(db, "builtAt");
261
+ const isIncremental = !full && prevStashDir === stashDir && !!prevBuiltAt;
262
+ const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
263
+ // Assemble the run context
264
+ const ctx = {
265
+ db,
266
+ config,
267
+ sources: allSourceEntries,
268
+ sourceDirs: allSourceDirs,
269
+ full,
270
+ reEnrich,
271
+ stashDir,
272
+ onProgress,
273
+ signal,
274
+ timing: {
275
+ t0,
276
+ tWalkStart: t0,
277
+ tWalkEnd: t0,
278
+ tLlmEnd: t0,
279
+ tFtsEnd: t0,
280
+ tEmbedEnd: t0,
281
+ },
282
+ isIncremental,
283
+ builtAtMs,
284
+ hadRemovedSources: false,
285
+ scannedDirs: 0,
286
+ skippedDirs: 0,
287
+ generatedCount: 0,
288
+ walkWarnings: [],
289
+ dirsNeedingLlm: [],
290
+ embeddingResult: null,
291
+ graphExtractionResult: null,
292
+ };
293
+ onProgress({
294
+ phase: "summary",
295
+ message: buildIndexSummaryMessage({
296
+ mode: isIncremental ? "incremental" : "full",
297
+ sourcesCount: allSourceDirs.length,
298
+ semanticSearchMode: config.semanticSearchMode,
299
+ embeddingProvider: getEmbeddingProvider(config.embedding),
300
+ llmEnabled: !!resolveIndexPassLLM("enrichment", config),
301
+ vecAvailable: isVecAvailable(db),
302
+ }),
303
+ });
304
+ // ── Phase sequence ───────────────────────────────────────────────────────
305
+ await runSourceCachePhase(ctx);
306
+ await runWalkPhase(ctx);
307
+ await runEmbeddingPhase(ctx);
308
+ await runFinalizePhase(ctx);
309
+ // ────────────────────────────────────────────────────────────────────────
310
+ const { _verification: verification, _totalEntries: totalEntries } = ctx;
311
+ const { timing } = ctx;
312
+ // ── Clean pass ───────────────────────────────────────────────────────────
313
+ // After the normal index completes, remove entries whose source files no
314
+ // longer exist on disk. Remote entries (empty file_path) are skipped.
315
+ let cleanResult;
316
+ if (clean) {
317
+ cleanResult = runCleanPass(db, dryRun);
318
+ }
319
+ // ────────────────────────────────────────────────────────────────────────
320
+ return {
321
+ stashDir,
322
+ totalEntries,
323
+ generatedMetadata: ctx.generatedCount,
324
+ indexPath: dbPath,
294
325
  mode: isIncremental ? "incremental" : "full",
295
- sourcesCount: allSourceDirs.length,
296
- semanticSearchMode: config.semanticSearchMode,
297
- embeddingProvider: getEmbeddingProvider(config.embedding),
298
- llmEnabled: !!resolveIndexPassLLM("enrichment", config),
299
- vecAvailable: isVecAvailable(db),
300
- }),
301
- });
302
- // ── Phase sequence ───────────────────────────────────────────────────────
303
- await runSourceCachePhase(ctx);
304
- await runWalkPhase(ctx);
305
- await runEmbeddingPhase(ctx);
306
- await runFinalizePhase(ctx);
307
- // ────────────────────────────────────────────────────────────────────────
308
- const { _verification: verification, _totalEntries: totalEntries } = ctx;
309
- const { timing } = ctx;
310
- // ── Clean pass ───────────────────────────────────────────────────────────
311
- // After the normal index completes, remove entries whose source files no
312
- // longer exist on disk. Remote entries (empty file_path) are skipped.
313
- let cleanResult;
314
- if (clean) {
315
- cleanResult = runCleanPass(db, dryRun);
326
+ directoriesScanned: ctx.scannedDirs,
327
+ directoriesSkipped: ctx.skippedDirs,
328
+ ...(ctx.walkWarnings.length > 0 ? { warnings: ctx.walkWarnings } : {}),
329
+ verification,
330
+ timing: {
331
+ totalMs: Date.now() - timing.t0,
332
+ walkMs: timing.tWalkEnd - timing.tWalkStart,
333
+ llmMs: timing.tLlmEnd - timing.tWalkEnd,
334
+ embedMs: timing.tEmbedEnd - timing.tLlmEnd,
335
+ ftsMs: timing.tFtsEnd - timing.tEmbedEnd,
336
+ },
337
+ ...(cleanResult !== undefined ? { clean: cleanResult } : {}),
338
+ };
316
339
  }
317
- // ────────────────────────────────────────────────────────────────────────
318
- return {
319
- stashDir,
320
- totalEntries,
321
- generatedMetadata: ctx.generatedCount,
322
- indexPath: dbPath,
323
- mode: isIncremental ? "incremental" : "full",
324
- directoriesScanned: ctx.scannedDirs,
325
- directoriesSkipped: ctx.skippedDirs,
326
- ...(ctx.walkWarnings.length > 0 ? { warnings: ctx.walkWarnings } : {}),
327
- verification,
328
- timing: {
329
- totalMs: Date.now() - timing.t0,
330
- walkMs: timing.tWalkEnd - timing.tWalkStart,
331
- llmMs: timing.tLlmEnd - timing.tWalkEnd,
332
- embedMs: timing.tEmbedEnd - timing.tLlmEnd,
333
- ftsMs: timing.tFtsEnd - timing.tEmbedEnd,
334
- },
335
- ...(cleanResult !== undefined ? { clean: cleanResult } : {}),
336
- };
337
- }
338
- finally {
339
- closeDatabase(db);
340
- }
340
+ finally {
341
+ closeDatabase(db);
342
+ }
343
+ });
341
344
  }
342
345
  // ── Extracted helpers for indexing ────────────────────────────────────────────
343
346
  async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete = false, onProgress) {
@@ -298,7 +298,7 @@ export class ClaudeCodeProvider {
298
298
  const full = path.join(dir, entry.name);
299
299
  if (entry.isDirectory())
300
300
  yield* this.#walkJsonl(full);
301
- else if (entry.name.endsWith(".jsonl"))
301
+ else if (entry.name.endsWith(".jsonl") && entry.name !== "journal.jsonl")
302
302
  yield full;
303
303
  }
304
304
  }
@@ -119,9 +119,23 @@ function looksLikeContextOverflow(message) {
119
119
  /**
120
120
  * Decide whether a first-attempt {@link LlmCallError} is eligible for a single
121
121
  * retry. Retryable: HTTP 5xx (`provider_error` with statusCode >= 500) and
122
- * `network_error` whose message looks like a transient connection reset
123
- * (ECONNRESET / EPIPE / "fetch failed"). NOT retryable: 4xx, `rate_limited`
124
- * (429), `timeout`, `parse_error`, and context-overflow-classified errors.
122
+ * `network_error` whose message looks like a transient connection drop.
123
+ * NOT retryable: 4xx, `rate_limited` (429), `timeout`, `parse_error`, and
124
+ * context-overflow-classified errors.
125
+ *
126
+ * The connection-drop heuristic covers the substrings emitted across runtimes
127
+ * for a mid-flight socket close:
128
+ * - `ECONNRESET` / `EPIPE` — Node/libuv socket reset codes
129
+ * - `fetch failed` — undici's generic wrapper message
130
+ * - `socket connection was closed` — Bun's message for a dropped connection
131
+ * (e.g. "The socket connection was closed unexpectedly.")
132
+ * - `terminated` / `other side closed` — undici's phrasings for the same condition
133
+ *
134
+ * These all describe a transient transport failure where a second attempt can
135
+ * legitimately succeed, which is exactly the case a single bounded retry is
136
+ * meant to absorb. Before this list was widened, Bun's "socket connection was
137
+ * closed unexpectedly" fell through unretried and surfaced as a recurring
138
+ * failure in the improve/reflect and capability-probe flows.
125
139
  */
126
140
  function isRetryable(err) {
127
141
  if (looksLikeContextOverflow(err.message))
@@ -131,7 +145,12 @@ function isRetryable(err) {
131
145
  }
132
146
  if (err.code === "network_error") {
133
147
  const lower = err.message.toLowerCase();
134
- return lower.includes("econnreset") || lower.includes("epipe") || lower.includes("fetch failed");
148
+ return (lower.includes("econnreset") ||
149
+ lower.includes("epipe") ||
150
+ lower.includes("fetch failed") ||
151
+ lower.includes("socket connection was closed") ||
152
+ lower.includes("terminated") ||
153
+ lower.includes("other side closed"));
135
154
  }
136
155
  return false;
137
156
  }