akm-cli 0.9.0-beta.6 → 0.9.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +71 -0
- package/dist/cli.js +7 -0
- package/dist/commands/feedback-cli.js +42 -37
- package/dist/commands/graph/graph.js +75 -71
- package/dist/commands/health.js +10 -2
- package/dist/commands/improve/consolidate.js +18 -1
- package/dist/commands/improve/distill.js +26 -5
- package/dist/commands/improve/extract-prompt.js +1 -1
- package/dist/commands/improve/improve-auto-accept.js +6 -0
- package/dist/commands/improve/improve-profiles.js +4 -0
- package/dist/commands/improve/improve.js +720 -468
- package/dist/commands/improve/proactive-maintenance.js +113 -0
- package/dist/commands/improve/reflect.js +6 -0
- package/dist/commands/proposal/proposal.js +5 -0
- package/dist/commands/proposal/validators/proposals.js +67 -54
- package/dist/commands/read/curate.js +17 -0
- package/dist/commands/sources/stash-cli.js +10 -2
- package/dist/core/config/config-schema.js +11 -0
- package/dist/core/paths.js +3 -0
- package/dist/core/state-db.js +46 -1
- package/dist/indexer/db/db.js +97 -11
- package/dist/indexer/ensure-index.js +152 -17
- package/dist/indexer/index-writer-lock.js +99 -0
- package/dist/indexer/indexer.js +114 -111
- package/dist/integrations/harnesses/claude/session-log.js +1 -1
- package/dist/llm/client.js +23 -4
- package/dist/scripts/migrate-storage.js +85 -13
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8 -1
- package/dist/sources/providers/tar-utils.js +16 -8
- package/package.json +1 -1
|
@@ -11,12 +11,14 @@
|
|
|
11
11
|
* `searchLocal()` and `show.ts`, centralizing the "indexed yet?" gap handling
|
|
12
12
|
* behind a single entry point.
|
|
13
13
|
*/
|
|
14
|
+
import { spawn } from "node:child_process";
|
|
14
15
|
import fs from "node:fs";
|
|
15
16
|
import path from "node:path";
|
|
16
17
|
import { ASSET_SPECS, TYPE_DIRS } from "../core/asset/asset-spec.js";
|
|
17
|
-
import { getDbPath } from "../core/paths.js";
|
|
18
|
+
import { getDataDir, getDbPath } from "../core/paths.js";
|
|
18
19
|
import { warn } from "../core/warn.js";
|
|
19
|
-
import { closeDatabase, getEntryCount, getMeta, openExistingDatabase } from "./db/db.js";
|
|
20
|
+
import { closeDatabase, getEntryCount, getIndexedFilePaths, getMeta, openExistingDatabase } from "./db/db.js";
|
|
21
|
+
import { acquireIndexWriterLease, handoffIndexWriterLeaseToPid } from "./index-writer-lock.js";
|
|
20
22
|
function getIndexableFiles(root, spec) {
|
|
21
23
|
if (!fs.existsSync(root))
|
|
22
24
|
return [];
|
|
@@ -52,16 +54,34 @@ function getIndexableFiles(root, spec) {
|
|
|
52
54
|
}
|
|
53
55
|
return files;
|
|
54
56
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
/**
|
|
58
|
+
* Whether any indexable file under `stashDir` is newer than the last build, or
|
|
59
|
+
* has never been indexed at all.
|
|
60
|
+
*
|
|
61
|
+
* Two independent signals, because neither alone is sufficient:
|
|
62
|
+
* 1. **mtime > builtAt** — catches in-place *edits* of already-indexed files.
|
|
63
|
+
* 2. **path not in `indexedPaths`** — catches *newly added* files. This is
|
|
64
|
+
* clock-independent on purpose: a freshly-written file can have a
|
|
65
|
+
* filesystem mtime that compares as *older* than the wall-clock `builtAt`
|
|
66
|
+
* (the two clocks are not perfectly synchronized and `builtAt` is
|
|
67
|
+
* millisecond-truncated), so the mtime test alone silently misses
|
|
68
|
+
* additions made within ~a millisecond of the previous build.
|
|
69
|
+
*
|
|
70
|
+
* `getIndexableFiles` applies each asset type's own relevance filter, so
|
|
71
|
+
* non-indexed companion files (e.g. `package.json` next to a knowledge doc) are
|
|
72
|
+
* never considered and do not produce false "new file" positives.
|
|
73
|
+
*/
|
|
74
|
+
function hasNewerIndexableFiles(stashDir, builtAt, indexedPaths) {
|
|
75
|
+
const builtAtMs = builtAt ? new Date(builtAt).getTime() : Number.NaN;
|
|
76
|
+
const builtAtUsable = Number.isFinite(builtAtMs);
|
|
61
77
|
for (const [type, spec] of Object.entries(ASSET_SPECS)) {
|
|
62
78
|
const typeRoot = path.join(stashDir, TYPE_DIRS[type] ?? spec.stashDir);
|
|
63
79
|
const files = getIndexableFiles(typeRoot, spec);
|
|
64
80
|
for (const file of files) {
|
|
81
|
+
if (!indexedPaths.has(file))
|
|
82
|
+
return true;
|
|
83
|
+
if (!builtAtUsable)
|
|
84
|
+
return true;
|
|
65
85
|
try {
|
|
66
86
|
if (fs.statSync(file).mtimeMs > builtAtMs)
|
|
67
87
|
return true;
|
|
@@ -89,7 +109,7 @@ export function isIndexStale(stashDir) {
|
|
|
89
109
|
if (entryCount === 0)
|
|
90
110
|
return true;
|
|
91
111
|
const builtAt = getMeta(db, "builtAt");
|
|
92
|
-
if (hasNewerIndexableFiles(stashDir, builtAt))
|
|
112
|
+
if (hasNewerIndexableFiles(stashDir, builtAt, getIndexedFilePaths(db)))
|
|
93
113
|
return true;
|
|
94
114
|
const storedStashDir = getMeta(db, "stashDir");
|
|
95
115
|
if (storedStashDir !== stashDir) {
|
|
@@ -114,16 +134,84 @@ export function isIndexStale(stashDir) {
|
|
|
114
134
|
}
|
|
115
135
|
}
|
|
116
136
|
/**
|
|
117
|
-
*
|
|
118
|
-
*
|
|
119
|
-
*
|
|
120
|
-
*
|
|
121
|
-
*
|
|
122
|
-
*
|
|
137
|
+
* Whether the existing index can serve queries for `stashDir` *right now* —
|
|
138
|
+
* i.e. the DB file exists, the `entries` table holds rows, and those rows were
|
|
139
|
+
* built for this stash (it is the stored primary stash or appears in the
|
|
140
|
+
* stored `stashDirs` set). When this is true the index is at worst
|
|
141
|
+
* content-stale, so the `#607` background-reindex optimization is safe: the
|
|
142
|
+
* caller gets slightly-stale-but-relevant results immediately. When it is
|
|
143
|
+
* false the existing index has nothing relevant to return (no DB, no `entries`
|
|
144
|
+
* table, zero rows, or built for a different stash), so a background reindex
|
|
145
|
+
* would leave the caller empty until the next read — those cases must rebuild
|
|
146
|
+
* inline.
|
|
123
147
|
*/
|
|
124
|
-
|
|
125
|
-
|
|
148
|
+
function indexCanServeStash(stashDir) {
    const dbPath = getDbPath();
    // No database file at all: nothing can be served.
    if (!fs.existsSync(dbPath))
        return false;
    let db;
    try {
        db = openExistingDatabase(dbPath);
        // An index with zero rows has nothing relevant to return.
        if (getEntryCount(db) === 0)
            return false;
        // Fast path: the index was built with this stash as its primary stash.
        const storedStashDir = getMeta(db, "stashDir");
        if (storedStashDir === stashDir)
            return true;
        try {
            const storedDirs = JSON.parse(getMeta(db, "stashDirs") ?? "[]");
            // Only trust a real array. A JSON string would turn `.includes`
            // into a substring match and wrongly report a parent/prefix path
            // as servable; any other shape cannot list stash dirs at all.
            return Array.isArray(storedDirs) && storedDirs.includes(stashDir);
        }
        catch {
            // Malformed `stashDirs` metadata — treat as "not built for this stash".
            return false;
        }
    }
    catch {
        // No `entries` table (or otherwise unreadable) — cannot serve.
        return false;
    }
    finally {
        // Always release the handle, including on the early returns above.
        if (db)
            closeDatabase(db);
    }
}
|
|
177
|
+
/**
 * Spawn a background `akm index` process. Non-blocking — returns immediately.
 * Background callers share the same global index-writer lease as foreground
 * writers, so stale-read-triggered auto-index attempts coalesce safely.
 *
 * The lease is acquired in "try" mode; when it is unavailable the function
 * returns without spawning. On a successful spawn the lease is handed off to
 * the child's pid. If the child has no pid, or anything in the spawn path
 * throws, the lease is released (and the error rethrown in the latter case).
 *
 * @param {string} _stashDir - Unused; kept for signature compatibility.
 * @returns {Promise<void>}
 */
async function spawnBackgroundReindex(_stashDir) {
    const dataDir = getDataDir();
    const logFile = path.join(dataDir, "logs", "index-background.log");
    fs.mkdirSync(path.dirname(logFile), { recursive: true });
    const lease = await acquireIndexWriterLease({ mode: "try", purpose: "background-reindex-spawn" });
    if (!lease)
        return;
    const akmBin = process.argv[0];
    const akmScript = process.argv[1];
    let logFd;
    try {
        // One append-mode descriptor serves both stdout and stderr of the child.
        logFd = fs.openSync(logFile, "a");
        const child = spawn(akmBin, [akmScript, "index", "--background"], {
            detached: true,
            stdio: ["ignore", logFd, logFd],
            env: { ...process.env },
        });
        // A failed spawn (e.g. ENOENT) is reported asynchronously through the
        // child's 'error' event. Without a listener that event is rethrown as
        // an uncaught exception and takes down the calling command.
        child.on("error", (error) => {
            warn("Background reindex process error:", error instanceof Error ? error.message : String(error));
        });
        if (!child.pid) {
            lease.release();
            return;
        }
        handoffIndexWriterLeaseToPid(lease, child.pid, "background-reindex");
        try {
            child.unref();
        }
        catch {
            // ignore
        }
    }
    catch (error) {
        lease.release();
        throw error;
    }
    finally {
        // The child holds its own duplicate of the descriptor once spawn()
        // returns, so the parent's copy must be closed to avoid leaking it.
        if (logFd !== undefined) {
            try {
                fs.closeSync(logFd);
            }
            catch {
                // Best effort — a failed close must not mask the spawn outcome.
            }
        }
    }
}
|
|
214
|
+
async function runInlineReindex(stashDir) {
|
|
127
215
|
try {
|
|
128
216
|
const { akmIndex } = await import("./indexer.js");
|
|
129
217
|
await akmIndex({ stashDir });
|
|
@@ -134,3 +222,50 @@ export async function ensureIndex(stashDir) {
|
|
|
134
222
|
return true;
|
|
135
223
|
}
|
|
136
224
|
}
|
|
225
|
+
/**
 * Make sure the local index is present and fresh enough for the caller.
 *
 * Nothing happens when `isIndexStale(stashDir)` reports a fresh index. When it
 * is stale, the rebuild runs inline (awaited) if any of these hold:
 *   - the caller passed `mode: "blocking"`;
 *   - the existing index cannot serve this stash (`indexCanServeStash` is
 *     false), because a detached rebuild would leave the caller with nothing
 *     to read;
 *   - this process is not the akm CLI itself (`AKM_CLI_ENTRY` is not "1"),
 *     because the background path re-invokes `process.argv[1]`, which in any
 *     other host would launch the wrong program.
 * Otherwise a detached background reindex is spawned and the caller proceeds
 * against the existing index. A failure to spawn is downgraded to a warning.
 *
 * @param {string} stashDir - Stash directory the index must cover.
 * @param {{ mode?: string }} [options] - `mode: "blocking"` forces an inline rebuild.
 * @returns {Promise<boolean>} `false` when the index was already fresh;
 *   otherwise the inline rebuild's result, or `true` on the background path.
 */
export async function ensureIndex(stashDir, options = {}) {
    if (!isIndexStale(stashDir)) {
        return false;
    }
    // Evaluated left to right with short-circuiting, so the database probe is
    // skipped when blocking mode was requested explicitly.
    const mustBuildInline = options.mode === "blocking" ||
        !indexCanServeStash(stashDir) ||
        process.env.AKM_CLI_ENTRY !== "1";
    if (mustBuildInline) {
        return runInlineReindex(stashDir);
    }
    try {
        await spawnBackgroundReindex(stashDir);
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        warn("Background reindex spawn failed, proceeding with existing index:", detail);
    }
    // An index run was attempted either way.
    return true;
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
import fs from "node:fs";
|
|
5
|
+
import path from "node:path";
|
|
6
|
+
import { probeLock, releaseLock, releaseLockIfOwned, tryAcquireLockSync } from "../core/file-lock.js";
|
|
7
|
+
import { getDbPath, getIndexWriterLockPath } from "../core/paths.js";
|
|
8
|
+
const INDEX_WRITER_LOCK_STALE_AFTER_MS = 12 * 60 * 60 * 1000;
|
|
9
|
+
const INDEX_WRITER_WAIT_MS = 100;
|
|
10
|
+
const heldLocks = new Map();
|
|
11
|
+
/**
 * Serialize the lock-file payload describing the lease holder.
 *
 * @param {string} purpose - Label describing why the lease is held.
 * @param {number} [pid=process.pid] - Process id recorded as the holder.
 * @returns {string} JSON text with `pid`, `purpose`, `dbPath` and `startedAt`.
 */
function buildPayload(purpose, pid = process.pid) {
    const payload = {
        pid,
        purpose,
        dbPath: getDbPath(),
        startedAt: new Date().toISOString(),
    };
    return JSON.stringify(payload);
}
|
|
19
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Timer duration passed to `setTimeout`.
 * @returns {Promise<void>} Promise settled by the timer callback.
 */
function delay(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
22
|
+
/**
 * Throw when the given abort signal has already been aborted.
 *
 * An `Error` abort reason is rethrown as-is. Any other reason is wrapped in a
 * generic Error and kept as its `cause`, so the caller's reason is not lost.
 *
 * @param {{ aborted?: boolean, reason?: unknown } | undefined | null} signal -
 *   Abort signal to inspect; a missing signal is treated as "not aborted".
 * @throws {Error} When `signal.aborted` is truthy.
 */
function throwIfAborted(signal) {
    if (!signal?.aborted)
        return;
    if (signal.reason instanceof Error)
        throw signal.reason;
    throw new Error("index writer wait aborted", { cause: signal.reason });
}
|
|
27
|
+
/**
 * Drop one reference to the in-process hold on `lockPath`.
 *
 * The lock file itself is only released (if still owned by this pid) once the
 * reference count reaches zero; the matching "exit" handler is removed then.
 *
 * @param {string} lockPath - Lock file path used as the registry key.
 */
function releaseHeldLock(lockPath) {
    const entry = heldLocks.get(lockPath);
    if (entry === undefined || entry === null || !entry) {
        return;
    }
    entry.depth -= 1;
    const stillReferenced = entry.depth > 0;
    if (stillReferenced) {
        return;
    }
    // Last reference gone: forget the hold, then give up the lock file.
    heldLocks.delete(lockPath);
    process.off("exit", entry.exitHandler);
    releaseLockIfOwned(lockPath, process.pid);
}
|
|
38
|
+
/**
 * Register (or re-enter) the in-process hold on `lockPath`.
 *
 * A first hold installs a process "exit" handler that releases the lock file
 * if this pid still owns it. Re-entrant holds only bump the reference count.
 *
 * @param {string} lockPath - Lock file path used as the registry key.
 * @returns {{ lockPath: string, release: () => void }} Lease handle whose
 *   `release` drops one reference.
 */
function retainHeldLock(lockPath) {
    const entry = heldLocks.get(lockPath);
    if (entry) {
        entry.depth += 1;
    }
    else {
        const exitHandler = () => releaseLockIfOwned(lockPath, process.pid);
        process.on("exit", exitHandler);
        heldLocks.set(lockPath, { depth: 1, exitHandler });
    }
    return {
        lockPath,
        release: () => releaseHeldLock(lockPath),
    };
}
|
|
49
|
+
/**
 * Forget the in-process hold on `lockPath` without touching the lock file.
 *
 * Removes the registry entry (whatever its reference count) and unregisters
 * its "exit" handler, so this process no longer releases the lock on exit.
 *
 * @param {string} lockPath - Lock file path used as the registry key.
 */
function detachHeldLock(lockPath) {
    const entry = heldLocks.get(lockPath);
    if (entry) {
        heldLocks.delete(lockPath);
        process.off("exit", entry.exitHandler);
    }
}
|
|
56
|
+
/**
 * Acquire the global index-writer lease.
 *
 * A hold already registered in this process is re-entered immediately.
 * Otherwise the lock file is attempted in a loop: a lock reported as held by
 * this pid is adopted, a stale lock is removed and retried, and a lock held
 * elsewhere either ends the attempt (`mode: "try"`) or is polled again after
 * a short delay (default `mode: "wait"`).
 *
 * @param {{ mode?: string, purpose?: string, signal?: AbortSignal }} options -
 *   `mode` is "wait" unless given; `purpose` is stored in the lock payload;
 *   `signal` aborts the wait.
 * @returns {Promise<{ lockPath: string, release: () => void } | undefined>}
 *   The lease, or `undefined` when `mode` is "try" and the lock is held elsewhere.
 * @throws {Error} When `options.signal` is aborted before an attempt.
 */
export async function acquireIndexWriterLease(options) {
    const mode = options.mode ?? "wait";
    const lockPath = getIndexWriterLockPath();
    fs.mkdirSync(path.dirname(lockPath), { recursive: true });
    // Re-entrant acquisition inside this process.
    if (heldLocks.has(lockPath)) {
        return retainHeldLock(lockPath);
    }
    for (;;) {
        throwIfAborted(options.signal);
        const acquired = tryAcquireLockSync(lockPath, buildPayload(options.purpose));
        if (acquired) {
            return retainHeldLock(lockPath);
        }
        const probe = probeLock(lockPath, { staleAfterMs: INDEX_WRITER_LOCK_STALE_AFTER_MS });
        const heldByThisProcess = probe.state === "held" && probe.holderPid === process.pid;
        if (heldByThisProcess) {
            // The lock file already names this pid; adopt it.
            return retainHeldLock(lockPath);
        }
        if (probe.state === "stale") {
            // Clear the stale lock and retry immediately.
            releaseLock(lockPath);
            continue;
        }
        if (mode === "try") {
            return undefined;
        }
        await delay(INDEX_WRITER_WAIT_MS);
    }
}
|
|
81
|
+
/**
 * Run `run` while holding the index-writer lease, releasing it afterwards.
 *
 * @param {{ mode?: string, purpose?: string, signal?: AbortSignal }} options -
 *   Forwarded to `acquireIndexWriterLease`.
 * @param {() => unknown} run - Work to perform under the lease; its result is awaited.
 * @returns {Promise<unknown>} Whatever `run` resolves to.
 * @throws {Error} When no lease could be obtained, or when `run` rejects.
 */
export async function withIndexWriterLease(options, run) {
    const lease = await acquireIndexWriterLease(options);
    if (!lease) {
        throw new Error(`index writer lease unavailable for ${options.purpose}`);
    }
    try {
        const outcome = await run();
        return outcome;
    }
    finally {
        // Released on success and on failure alike.
        lease.release();
    }
}
|
|
93
|
+
/**
 * Transfer ownership of a held lease to another process.
 *
 * Rewrites the lock file so it names `pid` as the holder, then drops this
 * process's bookkeeping for the lock so it is not released on exit here.
 *
 * @param {{ lockPath: string }} lease - Lease previously acquired in this process.
 * @param {number} pid - Process id that becomes the recorded holder.
 * @param {string} purpose - Purpose label written into the new payload.
 */
export function handoffIndexWriterLeaseToPid(lease, pid, purpose) {
    const { lockPath } = lease;
    const payload = buildPayload(purpose, pid);
    fs.writeFileSync(lockPath, payload, "utf8");
    detachHeldLock(lockPath);
}
|
|
97
|
+
/**
 * Inspect the index-writer lock without acquiring it.
 *
 * @returns {unknown} The result of `probeLock` for the index-writer lock path,
 *   using the module's stale-after threshold.
 */
export function probeIndexWriterLease() {
    const lockPath = getIndexWriterLockPath();
    const probeOptions = { staleAfterMs: INDEX_WRITER_LOCK_STALE_AFTER_MS };
    return probeLock(lockPath, probeOptions);
}
|
package/dist/indexer/indexer.js
CHANGED
|
@@ -12,6 +12,7 @@ import { resolveIndexPassLLM } from "../llm/index-passes.js";
|
|
|
12
12
|
import { takeWorkflowDocument } from "../workflows/runtime/document-cache.js";
|
|
13
13
|
import { clearStaleCacheEntries, closeDatabase, deleteEntriesByDir, deleteEntriesByIds, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getAllEntriesForEmbedding, getEmbeddableEntryCount, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, relinkUsageEvents, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, upsertWorkflowDocument, warnIfVecMissing, } from "./db/db.js";
|
|
14
14
|
import { deleteStoredGraph } from "./db/graph-db.js";
|
|
15
|
+
import { withIndexWriterLease } from "./index-writer-lock.js";
|
|
15
16
|
import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isEnrichmentComplete, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./passes/metadata.js";
|
|
16
17
|
import { buildSearchText } from "./search/search-fields.js";
|
|
17
18
|
import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./search/semantic-status.js";
|
|
@@ -225,119 +226,121 @@ function runCleanPass(db, dryRun) {
|
|
|
225
226
|
}
|
|
226
227
|
// ── Indexer ──────────────────────────────────────────────────────────────────
|
|
227
228
|
export async function akmIndex(options) {
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
229
|
+
return withIndexWriterLease({ purpose: "akm-index", signal: options?.signal }, async () => {
|
|
230
|
+
const stashDir = options?.stashDir || resolveStashDir();
|
|
231
|
+
const onProgress = options?.onProgress ?? (() => { });
|
|
232
|
+
const signal = options?.signal;
|
|
233
|
+
const reEnrich = options?.reEnrich === true;
|
|
234
|
+
const full = options?.full === true;
|
|
235
|
+
const clean = options?.clean === true;
|
|
236
|
+
const dryRun = options?.dryRun === true;
|
|
237
|
+
// Load config and resolve all stash sources
|
|
238
|
+
const { loadConfig } = await import("../core/config/config.js");
|
|
239
|
+
const config = loadConfig();
|
|
240
|
+
// One-time, read-only guard: warn if the writable stash still holds an
|
|
241
|
+
// un-migrated `vaults/` directory. In 0.9.0 the indexer skips `vaults/`
|
|
242
|
+
// entirely, so an unmigrated vault's `.env` data would silently never be
|
|
243
|
+
// indexed. Non-destructive — only stats, never reads/writes/deletes.
|
|
244
|
+
const { warnOnUnmigratedVaults } = await import("./usage/unmigrated-vaults-guard.js");
|
|
245
|
+
warnOnUnmigratedVaults(stashDir);
|
|
246
|
+
// Ensure git stash caches are extracted before resolving stash dirs,
|
|
247
|
+
// so their content directories exist on disk for the walker to discover.
|
|
248
|
+
const { ensureSourceCaches, resolveSourceEntries } = await import("./search/search-source.js");
|
|
249
|
+
await ensureSourceCaches(config, { force: full });
|
|
250
|
+
const allSourceEntries = resolveSourceEntries(stashDir, config);
|
|
251
|
+
const allSourceDirs = allSourceEntries.map((s) => s.path);
|
|
252
|
+
const t0 = Date.now();
|
|
253
|
+
// Open database — pass embedding dimension from config if available
|
|
254
|
+
const dbPath = getDbPath();
|
|
255
|
+
const embeddingDim = config.embedding?.dimension;
|
|
256
|
+
const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
|
|
257
|
+
try {
|
|
258
|
+
// Determine incremental vs full mode
|
|
259
|
+
const prevStashDir = getMeta(db, "stashDir");
|
|
260
|
+
const prevBuiltAt = getMeta(db, "builtAt");
|
|
261
|
+
const isIncremental = !full && prevStashDir === stashDir && !!prevBuiltAt;
|
|
262
|
+
const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
|
|
263
|
+
// Assemble the run context
|
|
264
|
+
const ctx = {
|
|
265
|
+
db,
|
|
266
|
+
config,
|
|
267
|
+
sources: allSourceEntries,
|
|
268
|
+
sourceDirs: allSourceDirs,
|
|
269
|
+
full,
|
|
270
|
+
reEnrich,
|
|
271
|
+
stashDir,
|
|
272
|
+
onProgress,
|
|
273
|
+
signal,
|
|
274
|
+
timing: {
|
|
275
|
+
t0,
|
|
276
|
+
tWalkStart: t0,
|
|
277
|
+
tWalkEnd: t0,
|
|
278
|
+
tLlmEnd: t0,
|
|
279
|
+
tFtsEnd: t0,
|
|
280
|
+
tEmbedEnd: t0,
|
|
281
|
+
},
|
|
282
|
+
isIncremental,
|
|
283
|
+
builtAtMs,
|
|
284
|
+
hadRemovedSources: false,
|
|
285
|
+
scannedDirs: 0,
|
|
286
|
+
skippedDirs: 0,
|
|
287
|
+
generatedCount: 0,
|
|
288
|
+
walkWarnings: [],
|
|
289
|
+
dirsNeedingLlm: [],
|
|
290
|
+
embeddingResult: null,
|
|
291
|
+
graphExtractionResult: null,
|
|
292
|
+
};
|
|
293
|
+
onProgress({
|
|
294
|
+
phase: "summary",
|
|
295
|
+
message: buildIndexSummaryMessage({
|
|
296
|
+
mode: isIncremental ? "incremental" : "full",
|
|
297
|
+
sourcesCount: allSourceDirs.length,
|
|
298
|
+
semanticSearchMode: config.semanticSearchMode,
|
|
299
|
+
embeddingProvider: getEmbeddingProvider(config.embedding),
|
|
300
|
+
llmEnabled: !!resolveIndexPassLLM("enrichment", config),
|
|
301
|
+
vecAvailable: isVecAvailable(db),
|
|
302
|
+
}),
|
|
303
|
+
});
|
|
304
|
+
// ── Phase sequence ───────────────────────────────────────────────────────
|
|
305
|
+
await runSourceCachePhase(ctx);
|
|
306
|
+
await runWalkPhase(ctx);
|
|
307
|
+
await runEmbeddingPhase(ctx);
|
|
308
|
+
await runFinalizePhase(ctx);
|
|
309
|
+
// ────────────────────────────────────────────────────────────────────────
|
|
310
|
+
const { _verification: verification, _totalEntries: totalEntries } = ctx;
|
|
311
|
+
const { timing } = ctx;
|
|
312
|
+
// ── Clean pass ───────────────────────────────────────────────────────────
|
|
313
|
+
// After the normal index completes, remove entries whose source files no
|
|
314
|
+
// longer exist on disk. Remote entries (empty file_path) are skipped.
|
|
315
|
+
let cleanResult;
|
|
316
|
+
if (clean) {
|
|
317
|
+
cleanResult = runCleanPass(db, dryRun);
|
|
318
|
+
}
|
|
319
|
+
// ────────────────────────────────────────────────────────────────────────
|
|
320
|
+
return {
|
|
321
|
+
stashDir,
|
|
322
|
+
totalEntries,
|
|
323
|
+
generatedMetadata: ctx.generatedCount,
|
|
324
|
+
indexPath: dbPath,
|
|
294
325
|
mode: isIncremental ? "incremental" : "full",
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
const { _verification: verification, _totalEntries: totalEntries } = ctx;
|
|
309
|
-
const { timing } = ctx;
|
|
310
|
-
// ── Clean pass ───────────────────────────────────────────────────────────
|
|
311
|
-
// After the normal index completes, remove entries whose source files no
|
|
312
|
-
// longer exist on disk. Remote entries (empty file_path) are skipped.
|
|
313
|
-
let cleanResult;
|
|
314
|
-
if (clean) {
|
|
315
|
-
cleanResult = runCleanPass(db, dryRun);
|
|
326
|
+
directoriesScanned: ctx.scannedDirs,
|
|
327
|
+
directoriesSkipped: ctx.skippedDirs,
|
|
328
|
+
...(ctx.walkWarnings.length > 0 ? { warnings: ctx.walkWarnings } : {}),
|
|
329
|
+
verification,
|
|
330
|
+
timing: {
|
|
331
|
+
totalMs: Date.now() - timing.t0,
|
|
332
|
+
walkMs: timing.tWalkEnd - timing.tWalkStart,
|
|
333
|
+
llmMs: timing.tLlmEnd - timing.tWalkEnd,
|
|
334
|
+
embedMs: timing.tEmbedEnd - timing.tLlmEnd,
|
|
335
|
+
ftsMs: timing.tFtsEnd - timing.tEmbedEnd,
|
|
336
|
+
},
|
|
337
|
+
...(cleanResult !== undefined ? { clean: cleanResult } : {}),
|
|
338
|
+
};
|
|
316
339
|
}
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
generatedMetadata: ctx.generatedCount,
|
|
322
|
-
indexPath: dbPath,
|
|
323
|
-
mode: isIncremental ? "incremental" : "full",
|
|
324
|
-
directoriesScanned: ctx.scannedDirs,
|
|
325
|
-
directoriesSkipped: ctx.skippedDirs,
|
|
326
|
-
...(ctx.walkWarnings.length > 0 ? { warnings: ctx.walkWarnings } : {}),
|
|
327
|
-
verification,
|
|
328
|
-
timing: {
|
|
329
|
-
totalMs: Date.now() - timing.t0,
|
|
330
|
-
walkMs: timing.tWalkEnd - timing.tWalkStart,
|
|
331
|
-
llmMs: timing.tLlmEnd - timing.tWalkEnd,
|
|
332
|
-
embedMs: timing.tEmbedEnd - timing.tLlmEnd,
|
|
333
|
-
ftsMs: timing.tFtsEnd - timing.tEmbedEnd,
|
|
334
|
-
},
|
|
335
|
-
...(cleanResult !== undefined ? { clean: cleanResult } : {}),
|
|
336
|
-
};
|
|
337
|
-
}
|
|
338
|
-
finally {
|
|
339
|
-
closeDatabase(db);
|
|
340
|
-
}
|
|
340
|
+
finally {
|
|
341
|
+
closeDatabase(db);
|
|
342
|
+
}
|
|
343
|
+
});
|
|
341
344
|
}
|
|
342
345
|
// ── Extracted helpers for indexing ────────────────────────────────────────────
|
|
343
346
|
async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete = false, onProgress) {
|
|
@@ -298,7 +298,7 @@ export class ClaudeCodeProvider {
|
|
|
298
298
|
const full = path.join(dir, entry.name);
|
|
299
299
|
if (entry.isDirectory())
|
|
300
300
|
yield* this.#walkJsonl(full);
|
|
301
|
-
else if (entry.name.endsWith(".jsonl"))
|
|
301
|
+
else if (entry.name.endsWith(".jsonl") && entry.name !== "journal.jsonl")
|
|
302
302
|
yield full;
|
|
303
303
|
}
|
|
304
304
|
}
|
package/dist/llm/client.js
CHANGED
|
@@ -119,9 +119,23 @@ function looksLikeContextOverflow(message) {
|
|
|
119
119
|
/**
|
|
120
120
|
* Decide whether a first-attempt {@link LlmCallError} is eligible for a single
|
|
121
121
|
* retry. Retryable: HTTP 5xx (`provider_error` with statusCode >= 500) and
|
|
122
|
-
* `network_error` whose message looks like a transient connection
|
|
123
|
-
*
|
|
124
|
-
*
|
|
122
|
+
* `network_error` whose message looks like a transient connection drop.
|
|
123
|
+
* NOT retryable: 4xx, `rate_limited` (429), `timeout`, `parse_error`, and
|
|
124
|
+
* context-overflow-classified errors.
|
|
125
|
+
*
|
|
126
|
+
* The connection-drop heuristic covers the substrings emitted across runtimes
|
|
127
|
+
* for a mid-flight socket close:
|
|
128
|
+
* - `ECONNRESET` / `EPIPE` — Node/libuv socket reset codes
|
|
129
|
+
* - `fetch failed` — undici's generic wrapper message
|
|
130
|
+
* - `socket connection was closed` — Bun's message for a dropped connection
|
|
131
|
+
* (e.g. "The socket connection was closed unexpectedly.")
|
|
132
|
+
* - `terminated` / `other side closed` — undici's phrasings for the same
|
|
133
|
+
*
|
|
134
|
+
* These all describe a transient transport failure where a second attempt can
|
|
135
|
+
* legitimately succeed, which is exactly the case a single bounded retry is
|
|
136
|
+
* meant to absorb. Before this list was widened, Bun's "socket connection was
|
|
137
|
+
* closed unexpectedly" fell through unretried and surfaced as a recurring
|
|
138
|
+
* failure in the improve/reflect and capability-probe flows.
|
|
125
139
|
*/
|
|
126
140
|
function isRetryable(err) {
|
|
127
141
|
if (looksLikeContextOverflow(err.message))
|
|
@@ -131,7 +145,12 @@ function isRetryable(err) {
|
|
|
131
145
|
}
|
|
132
146
|
if (err.code === "network_error") {
|
|
133
147
|
const lower = err.message.toLowerCase();
|
|
134
|
-
return lower.includes("econnreset") ||
|
|
148
|
+
return (lower.includes("econnreset") ||
|
|
149
|
+
lower.includes("epipe") ||
|
|
150
|
+
lower.includes("fetch failed") ||
|
|
151
|
+
lower.includes("socket connection was closed") ||
|
|
152
|
+
lower.includes("terminated") ||
|
|
153
|
+
lower.includes("other side closed"));
|
|
135
154
|
}
|
|
136
155
|
return false;
|
|
137
156
|
}
|