akm-cli 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/dist/cli.js +43 -11
- package/dist/commands/history.js +2 -7
- package/dist/commands/info.js +2 -2
- package/dist/commands/installed-stashes.js +44 -0
- package/dist/commands/search.js +2 -2
- package/dist/commands/show.js +4 -19
- package/dist/core/config.js +13 -1
- package/dist/indexer/db-search.js +17 -38
- package/dist/indexer/db.js +51 -1
- package/dist/indexer/indexer.js +312 -115
- package/dist/indexer/manifest.js +18 -23
- package/dist/indexer/metadata.js +253 -21
- package/dist/indexer/search-source.js +10 -4
- package/dist/output/cli-hints.js +3 -2
- package/dist/output/renderers.js +22 -49
- package/dist/registry/build-index.js +13 -18
- package/dist/setup/setup.js +216 -84
- package/dist/sources/providers/git.js +14 -2
- package/dist/wiki/wiki.js +11 -1
- package/dist/workflows/parser.js +19 -4
- package/dist/workflows/runs.js +3 -3
- package/docs/README.md +3 -3
- package/docs/migration/release-notes/0.7.0.md +8 -0
- package/docs/migration/release-notes/0.7.3.md +16 -0
- package/docs/migration/release-notes/0.7.4.md +17 -0
- package/package.json +2 -2
package/dist/indexer/indexer.js
CHANGED
|
@@ -5,10 +5,10 @@ import { getDbPath } from "../core/paths";
|
|
|
5
5
|
import { isVerbose, warn, warnVerbose } from "../core/warn";
|
|
6
6
|
import { resolveIndexPassLLM } from "../llm/index-passes";
|
|
7
7
|
import { takeWorkflowDocument } from "../workflows/document-cache";
|
|
8
|
-
import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertUtilityScore, warnIfVecMissing, } from "./db";
|
|
8
|
+
import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, warnIfVecMissing, } from "./db";
|
|
9
9
|
import { runGraphExtractionPass } from "./graph-extraction";
|
|
10
10
|
import { runMemoryInferencePass } from "./memory-inference";
|
|
11
|
-
import { generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
|
|
11
|
+
import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
|
|
12
12
|
import { buildSearchText } from "./search-fields";
|
|
13
13
|
import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
|
|
14
14
|
import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
|
|
@@ -23,13 +23,14 @@ export async function akmIndex(options) {
|
|
|
23
23
|
const stashDir = options?.stashDir || resolveStashDir();
|
|
24
24
|
const onProgress = options?.onProgress ?? (() => { });
|
|
25
25
|
const signal = options?.signal;
|
|
26
|
+
const enrich = options?.enrich === true;
|
|
26
27
|
// Load config and resolve all stash sources
|
|
27
28
|
const { loadConfig } = await import("../core/config.js");
|
|
28
29
|
const config = loadConfig();
|
|
29
30
|
// Ensure git stash caches are extracted before resolving stash dirs,
|
|
30
31
|
// so their content directories exist on disk for the walker to discover.
|
|
31
32
|
const { ensureSourceCaches, resolveSourceEntries } = await import("./search-source.js");
|
|
32
|
-
await ensureSourceCaches(config);
|
|
33
|
+
await ensureSourceCaches(config, { force: options?.full === true });
|
|
33
34
|
const allSourceEntries = resolveSourceEntries(stashDir, config);
|
|
34
35
|
const allSourceDirs = allSourceEntries.map((s) => s.path);
|
|
35
36
|
const t0 = Date.now();
|
|
@@ -50,13 +51,11 @@ export async function akmIndex(options) {
|
|
|
50
51
|
sourcesCount: allSourceDirs.length,
|
|
51
52
|
semanticSearchMode: config.semanticSearchMode,
|
|
52
53
|
embeddingProvider: getEmbeddingProvider(config.embedding),
|
|
53
|
-
|
|
54
|
-
// run. Today that means the enrichment pass; future passes plug in
|
|
55
|
-
// via `resolveIndexPassLLM`.
|
|
56
|
-
llmEnabled: !!resolveIndexPassLLM("enrichment", config),
|
|
54
|
+
llmEnabled: enrich && !!resolveIndexPassLLM("enrichment", config),
|
|
57
55
|
vecAvailable: isVecAvailable(db),
|
|
58
56
|
}),
|
|
59
57
|
});
|
|
58
|
+
let hadRemovedSources = false;
|
|
60
59
|
if (options?.full || !isIncremental) {
|
|
61
60
|
// The delete is now merged into the insert transaction inside
|
|
62
61
|
// indexEntries() so that a reader never sees an empty database between
|
|
@@ -83,7 +82,9 @@ export async function akmIndex(options) {
|
|
|
83
82
|
const currentSet = new Set(allSourceDirs);
|
|
84
83
|
for (const dir of prevStashDirs) {
|
|
85
84
|
if (!currentSet.has(dir)) {
|
|
85
|
+
hadRemovedSources = true;
|
|
86
86
|
deleteEntriesByStashDir(db, dir);
|
|
87
|
+
deleteIndexDirStatesByStashDir(db, dir);
|
|
87
88
|
}
|
|
88
89
|
}
|
|
89
90
|
}
|
|
@@ -95,20 +96,25 @@ export async function akmIndex(options) {
|
|
|
95
96
|
// `resolveIndexPassLLM("memory", config)` — when the user has no
|
|
96
97
|
// `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
|
|
97
98
|
// and existing inferred children are left in place.
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
99
|
+
if (enrich) {
|
|
100
|
+
try {
|
|
101
|
+
const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
|
|
102
|
+
if (inferenceResult.writtenFacts > 0) {
|
|
103
|
+
onProgress({
|
|
104
|
+
phase: "llm",
|
|
105
|
+
message: `Memory inference wrote ${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
catch (err) {
|
|
110
|
+
warn(`Memory inference pass aborted: ${err instanceof Error ? err.message : String(err)}`);
|
|
105
111
|
}
|
|
106
112
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
113
|
+
else {
|
|
114
|
+
onProgress({
|
|
115
|
+
phase: "llm",
|
|
116
|
+
message: "LLM passes disabled; rerun with --enrich to enable inference and enrichment.",
|
|
117
|
+
});
|
|
112
118
|
}
|
|
113
119
|
// Graph extraction pass (#207). Runs after memory inference so any
|
|
114
120
|
// atomic-fact children that just got written are visible to the graph
|
|
@@ -120,17 +126,19 @@ export async function akmIndex(options) {
|
|
|
120
126
|
// `llm.features.graph_extraction` feature flag or the per-pass
|
|
121
127
|
// `index.graph.llm` toggle) is off; the existing graph file is
|
|
122
128
|
// preserved on disk in that case.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
129
|
+
if (enrich) {
|
|
130
|
+
try {
|
|
131
|
+
const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
|
|
132
|
+
if (graphResult.written) {
|
|
133
|
+
onProgress({
|
|
134
|
+
phase: "llm",
|
|
135
|
+
message: `Graph extraction wrote ${graphResult.totalEntities} entit${graphResult.totalEntities === 1 ? "y" : "ies"} and ${graphResult.totalRelations} relation${graphResult.totalRelations === 1 ? "" : "s"} from ${graphResult.extracted} file${graphResult.extracted === 1 ? "" : "s"}.`,
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
catch (err) {
|
|
140
|
+
warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
|
|
130
141
|
}
|
|
131
|
-
}
|
|
132
|
-
catch (err) {
|
|
133
|
-
warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
|
|
134
142
|
}
|
|
135
143
|
throwIfAborted(signal);
|
|
136
144
|
const tWalkStart = Date.now();
|
|
@@ -138,7 +146,7 @@ export async function akmIndex(options) {
|
|
|
138
146
|
// doFullDelete=true merges the wipe into the same transaction as the
|
|
139
147
|
// inserts so readers never see an empty database mid-rebuild.
|
|
140
148
|
const doFullDelete = options?.full || !isIncremental;
|
|
141
|
-
const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFullDelete);
|
|
149
|
+
const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete, onProgress);
|
|
142
150
|
onProgress({
|
|
143
151
|
phase: "scan",
|
|
144
152
|
message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
|
|
@@ -160,10 +168,10 @@ export async function akmIndex(options) {
|
|
|
160
168
|
const tWalkEnd = Date.now();
|
|
161
169
|
throwIfAborted(signal);
|
|
162
170
|
// Enhance entries with LLM if configured
|
|
163
|
-
await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal);
|
|
171
|
+
await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich);
|
|
164
172
|
onProgress({
|
|
165
173
|
phase: "llm",
|
|
166
|
-
message: resolveIndexPassLLM("enrichment", config)
|
|
174
|
+
message: enrich && resolveIndexPassLLM("enrichment", config)
|
|
167
175
|
? `LLM enhancement reviewed ${dirsNeedingLlm.length} ${dirsNeedingLlm.length === 1 ? "directory" : "directories"}.`
|
|
168
176
|
: "LLM enhancement disabled.",
|
|
169
177
|
});
|
|
@@ -263,7 +271,7 @@ export async function akmIndex(options) {
|
|
|
263
271
|
}
|
|
264
272
|
}
|
|
265
273
|
// ── Extracted helpers for indexing ────────────────────────────────────────────
|
|
266
|
-
async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFullDelete = false) {
|
|
274
|
+
async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete = false, onProgress) {
|
|
267
275
|
let scannedDirs = 0;
|
|
268
276
|
let skippedDirs = 0;
|
|
269
277
|
let generatedCount = 0;
|
|
@@ -271,9 +279,29 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
|
|
|
271
279
|
const seenPaths = new Set();
|
|
272
280
|
const dirsNeedingLlm = [];
|
|
273
281
|
const dirRecords = [];
|
|
282
|
+
let processedDirs = 0;
|
|
283
|
+
let priorDirsChanged = hadRemovedSources;
|
|
284
|
+
const reportScanProgress = (message) => {
|
|
285
|
+
onProgress?.({
|
|
286
|
+
phase: "scan",
|
|
287
|
+
message,
|
|
288
|
+
processed: processedDirs,
|
|
289
|
+
total: allSourceEntries.length,
|
|
290
|
+
});
|
|
291
|
+
};
|
|
292
|
+
const reportDirDecision = (kind, dirPath, currentStashDir, reason, persistedRowCount) => {
|
|
293
|
+
if (!isVerbose())
|
|
294
|
+
return;
|
|
295
|
+
const detail = reason.detail ? ` (${reason.detail})` : "";
|
|
296
|
+
const rowInfo = persistedRowCount !== undefined ? `; previous rows=${persistedRowCount}` : "";
|
|
297
|
+
reportScanProgress(`${kind === "scan" ? "Rescanning" : "Skipping"} ${path.relative(currentStashDir, dirPath) || "."} ` +
|
|
298
|
+
`from ${currentStashDir}: ${reason.kind}${detail}${rowInfo}`);
|
|
299
|
+
};
|
|
274
300
|
for (const sourceAdded of allSourceEntries) {
|
|
275
301
|
const currentStashDir = sourceAdded.path;
|
|
276
302
|
const fileContexts = walkStashFlat(currentStashDir);
|
|
303
|
+
processedDirs++;
|
|
304
|
+
reportScanProgress(`Processed ${processedDirs}/${allSourceEntries.length} source${allSourceEntries.length === 1 ? "" : "s"}.`);
|
|
277
305
|
// Wiki-root stashes: all .md files are indexed as wiki pages under wikiName
|
|
278
306
|
if (sourceAdded.wikiName) {
|
|
279
307
|
const wikiName = sourceAdded.wikiName;
|
|
@@ -284,13 +312,17 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
|
|
|
284
312
|
if (!shouldIndexStashFile(currentStashDir, ctx.absPath, { treatStashRootAsWikiRoot: true }))
|
|
285
313
|
continue;
|
|
286
314
|
const relNoExt = ctx.relPath.replace(/\.md$/, "");
|
|
315
|
+
const frontmatter = ctx.frontmatter() ?? {};
|
|
287
316
|
const entry = {
|
|
288
317
|
name: `${wikiName}/${relNoExt}`,
|
|
289
318
|
type: "wiki",
|
|
290
319
|
filename: ctx.fileName,
|
|
291
|
-
|
|
292
|
-
|
|
320
|
+
quality: "generated",
|
|
321
|
+
confidence: 0.55,
|
|
322
|
+
source: "filename",
|
|
293
323
|
};
|
|
324
|
+
applyCuratedFrontmatter(entry, frontmatter);
|
|
325
|
+
applyWikiFrontmatter(entry, frontmatter);
|
|
294
326
|
const dir = ctx.parentDirAbs;
|
|
295
327
|
const group = wikiDirGroups.get(dir);
|
|
296
328
|
if (group) {
|
|
@@ -303,12 +335,32 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
|
|
|
303
335
|
}
|
|
304
336
|
for (const [dirPath, { files, entries }] of wikiDirGroups) {
|
|
305
337
|
if (seenPaths.has(path.resolve(dirPath))) {
|
|
306
|
-
|
|
338
|
+
const reason = { kind: "duplicate-dir" };
|
|
339
|
+
dirRecords.push({ dirPath, currentStashDir, files, stash: null, skip: true, reason });
|
|
340
|
+
reportDirDecision("skip", dirPath, currentStashDir, reason);
|
|
307
341
|
continue;
|
|
308
342
|
}
|
|
309
343
|
seenPaths.add(path.resolve(dirPath));
|
|
344
|
+
const previousState = getDirIndexState(db, dirPath, files, builtAtMs);
|
|
345
|
+
if (isIncremental && !previousState.stale && canUseIncrementalSkip(previousState, priorDirsChanged)) {
|
|
346
|
+
skippedDirs++;
|
|
347
|
+
dirRecords.push({ dirPath, currentStashDir, files, stash: null, skip: true, reason: previousState.reason });
|
|
348
|
+
reportDirDecision("skip", dirPath, currentStashDir, previousState.reason, previousState.persistedRowCount);
|
|
349
|
+
continue;
|
|
350
|
+
}
|
|
310
351
|
scannedDirs++;
|
|
311
|
-
|
|
352
|
+
priorDirsChanged = true;
|
|
353
|
+
const reason = isIncremental ? previousState.reason : { kind: "full-rebuild" };
|
|
354
|
+
dirRecords.push({
|
|
355
|
+
dirPath,
|
|
356
|
+
currentStashDir,
|
|
357
|
+
files,
|
|
358
|
+
stash: { entries },
|
|
359
|
+
skip: false,
|
|
360
|
+
reason,
|
|
361
|
+
persistedRowCount: previousState.persistedRowCount,
|
|
362
|
+
});
|
|
363
|
+
reportDirDecision("scan", dirPath, currentStashDir, reason, previousState.persistedRowCount);
|
|
312
364
|
}
|
|
313
365
|
continue;
|
|
314
366
|
}
|
|
@@ -322,46 +374,70 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
|
|
|
322
374
|
dirGroups.set(dir, [ctx.absPath]);
|
|
323
375
|
}
|
|
324
376
|
for (const [dirPath, files] of dirGroups) {
|
|
377
|
+
const indexableFiles = files.filter((file) => shouldIndexStashFile(currentStashDir, file));
|
|
325
378
|
if (seenPaths.has(path.resolve(dirPath))) {
|
|
326
|
-
|
|
379
|
+
const reason = { kind: "duplicate-dir" };
|
|
380
|
+
dirRecords.push({ dirPath, currentStashDir, files: indexableFiles, stash: null, skip: true, reason });
|
|
381
|
+
reportDirDecision("skip", dirPath, currentStashDir, reason);
|
|
327
382
|
continue;
|
|
328
383
|
}
|
|
329
384
|
seenPaths.add(path.resolve(dirPath));
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
const
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
continue;
|
|
337
|
-
}
|
|
385
|
+
if (indexableFiles.length === 0) {
|
|
386
|
+
skippedDirs++;
|
|
387
|
+
const reason = { kind: "no-indexable-files" };
|
|
388
|
+
dirRecords.push({ dirPath, currentStashDir, files: indexableFiles, stash: null, skip: true, reason });
|
|
389
|
+
reportDirDecision("skip", dirPath, currentStashDir, reason);
|
|
390
|
+
continue;
|
|
338
391
|
}
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
}
|
|
353
|
-
}
|
|
392
|
+
const cachedZeroRowState = isIncremental && getCachedZeroRowDirState(db, dirPath, indexableFiles, builtAtMs, priorDirsChanged);
|
|
393
|
+
if (cachedZeroRowState) {
|
|
394
|
+
skippedDirs++;
|
|
395
|
+
dirRecords.push({
|
|
396
|
+
dirPath,
|
|
397
|
+
currentStashDir,
|
|
398
|
+
files: indexableFiles,
|
|
399
|
+
stash: null,
|
|
400
|
+
skip: true,
|
|
401
|
+
reason: cachedZeroRowState.reason,
|
|
402
|
+
});
|
|
403
|
+
reportDirDecision("skip", dirPath, currentStashDir, cachedZeroRowState.reason, cachedZeroRowState.persistedRowCount);
|
|
404
|
+
continue;
|
|
354
405
|
}
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
406
|
+
const generated = await generateMetadataFlat(currentStashDir, indexableFiles);
|
|
407
|
+
if (generated.warnings?.length)
|
|
408
|
+
warnings.push(...generated.warnings);
|
|
409
|
+
const legacyOverrides = loadStashFile(dirPath, { requireFilename: true });
|
|
410
|
+
const { stash, staleFiles } = buildIndexedDirCandidate(dirPath, indexableFiles, generated, legacyOverrides);
|
|
411
|
+
if (generated.entries.length > 0) {
|
|
412
|
+
generatedCount += generated.entries.length;
|
|
413
|
+
}
|
|
414
|
+
const previousState = getDirIndexState(db, dirPath, staleFiles, builtAtMs);
|
|
415
|
+
if (isIncremental && !previousState.stale && canUseIncrementalSkip(previousState, priorDirsChanged)) {
|
|
416
|
+
skippedDirs++;
|
|
417
|
+
dirRecords.push({
|
|
418
|
+
dirPath,
|
|
419
|
+
currentStashDir,
|
|
420
|
+
files: staleFiles,
|
|
421
|
+
stash: null,
|
|
422
|
+
skip: true,
|
|
423
|
+
reason: previousState.reason,
|
|
424
|
+
});
|
|
425
|
+
reportDirDecision("skip", dirPath, currentStashDir, previousState.reason, previousState.persistedRowCount);
|
|
426
|
+
continue;
|
|
363
427
|
}
|
|
364
|
-
|
|
428
|
+
scannedDirs++;
|
|
429
|
+
priorDirsChanged = true;
|
|
430
|
+
const reason = isIncremental ? previousState.reason : { kind: "full-rebuild" };
|
|
431
|
+
dirRecords.push({
|
|
432
|
+
dirPath,
|
|
433
|
+
currentStashDir,
|
|
434
|
+
files: staleFiles,
|
|
435
|
+
stash,
|
|
436
|
+
skip: false,
|
|
437
|
+
reason,
|
|
438
|
+
persistedRowCount: previousState.persistedRowCount,
|
|
439
|
+
});
|
|
440
|
+
reportDirDecision("scan", dirPath, currentStashDir, reason, previousState.persistedRowCount);
|
|
365
441
|
}
|
|
366
442
|
}
|
|
367
443
|
// Phase 2 (sync): write all pre-generated metadata inside a single transaction.
|
|
@@ -393,6 +469,7 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
|
|
|
393
469
|
}
|
|
394
470
|
db.exec("DELETE FROM entries_fts");
|
|
395
471
|
db.exec("DELETE FROM utility_scores");
|
|
472
|
+
db.exec("DELETE FROM index_dir_state");
|
|
396
473
|
// Detach usage_events from entries about to be deleted — null out entry_id
|
|
397
474
|
// but keep entry_ref so events can be re-linked after entries are rebuilt.
|
|
398
475
|
try {
|
|
@@ -403,32 +480,42 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
|
|
|
403
480
|
}
|
|
404
481
|
db.exec("DELETE FROM entries");
|
|
405
482
|
}
|
|
406
|
-
for (const { dirPath, currentStashDir, files, stash, skip } of dirRecords) {
|
|
407
|
-
if (skip)
|
|
483
|
+
for (const { dirPath, currentStashDir, files, stash, skip, reason } of dirRecords) {
|
|
484
|
+
if (skip) {
|
|
485
|
+
if (reason?.kind === "unchanged") {
|
|
486
|
+
const fingerprint = computeDirFingerprint(dirPath, files);
|
|
487
|
+
upsertIndexDirState(db, {
|
|
488
|
+
dirPath,
|
|
489
|
+
fileSetHash: fingerprint.fileSetHash,
|
|
490
|
+
fileMtimeMaxMs: fingerprint.fileMtimeMaxMs,
|
|
491
|
+
reason: reason.kind,
|
|
492
|
+
});
|
|
493
|
+
}
|
|
408
494
|
continue;
|
|
495
|
+
}
|
|
409
496
|
// Delete old entries for this dir (will be re-inserted)
|
|
410
497
|
deleteEntriesByDir(db, dirPath);
|
|
498
|
+
let persistedRows = 0;
|
|
499
|
+
let dedupedRows = 0;
|
|
411
500
|
if (stash) {
|
|
412
|
-
// Build a lookup for matching filename-less entries to actual files
|
|
413
|
-
const fileBasenameMap = buildFileBasenameMap(files);
|
|
414
501
|
for (const entry of stash.entries) {
|
|
415
|
-
const entryPath = entry.filename
|
|
416
|
-
? path.join(dirPath, entry.filename)
|
|
417
|
-
: matchEntryToFile(entry.name, fileBasenameMap, files);
|
|
502
|
+
const entryPath = entry.filename ? path.join(dirPath, entry.filename) : null;
|
|
418
503
|
if (!entryPath)
|
|
419
504
|
continue; // skip unresolvable entries
|
|
420
505
|
if (!shouldIndexStashFile(currentStashDir, entryPath))
|
|
421
506
|
continue;
|
|
422
507
|
// Skip if a higher-priority stash root already indexed this asset
|
|
423
|
-
const
|
|
424
|
-
|
|
425
|
-
|
|
508
|
+
const identityKey = `${entry.type}\0${entry.name}`;
|
|
509
|
+
if (indexedAssetIdentities.has(identityKey)) {
|
|
510
|
+
dedupedRows++;
|
|
426
511
|
continue;
|
|
512
|
+
}
|
|
427
513
|
indexedAssetIdentities.add(identityKey);
|
|
428
514
|
const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
|
|
429
515
|
const searchText = buildSearchText(entry);
|
|
430
516
|
const entryWithSize = attachFileSize(entry, entryPath);
|
|
431
517
|
const entryId = upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, entryWithSize, searchText);
|
|
518
|
+
persistedRows++;
|
|
432
519
|
if (entry.type === "workflow") {
|
|
433
520
|
const doc = takeWorkflowDocument(entry);
|
|
434
521
|
if (doc) {
|
|
@@ -441,12 +528,121 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
|
|
|
441
528
|
dirsNeedingLlm.push({ dirPath, files, currentStashDir, stash });
|
|
442
529
|
}
|
|
443
530
|
}
|
|
531
|
+
const fingerprint = computeDirFingerprint(dirPath, files);
|
|
532
|
+
const persistedReason = persistedRows === 0
|
|
533
|
+
? inferZeroRowReason(stash, reason, warnings, dirPath, dedupedRows)
|
|
534
|
+
: reason?.kind === "full-rebuild"
|
|
535
|
+
? "full-rebuild"
|
|
536
|
+
: (reason?.kind ?? "updated");
|
|
537
|
+
upsertIndexDirState(db, {
|
|
538
|
+
dirPath,
|
|
539
|
+
fileSetHash: fingerprint.fileSetHash,
|
|
540
|
+
fileMtimeMaxMs: fingerprint.fileMtimeMaxMs,
|
|
541
|
+
reason: persistedReason,
|
|
542
|
+
});
|
|
543
|
+
if (persistedRows === 0) {
|
|
544
|
+
warnVerbose(`[index] zero-row ${dirPath}: ${persistedReason}`);
|
|
545
|
+
}
|
|
444
546
|
}
|
|
445
547
|
});
|
|
446
548
|
insertTransaction();
|
|
447
549
|
return { scannedDirs, skippedDirs, generatedCount, warnings, dirsNeedingLlm };
|
|
448
550
|
}
|
|
449
|
-
|
|
551
|
+
/**
 * Classify a directory for incremental indexing.
 *
 * Two sources of truth are consulted, in order:
 *  1. Rows already persisted for the directory (`getEntriesByDir`). When any
 *     exist, staleness is decided by `getDirStaleReason`.
 *  2. The cached per-directory state (`getIndexDirState`), which is the only
 *     record available for directories that produced zero rows last time.
 *     It is considered current when both the file-set hash and the newest
 *     mtime match the fingerprint of `files`.
 *
 * The fingerprint stats every file, so it is computed only on the second
 * path and only when a cached state exists to compare against.
 *
 * @param db         Open index database handle.
 * @param dirPath    Directory being classified.
 * @param files      Files currently considered part of that directory.
 * @param builtAtMs  Timestamp (ms) of the previous index build.
 * @returns `{ stale, reason: { kind, detail? }, persistedRowCount }`.
 */
function getDirIndexState(db, dirPath, files, builtAtMs) {
    const prevEntries = getEntriesByDir(db, dirPath);
    if (prevEntries.length > 0) {
        const staleReason = getDirStaleReason(dirPath, files, prevEntries, builtAtMs);
        if (!staleReason) {
            return { stale: false, reason: { kind: "unchanged" }, persistedRowCount: prevEntries.length };
        }
        return { stale: true, reason: staleReason, persistedRowCount: prevEntries.length };
    }
    const cachedState = getIndexDirState(db, dirPath);
    if (cachedState) {
        // Only pay for the per-file stat calls when there is something to compare.
        const fingerprint = computeDirFingerprint(dirPath, files);
        if (cachedState.fileSetHash === fingerprint.fileSetHash &&
            cachedState.fileMtimeMaxMs === fingerprint.fileMtimeMaxMs) {
            return {
                stale: false,
                reason: { kind: "cached-zero-row-state", detail: cachedState.reason },
                persistedRowCount: 0,
            };
        }
    }
    return {
        stale: true,
        reason: { kind: "no-previous-rows", detail: cachedState ? `cached=${cachedState.reason}` : undefined },
        persistedRowCount: 0,
    };
}
|
|
577
|
+
/**
 * Return the directory's index state only when it is a still-valid cached
 * zero-row state that may be skipped in this run; otherwise `undefined`.
 *
 * @param db                Open index database handle.
 * @param dirPath           Directory being classified.
 * @param files             Files currently considered part of that directory.
 * @param builtAtMs         Timestamp (ms) of the previous index build.
 * @param priorDirsChanged  Whether an earlier directory was rescanned or a source was removed.
 */
function getCachedZeroRowDirState(db, dirPath, files, builtAtMs, priorDirsChanged) {
    const dirState = getDirIndexState(db, dirPath, files, builtAtMs);
    const isFreshZeroRow = !dirState.stale && dirState.reason.kind === "cached-zero-row-state";
    return isFreshZeroRow && canUseIncrementalSkip(dirState, priorDirsChanged) ? dirState : undefined;
}
|
|
585
|
+
/**
 * Decide whether a non-stale directory may be skipped incrementally.
 *
 * The single exception: a cached zero-row state whose recorded detail is
 * "deduped-zero-row" must not be skipped once `priorDirsChanged` is set.
 *
 * @param state             Directory state carrying `reason.kind` / `reason.detail`.
 * @param priorDirsChanged  Whether an earlier directory was rescanned or a source was removed.
 * @returns {boolean} `true` when the skip is allowed.
 */
function canUseIncrementalSkip(state, priorDirsChanged) {
    if (!priorDirsChanged) {
        return true;
    }
    const { kind, detail } = state.reason;
    return kind !== "cached-zero-row-state" || detail !== "deduped-zero-row";
}
|
|
590
|
+
/**
 * Build a cheap fingerprint for a directory's file list.
 *
 * @param _dirPath  Unused; kept for signature symmetry with the other helpers.
 * @param files     File paths belonging to the directory.
 * @returns `{ fileSetHash, fileMtimeMaxMs }` where `fileSetHash` is the sorted,
 *          de-duplicated basenames joined with NUL, and `fileMtimeMaxMs` is the
 *          newest mtime (0 for an empty list, +Infinity if any stat fails).
 */
function computeDirFingerprint(_dirPath, files) {
    const uniqueBasenames = new Set();
    for (const filePath of files) {
        uniqueBasenames.add(path.basename(filePath));
    }
    const fileSetHash = Array.from(uniqueBasenames).sort().join("\0");
    const newestMtimeMs = () => {
        let newest = 0;
        for (const filePath of files) {
            let mtimeMs;
            try {
                mtimeMs = fs.statSync(filePath).mtimeMs;
            }
            catch {
                // An unreadable file makes the whole fingerprint "infinitely new".
                return Number.POSITIVE_INFINITY;
            }
            newest = Math.max(newest, mtimeMs);
        }
        return newest;
    };
    return { fileSetHash, fileMtimeMaxMs: newestMtimeMs() };
}
|
|
607
|
+
/**
 * Explain why a directory with previously persisted rows needs re-indexing.
 *
 * Checks, in order:
 *  1. the set of file basenames differs from the set recorded on the
 *     previous rows ("file-set-changed");
 *  2. a current file cannot be stat'ed ("missing-file");
 *  3. a current file was modified after the previous build ("mtime-changed").
 *
 * @param _dirPath         Unused; kept for signature symmetry with the other helpers.
 * @param currentFiles     File paths currently belonging to the directory.
 * @param previousEntries  Rows from the previous build (`{ filePath, entry }`).
 * @param builtAtMs        Timestamp (ms) of the previous index build.
 * @returns `{ kind, detail }` when stale, otherwise `undefined`.
 */
function getDirStaleReason(_dirPath, currentFiles, previousEntries, builtAtMs) {
    const prevFileNames = new Set();
    for (const ie of previousEntries) {
        // path.basename throws on non-strings, which would make the
        // entry.filename fallback unreachable for rows without a filePath.
        const fromPath = typeof ie.filePath === "string" ? path.basename(ie.filePath) : "";
        const name = fromPath || ie.entry?.filename;
        if (name) {
            prevFileNames.add(name);
        }
    }
    const currFileNames = new Set(currentFiles.map((f) => path.basename(f)));
    if (prevFileNames.size !== currFileNames.size) {
        return { kind: "file-set-changed", detail: `${prevFileNames.size} -> ${currFileNames.size} files` };
    }
    // Sizes are equal, so a one-directional containment check proves set equality.
    for (const name of currFileNames) {
        if (!prevFileNames.has(name)) {
            return { kind: "file-set-changed", detail: name };
        }
    }
    for (const file of currentFiles) {
        let mtimeMs;
        try {
            mtimeMs = fs.statSync(file).mtimeMs;
        }
        catch {
            return { kind: "missing-file", detail: path.basename(file) };
        }
        if (mtimeMs > builtAtMs) {
            return { kind: "mtime-changed", detail: path.basename(file) };
        }
    }
    return undefined;
}
|
|
633
|
+
/**
 * Pick the label recorded for a directory that was scanned but persisted no rows.
 *
 * Precedence: de-duplicated rows, then a workflow-skip warning mentioning the
 * directory, then an empty/missing stash, then a generic `zero-row:<kind>`.
 *
 * @param stash        Candidate stash for the directory (may be null/undefined).
 * @param priorReason  Reason the directory was scanned (may be null/undefined).
 * @param warnings     Warning strings collected so far.
 * @param dirPath      Directory being labelled.
 * @param dedupedRows  Number of entries dropped as already-indexed identities.
 * @returns {string} The reason label.
 */
function inferZeroRowReason(stash, priorReason, warnings, dirPath, dedupedRows) {
    if (dedupedRows > 0) {
        return "deduped-zero-row";
    }
    const isWorkflowSkipForDir = (message) => message.startsWith("Skipped workflow ") && message.includes(dirPath);
    if (warnings.some(isWorkflowSkipForDir)) {
        return "workflow-noise";
    }
    const hasEntries = Boolean(stash) && stash.entries.length !== 0;
    if (!hasEntries) {
        return "empty-generated-set";
    }
    const priorKind = priorReason?.kind ?? "unknown";
    return `zero-row:${priorKind}`;
}
|
|
643
|
+
async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = false) {
|
|
644
|
+
if (!enrich)
|
|
645
|
+
return;
|
|
450
646
|
// Resolve per-pass LLM config via the unified shim. Returns undefined when
|
|
451
647
|
// either no `akm.llm` is configured or the user opted this pass out via
|
|
452
648
|
// `index.enrichment.llm = false`. (#208)
|
|
@@ -683,36 +879,25 @@ function verifyIndexState(db, config, totalEntries, embeddingResult) {
|
|
|
683
879
|
vecAvailable,
|
|
684
880
|
};
|
|
685
881
|
}
|
|
686
|
-
function
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
for (const
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
return true;
|
|
704
|
-
}
|
|
705
|
-
}
|
|
706
|
-
// Check .stash.json modification time
|
|
707
|
-
const stashPath = path.join(dirPath, ".stash.json");
|
|
708
|
-
try {
|
|
709
|
-
if (fs.statSync(stashPath).mtimeMs > builtAtMs)
|
|
710
|
-
return true;
|
|
711
|
-
}
|
|
712
|
-
catch {
|
|
713
|
-
// file doesn't exist, not stale
|
|
882
|
+
/**
 * Combine generated metadata with legacy overrides into the stash candidate
 * for a directory, and work out which files staleness should be judged on.
 *
 * @param dirPath          Directory being indexed.
 * @param indexableFiles   Files in the directory that passed the index filter.
 * @param generated        Generated metadata; only `generated.entries` is read here.
 * @param legacyOverrides  Legacy stash file contents, or a falsy value when absent.
 * @returns `{ stash, staleFiles }`; `stash` falls back to `legacyOverrides`
 *          (possibly falsy) when no generated entries exist.
 */
function buildIndexedDirCandidate(dirPath, indexableFiles, generated, legacyOverrides) {
    let mergedEntries = generated.entries;
    if (legacyOverrides) {
        mergedEntries = generated.entries.map((entry) => mergeLegacyEntry(entry, legacyOverrides.entries));
    }
    let stash = legacyOverrides;
    if (mergedEntries.length > 0) {
        stash = { entries: mergedEntries };
    }
    if (!stash) {
        return { stash, staleFiles: indexableFiles };
    }
    return { stash, staleFiles: resolveIndexedFiles(dirPath, indexableFiles, stash) };
}
|
|
890
|
+
/**
 * Resolve the file paths that a stash's entries actually refer to.
 *
 * Entries with a `filename` map to `dirPath/filename`; the rest are matched
 * by name via `matchEntryToFile`. Unresolvable entries are ignored.
 *
 * @param dirPath  Directory the stash belongs to.
 * @param files    Candidate files in that directory.
 * @param stash    Stash whose `entries` are resolved.
 * @returns The unique resolved paths in first-seen order, or `files` itself
 *          when nothing resolved.
 */
function resolveIndexedFiles(dirPath, files, stash) {
    const basenameLookup = buildFileBasenameMap(files);
    const locate = (entry) => {
        if (entry.filename) {
            return path.join(dirPath, entry.filename);
        }
        return matchEntryToFile(entry.name, basenameLookup, files);
    };
    const uniquePaths = new Set();
    for (const entry of stash.entries) {
        const located = locate(entry);
        if (located) {
            uniquePaths.add(located);
        }
    }
    if (uniquePaths.size === 0) {
        return files;
    }
    return [...uniquePaths];
}
|
|
717
902
|
async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
|
|
718
903
|
const { enhanceMetadata } = await import("../llm/metadata-enhance");
|
|
@@ -776,9 +961,9 @@ export function buildFileBasenameMap(files) {
|
|
|
776
961
|
* 1. Exact basename match: entry.name === filename without extension
|
|
777
962
|
* 2. Last path segment match: for entries with names like "dir/sub-entry",
|
|
778
963
|
* try matching the last segment
|
|
779
|
-
* 3.
|
|
964
|
+
* 3. No implicit file fallback: ambiguous legacy entries are skipped
|
|
780
965
|
*/
|
|
781
|
-
export function matchEntryToFile(entryName, fileMap,
|
|
966
|
+
export function matchEntryToFile(entryName, fileMap, _files) {
|
|
782
967
|
// Exact match on entry name
|
|
783
968
|
const exact = fileMap.get(entryName);
|
|
784
969
|
if (exact)
|
|
@@ -790,8 +975,20 @@ export function matchEntryToFile(entryName, fileMap, files) {
|
|
|
790
975
|
if (segmentMatch)
|
|
791
976
|
return segmentMatch;
|
|
792
977
|
}
|
|
793
|
-
|
|
794
|
-
|
|
978
|
+
return null;
|
|
979
|
+
}
|
|
980
|
+
/**
 * Overlay a legacy stash entry onto a generated entry for the same file.
 *
 * Legacy fields win, except that `filename` always stays the generated one
 * and `source` / `quality` / `confidence` fall back to the generated values
 * when the legacy entry leaves them null or undefined.
 *
 * @param entry          Generated entry.
 * @param legacyEntries  Entries loaded from the legacy stash file.
 * @returns The merged entry, or `entry` itself when there is nothing to merge.
 */
function mergeLegacyEntry(entry, legacyEntries) {
    // Without a filename there is no identity to match on; comparing
    // undefined === undefined would otherwise pull in an unrelated legacy entry.
    if (!entry.filename) {
        return entry;
    }
    const legacy = legacyEntries.find((candidate) => candidate.filename === entry.filename);
    if (!legacy) {
        return entry;
    }
    return {
        ...entry,
        ...legacy,
        filename: entry.filename,
        source: legacy.source ?? entry.source,
        quality: legacy.quality ?? entry.quality,
        confidence: legacy.confidence ?? entry.confidence,
    };
}
|
|
796
993
|
/**
|
|
797
994
|
* Look up a single asset by ref. Spec §6.2 — `akm show` queries this and
|
|
@@ -819,7 +1016,7 @@ export async function lookup(ref) {
|
|
|
819
1016
|
if (sources.length === 0)
|
|
820
1017
|
return null;
|
|
821
1018
|
const dbPath = getDbPath();
|
|
822
|
-
const db =
|
|
1019
|
+
const db = openExistingDatabase(dbPath);
|
|
823
1020
|
try {
|
|
824
1021
|
// entry_key shape: `${stashDir}:${type}:${name}`. Suffix-match on
|
|
825
1022
|
// `:type:name` so we can scope by source dir as a prefix when origin is
|