akm-cli 0.7.2 → 0.7.3

@@ -5,10 +5,10 @@ import { getDbPath } from "../core/paths";
  import { isVerbose, warn, warnVerbose } from "../core/warn";
  import { resolveIndexPassLLM } from "../llm/index-passes";
  import { takeWorkflowDocument } from "../workflows/document-cache";
- import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertUtilityScore, warnIfVecMissing, } from "./db";
+ import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, warnIfVecMissing, } from "./db";
  import { runGraphExtractionPass } from "./graph-extraction";
  import { runMemoryInferencePass } from "./memory-inference";
- import { generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
+ import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
  import { buildSearchText } from "./search-fields";
  import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
  import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
@@ -23,13 +23,14 @@ export async function akmIndex(options) {
  const stashDir = options?.stashDir || resolveStashDir();
  const onProgress = options?.onProgress ?? (() => { });
  const signal = options?.signal;
+ const enrich = options?.enrich === true;
  // Load config and resolve all stash sources
  const { loadConfig } = await import("../core/config.js");
  const config = loadConfig();
  // Ensure git stash caches are extracted before resolving stash dirs,
  // so their content directories exist on disk for the walker to discover.
  const { ensureSourceCaches, resolveSourceEntries } = await import("./search-source.js");
- await ensureSourceCaches(config);
+ await ensureSourceCaches(config, { force: options?.full === true });
  const allSourceEntries = resolveSourceEntries(stashDir, config);
  const allSourceDirs = allSourceEntries.map((s) => s.path);
  const t0 = Date.now();
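0.7.3 makes the LLM passes opt-in through a new `enrich` option, and a full run now forces a refresh of the git source caches. A minimal caller sketch: the option names (`full`, `enrich`, `stashDir`, `signal`, `onProgress`) are taken from this diff, while the import specifier is an assumption.

```js
// Hypothetical programmatic invocation; the module path is a guess for
// illustration, the options come from the hunk above.
import { akmIndex } from "akm-cli/dist/index/index.js";

const ac = new AbortController();
await akmIndex({
  full: true,   // full rebuild; also passes { force: true } to ensureSourceCaches
  enrich: true, // opt in to memory inference, graph extraction, and enrichment
  signal: ac.signal,
  onProgress: ({ phase, message }) => console.log(`[${phase}] ${message}`),
});
```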
@@ -50,13 +51,11 @@ export async function akmIndex(options) {
  sourcesCount: allSourceDirs.length,
  semanticSearchMode: config.semanticSearchMode,
  embeddingProvider: getEmbeddingProvider(config.embedding),
- // Surface "llm enabled" only when at least one pass would actually
- // run. Today that means the enrichment pass; future passes plug in
- // via `resolveIndexPassLLM`.
- llmEnabled: !!resolveIndexPassLLM("enrichment", config),
+ llmEnabled: enrich && !!resolveIndexPassLLM("enrichment", config),
  vecAvailable: isVecAvailable(db),
  }),
  });
+ let hadRemovedSources = false;
  if (options?.full || !isIncremental) {
  // The delete is now merged into the insert transaction inside
  // indexEntries() so that a reader never sees an empty database between
@@ -83,7 +82,9 @@ export async function akmIndex(options) {
  const currentSet = new Set(allSourceDirs);
  for (const dir of prevStashDirs) {
  if (!currentSet.has(dir)) {
+ hadRemovedSources = true;
  deleteEntriesByStashDir(db, dir);
+ deleteIndexDirStatesByStashDir(db, dir);
  }
  }
  }
@@ -95,20 +96,25 @@ export async function akmIndex(options) {
  // `resolveIndexPassLLM("memory", config)` — when the user has no
  // `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
  // and existing inferred children are left in place.
- try {
- const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
- if (inferenceResult.writtenFacts > 0) {
- onProgress({
- phase: "llm",
- message: `Memory inference wrote ${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
- });
+ if (enrich) {
+ try {
+ const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
+ if (inferenceResult.writtenFacts > 0) {
+ onProgress({
+ phase: "llm",
+ message: `Memory inference wrote ${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
+ });
+ }
+ }
+ catch (err) {
+ warn(`Memory inference pass aborted: ${err instanceof Error ? err.message : String(err)}`);
  }
  }
- catch (err) {
- // Defensive — runMemoryInferencePass swallows per-memory failures.
- // A thrown error here would only come from an unexpected programming
- // bug; surface it as a warning rather than aborting the index run.
- warn(`Memory inference pass aborted: ${err instanceof Error ? err.message : String(err)}`);
+ else {
+ onProgress({
+ phase: "llm",
+ message: "LLM passes disabled; rerun with --enrich to enable inference and enrichment.",
+ });
  }
  // Graph extraction pass (#207). Runs after memory inference so any
  // atomic-fact children that just got written are visible to the graph
@@ -120,17 +126,19 @@ export async function akmIndex(options) {
  // `llm.features.graph_extraction` feature flag or the per-pass
  // `index.graph.llm` toggle) is off; the existing graph file is
  // preserved on disk in that case.
- try {
- const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
- if (graphResult.written) {
- onProgress({
- phase: "llm",
- message: `Graph extraction wrote ${graphResult.totalEntities} entit${graphResult.totalEntities === 1 ? "y" : "ies"} and ${graphResult.totalRelations} relation${graphResult.totalRelations === 1 ? "" : "s"} from ${graphResult.extracted} file${graphResult.extracted === 1 ? "" : "s"}.`,
- });
+ if (enrich) {
+ try {
+ const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
+ if (graphResult.written) {
+ onProgress({
+ phase: "llm",
+ message: `Graph extraction wrote ${graphResult.totalEntities} entit${graphResult.totalEntities === 1 ? "y" : "ies"} and ${graphResult.totalRelations} relation${graphResult.totalRelations === 1 ? "" : "s"} from ${graphResult.extracted} file${graphResult.extracted === 1 ? "" : "s"}.`,
+ });
+ }
+ }
+ catch (err) {
+ warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
  }
- }
- catch (err) {
- warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
  }
  throwIfAborted(signal);
  const tWalkStart = Date.now();
@@ -138,7 +146,7 @@ export async function akmIndex(options) {
  // doFullDelete=true merges the wipe into the same transaction as the
  // inserts so readers never see an empty database mid-rebuild.
  const doFullDelete = options?.full || !isIncremental;
- const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFullDelete);
+ const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete, onProgress);
  onProgress({
  phase: "scan",
  message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
@@ -160,10 +168,10 @@ export async function akmIndex(options) {
  const tWalkEnd = Date.now();
  throwIfAborted(signal);
  // Enhance entries with LLM if configured
- await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal);
+ await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich);
  onProgress({
  phase: "llm",
- message: resolveIndexPassLLM("enrichment", config)
+ message: enrich && resolveIndexPassLLM("enrichment", config)
  ? `LLM enhancement reviewed ${dirsNeedingLlm.length} ${dirsNeedingLlm.length === 1 ? "directory" : "directories"}.`
  : "LLM enhancement disabled.",
  });
@@ -263,7 +271,7 @@ export async function akmIndex(options) {
  }
  }
  // ── Extracted helpers for indexing ────────────────────────────────────────────
- async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFullDelete = false) {
+ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete = false, onProgress) {
  let scannedDirs = 0;
  let skippedDirs = 0;
  let generatedCount = 0;
@@ -271,9 +279,29 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
  const seenPaths = new Set();
  const dirsNeedingLlm = [];
  const dirRecords = [];
+ let processedDirs = 0;
+ let priorDirsChanged = hadRemovedSources;
+ const reportScanProgress = (message) => {
+ onProgress?.({
+ phase: "scan",
+ message,
+ processed: processedDirs,
+ total: allSourceEntries.length,
+ });
+ };
+ const reportDirDecision = (kind, dirPath, currentStashDir, reason, persistedRowCount) => {
+ if (!isVerbose())
+ return;
+ const detail = reason.detail ? ` (${reason.detail})` : "";
+ const rowInfo = persistedRowCount !== undefined ? `; previous rows=${persistedRowCount}` : "";
+ reportScanProgress(`${kind === "scan" ? "Rescanning" : "Skipping"} ${path.relative(currentStashDir, dirPath) || "."} ` +
+ `from ${currentStashDir}: ${reason.kind}${detail}${rowInfo}`);
+ };
  for (const sourceAdded of allSourceEntries) {
  const currentStashDir = sourceAdded.path;
  const fileContexts = walkStashFlat(currentStashDir);
+ processedDirs++;
+ reportScanProgress(`Processed ${processedDirs}/${allSourceEntries.length} source${allSourceEntries.length === 1 ? "" : "s"}.`);
  // Wiki-root stashes: all .md files are indexed as wiki pages under wikiName
  if (sourceAdded.wikiName) {
  const wikiName = sourceAdded.wikiName;
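Scan-phase progress events now carry `processed` and `total` counters alongside the message. A hypothetical consumer that renders them; only the event fields emitted by `reportScanProgress` above are assumed, the rendering itself is illustrative.

```js
// Sketch: show a live counter for scan events that carry totals, and fall
// back to plain logging for the other phases.
function onProgress(evt) {
  if (evt.phase === "scan" && evt.total !== undefined) {
    process.stdout.write(`\rscan ${evt.processed}/${evt.total}: ${evt.message}`);
  } else {
    console.log(`[${evt.phase}] ${evt.message}`);
  }
}
```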
@@ -284,13 +312,17 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
  if (!shouldIndexStashFile(currentStashDir, ctx.absPath, { treatStashRootAsWikiRoot: true }))
  continue;
  const relNoExt = ctx.relPath.replace(/\.md$/, "");
+ const frontmatter = ctx.frontmatter() ?? {};
  const entry = {
  name: `${wikiName}/${relNoExt}`,
  type: "wiki",
  filename: ctx.fileName,
- description: ctx.frontmatter()?.description,
- source: "frontmatter",
+ quality: "generated",
+ confidence: 0.55,
+ source: "filename",
  };
+ applyCuratedFrontmatter(entry, frontmatter);
+ applyWikiFrontmatter(entry, frontmatter);
  const dir = ctx.parentDirAbs;
  const group = wikiDirGroups.get(dir);
  if (group) {
@@ -303,12 +335,32 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
  }
  for (const [dirPath, { files, entries }] of wikiDirGroups) {
  if (seenPaths.has(path.resolve(dirPath))) {
- dirRecords.push({ dirPath, currentStashDir, files, stash: null, skip: true });
+ const reason = { kind: "duplicate-dir" };
+ dirRecords.push({ dirPath, currentStashDir, files, stash: null, skip: true, reason });
+ reportDirDecision("skip", dirPath, currentStashDir, reason);
  continue;
  }
  seenPaths.add(path.resolve(dirPath));
+ const previousState = getDirIndexState(db, dirPath, files, builtAtMs);
+ if (isIncremental && !previousState.stale && canUseIncrementalSkip(previousState, priorDirsChanged)) {
+ skippedDirs++;
+ dirRecords.push({ dirPath, currentStashDir, files, stash: null, skip: true, reason: previousState.reason });
+ reportDirDecision("skip", dirPath, currentStashDir, previousState.reason, previousState.persistedRowCount);
+ continue;
+ }
  scannedDirs++;
- dirRecords.push({ dirPath, currentStashDir, files, stash: { entries }, skip: false });
+ priorDirsChanged = true;
+ const reason = isIncremental ? previousState.reason : { kind: "full-rebuild" };
+ dirRecords.push({
+ dirPath,
+ currentStashDir,
+ files,
+ stash: { entries },
+ skip: false,
+ reason,
+ persistedRowCount: previousState.persistedRowCount,
+ });
+ reportDirDecision("scan", dirPath, currentStashDir, reason, previousState.persistedRowCount);
  }
  continue;
  }
@@ -322,46 +374,70 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
  dirGroups.set(dir, [ctx.absPath]);
  }
  for (const [dirPath, files] of dirGroups) {
+ const indexableFiles = files.filter((file) => shouldIndexStashFile(currentStashDir, file));
  if (seenPaths.has(path.resolve(dirPath))) {
- dirRecords.push({ dirPath, currentStashDir, files, stash: null, skip: true });
+ const reason = { kind: "duplicate-dir" };
+ dirRecords.push({ dirPath, currentStashDir, files: indexableFiles, stash: null, skip: true, reason });
+ reportDirDecision("skip", dirPath, currentStashDir, reason);
  continue;
  }
  seenPaths.add(path.resolve(dirPath));
- // Incremental: skip directories that haven't changed
- if (isIncremental) {
- const prevEntries = getEntriesByDir(db, dirPath);
- if (prevEntries.length > 0 && !isDirStale(dirPath, files, prevEntries, builtAtMs)) {
- skippedDirs++;
- dirRecords.push({ dirPath, currentStashDir, files, stash: null, skip: true });
- continue;
- }
+ if (indexableFiles.length === 0) {
+ skippedDirs++;
+ const reason = { kind: "no-indexable-files" };
+ dirRecords.push({ dirPath, currentStashDir, files: indexableFiles, stash: null, skip: true, reason });
+ reportDirDecision("skip", dirPath, currentStashDir, reason);
+ continue;
  }
- scannedDirs++;
- // Try loading existing .stash.json (user metadata overrides)
- let stash = loadStashFile(dirPath);
- if (stash) {
- const coveredFiles = new Set(stash.entries.map((e) => (e.filename ? path.basename(e.filename) : "")).filter((e) => !!e));
- const uncoveredFiles = files.filter((f) => !coveredFiles.has(path.basename(f)));
- if (uncoveredFiles.length > 0) {
- const generated = await generateMetadataFlat(currentStashDir, uncoveredFiles);
- if (generated.warnings?.length)
- warnings.push(...generated.warnings);
- if (generated.entries.length > 0) {
- stash = { entries: [...stash.entries, ...generated.entries] };
- generatedCount += generated.entries.length;
- }
- }
+ const cachedZeroRowState = isIncremental && getCachedZeroRowDirState(db, dirPath, indexableFiles, builtAtMs, priorDirsChanged);
+ if (cachedZeroRowState) {
+ skippedDirs++;
+ dirRecords.push({
+ dirPath,
+ currentStashDir,
+ files: indexableFiles,
+ stash: null,
+ skip: true,
+ reason: cachedZeroRowState.reason,
+ });
+ reportDirDecision("skip", dirPath, currentStashDir, cachedZeroRowState.reason, cachedZeroRowState.persistedRowCount);
+ continue;
  }
- if (!stash) {
- const generated = await generateMetadataFlat(currentStashDir, files);
- if (generated.warnings?.length)
- warnings.push(...generated.warnings);
- if (generated.entries.length > 0) {
- stash = { entries: generated.entries };
- generatedCount += generated.entries.length;
- }
+ const generated = await generateMetadataFlat(currentStashDir, indexableFiles);
+ if (generated.warnings?.length)
+ warnings.push(...generated.warnings);
+ const legacyOverrides = loadStashFile(dirPath, { requireFilename: true });
+ const { stash, staleFiles } = buildIndexedDirCandidate(dirPath, indexableFiles, generated, legacyOverrides);
+ if (generated.entries.length > 0) {
+ generatedCount += generated.entries.length;
+ }
+ const previousState = getDirIndexState(db, dirPath, staleFiles, builtAtMs);
+ if (isIncremental && !previousState.stale && canUseIncrementalSkip(previousState, priorDirsChanged)) {
+ skippedDirs++;
+ dirRecords.push({
+ dirPath,
+ currentStashDir,
+ files: staleFiles,
+ stash: null,
+ skip: true,
+ reason: previousState.reason,
+ });
+ reportDirDecision("skip", dirPath, currentStashDir, previousState.reason, previousState.persistedRowCount);
+ continue;
  }
- dirRecords.push({ dirPath, currentStashDir, files, stash, skip: false });
+ scannedDirs++;
+ priorDirsChanged = true;
+ const reason = isIncremental ? previousState.reason : { kind: "full-rebuild" };
+ dirRecords.push({
+ dirPath,
+ currentStashDir,
+ files: staleFiles,
+ stash,
+ skip: false,
+ reason,
+ persistedRowCount: previousState.persistedRowCount,
+ });
+ reportDirDecision("scan", dirPath, currentStashDir, reason, previousState.persistedRowCount);
  }
  }
  // Phase 2 (sync): write all pre-generated metadata inside a single transaction.
@@ -393,6 +469,7 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
  }
  db.exec("DELETE FROM entries_fts");
  db.exec("DELETE FROM utility_scores");
+ db.exec("DELETE FROM index_dir_state");
  // Detach usage_events from entries about to be deleted — null out entry_id
  // but keep entry_ref so events can be re-linked after entries are rebuilt.
  try {
@@ -403,32 +480,42 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
  }
  db.exec("DELETE FROM entries");
  }
- for (const { dirPath, currentStashDir, files, stash, skip } of dirRecords) {
- if (skip)
+ for (const { dirPath, currentStashDir, files, stash, skip, reason } of dirRecords) {
+ if (skip) {
+ if (reason?.kind === "unchanged") {
+ const fingerprint = computeDirFingerprint(dirPath, files);
+ upsertIndexDirState(db, {
+ dirPath,
+ fileSetHash: fingerprint.fileSetHash,
+ fileMtimeMaxMs: fingerprint.fileMtimeMaxMs,
+ reason: reason.kind,
+ });
+ }
  continue;
+ }
  // Delete old entries for this dir (will be re-inserted)
  deleteEntriesByDir(db, dirPath);
+ let persistedRows = 0;
+ let dedupedRows = 0;
  if (stash) {
- // Build a lookup for matching filename-less entries to actual files
- const fileBasenameMap = buildFileBasenameMap(files);
  for (const entry of stash.entries) {
- const entryPath = entry.filename
- ? path.join(dirPath, entry.filename)
- : matchEntryToFile(entry.name, fileBasenameMap, files);
+ const entryPath = entry.filename ? path.join(dirPath, entry.filename) : null;
  if (!entryPath)
  continue; // skip unresolvable entries
  if (!shouldIndexStashFile(currentStashDir, entryPath))
  continue;
  // Skip if a higher-priority stash root already indexed this asset
- const basename = path.basename(entryPath);
- const identityKey = `${entry.type}\0${basename}\0${entry.description ?? ""}`;
- if (indexedAssetIdentities.has(identityKey))
+ const identityKey = `${entry.type}\0${entry.name}`;
+ if (indexedAssetIdentities.has(identityKey)) {
+ dedupedRows++;
  continue;
+ }
  indexedAssetIdentities.add(identityKey);
  const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
  const searchText = buildSearchText(entry);
  const entryWithSize = attachFileSize(entry, entryPath);
  const entryId = upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, entryWithSize, searchText);
+ persistedRows++;
  if (entry.type === "workflow") {
  const doc = takeWorkflowDocument(entry);
  if (doc) {
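A behavioral detail in this hunk: the cross-stash dedupe key changed from type plus file basename plus description to type plus logical entry name. A toy comparison with invented values:

```js
// Toy values only. 0.7.2 keyed on type + basename + description, so the same
// logical asset with a tweaked description slipped past the dedupe check:
const keyV072 = ["doc", "setup.md", "How to set up"].join("\0");
// 0.7.3 keys on type + entry name; a duplicate from a lower-priority stash
// root is now skipped and counted in dedupedRows:
const keyV073 = ["doc", "guides/setup"].join("\0");
```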
@@ -441,12 +528,121 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
  dirsNeedingLlm.push({ dirPath, files, currentStashDir, stash });
  }
  }
+ const fingerprint = computeDirFingerprint(dirPath, files);
+ const persistedReason = persistedRows === 0
+ ? inferZeroRowReason(stash, reason, warnings, dirPath, dedupedRows)
+ : reason?.kind === "full-rebuild"
+ ? "full-rebuild"
+ : (reason?.kind ?? "updated");
+ upsertIndexDirState(db, {
+ dirPath,
+ fileSetHash: fingerprint.fileSetHash,
+ fileMtimeMaxMs: fingerprint.fileMtimeMaxMs,
+ reason: persistedReason,
+ });
+ if (persistedRows === 0) {
+ warnVerbose(`[index] zero-row ${dirPath}: ${persistedReason}`);
+ }
  }
  });
  insertTransaction();
  return { scannedDirs, skippedDirs, generatedCount, warnings, dirsNeedingLlm };
  }
- async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal) {
+ function getDirIndexState(db, dirPath, files, builtAtMs) {
+ const prevEntries = getEntriesByDir(db, dirPath);
+ const fingerprint = computeDirFingerprint(dirPath, files);
+ if (prevEntries.length > 0) {
+ const staleReason = getDirStaleReason(dirPath, files, prevEntries, builtAtMs);
+ if (!staleReason) {
+ return { stale: false, reason: { kind: "unchanged" }, persistedRowCount: prevEntries.length };
+ }
+ return { stale: true, reason: staleReason, persistedRowCount: prevEntries.length };
+ }
+ const cachedState = getIndexDirState(db, dirPath);
+ if (cachedState &&
+ cachedState.fileSetHash === fingerprint.fileSetHash &&
+ cachedState.fileMtimeMaxMs === fingerprint.fileMtimeMaxMs) {
+ return {
+ stale: false,
+ reason: { kind: "cached-zero-row-state", detail: cachedState.reason },
+ persistedRowCount: 0,
+ };
+ }
+ return {
+ stale: true,
+ reason: { kind: "no-previous-rows", detail: cachedState ? `cached=${cachedState.reason}` : undefined },
+ persistedRowCount: 0,
+ };
+ }
+ function getCachedZeroRowDirState(db, dirPath, files, builtAtMs, priorDirsChanged) {
+ const state = getDirIndexState(db, dirPath, files, builtAtMs);
+ if (state.stale || state.reason.kind !== "cached-zero-row-state")
+ return undefined;
+ if (!canUseIncrementalSkip(state, priorDirsChanged))
+ return undefined;
+ return state;
+ }
+ function canUseIncrementalSkip(state, priorDirsChanged) {
+ return !(priorDirsChanged &&
+ state.reason.kind === "cached-zero-row-state" &&
+ state.reason.detail === "deduped-zero-row");
+ }
+ function computeDirFingerprint(_dirPath, files) {
+ const normalizedFiles = [...new Set(files.map((file) => path.basename(file)))].sort();
+ let fileMtimeMaxMs = 0;
+ for (const file of files) {
+ try {
+ fileMtimeMaxMs = Math.max(fileMtimeMaxMs, fs.statSync(file).mtimeMs);
+ }
+ catch {
+ fileMtimeMaxMs = Number.POSITIVE_INFINITY;
+ break;
+ }
+ }
+ return {
+ fileSetHash: normalizedFiles.join("\0"),
+ fileMtimeMaxMs,
+ };
+ }
+ function getDirStaleReason(_dirPath, currentFiles, previousEntries, builtAtMs) {
+ const prevFileNames = new Set(previousEntries
+ .map((ie) => {
+ const fromPath = path.basename(ie.filePath);
+ return fromPath || ie.entry.filename;
+ })
+ .filter((e) => !!e));
+ const currFileNames = new Set(currentFiles.map((f) => path.basename(f)));
+ if (prevFileNames.size !== currFileNames.size) {
+ return { kind: "file-set-changed", detail: `${prevFileNames.size} -> ${currFileNames.size} files` };
+ }
+ for (const name of currFileNames) {
+ if (!prevFileNames.has(name))
+ return { kind: "file-set-changed", detail: name };
+ }
+ for (const file of currentFiles) {
+ try {
+ if (fs.statSync(file).mtimeMs > builtAtMs)
+ return { kind: "mtime-changed", detail: path.basename(file) };
+ }
+ catch {
+ return { kind: "missing-file", detail: path.basename(file) };
+ }
+ }
+ return undefined;
+ }
+ function inferZeroRowReason(stash, priorReason, warnings, dirPath, dedupedRows) {
+ if (dedupedRows > 0)
+ return "deduped-zero-row";
+ const workflowNoise = warnings.some((warning) => warning.startsWith("Skipped workflow ") && warning.includes(dirPath));
+ if (workflowNoise)
+ return "workflow-noise";
+ if (!stash || stash.entries.length === 0)
+ return "empty-generated-set";
+ return `zero-row:${priorReason?.kind ?? "unknown"}`;
+ }
+ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = false) {
+ if (!enrich)
+ return;
  // Resolve per-pass LLM config via the unified shim. Returns undefined when
  // either no `akm.llm` is configured or the user opted this pass out via
  // `index.enrichment.llm = false`. (#208)
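The persisted fingerprint these helpers compare is deliberately cheap: the NUL-joined sorted set of basenames plus the maximum mtime. A toy sketch of the comparison that lets a zero-row directory be skipped; the file paths and the cached row are invented, `fingerprint` mirrors `computeDirFingerprint` above, and `fs`/`path` are the Node builtins the module already uses.

```js
import fs from "node:fs";
import path from "node:path";

// Mirrors computeDirFingerprint from the hunk above (dirPath is unused there too).
function fingerprint(files) {
  const names = [...new Set(files.map((f) => path.basename(f)))].sort();
  let mtimeMax = 0;
  for (const f of files) {
    try { mtimeMax = Math.max(mtimeMax, fs.statSync(f).mtimeMs); }
    catch { mtimeMax = Number.POSITIVE_INFINITY; break; } // stat failure poisons the fingerprint
  }
  return { fileSetHash: names.join("\0"), fileMtimeMaxMs: mtimeMax };
}

// Invented cached row, shaped like what upsertIndexDirState persists.
const cached = { fileSetHash: "a.md\0b.md", fileMtimeMaxMs: 1700000000000 };
const current = fingerprint(["/stash/docs/a.md", "/stash/docs/b.md"]);

// getDirIndexState treats a zero-row dir as skippable only when both halves match.
const skippable =
  cached.fileSetHash === current.fileSetHash &&
  cached.fileMtimeMaxMs === current.fileMtimeMaxMs;
console.log(skippable ? "skip (cached-zero-row-state)" : "rescan");
```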
@@ -683,36 +879,25 @@ function verifyIndexState(db, config, totalEntries, embeddingResult) {
  vecAvailable,
  };
  }
- function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
- // Check if file set changed (additions or deletions)
- const prevFileNames = new Set(previousEntries.map((ie) => ie.entry.filename).filter((e) => !!e));
- const currFileNames = new Set(currentFiles.map((f) => path.basename(f)));
- if (prevFileNames.size !== currFileNames.size)
- return true;
- for (const name of currFileNames) {
- if (!prevFileNames.has(name))
- return true;
- }
- // Check modification times of current files
- for (const file of currentFiles) {
- try {
- if (fs.statSync(file).mtimeMs > builtAtMs)
- return true;
- }
- catch {
- return true;
- }
- }
- // Check .stash.json modification time
- const stashPath = path.join(dirPath, ".stash.json");
- try {
- if (fs.statSync(stashPath).mtimeMs > builtAtMs)
- return true;
- }
- catch {
- // file doesn't exist, not stale
+ function buildIndexedDirCandidate(dirPath, indexableFiles, generated, legacyOverrides) {
+ const mergedEntries = legacyOverrides
+ ? generated.entries.map((entry) => mergeLegacyEntry(entry, legacyOverrides.entries))
+ : generated.entries;
+ const stash = mergedEntries.length > 0 ? { entries: mergedEntries } : legacyOverrides;
+ const staleFiles = stash ? resolveIndexedFiles(dirPath, indexableFiles, stash) : indexableFiles;
+ return { stash, staleFiles };
+ }
+ function resolveIndexedFiles(dirPath, files, stash) {
+ const fileBasenameMap = buildFileBasenameMap(files);
+ const resolved = new Set();
+ for (const entry of stash.entries) {
+ const entryPath = entry.filename
+ ? path.join(dirPath, entry.filename)
+ : matchEntryToFile(entry.name, fileBasenameMap, files);
+ if (entryPath)
+ resolved.add(entryPath);
  }
- return false;
+ return resolved.size > 0 ? [...resolved] : files;
  }
  async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
  const { enhanceMetadata } = await import("../llm/metadata-enhance");
@@ -776,9 +961,9 @@ export function buildFileBasenameMap(files) {
  * 1. Exact basename match: entry.name === filename without extension
  * 2. Last path segment match: for entries with names like "dir/sub-entry",
  * try matching the last segment
- * 3. Fallback: first file in the directory, or null if no files are available
+ * 3. No implicit file fallback: ambiguous legacy entries are skipped
  */
- export function matchEntryToFile(entryName, fileMap, files) {
+ export function matchEntryToFile(entryName, fileMap, _files) {
  // Exact match on entry name
  const exact = fileMap.get(entryName);
  if (exact)
@@ -790,8 +975,20 @@ export function matchEntryToFile(entryName, fileMap, files) {
  return exact;
  if (segmentMatch)
  return segmentMatch;
- // Fallback to first file, or null if no files are available
- return files[0] || null;
+ return null;
+ }
+ function mergeLegacyEntry(entry, legacyEntries) {
+ const legacy = legacyEntries.find((candidate) => candidate.filename === entry.filename);
+ if (!legacy)
+ return entry;
+ return {
+ ...entry,
+ ...legacy,
+ filename: entry.filename,
+ source: legacy.source ?? entry.source,
+ quality: legacy.quality ?? entry.quality,
+ confidence: legacy.confidence ?? entry.confidence,
+ };
  }
  /**
  * Look up a single asset by ref. Spec §6.2 — `akm show` queries this and
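Worth noting from the two hunks above: `matchEntryToFile` no longer falls back to the first file in the directory, so a legacy entry whose name matches nothing is skipped rather than pinned to an arbitrary file. A sketch of the old-versus-new behavior; the file list and entry names are invented, and the assumption is that `buildFileBasenameMap` keys files by their extension-less basename, as the docstring describes.

```js
// buildFileBasenameMap and matchEntryToFile are this module's own exports.
const files = ["/stash/notes/readme.md"];
const fileMap = buildFileBasenameMap(files);

matchEntryToFile("readme", fileMap, files); // "/stash/notes/readme.md" (rule 1)
matchEntryToFile("orphan", fileMap, files); // 0.7.2: "/stash/notes/readme.md" (fallback)
                                            // 0.7.3: null, so the entry is skipped
```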
@@ -819,7 +1016,7 @@ export async function lookup(ref) {
  if (sources.length === 0)
  return null;
  const dbPath = getDbPath();
- const db = openDatabase(dbPath);
+ const db = openExistingDatabase(dbPath);
  try {
  // entry_key shape: `${stashDir}:${type}:${name}`. Suffix-match on
  // `:type:name` so we can scope by source dir as a prefix when origin is