akm-cli 0.7.5 → 0.8.0-rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/.github/CHANGELOG.md +1 -1
  2. package/dist/cli/parse-args.js +43 -0
  3. package/dist/cli.js +804 -461
  4. package/dist/commands/agent-dispatch.js +102 -0
  5. package/dist/commands/agent-support.js +62 -0
  6. package/dist/commands/config-cli.js +68 -84
  7. package/dist/commands/consolidate.js +823 -0
  8. package/dist/commands/distill-promotion-policy.js +658 -0
  9. package/dist/commands/distill.js +244 -52
  10. package/dist/commands/eval-cases.js +40 -0
  11. package/dist/commands/events.js +2 -23
  12. package/dist/commands/graph.js +222 -0
  13. package/dist/commands/health.js +376 -0
  14. package/dist/commands/help/help-accept.md +9 -0
  15. package/dist/commands/help/help-improve.md +53 -0
  16. package/dist/commands/help/help-proposals.md +15 -0
  17. package/dist/commands/help/help-propose.md +17 -0
  18. package/dist/commands/help/help-reject.md +8 -0
  19. package/dist/commands/history.js +3 -30
  20. package/dist/commands/improve.js +1170 -0
  21. package/dist/commands/info.js +2 -2
  22. package/dist/commands/init.js +2 -2
  23. package/dist/commands/install-audit.js +5 -1
  24. package/dist/commands/installed-stashes.js +118 -138
  25. package/dist/commands/knowledge.js +133 -0
  26. package/dist/commands/lint/agent-linter.js +46 -0
  27. package/dist/commands/lint/base-linter.js +251 -0
  28. package/dist/commands/lint/command-linter.js +46 -0
  29. package/dist/commands/lint/default-linter.js +13 -0
  30. package/dist/commands/lint/index.js +107 -0
  31. package/dist/commands/lint/knowledge-linter.js +13 -0
  32. package/dist/commands/lint/memory-linter.js +58 -0
  33. package/dist/commands/lint/registry.js +33 -0
  34. package/dist/commands/lint/skill-linter.js +42 -0
  35. package/dist/commands/lint/task-linter.js +47 -0
  36. package/dist/commands/lint/types.js +1 -0
  37. package/dist/commands/lint/workflow-linter.js +53 -0
  38. package/dist/commands/lint.js +1 -0
  39. package/dist/commands/proposal.js +8 -7
  40. package/dist/commands/propose.js +78 -28
  41. package/dist/commands/reflect.js +143 -35
  42. package/dist/commands/registry-search.js +2 -2
  43. package/dist/commands/remember.js +54 -0
  44. package/dist/commands/schema-repair.js +130 -0
  45. package/dist/commands/search.js +21 -5
  46. package/dist/commands/show.js +121 -17
  47. package/dist/commands/source-add.js +10 -10
  48. package/dist/commands/source-manage.js +11 -19
  49. package/dist/commands/tasks.js +385 -0
  50. package/dist/commands/url-checker.js +39 -0
  51. package/dist/commands/vault.js +2 -23
  52. package/dist/core/action-contributors.js +25 -0
  53. package/dist/core/asset-registry.js +4 -16
  54. package/dist/core/asset-spec.js +10 -0
  55. package/dist/core/common.js +94 -0
  56. package/dist/core/concurrent.js +22 -0
  57. package/dist/core/config.js +222 -128
  58. package/dist/core/events.js +73 -126
  59. package/dist/core/frontmatter.js +3 -1
  60. package/dist/core/markdown.js +17 -0
  61. package/dist/core/memory-improve.js +678 -0
  62. package/dist/core/parse.js +155 -0
  63. package/dist/core/paths.js +101 -3
  64. package/dist/core/proposal-validators.js +61 -0
  65. package/dist/core/proposals.js +49 -38
  66. package/dist/core/state-db.js +775 -0
  67. package/dist/core/time.js +51 -0
  68. package/dist/core/warn.js +59 -1
  69. package/dist/indexer/db-search.js +52 -238
  70. package/dist/indexer/db.js +377 -1
  71. package/dist/indexer/ensure-index.js +61 -0
  72. package/dist/indexer/graph-boost.js +247 -94
  73. package/dist/indexer/graph-db.js +201 -0
  74. package/dist/indexer/graph-dedup.js +99 -0
  75. package/dist/indexer/graph-extraction.js +409 -76
  76. package/dist/indexer/index-context.js +10 -0
  77. package/dist/indexer/indexer.js +442 -290
  78. package/dist/indexer/llm-cache.js +47 -0
  79. package/dist/indexer/match-contributors.js +141 -0
  80. package/dist/indexer/matchers.js +24 -190
  81. package/dist/indexer/memory-inference.js +63 -29
  82. package/dist/indexer/metadata-contributors.js +26 -0
  83. package/dist/indexer/metadata.js +188 -175
  84. package/dist/indexer/path-resolver.js +89 -0
  85. package/dist/indexer/ranking-contributors.js +204 -0
  86. package/dist/indexer/ranking.js +74 -0
  87. package/dist/indexer/search-hit-enrichers.js +22 -0
  88. package/dist/indexer/search-source.js +24 -9
  89. package/dist/indexer/semantic-status.js +2 -16
  90. package/dist/indexer/walker.js +25 -0
  91. package/dist/integrations/agent/config.js +175 -3
  92. package/dist/integrations/agent/index.js +3 -1
  93. package/dist/integrations/agent/pipeline.js +39 -0
  94. package/dist/integrations/agent/profiles.js +67 -5
  95. package/dist/integrations/agent/prompts.js +77 -72
  96. package/dist/integrations/agent/runners.js +31 -0
  97. package/dist/integrations/agent/sdk-runner.js +120 -0
  98. package/dist/integrations/agent/spawn.js +71 -16
  99. package/dist/integrations/lockfile.js +10 -18
  100. package/dist/integrations/session-logs/index.js +65 -0
  101. package/dist/integrations/session-logs/providers/claude-code.js +56 -0
  102. package/dist/integrations/session-logs/providers/opencode.js +52 -0
  103. package/dist/integrations/session-logs/types.js +1 -0
  104. package/dist/llm/call-ai.js +74 -0
  105. package/dist/llm/client.js +61 -122
  106. package/dist/llm/feature-gate.js +27 -16
  107. package/dist/llm/graph-extract.js +297 -62
  108. package/dist/llm/memory-infer.js +49 -71
  109. package/dist/llm/metadata-enhance.js +39 -22
  110. package/dist/llm/prompts/graph-extract-user-prompt.md +12 -0
  111. package/dist/output/cli-hints-full.md +277 -0
  112. package/dist/output/cli-hints-short.md +65 -0
  113. package/dist/output/cli-hints.js +2 -318
  114. package/dist/output/renderers.js +190 -123
  115. package/dist/output/shapes.js +33 -0
  116. package/dist/output/text.js +239 -2
  117. package/dist/registry/providers/skills-sh.js +61 -49
  118. package/dist/registry/providers/static-index.js +44 -48
  119. package/dist/setup/setup.js +510 -11
  120. package/dist/sources/provider-factory.js +2 -1
  121. package/dist/sources/providers/git.js +2 -2
  122. package/dist/sources/website-ingest.js +4 -0
  123. package/dist/tasks/backends/cron.js +200 -0
  124. package/dist/tasks/backends/exec-utils.js +25 -0
  125. package/dist/tasks/backends/index.js +32 -0
  126. package/dist/tasks/backends/launchd-template.xml +19 -0
  127. package/dist/tasks/backends/launchd.js +184 -0
  128. package/dist/tasks/backends/schtasks-template.xml +29 -0
  129. package/dist/tasks/backends/schtasks.js +212 -0
  130. package/dist/tasks/parser.js +198 -0
  131. package/dist/tasks/resolveAkmBin.js +84 -0
  132. package/dist/tasks/runner.js +432 -0
  133. package/dist/tasks/schedule.js +208 -0
  134. package/dist/tasks/schema.js +13 -0
  135. package/dist/tasks/validator.js +59 -0
  136. package/dist/wiki/index-template.md +12 -0
  137. package/dist/wiki/ingest-workflow-template.md +54 -0
  138. package/dist/wiki/log-template.md +8 -0
  139. package/dist/wiki/schema-template.md +61 -0
  140. package/dist/wiki/wiki-templates.js +12 -0
  141. package/dist/wiki/wiki.js +10 -61
  142. package/dist/workflows/authoring.js +5 -25
  143. package/dist/workflows/renderer.js +8 -3
  144. package/dist/workflows/runs.js +59 -91
  145. package/dist/workflows/validator.js +1 -1
  146. package/dist/workflows/workflow-template.md +24 -0
  147. package/docs/README.md +3 -0
  148. package/docs/migration/release-notes/0.7.0.md +1 -1
  149. package/docs/migration/release-notes/0.8.0.md +43 -0
  150. package/package.json +3 -2
  151. package/dist/templates/wiki-templates.js +0 -100
@@ -1,14 +1,13 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { isHttpUrl, resolveStashDir, toErrorMessage } from "../core/common";
4
+ import { concurrentMap } from "../core/concurrent";
4
5
  import { getDbPath } from "../core/paths";
5
6
  import { isVerbose, warn, warnVerbose } from "../core/warn";
6
7
  import { resolveIndexPassLLM } from "../llm/index-passes";
7
8
  import { takeWorkflowDocument } from "../workflows/document-cache";
8
- import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, warnIfVecMissing, } from "./db";
9
- import { runGraphExtractionPass } from "./graph-extraction";
10
- import { runMemoryInferencePass } from "./memory-inference";
11
- import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
9
+ import { clearStaleCacheEntries, closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getAllEntriesForEmbedding, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, relinkUsageEvents, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, upsertWorkflowDocument, warnIfVecMissing, } from "./db";
10
+ import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isEnrichmentComplete, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
12
11
  import { buildSearchText } from "./search-fields";
13
12
  import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
14
13
  import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
@@ -18,19 +17,196 @@ function throwIfAborted(signal) {
18
17
  throw signal.reason instanceof Error ? signal.reason : new Error("index interrupted");
19
18
  }
20
19
  }
20
/**
 * Pick the default number of concurrent LLM requests for an index run.
 *
 * Resolution order:
 *  1. A numeric `llmConfig.concurrency` is returned unchanged.
 *  2. A missing endpoint, an endpoint that cannot be parsed as a URL, or an
 *     endpoint whose host is a loopback name/address yields 1.
 *  3. Every other endpoint yields 4.
 *
 * NOTE(review): loopback endpoints are presumably limited to 1 because a
 * locally hosted model serves requests serially — confirm with the LLM client.
 *
 * @param {{ concurrency?: number, endpoint?: string } | null | undefined} llmConfig
 *   LLM configuration block; may be absent.
 * @returns {number} Concurrency to use for LLM calls.
 */
function getDefaultLlmConcurrency(llmConfig) {
    if (typeof llmConfig?.concurrency === "number")
        return llmConfig.concurrency;
    if (!llmConfig?.endpoint)
        return 1;
    let host;
    try {
        host = new URL(llmConfig.endpoint).hostname.toLowerCase();
    }
    catch {
        // Unparseable endpoint: fall back to the conservative default.
        return 1;
    }
    // The URL API serializes IPv6 literals with their square brackets, so the
    // IPv6 loopback address shows up as "[::1]", never as a bare "::1". The
    // bare form is kept only as a harmless defensive match.
    const isLoopback = host === "localhost" ||
        host === "127.0.0.1" ||
        host === "[::1]" ||
        host === "::1" ||
        host.endsWith(".localhost");
    return isLoopback ? 1 : 4;
}
36
+ // ── Phase functions ──────────────────────────────────────────────────────────
37
/**
 * Source cache phase.
 *
 * On an incremental, non-full run this compares the directory list stored
 * under the `stashDirs` index-meta key with `ctx.sourceDirs`. For every stored
 * directory that is no longer a current source (e.g. after `akm remove`), its
 * entries and per-directory index states are deleted and
 * `ctx.hadRemovedSources` is set to `true`.
 *
 * A stored value that is not valid JSON, or is not an array, is reported with
 * `warn()` and treated as an empty list. Non-string array members are ignored.
 *
 * @param ctx Index run context. Reads `db`, `sourceDirs`, `isIncremental` and
 *   `full`; may write `hadRemovedSources`.
 * @returns {Promise<void>}
 */
async function runSourceCachePhase(ctx) {
    const { db, config, sourceDirs, isIncremental, full } = ctx;
    // Source caches are hydrated before akmIndex() calls this phase, so the
    // config is not needed here; reference it to keep it intentionally unused.
    void config;
    if (!isIncremental || full)
        return;
    const storedJson = getMeta(db, "stashDirs");
    if (!storedJson)
        return;
    const previousDirs = [];
    try {
        const decoded = JSON.parse(storedJson);
        if (!Array.isArray(decoded)) {
            warn("index_meta stashDirs value is not an array — treating as empty");
        }
        else {
            for (const candidate of decoded) {
                if (typeof candidate === "string")
                    previousDirs.push(candidate);
            }
        }
    }
    catch {
        warn("index_meta stashDirs value is corrupt JSON — treating as empty");
    }
    // Anything recorded last time but absent now is an orphaned source.
    const stillConfigured = new Set(sourceDirs);
    for (const previousDir of previousDirs) {
        if (stillConfigured.has(previousDir))
            continue;
        ctx.hadRemovedSources = true;
        deleteEntriesByStashDir(db, previousDir);
        deleteIndexDirStatesByStashDir(db, previousDir);
    }
}
75
/**
 * Walk phase.
 *
 * Delegates the scan to `indexEntries()`, copies its counters onto the run
 * context, reports scan progress, emits a single summary warning for skipped
 * workflow specs when not in verbose mode, and then hands the directories
 * flagged by the walk to `enhanceDirsWithLlm()`.
 *
 * Writes `ctx.scannedDirs`, `ctx.skippedDirs`, `ctx.generatedCount`,
 * `ctx.walkWarnings`, `ctx.dirsNeedingLlm`, and the `tWalkStart`, `tWalkEnd`
 * and `tLlmEnd` timing marks.
 *
 * @param ctx Index run context.
 * @returns {Promise<void>}
 * @throws whatever `throwIfAborted(signal)` throws when the run was aborted.
 */
async function runWalkPhase(ctx) {
    const { db, sources, isIncremental, builtAtMs, hadRemovedSources, full, reEnrich, signal, onProgress, config } = ctx;
    throwIfAborted(signal);
    ctx.timing.tWalkStart = Date.now();
    // A full run, or the first run against this stash, wipes before inserting.
    const wipeFirst = full || !isIncremental;
    const walk = await indexEntries(db, sources, isIncremental, builtAtMs, hadRemovedSources, wipeFirst, onProgress);
    ctx.scannedDirs = walk.scannedDirs;
    ctx.skippedDirs = walk.skippedDirs;
    ctx.generatedCount = walk.generatedCount;
    ctx.walkWarnings = walk.warnings;
    ctx.dirsNeedingLlm = walk.dirsNeedingLlm;
    const dirWord = walk.scannedDirs === 1 ? "directory" : "directories";
    onProgress({
        phase: "scan",
        message: `Scanned ${walk.scannedDirs} ${dirWord} and skipped ${walk.skippedDirs}.`,
    });
    // Workflow validation noise gate (issue #273): at default verbosity the
    // per-spec lines are replaced by one summary line. Verbose mode already
    // printed the per-spec lines at generation time, so nothing is added there.
    if (!isVerbose()) {
        const skippedSpecs = walk.warnings.filter(isWorkflowSkipWarning).length;
        if (skippedSpecs > 0) {
            const specWord = skippedSpecs === 1 ? "workflow spec" : "workflow specs";
            warn(`${skippedSpecs} ${specWord} skipped due to validation errors; rerun with --verbose (or AKM_VERBOSE=1) to see details.`);
        }
    }
    ctx.timing.tWalkEnd = Date.now();
    throwIfAborted(signal);
    // LLM enrichment for the directories the walk flagged.
    const llmDirs = walk.dirsNeedingLlm;
    await enhanceDirsWithLlm(db, config, llmDirs, onProgress, signal, true, reEnrich);
    let llmMessage;
    if (resolveIndexPassLLM("enrichment", config)) {
        const llmDirWord = llmDirs.length === 1 ? "directory" : "directories";
        llmMessage = `LLM enhancement reviewed ${llmDirs.length} ${llmDirWord}.`;
    }
    else {
        llmMessage = "LLM enhancement disabled.";
    }
    onProgress({ phase: "llm", message: llmMessage });
    ctx.timing.tLlmEnd = Date.now();
}
122
/**
 * Embedding phase.
 *
 * Checks for an abort, delegates to `generateEmbeddingsForDb()`, stores its
 * result on `ctx.embeddingResult` (read later by the finalize phase), and
 * records the `tEmbedEnd` timing mark.
 *
 * @param ctx Index run context; reads `db`, `config`, `signal`, `onProgress`.
 * @returns {Promise<void>}
 * @throws whatever `throwIfAborted(signal)` throws when the run was aborted.
 */
async function runEmbeddingPhase(ctx) {
    const { signal, db, config, onProgress } = ctx;
    throwIfAborted(signal);
    const outcome = await generateEmbeddingsForDb(db, config, onProgress);
    ctx.embeddingResult = outcome;
    ctx.timing.tEmbedEnd = Date.now();
}
132
/**
 * Finalize phase.
 *
 * In order: rebuilds the full-text index (incrementally when the run is
 * incremental), re-links usage events, recomputes utility scores, purges stale
 * LLM cache entries, regenerates the wiki indexes of the primary stash, writes
 * the index metadata keys (`builtAt`, `stashDir`, `stashDirs`,
 * `hasEmbeddings`), verifies the index state, persists or clears the semantic
 * status, and reports the verification message.
 *
 * The cache purge and the wiki regeneration are best-effort: their errors are
 * deliberately swallowed so they cannot fail the index run.
 *
 * Writes `ctx.timing.tFtsEnd`, `ctx._verification` and `ctx._totalEntries`.
 *
 * @param ctx Index run context. `ctx.embeddingResult` may be unset, in which
 *   case `{ success: false }` is assumed.
 * @returns {Promise<void>}
 * @throws whatever `throwIfAborted(signal)` throws when the run was aborted.
 */
async function runFinalizePhase(ctx) {
    const { db, config, sources, sourceDirs, isIncremental, stashDir, signal, onProgress } = ctx;
    // `sources` was previously used inline; keep the reference so it is not
    // flagged as unused.
    void sources;
    // Rebuild FTS after all inserts. On an incremental run only the rows
    // touched by `upsertEntry` since the last rebuild are re-indexed.
    rebuildFts(db, { incremental: isIncremental });
    const ftsMessage = isIncremental
        ? "Rebuilt full-text search index (dirty rows only)."
        : "Rebuilt full-text search index.";
    onProgress({ phase: "fts", message: ftsMessage });
    ctx.timing.tFtsEnd = Date.now();
    // Re-link detached usage_events, then recompute utility scores.
    relinkUsageEvents(db);
    recomputeUtilityScores(db);
    // Best-effort: drop LLM cache entries for assets no longer in the index.
    try {
        clearStaleCacheEntries(db);
    }
    catch {
        /* ignore */
    }
    // Best-effort: regenerate each wiki's index.md from its pages' frontmatter.
    try {
        const wikiModule = await import("../wiki/wiki.js");
        wikiModule.regenerateAllWikiIndexes(stashDir);
    }
    catch {
        /* best-effort */
    }
    throwIfAborted(signal);
    // Update index metadata.
    const embedOutcome = ctx.embeddingResult ?? { success: false };
    setMeta(db, "builtAt", new Date().toISOString());
    setMeta(db, "stashDir", stashDir);
    setMeta(db, "stashDirs", JSON.stringify(sourceDirs));
    setMeta(db, "hasEmbeddings", embedOutcome.success ? "1" : "0");
    warnIfVecMissing(db);
    const entryTotal = getEntryCount(db);
    const verifyResult = verifyIndexState(db, config, entryTotal, embedOutcome);
    if (config.semanticSearchMode !== "off") {
        // A "disabled" verification status is persisted as "pending".
        const statusRecord = {
            status: verifyResult.semanticStatus === "disabled" ? "pending" : verifyResult.semanticStatus,
        };
        if (embedOutcome.reason)
            statusRecord.reason = embedOutcome.reason;
        if (embedOutcome.message)
            statusRecord.message = embedOutcome.message;
        statusRecord.providerFingerprint = deriveSemanticProviderFingerprint(config.embedding);
        statusRecord.lastCheckedAt = new Date().toISOString();
        statusRecord.entryCount = verifyResult.entryCount;
        statusRecord.embeddingCount = verifyResult.embeddingCount;
        writeSemanticStatus(statusRecord);
    }
    else {
        clearSemanticStatus();
    }
    onProgress({ phase: "verify", message: verifyResult.message });
    // Hand the verification result and entry total back to the caller via ctx.
    ctx._verification = verifyResult;
    ctx._totalEntries = entryTotal;
}
21
196
  // ── Indexer ──────────────────────────────────────────────────────────────────
22
197
  export async function akmIndex(options) {
23
198
  const stashDir = options?.stashDir || resolveStashDir();
24
199
  const onProgress = options?.onProgress ?? (() => { });
25
200
  const signal = options?.signal;
26
- const enrich = options?.enrich === true;
201
+ const reEnrich = options?.reEnrich === true;
202
+ const full = options?.full === true;
27
203
  // Load config and resolve all stash sources
28
204
  const { loadConfig } = await import("../core/config.js");
29
205
  const config = loadConfig();
30
206
  // Ensure git stash caches are extracted before resolving stash dirs,
31
207
  // so their content directories exist on disk for the walker to discover.
32
208
  const { ensureSourceCaches, resolveSourceEntries } = await import("./search-source.js");
33
- await ensureSourceCaches(config, { force: options?.full === true });
209
+ await ensureSourceCaches(config, { force: full });
34
210
  const allSourceEntries = resolveSourceEntries(stashDir, config);
35
211
  const allSourceDirs = allSourceEntries.map((s) => s.path);
36
212
  const t0 = Date.now();
@@ -39,11 +215,41 @@ export async function akmIndex(options) {
39
215
  const embeddingDim = config.embedding?.dimension;
40
216
  const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
41
217
  try {
42
- // Check if we should do incremental
218
+ // Determine incremental vs full mode
43
219
  const prevStashDir = getMeta(db, "stashDir");
44
220
  const prevBuiltAt = getMeta(db, "builtAt");
45
- const isIncremental = !options?.full && prevStashDir === stashDir && !!prevBuiltAt;
221
+ const isIncremental = !full && prevStashDir === stashDir && !!prevBuiltAt;
46
222
  const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
223
+ // Assemble the run context
224
+ const ctx = {
225
+ db,
226
+ config,
227
+ sources: allSourceEntries,
228
+ sourceDirs: allSourceDirs,
229
+ full,
230
+ reEnrich,
231
+ stashDir,
232
+ onProgress,
233
+ signal,
234
+ timing: {
235
+ t0,
236
+ tWalkStart: t0,
237
+ tWalkEnd: t0,
238
+ tLlmEnd: t0,
239
+ tFtsEnd: t0,
240
+ tEmbedEnd: t0,
241
+ },
242
+ isIncremental,
243
+ builtAtMs,
244
+ hadRemovedSources: false,
245
+ scannedDirs: 0,
246
+ skippedDirs: 0,
247
+ generatedCount: 0,
248
+ walkWarnings: [],
249
+ dirsNeedingLlm: [],
250
+ embeddingResult: null,
251
+ graphExtractionResult: null,
252
+ };
47
253
  onProgress({
48
254
  phase: "summary",
49
255
  message: buildIndexSummaryMessage({
@@ -51,230 +257,34 @@ export async function akmIndex(options) {
51
257
  sourcesCount: allSourceDirs.length,
52
258
  semanticSearchMode: config.semanticSearchMode,
53
259
  embeddingProvider: getEmbeddingProvider(config.embedding),
54
- llmEnabled: enrich && !!resolveIndexPassLLM("enrichment", config),
260
+ llmEnabled: !!resolveIndexPassLLM("enrichment", config),
55
261
  vecAvailable: isVecAvailable(db),
56
262
  }),
57
263
  });
58
- let hadRemovedSources = false;
59
- if (options?.full || !isIncremental) {
60
- // The delete is now merged into the insert transaction inside
61
- // indexEntries() so that a reader never sees an empty database between
62
- // the wipe and the re-inserts. The doFullDelete flag signals this path.
63
- }
64
- else {
65
- // Incremental: purge entries from stash dirs that have been removed
66
- // (e.g. after `akm remove`) so orphaned entries don't linger.
67
- const prevStashDirsJson = getMeta(db, "stashDirs");
68
- if (prevStashDirsJson) {
69
- let prevStashDirs = [];
70
- try {
71
- const parsed = JSON.parse(prevStashDirsJson);
72
- if (Array.isArray(parsed)) {
73
- prevStashDirs = parsed.filter((d) => typeof d === "string");
74
- }
75
- else {
76
- warn("index_meta stashDirs value is not an array — treating as empty");
77
- }
78
- }
79
- catch {
80
- warn("index_meta stashDirs value is corrupt JSON — treating as empty");
81
- }
82
- const currentSet = new Set(allSourceDirs);
83
- for (const dir of prevStashDirs) {
84
- if (!currentSet.has(dir)) {
85
- hadRemovedSources = true;
86
- deleteEntriesByStashDir(db, dir);
87
- deleteIndexDirStatesByStashDir(db, dir);
88
- }
89
- }
90
- }
91
- }
92
- throwIfAborted(signal);
93
- // Memory inference pass (#201). Runs before the walk so any derived-memory
94
- // children that get written are picked up by the walker in this same run
95
- // and don't have to wait for the next `akm index`. Gated entirely by
96
- // `resolveIndexPassLLM("memory", config)` — when the user has no
97
- // `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
98
- // and existing inferred children are left in place.
99
- if (enrich) {
100
- try {
101
- const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
102
- if (inferenceResult.writtenFacts > 0 || inferenceResult.skippedNoFacts > 0) {
103
- onProgress({
104
- phase: "llm",
105
- message: `Memory inference reviewed ${inferenceResult.considered} ` +
106
- `${inferenceResult.considered === 1 ? "memory" : "memories"}; wrote ` +
107
- `${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} ` +
108
- `from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}` +
109
- (inferenceResult.skippedNoFacts > 0
110
- ? `; skipped ${inferenceResult.skippedNoFacts} ${inferenceResult.skippedNoFacts === 1 ? "memory" : "memories"} with unusable LLM responses`
111
- : "") +
112
- ".",
113
- });
114
- }
115
- if (inferenceResult.skippedNoFacts > 0) {
116
- warn(`Memory inference skipped ${inferenceResult.skippedNoFacts} ` +
117
- `${inferenceResult.skippedNoFacts === 1 ? "memory" : "memories"} because the LLM returned empty, invalid, or incomplete derived payloads. ` +
118
- "Check your model and token budget.");
119
- }
120
- }
121
- catch (err) {
122
- warn(`Memory inference pass aborted: ${err instanceof Error ? err.message : String(err)}`);
123
- }
124
- }
125
- else {
126
- onProgress({
127
- phase: "llm",
128
- message: "LLM passes disabled; rerun with --enrich to enable inference and enrichment.",
129
- });
130
- }
131
- // Graph extraction pass (#207). Runs after memory inference so any
132
- // atomic-fact children that just got written are visible to the graph
133
- // walk. Persists `<stashRoot>/.akm/graph.json` — an indexer artifact,
134
- // NOT a user-visible asset, so it is not routed through
135
- // writeAssetToSource. The artifact feeds the existing FTS5+boosts
136
- // pipeline as a single boost component (see graph-boost.ts); there is
137
- // no parallel scoring track. Disabled when either gate (the locked
138
- // `llm.features.graph_extraction` feature flag or the per-pass
139
- // `index.graph.llm` toggle) is off; the existing graph file is
140
- // preserved on disk in that case.
141
- if (enrich) {
142
- try {
143
- const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
144
- if (graphResult.written) {
145
- onProgress({
146
- phase: "llm",
147
- message: `Graph extraction wrote ${graphResult.totalEntities} entit${graphResult.totalEntities === 1 ? "y" : "ies"} and ${graphResult.totalRelations} relation${graphResult.totalRelations === 1 ? "" : "s"} from ${graphResult.extracted} file${graphResult.extracted === 1 ? "" : "s"}.`,
148
- });
149
- }
150
- }
151
- catch (err) {
152
- warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
153
- }
154
- }
155
- throwIfAborted(signal);
156
- const tWalkStart = Date.now();
157
- // Walk stash dirs and index entries.
158
- // doFullDelete=true merges the wipe into the same transaction as the
159
- // inserts so readers never see an empty database mid-rebuild.
160
- const doFullDelete = options?.full || !isIncremental;
161
- const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete, onProgress);
162
- onProgress({
163
- phase: "scan",
164
- message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
165
- });
166
- // Workflow validation noise gate (issue #273): per-spec stderr lines from
167
- // `buildMetadataSkipWarning` are suppressed at default verbosity in
168
- // `metadata.ts`. Replace them with a single summary line so operators
169
- // running a cold-start search against a fresh registry-cloned source
170
- // don't get the impression akm is broken. Verbose mode keeps the
171
- // per-spec output instead of (not in addition to) the summary.
172
- if (!isVerbose()) {
173
- const skippedWorkflowCount = warnings.filter(isWorkflowSkipWarning).length;
174
- if (skippedWorkflowCount > 0) {
175
- const noun = skippedWorkflowCount === 1 ? "workflow spec" : "workflow specs";
176
- warn(`${skippedWorkflowCount} ${noun} skipped due to validation errors; ` +
177
- "rerun with --verbose (or AKM_VERBOSE=1) to see details.");
178
- }
179
- }
180
- const tWalkEnd = Date.now();
181
- throwIfAborted(signal);
182
- // Enhance entries with LLM if configured
183
- await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich);
184
- onProgress({
185
- phase: "llm",
186
- message: enrich && resolveIndexPassLLM("enrichment", config)
187
- ? `LLM enhancement reviewed ${dirsNeedingLlm.length} ${dirsNeedingLlm.length === 1 ? "directory" : "directories"}.`
188
- : "LLM enhancement disabled.",
189
- });
190
- const tLlmEnd = Date.now();
191
- throwIfAborted(signal);
192
- // Rebuild FTS after all inserts. Use incremental mode when this whole
193
- // index run is incremental — only entries touched by `upsertEntry`
194
- // since the last rebuild are re-indexed, instead of re-scanning every
195
- // row on every `akm index` invocation.
196
- rebuildFts(db, { incremental: isIncremental });
197
- onProgress({
198
- phase: "fts",
199
- message: isIncremental ? "Rebuilt full-text search index (dirty rows only)." : "Rebuilt full-text search index.",
200
- });
201
- const tFtsEnd = Date.now();
202
- // Re-link detached usage_events to their new entry_ids via entry_ref.
203
- // entry_ref is "type:name" (e.g., "skill:code-review"), entry_key is "stashDir:type:name".
204
- // Use substr to extract the "type:name" suffix from entry_key for exact comparison
205
- // (avoids LIKE which would require escaping % and _ in user-facing names).
206
- try {
207
- db.exec(`
208
- UPDATE usage_events SET entry_id = (
209
- SELECT e.id FROM entries e
210
- WHERE substr(e.entry_key, length(e.entry_key) - length(usage_events.entry_ref)) = ':' || usage_events.entry_ref
211
- LIMIT 1
212
- )
213
- WHERE entry_id IS NULL AND entry_ref IS NOT NULL
214
- `);
215
- }
216
- catch {
217
- /* ignore if table doesn't exist yet */
218
- }
219
- // Recompute utility scores from usage_events after FTS rebuild
220
- recomputeUtilityScores(db);
221
- // Regenerate each wiki's index.md from its pages' frontmatter. Best-effort
222
- // — errors are caught inside regenerateAllWikiIndexes and never block the
223
- // index run. The primary stash is the only target: additional sources
224
- // are read-only caches, and regenerating their indexes would mutate
225
- // cache content.
226
- try {
227
- const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
228
- regenerateAllWikiIndexes(stashDir);
229
- }
230
- catch {
231
- /* best-effort */
232
- }
233
- throwIfAborted(signal);
234
- // Generate embeddings if semantic search is enabled
235
- const embeddingResult = await generateEmbeddingsForDb(db, config, onProgress);
236
- const tEmbedEnd = Date.now();
237
- // Update metadata
238
- setMeta(db, "builtAt", new Date().toISOString());
239
- setMeta(db, "stashDir", stashDir);
240
- setMeta(db, "stashDirs", JSON.stringify(allSourceDirs));
241
- setMeta(db, "hasEmbeddings", embeddingResult.success ? "1" : "0");
242
- const totalEntries = getEntryCount(db);
243
- // Warn on every index run if using JS fallback with many entries
244
- warnIfVecMissing(db);
245
- const tEnd = Date.now();
246
- const verification = verifyIndexState(db, config, totalEntries, embeddingResult);
247
- if (config.semanticSearchMode === "off") {
248
- clearSemanticStatus();
249
- }
250
- else {
251
- writeSemanticStatus({
252
- status: verification.semanticStatus === "disabled" ? "pending" : verification.semanticStatus,
253
- ...(embeddingResult.reason ? { reason: embeddingResult.reason } : {}),
254
- ...(embeddingResult.message ? { message: embeddingResult.message } : {}),
255
- providerFingerprint: deriveSemanticProviderFingerprint(config.embedding),
256
- lastCheckedAt: new Date().toISOString(),
257
- entryCount: verification.entryCount,
258
- embeddingCount: verification.embeddingCount,
259
- });
260
- }
261
- onProgress({ phase: "verify", message: verification.message });
264
+ // ── Phase sequence ───────────────────────────────────────────────────────
265
+ await runSourceCachePhase(ctx);
266
+ await runWalkPhase(ctx);
267
+ await runEmbeddingPhase(ctx);
268
+ await runFinalizePhase(ctx);
269
+ // ────────────────────────────────────────────────────────────────────────
270
+ const { _verification: verification, _totalEntries: totalEntries } = ctx;
271
+ const { timing } = ctx;
262
272
  return {
263
273
  stashDir,
264
274
  totalEntries,
265
- generatedMetadata: generatedCount,
275
+ generatedMetadata: ctx.generatedCount,
266
276
  indexPath: dbPath,
267
277
  mode: isIncremental ? "incremental" : "full",
268
- directoriesScanned: scannedDirs,
269
- directoriesSkipped: skippedDirs,
270
- ...(warnings.length > 0 ? { warnings } : {}),
278
+ directoriesScanned: ctx.scannedDirs,
279
+ directoriesSkipped: ctx.skippedDirs,
280
+ ...(ctx.walkWarnings.length > 0 ? { warnings: ctx.walkWarnings } : {}),
271
281
  verification,
272
282
  timing: {
273
- totalMs: tEnd - t0,
274
- walkMs: tWalkEnd - tWalkStart,
275
- llmMs: tLlmEnd - tWalkEnd,
276
- embedMs: tEmbedEnd - tFtsEnd,
277
- ftsMs: tFtsEnd - tLlmEnd,
283
+ totalMs: Date.now() - timing.t0,
284
+ walkMs: timing.tWalkEnd - timing.tWalkStart,
285
+ llmMs: timing.tLlmEnd - timing.tWalkEnd,
286
+ embedMs: timing.tEmbedEnd - timing.tFtsEnd,
287
+ ftsMs: timing.tFtsEnd - timing.tLlmEnd,
278
288
  },
279
289
  };
280
290
  }
@@ -512,8 +522,10 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
512
522
  if (stash) {
513
523
  for (const entry of stash.entries) {
514
524
  const entryPath = entry.filename ? path.join(dirPath, entry.filename) : null;
515
- if (!entryPath)
516
- continue; // skip unresolvable entries
525
+ if (!entryPath) {
526
+ warn(`Skipping entry with no resolvable path in ${dirPath}`);
527
+ continue;
528
+ }
517
529
  if (!shouldIndexStashFile(currentStashDir, entryPath))
518
530
  continue;
519
531
  // Skip if a higher-priority stash root already indexed this asset
@@ -535,7 +547,9 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
535
547
  }
536
548
  }
537
549
  }
538
- // Collect dirs needing LLM enhancement during the first walk
550
+ // Collect dirs needing LLM enhancement during the first walk.
551
+ // Only dirs with "generated" entries need enrichment (unless reEnrich
552
+ // forces re-processing of already-enriched entries).
539
553
  if (stash.entries.some((e) => e.quality === "generated")) {
540
554
  dirsNeedingLlm.push({ dirPath, files, currentStashDir, stash });
541
555
  }
@@ -553,7 +567,7 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
553
567
  reason: persistedReason,
554
568
  });
555
569
  if (persistedRows === 0) {
556
- warnVerbose(`[index] zero-row ${dirPath}: ${persistedReason}`);
570
+ warn(`[index] zero-row ${dirPath}: ${persistedReason}`);
557
571
  }
558
572
  }
559
573
  });
@@ -652,9 +666,7 @@ function inferZeroRowReason(stash, priorReason, warnings, dirPath, dedupedRows)
652
666
  return "empty-generated-set";
653
667
  return `zero-row:${priorReason?.kind ?? "unknown"}`;
654
668
  }
655
- async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = false) {
656
- if (!enrich)
657
- return;
669
+ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, _enrich = false, reEnrich = false) {
658
670
  // Resolve per-pass LLM config via the unified shim. Returns undefined when
659
671
  // either no `akm.llm` is configured or the user opted this pass out via
660
672
  // `index.enrichment.llm = false`. (#208)
@@ -665,24 +677,142 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = f
665
677
  // as a single visible warning instead of silently degrading every entry
666
678
  // and leaving the user wondering why nothing got enhanced.
667
679
  const summary = { attempted: 0, succeeded: 0, failureSamples: [] };
668
- for (const { dirPath, files, currentStashDir, stash: originalStash } of dirsNeedingLlm) {
669
- throwIfAborted(signal);
670
- // Only enhance generated entries; user-provided overrides should not be overwritten
671
- const generatedEntries = originalStash.entries.filter((e) => e.quality === "generated");
672
- if (generatedEntries.length === 0)
673
- continue;
674
- const generatedStash = { entries: generatedEntries };
675
- const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary, signal);
676
- // Re-upsert the enhanced entries in a single transaction so a crash
677
- // cannot leave half the entries updated and the rest stale.
678
- db.transaction(() => {
679
- for (const entry of enhanced.entries) {
680
- const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
681
- const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
682
- const searchText = buildSearchText(entry);
683
- upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, attachFileSize(entry, entryPath), searchText);
684
- }
685
- })();
680
+ let completedDirs = 0;
681
+ let completedEntries = 0;
682
+ const totalDirs = dirsNeedingLlm.length;
683
+ const totalEntries = dirsNeedingLlm.reduce((sum, { stash }) => {
684
+ const entriesToEnhance = stash.entries.filter((e) => {
685
+ if (e.quality !== "generated" && !(reEnrich && e.quality === "enriched"))
686
+ return false;
687
+ if (!reEnrich && isEnrichmentComplete(e))
688
+ return false;
689
+ return true;
690
+ });
691
+ return sum + entriesToEnhance.length;
692
+ }, 0);
693
+ // P3 wall-clock budget for the enrichment pass: llm.timeoutMs (or 10
694
+ // minutes if not set) multiplied by the number of entries to enhance.
695
+ // Users can extend this via llm.timeoutMs in config; no separate knob needed.
696
+ const budgetMs = (llmConfig.timeoutMs ?? 10 * 60 * 1000) * Math.max(totalEntries, 1);
697
+ const enrichDeadline = AbortSignal.timeout(budgetMs);
698
+ let deadlineHit = false;
699
+ const enrichSignal = (() => {
700
+ if (!signal)
701
+ return enrichDeadline;
702
+ // Combine: abort when either fires.
703
+ const controller = new AbortController();
704
+ const onAbort = () => controller.abort();
705
+ signal.addEventListener("abort", onAbort, { once: true });
706
+ enrichDeadline.addEventListener("abort", () => {
707
+ deadlineHit = true;
708
+ controller.abort();
709
+ }, { once: true });
710
+ return controller.signal;
711
+ })();
712
+ if (totalEntries > 0) {
713
+ onProgress?.({
714
+ phase: "llm",
715
+ message: `LLM enhancement starting for ${totalEntries} entr${totalEntries === 1 ? "y" : "ies"} ` +
716
+ `across ${totalDirs} director${totalDirs === 1 ? "y" : "ies"} (concurrency ${getDefaultLlmConcurrency(llmConfig)}).`,
717
+ processed: 0,
718
+ total: totalEntries,
719
+ });
720
+ }
721
+ let currentDirLabel;
722
+ let lastProgressAt = Date.now();
723
+ let heartbeatTimer;
724
+ if (totalEntries > 0 && onProgress) {
725
+ heartbeatTimer = setInterval(() => {
726
+ if (Date.now() - lastProgressAt < 15000)
727
+ return;
728
+ onProgress({
729
+ phase: "llm",
730
+ message: `Still enriching ${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}` +
731
+ (currentDirLabel ? `; waiting on ${currentDirLabel}` : "") +
732
+ ".",
733
+ processed: completedEntries,
734
+ total: totalEntries,
735
+ });
736
+ lastProgressAt = Date.now();
737
+ }, 15000);
738
+ }
739
+ try {
740
+ await concurrentMap(dirsNeedingLlm, async ({ dirPath, files, currentStashDir, stash: originalStash }) => {
741
+ if (enrichSignal.aborted)
742
+ return undefined;
743
+ // Only enhance generated entries (or all when reEnrich=true);
744
+ // user-provided overrides should not be overwritten.
745
+ // Skip entries that are already fully enriched (description + tags + searchHints)
746
+ // unless the caller explicitly requests re-enrichment via reEnrich=true.
747
+ const entriesToEnhance = originalStash.entries.filter((e) => {
748
+ if (e.quality !== "generated" && !(reEnrich && e.quality === "enriched"))
749
+ return false;
750
+ if (!reEnrich && isEnrichmentComplete(e)) {
751
+ warnVerbose(`[akm] skipping LLM enrichment for "${e.name}" — entry already complete`);
752
+ return false;
753
+ }
754
+ return true;
755
+ });
756
+ if (entriesToEnhance.length === 0)
757
+ return undefined;
758
+ currentDirLabel = path.relative(currentStashDir, dirPath) || ".";
759
+ onProgress?.({
760
+ phase: "llm",
761
+ message: `Enhancing ${currentDirLabel} ` +
762
+ `(${entriesToEnhance.length} entr${entriesToEnhance.length === 1 ? "y" : "ies"}).`,
763
+ processed: completedEntries,
764
+ total: totalEntries,
765
+ });
766
+ lastProgressAt = Date.now();
767
+ const targetStash = { entries: entriesToEnhance };
768
+ const entryKeys = entriesToEnhance.map((e) => `${currentStashDir}:${e.type}:${e.name}`);
769
+ const enhanced = await enhanceStashWithLlm(llmConfig, targetStash, files, summary, enrichSignal, db, entryKeys, reEnrich, config, (event) => {
770
+ completedEntries++;
771
+ lastProgressAt = Date.now();
772
+ onProgress?.({
773
+ phase: "llm",
774
+ message: `Enhanced ${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}; ` +
775
+ `${completedDirs}/${totalDirs} director${totalDirs === 1 ? "y" : "ies"} complete` +
776
+ (event.entryName ? `; current ${event.entryName}` : "") +
777
+ (currentDirLabel ? ` in ${currentDirLabel}` : "") +
778
+ (event.outcome === "cache-hit" ? " (cache hit)" : ""),
779
+ processed: completedEntries,
780
+ total: totalEntries,
781
+ });
782
+ });
783
+ // Re-upsert the enhanced entries in a single transaction so a crash
784
+ // cannot leave half the entries updated and the rest stale.
785
+ db.transaction(() => {
786
+ for (const entry of enhanced.entries) {
787
+ const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
788
+ const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
789
+ const searchText = buildSearchText(entry);
790
+ upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, attachFileSize(entry, entryPath), searchText);
791
+ }
792
+ })();
793
+ completedDirs++;
794
+ lastProgressAt = Date.now();
795
+ onProgress?.({
796
+ phase: "llm",
797
+ message: `Completed ${completedDirs}/${totalDirs} director${totalDirs === 1 ? "y" : "ies"}; ` +
798
+ `${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"} processed.`,
799
+ processed: completedEntries,
800
+ total: totalEntries,
801
+ });
802
+ return undefined;
803
+ },
804
+ // Default concurrency of 4 works well for cloud LLM APIs. Local model
805
+ // servers (LM Studio, Ollama) run one inference at a time — set
806
+ // `llm.concurrency: 1` in config.json to avoid "Model reloaded" / 500
807
+ // errors from concurrent request overload.
808
+ getDefaultLlmConcurrency(llmConfig));
809
+ }
810
+ finally {
811
+ if (heartbeatTimer)
812
+ clearInterval(heartbeatTimer);
813
+ }
814
+ if (deadlineHit) {
815
+ warn("[akm] LLM enrichment budget exceeded. Re-run `akm index` to continue. Increase llm.timeoutMs for a larger budget.");
686
816
  }
687
817
  if (summary.attempted > 0 && summary.succeeded === 0) {
688
818
  const sample = summary.failureSamples.length ? ` Example: ${summary.failureSamples[0]}` : "";
@@ -781,14 +911,6 @@ async function generateEmbeddingsForDb(db, config, onProgress, signal) {
781
911
  }
782
912
  }
783
913
  // ── Helpers ─────────────────────────────────────────────────────────────────
784
- function getAllEntriesForEmbedding(db) {
785
- return db
786
- .prepare(`
787
- SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
788
- WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
789
- `)
790
- .all();
791
- }
792
914
  function attachFileSize(entry, entryPath) {
793
915
  try {
794
916
  return { ...entry, fileSize: fs.statSync(entryPath).size };
@@ -797,28 +919,6 @@ function attachFileSize(entry, entryPath) {
797
919
  return entry;
798
920
  }
799
921
  }
800
- function upsertWorkflowDocument(db, entryId, doc, content) {
801
- const sourceHash = computeSourceHash(content);
802
- db.prepare(`INSERT INTO workflow_documents (entry_id, schema_version, document_json, source_path, source_hash, updated_at)
803
- VALUES (?, ?, ?, ?, ?, ?)
804
- ON CONFLICT(entry_id) DO UPDATE SET
805
- schema_version = excluded.schema_version,
806
- document_json = excluded.document_json,
807
- source_path = excluded.source_path,
808
- source_hash = excluded.source_hash,
809
- updated_at = excluded.updated_at`).run(entryId, doc.schemaVersion, JSON.stringify(doc), doc.source.path, sourceHash, new Date().toISOString());
810
- }
811
- function computeSourceHash(content) {
812
- // Cheap, stable identity for the source markdown — used by future
813
- // incremental fast-paths that skip re-validation when content is unchanged.
814
- // Not security-sensitive; FNV-1a over the bytes is sufficient.
815
- let hash = 0x811c9dc5;
816
- for (let i = 0; i < content.length; i++) {
817
- hash ^= content[i];
818
- hash = Math.imul(hash, 0x01000193);
819
- }
820
- return (hash >>> 0).toString(16);
821
- }
822
922
  function buildIndexSummaryMessage(options) {
823
923
  const stashSourceLabel = options.sourcesCount === 1 ? "stash source" : "stash sources";
824
924
  const semanticDetail = getSemanticSearchLabel(options.semanticSearchMode, options.embeddingProvider, options.vecAvailable);
@@ -911,11 +1011,12 @@ function resolveIndexedFiles(dirPath, files, stash) {
911
1011
  }
912
1012
  return resolved.size > 0 ? [...resolved] : files;
913
1013
  }
914
- async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
1014
+ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal, db, entryKeys, reEnrich, akmConfig, onEntryDone) {
915
1015
  const { enhanceMetadata } = await import("../llm/metadata-enhance");
916
- const enhanced = [];
917
- for (const entry of stash.entries) {
918
- throwIfAborted(signal);
1016
+ const { computeBodyHash, getLlmCacheEntry, upsertLlmCacheEntry } = await import("./db.js");
1017
+ const results = await concurrentMap(stash.entries, async (entry, idx) => {
1018
+ if (signal?.aborted)
1019
+ return entry;
919
1020
  summary.attempted++;
920
1021
  try {
921
1022
  const entryFile = entry.filename
@@ -927,10 +1028,38 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
927
1028
  fileContent = fs.readFileSync(entryFile, "utf8");
928
1029
  }
929
1030
  catch {
930
- /* ignore unreadable files */
1031
+ warn(`Could not read file for LLM enrichment: ${entry.filename ?? entry.name}`);
931
1032
  }
932
1033
  }
933
- const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal);
1034
+ // Incremental cache: skip LLM call when file body is unchanged and
1035
+ // --re-enrich was not requested. The cache key is the entry_key
1036
+ // (stashDir:type:name) which is stable across index runs.
1037
+ const cacheBody = fileContent ?? `${entry.name}\n${entry.description ?? ""}`;
1038
+ const bodyHash = computeBodyHash(cacheBody);
1039
+ const cacheKey = entryKeys?.[idx] ?? `${entry.type}:${entry.name}`;
1040
+ if (db && !reEnrich) {
1041
+ const cached = getLlmCacheEntry(db, cacheKey, bodyHash);
1042
+ if (cached) {
1043
+ try {
1044
+ const parsed = JSON.parse(cached.resultJson);
1045
+ const updated = { ...entry };
1046
+ if (parsed.description)
1047
+ updated.description = parsed.description;
1048
+ if (parsed.searchHints?.length)
1049
+ updated.searchHints = parsed.searchHints;
1050
+ if (parsed.tags?.length)
1051
+ updated.tags = parsed.tags;
1052
+ updated.quality = "enriched";
1053
+ summary.succeeded++;
1054
+ onEntryDone?.({ entryName: entry.name, outcome: "cache-hit" });
1055
+ return updated;
1056
+ }
1057
+ catch {
1058
+ warn(`LLM enrichment cache entry corrupt for ${entry.name}; re-running enrichment`);
1059
+ }
1060
+ }
1061
+ }
1062
+ const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal, akmConfig);
934
1063
  const updated = { ...entry };
935
1064
  if (improvements.description)
936
1065
  updated.description = improvements.description;
@@ -938,19 +1067,39 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
938
1067
  updated.searchHints = improvements.searchHints;
939
1068
  if (improvements.tags?.length)
940
1069
  updated.tags = improvements.tags;
941
- enhanced.push(updated);
1070
+ // Mark as enriched so subsequent index runs skip re-enrichment (P2)
1071
+ updated.quality = "enriched";
1072
+ // Persist to cache so the next run can skip the LLM call when the
1073
+ // file body has not changed.
1074
+ if (db) {
1075
+ upsertLlmCacheEntry(db, cacheKey, bodyHash, JSON.stringify({
1076
+ description: improvements.description,
1077
+ searchHints: improvements.searchHints,
1078
+ tags: improvements.tags,
1079
+ }));
1080
+ }
942
1081
  summary.succeeded++;
1082
+ onEntryDone?.({ entryName: entry.name, outcome: "llm" });
1083
+ return updated;
943
1084
  }
944
1085
  catch (err) {
945
- enhanced.push(entry);
946
1086
  const msg = toErrorMessage(err);
947
1087
  // failureSamples is bounded to 3 items, so a linear scan is cheaper
948
1088
  // than maintaining a parallel Set for membership checks (#177 review).
949
1089
  if (summary.failureSamples.length < 3 && !summary.failureSamples.includes(msg)) {
950
1090
  summary.failureSamples.push(msg);
951
1091
  }
1092
+ onEntryDone?.({ entryName: entry.name, outcome: "failed" });
1093
+ return entry;
952
1094
  }
953
- }
1095
+ },
1096
+ // Default concurrency of 4 works well for cloud LLM APIs. Set
1097
+ // `llm.concurrency: 1` in config.json for local model servers.
1098
+ getDefaultLlmConcurrency(llmConfig));
1099
+ // concurrentMap returns Array<T | undefined>; replace undefined slots with
1100
+ // the original entry (which can only occur if the callback itself returned
1101
+ // undefined, which it never does above — but TypeScript needs the fallback for type safety).
1102
+ const enhanced = results.map((r, i) => r ?? stash.entries[i]);
954
1103
  return { entries: enhanced };
955
1104
  }
956
1105
  /**
@@ -1129,23 +1278,26 @@ export function recomputeUtilityScores(db) {
1129
1278
  }
1130
1279
  // Batch-load existing utility scores
1131
1280
  const existingScores = new Map();
1132
- const scoreRows = db.prepare("SELECT entry_id, utility FROM utility_scores").all();
1281
+ const scoreRows = db.prepare("SELECT entry_id, utility, last_used_at FROM utility_scores").all();
1133
1282
  for (const row of scoreRows) {
1134
- existingScores.set(row.entry_id, row.utility);
1283
+ existingScores.set(row.entry_id, { utility: row.utility, lastUsedAt: row.last_used_at ?? undefined });
1135
1284
  }
1285
+ const now = new Date().toISOString();
1136
1286
  for (const row of usageRows) {
1137
1287
  const selectRate = row.search_count > 0 ? Math.min(1, row.show_count / row.search_count) : 0;
1138
1288
  const feedbackTotal = row.positive_feedback_count + row.negative_feedback_count;
1139
1289
  const feedbackRate = feedbackTotal > 0 ? Math.max(0, row.positive_feedback_count - row.negative_feedback_count) / feedbackTotal : 0;
1140
1290
  const effectiveRate = Math.max(selectRate, feedbackRate);
1141
- const prevUtility = existingScores.get(row.entry_id) ?? 0;
1291
+ const existing = existingScores.get(row.entry_id);
1292
+ const prevUtility = existing?.utility ?? 0;
1142
1293
  const utility = prevUtility * emaDecay + effectiveRate * emaNew;
1294
+ const lastUsedAt = effectiveRate > 0.5 ? now : (existing?.lastUsedAt ?? undefined);
1143
1295
  upsertUtilityScore(db, row.entry_id, {
1144
1296
  utility,
1145
1297
  showCount: row.show_count,
1146
1298
  searchCount: row.search_count,
1147
1299
  selectRate,
1148
- lastUsedAt: row.last_used_at ?? undefined,
1300
+ lastUsedAt,
1149
1301
  });
1150
1302
  }
1151
1303
  setMeta(db, "last_utility_computed_at", new Date().toISOString());