akm-cli 0.7.4 → 0.8.0-rc.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/{CHANGELOG.md → .github/CHANGELOG.md} +34 -1
  2. package/.github/LICENSE +374 -0
  3. package/dist/cli/parse-args.js +86 -0
  4. package/dist/cli.js +1223 -650
  5. package/dist/commands/agent-dispatch.js +107 -0
  6. package/dist/commands/agent-support.js +62 -0
  7. package/dist/commands/config-cli.js +68 -84
  8. package/dist/commands/consolidate.js +812 -0
  9. package/dist/commands/curate.js +1 -0
  10. package/dist/commands/distill-promotion-policy.js +658 -0
  11. package/dist/commands/distill.js +224 -39
  12. package/dist/commands/eval-cases.js +40 -0
  13. package/dist/commands/events.js +12 -24
  14. package/dist/commands/graph.js +222 -0
  15. package/dist/commands/health.js +376 -0
  16. package/dist/commands/help/help-accept.md +9 -0
  17. package/dist/commands/help/help-improve.md +53 -0
  18. package/dist/commands/help/help-proposals.md +15 -0
  19. package/dist/commands/help/help-propose.md +17 -0
  20. package/dist/commands/help/help-reject.md +8 -0
  21. package/dist/commands/history.js +3 -30
  22. package/dist/commands/improve.js +1161 -0
  23. package/dist/commands/info.js +2 -2
  24. package/dist/commands/init.js +2 -2
  25. package/dist/commands/install-audit.js +5 -1
  26. package/dist/commands/installed-stashes.js +118 -138
  27. package/dist/commands/knowledge.js +133 -0
  28. package/dist/commands/lint/agent-linter.js +46 -0
  29. package/dist/commands/lint/base-linter.js +291 -0
  30. package/dist/commands/lint/command-linter.js +46 -0
  31. package/dist/commands/lint/default-linter.js +13 -0
  32. package/dist/commands/lint/index.js +145 -0
  33. package/dist/commands/lint/knowledge-linter.js +13 -0
  34. package/dist/commands/lint/memory-linter.js +58 -0
  35. package/dist/commands/lint/registry.js +33 -0
  36. package/dist/commands/lint/skill-linter.js +42 -0
  37. package/dist/commands/lint/task-linter.js +47 -0
  38. package/dist/commands/lint/types.js +1 -0
  39. package/dist/commands/lint/vault-key-rules.js +67 -0
  40. package/dist/commands/lint/workflow-linter.js +53 -0
  41. package/dist/commands/lint.js +1 -0
  42. package/dist/commands/migration-help.js +2 -2
  43. package/dist/commands/proposal.js +8 -7
  44. package/dist/commands/propose.js +106 -43
  45. package/dist/commands/reflect.js +167 -41
  46. package/dist/commands/registry-search.js +2 -2
  47. package/dist/commands/remember.js +55 -1
  48. package/dist/commands/schema-repair.js +130 -0
  49. package/dist/commands/search.js +21 -5
  50. package/dist/commands/show.js +135 -55
  51. package/dist/commands/source-add.js +10 -10
  52. package/dist/commands/source-manage.js +11 -19
  53. package/dist/commands/tasks.js +385 -0
  54. package/dist/commands/url-checker.js +39 -0
  55. package/dist/commands/vault.js +173 -87
  56. package/dist/core/action-contributors.js +25 -0
  57. package/dist/core/asset-ref.js +4 -0
  58. package/dist/core/asset-registry.js +5 -17
  59. package/dist/core/asset-spec.js +11 -1
  60. package/dist/core/common.js +100 -0
  61. package/dist/core/concurrent.js +22 -0
  62. package/dist/core/config.js +240 -127
  63. package/dist/core/events.js +87 -123
  64. package/dist/core/frontmatter.js +0 -6
  65. package/dist/core/markdown.js +17 -0
  66. package/dist/core/memory-improve.js +678 -0
  67. package/dist/core/parse.js +155 -0
  68. package/dist/core/paths.js +101 -3
  69. package/dist/core/proposal-validators.js +61 -0
  70. package/dist/core/proposals.js +49 -38
  71. package/dist/core/state-db.js +731 -0
  72. package/dist/core/time.js +51 -0
  73. package/dist/core/warn.js +59 -1
  74. package/dist/indexer/db-search.js +86 -472
  75. package/dist/indexer/db.js +418 -59
  76. package/dist/indexer/ensure-index.js +133 -0
  77. package/dist/indexer/graph-boost.js +247 -94
  78. package/dist/indexer/graph-db.js +201 -0
  79. package/dist/indexer/graph-dedup.js +99 -0
  80. package/dist/indexer/graph-extraction.js +417 -74
  81. package/dist/indexer/index-context.js +10 -0
  82. package/dist/indexer/indexer.js +480 -298
  83. package/dist/indexer/llm-cache.js +47 -0
  84. package/dist/indexer/matchers.js +124 -160
  85. package/dist/indexer/memory-inference.js +63 -29
  86. package/dist/indexer/metadata-contributors.js +26 -0
  87. package/dist/indexer/metadata.js +196 -197
  88. package/dist/indexer/path-resolver.js +89 -0
  89. package/dist/indexer/ranking-contributors.js +204 -0
  90. package/dist/indexer/ranking.js +74 -0
  91. package/dist/indexer/search-hit-enrichers.js +22 -0
  92. package/dist/indexer/search-source.js +24 -9
  93. package/dist/indexer/semantic-status.js +2 -16
  94. package/dist/indexer/walker.js +25 -0
  95. package/dist/integrations/agent/builders.js +109 -0
  96. package/dist/integrations/agent/config.js +203 -3
  97. package/dist/integrations/agent/index.js +5 -2
  98. package/dist/integrations/agent/model-aliases.js +63 -0
  99. package/dist/integrations/agent/profiles.js +67 -5
  100. package/dist/integrations/agent/prompts.js +114 -29
  101. package/dist/integrations/agent/sdk-runner.js +120 -0
  102. package/dist/integrations/agent/spawn.js +158 -34
  103. package/dist/integrations/lockfile.js +10 -18
  104. package/dist/integrations/session-logs/index.js +65 -0
  105. package/dist/integrations/session-logs/providers/claude-code.js +56 -0
  106. package/dist/integrations/session-logs/providers/opencode.js +52 -0
  107. package/dist/integrations/session-logs/types.js +1 -0
  108. package/dist/llm/call-ai.js +74 -0
  109. package/dist/llm/client.js +63 -86
  110. package/dist/llm/feature-gate.js +27 -16
  111. package/dist/llm/graph-extract.js +297 -64
  112. package/dist/llm/memory-infer.js +52 -71
  113. package/dist/llm/metadata-enhance.js +39 -22
  114. package/dist/llm/prompts/graph-extract-user-prompt.md +12 -0
  115. package/dist/output/cli-hints-full.md +277 -0
  116. package/dist/output/cli-hints-short.md +65 -0
  117. package/dist/output/cli-hints.js +2 -309
  118. package/dist/output/renderers.js +226 -257
  119. package/dist/output/shapes.js +109 -96
  120. package/dist/output/text.js +274 -36
  121. package/dist/registry/providers/skills-sh.js +61 -49
  122. package/dist/registry/providers/static-index.js +44 -48
  123. package/dist/registry/resolve.js +8 -16
  124. package/dist/setup/setup.js +510 -11
  125. package/dist/sources/provider-factory.js +2 -1
  126. package/dist/sources/providers/filesystem.js +16 -23
  127. package/dist/sources/providers/git.js +45 -4
  128. package/dist/sources/providers/website.js +15 -22
  129. package/dist/sources/website-ingest.js +4 -0
  130. package/dist/tasks/backends/cron.js +200 -0
  131. package/dist/tasks/backends/exec-utils.js +25 -0
  132. package/dist/tasks/backends/index.js +32 -0
  133. package/dist/tasks/backends/launchd-template.xml +19 -0
  134. package/dist/tasks/backends/launchd.js +184 -0
  135. package/dist/tasks/backends/schtasks-template.xml +29 -0
  136. package/dist/tasks/backends/schtasks.js +212 -0
  137. package/dist/tasks/parser.js +198 -0
  138. package/dist/tasks/resolveAkmBin.js +84 -0
  139. package/dist/tasks/runner.js +432 -0
  140. package/dist/tasks/schedule.js +208 -0
  141. package/dist/tasks/schema.js +13 -0
  142. package/dist/tasks/validator.js +59 -0
  143. package/dist/wiki/index-template.md +12 -0
  144. package/dist/wiki/ingest-workflow-template.md +54 -0
  145. package/dist/wiki/log-template.md +8 -0
  146. package/dist/wiki/schema-template.md +61 -0
  147. package/dist/wiki/wiki-templates.js +12 -0
  148. package/dist/wiki/wiki.js +10 -61
  149. package/dist/workflows/authoring.js +5 -25
  150. package/dist/workflows/db.js +9 -0
  151. package/dist/workflows/renderer.js +8 -3
  152. package/dist/workflows/runs.js +73 -88
  153. package/dist/workflows/scope-key.js +76 -0
  154. package/dist/workflows/validator.js +1 -1
  155. package/dist/workflows/workflow-template.md +24 -0
  156. package/docs/README.md +5 -2
  157. package/docs/migration/release-notes/0.7.0.md +1 -1
  158. package/docs/migration/release-notes/0.7.4.md +1 -1
  159. package/docs/migration/release-notes/0.7.5.md +20 -0
  160. package/docs/migration/release-notes/0.8.0.md +43 -0
  161. package/package.json +4 -3
  162. package/dist/templates/wiki-templates.js +0 -100
@@ -1,14 +1,14 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
+ import { SCRIPT_EXTENSIONS } from "../core/asset-spec";
3
4
  import { isHttpUrl, resolveStashDir, toErrorMessage } from "../core/common";
5
+ import { concurrentMap } from "../core/concurrent";
4
6
  import { getDbPath } from "../core/paths";
5
7
  import { isVerbose, warn, warnVerbose } from "../core/warn";
6
8
  import { resolveIndexPassLLM } from "../llm/index-passes";
7
9
  import { takeWorkflowDocument } from "../workflows/document-cache";
8
- import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, warnIfVecMissing, } from "./db";
9
- import { runGraphExtractionPass } from "./graph-extraction";
10
- import { runMemoryInferencePass } from "./memory-inference";
11
- import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
10
+ import { clearStaleCacheEntries, closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getAllEntriesForEmbedding, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, relinkUsageEvents, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, upsertWorkflowDocument, warnIfVecMissing, } from "./db";
11
+ import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isEnrichmentComplete, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
12
12
  import { buildSearchText } from "./search-fields";
13
13
  import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
14
14
  import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
@@ -18,19 +18,196 @@ function throwIfAborted(signal) {
18
18
  throw signal.reason instanceof Error ? signal.reason : new Error("index interrupted");
19
19
  }
20
20
  }
21
/**
 * Pick the default number of concurrent LLM requests for an index run.
 *
 * Resolution order:
 *   1. A numeric `llmConfig.concurrency` is returned as-is.
 *   2. A missing or unparseable `llmConfig.endpoint` yields 1.
 *   3. An endpoint whose host is a loopback name/address yields 1.
 *   4. Any other endpoint yields 4.
 *
 * @param {{ concurrency?: number, endpoint?: string } | null | undefined} llmConfig
 *   LLM connection settings; may be absent.
 * @returns {number} The concurrency to use.
 */
function getDefaultLlmConcurrency(llmConfig) {
    if (typeof llmConfig?.concurrency === "number")
        return llmConfig.concurrency;
    if (!llmConfig?.endpoint)
        return 1;
    let host;
    try {
        host = new URL(llmConfig.endpoint).hostname.toLowerCase();
    }
    catch {
        // Not a parseable URL: fall back to the conservative default.
        return 1;
    }
    // URL#hostname keeps the square brackets around IPv6 literals
    // ("[::1]"), so strip them before comparing against the bare address.
    const bareHost = host.startsWith("[") && host.endsWith("]") ? host.slice(1, -1) : host;
    const isLoopback = bareHost === "localhost" ||
        bareHost === "127.0.0.1" ||
        bareHost === "::1" ||
        bareHost.endsWith(".localhost");
    return isLoopback ? 1 : 4;
}
37
+ // ── Phase functions ──────────────────────────────────────────────────────────
38
/**
 * Source cache phase: on an incremental (non-full) run, drop index rows that
 * belong to stash directories recorded by the previous run but absent from
 * the current source list (e.g. after `akm remove`).
 *
 * Reads the previous directory list from the `stashDirs` meta key. A value
 * that is not valid JSON, or not an array, is reported with `warn` and
 * treated as an empty list. Sets `ctx.hadRemovedSources = true` when at least
 * one directory was purged.
 *
 * @param {object} ctx Index run context; reads `db`, `sourceDirs`,
 *   `isIncremental`, `full` and may write `hadRemovedSources`.
 * @returns {Promise<void>}
 */
async function runSourceCachePhase(ctx) {
    const { db, sourceDirs, isIncremental, full } = ctx;
    // Only incremental runs need an orphan purge.
    if (!isIncremental || full)
        return;
    const storedDirsJson = getMeta(db, "stashDirs");
    if (!storedDirsJson)
        return;
    let previousDirs = [];
    try {
        const decoded = JSON.parse(storedDirsJson);
        if (!Array.isArray(decoded)) {
            warn("index_meta stashDirs value is not an array — treating as empty");
        }
        else {
            previousDirs = decoded.filter((candidate) => typeof candidate === "string");
        }
    }
    catch {
        warn("index_meta stashDirs value is corrupt JSON — treating as empty");
    }
    const stillConfigured = new Set(sourceDirs);
    for (const previousDir of previousDirs) {
        if (stillConfigured.has(previousDir))
            continue;
        // Flag first so later phases see the removal even if a delete throws.
        ctx.hadRemovedSources = true;
        deleteEntriesByStashDir(db, previousDir);
        deleteIndexDirStatesByStashDir(db, previousDir);
    }
}
76
/**
 * Walk phase: delegate to `indexEntries` to index the configured sources,
 * publish its counters on the context, then run `enhanceDirsWithLlm` over the
 * directories the walk flagged.
 *
 * Writes `ctx.scannedDirs`, `ctx.skippedDirs`, `ctx.generatedCount`,
 * `ctx.walkWarnings`, `ctx.dirsNeedingLlm`, and the `tWalkStart` /
 * `tWalkEnd` / `tLlmEnd` timing marks. Emits a "scan" and an "llm" progress
 * event. Throws via `throwIfAborted` when `ctx.signal` has been aborted.
 *
 * @param {object} ctx Index run context.
 * @returns {Promise<void>}
 */
async function runWalkPhase(ctx) {
    const { db, sources, isIncremental, builtAtMs, hadRemovedSources, full, reEnrich, signal, onProgress, config } = ctx;
    throwIfAborted(signal);
    ctx.timing.tWalkStart = Date.now();
    // A full run, or any run that is not incremental, asks indexEntries for a
    // full delete.
    const wipeBeforeInsert = full || !isIncremental;
    const walkResult = await indexEntries(db, sources, isIncremental, builtAtMs, hadRemovedSources, wipeBeforeInsert, onProgress);
    const { scannedDirs, skippedDirs, dirsNeedingLlm, warnings } = walkResult;
    ctx.scannedDirs = scannedDirs;
    ctx.skippedDirs = skippedDirs;
    ctx.generatedCount = walkResult.generatedCount;
    ctx.walkWarnings = warnings;
    ctx.dirsNeedingLlm = dirsNeedingLlm;
    const dirWord = scannedDirs === 1 ? "directory" : "directories";
    onProgress({
        phase: "scan",
        message: `Scanned ${scannedDirs} ${dirWord} and skipped ${skippedDirs}.`,
    });
    // Workflow validation noise gate (issue #273): at default verbosity emit
    // one summary line for all skipped workflow specs.
    if (!isVerbose()) {
        const skippedWorkflowCount = warnings.filter(isWorkflowSkipWarning).length;
        if (skippedWorkflowCount > 0) {
            const noun = skippedWorkflowCount === 1 ? "workflow spec" : "workflow specs";
            const summary = `${skippedWorkflowCount} ${noun} skipped due to validation errors; ` +
                "rerun with --verbose (or AKM_VERBOSE=1) to see details.";
            warn(summary);
        }
    }
    ctx.timing.tWalkEnd = Date.now();
    throwIfAborted(signal);
    // LLM enrichment for the directories collected during the walk.
    await enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, true, reEnrich);
    let llmMessage = "LLM enhancement disabled.";
    if (resolveIndexPassLLM("enrichment", config)) {
        const reviewedWord = dirsNeedingLlm.length === 1 ? "directory" : "directories";
        llmMessage = `LLM enhancement reviewed ${dirsNeedingLlm.length} ${reviewedWord}.`;
    }
    onProgress({ phase: "llm", message: llmMessage });
    ctx.timing.tLlmEnd = Date.now();
}
123
/**
 * Embedding phase: run `generateEmbeddingsForDb` and keep its result on
 * `ctx.embeddingResult` for the finalize phase. Also stamps
 * `ctx.timing.tEmbedEnd`.
 *
 * Throws via `throwIfAborted` before doing any work when `ctx.signal` has
 * been aborted.
 *
 * @param {object} ctx Index run context; reads `db`, `config`, `signal`,
 *   `onProgress`.
 * @returns {Promise<void>}
 */
async function runEmbeddingPhase(ctx) {
    const { signal, db, config, onProgress } = ctx;
    throwIfAborted(signal);
    const outcome = await generateEmbeddingsForDb(db, config, onProgress);
    ctx.embeddingResult = outcome;
    ctx.timing.tEmbedEnd = Date.now();
}
133
/**
 * Finalize phase: rebuild FTS, re-link usage events, recompute utility scores,
 * purge stale LLM cache rows, regenerate wiki indexes, update index metadata,
 * write the semantic-search status, and emit the "verify" progress event.
 *
 * The cache purge and the wiki regeneration are best-effort: a failure in
 * either never aborts the run, but is reported through `warnVerbose` so it is
 * not lost entirely.
 *
 * Writes `ctx.timing.tFtsEnd`, `ctx._verification`, and `ctx._totalEntries`
 * for the caller. Throws via `throwIfAborted` when `ctx.signal` has been
 * aborted by the time the best-effort steps finish.
 *
 * @param {object} ctx Index run context; reads `db`, `config`, `sourceDirs`,
 *   `isIncremental`, `stashDir`, `signal`, `onProgress`, `embeddingResult`.
 * @returns {Promise<void>}
 */
async function runFinalizePhase(ctx) {
    const { db, config, sourceDirs, isIncremental, stashDir, signal, onProgress } = ctx;
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    // Rebuild FTS after all inserts; an incremental run asks for an
    // incremental rebuild.
    rebuildFts(db, { incremental: isIncremental });
    onProgress({
        phase: "fts",
        message: isIncremental ? "Rebuilt full-text search index (dirty rows only)." : "Rebuilt full-text search index.",
    });
    ctx.timing.tFtsEnd = Date.now();
    // Re-link detached usage_events and recompute utility scores.
    relinkUsageEvents(db);
    recomputeUtilityScores(db);
    // Purge LLM cache entries for assets that no longer exist in the index.
    try {
        clearStaleCacheEntries(db);
    }
    catch (err) {
        warnVerbose(`[index] stale LLM cache purge failed: ${describeError(err)}`);
    }
    // Regenerate each wiki's index.md for the primary stash. Best-effort.
    try {
        const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
        regenerateAllWikiIndexes(stashDir);
    }
    catch (err) {
        warnVerbose(`[index] wiki index regeneration failed: ${describeError(err)}`);
    }
    throwIfAborted(signal);
    // Update index metadata. When the embedding phase left no result, record
    // embeddings as unsuccessful.
    const embeddingResult = ctx.embeddingResult ?? { success: false };
    setMeta(db, "builtAt", new Date().toISOString());
    setMeta(db, "stashDir", stashDir);
    setMeta(db, "stashDirs", JSON.stringify(sourceDirs));
    setMeta(db, "hasEmbeddings", embeddingResult.success ? "1" : "0");
    warnIfVecMissing(db);
    const totalEntries = getEntryCount(db);
    const verification = verifyIndexState(db, config, totalEntries, embeddingResult);
    if (config.semanticSearchMode === "off") {
        clearSemanticStatus();
    }
    else {
        writeSemanticStatus({
            // With semantic search not "off", a "disabled" verification
            // status is persisted as "pending".
            status: verification.semanticStatus === "disabled" ? "pending" : verification.semanticStatus,
            ...(embeddingResult.reason ? { reason: embeddingResult.reason } : {}),
            ...(embeddingResult.message ? { message: embeddingResult.message } : {}),
            providerFingerprint: deriveSemanticProviderFingerprint(config.embedding),
            lastCheckedAt: new Date().toISOString(),
            entryCount: verification.entryCount,
            embeddingCount: verification.embeddingCount,
        });
    }
    onProgress({ phase: "verify", message: verification.message });
    // Hand the verification result and entry total back to the caller.
    ctx._verification = verification;
    ctx._totalEntries = totalEntries;
}
21
197
  // ── Indexer ──────────────────────────────────────────────────────────────────
22
198
  export async function akmIndex(options) {
23
199
  const stashDir = options?.stashDir || resolveStashDir();
24
200
  const onProgress = options?.onProgress ?? (() => { });
25
201
  const signal = options?.signal;
26
- const enrich = options?.enrich === true;
202
+ const reEnrich = options?.reEnrich === true;
203
+ const full = options?.full === true;
27
204
  // Load config and resolve all stash sources
28
205
  const { loadConfig } = await import("../core/config.js");
29
206
  const config = loadConfig();
30
207
  // Ensure git stash caches are extracted before resolving stash dirs,
31
208
  // so their content directories exist on disk for the walker to discover.
32
209
  const { ensureSourceCaches, resolveSourceEntries } = await import("./search-source.js");
33
- await ensureSourceCaches(config, { force: options?.full === true });
210
+ await ensureSourceCaches(config, { force: full });
34
211
  const allSourceEntries = resolveSourceEntries(stashDir, config);
35
212
  const allSourceDirs = allSourceEntries.map((s) => s.path);
36
213
  const t0 = Date.now();
@@ -39,11 +216,41 @@ export async function akmIndex(options) {
39
216
  const embeddingDim = config.embedding?.dimension;
40
217
  const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
41
218
  try {
42
- // Check if we should do incremental
219
+ // Determine incremental vs full mode
43
220
  const prevStashDir = getMeta(db, "stashDir");
44
221
  const prevBuiltAt = getMeta(db, "builtAt");
45
- const isIncremental = !options?.full && prevStashDir === stashDir && !!prevBuiltAt;
222
+ const isIncremental = !full && prevStashDir === stashDir && !!prevBuiltAt;
46
223
  const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
224
+ // Assemble the run context
225
+ const ctx = {
226
+ db,
227
+ config,
228
+ sources: allSourceEntries,
229
+ sourceDirs: allSourceDirs,
230
+ full,
231
+ reEnrich,
232
+ stashDir,
233
+ onProgress,
234
+ signal,
235
+ timing: {
236
+ t0,
237
+ tWalkStart: t0,
238
+ tWalkEnd: t0,
239
+ tLlmEnd: t0,
240
+ tFtsEnd: t0,
241
+ tEmbedEnd: t0,
242
+ },
243
+ isIncremental,
244
+ builtAtMs,
245
+ hadRemovedSources: false,
246
+ scannedDirs: 0,
247
+ skippedDirs: 0,
248
+ generatedCount: 0,
249
+ walkWarnings: [],
250
+ dirsNeedingLlm: [],
251
+ embeddingResult: null,
252
+ graphExtractionResult: null,
253
+ };
47
254
  onProgress({
48
255
  phase: "summary",
49
256
  message: buildIndexSummaryMessage({
@@ -51,218 +258,34 @@ export async function akmIndex(options) {
51
258
  sourcesCount: allSourceDirs.length,
52
259
  semanticSearchMode: config.semanticSearchMode,
53
260
  embeddingProvider: getEmbeddingProvider(config.embedding),
54
- llmEnabled: enrich && !!resolveIndexPassLLM("enrichment", config),
261
+ llmEnabled: !!resolveIndexPassLLM("enrichment", config),
55
262
  vecAvailable: isVecAvailable(db),
56
263
  }),
57
264
  });
58
- let hadRemovedSources = false;
59
- if (options?.full || !isIncremental) {
60
- // The delete is now merged into the insert transaction inside
61
- // indexEntries() so that a reader never sees an empty database between
62
- // the wipe and the re-inserts. The doFullDelete flag signals this path.
63
- }
64
- else {
65
- // Incremental: purge entries from stash dirs that have been removed
66
- // (e.g. after `akm remove`) so orphaned entries don't linger.
67
- const prevStashDirsJson = getMeta(db, "stashDirs");
68
- if (prevStashDirsJson) {
69
- let prevStashDirs = [];
70
- try {
71
- const parsed = JSON.parse(prevStashDirsJson);
72
- if (Array.isArray(parsed)) {
73
- prevStashDirs = parsed.filter((d) => typeof d === "string");
74
- }
75
- else {
76
- warn("index_meta stashDirs value is not an array — treating as empty");
77
- }
78
- }
79
- catch {
80
- warn("index_meta stashDirs value is corrupt JSON — treating as empty");
81
- }
82
- const currentSet = new Set(allSourceDirs);
83
- for (const dir of prevStashDirs) {
84
- if (!currentSet.has(dir)) {
85
- hadRemovedSources = true;
86
- deleteEntriesByStashDir(db, dir);
87
- deleteIndexDirStatesByStashDir(db, dir);
88
- }
89
- }
90
- }
91
- }
92
- throwIfAborted(signal);
93
- // Memory inference pass (#201). Runs before the walk so any derived-memory
94
- // children that get written are picked up by the walker in this same run
95
- // and don't have to wait for the next `akm index`. Gated entirely by
96
- // `resolveIndexPassLLM("memory", config)` — when the user has no
97
- // `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
98
- // and existing inferred children are left in place.
99
- if (enrich) {
100
- try {
101
- const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
102
- if (inferenceResult.writtenFacts > 0) {
103
- onProgress({
104
- phase: "llm",
105
- message: `Memory inference wrote ${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
106
- });
107
- }
108
- }
109
- catch (err) {
110
- warn(`Memory inference pass aborted: ${err instanceof Error ? err.message : String(err)}`);
111
- }
112
- }
113
- else {
114
- onProgress({
115
- phase: "llm",
116
- message: "LLM passes disabled; rerun with --enrich to enable inference and enrichment.",
117
- });
118
- }
119
- // Graph extraction pass (#207). Runs after memory inference so any
120
- // atomic-fact children that just got written are visible to the graph
121
- // walk. Persists `<stashRoot>/.akm/graph.json` — an indexer artifact,
122
- // NOT a user-visible asset, so it is not routed through
123
- // writeAssetToSource. The artifact feeds the existing FTS5+boosts
124
- // pipeline as a single boost component (see graph-boost.ts); there is
125
- // no parallel scoring track. Disabled when either gate (the locked
126
- // `llm.features.graph_extraction` feature flag or the per-pass
127
- // `index.graph.llm` toggle) is off; the existing graph file is
128
- // preserved on disk in that case.
129
- if (enrich) {
130
- try {
131
- const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
132
- if (graphResult.written) {
133
- onProgress({
134
- phase: "llm",
135
- message: `Graph extraction wrote ${graphResult.totalEntities} entit${graphResult.totalEntities === 1 ? "y" : "ies"} and ${graphResult.totalRelations} relation${graphResult.totalRelations === 1 ? "" : "s"} from ${graphResult.extracted} file${graphResult.extracted === 1 ? "" : "s"}.`,
136
- });
137
- }
138
- }
139
- catch (err) {
140
- warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
141
- }
142
- }
143
- throwIfAborted(signal);
144
- const tWalkStart = Date.now();
145
- // Walk stash dirs and index entries.
146
- // doFullDelete=true merges the wipe into the same transaction as the
147
- // inserts so readers never see an empty database mid-rebuild.
148
- const doFullDelete = options?.full || !isIncremental;
149
- const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete, onProgress);
150
- onProgress({
151
- phase: "scan",
152
- message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
153
- });
154
- // Workflow validation noise gate (issue #273): per-spec stderr lines from
155
- // `buildMetadataSkipWarning` are suppressed at default verbosity in
156
- // `metadata.ts`. Replace them with a single summary line so operators
157
- // running a cold-start search against a fresh registry-cloned source
158
- // don't get the impression akm is broken. Verbose mode keeps the
159
- // per-spec output instead of (not in addition to) the summary.
160
- if (!isVerbose()) {
161
- const skippedWorkflowCount = warnings.filter(isWorkflowSkipWarning).length;
162
- if (skippedWorkflowCount > 0) {
163
- const noun = skippedWorkflowCount === 1 ? "workflow spec" : "workflow specs";
164
- warn(`${skippedWorkflowCount} ${noun} skipped due to validation errors; ` +
165
- "rerun with --verbose (or AKM_VERBOSE=1) to see details.");
166
- }
167
- }
168
- const tWalkEnd = Date.now();
169
- throwIfAborted(signal);
170
- // Enhance entries with LLM if configured
171
- await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich);
172
- onProgress({
173
- phase: "llm",
174
- message: enrich && resolveIndexPassLLM("enrichment", config)
175
- ? `LLM enhancement reviewed ${dirsNeedingLlm.length} ${dirsNeedingLlm.length === 1 ? "directory" : "directories"}.`
176
- : "LLM enhancement disabled.",
177
- });
178
- const tLlmEnd = Date.now();
179
- throwIfAborted(signal);
180
- // Rebuild FTS after all inserts. Use incremental mode when this whole
181
- // index run is incremental — only entries touched by `upsertEntry`
182
- // since the last rebuild are re-indexed, instead of re-scanning every
183
- // row on every `akm index` invocation.
184
- rebuildFts(db, { incremental: isIncremental });
185
- onProgress({
186
- phase: "fts",
187
- message: isIncremental ? "Rebuilt full-text search index (dirty rows only)." : "Rebuilt full-text search index.",
188
- });
189
- const tFtsEnd = Date.now();
190
- // Re-link detached usage_events to their new entry_ids via entry_ref.
191
- // entry_ref is "type:name" (e.g., "skill:code-review"), entry_key is "stashDir:type:name".
192
- // Use substr to extract the "type:name" suffix from entry_key for exact comparison
193
- // (avoids LIKE which would require escaping % and _ in user-facing names).
194
- try {
195
- db.exec(`
196
- UPDATE usage_events SET entry_id = (
197
- SELECT e.id FROM entries e
198
- WHERE substr(e.entry_key, length(e.entry_key) - length(usage_events.entry_ref)) = ':' || usage_events.entry_ref
199
- LIMIT 1
200
- )
201
- WHERE entry_id IS NULL AND entry_ref IS NOT NULL
202
- `);
203
- }
204
- catch {
205
- /* ignore if table doesn't exist yet */
206
- }
207
- // Recompute utility scores from usage_events after FTS rebuild
208
- recomputeUtilityScores(db);
209
- // Regenerate each wiki's index.md from its pages' frontmatter. Best-effort
210
- // — errors are caught inside regenerateAllWikiIndexes and never block the
211
- // index run. The primary stash is the only target: additional sources
212
- // are read-only caches, and regenerating their indexes would mutate
213
- // cache content.
214
- try {
215
- const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
216
- regenerateAllWikiIndexes(stashDir);
217
- }
218
- catch {
219
- /* best-effort */
220
- }
221
- throwIfAborted(signal);
222
- // Generate embeddings if semantic search is enabled
223
- const embeddingResult = await generateEmbeddingsForDb(db, config, onProgress);
224
- const tEmbedEnd = Date.now();
225
- // Update metadata
226
- setMeta(db, "builtAt", new Date().toISOString());
227
- setMeta(db, "stashDir", stashDir);
228
- setMeta(db, "stashDirs", JSON.stringify(allSourceDirs));
229
- setMeta(db, "hasEmbeddings", embeddingResult.success ? "1" : "0");
230
- const totalEntries = getEntryCount(db);
231
- // Warn on every index run if using JS fallback with many entries
232
- warnIfVecMissing(db);
233
- const tEnd = Date.now();
234
- const verification = verifyIndexState(db, config, totalEntries, embeddingResult);
235
- if (config.semanticSearchMode === "off") {
236
- clearSemanticStatus();
237
- }
238
- else {
239
- writeSemanticStatus({
240
- status: verification.semanticStatus === "disabled" ? "pending" : verification.semanticStatus,
241
- ...(embeddingResult.reason ? { reason: embeddingResult.reason } : {}),
242
- ...(embeddingResult.message ? { message: embeddingResult.message } : {}),
243
- providerFingerprint: deriveSemanticProviderFingerprint(config.embedding),
244
- lastCheckedAt: new Date().toISOString(),
245
- entryCount: verification.entryCount,
246
- embeddingCount: verification.embeddingCount,
247
- });
248
- }
249
- onProgress({ phase: "verify", message: verification.message });
265
+ // ── Phase sequence ───────────────────────────────────────────────────────
266
+ await runSourceCachePhase(ctx);
267
+ await runWalkPhase(ctx);
268
+ await runEmbeddingPhase(ctx);
269
+ await runFinalizePhase(ctx);
270
+ // ────────────────────────────────────────────────────────────────────────
271
+ const { _verification: verification, _totalEntries: totalEntries } = ctx;
272
+ const { timing } = ctx;
250
273
  return {
251
274
  stashDir,
252
275
  totalEntries,
253
- generatedMetadata: generatedCount,
276
+ generatedMetadata: ctx.generatedCount,
254
277
  indexPath: dbPath,
255
278
  mode: isIncremental ? "incremental" : "full",
256
- directoriesScanned: scannedDirs,
257
- directoriesSkipped: skippedDirs,
258
- ...(warnings.length > 0 ? { warnings } : {}),
279
+ directoriesScanned: ctx.scannedDirs,
280
+ directoriesSkipped: ctx.skippedDirs,
281
+ ...(ctx.walkWarnings.length > 0 ? { warnings: ctx.walkWarnings } : {}),
259
282
  verification,
260
283
  timing: {
261
- totalMs: tEnd - t0,
262
- walkMs: tWalkEnd - tWalkStart,
263
- llmMs: tLlmEnd - tWalkEnd,
264
- embedMs: tEmbedEnd - tFtsEnd,
265
- ftsMs: tFtsEnd - tLlmEnd,
284
+ totalMs: Date.now() - timing.t0,
285
+ walkMs: timing.tWalkEnd - timing.tWalkStart,
286
+ llmMs: timing.tLlmEnd - timing.tWalkEnd,
287
+ embedMs: timing.tEmbedEnd - timing.tFtsEnd,
288
+ ftsMs: timing.tFtsEnd - timing.tLlmEnd,
266
289
  },
267
290
  };
268
291
  }
@@ -500,8 +523,10 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
500
523
  if (stash) {
501
524
  for (const entry of stash.entries) {
502
525
  const entryPath = entry.filename ? path.join(dirPath, entry.filename) : null;
503
- if (!entryPath)
504
- continue; // skip unresolvable entries
526
+ if (!entryPath) {
527
+ warn(`Skipping entry with no resolvable path in ${dirPath}`);
528
+ continue;
529
+ }
505
530
  if (!shouldIndexStashFile(currentStashDir, entryPath))
506
531
  continue;
507
532
  // Skip if a higher-priority stash root already indexed this asset
@@ -523,7 +548,9 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
523
548
  }
524
549
  }
525
550
  }
526
- // Collect dirs needing LLM enhancement during the first walk
551
+ // Collect dirs needing LLM enhancement during the first walk.
552
+ // Only dirs with "generated" entries need enrichment (unless reEnrich
553
+ // forces re-processing of already-enriched entries).
527
554
  if (stash.entries.some((e) => e.quality === "generated")) {
528
555
  dirsNeedingLlm.push({ dirPath, files, currentStashDir, stash });
529
556
  }
@@ -541,7 +568,20 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
541
568
  reason: persistedReason,
542
569
  });
543
570
  if (persistedRows === 0) {
544
- warnVerbose(`[index] zero-row ${dirPath}: ${persistedReason}`);
571
+ // Warn only when the dir had files that *could* produce entries (.md or
572
+ // known script extensions). Dirs with only non-indexable types (.json,
573
+ // .yaml, .conf, .env, .gitkeep) or deduped-only rows are expected and
574
+ // not actionable at normal log level.
575
+ const hasIndexableExtension = files.some((f) => {
576
+ const ext = path.extname(f).toLowerCase();
577
+ return ext === ".md" || SCRIPT_EXTENSIONS.has(ext);
578
+ });
579
+ if (persistedReason !== "deduped-zero-row" && hasIndexableExtension) {
580
+ warn(`[index] zero-row ${dirPath}: ${persistedReason}`);
581
+ }
582
+ else {
583
+ warnVerbose(`[index] zero-row ${dirPath}: ${persistedReason}`);
584
+ }
545
585
  }
546
586
  }
547
587
  });
@@ -640,9 +680,7 @@ function inferZeroRowReason(stash, priorReason, warnings, dirPath, dedupedRows)
640
680
  return "empty-generated-set";
641
681
  return `zero-row:${priorReason?.kind ?? "unknown"}`;
642
682
  }
643
- async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = false) {
644
- if (!enrich)
645
- return;
683
+ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, _enrich = false, reEnrich = false) {
646
684
  // Resolve per-pass LLM config via the unified shim. Returns undefined when
647
685
  // either no `akm.llm` is configured or the user opted this pass out via
648
686
  // `index.enrichment.llm = false`. (#208)
@@ -653,24 +691,142 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = f
653
691
  // as a single visible warning instead of silently degrading every entry
654
692
  // and leaving the user wondering why nothing got enhanced.
655
693
  const summary = { attempted: 0, succeeded: 0, failureSamples: [] };
656
- for (const { dirPath, files, currentStashDir, stash: originalStash } of dirsNeedingLlm) {
657
- throwIfAborted(signal);
658
- // Only enhance generated entries; user-provided overrides should not be overwritten
659
- const generatedEntries = originalStash.entries.filter((e) => e.quality === "generated");
660
- if (generatedEntries.length === 0)
661
- continue;
662
- const generatedStash = { entries: generatedEntries };
663
- const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary, signal);
664
- // Re-upsert the enhanced entries in a single transaction so a crash
665
- // cannot leave half the entries updated and the rest stale.
666
- db.transaction(() => {
667
- for (const entry of enhanced.entries) {
668
- const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
669
- const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
670
- const searchText = buildSearchText(entry);
671
- upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, attachFileSize(entry, entryPath), searchText);
672
- }
673
- })();
694
+ let completedDirs = 0;
695
+ let completedEntries = 0;
696
+ const totalDirs = dirsNeedingLlm.length;
697
+ const totalEntries = dirsNeedingLlm.reduce((sum, { stash }) => {
698
+ const entriesToEnhance = stash.entries.filter((e) => {
699
+ if (e.quality !== "generated" && !(reEnrich && e.quality === "enriched"))
700
+ return false;
701
+ if (!reEnrich && isEnrichmentComplete(e))
702
+ return false;
703
+ return true;
704
+ });
705
+ return sum + entriesToEnhance.length;
706
+ }, 0);
707
+ // P3 wall-clock budget for the enrichment pass. Defaults to llm.timeoutMs
708
+ // (or 10 minutes if not set) multiplied by the number of entries to enhance.
709
+ // Users can extend this via llm.timeoutMs in config; no separate knob needed.
710
+ const budgetMs = (llmConfig.timeoutMs ?? 10 * 60 * 1000) * Math.max(totalEntries, 1);
711
+ const enrichDeadline = AbortSignal.timeout(budgetMs);
712
+ let deadlineHit = false;
713
+ const enrichSignal = (() => {
714
+ if (!signal)
715
+ return enrichDeadline;
716
+ // Combine: abort when either fires.
717
+ const controller = new AbortController();
718
+ const onAbort = () => controller.abort();
719
+ signal.addEventListener("abort", onAbort, { once: true });
720
+ enrichDeadline.addEventListener("abort", () => {
721
+ deadlineHit = true;
722
+ controller.abort();
723
+ }, { once: true });
724
+ return controller.signal;
725
+ })();
726
+ if (totalEntries > 0) {
727
+ onProgress?.({
728
+ phase: "llm",
729
+ message: `LLM enhancement starting for ${totalEntries} entr${totalEntries === 1 ? "y" : "ies"} ` +
730
+ `across ${totalDirs} director${totalDirs === 1 ? "y" : "ies"} (concurrency ${getDefaultLlmConcurrency(llmConfig)}).`,
731
+ processed: 0,
732
+ total: totalEntries,
733
+ });
734
+ }
735
+ let currentDirLabel;
736
+ let lastProgressAt = Date.now();
737
+ let heartbeatTimer;
738
+ if (totalEntries > 0 && onProgress) {
739
+ heartbeatTimer = setInterval(() => {
740
+ if (Date.now() - lastProgressAt < 15000)
741
+ return;
742
+ onProgress({
743
+ phase: "llm",
744
+ message: `Still enriching ${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}` +
745
+ (currentDirLabel ? `; waiting on ${currentDirLabel}` : "") +
746
+ ".",
747
+ processed: completedEntries,
748
+ total: totalEntries,
749
+ });
750
+ lastProgressAt = Date.now();
751
+ }, 15000);
752
+ }
753
+ try {
754
+ await concurrentMap(dirsNeedingLlm, async ({ dirPath, files, currentStashDir, stash: originalStash }) => {
755
+ if (enrichSignal.aborted)
756
+ return undefined;
757
+ // Only enhance generated entries (or all when reEnrich=true);
758
+ // user-provided overrides should not be overwritten.
759
+ // Skip entries that are already fully enriched (description + tags + searchHints)
760
+ // unless the caller explicitly requests re-enrichment via reEnrich=true.
761
+ const entriesToEnhance = originalStash.entries.filter((e) => {
762
+ if (e.quality !== "generated" && !(reEnrich && e.quality === "enriched"))
763
+ return false;
764
+ if (!reEnrich && isEnrichmentComplete(e)) {
765
+ warnVerbose(`[akm] skipping LLM enrichment for "${e.name}" — entry already complete`);
766
+ return false;
767
+ }
768
+ return true;
769
+ });
770
+ if (entriesToEnhance.length === 0)
771
+ return undefined;
772
+ currentDirLabel = path.relative(currentStashDir, dirPath) || ".";
773
+ onProgress?.({
774
+ phase: "llm",
775
+ message: `Enhancing ${currentDirLabel} ` +
776
+ `(${entriesToEnhance.length} entr${entriesToEnhance.length === 1 ? "y" : "ies"}).`,
777
+ processed: completedEntries,
778
+ total: totalEntries,
779
+ });
780
+ lastProgressAt = Date.now();
781
+ const targetStash = { entries: entriesToEnhance };
782
+ const entryKeys = entriesToEnhance.map((e) => `${currentStashDir}:${e.type}:${e.name}`);
783
+ const enhanced = await enhanceStashWithLlm(llmConfig, targetStash, files, summary, enrichSignal, db, entryKeys, reEnrich, config, (event) => {
784
+ completedEntries++;
785
+ lastProgressAt = Date.now();
786
+ onProgress?.({
787
+ phase: "llm",
788
+ message: `Enhanced ${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}; ` +
789
+ `${completedDirs}/${totalDirs} director${totalDirs === 1 ? "y" : "ies"} complete` +
790
+ (event.entryName ? `; current ${event.entryName}` : "") +
791
+ (currentDirLabel ? ` in ${currentDirLabel}` : "") +
792
+ (event.outcome === "cache-hit" ? " (cache hit)" : ""),
793
+ processed: completedEntries,
794
+ total: totalEntries,
795
+ });
796
+ });
797
+ // Re-upsert the enhanced entries in a single transaction so a crash
798
+ // cannot leave half the entries updated and the rest stale.
799
+ db.transaction(() => {
800
+ for (const entry of enhanced.entries) {
801
+ const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
802
+ const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
803
+ const searchText = buildSearchText(entry);
804
+ upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, attachFileSize(entry, entryPath), searchText);
805
+ }
806
+ })();
807
+ completedDirs++;
808
+ lastProgressAt = Date.now();
809
+ onProgress?.({
810
+ phase: "llm",
811
+ message: `Completed ${completedDirs}/${totalDirs} director${totalDirs === 1 ? "y" : "ies"}; ` +
812
+ `${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"} processed.`,
813
+ processed: completedEntries,
814
+ total: totalEntries,
815
+ });
816
+ return undefined;
817
+ },
818
+ // Default concurrency of 4 works well for cloud LLM APIs. Local model
819
+ // servers (LM Studio, Ollama) run one inference at a time — set
820
+ // `llm.concurrency: 1` in config.json to avoid "Model reloaded" / 500
821
+ // errors from concurrent request overload.
822
+ getDefaultLlmConcurrency(llmConfig));
823
+ }
824
+ finally {
825
+ if (heartbeatTimer)
826
+ clearInterval(heartbeatTimer);
827
+ }
828
+ if (deadlineHit) {
829
+ warn("[akm] LLM enrichment budget exceeded. Re-run `akm index` to continue. Increase llm.timeoutMs for a larger budget.");
674
830
  }
675
831
  if (summary.attempted > 0 && summary.succeeded === 0) {
676
832
  const sample = summary.failureSamples.length ? ` Example: ${summary.failureSamples[0]}` : "";
@@ -769,14 +925,6 @@ async function generateEmbeddingsForDb(db, config, onProgress, signal) {
769
925
  }
770
926
  }
771
927
  // ── Helpers ─────────────────────────────────────────────────────────────────
772
- function getAllEntriesForEmbedding(db) {
773
- return db
774
- .prepare(`
775
- SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
776
- WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
777
- `)
778
- .all();
779
- }
780
928
  function attachFileSize(entry, entryPath) {
781
929
  try {
782
930
  return { ...entry, fileSize: fs.statSync(entryPath).size };
@@ -785,28 +933,6 @@ function attachFileSize(entry, entryPath) {
785
933
  return entry;
786
934
  }
787
935
  }
788
- function upsertWorkflowDocument(db, entryId, doc, content) {
789
- const sourceHash = computeSourceHash(content);
790
- db.prepare(`INSERT INTO workflow_documents (entry_id, schema_version, document_json, source_path, source_hash, updated_at)
791
- VALUES (?, ?, ?, ?, ?, ?)
792
- ON CONFLICT(entry_id) DO UPDATE SET
793
- schema_version = excluded.schema_version,
794
- document_json = excluded.document_json,
795
- source_path = excluded.source_path,
796
- source_hash = excluded.source_hash,
797
- updated_at = excluded.updated_at`).run(entryId, doc.schemaVersion, JSON.stringify(doc), doc.source.path, sourceHash, new Date().toISOString());
798
- }
799
- function computeSourceHash(content) {
800
- // Cheap, stable identity for the source markdown — used by future
801
- // incremental fast-paths that skip re-validation when content is unchanged.
802
- // Not security-sensitive; FNV-1a over the bytes is sufficient.
803
- let hash = 0x811c9dc5;
804
- for (let i = 0; i < content.length; i++) {
805
- hash ^= content[i];
806
- hash = Math.imul(hash, 0x01000193);
807
- }
808
- return (hash >>> 0).toString(16);
809
- }
810
936
  function buildIndexSummaryMessage(options) {
811
937
  const stashSourceLabel = options.sourcesCount === 1 ? "stash source" : "stash sources";
812
938
  const semanticDetail = getSemanticSearchLabel(options.semanticSearchMode, options.embeddingProvider, options.vecAvailable);
@@ -899,11 +1025,12 @@ function resolveIndexedFiles(dirPath, files, stash) {
899
1025
  }
900
1026
  return resolved.size > 0 ? [...resolved] : files;
901
1027
  }
902
- async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
1028
+ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal, db, entryKeys, reEnrich, akmConfig, onEntryDone) {
903
1029
  const { enhanceMetadata } = await import("../llm/metadata-enhance");
904
- const enhanced = [];
905
- for (const entry of stash.entries) {
906
- throwIfAborted(signal);
1030
+ const { computeBodyHash, getLlmCacheEntry, upsertLlmCacheEntry } = await import("./db.js");
1031
+ const results = await concurrentMap(stash.entries, async (entry, idx) => {
1032
+ if (signal?.aborted)
1033
+ return entry;
907
1034
  summary.attempted++;
908
1035
  try {
909
1036
  const entryFile = entry.filename
@@ -915,10 +1042,38 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
915
1042
  fileContent = fs.readFileSync(entryFile, "utf8");
916
1043
  }
917
1044
  catch {
918
- /* ignore unreadable files */
1045
+ warn(`Could not read file for LLM enrichment: ${entry.filename ?? entry.name}`);
1046
+ }
1047
+ }
1048
+ // Incremental cache: skip LLM call when file body is unchanged and
1049
+ // --re-enrich was not requested. The cache key is the entry_key
1050
+ // (stashDir:type:name) which is stable across index runs.
1051
+ const cacheBody = fileContent ?? `${entry.name}\n${entry.description ?? ""}`;
1052
+ const bodyHash = computeBodyHash(cacheBody);
1053
+ const cacheKey = entryKeys?.[idx] ?? `${entry.type}:${entry.name}`;
1054
+ if (db && !reEnrich) {
1055
+ const cached = getLlmCacheEntry(db, cacheKey, bodyHash);
1056
+ if (cached) {
1057
+ try {
1058
+ const parsed = JSON.parse(cached.resultJson);
1059
+ const updated = { ...entry };
1060
+ if (parsed.description)
1061
+ updated.description = parsed.description;
1062
+ if (parsed.searchHints?.length)
1063
+ updated.searchHints = parsed.searchHints;
1064
+ if (parsed.tags?.length)
1065
+ updated.tags = parsed.tags;
1066
+ updated.quality = "enriched";
1067
+ summary.succeeded++;
1068
+ onEntryDone?.({ entryName: entry.name, outcome: "cache-hit" });
1069
+ return updated;
1070
+ }
1071
+ catch {
1072
+ warn(`LLM enrichment cache entry corrupt for ${entry.name}; re-running enrichment`);
1073
+ }
919
1074
  }
920
1075
  }
921
- const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal);
1076
+ const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal, akmConfig);
922
1077
  const updated = { ...entry };
923
1078
  if (improvements.description)
924
1079
  updated.description = improvements.description;
@@ -926,19 +1081,39 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
926
1081
  updated.searchHints = improvements.searchHints;
927
1082
  if (improvements.tags?.length)
928
1083
  updated.tags = improvements.tags;
929
- enhanced.push(updated);
1084
+ // Mark as enriched so subsequent index runs skip re-enrichment (P2)
1085
+ updated.quality = "enriched";
1086
+ // Persist to cache so the next run can skip the LLM call when the
1087
+ // file body has not changed.
1088
+ if (db) {
1089
+ upsertLlmCacheEntry(db, cacheKey, bodyHash, JSON.stringify({
1090
+ description: improvements.description,
1091
+ searchHints: improvements.searchHints,
1092
+ tags: improvements.tags,
1093
+ }));
1094
+ }
930
1095
  summary.succeeded++;
1096
+ onEntryDone?.({ entryName: entry.name, outcome: "llm" });
1097
+ return updated;
931
1098
  }
932
1099
  catch (err) {
933
- enhanced.push(entry);
934
1100
  const msg = toErrorMessage(err);
935
1101
  // failureSamples is bounded to 3 items, so a linear scan is cheaper
936
1102
  // than maintaining a parallel Set for membership checks (#177 review).
937
1103
  if (summary.failureSamples.length < 3 && !summary.failureSamples.includes(msg)) {
938
1104
  summary.failureSamples.push(msg);
939
1105
  }
1106
+ onEntryDone?.({ entryName: entry.name, outcome: "failed" });
1107
+ return entry;
940
1108
  }
941
- }
1109
+ },
1110
+ // Default concurrency of 4 works well for cloud LLM APIs. Set
1111
+ // `llm.concurrency: 1` in config.json for local model servers.
1112
+ getDefaultLlmConcurrency(llmConfig));
1113
+ // concurrentMap returns Array<T | undefined>; replace any undefined slot with
1114
+ // the original entry (this can only occur if the callback itself returned
1115
+ // undefined, which it never does above — but TypeScript needs the fallback for type safety).
1116
+ const enhanced = results.map((r, i) => r ?? stash.entries[i]);
942
1117
  return { entries: enhanced };
943
1118
  }
944
1119
  /**
@@ -1018,13 +1193,13 @@ export async function lookup(ref) {
1018
1193
  const dbPath = getDbPath();
1019
1194
  const db = openExistingDatabase(dbPath);
1020
1195
  try {
1021
- // entry_key shape: `${stashDir}:${type}:${name}`. Suffix-match on
1022
- // `:type:name` so we can scope by source dir as a prefix when origin is
1023
- // supplied. Use parameterised queries throughout — names may include
1024
- // user-supplied glob characters.
1025
1196
  const escapeLike = (value) => value.replace(/\\/g, "\\\\").replace(/%/g, "\\%").replace(/_/g, "\\_");
1026
- const suffix = `:${ref.type}:${ref.name}`;
1027
- const escapedSuffix = escapeLike(suffix);
1197
+ // Canonical names strip .md for markdown assets, but users often pass
1198
+ // refs with .md (e.g. command:release.md). Normalize by trying both.
1199
+ const nameVariants = [ref.name];
1200
+ if (ref.name.endsWith(".md")) {
1201
+ nameVariants.push(ref.name.slice(0, -3));
1202
+ }
1028
1203
  const candidateDirs = (() => {
1029
1204
  if (!ref.origin)
1030
1205
  return sources.map((s) => s.path);
@@ -1035,20 +1210,24 @@ export async function lookup(ref) {
1035
1210
  })();
1036
1211
  if (candidateDirs.length === 0)
1037
1212
  return null;
1038
- for (const dir of candidateDirs) {
1039
- const escapedDir = escapeLike(dir);
1040
- const row = db
1041
- .prepare("SELECT entry_key AS entryKey, file_path AS filePath, stash_dir AS stashDir, entry_type AS type FROM entries " +
1042
- "WHERE entry_key LIKE ? ESCAPE '\\' AND entry_type = ? LIMIT 1")
1043
- .get(`${escapedDir}${escapedSuffix}`, ref.type);
1044
- if (row) {
1045
- return {
1046
- entryKey: row.entryKey,
1047
- filePath: row.filePath,
1048
- stashDir: row.stashDir,
1049
- type: row.type,
1050
- name: ref.name,
1051
- };
1213
+ for (const name of nameVariants) {
1214
+ const suffix = `:${ref.type}:${name}`;
1215
+ const escapedSuffix = escapeLike(suffix);
1216
+ for (const dir of candidateDirs) {
1217
+ const escapedDir = escapeLike(dir);
1218
+ const row = db
1219
+ .prepare("SELECT entry_key AS entryKey, file_path AS filePath, stash_dir AS stashDir, entry_type AS type FROM entries " +
1220
+ "WHERE entry_key LIKE ? ESCAPE '\\' AND entry_type = ? LIMIT 1")
1221
+ .get(`${escapedDir}${escapedSuffix}`, ref.type);
1222
+ if (row) {
1223
+ return {
1224
+ entryKey: row.entryKey,
1225
+ filePath: row.filePath,
1226
+ stashDir: row.stashDir,
1227
+ type: row.type,
1228
+ name: ref.name,
1229
+ };
1230
+ }
1052
1231
  }
1053
1232
  }
1054
1233
  return null;
@@ -1113,23 +1292,26 @@ export function recomputeUtilityScores(db) {
1113
1292
  }
1114
1293
  // Batch-load existing utility scores
1115
1294
  const existingScores = new Map();
1116
- const scoreRows = db.prepare("SELECT entry_id, utility FROM utility_scores").all();
1295
+ const scoreRows = db.prepare("SELECT entry_id, utility, last_used_at FROM utility_scores").all();
1117
1296
  for (const row of scoreRows) {
1118
- existingScores.set(row.entry_id, row.utility);
1297
+ existingScores.set(row.entry_id, { utility: row.utility, lastUsedAt: row.last_used_at ?? undefined });
1119
1298
  }
1299
+ const now = new Date().toISOString();
1120
1300
  for (const row of usageRows) {
1121
1301
  const selectRate = row.search_count > 0 ? Math.min(1, row.show_count / row.search_count) : 0;
1122
1302
  const feedbackTotal = row.positive_feedback_count + row.negative_feedback_count;
1123
1303
  const feedbackRate = feedbackTotal > 0 ? Math.max(0, row.positive_feedback_count - row.negative_feedback_count) / feedbackTotal : 0;
1124
1304
  const effectiveRate = Math.max(selectRate, feedbackRate);
1125
- const prevUtility = existingScores.get(row.entry_id) ?? 0;
1305
+ const existing = existingScores.get(row.entry_id);
1306
+ const prevUtility = existing?.utility ?? 0;
1126
1307
  const utility = prevUtility * emaDecay + effectiveRate * emaNew;
1308
+ const lastUsedAt = effectiveRate > 0.5 ? now : (existing?.lastUsedAt ?? undefined);
1127
1309
  upsertUtilityScore(db, row.entry_id, {
1128
1310
  utility,
1129
1311
  showCount: row.show_count,
1130
1312
  searchCount: row.search_count,
1131
1313
  selectRate,
1132
- lastUsedAt: row.last_used_at ?? undefined,
1314
+ lastUsedAt,
1133
1315
  });
1134
1316
  }
1135
1317
  setMeta(db, "last_utility_computed_at", new Date().toISOString());