akm-cli 0.7.5 → 0.8.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +113 -2
  2. package/README.md +20 -4
  3. package/SECURITY.md +93 -0
  4. package/dist/cli/config-migrate.js +144 -0
  5. package/dist/cli/config-validate.js +39 -0
  6. package/dist/cli/confirm.js +73 -0
  7. package/dist/cli/parse-args.js +133 -0
  8. package/dist/cli.js +1995 -551
  9. package/dist/commands/agent-dispatch.js +110 -0
  10. package/dist/commands/agent-support.js +68 -0
  11. package/dist/commands/completions.js +3 -0
  12. package/dist/commands/config-cli.js +130 -534
  13. package/dist/commands/consolidate.js +1531 -0
  14. package/dist/commands/curate.js +44 -3
  15. package/dist/commands/db-cli.js +23 -0
  16. package/dist/commands/distill-promotion-policy.js +660 -0
  17. package/dist/commands/distill.js +990 -75
  18. package/dist/commands/eval-cases.js +43 -0
  19. package/dist/commands/events.js +5 -23
  20. package/dist/commands/graph.js +477 -0
  21. package/dist/commands/health.js +400 -0
  22. package/dist/commands/help/help-accept.md +9 -0
  23. package/dist/commands/help/help-improve.md +77 -0
  24. package/dist/commands/help/help-proposals.md +15 -0
  25. package/dist/commands/help/help-propose.md +17 -0
  26. package/dist/commands/help/help-reject.md +8 -0
  27. package/dist/commands/history.js +54 -46
  28. package/dist/commands/improve-profiles.js +146 -0
  29. package/dist/commands/improve-result-file.js +103 -0
  30. package/dist/commands/improve.js +2175 -0
  31. package/dist/commands/info.js +5 -2
  32. package/dist/commands/init.js +50 -2
  33. package/dist/commands/installed-stashes.js +102 -139
  34. package/dist/commands/knowledge.js +136 -0
  35. package/dist/commands/lint/agent-linter.js +49 -0
  36. package/dist/commands/lint/base-linter.js +479 -0
  37. package/dist/commands/lint/command-linter.js +49 -0
  38. package/dist/commands/lint/default-linter.js +16 -0
  39. package/dist/commands/lint/index.js +183 -0
  40. package/dist/commands/lint/knowledge-linter.js +16 -0
  41. package/dist/commands/lint/markdown-insertion.js +343 -0
  42. package/dist/commands/lint/memory-linter.js +61 -0
  43. package/dist/commands/lint/registry.js +36 -0
  44. package/dist/commands/lint/skill-linter.js +45 -0
  45. package/dist/commands/lint/task-linter.js +50 -0
  46. package/dist/commands/lint/types.js +4 -0
  47. package/dist/commands/lint/vault-key-rules.js +139 -0
  48. package/dist/commands/lint/workflow-linter.js +56 -0
  49. package/dist/commands/lint.js +4 -0
  50. package/dist/commands/migration-help.js +5 -2
  51. package/dist/commands/proposal.js +66 -12
  52. package/dist/commands/propose.js +86 -31
  53. package/dist/commands/reflect.js +1119 -73
  54. package/dist/commands/registry-search.js +5 -2
  55. package/dist/commands/remember.js +69 -6
  56. package/dist/commands/schema-repair.js +203 -0
  57. package/dist/commands/search.js +115 -14
  58. package/dist/commands/self-update.js +3 -0
  59. package/dist/commands/show.js +144 -25
  60. package/dist/commands/source-add.js +17 -45
  61. package/dist/commands/source-clone.js +3 -0
  62. package/dist/commands/source-manage.js +14 -19
  63. package/dist/commands/tasks.js +438 -0
  64. package/dist/commands/url-checker.js +42 -0
  65. package/dist/commands/vault.js +130 -77
  66. package/dist/core/action-contributors.js +28 -0
  67. package/dist/core/asset-ref.js +7 -0
  68. package/dist/core/asset-registry.js +7 -16
  69. package/dist/core/asset-serialize.js +88 -0
  70. package/dist/core/asset-spec.js +22 -0
  71. package/dist/core/common.js +157 -0
  72. package/dist/core/concurrent.js +25 -0
  73. package/dist/core/config-io.js +347 -0
  74. package/dist/core/config-migration.js +625 -0
  75. package/dist/core/config-schema.js +501 -0
  76. package/dist/core/config-sources.js +108 -0
  77. package/dist/core/config-types.js +4 -0
  78. package/dist/core/config-walker.js +337 -0
  79. package/dist/core/config.js +327 -987
  80. package/dist/core/errors.js +40 -19
  81. package/dist/core/events.js +91 -138
  82. package/dist/core/file-lock.js +104 -0
  83. package/dist/core/frontmatter.js +3 -6
  84. package/dist/core/lesson-lint.js +3 -0
  85. package/dist/core/markdown.js +20 -0
  86. package/dist/core/memory-belief.js +62 -0
  87. package/dist/core/memory-contradiction-detect.js +274 -0
  88. package/dist/core/memory-improve.js +806 -0
  89. package/dist/core/parse.js +158 -0
  90. package/dist/core/paths.js +326 -14
  91. package/dist/core/proposal-quality-validators.js +364 -0
  92. package/dist/core/proposal-validators.js +69 -0
  93. package/dist/core/proposals.js +498 -42
  94. package/dist/core/state-db.js +927 -0
  95. package/dist/core/text-truncation.js +107 -0
  96. package/dist/core/time.js +54 -0
  97. package/dist/core/warn.js +62 -1
  98. package/dist/core/write-source.js +3 -0
  99. package/dist/indexer/db-backup.js +391 -0
  100. package/dist/indexer/db-search.js +152 -253
  101. package/dist/indexer/db.js +933 -103
  102. package/dist/indexer/ensure-index.js +64 -0
  103. package/dist/indexer/file-context.js +3 -0
  104. package/dist/indexer/graph-boost.js +376 -101
  105. package/dist/indexer/graph-db.js +391 -0
  106. package/dist/indexer/graph-dedup.js +95 -0
  107. package/dist/indexer/graph-extraction.js +550 -124
  108. package/dist/indexer/index-context.js +4 -0
  109. package/dist/indexer/indexer.js +506 -291
  110. package/dist/indexer/llm-cache.js +47 -0
  111. package/dist/indexer/manifest.js +3 -0
  112. package/dist/indexer/matchers.js +148 -160
  113. package/dist/indexer/memory-inference.js +99 -74
  114. package/dist/indexer/metadata-contributors.js +29 -0
  115. package/dist/indexer/metadata.js +255 -196
  116. package/dist/indexer/path-resolver.js +92 -0
  117. package/dist/indexer/project-context.js +192 -0
  118. package/dist/indexer/ranking-contributors.js +331 -0
  119. package/dist/indexer/ranking.js +81 -0
  120. package/dist/indexer/search-fields.js +5 -9
  121. package/dist/indexer/search-hit-enrichers.js +111 -0
  122. package/dist/indexer/search-source.js +44 -10
  123. package/dist/indexer/semantic-status.js +5 -16
  124. package/dist/indexer/staleness-detect.js +447 -0
  125. package/dist/indexer/usage-events.js +12 -9
  126. package/dist/indexer/walker.js +28 -0
  127. package/dist/integrations/agent/builders.js +135 -0
  128. package/dist/integrations/agent/config.js +122 -230
  129. package/dist/integrations/agent/detect.js +3 -0
  130. package/dist/integrations/agent/index.js +7 -13
  131. package/dist/integrations/agent/model-aliases.js +55 -0
  132. package/dist/integrations/agent/profiles.js +70 -5
  133. package/dist/integrations/agent/prompts.js +150 -74
  134. package/dist/integrations/agent/runner.js +151 -0
  135. package/dist/integrations/agent/sdk-runner.js +126 -0
  136. package/dist/integrations/agent/spawn.js +118 -23
  137. package/dist/integrations/github.js +3 -0
  138. package/dist/integrations/lockfile.js +32 -69
  139. package/dist/integrations/session-logs/index.js +68 -0
  140. package/dist/integrations/session-logs/providers/claude-code.js +59 -0
  141. package/dist/integrations/session-logs/providers/opencode.js +55 -0
  142. package/dist/integrations/session-logs/types.js +4 -0
  143. package/dist/llm/call-ai.js +62 -0
  144. package/dist/llm/client.js +72 -124
  145. package/dist/llm/embedder.js +3 -19
  146. package/dist/llm/embedders/cache.js +3 -7
  147. package/dist/llm/embedders/local.js +3 -0
  148. package/dist/llm/embedders/remote.js +20 -8
  149. package/dist/llm/embedders/types.js +3 -7
  150. package/dist/llm/feature-gate.js +89 -48
  151. package/dist/llm/graph-extract.js +676 -70
  152. package/dist/llm/index-passes.js +9 -23
  153. package/dist/llm/memory-infer.js +52 -71
  154. package/dist/llm/metadata-enhance.js +42 -29
  155. package/dist/llm/prompts/graph-extract-user-prompt.md +35 -0
  156. package/dist/output/cli-hints-full.md +281 -0
  157. package/dist/output/cli-hints-short.md +65 -0
  158. package/dist/output/cli-hints.js +5 -318
  159. package/dist/output/context.js +3 -0
  160. package/dist/output/renderers.js +223 -256
  161. package/dist/output/shapes.js +150 -105
  162. package/dist/output/text.js +318 -30
  163. package/dist/registry/build-index.js +3 -0
  164. package/dist/registry/create-provider-registry.js +3 -0
  165. package/dist/registry/factory.js +3 -0
  166. package/dist/registry/origin-resolve.js +3 -0
  167. package/dist/registry/providers/index.js +3 -0
  168. package/dist/registry/providers/skills-sh.js +70 -49
  169. package/dist/registry/providers/static-index.js +53 -48
  170. package/dist/registry/providers/types.js +3 -24
  171. package/dist/registry/resolve.js +11 -16
  172. package/dist/registry/types.js +3 -0
  173. package/dist/scripts/migrate-storage.js +17307 -0
  174. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8900 -0
  175. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  176. package/dist/setup/detect.js +3 -0
  177. package/dist/setup/ripgrep-install.js +3 -0
  178. package/dist/setup/ripgrep-resolve.js +3 -0
  179. package/dist/setup/setup.js +775 -37
  180. package/dist/setup/steps.js +3 -15
  181. package/dist/sources/include.js +3 -0
  182. package/dist/sources/provider-factory.js +5 -12
  183. package/dist/sources/provider.js +3 -20
  184. package/dist/sources/providers/filesystem.js +19 -23
  185. package/dist/sources/providers/git.js +7 -5
  186. package/dist/sources/providers/index.js +3 -0
  187. package/dist/sources/providers/install-types.js +3 -13
  188. package/dist/sources/providers/npm.js +3 -4
  189. package/dist/sources/providers/provider-utils.js +3 -0
  190. package/dist/sources/providers/sync-from-ref.js +3 -11
  191. package/dist/sources/providers/tar-utils.js +3 -0
  192. package/dist/sources/providers/website.js +18 -22
  193. package/dist/sources/resolve.js +3 -0
  194. package/dist/sources/types.js +3 -0
  195. package/dist/sources/website-ingest.js +7 -0
  196. package/dist/tasks/backends/cron.js +203 -0
  197. package/dist/tasks/backends/exec-utils.js +28 -0
  198. package/dist/tasks/backends/index.js +24 -0
  199. package/dist/tasks/backends/launchd-template.xml +19 -0
  200. package/dist/tasks/backends/launchd.js +187 -0
  201. package/dist/tasks/backends/schtasks-template.xml +29 -0
  202. package/dist/tasks/backends/schtasks.js +215 -0
  203. package/dist/tasks/parser.js +211 -0
  204. package/dist/tasks/resolveAkmBin.js +87 -0
  205. package/dist/tasks/runner.js +458 -0
  206. package/dist/tasks/schedule.js +211 -0
  207. package/dist/tasks/schema.js +15 -0
  208. package/dist/tasks/validator.js +62 -0
  209. package/dist/version.js +3 -0
  210. package/dist/wiki/index-template.md +12 -0
  211. package/dist/wiki/ingest-workflow-template.md +54 -0
  212. package/dist/wiki/log-template.md +8 -0
  213. package/dist/wiki/schema-template.md +61 -0
  214. package/dist/wiki/wiki-templates.js +15 -0
  215. package/dist/wiki/wiki.js +13 -61
  216. package/dist/workflows/authoring.js +8 -25
  217. package/dist/workflows/cli.js +3 -0
  218. package/dist/workflows/db.js +140 -10
  219. package/dist/workflows/document-cache.js +3 -10
  220. package/dist/workflows/parser.js +3 -0
  221. package/dist/workflows/renderer.js +11 -3
  222. package/dist/workflows/runs.js +62 -91
  223. package/dist/workflows/schema.js +3 -0
  224. package/dist/workflows/scope-key.js +3 -0
  225. package/dist/workflows/validator.js +4 -8
  226. package/dist/workflows/workflow-template.md +24 -0
  227. package/docs/README.md +9 -2
  228. package/docs/data-and-telemetry.md +225 -0
  229. package/docs/migration/release-notes/0.7.0.md +1 -1
  230. package/docs/migration/release-notes/0.7.5.md +2 -2
  231. package/docs/migration/release-notes/0.8.0.md +48 -0
  232. package/docs/migration/v0.7-to-v0.8.md +1307 -0
  233. package/package.json +20 -8
  234. package/.github/LICENSE +0 -374
  235. package/dist/commands/install-audit.js +0 -381
  236. package/dist/templates/wiki-templates.js +0 -100
@@ -1,14 +1,18 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
1
4
  import fs from "node:fs";
2
5
  import path from "node:path";
6
+ import { SCRIPT_EXTENSIONS } from "../core/asset-spec";
3
7
  import { isHttpUrl, resolveStashDir, toErrorMessage } from "../core/common";
8
+ import { concurrentMap } from "../core/concurrent";
4
9
  import { getDbPath } from "../core/paths";
5
10
  import { isVerbose, warn, warnVerbose } from "../core/warn";
6
11
  import { resolveIndexPassLLM } from "../llm/index-passes";
7
12
  import { takeWorkflowDocument } from "../workflows/document-cache";
8
- import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, warnIfVecMissing, } from "./db";
9
- import { runGraphExtractionPass } from "./graph-extraction";
10
- import { runMemoryInferencePass } from "./memory-inference";
11
- import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
13
+ import { clearStaleCacheEntries, closeDatabase, deleteEntriesByDir, deleteEntriesByIds, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getAllEntriesForEmbedding, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, relinkUsageEvents, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, upsertWorkflowDocument, warnIfVecMissing, } from "./db";
14
+ import { deleteStoredGraph } from "./graph-db";
15
+ import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isEnrichmentComplete, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
12
16
  import { buildSearchText } from "./search-fields";
13
17
  import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
14
18
  import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
@@ -18,19 +22,222 @@ function throwIfAborted(signal) {
18
22
  throw signal.reason instanceof Error ? signal.reason : new Error("index interrupted");
19
23
  }
20
24
  }
25
/**
 * Pick the default number of concurrent LLM requests for an index run.
 *
 * Resolution order:
 *   1. An explicit numeric `llmConfig.concurrency` is returned unchanged.
 *   2. A missing config/endpoint, or an endpoint that does not parse as a
 *      URL, yields 1.
 *   3. A loopback endpoint (`localhost`, `*.localhost`, `127.0.0.1`, `::1`)
 *      yields 1.
 *   4. Any other endpoint yields 4.
 *
 * @param {{ concurrency?: number, endpoint?: string } | null | undefined} llmConfig
 * @returns {number} The concurrency to use.
 */
function getDefaultLlmConcurrency(llmConfig) {
    if (typeof llmConfig?.concurrency === "number")
        return llmConfig.concurrency;
    if (!llmConfig?.endpoint)
        return 1;
    let host;
    try {
        host = new URL(llmConfig.endpoint).hostname.toLowerCase();
    }
    catch {
        // Unparseable endpoint: fall back to the conservative default.
        return 1;
    }
    // The WHATWG URL `hostname` getter keeps the square brackets around IPv6
    // literals, so the loopback address shows up as "[::1]", never "::1".
    // The bare form is kept as well so no previously matched value is lost.
    const loopbackHosts = ["localhost", "127.0.0.1", "[::1]", "::1"];
    if (loopbackHosts.includes(host) || host.endsWith(".localhost"))
        return 1;
    return 4;
}
41
+ // ── Phase functions ──────────────────────────────────────────────────────────
42
/**
 * Source cache phase.
 *
 * On an incremental run (and only when `ctx.full` is not set) this compares
 * the stash directories recorded under the `stashDirs` index meta key with
 * the current `ctx.sourceDirs`. For every previously recorded directory that
 * is no longer present it sets `ctx.hadRemovedSources` and deletes that
 * directory's entries, per-directory index state and stored graph.
 *
 * A `stashDirs` value that is not valid JSON, or not an array, produces a
 * warning and is treated as an empty list.
 *
 * @param ctx Run context assembled by akmIndex().
 */
async function runSourceCachePhase(ctx) {
    const { db, sourceDirs } = ctx;
    // Source caches are hydrated before akmIndex() calls this phase, so the
    // config is not needed here; it is only touched to mark it as intentionally
    // unused.
    void ctx.config;
    if (!ctx.isIncremental || ctx.full) {
        return;
    }
    // Purge entries that belong to stash dirs removed since the last run
    // (e.g. after `akm remove`) so orphaned entries don't linger.
    const storedJson = getMeta(db, "stashDirs");
    if (!storedJson) {
        return;
    }
    let previousDirs = [];
    try {
        const decoded = JSON.parse(storedJson);
        if (!Array.isArray(decoded)) {
            warn("index_meta stashDirs value is not an array — treating as empty");
        }
        else {
            previousDirs = decoded.filter((item) => typeof item === "string");
        }
    }
    catch {
        warn("index_meta stashDirs value is corrupt JSON — treating as empty");
    }
    const stillConfigured = new Set(sourceDirs);
    const removedDirs = previousDirs.filter((dir) => !stillConfigured.has(dir));
    for (const dir of removedDirs) {
        ctx.hadRemovedSources = true;
        deleteEntriesByStashDir(db, dir);
        deleteIndexDirStatesByStashDir(db, dir);
        deleteStoredGraph(db, dir);
    }
}
81
/**
 * Walk phase.
 *
 * Runs indexEntries() over the configured sources, copies its counters onto
 * the context, reports scan progress, emits a single summary warning for
 * skipped workflow specs when not verbose, and then runs LLM enhancement for
 * the directories that indexEntries() flagged.
 *
 * Writes `ctx.scannedDirs`, `ctx.skippedDirs`, `ctx.generatedCount`,
 * `ctx.walkWarnings` and `ctx.dirsNeedingLlm`, plus the `tWalkStart`,
 * `tWalkEnd` and `tLlmEnd` timing marks.
 *
 * @param ctx Run context assembled by akmIndex().
 */
async function runWalkPhase(ctx) {
    const { db, sources, config, signal, onProgress, reEnrich } = ctx;
    const dirNoun = (count) => (count === 1 ? "directory" : "directories");
    // A full run, or a run with no usable previous index, wipes existing
    // entries as part of the insert.
    const wipeBeforeInsert = ctx.full || !ctx.isIncremental;
    const { isIncremental, builtAtMs, hadRemovedSources } = ctx;
    throwIfAborted(signal);
    ctx.timing.tWalkStart = Date.now();
    const walk = await indexEntries(db, sources, isIncremental, builtAtMs, hadRemovedSources, wipeBeforeInsert, onProgress);
    ctx.scannedDirs = walk.scannedDirs;
    ctx.skippedDirs = walk.skippedDirs;
    ctx.generatedCount = walk.generatedCount;
    ctx.walkWarnings = walk.warnings;
    ctx.dirsNeedingLlm = walk.dirsNeedingLlm;
    const scanMessage = `Scanned ${walk.scannedDirs} ${dirNoun(walk.scannedDirs)} and skipped ${walk.skippedDirs}.`;
    onProgress({ phase: "scan", message: scanMessage });
    // Workflow validation noise gate (issue #273): at default verbosity the
    // per-spec stderr lines are suppressed and replaced by one summary line.
    // In verbose mode the per-spec lines were already printed at generation
    // time, so nothing more is emitted here.
    if (!isVerbose()) {
        const skippedWorkflows = walk.warnings.filter(isWorkflowSkipWarning).length;
        if (skippedWorkflows > 0) {
            const noun = skippedWorkflows === 1 ? "workflow spec" : "workflow specs";
            warn(`${skippedWorkflows} ${noun} skipped due to validation errors; rerun with --verbose (or AKM_VERBOSE=1) to see details.`);
        }
    }
    ctx.timing.tWalkEnd = Date.now();
    throwIfAborted(signal);
    // LLM enrichment for the directories flagged during the walk.
    await enhanceDirsWithLlm(db, config, walk.dirsNeedingLlm, onProgress, signal, true, reEnrich);
    let llmMessage = "LLM enhancement disabled.";
    if (resolveIndexPassLLM("enrichment", config)) {
        const reviewed = walk.dirsNeedingLlm.length;
        llmMessage = `LLM enhancement reviewed ${reviewed} ${dirNoun(reviewed)}.`;
    }
    onProgress({ phase: "llm", message: llmMessage });
    ctx.timing.tLlmEnd = Date.now();
}
128
/**
 * Embedding phase.
 *
 * Aborts early if the run's signal has fired, otherwise delegates to
 * generateEmbeddingsForDb() and stores its result on `ctx.embeddingResult`
 * for the finalize phase. Records the `tEmbedEnd` timing mark when done.
 *
 * @param ctx Run context assembled by akmIndex().
 */
async function runEmbeddingPhase(ctx) {
    const { signal, db, config, onProgress } = ctx;
    throwIfAborted(signal);
    const outcome = await generateEmbeddingsForDb(db, config, onProgress);
    ctx.embeddingResult = outcome;
    ctx.timing.tEmbedEnd = Date.now();
}
138
/**
 * Finalize phase.
 *
 * In order: rebuilds the full-text index (incrementally when the run is
 * incremental), re-links usage events, recomputes utility scores, purges
 * stale LLM cache entries, regenerates wiki indexes, writes the index meta
 * keys (`builtAt`, `stashDir`, `stashDirs`, `hasEmbeddings`), verifies the
 * index state, updates the semantic status, and reports the verification
 * message through `onProgress`.
 *
 * The cache purge and the wiki regeneration are best-effort: a failure in
 * either never aborts the run, but is reported through `warnVerbose` so it
 * can be diagnosed.
 *
 * Writes `ctx.timing.tFtsEnd`, `ctx._verification` and `ctx._totalEntries`.
 *
 * @param ctx Run context assembled by akmIndex().
 * @throws Whatever throwIfAborted() raises when `ctx.signal` has fired.
 */
async function runFinalizePhase(ctx) {
    const { db, config, sourceDirs, isIncremental, stashDir, signal, onProgress } = ctx;
    // Rebuild FTS after all inserts. Use incremental mode when this whole
    // index run is incremental — only entries touched by `upsertEntry`
    // since the last rebuild are re-indexed.
    rebuildFts(db, { incremental: isIncremental });
    onProgress({
        phase: "fts",
        message: isIncremental ? "Rebuilt full-text search index (dirty rows only)." : "Rebuilt full-text search index.",
    });
    ctx.timing.tFtsEnd = Date.now();
    // Re-link detached usage_events and recompute utility scores.
    relinkUsageEvents(db);
    recomputeUtilityScores(db);
    // Purge LLM cache entries for assets that no longer exist in the index.
    // Best-effort: a failure is reported but does not stop the run.
    try {
        clearStaleCacheEntries(db);
    }
    catch (err) {
        warnVerbose(`Skipped purging stale LLM cache entries: ${toErrorMessage(err)}`);
    }
    // Regenerate each wiki's index.md from its pages' frontmatter.
    // Best-effort: a failure is reported but does not stop the run.
    try {
        const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
        regenerateAllWikiIndexes(stashDir);
    }
    catch (err) {
        warnVerbose(`Skipped regenerating wiki indexes: ${toErrorMessage(err)}`);
    }
    throwIfAborted(signal);
    // Update index metadata. A missing embedding result is treated as a
    // failed embedding pass.
    const embeddingResult = ctx.embeddingResult ?? { success: false };
    setMeta(db, "builtAt", new Date().toISOString());
    setMeta(db, "stashDir", stashDir);
    setMeta(db, "stashDirs", JSON.stringify(sourceDirs));
    setMeta(db, "hasEmbeddings", embeddingResult.success ? "1" : "0");
    warnIfVecMissing(db);
    const totalEntries = getEntryCount(db);
    const verification = verifyIndexState(db, config, totalEntries, embeddingResult);
    if (config.semanticSearchMode === "off") {
        clearSemanticStatus();
    }
    else {
        writeSemanticStatus({
            // A "disabled" verification result is recorded as "pending" whenever
            // semantic search is not switched off in the config.
            status: verification.semanticStatus === "disabled" ? "pending" : verification.semanticStatus,
            ...(embeddingResult.reason ? { reason: embeddingResult.reason } : {}),
            ...(embeddingResult.message ? { message: embeddingResult.message } : {}),
            providerFingerprint: deriveSemanticProviderFingerprint(config.embedding),
            lastCheckedAt: new Date().toISOString(),
            entryCount: verification.entryCount,
            embeddingCount: verification.embeddingCount,
        });
    }
    onProgress({ phase: "verify", message: verification.message });
    // Store verification result and totalEntries on ctx for the caller to use.
    ctx._verification = verification;
    ctx._totalEntries = totalEntries;
}
202
+ // ── Clean pass ───────────────────────────────────────────────────────────────
203
/**
 * Post-index clean pass.
 *
 * Reads every row of the `entries` table and looks for rows whose
 * `file_path` no longer exists on disk. Rows without a non-blank string
 * path (remote/virtual entries) are skipped and not counted. Missing rows
 * are deleted unless `dryRun` is true.
 *
 * @param db Database handle exposing `prepare(sql).all()`.
 * @param {boolean} dryRun When true, report what would be removed without deleting.
 * @returns {{ checked: number, removed: number, removedRefs: string[], dryRun: boolean }}
 *   `checked` is the number of local rows examined, `removed` the number
 *   actually deleted (always 0 on a dry run), and `removedRefs` the entry
 *   keys of the missing rows (populated on dry runs too).
 */
function runCleanPass(db, dryRun) {
    const rows = db.prepare("SELECT id, entry_key AS ref, file_path AS path FROM entries").all();
    let checked = 0;
    const staleIds = [];
    const removedRefs = [];
    for (const row of rows) {
        // Remote/virtual entries carry no usable local path; skip them.
        const hasLocalPath = typeof row.path === "string" && row.path.trim() !== "";
        if (!hasLocalPath) {
            continue;
        }
        checked += 1;
        if (fs.existsSync(row.path)) {
            continue;
        }
        staleIds.push(row.id);
        removedRefs.push(row.ref);
    }
    if (!dryRun && staleIds.length > 0) {
        deleteEntriesByIds(db, staleIds);
    }
    return {
        checked,
        removed: dryRun ? 0 : staleIds.length,
        removedRefs,
        dryRun,
    };
}
21
225
  // ── Indexer ──────────────────────────────────────────────────────────────────
22
226
  export async function akmIndex(options) {
23
227
  const stashDir = options?.stashDir || resolveStashDir();
24
228
  const onProgress = options?.onProgress ?? (() => { });
25
229
  const signal = options?.signal;
26
- const enrich = options?.enrich === true;
230
+ const reEnrich = options?.reEnrich === true;
231
+ const full = options?.full === true;
232
+ const clean = options?.clean === true;
233
+ const dryRun = options?.dryRun === true;
27
234
  // Load config and resolve all stash sources
28
235
  const { loadConfig } = await import("../core/config.js");
29
236
  const config = loadConfig();
30
237
  // Ensure git stash caches are extracted before resolving stash dirs,
31
238
  // so their content directories exist on disk for the walker to discover.
32
239
  const { ensureSourceCaches, resolveSourceEntries } = await import("./search-source.js");
33
- await ensureSourceCaches(config, { force: options?.full === true });
240
+ await ensureSourceCaches(config, { force: full });
34
241
  const allSourceEntries = resolveSourceEntries(stashDir, config);
35
242
  const allSourceDirs = allSourceEntries.map((s) => s.path);
36
243
  const t0 = Date.now();
@@ -39,11 +246,41 @@ export async function akmIndex(options) {
39
246
  const embeddingDim = config.embedding?.dimension;
40
247
  const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
41
248
  try {
42
- // Check if we should do incremental
249
+ // Determine incremental vs full mode
43
250
  const prevStashDir = getMeta(db, "stashDir");
44
251
  const prevBuiltAt = getMeta(db, "builtAt");
45
- const isIncremental = !options?.full && prevStashDir === stashDir && !!prevBuiltAt;
252
+ const isIncremental = !full && prevStashDir === stashDir && !!prevBuiltAt;
46
253
  const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
254
+ // Assemble the run context
255
+ const ctx = {
256
+ db,
257
+ config,
258
+ sources: allSourceEntries,
259
+ sourceDirs: allSourceDirs,
260
+ full,
261
+ reEnrich,
262
+ stashDir,
263
+ onProgress,
264
+ signal,
265
+ timing: {
266
+ t0,
267
+ tWalkStart: t0,
268
+ tWalkEnd: t0,
269
+ tLlmEnd: t0,
270
+ tFtsEnd: t0,
271
+ tEmbedEnd: t0,
272
+ },
273
+ isIncremental,
274
+ builtAtMs,
275
+ hadRemovedSources: false,
276
+ scannedDirs: 0,
277
+ skippedDirs: 0,
278
+ generatedCount: 0,
279
+ walkWarnings: [],
280
+ dirsNeedingLlm: [],
281
+ embeddingResult: null,
282
+ graphExtractionResult: null,
283
+ };
47
284
  onProgress({
48
285
  phase: "summary",
49
286
  message: buildIndexSummaryMessage({
@@ -51,231 +288,44 @@ export async function akmIndex(options) {
51
288
  sourcesCount: allSourceDirs.length,
52
289
  semanticSearchMode: config.semanticSearchMode,
53
290
  embeddingProvider: getEmbeddingProvider(config.embedding),
54
- llmEnabled: enrich && !!resolveIndexPassLLM("enrichment", config),
291
+ llmEnabled: !!resolveIndexPassLLM("enrichment", config),
55
292
  vecAvailable: isVecAvailable(db),
56
293
  }),
57
294
  });
58
- let hadRemovedSources = false;
59
- if (options?.full || !isIncremental) {
60
- // The delete is now merged into the insert transaction inside
61
- // indexEntries() so that a reader never sees an empty database between
62
- // the wipe and the re-inserts. The doFullDelete flag signals this path.
63
- }
64
- else {
65
- // Incremental: purge entries from stash dirs that have been removed
66
- // (e.g. after `akm remove`) so orphaned entries don't linger.
67
- const prevStashDirsJson = getMeta(db, "stashDirs");
68
- if (prevStashDirsJson) {
69
- let prevStashDirs = [];
70
- try {
71
- const parsed = JSON.parse(prevStashDirsJson);
72
- if (Array.isArray(parsed)) {
73
- prevStashDirs = parsed.filter((d) => typeof d === "string");
74
- }
75
- else {
76
- warn("index_meta stashDirs value is not an array — treating as empty");
77
- }
78
- }
79
- catch {
80
- warn("index_meta stashDirs value is corrupt JSON — treating as empty");
81
- }
82
- const currentSet = new Set(allSourceDirs);
83
- for (const dir of prevStashDirs) {
84
- if (!currentSet.has(dir)) {
85
- hadRemovedSources = true;
86
- deleteEntriesByStashDir(db, dir);
87
- deleteIndexDirStatesByStashDir(db, dir);
88
- }
89
- }
90
- }
295
+ // ── Phase sequence ───────────────────────────────────────────────────────
296
+ await runSourceCachePhase(ctx);
297
+ await runWalkPhase(ctx);
298
+ await runEmbeddingPhase(ctx);
299
+ await runFinalizePhase(ctx);
300
+ // ────────────────────────────────────────────────────────────────────────
301
+ const { _verification: verification, _totalEntries: totalEntries } = ctx;
302
+ const { timing } = ctx;
303
+ // ── Clean pass ───────────────────────────────────────────────────────────
304
+ // After the normal index completes, remove entries whose source files no
305
+ // longer exist on disk. Remote entries (empty file_path) are skipped.
306
+ let cleanResult;
307
+ if (clean) {
308
+ cleanResult = runCleanPass(db, dryRun);
91
309
  }
92
- throwIfAborted(signal);
93
- // Memory inference pass (#201). Runs before the walk so any derived-memory
94
- // children that get written are picked up by the walker in this same run
95
- // and don't have to wait for the next `akm index`. Gated entirely by
96
- // `resolveIndexPassLLM("memory", config)` — when the user has no
97
- // `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
98
- // and existing inferred children are left in place.
99
- if (enrich) {
100
- try {
101
- const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
102
- if (inferenceResult.writtenFacts > 0 || inferenceResult.skippedNoFacts > 0) {
103
- onProgress({
104
- phase: "llm",
105
- message: `Memory inference reviewed ${inferenceResult.considered} ` +
106
- `${inferenceResult.considered === 1 ? "memory" : "memories"}; wrote ` +
107
- `${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} ` +
108
- `from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}` +
109
- (inferenceResult.skippedNoFacts > 0
110
- ? `; skipped ${inferenceResult.skippedNoFacts} ${inferenceResult.skippedNoFacts === 1 ? "memory" : "memories"} with unusable LLM responses`
111
- : "") +
112
- ".",
113
- });
114
- }
115
- if (inferenceResult.skippedNoFacts > 0) {
116
- warn(`Memory inference skipped ${inferenceResult.skippedNoFacts} ` +
117
- `${inferenceResult.skippedNoFacts === 1 ? "memory" : "memories"} because the LLM returned empty, invalid, or incomplete derived payloads. ` +
118
- "Check your model and token budget.");
119
- }
120
- }
121
- catch (err) {
122
- warn(`Memory inference pass aborted: ${err instanceof Error ? err.message : String(err)}`);
123
- }
124
- }
125
- else {
126
- onProgress({
127
- phase: "llm",
128
- message: "LLM passes disabled; rerun with --enrich to enable inference and enrichment.",
129
- });
130
- }
131
- // Graph extraction pass (#207). Runs after memory inference so any
132
- // atomic-fact children that just got written are visible to the graph
133
- // walk. Persists `<stashRoot>/.akm/graph.json` — an indexer artifact,
134
- // NOT a user-visible asset, so it is not routed through
135
- // writeAssetToSource. The artifact feeds the existing FTS5+boosts
136
- // pipeline as a single boost component (see graph-boost.ts); there is
137
- // no parallel scoring track. Disabled when either gate (the locked
138
- // `llm.features.graph_extraction` feature flag or the per-pass
139
- // `index.graph.llm` toggle) is off; the existing graph file is
140
- // preserved on disk in that case.
141
- if (enrich) {
142
- try {
143
- const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
144
- if (graphResult.written) {
145
- onProgress({
146
- phase: "llm",
147
- message: `Graph extraction wrote ${graphResult.totalEntities} entit${graphResult.totalEntities === 1 ? "y" : "ies"} and ${graphResult.totalRelations} relation${graphResult.totalRelations === 1 ? "" : "s"} from ${graphResult.extracted} file${graphResult.extracted === 1 ? "" : "s"}.`,
148
- });
149
- }
150
- }
151
- catch (err) {
152
- warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
153
- }
154
- }
155
- throwIfAborted(signal);
156
- const tWalkStart = Date.now();
157
- // Walk stash dirs and index entries.
158
- // doFullDelete=true merges the wipe into the same transaction as the
159
- // inserts so readers never see an empty database mid-rebuild.
160
- const doFullDelete = options?.full || !isIncremental;
161
- const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete, onProgress);
162
- onProgress({
163
- phase: "scan",
164
- message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
165
- });
166
- // Workflow validation noise gate (issue #273): per-spec stderr lines from
167
- // `buildMetadataSkipWarning` are suppressed at default verbosity in
168
- // `metadata.ts`. Replace them with a single summary line so operators
169
- // running a cold-start search against a fresh registry-cloned source
170
- // don't get the impression akm is broken. Verbose mode keeps the
171
- // per-spec output instead of (not in addition to) the summary.
172
- if (!isVerbose()) {
173
- const skippedWorkflowCount = warnings.filter(isWorkflowSkipWarning).length;
174
- if (skippedWorkflowCount > 0) {
175
- const noun = skippedWorkflowCount === 1 ? "workflow spec" : "workflow specs";
176
- warn(`${skippedWorkflowCount} ${noun} skipped due to validation errors; ` +
177
- "rerun with --verbose (or AKM_VERBOSE=1) to see details.");
178
- }
179
- }
180
- const tWalkEnd = Date.now();
181
- throwIfAborted(signal);
182
- // Enhance entries with LLM if configured
183
- await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich);
184
- onProgress({
185
- phase: "llm",
186
- message: enrich && resolveIndexPassLLM("enrichment", config)
187
- ? `LLM enhancement reviewed ${dirsNeedingLlm.length} ${dirsNeedingLlm.length === 1 ? "directory" : "directories"}.`
188
- : "LLM enhancement disabled.",
189
- });
190
- const tLlmEnd = Date.now();
191
- throwIfAborted(signal);
192
- // Rebuild FTS after all inserts. Use incremental mode when this whole
193
- // index run is incremental — only entries touched by `upsertEntry`
194
- // since the last rebuild are re-indexed, instead of re-scanning every
195
- // row on every `akm index` invocation.
196
- rebuildFts(db, { incremental: isIncremental });
197
- onProgress({
198
- phase: "fts",
199
- message: isIncremental ? "Rebuilt full-text search index (dirty rows only)." : "Rebuilt full-text search index.",
200
- });
201
- const tFtsEnd = Date.now();
202
- // Re-link detached usage_events to their new entry_ids via entry_ref.
203
- // entry_ref is "type:name" (e.g., "skill:code-review"), entry_key is "stashDir:type:name".
204
- // Use substr to extract the "type:name" suffix from entry_key for exact comparison
205
- // (avoids LIKE which would require escaping % and _ in user-facing names).
206
- try {
207
- db.exec(`
208
- UPDATE usage_events SET entry_id = (
209
- SELECT e.id FROM entries e
210
- WHERE substr(e.entry_key, length(e.entry_key) - length(usage_events.entry_ref)) = ':' || usage_events.entry_ref
211
- LIMIT 1
212
- )
213
- WHERE entry_id IS NULL AND entry_ref IS NOT NULL
214
- `);
215
- }
216
- catch {
217
- /* ignore if table doesn't exist yet */
218
- }
219
- // Recompute utility scores from usage_events after FTS rebuild
220
- recomputeUtilityScores(db);
221
- // Regenerate each wiki's index.md from its pages' frontmatter. Best-effort
222
- // — errors are caught inside regenerateAllWikiIndexes and never block the
223
- // index run. The primary stash is the only target: additional sources
224
- // are read-only caches, and regenerating their indexes would mutate
225
- // cache content.
226
- try {
227
- const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
228
- regenerateAllWikiIndexes(stashDir);
229
- }
230
- catch {
231
- /* best-effort */
232
- }
233
- throwIfAborted(signal);
234
- // Generate embeddings if semantic search is enabled
235
- const embeddingResult = await generateEmbeddingsForDb(db, config, onProgress);
236
- const tEmbedEnd = Date.now();
237
- // Update metadata
238
- setMeta(db, "builtAt", new Date().toISOString());
239
- setMeta(db, "stashDir", stashDir);
240
- setMeta(db, "stashDirs", JSON.stringify(allSourceDirs));
241
- setMeta(db, "hasEmbeddings", embeddingResult.success ? "1" : "0");
242
- const totalEntries = getEntryCount(db);
243
- // Warn on every index run if using JS fallback with many entries
244
- warnIfVecMissing(db);
245
- const tEnd = Date.now();
246
- const verification = verifyIndexState(db, config, totalEntries, embeddingResult);
247
- if (config.semanticSearchMode === "off") {
248
- clearSemanticStatus();
249
- }
250
- else {
251
- writeSemanticStatus({
252
- status: verification.semanticStatus === "disabled" ? "pending" : verification.semanticStatus,
253
- ...(embeddingResult.reason ? { reason: embeddingResult.reason } : {}),
254
- ...(embeddingResult.message ? { message: embeddingResult.message } : {}),
255
- providerFingerprint: deriveSemanticProviderFingerprint(config.embedding),
256
- lastCheckedAt: new Date().toISOString(),
257
- entryCount: verification.entryCount,
258
- embeddingCount: verification.embeddingCount,
259
- });
260
- }
261
- onProgress({ phase: "verify", message: verification.message });
310
+ // ────────────────────────────────────────────────────────────────────────
262
311
  return {
263
312
  stashDir,
264
313
  totalEntries,
265
- generatedMetadata: generatedCount,
314
+ generatedMetadata: ctx.generatedCount,
266
315
  indexPath: dbPath,
267
316
  mode: isIncremental ? "incremental" : "full",
268
- directoriesScanned: scannedDirs,
269
- directoriesSkipped: skippedDirs,
270
- ...(warnings.length > 0 ? { warnings } : {}),
317
+ directoriesScanned: ctx.scannedDirs,
318
+ directoriesSkipped: ctx.skippedDirs,
319
+ ...(ctx.walkWarnings.length > 0 ? { warnings: ctx.walkWarnings } : {}),
271
320
  verification,
272
321
  timing: {
273
- totalMs: tEnd - t0,
274
- walkMs: tWalkEnd - tWalkStart,
275
- llmMs: tLlmEnd - tWalkEnd,
276
- embedMs: tEmbedEnd - tFtsEnd,
277
- ftsMs: tFtsEnd - tLlmEnd,
322
+ totalMs: Date.now() - timing.t0,
323
+ walkMs: timing.tWalkEnd - timing.tWalkStart,
324
+ llmMs: timing.tLlmEnd - timing.tWalkEnd,
325
+ embedMs: timing.tEmbedEnd - timing.tFtsEnd,
326
+ ftsMs: timing.tFtsEnd - timing.tLlmEnd,
278
327
  },
328
+ ...(cleanResult !== undefined ? { clean: cleanResult } : {}),
279
329
  };
280
330
  }
281
331
  finally {
@@ -512,8 +562,10 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
512
562
  if (stash) {
513
563
  for (const entry of stash.entries) {
514
564
  const entryPath = entry.filename ? path.join(dirPath, entry.filename) : null;
515
- if (!entryPath)
516
- continue; // skip unresolvable entries
565
+ if (!entryPath) {
566
+ warn(`Skipping entry with no resolvable path in ${dirPath}`);
567
+ continue;
568
+ }
517
569
  if (!shouldIndexStashFile(currentStashDir, entryPath))
518
570
  continue;
519
571
  // Skip if a higher-priority stash root already indexed this asset
@@ -535,7 +587,9 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
535
587
  }
536
588
  }
537
589
  }
538
- // Collect dirs needing LLM enhancement during the first walk
590
+ // Collect dirs needing LLM enhancement during the first walk.
591
+ // Only dirs with "generated" entries need enrichment (unless reEnrich
592
+ // forces re-processing of already-enriched entries).
539
593
  if (stash.entries.some((e) => e.quality === "generated")) {
540
594
  dirsNeedingLlm.push({ dirPath, files, currentStashDir, stash });
541
595
  }
@@ -553,7 +607,20 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
553
607
  reason: persistedReason,
554
608
  });
555
609
  if (persistedRows === 0) {
556
- warnVerbose(`[index] zero-row ${dirPath}: ${persistedReason}`);
610
+ // Warn only when the dir had files that *could* produce entries (.md or
611
+ // known script extensions). Dirs with only non-indexable types (.json,
612
+ // .yaml, .conf, .env, .gitkeep) or deduped-only rows are expected and
613
+ // not actionable at normal log level.
614
+ const hasIndexableExtension = files.some((f) => {
615
+ const ext = path.extname(f).toLowerCase();
616
+ return ext === ".md" || SCRIPT_EXTENSIONS.has(ext);
617
+ });
618
+ if (persistedReason !== "deduped-zero-row" && hasIndexableExtension) {
619
+ warn(`[index] zero-row ${dirPath}: ${persistedReason}`);
620
+ }
621
+ else {
622
+ warnVerbose(`[index] zero-row ${dirPath}: ${persistedReason}`);
623
+ }
557
624
  }
558
625
  }
559
626
  });
@@ -652,9 +719,7 @@ function inferZeroRowReason(stash, priorReason, warnings, dirPath, dedupedRows)
652
719
  return "empty-generated-set";
653
720
  return `zero-row:${priorReason?.kind ?? "unknown"}`;
654
721
  }
655
- async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = false) {
656
- if (!enrich)
657
- return;
722
+ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, _enrich = false, reEnrich = false) {
658
723
  // Resolve per-pass LLM config via the unified shim. Returns undefined when
659
724
  // either no `akm.llm` is configured or the user opted this pass out via
660
725
  // `index.enrichment.llm = false`. (#208)
@@ -665,24 +730,142 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = f
665
730
  // as a single visible warning instead of silently degrading every entry
666
731
  // and leaving the user wondering why nothing got enhanced.
667
732
  const summary = { attempted: 0, succeeded: 0, failureSamples: [] };
668
- for (const { dirPath, files, currentStashDir, stash: originalStash } of dirsNeedingLlm) {
669
- throwIfAborted(signal);
670
- // Only enhance generated entries; user-provided overrides should not be overwritten
671
- const generatedEntries = originalStash.entries.filter((e) => e.quality === "generated");
672
- if (generatedEntries.length === 0)
673
- continue;
674
- const generatedStash = { entries: generatedEntries };
675
- const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary, signal);
676
- // Re-upsert the enhanced entries in a single transaction so a crash
677
- // cannot leave half the entries updated and the rest stale.
678
- db.transaction(() => {
679
- for (const entry of enhanced.entries) {
680
- const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
681
- const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
682
- const searchText = buildSearchText(entry);
683
- upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, attachFileSize(entry, entryPath), searchText);
684
- }
685
- })();
733
+ let completedDirs = 0;
734
+ let completedEntries = 0;
735
+ const totalDirs = dirsNeedingLlm.length;
736
+ const totalEntries = dirsNeedingLlm.reduce((sum, { stash }) => {
737
+ const entriesToEnhance = stash.entries.filter((e) => {
738
+ if (e.quality !== "generated" && !(reEnrich && e.quality === "enriched"))
739
+ return false;
740
+ if (!reEnrich && isEnrichmentComplete(e))
741
+ return false;
742
+ return true;
743
+ });
744
+ return sum + entriesToEnhance.length;
745
+ }, 0);
746
+ // P3 wall-clock budget for the enrichment pass. Defaults to llm.timeoutMs
747
+ // (or 10 minutes if not set). Users can extend this via llm.timeoutMs in
748
+ // config — no separate knob needed.
749
+ const budgetMs = (llmConfig.timeoutMs ?? 10 * 60 * 1000) * Math.max(totalEntries, 1);
750
+ const enrichDeadline = AbortSignal.timeout(budgetMs);
751
+ let deadlineHit = false;
752
+ const enrichSignal = (() => {
753
+ if (!signal)
754
+ return enrichDeadline;
755
+ // Combine: abort when either fires.
756
+ const controller = new AbortController();
757
+ const onAbort = () => controller.abort();
758
+ signal.addEventListener("abort", onAbort, { once: true });
759
+ enrichDeadline.addEventListener("abort", () => {
760
+ deadlineHit = true;
761
+ controller.abort();
762
+ }, { once: true });
763
+ return controller.signal;
764
+ })();
765
+ if (totalEntries > 0) {
766
+ onProgress?.({
767
+ phase: "llm",
768
+ message: `LLM enhancement starting for ${totalEntries} entr${totalEntries === 1 ? "y" : "ies"} ` +
769
+ `across ${totalDirs} director${totalDirs === 1 ? "y" : "ies"} (concurrency ${getDefaultLlmConcurrency(llmConfig)}).`,
770
+ processed: 0,
771
+ total: totalEntries,
772
+ });
773
+ }
774
+ let currentDirLabel;
775
+ let lastProgressAt = Date.now();
776
+ let heartbeatTimer;
777
+ if (totalEntries > 0 && onProgress) {
778
+ heartbeatTimer = setInterval(() => {
779
+ if (Date.now() - lastProgressAt < 15000)
780
+ return;
781
+ onProgress({
782
+ phase: "llm",
783
+ message: `Still enriching ${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}` +
784
+ (currentDirLabel ? `; waiting on ${currentDirLabel}` : "") +
785
+ ".",
786
+ processed: completedEntries,
787
+ total: totalEntries,
788
+ });
789
+ lastProgressAt = Date.now();
790
+ }, 15000);
791
+ }
792
+ try {
793
+ await concurrentMap(dirsNeedingLlm, async ({ dirPath, files, currentStashDir, stash: originalStash }) => {
794
+ if (enrichSignal.aborted)
795
+ return undefined;
796
+ // Only enhance generated entries (or all when reEnrich=true);
797
+ // user-provided overrides should not be overwritten.
798
+ // Skip entries that are already fully enriched (description + tags + searchHints)
799
+ // unless the caller explicitly requests re-enrichment via reEnrich=true.
800
+ const entriesToEnhance = originalStash.entries.filter((e) => {
801
+ if (e.quality !== "generated" && !(reEnrich && e.quality === "enriched"))
802
+ return false;
803
+ if (!reEnrich && isEnrichmentComplete(e)) {
804
+ warnVerbose(`[akm] skipping LLM enrichment for "${e.name}" — entry already complete`);
805
+ return false;
806
+ }
807
+ return true;
808
+ });
809
+ if (entriesToEnhance.length === 0)
810
+ return undefined;
811
+ currentDirLabel = path.relative(currentStashDir, dirPath) || ".";
812
+ onProgress?.({
813
+ phase: "llm",
814
+ message: `Enhancing ${currentDirLabel} ` +
815
+ `(${entriesToEnhance.length} entr${entriesToEnhance.length === 1 ? "y" : "ies"}).`,
816
+ processed: completedEntries,
817
+ total: totalEntries,
818
+ });
819
+ lastProgressAt = Date.now();
820
+ const targetStash = { entries: entriesToEnhance };
821
+ const entryKeys = entriesToEnhance.map((e) => `${currentStashDir}:${e.type}:${e.name}`);
822
+ const enhanced = await enhanceStashWithLlm(llmConfig, targetStash, files, summary, enrichSignal, db, entryKeys, reEnrich, config, (event) => {
823
+ completedEntries++;
824
+ lastProgressAt = Date.now();
825
+ onProgress?.({
826
+ phase: "llm",
827
+ message: `Enhanced ${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}; ` +
828
+ `${completedDirs}/${totalDirs} director${totalDirs === 1 ? "y" : "ies"} complete` +
829
+ (event.entryName ? `; current ${event.entryName}` : "") +
830
+ (currentDirLabel ? ` in ${currentDirLabel}` : "") +
831
+ (event.outcome === "cache-hit" ? " (cache hit)" : ""),
832
+ processed: completedEntries,
833
+ total: totalEntries,
834
+ });
835
+ });
836
+ // Re-upsert the enhanced entries in a single transaction so a crash
837
+ // cannot leave half the entries updated and the rest stale.
838
+ db.transaction(() => {
839
+ for (const entry of enhanced.entries) {
840
+ const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
841
+ const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
842
+ const searchText = buildSearchText(entry);
843
+ upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, attachFileSize(entry, entryPath), searchText);
844
+ }
845
+ })();
846
+ completedDirs++;
847
+ lastProgressAt = Date.now();
848
+ onProgress?.({
849
+ phase: "llm",
850
+ message: `Completed ${completedDirs}/${totalDirs} director${totalDirs === 1 ? "y" : "ies"}; ` +
851
+ `${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"} processed.`,
852
+ processed: completedEntries,
853
+ total: totalEntries,
854
+ });
855
+ return undefined;
856
+ },
857
+ // Default concurrency of 4 works well for cloud LLM APIs. Local model
858
+ // servers (LM Studio, Ollama) run one inference at a time — set
859
+ // `llm.concurrency: 1` in config.json to avoid "Model reloaded" / 500
860
+ // errors from concurrent request overload.
861
+ getDefaultLlmConcurrency(llmConfig));
862
+ }
863
+ finally {
864
+ if (heartbeatTimer)
865
+ clearInterval(heartbeatTimer);
866
+ }
867
+ if (deadlineHit) {
868
+ warn("[akm] LLM enrichment budget exceeded. Re-run `akm index` to continue. Increase llm.timeoutMs for a larger budget.");
686
869
  }
687
870
  if (summary.attempted > 0 && summary.succeeded === 0) {
688
871
  const sample = summary.failureSamples.length ? ` Example: ${summary.failureSamples[0]}` : "";
@@ -754,14 +937,24 @@ async function generateEmbeddingsForDb(db, config, onProgress, signal) {
754
937
  throwIfAborted(signal);
755
938
  // Wrap all embedding upserts in a single transaction so partial
756
939
  // state is rolled back on failure rather than leaving the table half-filled.
940
+ let storedCount = 0;
941
+ let skippedCount = 0;
757
942
  db.transaction(() => {
758
943
  for (let i = 0; i < allEntries.length; i++) {
759
- upsertEmbedding(db, allEntries[i].id, embeddings[i]);
944
+ if (upsertEmbedding(db, allEntries[i].id, embeddings[i])) {
945
+ storedCount++;
946
+ }
947
+ else {
948
+ skippedCount++;
949
+ }
760
950
  }
761
951
  })();
952
+ if (skippedCount > 0) {
953
+ warn(`[embed] ${skippedCount} embedding${skippedCount === 1 ? "" : "s"} skipped (entry deleted between queue and write)`);
954
+ }
762
955
  onProgress({
763
956
  phase: "embeddings",
764
- message: `Stored ${embeddings.length} embedding${embeddings.length === 1 ? "" : "s"}.`,
957
+ message: `Stored ${storedCount} embedding${storedCount === 1 ? "" : "s"}.`,
765
958
  });
766
959
  setMeta(db, "embeddingFingerprint", currentFingerprint);
767
960
  return { success: true };
@@ -781,14 +974,6 @@ async function generateEmbeddingsForDb(db, config, onProgress, signal) {
781
974
  }
782
975
  }
783
976
  // ── Helpers ─────────────────────────────────────────────────────────────────
784
- function getAllEntriesForEmbedding(db) {
785
- return db
786
- .prepare(`
787
- SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
788
- WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
789
- `)
790
- .all();
791
- }
792
977
  function attachFileSize(entry, entryPath) {
793
978
  try {
794
979
  return { ...entry, fileSize: fs.statSync(entryPath).size };
@@ -797,28 +982,6 @@ function attachFileSize(entry, entryPath) {
797
982
  return entry;
798
983
  }
799
984
  }
800
- function upsertWorkflowDocument(db, entryId, doc, content) {
801
- const sourceHash = computeSourceHash(content);
802
- db.prepare(`INSERT INTO workflow_documents (entry_id, schema_version, document_json, source_path, source_hash, updated_at)
803
- VALUES (?, ?, ?, ?, ?, ?)
804
- ON CONFLICT(entry_id) DO UPDATE SET
805
- schema_version = excluded.schema_version,
806
- document_json = excluded.document_json,
807
- source_path = excluded.source_path,
808
- source_hash = excluded.source_hash,
809
- updated_at = excluded.updated_at`).run(entryId, doc.schemaVersion, JSON.stringify(doc), doc.source.path, sourceHash, new Date().toISOString());
810
- }
811
- function computeSourceHash(content) {
812
- // Cheap, stable identity for the source markdown — used by future
813
- // incremental fast-paths that skip re-validation when content is unchanged.
814
- // Not security-sensitive; FNV-1a over the bytes is sufficient.
815
- let hash = 0x811c9dc5;
816
- for (let i = 0; i < content.length; i++) {
817
- hash ^= content[i];
818
- hash = Math.imul(hash, 0x01000193);
819
- }
820
- return (hash >>> 0).toString(16);
821
- }
822
985
  function buildIndexSummaryMessage(options) {
823
986
  const stashSourceLabel = options.sourcesCount === 1 ? "stash source" : "stash sources";
824
987
  const semanticDetail = getSemanticSearchLabel(options.semanticSearchMode, options.embeddingProvider, options.vecAvailable);
@@ -911,11 +1074,12 @@ function resolveIndexedFiles(dirPath, files, stash) {
911
1074
  }
912
1075
  return resolved.size > 0 ? [...resolved] : files;
913
1076
  }
914
- async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
1077
+ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal, db, entryKeys, reEnrich, akmConfig, onEntryDone) {
915
1078
  const { enhanceMetadata } = await import("../llm/metadata-enhance");
916
- const enhanced = [];
917
- for (const entry of stash.entries) {
918
- throwIfAborted(signal);
1079
+ const { computeBodyHash, getLlmCacheEntry, upsertLlmCacheEntry } = await import("./db.js");
1080
+ const results = await concurrentMap(stash.entries, async (entry, idx) => {
1081
+ if (signal?.aborted)
1082
+ return entry;
919
1083
  summary.attempted++;
920
1084
  try {
921
1085
  const entryFile = entry.filename
@@ -927,10 +1091,38 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
927
1091
  fileContent = fs.readFileSync(entryFile, "utf8");
928
1092
  }
929
1093
  catch {
930
- /* ignore unreadable files */
1094
+ warn(`Could not read file for LLM enrichment: ${entry.filename ?? entry.name}`);
1095
+ }
1096
+ }
1097
+ // Incremental cache: skip LLM call when file body is unchanged and
1098
+ // --re-enrich was not requested. The cache key is the entry_key
1099
+ // (stashDir:type:name) which is stable across index runs.
1100
+ const cacheBody = fileContent ?? `${entry.name}\n${entry.description ?? ""}`;
1101
+ const bodyHash = computeBodyHash(cacheBody);
1102
+ const cacheKey = entryKeys?.[idx] ?? `${entry.type}:${entry.name}`;
1103
+ if (db && !reEnrich) {
1104
+ const cached = getLlmCacheEntry(db, cacheKey, bodyHash);
1105
+ if (cached) {
1106
+ try {
1107
+ const parsed = JSON.parse(cached.resultJson);
1108
+ const updated = { ...entry };
1109
+ if (parsed.description)
1110
+ updated.description = parsed.description;
1111
+ if (parsed.searchHints?.length)
1112
+ updated.searchHints = parsed.searchHints;
1113
+ if (parsed.tags?.length)
1114
+ updated.tags = parsed.tags;
1115
+ updated.quality = "enriched";
1116
+ summary.succeeded++;
1117
+ onEntryDone?.({ entryName: entry.name, outcome: "cache-hit" });
1118
+ return updated;
1119
+ }
1120
+ catch {
1121
+ warn(`LLM enrichment cache entry corrupt for ${entry.name}; re-running enrichment`);
1122
+ }
931
1123
  }
932
1124
  }
933
- const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal);
1125
+ const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal, akmConfig);
934
1126
  const updated = { ...entry };
935
1127
  if (improvements.description)
936
1128
  updated.description = improvements.description;
@@ -938,19 +1130,39 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
938
1130
  updated.searchHints = improvements.searchHints;
939
1131
  if (improvements.tags?.length)
940
1132
  updated.tags = improvements.tags;
941
- enhanced.push(updated);
1133
+ // Mark as enriched so subsequent index runs skip re-enrichment (P2)
1134
+ updated.quality = "enriched";
1135
+ // Persist to cache so the next run can skip the LLM call when the
1136
+ // file body has not changed.
1137
+ if (db) {
1138
+ upsertLlmCacheEntry(db, cacheKey, bodyHash, JSON.stringify({
1139
+ description: improvements.description,
1140
+ searchHints: improvements.searchHints,
1141
+ tags: improvements.tags,
1142
+ }));
1143
+ }
942
1144
  summary.succeeded++;
1145
+ onEntryDone?.({ entryName: entry.name, outcome: "llm" });
1146
+ return updated;
943
1147
  }
944
1148
  catch (err) {
945
- enhanced.push(entry);
946
1149
  const msg = toErrorMessage(err);
947
1150
  // failureSamples is bounded to 3 items, so a linear scan is cheaper
948
1151
  // than maintaining a parallel Set for membership checks (#177 review).
949
1152
  if (summary.failureSamples.length < 3 && !summary.failureSamples.includes(msg)) {
950
1153
  summary.failureSamples.push(msg);
951
1154
  }
1155
+ onEntryDone?.({ entryName: entry.name, outcome: "failed" });
1156
+ return entry;
952
1157
  }
953
- }
1158
+ },
1159
+ // Default concurrency of 4 works well for cloud LLM APIs. Set
1160
+ // `llm.concurrency: 1` in config.json for local model servers.
1161
+ getDefaultLlmConcurrency(llmConfig));
1162
+ // concurrentMap returns Array<T | undefined>; replace undefined slots with the original entry
1163
+ // (which can only occur if the callback itself returned undefined, which
1164
+ // it never does above — but TypeScript needs the fallback for type safety).
1165
+ const enhanced = results.map((r, i) => r ?? stash.entries[i]);
954
1166
  return { entries: enhanced };
955
1167
  }
956
1168
  /**
@@ -1129,23 +1341,26 @@ export function recomputeUtilityScores(db) {
1129
1341
  }
1130
1342
  // Batch-load existing utility scores
1131
1343
  const existingScores = new Map();
1132
- const scoreRows = db.prepare("SELECT entry_id, utility FROM utility_scores").all();
1344
+ const scoreRows = db.prepare("SELECT entry_id, utility, last_used_at FROM utility_scores").all();
1133
1345
  for (const row of scoreRows) {
1134
- existingScores.set(row.entry_id, row.utility);
1346
+ existingScores.set(row.entry_id, { utility: row.utility, lastUsedAt: row.last_used_at ?? undefined });
1135
1347
  }
1348
+ const now = new Date().toISOString();
1136
1349
  for (const row of usageRows) {
1137
1350
  const selectRate = row.search_count > 0 ? Math.min(1, row.show_count / row.search_count) : 0;
1138
1351
  const feedbackTotal = row.positive_feedback_count + row.negative_feedback_count;
1139
1352
  const feedbackRate = feedbackTotal > 0 ? Math.max(0, row.positive_feedback_count - row.negative_feedback_count) / feedbackTotal : 0;
1140
1353
  const effectiveRate = Math.max(selectRate, feedbackRate);
1141
- const prevUtility = existingScores.get(row.entry_id) ?? 0;
1354
+ const existing = existingScores.get(row.entry_id);
1355
+ const prevUtility = existing?.utility ?? 0;
1142
1356
  const utility = prevUtility * emaDecay + effectiveRate * emaNew;
1357
+ const lastUsedAt = effectiveRate > 0.5 ? now : (existing?.lastUsedAt ?? undefined);
1143
1358
  upsertUtilityScore(db, row.entry_id, {
1144
1359
  utility,
1145
1360
  showCount: row.show_count,
1146
1361
  searchCount: row.search_count,
1147
1362
  selectRate,
1148
- lastUsedAt: row.last_used_at ?? undefined,
1363
+ lastUsedAt,
1149
1364
  });
1150
1365
  }
1151
1366
  setMeta(db, "last_utility_computed_at", new Date().toISOString());