akm-cli 0.9.0-beta.52 → 0.9.0-beta.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/assets/hints/cli-hints-full.md +6 -5
  2. package/dist/cli/clack.js +56 -0
  3. package/dist/cli/confirm.js +1 -1
  4. package/dist/cli.js +0 -7
  5. package/dist/commands/env/env-cli.js +3 -2
  6. package/dist/commands/env/env.js +14 -67
  7. package/dist/commands/health/checks.js +28 -15
  8. package/dist/commands/health/html-report.js +33 -10
  9. package/dist/commands/health.js +222 -22
  10. package/dist/commands/improve/collapse-detector.js +419 -0
  11. package/dist/commands/improve/consolidate.js +72 -54
  12. package/dist/commands/improve/distill.js +79 -13
  13. package/dist/commands/improve/extract.js +13 -6
  14. package/dist/commands/improve/homeostatic.js +109 -79
  15. package/dist/commands/improve/improve-cli.js +67 -1
  16. package/dist/commands/improve/improve.js +10 -0
  17. package/dist/commands/improve/loop-stages.js +39 -1
  18. package/dist/commands/improve/outcome-loop.js +33 -19
  19. package/dist/commands/improve/preparation.js +36 -11
  20. package/dist/commands/improve/salience.js +49 -32
  21. package/dist/commands/read/curate.js +9 -13
  22. package/dist/commands/read/knowledge.js +4 -0
  23. package/dist/commands/read/search-cli.js +6 -4
  24. package/dist/commands/read/search.js +12 -5
  25. package/dist/commands/read/show.js +6 -8
  26. package/dist/commands/sources/add-cli.js +1 -1
  27. package/dist/commands/sources/init.js +12 -0
  28. package/dist/commands/sources/stash-cli.js +1 -1
  29. package/dist/commands/tasks/default-tasks.js +12 -0
  30. package/dist/core/asset/asset-spec.js +3 -2
  31. package/dist/core/config/config-schema.js +39 -17
  32. package/dist/core/config/config.js +12 -0
  33. package/dist/core/eval/rank-metrics.js +113 -0
  34. package/dist/core/state/migrations.js +56 -0
  35. package/dist/core/state-db.js +146 -19
  36. package/dist/core/warn.js +21 -0
  37. package/dist/indexer/db/db.js +6 -0
  38. package/dist/indexer/ensure-index.js +36 -92
  39. package/dist/indexer/index-writer-lock.js +9 -11
  40. package/dist/indexer/index-written-assets.js +105 -0
  41. package/dist/indexer/indexer.js +16 -4
  42. package/dist/indexer/passes/metadata.js +20 -0
  43. package/dist/indexer/read-preflight.js +23 -0
  44. package/dist/indexer/search/db-search.js +29 -1
  45. package/dist/indexer/search/ranking-contributors.js +33 -1
  46. package/dist/indexer/search/ranking.js +66 -0
  47. package/dist/indexer/search/search-fields.js +6 -0
  48. package/dist/indexer/walk/walker.js +21 -13
  49. package/dist/integrations/agent/detect.js +9 -0
  50. package/dist/integrations/agent/index.js +1 -1
  51. package/dist/llm/client.js +12 -0
  52. package/dist/llm/embedder.js +26 -2
  53. package/dist/llm/embedders/local.js +7 -1
  54. package/dist/llm/feature-gate.js +6 -2
  55. package/dist/output/renderers.js +8 -13
  56. package/dist/output/shapes/helpers.js +0 -3
  57. package/dist/output/shapes/passthrough.js +1 -0
  58. package/dist/scripts/migrate-storage.js +178 -35
  59. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +46 -19
  60. package/dist/setup/detect.js +9 -0
  61. package/dist/setup/registry-stash-loader.js +12 -0
  62. package/dist/setup/setup.js +1 -1
  63. package/dist/storage/repositories/index-db.js +10 -1
  64. package/dist/tasks/backends/index.js +9 -0
  65. package/dist/tasks/runner.js +9 -0
  66. package/package.json +2 -4
@@ -2,23 +2,29 @@
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
4
  /**
5
- * Auto-index: silently run an incremental `akm index` when the local index
6
- * is stale or absent, so that `search`, `show`, and `feedback` always operate
7
- * against current on-disk state without requiring the user to manually run
8
- * `akm index` first.
5
+ * Auto-index bootstrap: silently build the local index inline when it cannot
6
+ * serve the caller's stash at all (missing DB, no `entries` table, zero rows,
7
+ * or built for a different stash), so `search`, `show`, and `feedback` work
8
+ * on first use without a manual `akm index`.
9
9
  *
10
- * This replaces the old filesystem fallbacks that were scattered across
11
- * `searchLocal()` and `show.ts`, centralizing the "indexed yet?" gap handling
12
- * behind a single entry point.
10
+ * Content FRESHNESS is intentionally not this module's job on the read path.
11
+ * Writers maintain the index (`indexWrittenAssets` for `remember`/extract
12
+ * session assets; the mutation commands run `akmIndex()` themselves), and the
13
+ * improve cron / explicit `akm index` do full refreshes. Reads serve whatever
14
+ * populated index exists. The previous design — a staleness walk plus a
15
+ * detached background reindex per read — made every read on an actively
16
+ * written stash spawn a writer that the read's own telemetry then queued
17
+ * behind (see docs/design/read-path-reindex-contention-findings.md).
18
+ *
19
+ * `mode: "blocking"` (improve) still checks staleness and rebuilds inline,
20
+ * because its planning logic needs a current `entries` table in-process.
13
21
  */
14
- import { spawn } from "node:child_process";
15
22
  import fs from "node:fs";
16
23
  import path from "node:path";
17
24
  import { ASSET_SPECS, TYPE_DIRS } from "../core/asset/asset-spec.js";
18
- import { getDataDir, getDbPath } from "../core/paths.js";
25
+ import { getDbPath } from "../core/paths.js";
19
26
  import { warn } from "../core/warn.js";
20
27
  import { closeDatabase, getEntryCount, getIndexedFilePaths, getMeta, openExistingDatabase } from "./db/db.js";
21
- import { acquireIndexWriterLease, handoffIndexWriterLeaseToPid } from "./index-writer-lock.js";
22
28
  function getIndexableFiles(root, spec) {
23
29
  if (!fs.existsSync(root))
24
30
  return [];
@@ -138,12 +144,9 @@ export function isIndexStale(stashDir) {
138
144
  * i.e. the DB file exists, the `entries` table holds rows, and those rows were
139
145
  * built for this stash (it is the stored primary stash or appears in the
140
146
  * stored `stashDirs` set). When this is true the index is at worst
141
- * content-stale, so the `#607` background-reindex optimization is safe: the
142
- * caller gets slightly-stale-but-relevant results immediately. When it is
143
- * false the existing index has nothing relevant to return (no DB, no `entries`
144
- * table, zero rows, or built for a different stash), so a background reindex
145
- * would leave the caller empty until the next read — those cases must rebuild
146
- * inline.
147
+ * content-stale, so read paths serve it as-is. When it is false the existing
148
+ * index has nothing relevant to return (no DB, no `entries` table, zero rows,
149
+ * or built for a different stash), so those cases must rebuild inline.
147
150
  */
148
151
  function indexCanServeStash(stashDir) {
149
152
  const dbPath = getDbPath();
@@ -174,43 +177,6 @@ function indexCanServeStash(stashDir) {
174
177
  closeDatabase(db);
175
178
  }
176
179
  }
177
- /**
178
- * Spawn a background `akm index` process. Non-blocking — returns immediately.
179
- * Background callers share the same global index-writer lease as foreground
180
- * writers, so stale-read-triggered auto-index attempts coalesce safely.
181
- */
182
- async function spawnBackgroundReindex(_stashDir) {
183
- const dataDir = getDataDir();
184
- const logFile = path.join(dataDir, "logs", "index-background.log");
185
- fs.mkdirSync(path.dirname(logFile), { recursive: true });
186
- const lease = await acquireIndexWriterLease({ mode: "try", purpose: "background-reindex-spawn" });
187
- if (!lease)
188
- return;
189
- const akmBin = process.argv[0];
190
- const akmScript = process.argv[1];
191
- try {
192
- const child = spawn(akmBin, [akmScript, "index", "--background"], {
193
- detached: true,
194
- stdio: ["ignore", fs.openSync(logFile, "a"), fs.openSync(logFile, "a")],
195
- env: { ...process.env },
196
- });
197
- if (!child.pid) {
198
- lease.release();
199
- return;
200
- }
201
- handoffIndexWriterLeaseToPid(lease, child.pid, "background-reindex");
202
- try {
203
- child.unref();
204
- }
205
- catch {
206
- // ignore
207
- }
208
- }
209
- catch (error) {
210
- lease.release();
211
- throw error;
212
- }
213
- }
214
180
  async function runInlineReindex(stashDir) {
215
181
  try {
216
182
  const { akmIndex } = await import("./indexer.js");
@@ -219,53 +185,31 @@ async function runInlineReindex(stashDir) {
219
185
  }
220
186
  catch (error) {
221
187
  warn("Auto-index failed, proceeding with existing index:", error instanceof Error ? error.message : String(error));
222
- return true;
188
+ return false;
223
189
  }
224
190
  }
225
191
  /**
226
- * Ensure the local index exists and is fresh enough for the caller's needs.
192
+ * Ensure the local index exists and can serve the caller.
227
193
  *
228
- * Default mode is `background`, which preserves the low-latency behavior used
229
- * by read paths (`search`, `show`, `feedback`): when a populated index is
230
- * merely stale, spawn a detached reindex and proceed against the existing
231
- * index. When the index is entirely absent (no DB / no `entries` table / zero
232
- * rows) the rebuild runs inline regardless of mode, since there is nothing to
233
- * proceed against.
194
+ * Default mode is `background` — the read-path contract (`search`, `show`,
195
+ * `feedback`): a populated index built for this stash is served as-is (its
196
+ * freshness is the writers' job, see module doc); an unusable index rebuilds
197
+ * inline, since there is nothing to proceed against.
234
198
  *
235
- * `mode: "blocking"` waits for the rebuild to finish before returning. Use
236
- * this for callers like `improve` whose planning logic depends on a populated
237
- * `entries` table in the same process.
199
+ * `mode: "blocking"` additionally treats content-staleness as a rebuild
200
+ * trigger and waits for it. Use this for callers like `improve` whose
201
+ * planning logic depends on a current `entries` table in the same process.
238
202
  *
239
- * Returns `true` if an index run was attempted.
203
+ * Returns `true` only when an inline index run succeeds.
204
+ * A rebuild attempt that fails (throws) resolves to `false`.
240
205
  */
241
206
  export async function ensureIndex(stashDir, options = {}) {
242
- if (!isIndexStale(stashDir))
243
- return false;
244
- // Blocking when explicitly requested, or whenever the existing index cannot
245
- // serve this stash (absent DB, no `entries` table, zero rows, or built for a
246
- // different stash): a background reindex returns immediately and would leave
247
- // a first-time caller (search, curate, wiki, show, feedback) with empty
248
- // results. Building inline is a one-off cost; a populated index for this
249
- // stash that is merely content-stale still refreshes in the background.
250
- if (options.mode === "blocking" || !indexCanServeStash(stashDir)) {
251
- return runInlineReindex(stashDir);
252
- }
253
- // The background path re-invokes the akm CLI as a detached child via
254
- // `process.argv[1]`. That is only the akm entrypoint when THIS process is the
255
- // akm CLI itself — which the CLI startup block signals with AKM_CLI_ENTRY=1.
256
- // In any other host (the in-process test runner, a library embedding akm),
257
- // argv[1] points at the host (e.g. the test runner), so spawning it would
258
- // launch the wrong program and orphan it. Build inline there instead — same
259
- // resulting index, no detached process.
260
- if (process.env.AKM_CLI_ENTRY !== "1") {
207
+ if (options.mode === "blocking") {
208
+ if (!isIndexStale(stashDir))
209
+ return false;
261
210
  return runInlineReindex(stashDir);
262
211
  }
263
- try {
264
- await spawnBackgroundReindex(stashDir);
265
- return true;
266
- }
267
- catch (error) {
268
- warn("Background reindex spawn failed, proceeding with existing index:", error instanceof Error ? error.message : String(error));
269
- return true;
270
- }
212
+ if (indexCanServeStash(stashDir))
213
+ return false;
214
+ return runInlineReindex(stashDir);
271
215
  }
@@ -7,6 +7,7 @@ import { probeLock, releaseLock, releaseLockIfOwned, tryAcquireLockSync } from "
7
7
  import { getDbPath, getIndexWriterLockPath } from "../core/paths.js";
8
8
  const INDEX_WRITER_LOCK_STALE_AFTER_MS = 12 * 60 * 60 * 1000;
9
9
  const INDEX_WRITER_WAIT_MS = 100;
10
+ const DEFAULT_INDEX_WRITER_MAX_WAIT_MS = 10 * 60 * 1000;
10
11
  const heldLocks = new Map();
11
12
  function buildPayload(purpose, pid = process.pid) {
12
13
  return JSON.stringify({
@@ -46,16 +47,11 @@ function retainHeldLock(lockPath) {
46
47
  heldLocks.set(lockPath, { depth: 1, exitHandler });
47
48
  return { lockPath, release: () => releaseHeldLock(lockPath) };
48
49
  }
49
- function detachHeldLock(lockPath) {
50
- const held = heldLocks.get(lockPath);
51
- if (!held)
52
- return;
53
- heldLocks.delete(lockPath);
54
- process.off("exit", held.exitHandler);
55
- }
56
50
  export async function acquireIndexWriterLease(options) {
57
51
  const mode = options.mode ?? "wait";
58
52
  const lockPath = getIndexWriterLockPath();
53
+ const startedAt = Date.now();
54
+ const maxWaitMs = options.maxWaitMs ?? DEFAULT_INDEX_WRITER_MAX_WAIT_MS;
59
55
  fs.mkdirSync(path.dirname(lockPath), { recursive: true });
60
56
  if (heldLocks.has(lockPath)) {
61
57
  return retainHeldLock(lockPath);
@@ -75,6 +71,12 @@ export async function acquireIndexWriterLease(options) {
75
71
  }
76
72
  if (mode === "try")
77
73
  return undefined;
74
+ // Held by another live process. Time out only *after* a real acquisition
75
+ // attempt, so a caller with maxWaitMs:0 still gets one chance at a free lock
76
+ // instead of throwing before it ever tries.
77
+ if (maxWaitMs >= 0 && Date.now() - startedAt >= maxWaitMs) {
78
+ throw new Error(`timed out waiting for index writer lease for ${options.purpose}`);
79
+ }
78
80
  await delay(INDEX_WRITER_WAIT_MS);
79
81
  }
80
82
  }
@@ -90,10 +92,6 @@ export async function withIndexWriterLease(options, run) {
90
92
  lease.release();
91
93
  }
92
94
  }
93
- export function handoffIndexWriterLeaseToPid(lease, pid, purpose) {
94
- fs.writeFileSync(lease.lockPath, buildPayload(purpose, pid), "utf8");
95
- detachHeldLock(lease.lockPath);
96
- }
97
95
  export function probeIndexWriterLease() {
98
96
  return probeLock(getIndexWriterLockPath(), { staleAfterMs: INDEX_WRITER_LOCK_STALE_AFTER_MS });
99
97
  }
@@ -0,0 +1,105 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * Write-path indexing: targeted single-file index updates for asset writers.
6
+ *
7
+ * The index is maintained eagerly by every first-class mutation command
8
+ * (`source add`, `wiki`, `workflow`, `setup` all run `akmIndex()` after
9
+ * writing). The memory write paths — `akm remember` / `writeMarkdownAsset`
10
+ * and extract's session assets — historically did not, which is why reads
11
+ * used to compensate with stale-triggered background reindexes (the
12
+ * lock-contention footgun removed alongside this module's introduction; see
13
+ * docs/design/read-path-reindex-contention-findings.md §7).
14
+ *
15
+ * This is NOT a general reindex. It upserts exactly the files the caller just
16
+ * wrote: frontmatter/metadata via the shared matcher pipeline, the `entries`
17
+ * row, and an incremental FTS refresh. Embeddings, index-time LLM passes,
18
+ * graph extraction, `builtAt`, and the per-dir walk cache are all deliberately
19
+ * untouched — the next full run heals them (the opportunistic-recovery
20
+ * strategy of docs/technical/index-consistency-adr.md).
21
+ */
22
+ import fs from "node:fs";
23
+ import path from "node:path";
24
+ import { getDbPath } from "../core/paths.js";
25
+ import { warnVerbose } from "../core/warn.js";
26
+ import { closeDatabase, getEntryCount, openExistingDatabase, rebuildFts, upsertEntry } from "./db/db.js";
27
+ import { generateMetadataFlat } from "./passes/metadata.js";
28
+ import { buildSearchText } from "./search/search-fields.js";
29
+ /**
30
+ * Busy-timeout (ms) for write-path index upserts. This is a real write, unlike
31
+ * the 250ms telemetry inserts, but it must not hang `akm remember` for the full
32
+ * default 30s behind a running full reindex. When it times out, the upsert is
33
+ * skipped and the asset becomes searchable after that reindex instead.
34
+ */
35
+ export const WRITE_PATH_INDEX_BUSY_TIMEOUT_MS = 5_000;
36
+ /**
37
+ * Index the given just-written asset files into the existing local index.
38
+ *
39
+ * FAIL-OPEN at every step: any error (index.db absent, empty, locked past the
40
+ * busy timeout, unparseable file) is reduced to a verbose-only warning and the
41
+ * write command succeeds untouched. The degraded outcome is exactly the
42
+ * pre-write-path-indexing behavior: the asset appears after the next full
43
+ * `akm index` / improve-cron run.
44
+ *
45
+ * An absent or empty index is skipped on purpose — bootstrap belongs to the
46
+ * first read (`ensureIndex`) or an explicit `akm index`, which also cover
47
+ * embeddings and the other passes this fast path skips.
48
+ */
49
+ export async function indexWrittenAssets(stashDir, filePaths) {
50
+ try {
51
+ const dbPath = getDbPath();
52
+ if (!fs.existsSync(dbPath))
53
+ return;
54
+ // The full walk never descends into dot-directories (they hold state like
55
+ // `.meta/`, `.stash.json`), and `shouldIndexStashFile` relies on the walker
56
+ // for that — mirror it here so this fast path indexes exactly what a full
57
+ // run would.
58
+ const files = filePaths.filter((f) => {
59
+ if (!fs.existsSync(f))
60
+ return false;
61
+ const rel = path.relative(stashDir, f);
62
+ return !rel.split(/[\\/]+/).some((segment) => segment.startsWith("."));
63
+ });
64
+ if (files.length === 0)
65
+ return;
66
+ // Generate metadata BEFORE opening the DB so the write window stays
67
+ // short. One call per file keeps the entry↔path pairing exact.
68
+ const pairs = [];
69
+ for (const file of files) {
70
+ const generated = await generateMetadataFlat(stashDir, [file]);
71
+ const entry = generated.entries[0];
72
+ // Workflows carry a side-table document upsert this fast path doesn't
73
+ // do; no current caller writes them, but guard so one never lands
74
+ // half-indexed.
75
+ if (entry && entry.type !== "workflow")
76
+ pairs.push({ file, entry });
77
+ }
78
+ if (pairs.length === 0)
79
+ return;
80
+ const db = openExistingDatabase(dbPath);
81
+ try {
82
+ db.exec(`PRAGMA busy_timeout = ${WRITE_PATH_INDEX_BUSY_TIMEOUT_MS}`);
83
+ if (getEntryCount(db) === 0)
84
+ return;
85
+ for (const { file, entry } of pairs) {
86
+ const entryKey = `${stashDir}:${entry.type}:${entry.name}`;
87
+ let entryWithSize = entry;
88
+ try {
89
+ entryWithSize = { ...entry, fileSize: fs.statSync(file).size };
90
+ }
91
+ catch {
92
+ // stat raced a delete — index without the size, like the full walk does.
93
+ }
94
+ upsertEntry(db, entryKey, path.dirname(file), file, stashDir, entryWithSize, buildSearchText(entry));
95
+ }
96
+ rebuildFts(db, { incremental: true });
97
+ }
98
+ finally {
99
+ closeDatabase(db);
100
+ }
101
+ }
102
+ catch (error) {
103
+ warnVerbose("Write-path index update skipped (asset appears after the next full index):", error instanceof Error ? error.message : String(error));
104
+ }
105
+ }
@@ -118,7 +118,7 @@ async function runWalkPhase(ctx) {
118
118
  ctx.timing.tWalkEnd = Date.now();
119
119
  throwIfAborted(signal);
120
120
  // LLM enrichment for directories that need it
121
- await enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, true, reEnrich);
121
+ await enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, reEnrich);
122
122
  onProgress({
123
123
  phase: "llm",
124
124
  message: resolveIndexPassLLM("enrichment", config)
@@ -226,7 +226,19 @@ function runCleanPass(db, dryRun) {
226
226
  };
227
227
  }
228
228
  // ── Indexer ──────────────────────────────────────────────────────────────────
229
+ // ── Test seam ────────────────────────────────────────────────────────────────
230
+ // Swap-and-restore override. Inert in production; only tests call the setter.
231
+ let akmIndexOverride;
232
+ /** TEST-ONLY. Swap the implementation of `akmIndex`; pass undefined to restore. */
233
+ export function _setAkmIndexForTests(fake) {
234
+ akmIndexOverride = fake;
235
+ }
229
236
  export async function akmIndex(options) {
237
+ if (akmIndexOverride)
238
+ return akmIndexOverride(options);
239
+ return akmIndexReal(options);
240
+ }
241
+ async function akmIndexReal(options) {
230
242
  return withIndexWriterLease({ purpose: "akm-index", signal: options?.signal }, async () => {
231
243
  const stashDir = options?.stashDir || resolveStashDir();
232
244
  const onProgress = options?.onProgress ?? (() => { });
@@ -640,7 +652,7 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
640
652
  insertTransaction();
641
653
  return { scannedDirs, skippedDirs, generatedCount, warnings, dirsNeedingLlm };
642
654
  }
643
- async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, _enrich = false, reEnrich = false) {
655
+ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, reEnrich = false) {
644
656
  // Resolve per-pass LLM config via the unified shim. Returns undefined when
645
657
  // either no `akm.llm` is configured or the user opted this pass out via
646
658
  // `index.enrichment.llm = false`. (#208)
@@ -977,7 +989,7 @@ function resolveIndexedFiles(dirPath, files, stash) {
977
989
  for (const entry of stash.entries) {
978
990
  const entryPath = entry.filename
979
991
  ? path.join(dirPath, entry.filename)
980
- : matchEntryToFile(entry.name, fileBasenameMap, files);
992
+ : matchEntryToFile(entry.name, fileBasenameMap);
981
993
  if (entryPath)
982
994
  resolved.add(entryPath);
983
995
  }
@@ -1096,7 +1108,7 @@ export function buildFileBasenameMap(files) {
1096
1108
  * try matching the last segment
1097
1109
  * 3. No implicit file fallback: ambiguous legacy entries are skipped
1098
1110
  */
1099
- export function matchEntryToFile(entryName, fileMap, _files) {
1111
+ export function matchEntryToFile(entryName, fileMap) {
1100
1112
  // Exact match on entry name
1101
1113
  const exact = fileMap.get(entryName);
1102
1114
  if (exact)
@@ -191,6 +191,15 @@ export function validateStashEntry(entry) {
191
191
  const contradictedBy = normalizeNonEmptyStringList(e.contradictedBy);
192
192
  if (contradictedBy)
193
193
  result.contradictedBy = contradictedBy;
194
+ // R5 — consolidation provenance fields must survive the whitelist too, or
195
+ // stash.json-overridden merge products lose merge-following + generation
196
+ // counting in the collapse detector.
197
+ if (typeof e.generation === "number" && Number.isFinite(e.generation) && e.generation > 0) {
198
+ result.generation = Math.floor(e.generation);
199
+ }
200
+ const sourceRefs = normalizeNonEmptyStringList(e.sourceRefs);
201
+ if (sourceRefs)
202
+ result.sourceRefs = sourceRefs;
194
203
  const currentBeliefRefs = normalizeNonEmptyStringList(e.currentBeliefRefs);
195
204
  if (currentBeliefRefs)
196
205
  result.currentBeliefRefs = currentBeliefRefs;
@@ -342,6 +351,17 @@ export function applyCuratedFrontmatter(entry, fmData) {
342
351
  const contradictedBy = normalizeStringListOrUndefined(fmData.contradictedBy);
343
352
  if (contradictedBy)
344
353
  entry.contradictedBy = contradictedBy;
354
+ // R5 — consolidation provenance. `generation` (merge depth counter) and
355
+ // `source_refs` (merge/distill provenance pointers) are written by the
356
+ // improve pipeline; captured into the index so the collapse detector can
357
+ // count over-generation assets and follow merges without filesystem reads.
358
+ const generation = fmData.generation;
359
+ if (typeof generation === "number" && Number.isFinite(generation) && generation > 0) {
360
+ entry.generation = Math.floor(generation);
361
+ }
362
+ const sourceRefs = normalizeStringListOrUndefined(fmData.source_refs);
363
+ if (sourceRefs)
364
+ entry.sourceRefs = sourceRefs;
345
365
  const currentBeliefRefs = normalizeStringListOrUndefined(fmData.currentBeliefRefs);
346
366
  if (currentBeliefRefs)
347
367
  entry.currentBeliefRefs = currentBeliefRefs;
@@ -0,0 +1,23 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import { ensureIndex } from "./ensure-index.js";
5
+ import { resolveSourceEntries } from "./search/search-source.js";
6
+ /** Resolve the active read sources using the same resolution rules as search/show. */
7
+ export function resolveReadSources(overrideStashDir, existingConfig) {
8
+ const sources = resolveSourceEntries(overrideStashDir, existingConfig);
9
+ return { sources, primarySource: sources[0] };
10
+ }
11
+ /** Ensure the primary source's index can serve reads, when a primary source exists. */
12
+ export async function ensurePrimaryIndexForRead(primarySource) {
13
+ if (!primarySource?.path)
14
+ return false;
15
+ return ensureIndex(primarySource.path);
16
+ }
17
+ /**
18
+ * Convenience helper for callers that only need to ensure a read index from a
19
+ * configured stash path and default config.
20
+ */
21
+ export async function ensurePrimaryIndexFromConfig(overrideStashDir, existingConfig) {
22
+ return ensurePrimaryIndexForRead(resolveReadSources(overrideStashDir, existingConfig).primarySource);
23
+ }
@@ -29,6 +29,29 @@ import { applyRankingRules, combineSearchScores, normalizeFtsScores } from "./ra
29
29
  import { enrichSearchHit } from "./search-hit-enrichers.js";
30
30
  import { buildEditHint, findSourceForPath, isEditable } from "./search-source.js";
31
31
  import { deriveSemanticProviderFingerprint, getEffectiveSemanticStatus, isSemanticRuntimeReady, readSemanticStatus, } from "./semantic-status.js";
32
+ /**
33
+ * Age past which search surfaces a "run akm index" hint. Reads serve the
34
+ * existing index as-is (freshness is the writers' job — `indexWrittenAssets`
35
+ * plus full runs), so on installs with no improve cron a hand-edited or
36
+ * git-pulled file stays invisible until someone reindexes. The hint makes that
37
+ * actionable without re-introducing read-triggered reindexing.
38
+ */
39
+ const STALE_INDEX_HINT_MS = 7 * 24 * 60 * 60 * 1000;
40
+ function buildStaleIndexHint(db) {
41
+ try {
42
+ const builtAt = getMeta(db, "builtAt");
43
+ if (!builtAt)
44
+ return undefined;
45
+ const ageMs = Date.now() - new Date(builtAt).getTime();
46
+ if (!Number.isFinite(ageMs) || ageMs < STALE_INDEX_HINT_MS)
47
+ return undefined;
48
+ const days = Math.floor(ageMs / (24 * 60 * 60 * 1000));
49
+ return `Search index was last built ${days} day(s) ago. Files added or edited outside akm since then are not searchable — run 'akm index' to refresh.`;
50
+ }
51
+ catch {
52
+ return undefined;
53
+ }
54
+ }
32
55
  export function buildLocalAction(type, ref, registry = defaultRendererRegistry) {
33
56
  return buildActionFromContributors({ type, ref }, defaultActionContributors(registry)) ?? `akm show ${ref}`;
34
57
  }
@@ -95,7 +118,9 @@ export async function searchLocal(input) {
95
118
  if (config.semanticSearchMode === "auto" && semanticStatus === "blocked") {
96
119
  warnings.push("Semantic search is currently blocked. Using keyword search until the semantic backend is healthy again.");
97
120
  }
98
- // Auto-index when stale so the DB is always current before querying.
121
+ // Bootstrap-only: builds the index inline when it cannot serve this stash.
122
+ // Content freshness is the writers' job (indexWrittenAssets + full runs);
123
+ // reads serve the existing index as-is.
99
124
  await ensureIndex(stashDir);
100
125
  const dbPath = getDbPath();
101
126
  if (!fs.existsSync(dbPath)) {
@@ -117,6 +142,9 @@ export async function searchLocal(input) {
117
142
  mode: "keyword",
118
143
  };
119
144
  }
145
+ const staleHint = buildStaleIndexHint(db);
146
+ if (staleHint)
147
+ warnings.push(staleHint);
120
148
  const { hits, embedMs, rankMs } = await searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry, filters, includeProposed, beliefFilter, restrictToSources, includeExcludedTypes, disableProjectContext, disableScopedUtility);
121
149
  return {
122
150
  hits,
@@ -17,6 +17,14 @@ const TYPE_BOOST = {
17
17
  const MAX_BOOST_SUM = 3.0;
18
18
  const UTILITY_WEIGHT = 0.5;
19
19
  const UTILITY_MAX_BOOST = 1.5;
20
+ /**
21
+ * R2 (docs/design/improve-self-learning-analysis.md) — weight of the improve
22
+ * loop's `asset_salience.rank_score` in user-facing ranking. Bounded well
23
+ * below the utility boost so the composed signal refines, never dominates,
24
+ * lexical/semantic relevance. rank_score ∈ [0,1] → boost ∈ [1, 1.2].
25
+ */
26
+ const SALIENCE_WEIGHT = 0.2;
27
+ const SALIENCE_MAX_BOOST = 1.2;
20
28
  /**
21
29
  * Phase 2A / Rec 5: default recency half-life (days) used when no
22
30
  * `utilityDecayConfig` is supplied to the ranking pipeline. Matches the
@@ -334,7 +342,31 @@ export const defaultRankingContributors = [
334
342
  pinnedFactRankingContributor,
335
343
  projectContextRankingContributor,
336
344
  ];
337
- export const defaultUtilityRankingContributors = [utilityRankingContributor];
345
+ /**
346
+ * R2 — compose the improve loop's salience core into user-facing ranking.
347
+ *
348
+ * `asset_salience.rank_score` (encoding + outcome + retrieval projection,
349
+ * maintained every improve run) previously drove only improve's INTERNAL
350
+ * maintenance selection — the "better assets surface more" loop ran solely
351
+ * through the utility EMA. This bounded multiplicative boost closes the outer
352
+ * loop: usage/outcome-reinforced assets rank higher in `search`/`curate`.
353
+ */
354
+ const salienceRankingContributor = {
355
+ name: "salience-ranking",
356
+ appliesTo(item, ctx) {
357
+ const rank = ctx.salienceRankScores?.get(item.id);
358
+ return rank !== undefined && rank > 0;
359
+ },
360
+ apply(item, ctx) {
361
+ const rank = ctx.salienceRankScores?.get(item.id) ?? 0;
362
+ const rawBoost = 1 + Math.min(1, Math.max(0, rank)) * SALIENCE_WEIGHT;
363
+ item.score *= Math.min(rawBoost, SALIENCE_MAX_BOOST);
364
+ },
365
+ };
366
+ export const defaultUtilityRankingContributors = [
367
+ utilityRankingContributor,
368
+ salienceRankingContributor,
369
+ ];
338
370
  export function applyScoreContributors(item, ctx, contributors = defaultRankingContributors) {
339
371
  let boostSum = 0;
340
372
  for (const contributor of contributors) {
@@ -1,8 +1,68 @@
1
1
  // This Source Code Form is subject to the terms of the Mozilla Public
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import fs from "node:fs";
5
+ import { makeAssetRef } from "../../core/asset/asset-ref.js";
6
+ import { getStateDbPath } from "../../core/state-db.js";
7
+ import { openDatabase } from "../../storage/database.js";
4
8
  import { getUtilityScoresByIds } from "../db/db.js";
5
9
  import { applyScoreContributors, applyUtilityContributors } from "./ranking-contributors.js";
10
+ /**
11
+ * R2 — best-effort load of `asset_salience.rank_score` from state.db for the
12
+ * ranked items. Fail-open: any error (state.db locked by a concurrent improve
13
+ * run, missing table, unreadable path) returns an empty map, which makes the
14
+ * salience contributor a no-op — byte-identical to pre-R2 ranking.
15
+ *
16
+ * Deliberately NOT `openStateDatabase()`: that helper runs migrations and sets
17
+ * a 30 s busy timeout — too heavy for a search hot path. This opens read-only,
18
+ * never creates or migrates state.db (missing file / missing table = empty
19
+ * map), and caps lock waits at 250 ms so a concurrent improve run can only
20
+ * ever cost the search a quarter second, not a stall.
21
+ */
22
+ export function loadSalienceRankScores(items) {
23
+ const result = new Map();
24
+ if (items.length === 0)
25
+ return result;
26
+ try {
27
+ const dbPath = getStateDbPath();
28
+ if (!fs.existsSync(dbPath))
29
+ return result; // improve loop has never run here
30
+ const idByRef = new Map();
31
+ for (const item of items) {
32
+ idByRef.set(makeAssetRef(item.entry.type, item.entry.name), item.id);
33
+ }
34
+ const stateDb = openDatabase(dbPath, { readonly: true });
35
+ try {
36
+ try {
37
+ stateDb.exec("PRAGMA busy_timeout = 250");
38
+ }
39
+ catch {
40
+ // pragma failure on a readonly handle is fine — default timeout applies
41
+ }
42
+ const refs = [...idByRef.keys()];
43
+ const CHUNK = 500;
44
+ for (let i = 0; i < refs.length; i += CHUNK) {
45
+ const chunk = refs.slice(i, i + CHUNK);
46
+ const placeholders = chunk.map(() => "?").join(",");
47
+ const rows = stateDb
48
+ .prepare(`SELECT asset_ref, rank_score FROM asset_salience WHERE asset_ref IN (${placeholders})`)
49
+ .all(...chunk);
50
+ for (const row of rows) {
51
+ const id = idByRef.get(row.asset_ref);
52
+ if (id !== undefined)
53
+ result.set(id, row.rank_score);
54
+ }
55
+ }
56
+ }
57
+ finally {
58
+ stateDb.close();
59
+ }
60
+ }
61
+ catch {
62
+ // Fail open — search must never break because state.db is unavailable.
63
+ }
64
+ return result;
65
+ }
6
66
  export function normalizeFtsScores(results) {
7
67
  const ftsScoreMap = new Map();
8
68
  if (results.length === 0)
@@ -71,12 +131,18 @@ export function applyRankingRules(options) {
71
131
  applyScoreContributors(item, rankingContext);
72
132
  }
73
133
  const { global: utilScoresMap, scoped: scopedUtilScoresMap } = getUtilityScoresByIds(options.db, options.items.map((item) => item.id), options.scopeKey);
134
+ // R2 — compose the improve loop's salience into user-facing ranking.
135
+ // undefined = load from state.db (default); null = explicitly disabled.
136
+ const salienceRankScores = options.salienceRankScores === null
137
+ ? new Map()
138
+ : (options.salienceRankScores ?? loadSalienceRankScores(options.items));
74
139
  const utilityContext = {
75
140
  ...rankingContext,
76
141
  utilityScores: utilScoresMap,
77
142
  scopedUtilityScores: scopedUtilScoresMap,
78
143
  utilityDecayConfig: options.utilityDecayConfig,
79
144
  positiveFeedbackCounts: options.positiveFeedbackCounts,
145
+ salienceRankScores,
80
146
  };
81
147
  for (const item of options.items) {
82
148
  applyUtilityContributors(item, utilityContext);
@@ -11,6 +11,12 @@
11
11
  * - hints: searchHints + examples + usage + intent fields
12
12
  * - content: TOC headings (lowest-weight catch-all)
13
13
  */
14
+ // NOTE (R5): the collapse detector's frozen canary queries are built from the
15
+ // same surface this function indexes (name tokens / tags / description) and
16
+ // scored via FTS against it. Changing what buildSearchFields includes shifts
17
+ // the detector's recall baseline for ALL existing canary sets — coordinate
18
+ // with src/commands/improve/collapse-detector.ts (buildCanaryQuery) and expect
19
+ // operators to re-mint via `akm improve canary --refresh` after such a change.
14
20
  export function buildSearchFields(entry) {
15
21
  const name = entry.name.replace(/[-_]/g, " ").toLowerCase();
16
22
  const description = (entry.description ?? "").toLowerCase();