akm-cli 0.9.0-beta.52 → 0.9.0-beta.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/assets/hints/cli-hints-full.md +6 -5
  2. package/dist/cli.js +0 -7
  3. package/dist/commands/env/env-cli.js +3 -2
  4. package/dist/commands/env/env.js +14 -67
  5. package/dist/commands/health/checks.js +28 -15
  6. package/dist/commands/health.js +68 -1
  7. package/dist/commands/improve/collapse-detector.js +419 -0
  8. package/dist/commands/improve/consolidate.js +72 -54
  9. package/dist/commands/improve/distill.js +79 -13
  10. package/dist/commands/improve/extract.js +13 -6
  11. package/dist/commands/improve/homeostatic.js +109 -79
  12. package/dist/commands/improve/improve-cli.js +67 -1
  13. package/dist/commands/improve/improve.js +10 -0
  14. package/dist/commands/improve/loop-stages.js +39 -1
  15. package/dist/commands/improve/outcome-loop.js +15 -3
  16. package/dist/commands/improve/preparation.js +17 -8
  17. package/dist/commands/improve/salience.js +49 -32
  18. package/dist/commands/read/curate.js +5 -9
  19. package/dist/commands/read/knowledge.js +4 -0
  20. package/dist/commands/read/search.js +5 -2
  21. package/dist/commands/read/show.js +3 -3
  22. package/dist/core/asset/asset-spec.js +3 -2
  23. package/dist/core/config/config-schema.js +39 -17
  24. package/dist/core/eval/rank-metrics.js +113 -0
  25. package/dist/core/state/migrations.js +56 -0
  26. package/dist/core/state-db.js +146 -19
  27. package/dist/indexer/ensure-index.js +33 -90
  28. package/dist/indexer/index-writer-lock.js +0 -11
  29. package/dist/indexer/index-written-assets.js +105 -0
  30. package/dist/indexer/passes/metadata.js +20 -0
  31. package/dist/indexer/search/db-search.js +29 -1
  32. package/dist/indexer/search/ranking-contributors.js +33 -1
  33. package/dist/indexer/search/ranking.js +66 -0
  34. package/dist/indexer/search/search-fields.js +6 -0
  35. package/dist/llm/feature-gate.js +6 -2
  36. package/dist/output/renderers.js +8 -13
  37. package/dist/output/shapes/helpers.js +0 -3
  38. package/dist/output/shapes/passthrough.js +1 -0
  39. package/dist/scripts/migrate-storage.js +152 -33
  40. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +41 -18
  41. package/dist/storage/repositories/index-db.js +10 -1
  42. package/package.json +2 -4
@@ -472,19 +472,20 @@ export function insertProposalIfAbsent(db, proposal, stashDir) {
472
472
  /**
473
473
  * Errors `BEGIN IMMEDIATE` can throw under concurrent-writer contention that are
474
474
  * transient (the statement did NOT start a usable transaction) and safe to
475
- * retry after clearing any phantom transaction state:
475
+ * retry:
476
476
  * - "database is locked" / SQLITE_BUSY — another writer holds the lock.
477
- * - "cannot start a transaction within a transaction" — bun:sqlite can leave
478
- * the connection reporting an open transaction after a contended busy-wait
479
- * on BEGIN IMMEDIATE (observed only under heavy parallel load, e.g. the
480
- * proposal-queue worker race). A ROLLBACK clears that phantom state.
481
477
  * These are start-of-transaction failures only; an error thrown by `fn` is a
482
478
  * real failure and is NEVER retried.
479
+ *
480
+ * "cannot start a transaction within a transaction" is deliberately NOT
481
+ * retryable: it means a transaction is already open on this connection (a
482
+ * re-entrant call — handled by the entry guard in withImmediateTransaction),
483
+ * and "retrying" it with a ROLLBACK would destroy the caller's transaction
484
+ * (issue #686).
483
485
  */
484
486
  function isRetryableBeginError(err) {
485
487
  const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
486
- return (msg.includes("within a transaction") ||
487
- msg.includes("database is locked") ||
488
+ return (msg.includes("database is locked") ||
488
489
  msg.includes("database table is locked") ||
489
490
  // Phantom BEGIN (see below) — synthesized when BEGIN IMMEDIATE returns
490
491
  // without opening a transaction. Safe to retry: fn() has not run.
@@ -498,6 +499,16 @@ function sleepSyncMs(ms) {
498
499
  Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
499
500
  }
500
501
  export function withImmediateTransaction(db, fn) {
502
+ // Re-entrancy guard (issue #686): if a transaction is already open on this
503
+ // connection (e.g. a nested withImmediateTransaction call inside an outer
504
+ // frame's fn), join it — run fn directly with no BEGIN/COMMIT/ROLLBACK of
505
+ // our own. Without this, the nested BEGIN throws "cannot start a transaction
506
+ // within a transaction", which the old retry path answered with an
507
+ // unconditional ROLLBACK — destroying the OUTER transaction and leaving its
508
+ // COMMIT to fail with "cannot commit - no transaction is active".
509
+ if (db.inTransaction) {
510
+ return fn();
511
+ }
501
512
  let lastBeginErr;
502
513
  for (let attempt = 1; attempt <= WITH_IMMEDIATE_TX_MAX_ATTEMPTS; attempt++) {
503
514
  try {
@@ -515,13 +526,15 @@ export function withImmediateTransaction(db, fn) {
515
526
  catch (err) {
516
527
  lastBeginErr = err;
517
528
  if (isRetryableBeginError(err) && attempt < WITH_IMMEDIATE_TX_MAX_ATTEMPTS) {
518
- // Clear any phantom/stale transaction left by the contended BEGIN, then
519
- // retry with a small backoff so concurrent writers serialize cleanly.
520
- try {
521
- db.exec("ROLLBACK");
522
- }
523
- catch {
524
- // No active transaction to roll back — fine.
529
+ // Only roll back a transaction we can see — never blind-ROLLBACK, since
530
+ // that could destroy a transaction this frame does not own.
531
+ if (db.inTransaction) {
532
+ try {
533
+ db.exec("ROLLBACK");
534
+ }
535
+ catch {
536
+ // Transaction already gone — fine.
537
+ }
525
538
  }
526
539
  sleepSyncMs(2 ** (attempt - 1));
527
540
  continue;
@@ -530,15 +543,25 @@ export function withImmediateTransaction(db, fn) {
530
543
  }
531
544
  try {
532
545
  const result = fn();
546
+ if (!db.inTransaction) {
547
+ // The transaction we opened vanished while fn() ran (e.g. an
548
+ // auto-rollback or a stray ROLLBACK inside fn). fn's writes may have
549
+ // escaped serialization, so retrying is unsafe — fail loudly instead of
550
+ // letting COMMIT throw the opaque "cannot commit - no transaction is
551
+ // active" SQLiteError.
552
+ throw new Error("withImmediateTransaction invariant violated: transaction opened by BEGIN IMMEDIATE was no longer active after the transaction body ran; refusing to COMMIT (writes may have escaped serialization)");
553
+ }
533
554
  db.exec("COMMIT");
534
555
  return result;
535
556
  }
536
557
  catch (err) {
537
- try {
538
- db.exec("ROLLBACK");
539
- }
540
- catch {
541
- // Ignore rollback failures so the original error is preserved.
558
+ if (db.inTransaction) {
559
+ try {
560
+ db.exec("ROLLBACK");
561
+ }
562
+ catch {
563
+ // Ignore rollback failures so the original error is preserved.
564
+ }
542
565
  }
543
566
  throw err; // a real error inside the transaction body — never retried.
544
567
  }
@@ -1252,3 +1275,107 @@ export function upsertBodyEmbeddings(db, entries) {
1252
1275
  }
1253
1276
  })();
1254
1277
  }
1278
+ /** Insert a freshly minted canary set (all rows active, one shared set id). */
1279
+ export function insertCanaries(db, canarySetId, canaries, now) {
1280
+ if (canaries.length === 0)
1281
+ return;
1282
+ const ts = now ?? new Date().toISOString();
1283
+ const stmt = db.prepare(`
1284
+ INSERT INTO canary_queries (canary_set_id, anchor_ref, query, source, active, created_at)
1285
+ VALUES (?, ?, ?, ?, 1, ?)
1286
+ `);
1287
+ db.transaction(() => {
1288
+ for (const c of canaries) {
1289
+ stmt.run(canarySetId, c.anchorRef, c.query, c.source ?? "auto", ts);
1290
+ }
1291
+ })();
1292
+ }
1293
+ /** Load the active canary set (empty array = never minted). */
1294
+ export function getActiveCanaries(db) {
1295
+ // Scope to the NEWEST active set: if an interrupted refresh (or a bug) ever
1296
+ // leaves two sets active, mixing their rows would silently corrupt the
1297
+ // recall/entropy trend baselines. The newest set wins; stale-active rows are
1298
+ // simply never returned.
1299
+ return db
1300
+ .prepare(`SELECT * FROM canary_queries
1301
+ WHERE active = 1 AND canary_set_id = (
1302
+ SELECT canary_set_id FROM canary_queries WHERE active = 1
1303
+ ORDER BY created_at DESC, id DESC LIMIT 1
1304
+ )
1305
+ ORDER BY id`)
1306
+ .all();
1307
+ }
1308
+ /** Load one canary set's rows by its exact set id (any active state), insertion order. */
1309
+ export function getCanariesBySetId(db, canarySetId) {
1310
+ return db
1311
+ .prepare(`SELECT * FROM canary_queries WHERE canary_set_id = ? ORDER BY id`)
1312
+ .all(canarySetId);
1313
+ }
1314
+ /** List every distinct canary_set_id that still has active rows. */
1315
+ export function listActiveCanarySetIds(db) {
1316
+ const rows = db.prepare(`SELECT DISTINCT canary_set_id FROM canary_queries WHERE active = 1`).all();
1317
+ return rows.map((r) => r.canary_set_id);
1318
+ }
1319
+ /**
1320
+ * Deactivate every canary row in a set. Rows are RETAINED (active = 0) so
1321
+ * historical improve_cycle_metrics rows keyed on the old canary_set_id stay
1322
+ * interpretable; only `akm improve canary --refresh` calls this.
1323
+ */
1324
+ export function deactivateCanarySet(db, canarySetId) {
1325
+ const result = db
1326
+ .prepare(`UPDATE canary_queries SET active = 0 WHERE canary_set_id = ? AND active = 1`)
1327
+ .run(canarySetId);
1328
+ const changes = result.changes ?? 0;
1329
+ return typeof changes === "bigint" ? Number(changes) : changes;
1330
+ }
1331
+ /** Persist one qualifying cycle's store-health snapshot. */
1332
+ export function insertCycleMetrics(db, row) {
1333
+ db.prepare(`
1334
+ INSERT INTO improve_cycle_metrics
1335
+ (run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
1336
+ canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
1337
+ mean_bigram_diversity, over_generation_count, accepted_actions,
1338
+ merge_floor_violations, alerts_json)
1339
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1340
+ `).run(row.run_id, row.ts, row.pass, row.canary_set_id, row.mean_recall, row.mean_ndcg, row.mean_mrr, row.canary_ranks_json, row.store_total, row.store_by_type_json, row.distinct_content_ratio, row.mean_bigram_diversity, row.over_generation_count, row.accepted_actions, row.merge_floor_violations, row.alerts_json);
1341
+ }
1342
+ /**
1343
+ * Load the most recent cycle rows for one canary set, OLDEST-first (the alert
1344
+ * evaluator's window order). Scoped by canary_set_id so trends never compare
1345
+ * across canary re-mints.
1346
+ */
1347
+ export function queryRecentCycleMetrics(db, canarySetId, limit) {
1348
+ const rows = db
1349
+ .prepare(`SELECT run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
1350
+ canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
1351
+ mean_bigram_diversity, over_generation_count, accepted_actions,
1352
+ merge_floor_violations, alerts_json
1353
+ FROM improve_cycle_metrics WHERE canary_set_id = ?
1354
+ ORDER BY ts DESC, id DESC LIMIT ?`)
1355
+ .all(canarySetId, Math.max(0, limit));
1356
+ return rows.reverse();
1357
+ }
1358
+ /** Load the single most recent cycle row across all canary sets (health surface). */
1359
+ export function getLatestCycleMetrics(db) {
1360
+ const row = db
1361
+ .prepare(`SELECT run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
1362
+ canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
1363
+ mean_bigram_diversity, over_generation_count, accepted_actions,
1364
+ merge_floor_violations, alerts_json
1365
+ FROM improve_cycle_metrics ORDER BY ts DESC, id DESC LIMIT 1`)
1366
+ .get();
1367
+ return row == null ? undefined : row;
1368
+ }
1369
+ /**
1370
+ * Delete cycle rows older than `retentionDays` (default 365 — owner-approved;
1371
+ * a slow collapse needs a longer trend window than the 90-day events log).
1372
+ * Returns the purged row count. canary_queries rows are never purged.
1373
+ */
1374
+ export function purgeOldCycleMetrics(db, retentionDays = 365) {
1375
+ if (!Number.isFinite(retentionDays) || retentionDays <= 0)
1376
+ return 0;
1377
+ const cutoff = new Date(Date.now() - retentionDays * 86_400_000).toISOString();
1378
+ const result = db.prepare("DELETE FROM improve_cycle_metrics WHERE ts < ?").run(cutoff);
1379
+ const changes = result.changes ?? 0;
1380
+ return typeof changes === "bigint" ? Number(changes) : changes;
1381
+ }
@@ -2,23 +2,29 @@
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
4
  /**
5
- * Auto-index: silently run an incremental `akm index` when the local index
6
- * is stale or absent, so that `search`, `show`, and `feedback` always operate
7
- * against current on-disk state without requiring the user to manually run
8
- * `akm index` first.
5
+ * Auto-index bootstrap: silently build the local index inline when it cannot
6
+ * serve the caller's stash at all (missing DB, no `entries` table, zero rows,
7
+ * or built for a different stash), so `search`, `show`, and `feedback` work
8
+ * on first use without a manual `akm index`.
9
9
  *
10
- * This replaces the old filesystem fallbacks that were scattered across
11
- * `searchLocal()` and `show.ts`, centralizing the "indexed yet?" gap handling
12
- * behind a single entry point.
10
+ * Content FRESHNESS is intentionally not this module's job on the read path.
11
+ * Writers maintain the index (`indexWrittenAssets` for `remember`/extract
12
+ * session assets; the mutation commands run `akmIndex()` themselves), and the
13
+ * improve cron / explicit `akm index` do full refreshes. Reads serve whatever
14
+ * populated index exists. The previous design — a staleness walk plus a
15
+ * detached background reindex per read — made every read on an actively
16
+ * written stash spawn a writer that the read's own telemetry then queued
17
+ * behind (see docs/design/read-path-reindex-contention-findings.md).
18
+ *
19
+ * `mode: "blocking"` (improve) still checks staleness and rebuilds inline,
20
+ * because its planning logic needs a current `entries` table in-process.
13
21
  */
14
- import { spawn } from "node:child_process";
15
22
  import fs from "node:fs";
16
23
  import path from "node:path";
17
24
  import { ASSET_SPECS, TYPE_DIRS } from "../core/asset/asset-spec.js";
18
- import { getDataDir, getDbPath } from "../core/paths.js";
25
+ import { getDbPath } from "../core/paths.js";
19
26
  import { warn } from "../core/warn.js";
20
27
  import { closeDatabase, getEntryCount, getIndexedFilePaths, getMeta, openExistingDatabase } from "./db/db.js";
21
- import { acquireIndexWriterLease, handoffIndexWriterLeaseToPid } from "./index-writer-lock.js";
22
28
  function getIndexableFiles(root, spec) {
23
29
  if (!fs.existsSync(root))
24
30
  return [];
@@ -138,12 +144,9 @@ export function isIndexStale(stashDir) {
138
144
  * i.e. the DB file exists, the `entries` table holds rows, and those rows were
139
145
  * built for this stash (it is the stored primary stash or appears in the
140
146
  * stored `stashDirs` set). When this is true the index is at worst
141
- * content-stale, so the `#607` background-reindex optimization is safe: the
142
- * caller gets slightly-stale-but-relevant results immediately. When it is
143
- * false the existing index has nothing relevant to return (no DB, no `entries`
144
- * table, zero rows, or built for a different stash), so a background reindex
145
- * would leave the caller empty until the next read — those cases must rebuild
146
- * inline.
147
+ * content-stale, so read paths serve it as-is. When it is false the existing
148
+ * index has nothing relevant to return (no DB, no `entries` table, zero rows,
149
+ * or built for a different stash), so those cases must rebuild inline.
147
150
  */
148
151
  function indexCanServeStash(stashDir) {
149
152
  const dbPath = getDbPath();
@@ -174,43 +177,6 @@ function indexCanServeStash(stashDir) {
174
177
  closeDatabase(db);
175
178
  }
176
179
  }
177
- /**
178
- * Spawn a background `akm index` process. Non-blocking — returns immediately.
179
- * Background callers share the same global index-writer lease as foreground
180
- * writers, so stale-read-triggered auto-index attempts coalesce safely.
181
- */
182
- async function spawnBackgroundReindex(_stashDir) {
183
- const dataDir = getDataDir();
184
- const logFile = path.join(dataDir, "logs", "index-background.log");
185
- fs.mkdirSync(path.dirname(logFile), { recursive: true });
186
- const lease = await acquireIndexWriterLease({ mode: "try", purpose: "background-reindex-spawn" });
187
- if (!lease)
188
- return;
189
- const akmBin = process.argv[0];
190
- const akmScript = process.argv[1];
191
- try {
192
- const child = spawn(akmBin, [akmScript, "index", "--background"], {
193
- detached: true,
194
- stdio: ["ignore", fs.openSync(logFile, "a"), fs.openSync(logFile, "a")],
195
- env: { ...process.env },
196
- });
197
- if (!child.pid) {
198
- lease.release();
199
- return;
200
- }
201
- handoffIndexWriterLeaseToPid(lease, child.pid, "background-reindex");
202
- try {
203
- child.unref();
204
- }
205
- catch {
206
- // ignore
207
- }
208
- }
209
- catch (error) {
210
- lease.release();
211
- throw error;
212
- }
213
- }
214
180
  async function runInlineReindex(stashDir) {
215
181
  try {
216
182
  const { akmIndex } = await import("./indexer.js");
@@ -223,49 +189,26 @@ async function runInlineReindex(stashDir) {
223
189
  }
224
190
  }
225
191
  /**
226
- * Ensure the local index exists and is fresh enough for the caller's needs.
192
+ * Ensure the local index exists and can serve the caller.
227
193
  *
228
- * Default mode is `background`, which preserves the low-latency behavior used
229
- * by read paths (`search`, `show`, `feedback`): when a populated index is
230
- * merely stale, spawn a detached reindex and proceed against the existing
231
- * index. When the index is entirely absent (no DB / no `entries` table / zero
232
- * rows) the rebuild runs inline regardless of mode, since there is nothing to
233
- * proceed against.
194
+ * Default mode is `background` — the read-path contract (`search`, `show`,
195
+ * `feedback`): a populated index built for this stash is served as-is (its
196
+ * freshness is the writers' job, see module doc); an unusable index rebuilds
197
+ * inline, since there is nothing to proceed against.
234
198
  *
235
- * `mode: "blocking"` waits for the rebuild to finish before returning. Use
236
- * this for callers like `improve` whose planning logic depends on a populated
237
- * `entries` table in the same process.
199
+ * `mode: "blocking"` additionally treats content-staleness as a rebuild
200
+ * trigger and waits for it. Use this for callers like `improve` whose
201
+ * planning logic depends on a current `entries` table in the same process.
238
202
  *
239
203
  * Returns `true` if an index run was attempted.
240
204
  */
241
205
  export async function ensureIndex(stashDir, options = {}) {
242
- if (!isIndexStale(stashDir))
243
- return false;
244
- // Blocking when explicitly requested, or whenever the existing index cannot
245
- // serve this stash (absent DB, no `entries` table, zero rows, or built for a
246
- // different stash): a background reindex returns immediately and would leave
247
- // a first-time caller (search, curate, wiki, show, feedback) with empty
248
- // results. Building inline is a one-off cost; a populated index for this
249
- // stash that is merely content-stale still refreshes in the background.
250
- if (options.mode === "blocking" || !indexCanServeStash(stashDir)) {
251
- return runInlineReindex(stashDir);
252
- }
253
- // The background path re-invokes the akm CLI as a detached child via
254
- // `process.argv[1]`. That is only the akm entrypoint when THIS process is the
255
- // akm CLI itself — which the CLI startup block signals with AKM_CLI_ENTRY=1.
256
- // In any other host (the in-process test runner, a library embedding akm),
257
- // argv[1] points at the host (e.g. the test runner), so spawning it would
258
- // launch the wrong program and orphan it. Build inline there instead — same
259
- // resulting index, no detached process.
260
- if (process.env.AKM_CLI_ENTRY !== "1") {
206
+ if (options.mode === "blocking") {
207
+ if (!isIndexStale(stashDir))
208
+ return false;
261
209
  return runInlineReindex(stashDir);
262
210
  }
263
- try {
264
- await spawnBackgroundReindex(stashDir);
265
- return true;
266
- }
267
- catch (error) {
268
- warn("Background reindex spawn failed, proceeding with existing index:", error instanceof Error ? error.message : String(error));
269
- return true;
270
- }
211
+ if (indexCanServeStash(stashDir))
212
+ return false;
213
+ return runInlineReindex(stashDir);
271
214
  }
@@ -46,13 +46,6 @@ function retainHeldLock(lockPath) {
46
46
  heldLocks.set(lockPath, { depth: 1, exitHandler });
47
47
  return { lockPath, release: () => releaseHeldLock(lockPath) };
48
48
  }
49
- function detachHeldLock(lockPath) {
50
- const held = heldLocks.get(lockPath);
51
- if (!held)
52
- return;
53
- heldLocks.delete(lockPath);
54
- process.off("exit", held.exitHandler);
55
- }
56
49
  export async function acquireIndexWriterLease(options) {
57
50
  const mode = options.mode ?? "wait";
58
51
  const lockPath = getIndexWriterLockPath();
@@ -90,10 +83,6 @@ export async function withIndexWriterLease(options, run) {
90
83
  lease.release();
91
84
  }
92
85
  }
93
- export function handoffIndexWriterLeaseToPid(lease, pid, purpose) {
94
- fs.writeFileSync(lease.lockPath, buildPayload(purpose, pid), "utf8");
95
- detachHeldLock(lease.lockPath);
96
- }
97
86
  export function probeIndexWriterLease() {
98
87
  return probeLock(getIndexWriterLockPath(), { staleAfterMs: INDEX_WRITER_LOCK_STALE_AFTER_MS });
99
88
  }
@@ -0,0 +1,105 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * Write-path indexing: targeted single-file index updates for asset writers.
6
+ *
7
+ * The index is maintained eagerly by every first-class mutation command
8
+ * (`source add`, `wiki`, `workflow`, `setup` all run `akmIndex()` after
9
+ * writing). The memory write paths — `akm remember` / `writeMarkdownAsset`
10
+ * and extract's session assets — historically did not, which is why reads
11
+ * used to compensate with stale-triggered background reindexes (the
12
+ * lock-contention footgun removed alongside this module's introduction; see
13
+ * docs/design/read-path-reindex-contention-findings.md §7).
14
+ *
15
+ * This is NOT a general reindex. It upserts exactly the files the caller just
16
+ * wrote: frontmatter/metadata via the shared matcher pipeline, the `entries`
17
+ * row, and an incremental FTS refresh. Embeddings, index-time LLM passes,
18
+ * graph extraction, `builtAt`, and the per-dir walk cache are all deliberately
19
+ * untouched — the next full run heals them (the opportunistic-recovery
20
+ * strategy of docs/technical/index-consistency-adr.md).
21
+ */
22
+ import fs from "node:fs";
23
+ import path from "node:path";
24
+ import { getDbPath } from "../core/paths.js";
25
+ import { warnVerbose } from "../core/warn.js";
26
+ import { closeDatabase, getEntryCount, openExistingDatabase, rebuildFts, upsertEntry } from "./db/db.js";
27
+ import { generateMetadataFlat } from "./passes/metadata.js";
28
+ import { buildSearchText } from "./search/search-fields.js";
29
+ /**
30
+ * Busy-timeout (ms) for write-path index upserts. This is a real write — unlike the
31
+ * 250ms telemetry inserts — but it must not hang `akm remember` for the full
32
+ * default 30s behind a running full reindex. When it times out, the upsert is
33
+ * skipped and the asset becomes searchable after that reindex instead.
34
+ */
35
+ export const WRITE_PATH_INDEX_BUSY_TIMEOUT_MS = 5_000;
36
+ /**
37
+ * Index the given just-written asset files into the existing local index.
38
+ *
39
+ * FAIL-OPEN at every step: any error (index.db absent, empty, locked past the
40
+ * busy timeout, unparseable file) is reduced to a verbose-only warning and the
41
+ * write command succeeds untouched. The degraded outcome is exactly the
42
+ * pre-write-path-indexing behavior: the asset appears after the next full
43
+ * `akm index` / improve-cron run.
44
+ *
45
+ * An absent or empty index is skipped on purpose — bootstrap belongs to the
46
+ * first read (`ensureIndex`) or an explicit `akm index`, which also cover
47
+ * embeddings and the other passes this fast path skips.
48
+ */
49
+ export async function indexWrittenAssets(stashDir, filePaths) {
50
+ try {
51
+ const dbPath = getDbPath();
52
+ if (!fs.existsSync(dbPath))
53
+ return;
54
+ // The full walk never descends into dot-directories (they hold state like
55
+ // `.meta/`, `.stash.json`), and `shouldIndexStashFile` relies on the walker
56
+ // for that — mirror it here so this fast path indexes exactly what a full
57
+ // run would.
58
+ const files = filePaths.filter((f) => {
59
+ if (!fs.existsSync(f))
60
+ return false;
61
+ const rel = path.relative(stashDir, f);
62
+ return !rel.split(/[\\/]+/).some((segment) => segment.startsWith("."));
63
+ });
64
+ if (files.length === 0)
65
+ return;
66
+ // Generate metadata BEFORE opening the DB so the write window stays
67
+ // short. One call per file keeps the entry↔path pairing exact.
68
+ const pairs = [];
69
+ for (const file of files) {
70
+ const generated = await generateMetadataFlat(stashDir, [file]);
71
+ const entry = generated.entries[0];
72
+ // Workflows carry a side-table document upsert this fast path doesn't
73
+ // do; no current caller writes them, but guard so one never lands
74
+ // half-indexed.
75
+ if (entry && entry.type !== "workflow")
76
+ pairs.push({ file, entry });
77
+ }
78
+ if (pairs.length === 0)
79
+ return;
80
+ const db = openExistingDatabase(dbPath);
81
+ try {
82
+ db.exec(`PRAGMA busy_timeout = ${WRITE_PATH_INDEX_BUSY_TIMEOUT_MS}`);
83
+ if (getEntryCount(db) === 0)
84
+ return;
85
+ for (const { file, entry } of pairs) {
86
+ const entryKey = `${stashDir}:${entry.type}:${entry.name}`;
87
+ let entryWithSize = entry;
88
+ try {
89
+ entryWithSize = { ...entry, fileSize: fs.statSync(file).size };
90
+ }
91
+ catch {
92
+ // stat raced a delete — index without the size, like the full walk does.
93
+ }
94
+ upsertEntry(db, entryKey, path.dirname(file), file, stashDir, entryWithSize, buildSearchText(entry));
95
+ }
96
+ rebuildFts(db, { incremental: true });
97
+ }
98
+ finally {
99
+ closeDatabase(db);
100
+ }
101
+ }
102
+ catch (error) {
103
+ warnVerbose("Write-path index update skipped (asset appears after the next full index):", error instanceof Error ? error.message : String(error));
104
+ }
105
+ }
@@ -191,6 +191,15 @@ export function validateStashEntry(entry) {
191
191
  const contradictedBy = normalizeNonEmptyStringList(e.contradictedBy);
192
192
  if (contradictedBy)
193
193
  result.contradictedBy = contradictedBy;
194
+ // R5 — consolidation provenance fields must survive the whitelist too, or
195
+ // stash.json-overridden merge products lose merge-following + generation
196
+ // counting in the collapse detector.
197
+ if (typeof e.generation === "number" && Number.isFinite(e.generation) && e.generation > 0) {
198
+ result.generation = Math.floor(e.generation);
199
+ }
200
+ const sourceRefs = normalizeNonEmptyStringList(e.sourceRefs);
201
+ if (sourceRefs)
202
+ result.sourceRefs = sourceRefs;
194
203
  const currentBeliefRefs = normalizeNonEmptyStringList(e.currentBeliefRefs);
195
204
  if (currentBeliefRefs)
196
205
  result.currentBeliefRefs = currentBeliefRefs;
@@ -342,6 +351,17 @@ export function applyCuratedFrontmatter(entry, fmData) {
342
351
  const contradictedBy = normalizeStringListOrUndefined(fmData.contradictedBy);
343
352
  if (contradictedBy)
344
353
  entry.contradictedBy = contradictedBy;
354
+ // R5 — consolidation provenance. `generation` (merge depth counter) and
355
+ // `source_refs` (merge/distill provenance pointers) are written by the
356
+ // improve pipeline; captured into the index so the collapse detector can
357
+ // count over-generation assets and follow merges without filesystem reads.
358
+ const generation = fmData.generation;
359
+ if (typeof generation === "number" && Number.isFinite(generation) && generation > 0) {
360
+ entry.generation = Math.floor(generation);
361
+ }
362
+ const sourceRefs = normalizeStringListOrUndefined(fmData.source_refs);
363
+ if (sourceRefs)
364
+ entry.sourceRefs = sourceRefs;
345
365
  const currentBeliefRefs = normalizeStringListOrUndefined(fmData.currentBeliefRefs);
346
366
  if (currentBeliefRefs)
347
367
  entry.currentBeliefRefs = currentBeliefRefs;
@@ -29,6 +29,29 @@ import { applyRankingRules, combineSearchScores, normalizeFtsScores } from "./ra
29
29
  import { enrichSearchHit } from "./search-hit-enrichers.js";
30
30
  import { buildEditHint, findSourceForPath, isEditable } from "./search-source.js";
31
31
  import { deriveSemanticProviderFingerprint, getEffectiveSemanticStatus, isSemanticRuntimeReady, readSemanticStatus, } from "./semantic-status.js";
32
+ /**
33
+ * Age past which search surfaces a "run akm index" hint. Reads serve the
34
+ * existing index as-is (freshness is the writers' job — `indexWrittenAssets`
35
+ * plus full runs), so on installs with no improve cron a hand-edited or
36
+ * git-pulled file stays invisible until someone reindexes. The hint makes that
37
+ * actionable without re-introducing read-triggered reindexing.
38
+ */
39
+ const STALE_INDEX_HINT_MS = 7 * 24 * 60 * 60 * 1000;
40
+ function buildStaleIndexHint(db) {
41
+ try {
42
+ const builtAt = getMeta(db, "builtAt");
43
+ if (!builtAt)
44
+ return undefined;
45
+ const ageMs = Date.now() - new Date(builtAt).getTime();
46
+ if (!Number.isFinite(ageMs) || ageMs < STALE_INDEX_HINT_MS)
47
+ return undefined;
48
+ const days = Math.floor(ageMs / (24 * 60 * 60 * 1000));
49
+ return `Search index was last built ${days} day(s) ago. Files added or edited outside akm since then are not searchable — run 'akm index' to refresh.`;
50
+ }
51
+ catch {
52
+ return undefined;
53
+ }
54
+ }
32
55
  export function buildLocalAction(type, ref, registry = defaultRendererRegistry) {
33
56
  return buildActionFromContributors({ type, ref }, defaultActionContributors(registry)) ?? `akm show ${ref}`;
34
57
  }
@@ -95,7 +118,9 @@ export async function searchLocal(input) {
95
118
  if (config.semanticSearchMode === "auto" && semanticStatus === "blocked") {
96
119
  warnings.push("Semantic search is currently blocked. Using keyword search until the semantic backend is healthy again.");
97
120
  }
98
- // Auto-index when stale so the DB is always current before querying.
121
+ // Bootstrap-only: builds the index inline when it cannot serve this stash.
122
+ // Content freshness is the writers' job (indexWrittenAssets + full runs);
123
+ // reads serve the existing index as-is.
99
124
  await ensureIndex(stashDir);
100
125
  const dbPath = getDbPath();
101
126
  if (!fs.existsSync(dbPath)) {
@@ -117,6 +142,9 @@ export async function searchLocal(input) {
117
142
  mode: "keyword",
118
143
  };
119
144
  }
145
+ const staleHint = buildStaleIndexHint(db);
146
+ if (staleHint)
147
+ warnings.push(staleHint);
120
148
  const { hits, embedMs, rankMs } = await searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry, filters, includeProposed, beliefFilter, restrictToSources, includeExcludedTypes, disableProjectContext, disableScopedUtility);
121
149
  return {
122
150
  hits,
@@ -17,6 +17,14 @@ const TYPE_BOOST = {
17
17
  const MAX_BOOST_SUM = 3.0;
18
18
  const UTILITY_WEIGHT = 0.5;
19
19
  const UTILITY_MAX_BOOST = 1.5;
20
+ /**
21
+ * R2 (docs/design/improve-self-learning-analysis.md) — weight of the improve
22
+ * loop's `asset_salience.rank_score` in user-facing ranking. Bounded well
23
+ * below the utility boost so the composed signal refines, never dominates,
24
+ * lexical/semantic relevance. rank_score ∈ [0,1] → boost ∈ [1, 1.2].
25
+ */
26
+ const SALIENCE_WEIGHT = 0.2;
27
+ const SALIENCE_MAX_BOOST = 1.2;
20
28
  /**
21
29
  * Phase 2A / Rec 5: default recency half-life (days) used when no
22
30
  * `utilityDecayConfig` is supplied to the ranking pipeline. Matches the
@@ -334,7 +342,31 @@ export const defaultRankingContributors = [
334
342
  pinnedFactRankingContributor,
335
343
  projectContextRankingContributor,
336
344
  ];
337
- export const defaultUtilityRankingContributors = [utilityRankingContributor];
345
+ /**
346
+ * R2 — compose the improve loop's salience core into user-facing ranking.
347
+ *
348
+ * `asset_salience.rank_score` (encoding + outcome + retrieval projection,
349
+ * maintained every improve run) previously drove only improve's INTERNAL
350
+ * maintenance selection — the "better assets surface more" loop ran solely
351
+ * through the utility EMA. This bounded multiplicative boost closes the outer
352
+ * loop: usage/outcome-reinforced assets rank higher in `search`/`curate`.
353
+ */
354
+ const salienceRankingContributor = {
355
+ name: "salience-ranking",
356
+ appliesTo(item, ctx) {
357
+ const rank = ctx.salienceRankScores?.get(item.id);
358
+ return rank !== undefined && rank > 0;
359
+ },
360
+ apply(item, ctx) {
361
+ const rank = ctx.salienceRankScores?.get(item.id) ?? 0;
362
+ const rawBoost = 1 + Math.min(1, Math.max(0, rank)) * SALIENCE_WEIGHT;
363
+ item.score *= Math.min(rawBoost, SALIENCE_MAX_BOOST);
364
+ },
365
+ };
366
+ export const defaultUtilityRankingContributors = [
367
+ utilityRankingContributor,
368
+ salienceRankingContributor,
369
+ ];
338
370
  export function applyScoreContributors(item, ctx, contributors = defaultRankingContributors) {
339
371
  let boostSum = 0;
340
372
  for (const contributor of contributors) {