akm-cli 0.9.0-beta.52 → 0.9.0-beta.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/hints/cli-hints-full.md +6 -5
- package/dist/cli.js +0 -7
- package/dist/commands/env/env-cli.js +3 -2
- package/dist/commands/env/env.js +14 -67
- package/dist/commands/health/checks.js +28 -15
- package/dist/commands/health.js +68 -1
- package/dist/commands/improve/collapse-detector.js +419 -0
- package/dist/commands/improve/consolidate.js +72 -54
- package/dist/commands/improve/distill.js +79 -13
- package/dist/commands/improve/extract.js +13 -6
- package/dist/commands/improve/homeostatic.js +109 -79
- package/dist/commands/improve/improve-cli.js +67 -1
- package/dist/commands/improve/improve.js +10 -0
- package/dist/commands/improve/loop-stages.js +39 -1
- package/dist/commands/improve/outcome-loop.js +15 -3
- package/dist/commands/improve/preparation.js +17 -8
- package/dist/commands/improve/salience.js +49 -32
- package/dist/commands/read/curate.js +5 -9
- package/dist/commands/read/knowledge.js +4 -0
- package/dist/commands/read/search.js +5 -2
- package/dist/commands/read/show.js +3 -3
- package/dist/core/asset/asset-spec.js +3 -2
- package/dist/core/config/config-schema.js +39 -17
- package/dist/core/eval/rank-metrics.js +113 -0
- package/dist/core/state/migrations.js +56 -0
- package/dist/core/state-db.js +146 -19
- package/dist/indexer/ensure-index.js +33 -90
- package/dist/indexer/index-writer-lock.js +0 -11
- package/dist/indexer/index-written-assets.js +105 -0
- package/dist/indexer/passes/metadata.js +20 -0
- package/dist/indexer/search/db-search.js +29 -1
- package/dist/indexer/search/ranking-contributors.js +33 -1
- package/dist/indexer/search/ranking.js +66 -0
- package/dist/indexer/search/search-fields.js +6 -0
- package/dist/llm/feature-gate.js +6 -2
- package/dist/output/renderers.js +8 -13
- package/dist/output/shapes/helpers.js +0 -3
- package/dist/output/shapes/passthrough.js +1 -0
- package/dist/scripts/migrate-storage.js +152 -33
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +41 -18
- package/dist/storage/repositories/index-db.js +10 -1
- package/package.json +2 -4
package/dist/core/state-db.js
CHANGED
|
@@ -472,19 +472,20 @@ export function insertProposalIfAbsent(db, proposal, stashDir) {
|
|
|
472
472
|
/**
|
|
473
473
|
* Errors `BEGIN IMMEDIATE` can throw under concurrent-writer contention that are
|
|
474
474
|
* transient (the statement did NOT start a usable transaction) and safe to
|
|
475
|
-
* retry
|
|
475
|
+
* retry:
|
|
476
476
|
* - "database is locked" / SQLITE_BUSY — another writer holds the lock.
|
|
477
|
-
* - "cannot start a transaction within a transaction" — bun:sqlite can leave
|
|
478
|
-
* the connection reporting an open transaction after a contended busy-wait
|
|
479
|
-
* on BEGIN IMMEDIATE (observed only under heavy parallel load, e.g. the
|
|
480
|
-
* proposal-queue worker race). A ROLLBACK clears that phantom state.
|
|
481
477
|
* These are start-of-transaction failures only; an error thrown by `fn` is a
|
|
482
478
|
* real failure and is NEVER retried.
|
|
479
|
+
*
|
|
480
|
+
* "cannot start a transaction within a transaction" is deliberately NOT
|
|
481
|
+
* retryable: it means a transaction is already open on this connection (a
|
|
482
|
+
* re-entrant call — handled by the entry guard in withImmediateTransaction),
|
|
483
|
+
* and "retrying" it with a ROLLBACK would destroy the caller's transaction
|
|
484
|
+
* (issue #686).
|
|
483
485
|
*/
|
|
484
486
|
function isRetryableBeginError(err) {
|
|
485
487
|
const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
|
|
486
|
-
return (msg.includes("
|
|
487
|
-
msg.includes("database is locked") ||
|
|
488
|
+
return (msg.includes("database is locked") ||
|
|
488
489
|
msg.includes("database table is locked") ||
|
|
489
490
|
// Phantom BEGIN (see below) — synthesized when BEGIN IMMEDIATE returns
|
|
490
491
|
// without opening a transaction. Safe to retry: fn() has not run.
|
|
@@ -498,6 +499,16 @@ function sleepSyncMs(ms) {
|
|
|
498
499
|
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
|
|
499
500
|
}
|
|
500
501
|
export function withImmediateTransaction(db, fn) {
|
|
502
|
+
// Re-entrancy guard (issue #686): if a transaction is already open on this
|
|
503
|
+
// connection (e.g. a nested withImmediateTransaction call inside an outer
|
|
504
|
+
// frame's fn), join it — run fn directly with no BEGIN/COMMIT/ROLLBACK of
|
|
505
|
+
// our own. Without this, the nested BEGIN throws "cannot start a transaction
|
|
506
|
+
// within a transaction", which the old retry path answered with an
|
|
507
|
+
// unconditional ROLLBACK — destroying the OUTER transaction and leaving its
|
|
508
|
+
// COMMIT to fail with "cannot commit - no transaction is active".
|
|
509
|
+
if (db.inTransaction) {
|
|
510
|
+
return fn();
|
|
511
|
+
}
|
|
501
512
|
let lastBeginErr;
|
|
502
513
|
for (let attempt = 1; attempt <= WITH_IMMEDIATE_TX_MAX_ATTEMPTS; attempt++) {
|
|
503
514
|
try {
|
|
@@ -515,13 +526,15 @@ export function withImmediateTransaction(db, fn) {
|
|
|
515
526
|
catch (err) {
|
|
516
527
|
lastBeginErr = err;
|
|
517
528
|
if (isRetryableBeginError(err) && attempt < WITH_IMMEDIATE_TX_MAX_ATTEMPTS) {
|
|
518
|
-
//
|
|
519
|
-
//
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
529
|
+
// Only roll back a transaction we can see — never blind-ROLLBACK, since
|
|
530
|
+
// that could destroy a transaction this frame does not own.
|
|
531
|
+
if (db.inTransaction) {
|
|
532
|
+
try {
|
|
533
|
+
db.exec("ROLLBACK");
|
|
534
|
+
}
|
|
535
|
+
catch {
|
|
536
|
+
// Transaction already gone — fine.
|
|
537
|
+
}
|
|
525
538
|
}
|
|
526
539
|
sleepSyncMs(2 ** (attempt - 1));
|
|
527
540
|
continue;
|
|
@@ -530,15 +543,25 @@ export function withImmediateTransaction(db, fn) {
|
|
|
530
543
|
}
|
|
531
544
|
try {
|
|
532
545
|
const result = fn();
|
|
546
|
+
if (!db.inTransaction) {
|
|
547
|
+
// The transaction we opened vanished while fn() ran (e.g. an
|
|
548
|
+
// auto-rollback or a stray ROLLBACK inside fn). fn's writes may have
|
|
549
|
+
// escaped serialization, so retrying is unsafe — fail loudly instead of
|
|
550
|
+
// letting COMMIT throw the opaque "cannot commit - no transaction is
|
|
551
|
+
// active" SQLiteError.
|
|
552
|
+
throw new Error("withImmediateTransaction invariant violated: transaction opened by BEGIN IMMEDIATE was no longer active after the transaction body ran; refusing to COMMIT (writes may have escaped serialization)");
|
|
553
|
+
}
|
|
533
554
|
db.exec("COMMIT");
|
|
534
555
|
return result;
|
|
535
556
|
}
|
|
536
557
|
catch (err) {
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
558
|
+
if (db.inTransaction) {
|
|
559
|
+
try {
|
|
560
|
+
db.exec("ROLLBACK");
|
|
561
|
+
}
|
|
562
|
+
catch {
|
|
563
|
+
// Ignore rollback failures so the original error is preserved.
|
|
564
|
+
}
|
|
542
565
|
}
|
|
543
566
|
throw err; // a real error inside the transaction body — never retried.
|
|
544
567
|
}
|
|
@@ -1252,3 +1275,107 @@ export function upsertBodyEmbeddings(db, entries) {
|
|
|
1252
1275
|
}
|
|
1253
1276
|
})();
|
|
1254
1277
|
}
|
|
1278
|
+
/**
 * Insert a freshly minted canary set: every row is written as active and all
 * rows share the given set id and a single creation timestamp.
 *
 * @param db - database handle exposing `prepare` and `transaction`.
 * @param canarySetId - set id stored on every inserted row.
 * @param canaries - objects with `anchorRef`, `query` and optional `source`
 *   (a missing `source` is stored as "auto"). An empty list is a no-op.
 * @param now - optional timestamp string; defaults to the current time in ISO form.
 */
export function insertCanaries(db, canarySetId, canaries, now) {
    if (canaries.length === 0) {
        return;
    }
    const createdAt = now ?? new Date().toISOString();
    const insertRow = db.prepare(`
    INSERT INTO canary_queries (canary_set_id, anchor_ref, query, source, active, created_at)
    VALUES (?, ?, ?, ?, 1, ?)
  `);
    // All rows go in through one transaction wrapper so the set lands as a unit.
    const insertAll = db.transaction(() => {
        canaries.forEach((canary) => {
            insertRow.run(canarySetId, canary.anchorRef, canary.query, canary.source ?? "auto", createdAt);
        });
    });
    insertAll();
}
|
|
1293
|
+
/**
 * Load the rows of the active canary set, ordered by id.
 * An empty array means no active set exists (never minted).
 *
 * @param db - database handle exposing `prepare`.
 * @returns the rows returned by the query's `.all()`.
 */
export function getActiveCanaries(db) {
    // Only the NEWEST active set is returned. Should two sets ever be active
    // at once (interrupted refresh, bug), blending their rows would silently
    // corrupt the recall/entropy trend baselines, so stale-active rows are
    // filtered out by the sub-select rather than returned.
    const newestActiveSetSql = `SELECT * FROM canary_queries
         WHERE active = 1 AND canary_set_id = (
           SELECT canary_set_id FROM canary_queries WHERE active = 1
           ORDER BY created_at DESC, id DESC LIMIT 1
         )
         ORDER BY id`;
    const statement = db.prepare(newestActiveSetSql);
    return statement.all();
}
|
|
1308
|
+
/**
 * Load every row of one canary set, matched on its exact set id and ordered
 * by id. The query does not filter on `active`, so deactivated rows are
 * included.
 *
 * @param db - database handle exposing `prepare`.
 * @param canarySetId - the set id to match.
 * @returns the rows returned by the query's `.all()`.
 */
export function getCanariesBySetId(db, canarySetId) {
    const statement = db.prepare(`SELECT * FROM canary_queries WHERE canary_set_id = ? ORDER BY id`);
    return statement.all(canarySetId);
}
|
|
1314
|
+
/**
 * List each distinct canary_set_id that still has at least one active row.
 * The query has no ORDER BY, so the order of the ids is whatever the database
 * returns.
 *
 * @param db - database handle exposing `prepare`.
 * @returns an array of set ids.
 */
export function listActiveCanarySetIds(db) {
    const statement = db.prepare(`SELECT DISTINCT canary_set_id FROM canary_queries WHERE active = 1`);
    const setIds = [];
    for (const record of statement.all()) {
        setIds.push(record.canary_set_id);
    }
    return setIds;
}
|
|
1319
|
+
/**
 * Mark every currently-active row of a canary set as inactive.
 *
 * Rows are kept (active = 0) rather than deleted so that historical
 * improve_cycle_metrics rows keyed on the old canary_set_id remain
 * interpretable; only `akm improve canary --refresh` calls this.
 *
 * @param db - database handle exposing `prepare`.
 * @param canarySetId - the set to deactivate.
 * @returns the number of rows changed, always as a plain number.
 */
export function deactivateCanarySet(db, canarySetId) {
    const statement = db.prepare(`UPDATE canary_queries SET active = 0 WHERE canary_set_id = ? AND active = 1`);
    const outcome = statement.run(canarySetId);
    const affected = outcome.changes ?? 0;
    // The driver may report the change count as a bigint; callers get a number.
    if (typeof affected === "bigint") {
        return Number(affected);
    }
    return affected;
}
|
|
1331
|
+
/**
 * Persist one qualifying cycle's store-health snapshot as a single
 * improve_cycle_metrics row.
 *
 * @param db - database handle exposing `prepare`.
 * @param row - object carrying one property per inserted column, named
 *   exactly like the column (snake_case).
 */
export function insertCycleMetrics(db, row) {
    // Bound values, in the same order as the column list in the statement.
    const boundValues = [
        "run_id",
        "ts",
        "pass",
        "canary_set_id",
        "mean_recall",
        "mean_ndcg",
        "mean_mrr",
        "canary_ranks_json",
        "store_total",
        "store_by_type_json",
        "distinct_content_ratio",
        "mean_bigram_diversity",
        "over_generation_count",
        "accepted_actions",
        "merge_floor_violations",
        "alerts_json",
    ].map((column) => row[column]);
    const statement = db.prepare(`
    INSERT INTO improve_cycle_metrics
      (run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
       canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
       mean_bigram_diversity, over_generation_count, accepted_actions,
       merge_floor_violations, alerts_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    statement.run(...boundValues);
}
|
|
1342
|
+
/**
 * Load the most recent cycle rows for one canary set, OLDEST-first (the alert
 * evaluator's window order). Scoped by canary_set_id so trends never compare
 * across canary re-mints.
 *
 * The limit is normalized before it is bound, because SQLite rejects a LIMIT
 * value that cannot be losslessly converted to an integer:
 *   - finite numbers are truncated to an integer and clamped to >= 0;
 *   - `Infinity` becomes -1, which SQLite treats as "no upper bound";
 *   - anything else (NaN, undefined, non-numbers) yields 0 rows.
 *
 * @param db - database handle exposing `prepare`.
 * @param canarySetId - the canary set whose rows are loaded.
 * @param limit - maximum number of rows to return.
 * @returns the newest `limit` rows, reversed into oldest-first order.
 */
export function queryRecentCycleMetrics(db, canarySetId, limit) {
    let rowLimit = 0;
    if (limit === Number.POSITIVE_INFINITY) {
        rowLimit = -1;
    }
    else if (Number.isFinite(limit)) {
        rowLimit = Math.max(0, Math.trunc(limit));
    }
    const rows = db
        .prepare(`SELECT run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
              canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
              mean_bigram_diversity, over_generation_count, accepted_actions,
              merge_floor_violations, alerts_json
         FROM improve_cycle_metrics WHERE canary_set_id = ?
         ORDER BY ts DESC, id DESC LIMIT ?`)
        .all(canarySetId, rowLimit);
    // The query selects newest-first so LIMIT keeps the latest rows; callers
    // want them oldest-first.
    return rows.reverse();
}
|
|
1358
|
+
/**
 * Load the single most recent cycle row across all canary sets (health
 * surface).
 *
 * @param db - database handle exposing `prepare`.
 * @returns the newest row, or `undefined` when the query yields no row
 *   (a `null` result is also normalized to `undefined`).
 */
export function getLatestCycleMetrics(db) {
    const statement = db.prepare(`SELECT run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
              canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
              mean_bigram_diversity, over_generation_count, accepted_actions,
              merge_floor_violations, alerts_json
         FROM improve_cycle_metrics ORDER BY ts DESC, id DESC LIMIT 1`);
    const latest = statement.get();
    return latest ?? undefined;
}
|
|
1369
|
+
/**
 * Delete cycle rows older than `retentionDays` (default 365 — owner-approved;
 * a slow collapse needs a longer trend window than the 90-day events log).
 * canary_queries rows are never purged by this function.
 *
 * @param db - database handle exposing `prepare`.
 * @param retentionDays - age in days beyond which rows are deleted. A
 *   non-finite or non-positive value disables purging.
 * @returns the number of rows deleted, always as a plain number; 0 when
 *   nothing was purged.
 */
export function purgeOldCycleMetrics(db, retentionDays = 365) {
    if (!Number.isFinite(retentionDays) || retentionDays <= 0)
        return 0;
    const cutoffDate = new Date(Date.now() - retentionDays * 86_400_000);
    // A huge but finite retention window puts the cutoff outside the range a
    // Date can represent; toISOString() would throw RangeError on it. No row
    // can be older than such a cutoff, so there is nothing to purge.
    if (Number.isNaN(cutoffDate.getTime()))
        return 0;
    const cutoff = cutoffDate.toISOString();
    const result = db.prepare("DELETE FROM improve_cycle_metrics WHERE ts < ?").run(cutoff);
    const changes = result.changes ?? 0;
    // The driver may report the change count as a bigint; callers get a number.
    return typeof changes === "bigint" ? Number(changes) : changes;
}
|
|
@@ -2,23 +2,29 @@
|
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
4
|
/**
|
|
5
|
-
* Auto-index: silently
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* `akm index
|
|
5
|
+
* Auto-index bootstrap: silently build the local index inline when it cannot
|
|
6
|
+
* serve the caller's stash at all (missing DB, no `entries` table, zero rows,
|
|
7
|
+
* or built for a different stash), so `search`, `show`, and `feedback` work
|
|
8
|
+
* on first use without a manual `akm index`.
|
|
9
9
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
10
|
+
* Content FRESHNESS is intentionally not this module's job on the read path.
|
|
11
|
+
* Writers maintain the index (`indexWrittenAssets` for `remember`/extract
|
|
12
|
+
* session assets; the mutation commands run `akmIndex()` themselves), and the
|
|
13
|
+
* improve cron / explicit `akm index` do full refreshes. Reads serve whatever
|
|
14
|
+
* populated index exists. The previous design — a staleness walk plus a
|
|
15
|
+
* detached background reindex per read — made every read on an actively
|
|
16
|
+
* written stash spawn a writer that the read's own telemetry then queued
|
|
17
|
+
* behind (see docs/design/read-path-reindex-contention-findings.md).
|
|
18
|
+
*
|
|
19
|
+
* `mode: "blocking"` (improve) still checks staleness and rebuilds inline,
|
|
20
|
+
* because its planning logic needs a current `entries` table in-process.
|
|
13
21
|
*/
|
|
14
|
-
import { spawn } from "node:child_process";
|
|
15
22
|
import fs from "node:fs";
|
|
16
23
|
import path from "node:path";
|
|
17
24
|
import { ASSET_SPECS, TYPE_DIRS } from "../core/asset/asset-spec.js";
|
|
18
|
-
import {
|
|
25
|
+
import { getDbPath } from "../core/paths.js";
|
|
19
26
|
import { warn } from "../core/warn.js";
|
|
20
27
|
import { closeDatabase, getEntryCount, getIndexedFilePaths, getMeta, openExistingDatabase } from "./db/db.js";
|
|
21
|
-
import { acquireIndexWriterLease, handoffIndexWriterLeaseToPid } from "./index-writer-lock.js";
|
|
22
28
|
function getIndexableFiles(root, spec) {
|
|
23
29
|
if (!fs.existsSync(root))
|
|
24
30
|
return [];
|
|
@@ -138,12 +144,9 @@ export function isIndexStale(stashDir) {
|
|
|
138
144
|
* i.e. the DB file exists, the `entries` table holds rows, and those rows were
|
|
139
145
|
* built for this stash (it is the stored primary stash or appears in the
|
|
140
146
|
* stored `stashDirs` set). When this is true the index is at worst
|
|
141
|
-
* content-stale, so
|
|
142
|
-
*
|
|
143
|
-
*
|
|
144
|
-
* table, zero rows, or built for a different stash), so a background reindex
|
|
145
|
-
* would leave the caller empty until the next read — those cases must rebuild
|
|
146
|
-
* inline.
|
|
147
|
+
* content-stale, so read paths serve it as-is. When it is false the existing
|
|
148
|
+
* index has nothing relevant to return (no DB, no `entries` table, zero rows,
|
|
149
|
+
* or built for a different stash), so those cases must rebuild inline.
|
|
147
150
|
*/
|
|
148
151
|
function indexCanServeStash(stashDir) {
|
|
149
152
|
const dbPath = getDbPath();
|
|
@@ -174,43 +177,6 @@ function indexCanServeStash(stashDir) {
|
|
|
174
177
|
closeDatabase(db);
|
|
175
178
|
}
|
|
176
179
|
}
|
|
177
|
-
/**
|
|
178
|
-
* Spawn a background `akm index` process. Non-blocking — returns immediately.
|
|
179
|
-
* Background callers share the same global index-writer lease as foreground
|
|
180
|
-
* writers, so stale-read-triggered auto-index attempts coalesce safely.
|
|
181
|
-
*/
|
|
182
|
-
async function spawnBackgroundReindex(_stashDir) {
|
|
183
|
-
const dataDir = getDataDir();
|
|
184
|
-
const logFile = path.join(dataDir, "logs", "index-background.log");
|
|
185
|
-
fs.mkdirSync(path.dirname(logFile), { recursive: true });
|
|
186
|
-
const lease = await acquireIndexWriterLease({ mode: "try", purpose: "background-reindex-spawn" });
|
|
187
|
-
if (!lease)
|
|
188
|
-
return;
|
|
189
|
-
const akmBin = process.argv[0];
|
|
190
|
-
const akmScript = process.argv[1];
|
|
191
|
-
try {
|
|
192
|
-
const child = spawn(akmBin, [akmScript, "index", "--background"], {
|
|
193
|
-
detached: true,
|
|
194
|
-
stdio: ["ignore", fs.openSync(logFile, "a"), fs.openSync(logFile, "a")],
|
|
195
|
-
env: { ...process.env },
|
|
196
|
-
});
|
|
197
|
-
if (!child.pid) {
|
|
198
|
-
lease.release();
|
|
199
|
-
return;
|
|
200
|
-
}
|
|
201
|
-
handoffIndexWriterLeaseToPid(lease, child.pid, "background-reindex");
|
|
202
|
-
try {
|
|
203
|
-
child.unref();
|
|
204
|
-
}
|
|
205
|
-
catch {
|
|
206
|
-
// ignore
|
|
207
|
-
}
|
|
208
|
-
}
|
|
209
|
-
catch (error) {
|
|
210
|
-
lease.release();
|
|
211
|
-
throw error;
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
180
|
async function runInlineReindex(stashDir) {
|
|
215
181
|
try {
|
|
216
182
|
const { akmIndex } = await import("./indexer.js");
|
|
@@ -223,49 +189,26 @@ async function runInlineReindex(stashDir) {
|
|
|
223
189
|
}
|
|
224
190
|
}
|
|
225
191
|
/**
|
|
226
|
-
* Ensure the local index exists and
|
|
192
|
+
* Ensure the local index exists and can serve the caller.
|
|
227
193
|
*
|
|
228
|
-
* Default mode is `background
|
|
229
|
-
*
|
|
230
|
-
*
|
|
231
|
-
*
|
|
232
|
-
* rows) the rebuild runs inline regardless of mode, since there is nothing to
|
|
233
|
-
* proceed against.
|
|
194
|
+
* Default mode is `background` — the read-path contract (`search`, `show`,
|
|
195
|
+
* `feedback`): a populated index built for this stash is served as-is (its
|
|
196
|
+
* freshness is the writers' job, see module doc); an unusable index rebuilds
|
|
197
|
+
* inline, since there is nothing to proceed against.
|
|
234
198
|
*
|
|
235
|
-
* `mode: "blocking"`
|
|
236
|
-
* this for callers like `improve` whose
|
|
237
|
-
* `entries` table in the same process.
|
|
199
|
+
* `mode: "blocking"` additionally treats content-staleness as a rebuild
|
|
200
|
+
* trigger and waits for it. Use this for callers like `improve` whose
|
|
201
|
+
* planning logic depends on a current `entries` table in the same process.
|
|
238
202
|
*
|
|
239
203
|
* Returns `true` if an index run was attempted.
|
|
240
204
|
*/
|
|
241
205
|
/**
 * Decide whether the local index must be rebuilt inline, and rebuild it if so.
 *
 * - `options.mode === "blocking"`: rebuild when `isIndexStale(stashDir)`
 *   reports the index as stale.
 * - any other mode (the default read path): rebuild only when
 *   `indexCanServeStash(stashDir)` reports that the existing index cannot
 *   serve this stash; an index that can serve it is used as-is.
 *
 * @param stashDir - the stash the caller is about to read from.
 * @param options - optional settings; only `mode` is consulted.
 * @returns `false` when no rebuild was needed, otherwise whatever
 *   `runInlineReindex(stashDir)` resolves to.
 */
export async function ensureIndex(stashDir, options = {}) {
    // Exactly one predicate is evaluated per call, chosen by the mode.
    const mustRebuild = options.mode === "blocking"
        ? isIndexStale(stashDir)
        : !indexCanServeStash(stashDir);
    return mustRebuild ? runInlineReindex(stashDir) : false;
}
|
|
@@ -46,13 +46,6 @@ function retainHeldLock(lockPath) {
|
|
|
46
46
|
heldLocks.set(lockPath, { depth: 1, exitHandler });
|
|
47
47
|
return { lockPath, release: () => releaseHeldLock(lockPath) };
|
|
48
48
|
}
|
|
49
|
-
function detachHeldLock(lockPath) {
|
|
50
|
-
const held = heldLocks.get(lockPath);
|
|
51
|
-
if (!held)
|
|
52
|
-
return;
|
|
53
|
-
heldLocks.delete(lockPath);
|
|
54
|
-
process.off("exit", held.exitHandler);
|
|
55
|
-
}
|
|
56
49
|
export async function acquireIndexWriterLease(options) {
|
|
57
50
|
const mode = options.mode ?? "wait";
|
|
58
51
|
const lockPath = getIndexWriterLockPath();
|
|
@@ -90,10 +83,6 @@ export async function withIndexWriterLease(options, run) {
|
|
|
90
83
|
lease.release();
|
|
91
84
|
}
|
|
92
85
|
}
|
|
93
|
-
export function handoffIndexWriterLeaseToPid(lease, pid, purpose) {
|
|
94
|
-
fs.writeFileSync(lease.lockPath, buildPayload(purpose, pid), "utf8");
|
|
95
|
-
detachHeldLock(lease.lockPath);
|
|
96
|
-
}
|
|
97
86
|
export function probeIndexWriterLease() {
|
|
98
87
|
return probeLock(getIndexWriterLockPath(), { staleAfterMs: INDEX_WRITER_LOCK_STALE_AFTER_MS });
|
|
99
88
|
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* Write-path indexing: targeted single-file index updates for asset writers.
|
|
6
|
+
*
|
|
7
|
+
* The index is maintained eagerly by every first-class mutation command
|
|
8
|
+
* (`source add`, `wiki`, `workflow`, `setup` all run `akmIndex()` after
|
|
9
|
+
* writing). The memory write paths — `akm remember` / `writeMarkdownAsset`
|
|
10
|
+
* and extract's session assets — historically did not, which is why reads
|
|
11
|
+
* used to compensate with stale-triggered background reindexes (the
|
|
12
|
+
* lock-contention footgun removed alongside this module's introduction; see
|
|
13
|
+
* docs/design/read-path-reindex-contention-findings.md §7).
|
|
14
|
+
*
|
|
15
|
+
* This is NOT a general reindex. It upserts exactly the files the caller just
|
|
16
|
+
* wrote: frontmatter/metadata via the shared matcher pipeline, the `entries`
|
|
17
|
+
* row, and an incremental FTS refresh. Embeddings, index-time LLM passes,
|
|
18
|
+
* graph extraction, `builtAt`, and the per-dir walk cache are all deliberately
|
|
19
|
+
* untouched — the next full run heals them (the opportunistic-recovery
|
|
20
|
+
* strategy of docs/technical/index-consistency-adr.md).
|
|
21
|
+
*/
|
|
22
|
+
import fs from "node:fs";
|
|
23
|
+
import path from "node:path";
|
|
24
|
+
import { getDbPath } from "../core/paths.js";
|
|
25
|
+
import { warnVerbose } from "../core/warn.js";
|
|
26
|
+
import { closeDatabase, getEntryCount, openExistingDatabase, rebuildFts, upsertEntry } from "./db/db.js";
|
|
27
|
+
import { generateMetadataFlat } from "./passes/metadata.js";
|
|
28
|
+
import { buildSearchText } from "./search/search-fields.js";
|
|
29
|
+
/**
 * Busy-timeout (ms) applied to the index connection for write-path upserts.
 * Deliberately shorter than a full default wait so a write command such as
 * `akm remember` is not held up behind a running full reindex; when the
 * timeout is hit the upsert is skipped and the asset becomes searchable after
 * that reindex instead.
 */
export const WRITE_PATH_INDEX_BUSY_TIMEOUT_MS = 5_000;
/**
 * True when no segment of `file`'s path relative to `stashDir` starts with a
 * dot. Mirrors the full walk, which never descends into dot-directories.
 */
function hasNoDotSegment(stashDir, file) {
    const relative = path.relative(stashDir, file);
    return relative.split(/[\\/]+/).every((segment) => !segment.startsWith("."));
}
/**
 * Copy of `entry` with the file's current size attached; returns `entry`
 * itself when the file can no longer be stat'ed (e.g. it was deleted in the
 * meantime).
 */
function attachFileSize(entry, file) {
    try {
        return { ...entry, fileSize: fs.statSync(file).size };
    }
    catch {
        return entry;
    }
}
/**
 * Upsert the given just-written asset files into the existing local index.
 *
 * FAIL-OPEN: every error raised along the way is caught and reduced to a
 * verbose-only warning, so the calling write command is never affected. In
 * that degraded case the asset simply appears after the next full index run.
 *
 * Nothing is written when the index file is absent, when no given file
 * survives filtering, or when the index holds zero entries — bootstrapping an
 * index is left to `ensureIndex` / an explicit `akm index`.
 *
 * @param stashDir - stash root the files belong to.
 * @param filePaths - absolute paths of the files that were just written.
 * @returns a promise resolving to `undefined`.
 */
export async function indexWrittenAssets(stashDir, filePaths) {
    try {
        const dbPath = getDbPath();
        if (!fs.existsSync(dbPath)) {
            return;
        }
        const candidates = filePaths.filter((file) => fs.existsSync(file) && hasNoDotSegment(stashDir, file));
        if (candidates.length === 0) {
            return;
        }
        // Metadata is generated BEFORE the DB is opened to keep the write
        // window short. One call per file keeps the entry↔path pairing exact.
        const indexable = [];
        for (const file of candidates) {
            const { entries } = await generateMetadataFlat(stashDir, [file]);
            const [entry] = entries;
            // Workflows need a side-table document upsert this fast path does
            // not perform, so they are left to the full run rather than
            // landing half-indexed.
            if (!entry || entry.type === "workflow") {
                continue;
            }
            indexable.push({ file, entry });
        }
        if (indexable.length === 0) {
            return;
        }
        const db = openExistingDatabase(dbPath);
        try {
            db.exec(`PRAGMA busy_timeout = ${WRITE_PATH_INDEX_BUSY_TIMEOUT_MS}`);
            if (getEntryCount(db) === 0) {
                return;
            }
            indexable.forEach(({ file, entry }) => {
                const entryKey = `${stashDir}:${entry.type}:${entry.name}`;
                const sizedEntry = attachFileSize(entry, file);
                upsertEntry(db, entryKey, path.dirname(file), file, stashDir, sizedEntry, buildSearchText(entry));
            });
            rebuildFts(db, { incremental: true });
        }
        finally {
            closeDatabase(db);
        }
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        warnVerbose("Write-path index update skipped (asset appears after the next full index):", reason);
    }
}
|
|
@@ -191,6 +191,15 @@ export function validateStashEntry(entry) {
|
|
|
191
191
|
const contradictedBy = normalizeNonEmptyStringList(e.contradictedBy);
|
|
192
192
|
if (contradictedBy)
|
|
193
193
|
result.contradictedBy = contradictedBy;
|
|
194
|
+
// R5 — consolidation provenance fields must survive the whitelist too, or
|
|
195
|
+
// stash.json-overridden merge products lose merge-following + generation
|
|
196
|
+
// counting in the collapse detector.
|
|
197
|
+
if (typeof e.generation === "number" && Number.isFinite(e.generation) && e.generation > 0) {
|
|
198
|
+
result.generation = Math.floor(e.generation);
|
|
199
|
+
}
|
|
200
|
+
const sourceRefs = normalizeNonEmptyStringList(e.sourceRefs);
|
|
201
|
+
if (sourceRefs)
|
|
202
|
+
result.sourceRefs = sourceRefs;
|
|
194
203
|
const currentBeliefRefs = normalizeNonEmptyStringList(e.currentBeliefRefs);
|
|
195
204
|
if (currentBeliefRefs)
|
|
196
205
|
result.currentBeliefRefs = currentBeliefRefs;
|
|
@@ -342,6 +351,17 @@ export function applyCuratedFrontmatter(entry, fmData) {
|
|
|
342
351
|
const contradictedBy = normalizeStringListOrUndefined(fmData.contradictedBy);
|
|
343
352
|
if (contradictedBy)
|
|
344
353
|
entry.contradictedBy = contradictedBy;
|
|
354
|
+
// R5 — consolidation provenance. `generation` (merge depth counter) and
|
|
355
|
+
// `source_refs` (merge/distill provenance pointers) are written by the
|
|
356
|
+
// improve pipeline; captured into the index so the collapse detector can
|
|
357
|
+
// count over-generation assets and follow merges without filesystem reads.
|
|
358
|
+
const generation = fmData.generation;
|
|
359
|
+
if (typeof generation === "number" && Number.isFinite(generation) && generation > 0) {
|
|
360
|
+
entry.generation = Math.floor(generation);
|
|
361
|
+
}
|
|
362
|
+
const sourceRefs = normalizeStringListOrUndefined(fmData.source_refs);
|
|
363
|
+
if (sourceRefs)
|
|
364
|
+
entry.sourceRefs = sourceRefs;
|
|
345
365
|
const currentBeliefRefs = normalizeStringListOrUndefined(fmData.currentBeliefRefs);
|
|
346
366
|
if (currentBeliefRefs)
|
|
347
367
|
entry.currentBeliefRefs = currentBeliefRefs;
|
|
@@ -29,6 +29,29 @@ import { applyRankingRules, combineSearchScores, normalizeFtsScores } from "./ra
|
|
|
29
29
|
import { enrichSearchHit } from "./search-hit-enrichers.js";
|
|
30
30
|
import { buildEditHint, findSourceForPath, isEditable } from "./search-source.js";
|
|
31
31
|
import { deriveSemanticProviderFingerprint, getEffectiveSemanticStatus, isSemanticRuntimeReady, readSemanticStatus, } from "./semantic-status.js";
|
|
32
|
+
/**
 * Index age (7 days, in ms) at or beyond which search adds a "run akm index"
 * hint to its warnings. The hint makes an old index actionable for the user
 * without re-introducing read-triggered reindexing.
 */
const STALE_INDEX_HINT_MS = 7 * 24 * 60 * 60 * 1000;
/**
 * Build the stale-index warning for a search, if one is due.
 *
 * @param db - open index database handle, passed through to `getMeta`.
 * @returns the hint text when the stored `builtAt` timestamp is at least
 *   STALE_INDEX_HINT_MS old; `undefined` when `builtAt` is missing or
 *   unparseable, when the index is younger than the threshold, or when
 *   reading the metadata throws.
 */
function buildStaleIndexHint(db) {
    const msPerDay = 24 * 60 * 60 * 1000;
    try {
        const builtAt = getMeta(db, "builtAt");
        if (!builtAt) {
            return undefined;
        }
        const builtAtMs = new Date(builtAt).getTime();
        const ageMs = Date.now() - builtAtMs;
        // An unparseable timestamp makes the age NaN, which is treated as "no hint".
        const isStale = Number.isFinite(ageMs) && ageMs >= STALE_INDEX_HINT_MS;
        if (!isStale) {
            return undefined;
        }
        const days = Math.floor(ageMs / msPerDay);
        return `Search index was last built ${days} day(s) ago. Files added or edited outside akm since then are not searchable — run 'akm index' to refresh.`;
    }
    catch {
        // The hint is best-effort: a metadata read failure must not break search.
        return undefined;
    }
}
|
|
32
55
|
/**
 * Resolve the action string for a local search hit.
 *
 * @param type - asset type, passed to the action contributors as `type`.
 * @param ref - asset ref, passed to the action contributors as `ref` and
 *   used in the fallback command.
 * @param registry - renderer registry handed to `defaultActionContributors`;
 *   defaults to `defaultRendererRegistry`.
 * @returns the contributor-built action, or `akm show <ref>` when the
 *   contributors yield `null`/`undefined`.
 */
export function buildLocalAction(type, ref, registry = defaultRendererRegistry) {
    const contributors = defaultActionContributors(registry);
    const contributed = buildActionFromContributors({ type, ref }, contributors);
    return contributed ?? `akm show ${ref}`;
}
|
|
@@ -95,7 +118,9 @@ export async function searchLocal(input) {
|
|
|
95
118
|
if (config.semanticSearchMode === "auto" && semanticStatus === "blocked") {
|
|
96
119
|
warnings.push("Semantic search is currently blocked. Using keyword search until the semantic backend is healthy again.");
|
|
97
120
|
}
|
|
98
|
-
//
|
|
121
|
+
// Bootstrap-only: builds the index inline when it cannot serve this stash.
|
|
122
|
+
// Content freshness is the writers' job (indexWrittenAssets + full runs);
|
|
123
|
+
// reads serve the existing index as-is.
|
|
99
124
|
await ensureIndex(stashDir);
|
|
100
125
|
const dbPath = getDbPath();
|
|
101
126
|
if (!fs.existsSync(dbPath)) {
|
|
@@ -117,6 +142,9 @@ export async function searchLocal(input) {
|
|
|
117
142
|
mode: "keyword",
|
|
118
143
|
};
|
|
119
144
|
}
|
|
145
|
+
const staleHint = buildStaleIndexHint(db);
|
|
146
|
+
if (staleHint)
|
|
147
|
+
warnings.push(staleHint);
|
|
120
148
|
const { hits, embedMs, rankMs } = await searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry, filters, includeProposed, beliefFilter, restrictToSources, includeExcludedTypes, disableProjectContext, disableScopedUtility);
|
|
121
149
|
return {
|
|
122
150
|
hits,
|
|
@@ -17,6 +17,14 @@ const TYPE_BOOST = {
|
|
|
17
17
|
const MAX_BOOST_SUM = 3.0;
|
|
18
18
|
const UTILITY_WEIGHT = 0.5;
|
|
19
19
|
const UTILITY_MAX_BOOST = 1.5;
|
|
20
|
+
/**
|
|
21
|
+
* R2 (docs/design/improve-self-learning-analysis.md) — weight of the improve
|
|
22
|
+
* loop's `asset_salience.rank_score` in user-facing ranking. Bounded well
|
|
23
|
+
* below the utility boost so the composed signal refines, never dominates,
|
|
24
|
+
* lexical/semantic relevance. rank_score ∈ [0,1] → boost ∈ [1, 1.2].
|
|
25
|
+
*/
|
|
26
|
+
const SALIENCE_WEIGHT = 0.2;
|
|
27
|
+
const SALIENCE_MAX_BOOST = 1.2;
|
|
20
28
|
/**
|
|
21
29
|
* Phase 2A / Rec 5: default recency half-life (days) used when no
|
|
22
30
|
* `utilityDecayConfig` is supplied to the ranking pipeline. Matches the
|
|
@@ -334,7 +342,31 @@ export const defaultRankingContributors = [
|
|
|
334
342
|
pinnedFactRankingContributor,
|
|
335
343
|
projectContextRankingContributor,
|
|
336
344
|
];
|
|
337
|
-
|
|
345
|
+
/**
|
|
346
|
+
* R2 — compose the improve loop's salience core into user-facing ranking.
|
|
347
|
+
*
|
|
348
|
+
* `asset_salience.rank_score` (encoding + outcome + retrieval projection,
|
|
349
|
+
* maintained every improve run) previously drove only improve's INTERNAL
|
|
350
|
+
* maintenance selection — the "better assets surface more" loop ran solely
|
|
351
|
+
* through the utility EMA. This bounded multiplicative boost closes the outer
|
|
352
|
+
* loop: usage/outcome-reinforced assets rank higher in `search`/`curate`.
|
|
353
|
+
*/
|
|
354
|
+
const salienceRankingContributor = {
    name: "salience-ranking",
    /**
     * Whether a salience boost should be applied to `item`.
     *
     * @param item - ranked item; only `item.id` is read.
     * @param ctx - ranking context; `ctx.salienceRankScores` is an optional
     *   Map-like lookup from item id to rank score.
     * @returns {boolean} true only when a rank score exists for the item and
     *   is greater than zero (the comparison is false for NaN).
     */
    appliesTo(item, ctx) {
        const rank = ctx.salienceRankScores?.get(item.id);
        return rank !== undefined && rank > 0;
    },
    /**
     * Multiply `item.score` in place by a bounded boost derived from the
     * item's rank score: the rank is clamped to [0, 1], scaled by
     * `SALIENCE_WEIGHT`, and the resulting factor is capped at
     * `SALIENCE_MAX_BOOST`.
     *
     * @param item - ranked item; `item.id` is read and `item.score` mutated.
     * @param ctx - ranking context carrying the optional `salienceRankScores`.
     */
    apply(item, ctx) {
        const rank = Number(ctx.salienceRankScores?.get(item.id) ?? 0);
        // Math.min/Math.max propagate NaN, so a NaN rank would turn the whole
        // score into NaN. Treat it as "no signal" and leave the score as-is,
        // even when apply() is invoked without a prior appliesTo() check.
        if (Number.isNaN(rank)) {
            return;
        }
        const clampedRank = Math.min(1, Math.max(0, rank));
        const boost = Math.min(1 + clampedRank * SALIENCE_WEIGHT, SALIENCE_MAX_BOOST);
        item.score *= boost;
    },
};
|
|
366
|
+
// Contributor list in application order: the utility contributor first,
// then the salience contributor.
export const defaultUtilityRankingContributors = [utilityRankingContributor, salienceRankingContributor];
|
|
338
370
|
export function applyScoreContributors(item, ctx, contributors = defaultRankingContributors) {
|
|
339
371
|
let boostSum = 0;
|
|
340
372
|
for (const contributor of contributors) {
|