akm-cli 0.9.0-beta.2 → 0.9.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/CHANGELOG.md +87 -0
  2. package/dist/assets/templates/html/default.html +78 -0
  3. package/dist/assets/templates/html/health.html +560 -0
  4. package/dist/assets/templates/html/vendor/echarts.min.js +45 -0
  5. package/dist/cli/shared.js +21 -5
  6. package/dist/cli.js +36 -5
  7. package/dist/commands/health/html-report.js +448 -0
  8. package/dist/commands/health.js +97 -6
  9. package/dist/commands/improve/extract.js +38 -2
  10. package/dist/commands/improve/improve-auto-accept.js +27 -1
  11. package/dist/commands/improve/improve.js +167 -53
  12. package/dist/commands/improve/reflect-noise.js +0 -0
  13. package/dist/commands/improve/reflect.js +25 -0
  14. package/dist/commands/proposal/drain.js +73 -6
  15. package/dist/commands/proposal/proposal-cli.js +22 -10
  16. package/dist/commands/proposal/proposal.js +12 -1
  17. package/dist/commands/proposal/validators/proposals.js +361 -338
  18. package/dist/commands/remember.js +6 -2
  19. package/dist/core/config/config-schema.js +5 -0
  20. package/dist/core/logs-db.js +304 -0
  21. package/dist/core/state-db.js +107 -14
  22. package/dist/indexer/db/db.js +2 -2
  23. package/dist/indexer/passes/memory-inference.js +61 -22
  24. package/dist/integrations/harnesses/claude/session-log.js +16 -4
  25. package/dist/llm/client.js +15 -0
  26. package/dist/llm/usage-persist.js +77 -0
  27. package/dist/llm/usage-telemetry.js +103 -0
  28. package/dist/output/context.js +3 -2
  29. package/dist/output/html-render.js +73 -0
  30. package/dist/output/shapes/helpers.js +17 -1
  31. package/dist/output/text/helpers.js +69 -1
  32. package/dist/scripts/migrate-storage.js +65 -14
  33. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +14 -2
  34. package/dist/tasks/runner.js +99 -16
  35. package/dist/workflows/db.js +4 -0
  36. package/package.json +1 -1
@@ -149,6 +149,10 @@ export async function runLlmEnrich(body) {
149
149
  return { tags: [] };
150
150
  }
151
151
  const { chatCompletion, parseEmbeddedJsonResponse: parseJsonResponse } = await import("../llm/client.js");
152
+ // #576: attribute this entry point's LLM call to the `remember` stage. The
153
+ // wrapper is ambient — if a usage sink is active it tags the record; if not,
154
+ // it is a no-op.
155
+ const { withLlmStage } = await import("../llm/usage-telemetry.js");
152
156
  const prompt = `You are a memory tagger for a developer knowledge base.
153
157
  Given the memory text below, return ONLY a JSON object with these fields:
154
158
  - "tags": array of 1-5 short lowercase keyword tags
@@ -164,10 +168,10 @@ Return ONLY the JSON object, no prose, no markdown fences.`;
164
168
  const result = await (async () => {
165
169
  try {
166
170
  return await Promise.race([
167
- chatCompletion(llmConfig, [
171
+ withLlmStage("remember", () => chatCompletion(llmConfig, [
168
172
  { role: "system", content: "Return only valid JSON. No prose." },
169
173
  { role: "user", content: prompt },
170
- ], { maxTokens: 256, temperature: 0.1 }),
174
+ ], { maxTokens: 256, temperature: 0.1 })),
171
175
  new Promise((_, reject) => {
172
176
  timeoutHandle = setTimeout(() => reject(new Error("LLM enrichment timed out")), LLM_ENRICH_TIMEOUT_MS);
173
177
  }),
@@ -138,6 +138,11 @@ export const ImproveProcessConfigSchema = z
138
138
  // Extract process config (only meaningful for extract process)
139
139
  defaultSince: z.string().min(1).optional(),
140
140
  maxTotalChars: positiveInt.optional(),
141
+ // Extract process: minimum raw session size (pre-filter inputCount) below
142
+ // which the extract LLM call is skipped (#595/#596). 0 disables the gate.
143
+ // Absent = default 10 (skip only truly empty sessions). Only meaningful
144
+ // on the `extract` process.
145
+ minContentChars: z.number().int().min(0).optional(),
141
146
  maxChunkSize: z.number().int().min(1).max(50).optional(),
142
147
  // Extract process: minimum number of new (unseen, in-window) candidate
143
148
  // sessions below which the extract pass skips entirely (emits an
@@ -0,0 +1,304 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * logs.db — Dedicated SQLite database for task/run log lines (#579).
6
+ *
7
+ * Replaces grep-the-flat-file consumption of `<cacheDir>/tasks/logs/<id>/<ts>.log`
8
+ * with structured, indexed rows: `{ts, task_id, run_id, stream, level, line}`.
9
+ * The strategic direction (stop scattering data across files/folders) means
10
+ * every NEW log consumer queries this database; the per-run text file written
11
+ * by the task runner is retained only as a transitional tail for humans —
12
+ * see docs/technical/logs-audit.md for the full producer audit.
13
+ *
14
+ * ## Why a separate database from state.db
15
+ *
16
+ * Log lines are high-volume, append-only, and freely purgeable; state.db rows
17
+ * (events, proposals, task_history) are durable records. Separating them keeps
18
+ * state.db small and lets log retention be aggressive without touching durable
19
+ * state. Cross-db queries (e.g. "failed task_history row → its log lines") use
20
+ * SQLite ATTACH — see {@link attachStateDatabase}.
21
+ *
22
+ * ## run_id
23
+ *
24
+ * state.db's `task_history` identifies a run by the unique pair
25
+ * `(task_id, started_at)` (see migration 002 in state-db.ts). logs.db encodes
26
+ * that pair as a single string — {@link buildTaskRunId} — so log rows can be
27
+ * joined back to their history row:
28
+ *
29
+ * l.run_id = th.task_id || '@' || th.started_at
30
+ *
31
+ * ## Schema evolution
32
+ *
33
+ * Same migration-safety contract as state.db: append-only `MIGRATIONS` applied
34
+ * through the shared runner in src/storage/engines/sqlite-migrations.ts.
35
+ *
36
+ * @module logs-db
37
+ */
38
+ import fs from "node:fs";
39
+ import path from "node:path";
40
+ import { openDatabase } from "../storage/database.js";
41
+ import { runMigrations as runSqliteMigrations } from "../storage/engines/sqlite-migrations.js";
42
+ import { getDataDir } from "./paths.js";
43
+ import { getStateDbPath } from "./state-db.js";
44
+ // ── Path helper ──────────────────────────────────────────────────────────────
45
+ /**
46
+ * Default path: `<dataDir>/logs.db` — alongside state.db so cooperating
47
+ * processes sharing a data root automatically share the same logs database
48
+ * (same `AKM_DATA_DIR` / XDG env-isolation as {@link getStateDbPath}).
49
+ */
50
+ export function getLogsDbPath() {
51
+ return path.join(getDataDir(), "logs.db");
52
+ }
53
+ // ── Database open ────────────────────────────────────────────────────────────
54
+ /**
55
+ * Open (and initialise / migrate) the logs database.
56
+ *
57
+ * @param dbPath - Override the database file path (tests pass a tmpdir path).
58
+ *
59
+ * PRAGMA rationale:
60
+ *
61
+ * journal_mode = WAL
62
+ * Readers never block writers and vice-versa; crashes are safe (the WAL is
63
+ * replayed on next open). Required because the task runner writes log rows
64
+ * while `akm health` may be reading them.
65
+ *
66
+ * busy_timeout = 30000
67
+ * Log writes happen at the end of scheduled task runs, which can pile up
68
+ * (cron fan-out). 30 s of retry absorbs a slow concurrent writer instead of
69
+ * surfacing SQLITE_BUSY and dropping log lines.
70
+ */
71
+ export function openLogsDatabase(dbPath) {
72
+ const resolvedPath = dbPath ?? getLogsDbPath();
73
+ const dir = path.dirname(resolvedPath);
74
+ if (!fs.existsSync(dir)) {
75
+ fs.mkdirSync(dir, { recursive: true });
76
+ }
77
+ const db = openDatabase(resolvedPath);
78
+ // PRAGMAs must run before any DDL or DML.
79
+ db.exec("PRAGMA journal_mode = WAL");
80
+ db.exec("PRAGMA busy_timeout = 30000");
81
+ runMigrations(db);
82
+ return db;
83
+ }
84
+ // ── Migrations ───────────────────────────────────────────────────────────────
85
+ /**
86
+ * All migrations in application order. APPEND only — never insert in the
87
+ * middle or reorder. Same contract as state.db's MIGRATIONS array.
88
+ */
89
+ const MIGRATIONS = [
90
+ // ── Migration 001 — task_logs ───────────────────────────────────────────────
91
+ //
92
+ // One row per log line emitted by a task run.
93
+ //
94
+ // Indexed (query) columns:
95
+ // ts TEXT — ISO-8601 UTC; range queries ("logs in the last hour").
96
+ // task_id TEXT — task identifier; per-task log views.
97
+ // run_id TEXT — buildTaskRunId(task_id, started_at); per-run log views
98
+ // and the join key back to state.db task_history.
99
+ //
100
+ // Non-indexed columns:
101
+ // stream TEXT — 'stdout' | 'stderr'; which pipe the line came from.
102
+ // level TEXT — 'info' | 'warn' | 'error'; runner-assigned severity
103
+ // ('info' for captured stdout, 'error' for stderr and
104
+ // failure diagnostics).
105
+ // line TEXT — the log line itself (no trailing newline).
106
+ //
107
+ // ADD COLUMN extension points (future migrations):
108
+ // ALTER TABLE task_logs ADD COLUMN seq INTEGER DEFAULT NULL;
109
+ // ALTER TABLE task_logs ADD COLUMN source TEXT DEFAULT NULL;
110
+ //
111
+ // TTL: rows where ts < NOW() - retention can be deleted by purgeOldTaskLogs().
112
+ // No automatic deletion occurs here.
113
+ {
114
+ id: "001-task-logs",
115
+ up: `
116
+ CREATE TABLE IF NOT EXISTS task_logs (
117
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
118
+ ts TEXT NOT NULL,
119
+ task_id TEXT NOT NULL,
120
+ run_id TEXT NOT NULL,
121
+ stream TEXT NOT NULL DEFAULT 'stdout',
122
+ level TEXT NOT NULL DEFAULT 'info',
123
+ line TEXT NOT NULL
124
+ );
125
+
126
+ -- Query patterns:
127
+ -- SELECT … WHERE ts >= ? AND ts <= ? → idx_task_logs_ts (purge, windows)
128
+ -- SELECT … WHERE task_id = ? → idx_task_logs_task_id
129
+ -- SELECT … WHERE run_id = ? → idx_task_logs_run_id (per-run tail)
130
+ CREATE INDEX IF NOT EXISTS idx_task_logs_ts ON task_logs(ts);
131
+ CREATE INDEX IF NOT EXISTS idx_task_logs_task_id ON task_logs(task_id);
132
+ CREATE INDEX IF NOT EXISTS idx_task_logs_run_id ON task_logs(run_id);
133
+ `,
134
+ },
135
+ ];
136
+ /**
137
+ * Apply every pending migration. Called automatically by
138
+ * {@link openLogsDatabase}; exported for the same test seams state-db exposes.
139
+ */
140
+ export function runMigrations(db) {
141
+ runSqliteMigrations(db, MIGRATIONS);
142
+ }
143
+ // ── run_id ───────────────────────────────────────────────────────────────────
144
+ /**
145
+ * Encode a task run's identity — the unique `(task_id, started_at)` pair from
146
+ * state.db `task_history` — as a single run_id string.
147
+ *
148
+ * The format MUST stay in sync with the SQL expression
149
+ * `task_id || '@' || started_at` used by {@link queryFailedRunLogLines}.
150
+ */
151
+ export function buildTaskRunId(taskId, startedAtIso) {
152
+ return `${taskId}@${startedAtIso}`;
153
+ }
154
+ /**
155
+ * Insert a batch of log lines for one task run in a single transaction.
156
+ * Returns the number of rows inserted. Lines are stored in array order
157
+ * (ascending rowid), so reading back `ORDER BY id` reproduces emission order.
158
+ *
159
+ * Errors propagate — the task runner wraps this in its own best-effort
160
+ * handling (mirroring `appendHistory`) so an unwritable logs.db never fails
161
+ * a task run.
162
+ */
163
+ export function insertTaskLogLines(db, input) {
164
+ if (input.lines.length === 0)
165
+ return 0;
166
+ const stmt = db.prepare(`INSERT INTO task_logs (ts, task_id, run_id, stream, level, line)
167
+ VALUES (?, ?, ?, ?, ?, ?)`);
168
+ db.transaction(() => {
169
+ for (const entry of input.lines) {
170
+ stmt.run(input.ts, input.taskId, input.runId, entry.stream ?? "stdout", entry.level ?? "info", entry.line);
171
+ }
172
+ })();
173
+ return input.lines.length;
174
+ }
175
+ /**
176
+ * Read log lines matching the filter, in emission order (ascending id).
177
+ *
178
+ * Connection-lifetime rule (WS5): `.all()` materializes a plain array before
179
+ * returning.
180
+ */
181
+ export function queryTaskLogs(db, options = {}) {
182
+ const conditions = [];
183
+ const params = [];
184
+ if (options.taskId) {
185
+ conditions.push("task_id = ?");
186
+ params.push(options.taskId);
187
+ }
188
+ if (options.runId) {
189
+ conditions.push("run_id = ?");
190
+ params.push(options.runId);
191
+ }
192
+ if (options.stream) {
193
+ conditions.push("stream = ?");
194
+ params.push(options.stream);
195
+ }
196
+ if (options.since) {
197
+ conditions.push("ts >= ?");
198
+ params.push(options.since);
199
+ }
200
+ if (options.until) {
201
+ conditions.push("ts < ?");
202
+ params.push(options.until);
203
+ }
204
+ const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
205
+ const limit = options.limit !== undefined && options.limit >= 0 ? ` LIMIT ${Math.floor(options.limit)}` : "";
206
+ return db
207
+ .prepare(`SELECT id, ts, task_id, run_id, stream, level, line FROM task_logs ${where} ORDER BY id ASC${limit}`)
208
+ .all(...params);
209
+ }
210
+ /**
211
+ * Bulk membership check: which of `runIds` have at least one log row?
212
+ * Used by `akm health` to compute the log-backing rate from the database
213
+ * instead of `fs.existsSync` over scattered files. Chunked to stay under
214
+ * SQLite's bound-parameter ceiling.
215
+ */
216
+ export function getLoggedRunIds(db, runIds) {
217
+ const out = new Set();
218
+ if (runIds.length === 0)
219
+ return out;
220
+ const CHUNK = 500;
221
+ for (let i = 0; i < runIds.length; i += CHUNK) {
222
+ const chunk = runIds.slice(i, i + CHUNK);
223
+ const placeholders = chunk.map(() => "?").join(",");
224
+ const rows = db
225
+ .prepare(`SELECT DISTINCT run_id FROM task_logs WHERE run_id IN (${placeholders})`)
226
+ .all(...chunk);
227
+ for (const row of rows)
228
+ out.add(row.run_id);
229
+ }
230
+ return out;
231
+ }
232
+ // ── Cross-db: ATTACH state.db ────────────────────────────────────────────────
233
+ /**
234
+ * ATTACH state.db to an open logs.db handle under the schema name `state`,
235
+ * enabling cross-db joins like task_history × task_logs.
236
+ *
237
+ * The state.db file must already exist (callers always open state.db first in
238
+ * practice); attaching a non-existent path would silently create an empty,
239
+ * unmigrated database file, so this throws instead.
240
+ */
241
+ export function attachStateDatabase(db, stateDbPath) {
242
+ const resolved = stateDbPath ?? getStateDbPath();
243
+ if (!fs.existsSync(resolved)) {
244
+ throw new Error(`Cannot ATTACH state.db: file does not exist at ${resolved}`);
245
+ }
246
+ // prepare().run() rather than db.run(): both drivers support parameterised
247
+ // ATTACH through a prepared statement, and no other call site uses db.run().
248
+ db.prepare("ATTACH DATABASE ? AS state").run(resolved);
249
+ }
250
+ /**
251
+ * Convenience: open logs.db with state.db attached as `state`. The returned
252
+ * handle supports cross-db queries such as {@link queryFailedRunLogLines}.
253
+ * Close it like any other handle (DETACH is implicit on close).
254
+ */
255
+ export function openLogsDatabaseWithState(logsDbPath, stateDbPath) {
256
+ const db = openLogsDatabase(logsDbPath);
257
+ try {
258
+ attachStateDatabase(db, stateDbPath);
259
+ }
260
+ catch (err) {
261
+ db.close();
262
+ throw err;
263
+ }
264
+ return db;
265
+ }
266
+ /**
267
+ * Cross-db join: every log line belonging to a FAILED task_history run whose
268
+ * `started_at` is `>= since` (all failed runs when omitted). Requires a handle
269
+ * opened via {@link openLogsDatabaseWithState}.
270
+ *
271
+ * The join key is the run_id encoding documented on {@link buildTaskRunId}:
272
+ * `task_logs.run_id = task_history.task_id || '@' || task_history.started_at`.
273
+ */
274
+ export function queryFailedRunLogLines(db, options = {}) {
275
+ const conditions = ["th.status = 'failed'"];
276
+ const params = [];
277
+ if (options.since) {
278
+ conditions.push("th.started_at >= ?");
279
+ params.push(options.since);
280
+ }
281
+ const limit = options.limit !== undefined && options.limit >= 0 ? ` LIMIT ${Math.floor(options.limit)}` : "";
282
+ return db
283
+ .prepare(`SELECT th.task_id, l.run_id, th.started_at, th.status, l.ts, l.stream, l.level, l.line
284
+ FROM state.task_history th
285
+ JOIN task_logs l ON l.run_id = th.task_id || '@' || th.started_at
286
+ WHERE ${conditions.join(" AND ")}
287
+ ORDER BY th.started_at DESC, l.id ASC${limit}`)
288
+ .all(...params);
289
+ }
290
+ // ── Retention ────────────────────────────────────────────────────────────────
291
+ /**
292
+ * Delete task_logs rows older than `retentionDays` (default: 90). Mirrors
293
+ * `purgeOldEvents` / `purgeOldImproveRuns` in state-db.ts — same default, same
294
+ * return shape (rows deleted), same disabled-when-non-positive semantics.
295
+ * Wired into the improve maintenance pass alongside the state.db purges.
296
+ */
297
+ export function purgeOldTaskLogs(db, retentionDays = 90) {
298
+ if (!Number.isFinite(retentionDays) || retentionDays <= 0)
299
+ return 0;
300
+ const cutoff = new Date(Date.now() - retentionDays * 86_400_000).toISOString();
301
+ const result = db.prepare("DELETE FROM task_logs WHERE ts < ?").run(cutoff);
302
+ const changes = result.changes ?? 0;
303
+ return typeof changes === "bigint" ? Number(changes) : changes;
304
+ }
@@ -86,11 +86,12 @@ export function getStateDbPath() {
86
86
  * backwards compatibility; enabling them prevents orphaned rows in tables
87
87
  * that reference each other (not used in v1 schema but guards future ones).
88
88
  *
89
- * busy_timeout = 5000
89
+ * busy_timeout = 30000
90
90
  * When another connection holds a write lock, SQLite retries for up to
91
- * 5 000 ms before returning SQLITE_BUSY. Without this, the default timeout
92
- * is 0 ms — any concurrent writer causes an immediate error. 5 s matches
93
- * the same value used in openDatabase() for index.db.
91
+ * 30 000 ms before returning SQLITE_BUSY. Without this, the default timeout
92
+ * is 0 ms — any concurrent writer causes an immediate error. 30 s (#589)
93
+ * matches the value used in openDatabase() for index.db; 5 s proved too
94
+ * narrow when a post-inference reindex overlapped a parallel event write.
94
95
  */
95
96
  export function openStateDatabase(dbPath) {
96
97
  const resolvedPath = dbPath ?? getStateDbPath();
@@ -102,7 +103,7 @@ export function openStateDatabase(dbPath) {
102
103
  // PRAGMAs must run before any DDL or DML.
103
104
  db.exec("PRAGMA journal_mode = WAL");
104
105
  db.exec("PRAGMA foreign_keys = ON");
105
- db.exec("PRAGMA busy_timeout = 5000");
106
+ db.exec("PRAGMA busy_timeout = 30000");
106
107
  runMigrations(db);
107
108
  return db;
108
109
  }
@@ -190,7 +191,9 @@ const MIGRATIONS = [
190
191
  --
191
192
  -- Extensible (metadata_json) columns:
192
193
  -- metadata_json TEXT — JSON object for future proposal fields.
193
- -- Current fields stored here: sourceRun, review.
194
+ -- Current fields stored here: sourceRun,
195
+ -- review, confidence, gateDecision (#577),
196
+ -- backupContent.
194
197
  --
195
198
  -- ADD COLUMN extension points (future migrations):
196
199
  -- ALTER TABLE proposals ADD COLUMN source_run TEXT DEFAULT NULL;
@@ -458,6 +461,33 @@ const MIGRATIONS = [
458
461
  ON extract_sessions_seen(processed_at);
459
462
  `,
460
463
  },
464
+ // ── Migration 005 — proposal_fs_imports ─────────────────────────────────────
465
+ //
466
+ // One-shot ledger for the legacy filesystem→SQLite proposal import (#578).
467
+ //
468
+ // Before 0.9.0 the proposal queue lived as per-uuid JSON directories under
469
+ // `<stashDir>/.akm/proposals/` and the `proposals` table (created in 001) was
470
+ // dead weight. 0.9.0 makes the table canonical; the first proposal operation
471
+ // against a stash imports any legacy `proposal.json` files it finds (INSERT
472
+ // OR IGNORE, so re-runs never duplicate) and records the stash here so later
473
+ // invocations skip the directory walk entirely.
474
+ //
475
+ // Indexed (query) columns:
476
+ // stash_dir TEXT PK — absolute stash root the import ran against.
477
+ //
478
+ // Non-indexed columns:
479
+ // imported_at TEXT — ISO-8601 UTC; when the import completed.
480
+ // imported_count INTEGER — rows actually inserted by the import.
481
+ {
482
+ id: "005-proposal-fs-imports",
483
+ up: `
484
+ CREATE TABLE IF NOT EXISTS proposal_fs_imports (
485
+ stash_dir TEXT PRIMARY KEY,
486
+ imported_at TEXT NOT NULL,
487
+ imported_count INTEGER NOT NULL DEFAULT 0
488
+ );
489
+ `,
490
+ },
461
491
  ];
462
492
  /**
463
493
  * Apply every pending migration in a single transaction per migration.
@@ -529,6 +559,9 @@ export function proposalRowToProposal(row) {
529
559
  ...(frontmatter !== undefined ? { frontmatter } : {}),
530
560
  },
531
561
  ...(meta.review !== undefined ? { review: meta.review } : {}),
562
+ ...(typeof meta.confidence === "number" ? { confidence: meta.confidence } : {}),
563
+ ...(meta.gateDecision !== undefined ? { gateDecision: meta.gateDecision } : {}),
564
+ ...(typeof meta.backupContent === "string" ? { backupContent: meta.backupContent } : {}),
532
565
  };
533
566
  }
534
567
  /**
@@ -542,6 +575,12 @@ export function proposalToRowValues(proposal, stashDir) {
542
575
  metaObj.sourceRun = proposal.sourceRun;
543
576
  if (proposal.review !== undefined)
544
577
  metaObj.review = proposal.review;
578
+ if (proposal.confidence !== undefined)
579
+ metaObj.confidence = proposal.confidence;
580
+ if (proposal.gateDecision !== undefined)
581
+ metaObj.gateDecision = proposal.gateDecision;
582
+ if (proposal.backupContent !== undefined)
583
+ metaObj.backupContent = proposal.backupContent;
545
584
  return {
546
585
  id: proposal.id,
547
586
  stash_dir: stashDir,
@@ -656,7 +695,10 @@ export function upsertProposal(db, proposal, stashDir) {
656
695
  }
657
696
  /**
658
697
  * List proposals, optionally filtered by stashDir, status, and/or ref.
659
- * Results are sorted by created_at ASC to match the existing listProposals() behaviour.
698
+ *
699
+ * Results are ordered by `created_at ASC` (matching the historical
700
+ * `listProposals()` sort), with `rowid` as a deterministic tiebreak so two
701
+ * proposals created in the same millisecond list in insertion order.
660
702
  */
661
703
  export function listStateProposals(db, options = {}) {
662
704
  const conditions = [];
@@ -677,21 +719,72 @@ export function listStateProposals(db, options = {}) {
677
719
  const rows = db
678
720
  .prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
679
721
  content, frontmatter_json, metadata_json
680
- FROM proposals ${where} ORDER BY created_at ASC`)
722
+ FROM proposals ${where} ORDER BY created_at ASC, rowid ASC`)
681
723
  .all(...params);
682
724
  return rows.map(proposalRowToProposal);
683
725
  }
684
726
  /**
685
- * Look up a single proposal by id. Returns undefined when not found.
727
+ * Look up a single proposal by id, optionally scoped to one stash root.
728
+ * Returns undefined when not found.
686
729
  */
687
- export function getStateProposal(db, id) {
688
- const row = db
689
- .prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
730
+ export function getStateProposal(db, id, stashDir) {
731
+ const sql = `SELECT id, stash_dir, ref, status, source, created_at, updated_at,
690
732
  content, frontmatter_json, metadata_json
691
- FROM proposals WHERE id = ?`)
692
- .get(id);
733
+ FROM proposals WHERE id = ?${stashDir ? " AND stash_dir = ?" : ""}`;
734
+ const row = (stashDir ? db.prepare(sql).get(id, stashDir) : db.prepare(sql).get(id));
693
735
  return row ? proposalRowToProposal(row) : undefined;
694
736
  }
737
+ /**
738
+ * Find PENDING proposal ids in one stash whose id starts with `idPrefix`.
739
+ * Backs the UUID-prefix form of `akm proposal show/accept/... <prefix>` —
740
+ * prefix resolution is deliberately scoped to the live (pending) queue,
741
+ * mirroring the historical behaviour of scanning only the live directory.
742
+ *
743
+ * `%` / `_` / `\` in the prefix are escaped so the LIKE pattern is literal.
744
+ */
745
+ export function listStateProposalIdsByPrefix(db, stashDir, idPrefix) {
746
+ const escaped = idPrefix.replace(/[\\%_]/g, (ch) => `\\${ch}`);
747
+ const rows = db
748
+ .prepare(`SELECT id FROM proposals
749
+ WHERE stash_dir = ? AND status = 'pending' AND id LIKE ? ESCAPE '\\'
750
+ ORDER BY id ASC`)
751
+ .all(stashDir, `${escaped}%`);
752
+ return rows.map((r) => r.id);
753
+ }
754
+ /**
755
+ * Whether the legacy filesystem proposal import has already run for `stashDir`.
756
+ * See migration 005 (`proposal_fs_imports`).
757
+ */
758
+ export function hasImportedFsProposals(db, stashDir) {
759
+ // Drivers disagree on the no-row sentinel (bun:sqlite → null,
760
+ // better-sqlite3 → undefined) — Boolean() covers both.
761
+ return Boolean(db.prepare("SELECT 1 FROM proposal_fs_imports WHERE stash_dir = ?").get(stashDir));
762
+ }
763
+ /**
764
+ * Record that the legacy filesystem proposal import completed for `stashDir`
765
+ * so subsequent invocations skip the directory walk. INSERT OR REPLACE keeps
766
+ * the call idempotent.
767
+ */
768
+ export function recordFsProposalsImport(db, stashDir, importedCount) {
769
+ db.prepare("INSERT OR REPLACE INTO proposal_fs_imports (stash_dir, imported_at, imported_count) VALUES (?, ?, ?)").run(stashDir, new Date().toISOString(), importedCount);
770
+ }
771
+ /**
772
+ * Insert a proposal row ONLY when the id is not already present (used by the
773
+ * legacy filesystem import so re-runs never clobber rows that have since been
774
+ * mutated through the canonical store). Returns true when a row was inserted.
775
+ */
776
+ export function insertProposalIfAbsent(db, proposal, stashDir) {
777
+ const v = proposalToRowValues(proposal, stashDir);
778
+ const result = db
779
+ .prepare(`
780
+ INSERT OR IGNORE INTO proposals
781
+ (id, stash_dir, ref, status, source, created_at, updated_at, content, frontmatter_json, metadata_json)
782
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
783
+ `)
784
+ .run(v.id, v.stash_dir, v.ref, v.status, v.source, v.created_at, v.updated_at, v.content, v.frontmatter_json, v.metadata_json);
785
+ const changes = result.changes ?? 0;
786
+ return Number(changes) > 0;
787
+ }
695
788
  // ── task_history table helpers ───────────────────────────────────────────────
696
789
  /**
697
790
  * Upsert a task history row.
@@ -28,7 +28,7 @@ export function openDatabase(dbPath, options) {
28
28
  }
29
29
  const db = openSqlite(resolvedPath);
30
30
  db.exec("PRAGMA journal_mode = WAL");
31
- db.exec("PRAGMA busy_timeout = 5000");
31
+ db.exec("PRAGMA busy_timeout = 30000");
32
32
  db.exec("PRAGMA foreign_keys = ON");
33
33
  // Try to load sqlite-vec extension
34
34
  loadVecExtension(db);
@@ -69,7 +69,7 @@ export function openExistingDatabase(dbPath) {
69
69
  const resolvedPath = dbPath ?? getDbPath();
70
70
  const db = openSqlite(resolvedPath);
71
71
  db.exec("PRAGMA journal_mode = WAL");
72
- db.exec("PRAGMA busy_timeout = 5000");
72
+ db.exec("PRAGMA busy_timeout = 30000");
73
73
  db.exec("PRAGMA foreign_keys = ON");
74
74
  // Existing-DB callers must not mutate schema or embedding metadata on open,
75
75
  // but some paths still need write access to usage_events and other tables.
@@ -119,6 +119,26 @@ export async function runMemoryInferencePass(ctx) {
119
119
  // 2026-05-26).
120
120
  if (signal?.aborted)
121
121
  return { aborted: true };
122
+ // Pre-check (#588): when `<parent>.derived.md` is already on disk the
123
+ // inference is by definition complete — the parent only looks pending
124
+ // because `markParentProcessed` never ran (process killed between the
125
+ // child write and the mark) or the child was created externally (e.g.
126
+ // consolidation). Skip the LLM/cache call entirely and mark the parent
127
+ // so it never re-pends. Before this check, production measurements
128
+ // showed ~55% of the pass's LLM budget re-deriving such parents only to
129
+ // discover the existing child after the fact.
130
+ if (fs.existsSync(derivedChildPath(record))) {
131
+ markParentProcessed(record);
132
+ return {
133
+ skipped: false,
134
+ splitParent: false,
135
+ written: 0,
136
+ fromCache: false,
137
+ retryAttempts: 0,
138
+ childExists: true,
139
+ precheck: true,
140
+ };
141
+ }
122
142
  // Incremental cache: skip LLM call when body hash is unchanged and
123
143
  // --re-enrich was not requested. The cache ref is the absolute file path.
124
144
  const validate = (raw) => {
@@ -171,23 +191,30 @@ export async function runMemoryInferencePass(ctx) {
171
191
  return { skipped: false, splitParent: true, written: writeOutcome.written, fromCache, retryAttempts };
172
192
  }
173
193
  // LLM produced a valid derived draft but no file was written — either
174
- // because `<parent>.derived.md` already exists on disk or
175
- // `writeAssetToSource` threw. Categorise as `childExists` so the
176
- // attempt is accounted for in health metrics rather than vanishing
177
- // into the freshAttempts denominator.
194
+ // because `<parent>.derived.md` appeared on disk after the pre-check
195
+ // above (a rare mid-flight race) or `writeAssetToSource` threw.
196
+ // Categorise as `childExists` so the consumed attempt is accounted for
197
+ // in health metrics rather than vanishing into the freshAttempts
198
+ // denominator.
178
199
  //
179
- // When the child already exists on disk the inference is, by definition,
180
- // already complete — so mark the parent processed here too (#550).
181
- // Without this, `isPendingMemory()` re-queues the same parent every run
182
- // (the `written > 0` path was previously the only site that marks it),
183
- // causing permanent re-queueing and wasted LLM calls. A genuine write
184
- // *failure* (`writeAssetToSource` threw) must NOT mark the parent — it
185
- // should be retried next run — so we key off the explicit `childExists`
186
- // outcome rather than the conflated `written === 0`.
200
+ // When the child exists the inference is, by definition, complete — so
201
+ // mark the parent processed here too (#550), otherwise
202
+ // `isPendingMemory()` re-queues the same parent every run. A genuine
203
+ // write *failure* (`writeAssetToSource` threw) must NOT mark the parent —
204
+ // it should be retried next run — so we key off the explicit
205
+ // `childExists` outcome rather than the conflated `written === 0`.
187
206
  if (writeOutcome.childExists) {
188
207
  markParentProcessed(record);
189
208
  }
190
- return { skipped: false, splitParent: false, written: 0, fromCache, retryAttempts, childExists: true };
209
+ return {
210
+ skipped: false,
211
+ splitParent: false,
212
+ written: 0,
213
+ fromCache,
214
+ retryAttempts,
215
+ childExists: true,
216
+ precheck: false,
217
+ };
191
218
  },
192
219
  // Default concurrency of 4 for cloud APIs. Set `llm.concurrency: 1`
193
220
  // in config.json for local model servers (LM Studio, Ollama).
@@ -224,11 +251,16 @@ export async function runMemoryInferencePass(ctx) {
224
251
  result.writtenFacts += res.written;
225
252
  }
226
253
  else if ("childExists" in res && res.childExists) {
227
- // LLM call was consumed but the derived file already existed (or the
228
- // write threw). Track separately so this category is observable in
229
- // health output and stops bleeding into the freshAttempts denominator.
254
+ // Derived child already on disk. Track separately so this category is
255
+ // observable in health output and stops bleeding into the
256
+ // freshAttempts denominator. Pre-check skips (#588) are the routine
257
+ // self-healing path — no LLM attempt was consumed and the parent has
258
+ // been marked processed — so only the rare post-LLM case (mid-flight
259
+ // race or write failure) warrants a per-ref warning.
230
260
  result.skippedChildExists += 1;
231
- warn(`memory inference: derived child for ${pending[i]?.ref ?? "<unknown>"} already existed or write failed; counted as skippedChildExists`);
261
+ if (!res.precheck) {
262
+ warn(`memory inference: derived child for ${pending[i]?.ref ?? "<unknown>"} already existed or write failed; counted as skippedChildExists`);
263
+ }
232
264
  }
233
265
  else {
234
266
  // The per-record state machine should cover every outcome. A hit here
@@ -324,6 +356,14 @@ function toMemoryName(memoriesDir, filePath) {
324
356
  // user has organised under memories/.
325
357
  return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
326
358
  }
359
+ /**
360
+ * Absolute path of the derived child for a parent memory. Single source of
361
+ * truth for the `<parent>.derived.md` naming convention — used both by the
362
+ * pre-LLM existence check (#588) and the write path.
363
+ */
364
+ function derivedChildPath(parent) {
365
+ return path.join(parent.stashRoot, "memories", `${parent.name}.derived.md`);
366
+ }
327
367
  async function writeDerivedMemory(parent, derived) {
328
368
  const writeTarget = {
329
369
  kind: "filesystem",
@@ -338,11 +378,10 @@ async function writeDerivedMemory(parent, derived) {
338
378
  };
339
379
  const childName = `${parent.name}.derived`;
340
380
  const childRefStr = `memory:${childName}`;
341
- const childPath = path.join(parent.stashRoot, "memories", `${childName}.md`);
342
- if (fs.existsSync(childPath)) {
343
- // The derived child is already on disk — inference for this parent is
344
- // complete. Report `childExists` so the caller marks the parent processed
345
- // (#550) instead of re-queueing it forever.
381
+ if (fs.existsSync(derivedChildPath(parent))) {
382
+ // The derived child appeared on disk after the caller's pre-check (#588) —
383
+ // a rare mid-flight race. Report `childExists` so the caller marks the
384
+ // parent processed (#550) instead of re-queueing it forever.
346
385
  return { written: 0, childExists: true };
347
386
  }
348
387
  try {