akm-cli 0.9.0-beta.1 → 0.9.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +128 -0
  2. package/dist/assets/templates/html/default.html +78 -0
  3. package/dist/assets/templates/html/health.html +560 -0
  4. package/dist/assets/templates/html/vendor/echarts.min.js +45 -0
  5. package/dist/cli/shared.js +21 -5
  6. package/dist/cli.js +36 -5
  7. package/dist/commands/config-cli.js +0 -10
  8. package/dist/commands/health/html-report.js +448 -0
  9. package/dist/commands/health.js +97 -6
  10. package/dist/commands/improve/extract.js +38 -2
  11. package/dist/commands/improve/improve-auto-accept.js +27 -1
  12. package/dist/commands/improve/improve-cli.js +7 -0
  13. package/dist/commands/improve/improve.js +201 -66
  14. package/dist/commands/improve/reflect-noise.js +0 -0
  15. package/dist/commands/improve/reflect.js +25 -0
  16. package/dist/commands/proposal/drain.js +73 -6
  17. package/dist/commands/proposal/proposal-cli.js +22 -10
  18. package/dist/commands/proposal/proposal.js +12 -1
  19. package/dist/commands/proposal/validators/proposals.js +361 -338
  20. package/dist/commands/remember.js +6 -2
  21. package/dist/commands/tasks/tasks.js +32 -8
  22. package/dist/core/config/config-schema.js +5 -0
  23. package/dist/core/logs-db.js +304 -0
  24. package/dist/core/state-db.js +107 -14
  25. package/dist/indexer/db/db.js +2 -2
  26. package/dist/indexer/passes/memory-inference.js +61 -22
  27. package/dist/integrations/harnesses/claude/session-log.js +16 -4
  28. package/dist/llm/client.js +15 -0
  29. package/dist/llm/usage-persist.js +77 -0
  30. package/dist/llm/usage-telemetry.js +103 -0
  31. package/dist/output/context.js +3 -2
  32. package/dist/output/html-render.js +73 -0
  33. package/dist/output/shapes/helpers.js +17 -1
  34. package/dist/output/text/helpers.js +69 -1
  35. package/dist/scripts/migrate-storage.js +65 -14
  36. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +14 -2
  37. package/dist/tasks/backends/cron.js +46 -9
  38. package/dist/tasks/runner.js +99 -16
  39. package/dist/workflows/db.js +4 -0
  40. package/package.json +1 -1
  41. package/dist/commands/config-edit.js +0 -344
@@ -149,6 +149,10 @@ export async function runLlmEnrich(body) {
149
149
  return { tags: [] };
150
150
  }
151
151
  const { chatCompletion, parseEmbeddedJsonResponse: parseJsonResponse } = await import("../llm/client.js");
152
+ // #576: attribute this entry point's LLM call to the `remember` stage. The
153
+ // wrapper is ambient — if a usage sink is active it tags the record; if not,
154
+ // it is a no-op.
155
+ const { withLlmStage } = await import("../llm/usage-telemetry.js");
152
156
  const prompt = `You are a memory tagger for a developer knowledge base.
153
157
  Given the memory text below, return ONLY a JSON object with these fields:
154
158
  - "tags": array of 1-5 short lowercase keyword tags
@@ -164,10 +168,10 @@ Return ONLY the JSON object, no prose, no markdown fences.`;
164
168
  const result = await (async () => {
165
169
  try {
166
170
  return await Promise.race([
167
- chatCompletion(llmConfig, [
171
+ withLlmStage("remember", () => chatCompletion(llmConfig, [
168
172
  { role: "system", content: "Return only valid JSON. No prose." },
169
173
  { role: "user", content: prompt },
170
- ], { maxTokens: 256, temperature: 0.1 }),
174
+ ], { maxTokens: 256, temperature: 0.1 })),
171
175
  new Promise((_, reject) => {
172
176
  timeoutHandle = setTimeout(() => reject(new Error("LLM enrichment timed out")), LLM_ENRICH_TIMEOUT_MS);
173
177
  }),
@@ -218,7 +218,13 @@ export async function akmTasksSetEnabled(id, enabled) {
218
218
  fs.writeFileSync(filePath, updated, "utf8");
219
219
  const sched = selectBackend();
220
220
  try {
221
- await sched.setEnabled(normalised, enabled);
221
+ // Reinstall from the (just-updated) definition rather than only toggling
222
+ // the comment. A plain toggle leaves a stale schedule in place if the
223
+ // .yml's `schedule:` changed while the task was disabled — re-enabling
224
+ // would silently keep the old cron line. install() renders the block with
225
+ // both the current schedule and the new enabled state, and is idempotent.
226
+ const task = parseTaskDocument({ yaml: updated, filePath, id: normalised });
227
+ await sched.install(task);
222
228
  }
223
229
  catch (err) {
224
230
  // Roll the file back so the YAML source-of-truth and the OS
@@ -254,9 +260,12 @@ export async function akmTasksHistory(input) {
254
260
  * Reconcile the on-disk task files with the OS scheduler.
255
261
  * • install missing tasks (after validating them — invalid files are
256
262
  * skipped with a per-task reason rather than aborting the whole sync)
263
+ * • reinstall tasks whose schedule or enabled state changed in the .yml
264
+ * (drift detected by comparing the backend's installed signature against
265
+ * the signature the current definition would produce)
257
266
  * • remove orphan scheduler entries that no longer have a backing file
258
267
  */
259
- export async function akmTasksSync() {
268
+ export async function akmTasksSync(deps = {}) {
260
269
  const stashDir = resolveStashDir();
261
270
  const typeRoot = path.join(stashDir, "tasks");
262
271
  if (fs.existsSync(typeRoot))
@@ -267,10 +276,13 @@ export async function akmTasksSync() {
267
276
  .filter((f) => f.endsWith(".yml"))
268
277
  .map((f) => f.slice(0, -4))
269
278
  : [];
270
- const sched = selectBackend();
279
+ const sched = deps.backend ?? selectBackend();
271
280
  const backend = backendNameForPlatform();
272
- const present = new Set((await sched.list()).map((t) => t.id));
281
+ // Map each id to its installed signature so sync can detect schedule/enabled drift on
282
+ // tasks that already exist in the scheduler, not just presence/absence.
283
+ const present = new Map((await sched.list()).map((t) => [t.id, t.signature]));
273
284
  const installed = [];
285
+ const updated = [];
274
286
  const unchanged = [];
275
287
  const skipped = [];
276
288
  for (const id of fileIds) {
@@ -290,22 +302,34 @@ export async function akmTasksSync() {
290
302
  skipped.push({ id, reason: err instanceof Error ? err.message : String(err) });
291
303
  continue;
292
304
  }
293
- if (present.has(id)) {
305
+ if (!present.has(id)) {
306
+ await sched.install(task);
307
+ installed.push(id);
308
+ continue;
309
+ }
310
+ // Already installed — reconcile against the current definition. Compare the
311
+ // installed signature to what this task would render to; reinstall on drift.
312
+ // When the backend can't produce a signature (no expectedSignature, or it
313
+ // didn't record one), reinstall unconditionally — install() is idempotent,
314
+ // so the cost is one crontab write and correctness is guaranteed.
315
+ const installedSig = present.get(id);
316
+ const expectedSig = sched.expectedSignature?.(task);
317
+ if (installedSig !== undefined && expectedSig !== undefined && installedSig === expectedSig) {
294
318
  unchanged.push(id);
295
319
  }
296
320
  else {
297
321
  await sched.install(task);
298
- installed.push(id);
322
+ updated.push(id);
299
323
  }
300
324
  }
301
325
  const removed = [];
302
- for (const installedId of present) {
326
+ for (const installedId of present.keys()) {
303
327
  if (!fileIds.includes(installedId)) {
304
328
  await sched.uninstall(installedId);
305
329
  removed.push(installedId);
306
330
  }
307
331
  }
308
- return { installed, removed, unchanged, skipped, backend: sched.name };
332
+ return { installed, updated, removed, unchanged, skipped, backend: sched.name };
309
333
  }
310
334
  export async function akmTasksDoctor() {
311
335
  const warnings = [];
@@ -138,6 +138,11 @@ export const ImproveProcessConfigSchema = z
138
138
  // Extract process config (only meaningful for extract process)
139
139
  defaultSince: z.string().min(1).optional(),
140
140
  maxTotalChars: positiveInt.optional(),
141
+ // Extract process: minimum raw session size (pre-filter inputCount) below
142
+ // which the extract LLM call is skipped (#595/#596). 0 disables the gate.
143
+ // Absent = default 10 (skip only truly empty sessions). Only meaningful
144
+ // on the `extract` process.
145
+ minContentChars: z.number().int().min(0).optional(),
141
146
  maxChunkSize: z.number().int().min(1).max(50).optional(),
142
147
  // Extract process: minimum number of new (unseen, in-window) candidate
143
148
  // sessions below which the extract pass skips entirely (emits an
@@ -0,0 +1,304 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * logs.db — Dedicated SQLite database for task/run log lines (#579).
6
+ *
7
+ * Replaces grep-the-flat-file consumption of `<cacheDir>/tasks/logs/<id>/<ts>.log`
8
+ * with structured, indexed rows: `{ts, task_id, run_id, stream, level, line}`.
9
+ * The strategic direction (stop scattering data across files/folders) means
10
+ * every NEW log consumer queries this database; the per-run text file written
11
+ * by the task runner is retained only as a transitional tail for humans —
12
+ * see docs/technical/logs-audit.md for the full producer audit.
13
+ *
14
+ * ## Why a separate database from state.db
15
+ *
16
+ * Log lines are high-volume, append-only, and freely purgeable; state.db rows
17
+ * (events, proposals, task_history) are durable records. Separating them keeps
18
+ * state.db small and lets log retention be aggressive without touching durable
19
+ * state. Cross-db queries (e.g. "failed task_history row → its log lines") use
20
+ * SQLite ATTACH — see {@link attachStateDatabase}.
21
+ *
22
+ * ## run_id
23
+ *
24
+ * state.db's `task_history` identifies a run by the unique pair
25
+ * `(task_id, started_at)` (see migration 002 in state-db.ts). logs.db encodes
26
+ * that pair as a single string — {@link buildTaskRunId} — so log rows can be
27
+ * joined back to their history row:
28
+ *
29
+ * l.run_id = th.task_id || '@' || th.started_at
30
+ *
31
+ * ## Schema evolution
32
+ *
33
+ * Same migration-safety contract as state.db: append-only `MIGRATIONS` applied
34
+ * through the shared runner in src/storage/engines/sqlite-migrations.ts.
35
+ *
36
+ * @module logs-db
37
+ */
38
+ import fs from "node:fs";
39
+ import path from "node:path";
40
+ import { openDatabase } from "../storage/database.js";
41
+ import { runMigrations as runSqliteMigrations } from "../storage/engines/sqlite-migrations.js";
42
+ import { getDataDir } from "./paths.js";
43
+ import { getStateDbPath } from "./state-db.js";
44
+ // ── Path helper ──────────────────────────────────────────────────────────────
45
/**
 * Resolve the default location of the logs database.
 *
 * The file is always named `logs.db` and sits directly inside the directory
 * returned by `getDataDir()`, so every process that resolves the same data
 * directory resolves the same logs database path.
 *
 * @returns {string} Path of the default logs database file.
 */
export function getLogsDbPath() {
    const dataDir = getDataDir();
    return path.join(dataDir, "logs.db");
}
53
+ // ── Database open ────────────────────────────────────────────────────────────
54
/**
 * Open the logs database, creating its parent directory and applying
 * migrations as needed.
 *
 * Steps, in order:
 *   1. Resolve the target path (`dbPath`, or {@link getLogsDbPath} when it is
 *      null/undefined).
 *   2. Create the parent directory (recursively) when it does not exist.
 *   3. Open the handle through `openDatabase`.
 *   4. Issue `PRAGMA journal_mode = WAL` and `PRAGMA busy_timeout = 30000`
 *      before anything else touches the schema. WAL lets a reader (for
 *      example `akm health`) run while the task runner writes; the 30 s busy
 *      timeout lets a writer wait out a concurrent writer instead of failing
 *      straight away with SQLITE_BUSY.
 *   5. Run the logs.db migrations.
 *
 * @param {string} [dbPath] - Override for the database file path (tests pass
 *   a tmpdir path).
 * @returns The opened, migrated database handle.
 */
export function openLogsDatabase(dbPath) {
    const target = dbPath ?? getLogsDbPath();
    const parentDir = path.dirname(target);
    const parentMissing = !fs.existsSync(parentDir);
    if (parentMissing) {
        fs.mkdirSync(parentDir, { recursive: true });
    }
    const handle = openDatabase(target);
    // PRAGMAs go first: no DDL or DML may run before them.
    const pragmas = ["PRAGMA journal_mode = WAL", "PRAGMA busy_timeout = 30000"];
    for (const pragma of pragmas) {
        handle.exec(pragma);
    }
    runMigrations(handle);
    return handle;
}
84
+ // ── Migrations ───────────────────────────────────────────────────────────────
85
/**
 * Ordered list of logs.db migrations, handed as-is to the shared SQLite
 * migration runner by {@link runMigrations}.
 *
 * Contract (same as state.db's MIGRATIONS array): APPEND only — never insert
 * an entry in the middle and never reorder existing entries.
 */
const MIGRATIONS = [
    // ── Migration 001 — task_logs ───────────────────────────────────────────
    //
    // Creates `task_logs`: one row per log line emitted by a task run.
    //
    // Columns backed by an index:
    //   ts       TEXT — ISO-8601 UTC timestamp; time-window queries and purge.
    //   task_id  TEXT — task identifier; per-task log views.
    //   run_id   TEXT — buildTaskRunId(task_id, started_at); per-run views and
    //                   the join key back to state.db task_history.
    //
    // Columns without an index:
    //   stream   TEXT — 'stdout' | 'stderr'; defaults to 'stdout'.
    //   level    TEXT — 'info' | 'warn' | 'error'; defaults to 'info'.
    //   line     TEXT — the log line itself (no trailing newline).
    //
    // Possible future ADD COLUMN extensions:
    //   ALTER TABLE task_logs ADD COLUMN seq INTEGER DEFAULT NULL;
    //   ALTER TABLE task_logs ADD COLUMN source TEXT DEFAULT NULL;
    //
    // Retention: nothing is deleted here; old rows are removed only by
    // purgeOldTaskLogs().
    {
        id: "001-task-logs",
        up: `
      CREATE TABLE IF NOT EXISTS task_logs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        ts TEXT NOT NULL,
        task_id TEXT NOT NULL,
        run_id TEXT NOT NULL,
        stream TEXT NOT NULL DEFAULT 'stdout',
        level TEXT NOT NULL DEFAULT 'info',
        line TEXT NOT NULL
      );

      -- Query patterns:
      -- SELECT … WHERE ts >= ? AND ts <= ? → idx_task_logs_ts (purge, windows)
      -- SELECT … WHERE task_id = ? → idx_task_logs_task_id
      -- SELECT … WHERE run_id = ? → idx_task_logs_run_id (per-run tail)
      CREATE INDEX IF NOT EXISTS idx_task_logs_ts ON task_logs(ts);
      CREATE INDEX IF NOT EXISTS idx_task_logs_task_id ON task_logs(task_id);
      CREATE INDEX IF NOT EXISTS idx_task_logs_run_id ON task_logs(run_id);
    `,
    },
];
136
/**
 * Run the logs.db migration list against `db` through the shared SQLite
 * migration runner.
 *
 * {@link openLogsDatabase} calls this on every open; it is exported so tests
 * can drive migrations directly, as state-db allows.
 *
 * @param db - Open logs.db handle to migrate.
 */
export function runMigrations(db) {
    const migrations = MIGRATIONS;
    runSqliteMigrations(db, migrations);
}
143
+ // ── run_id ───────────────────────────────────────────────────────────────────
144
/**
 * Build the run_id string for one task run by joining the task id and the
 * run's start timestamp with an `@`.
 *
 * The result must remain identical to the SQL expression
 * `task_id || '@' || started_at` that {@link queryFailedRunLogLines} uses as
 * its join key against state.db `task_history`.
 *
 * @param {string} taskId - Task identifier.
 * @param {string} startedAtIso - The run's `started_at` value (ISO-8601).
 * @returns {string} `<taskId>@<startedAtIso>`.
 */
export function buildTaskRunId(taskId, startedAtIso) {
    const separator = "@";
    return `${taskId}${separator}${startedAtIso}`;
}
154
/**
 * Write all log lines of a single task run inside one transaction.
 *
 * Every row shares `input.ts`, `input.taskId` and `input.runId`; each entry
 * supplies its own `line` and may override `stream` (default "stdout") and
 * `level` (default "info"). Rows are inserted in array order, so reading
 * them back with `ORDER BY id` reproduces emission order.
 *
 * Errors are not caught here — callers that need best-effort behaviour must
 * wrap the call themselves.
 *
 * @param db - Open logs.db handle.
 * @param input - `{ ts, taskId, runId, lines }` where `lines` is an array of
 *   `{ line, stream?, level? }`.
 * @returns {number} Number of lines inserted (0 when `lines` is empty, in
 *   which case the database is not touched).
 */
export function insertTaskLogLines(db, input) {
    const total = input.lines.length;
    if (total === 0) {
        return 0;
    }
    const insert = db.prepare(`INSERT INTO task_logs (ts, task_id, run_id, stream, level, line)
     VALUES (?, ?, ?, ?, ?, ?)`);
    const writeAll = db.transaction(() => {
        for (const entry of input.lines) {
            const stream = entry.stream ?? "stdout";
            const level = entry.level ?? "info";
            insert.run(input.ts, input.taskId, input.runId, stream, level, entry.line);
        }
    });
    writeAll();
    return total;
}
175
/**
 * Read log lines matching the filter, in emission order (ascending id).
 *
 * Supported `options` (all optional; falsy filter values are ignored):
 *   - `taskId` — exact match on `task_id`
 *   - `runId`  — exact match on `run_id`
 *   - `stream` — exact match on `stream`
 *   - `since`  — inclusive lower bound on `ts`
 *   - `until`  — exclusive upper bound on `ts`
 *   - `limit`  — maximum number of rows. Fractions are floored. Negative,
 *                nullish, NaN and infinite values mean "no limit".
 *
 * `.all()` materialises a plain array before returning, so the result does
 * not depend on the handle staying open.
 *
 * @param db - Open logs.db handle.
 * @param {object} [options] - Filter options described above.
 * @returns {Array<object>} Rows of
 *   `{ id, ts, task_id, run_id, stream, level, line }`.
 */
export function queryTaskLogs(db, options = {}) {
    // Column filters are bound as parameters; only fixed SQL fragments are
    // ever concatenated into the statement text.
    const filters = [
        ["task_id = ?", options.taskId],
        ["run_id = ?", options.runId],
        ["stream = ?", options.stream],
        ["ts >= ?", options.since],
        ["ts < ?", options.until],
    ];
    const conditions = [];
    const params = [];
    for (const [clause, value] of filters) {
        if (value) {
            conditions.push(clause);
            params.push(value);
        }
    }
    const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
    // The limit is interpolated, so it must render as a plain integer literal.
    // `Infinity` would otherwise produce `LIMIT Infinity` and very large values
    // `LIMIT 1e+21`, both invalid SQL; non-finite limits are treated as
    // "unlimited" and finite ones are clamped to the largest safe integer.
    let limit = "";
    if (options.limit != null && Number.isFinite(Number(options.limit)) && options.limit >= 0) {
        const capped = Math.min(Math.floor(options.limit), Number.MAX_SAFE_INTEGER);
        limit = ` LIMIT ${capped}`;
    }
    return db
        .prepare(`SELECT id, ts, task_id, run_id, stream, level, line FROM task_logs ${where} ORDER BY id ASC${limit}`)
        .all(...params);
}
210
/**
 * Report which of the given run ids have at least one row in `task_logs`.
 *
 * The ids are looked up in batches of 500, one `IN (…)` query per batch, so
 * the number of bound parameters per statement stays bounded.
 *
 * @param db - Open logs.db handle.
 * @param {string[]} runIds - Candidate run ids (see {@link buildTaskRunId}).
 * @returns {Set<string>} The subset of `runIds` that has log rows; empty when
 *   `runIds` is empty (no query is issued in that case).
 */
export function getLoggedRunIds(db, runIds) {
    const BATCH_SIZE = 500;
    const found = new Set();
    let offset = 0;
    while (offset < runIds.length) {
        const batch = runIds.slice(offset, offset + BATCH_SIZE);
        const marks = batch.map(() => "?").join(",");
        const stmt = db.prepare(`SELECT DISTINCT run_id FROM task_logs WHERE run_id IN (${marks})`);
        for (const { run_id: runId } of stmt.all(...batch)) {
            found.add(runId);
        }
        offset += BATCH_SIZE;
    }
    return found;
}
232
+ // ── Cross-db: ATTACH state.db ────────────────────────────────────────────────
233
/**
 * Attach state.db to an already-open logs.db handle under the schema name
 * `state`, so one statement can join task_history with task_logs.
 *
 * The state.db file has to exist beforehand. The check is explicit because,
 * per the original rationale, attaching a missing path would silently create
 * an empty, unmigrated database file.
 *
 * @param db - Open logs.db handle.
 * @param {string} [stateDbPath] - state.db location; falls back to
 *   `getStateDbPath()` when null/undefined.
 * @throws {Error} When no file exists at the resolved path.
 */
export function attachStateDatabase(db, stateDbPath) {
    const target = stateDbPath ?? getStateDbPath();
    const present = fs.existsSync(target);
    if (!present) {
        throw new Error(`Cannot ATTACH state.db: file does not exist at ${target}`);
    }
    // The path is bound as a parameter of a prepared statement instead of
    // being spliced into the SQL text.
    const attach = db.prepare("ATTACH DATABASE ? AS state");
    attach.run(target);
}
250
/**
 * Open logs.db and attach state.db to it as schema `state` in one step.
 *
 * The returned handle can run cross-db queries such as
 * {@link queryFailedRunLogLines}. If attaching fails, the freshly opened
 * handle is closed before the error is rethrown, so no handle leaks.
 *
 * @param {string} [logsDbPath] - Forwarded to {@link openLogsDatabase}.
 * @param {string} [stateDbPath] - Forwarded to {@link attachStateDatabase}.
 * @returns The open logs.db handle with state.db attached.
 */
export function openLogsDatabaseWithState(logsDbPath, stateDbPath) {
    const handle = openLogsDatabase(logsDbPath);
    let attached = false;
    try {
        attachStateDatabase(handle, stateDbPath);
        attached = true;
    }
    finally {
        // Only reached with attached === false when the attach threw.
        if (!attached) {
            handle.close();
        }
    }
    return handle;
}
266
/**
 * Cross-db join: every log line that belongs to a FAILED `task_history` run.
 *
 * Requires a handle on which state.db is attached as `state` (see
 * {@link openLogsDatabaseWithState}). Log rows are matched to history rows
 * through the run_id encoding of {@link buildTaskRunId}:
 * `task_logs.run_id = task_history.task_id || '@' || task_history.started_at`.
 *
 * Supported `options` (all optional):
 *   - `since` — only runs whose `started_at` is `>= since`; all failed runs
 *               when omitted or falsy.
 *   - `limit` — maximum number of rows. Fractions are floored. Negative,
 *               nullish, NaN and infinite values mean "no limit".
 *
 * Rows are ordered newest run first, then by log row id within a run.
 *
 * @param db - logs.db handle with state.db attached as `state`.
 * @param {object} [options] - Filter options described above.
 * @returns {Array<object>} Rows of
 *   `{ task_id, run_id, started_at, status, ts, stream, level, line }`.
 */
export function queryFailedRunLogLines(db, options = {}) {
    const conditions = ["th.status = 'failed'"];
    const params = [];
    if (options.since) {
        conditions.push("th.started_at >= ?");
        params.push(options.since);
    }
    // The limit is interpolated, so it must render as a plain integer literal.
    // `Infinity` would otherwise produce `LIMIT Infinity` and very large values
    // `LIMIT 1e+21`, both invalid SQL; non-finite limits are treated as
    // "unlimited" and finite ones are clamped to the largest safe integer.
    let limit = "";
    if (options.limit != null && Number.isFinite(Number(options.limit)) && options.limit >= 0) {
        const capped = Math.min(Math.floor(options.limit), Number.MAX_SAFE_INTEGER);
        limit = ` LIMIT ${capped}`;
    }
    return db
        .prepare(`SELECT th.task_id, l.run_id, th.started_at, th.status, l.ts, l.stream, l.level, l.line
       FROM state.task_history th
       JOIN task_logs l ON l.run_id = th.task_id || '@' || th.started_at
       WHERE ${conditions.join(" AND ")}
       ORDER BY th.started_at DESC, l.id ASC${limit}`)
        .all(...params);
}
290
+ // ── Retention ────────────────────────────────────────────────────────────────
291
/**
 * Remove `task_logs` rows whose `ts` is older than the retention window.
 *
 * The cutoff is "now minus `retentionDays` days", rendered as an ISO-8601
 * string and compared against the stored `ts` text. A non-finite or
 * non-positive `retentionDays` disables the purge entirely.
 *
 * @param db - Open logs.db handle.
 * @param {number} [retentionDays=90] - Age in days beyond which rows are
 *   deleted.
 * @returns {number} Number of rows deleted (0 when the purge is disabled or
 *   the driver reports no change count).
 */
export function purgeOldTaskLogs(db, retentionDays = 90) {
    const enabled = Number.isFinite(retentionDays) && retentionDays > 0;
    if (!enabled) {
        return 0;
    }
    const MS_PER_DAY = 86_400_000;
    const cutoffIso = new Date(Date.now() - retentionDays * MS_PER_DAY).toISOString();
    const { changes } = db.prepare("DELETE FROM task_logs WHERE ts < ?").run(cutoffIso);
    const deleted = changes ?? 0;
    // Some drivers report the change count as a bigint; normalise to number.
    if (typeof deleted === "bigint") {
        return Number(deleted);
    }
    return deleted;
}
@@ -86,11 +86,12 @@ export function getStateDbPath() {
86
86
  * backwards compatibility; enabling them prevents orphaned rows in tables
87
87
  * that reference each other (not used in v1 schema but guards future ones).
88
88
  *
89
- * busy_timeout = 5000
89
+ * busy_timeout = 30000
90
90
  * When another connection holds a write lock, SQLite retries for up to
91
- * 5 000 ms before returning SQLITE_BUSY. Without this, the default timeout
92
- * is 0 ms — any concurrent writer causes an immediate error. 5 s matches
93
- * the same value used in openDatabase() for index.db.
91
+ * 30 000 ms before returning SQLITE_BUSY. Without this, the default timeout
92
+ * is 0 ms — any concurrent writer causes an immediate error. 30 s (#589)
93
+ * matches the value used in openDatabase() for index.db; 5 s proved too
94
+ * narrow when a post-inference reindex overlapped a parallel event write.
94
95
  */
95
96
  export function openStateDatabase(dbPath) {
96
97
  const resolvedPath = dbPath ?? getStateDbPath();
@@ -102,7 +103,7 @@ export function openStateDatabase(dbPath) {
102
103
  // PRAGMAs must run before any DDL or DML.
103
104
  db.exec("PRAGMA journal_mode = WAL");
104
105
  db.exec("PRAGMA foreign_keys = ON");
105
- db.exec("PRAGMA busy_timeout = 5000");
106
+ db.exec("PRAGMA busy_timeout = 30000");
106
107
  runMigrations(db);
107
108
  return db;
108
109
  }
@@ -190,7 +191,9 @@ const MIGRATIONS = [
190
191
  --
191
192
  -- Extensible (metadata_json) columns:
192
193
  -- metadata_json TEXT — JSON object for future proposal fields.
193
- -- Current fields stored here: sourceRun, review.
194
+ -- Current fields stored here: sourceRun,
195
+ -- review, confidence, gateDecision (#577),
196
+ -- backupContent.
194
197
  --
195
198
  -- ADD COLUMN extension points (future migrations):
196
199
  -- ALTER TABLE proposals ADD COLUMN source_run TEXT DEFAULT NULL;
@@ -458,6 +461,33 @@ const MIGRATIONS = [
458
461
  ON extract_sessions_seen(processed_at);
459
462
  `,
460
463
  },
464
+ // ── Migration 005 — proposal_fs_imports ─────────────────────────────────────
465
+ //
466
+ // One-shot ledger for the legacy filesystem→SQLite proposal import (#578).
467
+ //
468
+ // Before 0.9.0 the proposal queue lived as per-uuid JSON directories under
469
+ // `<stashDir>/.akm/proposals/` and the `proposals` table (created in 001) was
470
+ // dead weight. 0.9.0 makes the table canonical; the first proposal operation
471
+ // against a stash imports any legacy `proposal.json` files it finds (INSERT
472
+ // OR IGNORE, so re-runs never duplicate) and records the stash here so later
473
+ // invocations skip the directory walk entirely.
474
+ //
475
+ // Indexed (query) columns:
476
+ // stash_dir TEXT PK — absolute stash root the import ran against.
477
+ //
478
+ // Non-indexed columns:
479
+ // imported_at TEXT — ISO-8601 UTC; when the import completed.
480
+ // imported_count INTEGER — rows actually inserted by the import.
481
+ {
482
+ id: "005-proposal-fs-imports",
483
+ up: `
484
+ CREATE TABLE IF NOT EXISTS proposal_fs_imports (
485
+ stash_dir TEXT PRIMARY KEY,
486
+ imported_at TEXT NOT NULL,
487
+ imported_count INTEGER NOT NULL DEFAULT 0
488
+ );
489
+ `,
490
+ },
461
491
  ];
462
492
  /**
463
493
  * Apply every pending migration in a single transaction per migration.
@@ -529,6 +559,9 @@ export function proposalRowToProposal(row) {
529
559
  ...(frontmatter !== undefined ? { frontmatter } : {}),
530
560
  },
531
561
  ...(meta.review !== undefined ? { review: meta.review } : {}),
562
+ ...(typeof meta.confidence === "number" ? { confidence: meta.confidence } : {}),
563
+ ...(meta.gateDecision !== undefined ? { gateDecision: meta.gateDecision } : {}),
564
+ ...(typeof meta.backupContent === "string" ? { backupContent: meta.backupContent } : {}),
532
565
  };
533
566
  }
534
567
  /**
@@ -542,6 +575,12 @@ export function proposalToRowValues(proposal, stashDir) {
542
575
  metaObj.sourceRun = proposal.sourceRun;
543
576
  if (proposal.review !== undefined)
544
577
  metaObj.review = proposal.review;
578
+ if (proposal.confidence !== undefined)
579
+ metaObj.confidence = proposal.confidence;
580
+ if (proposal.gateDecision !== undefined)
581
+ metaObj.gateDecision = proposal.gateDecision;
582
+ if (proposal.backupContent !== undefined)
583
+ metaObj.backupContent = proposal.backupContent;
545
584
  return {
546
585
  id: proposal.id,
547
586
  stash_dir: stashDir,
@@ -656,7 +695,10 @@ export function upsertProposal(db, proposal, stashDir) {
656
695
  }
657
696
  /**
658
697
  * List proposals, optionally filtered by stashDir, status, and/or ref.
659
- * Results are sorted by created_at ASC to match the existing listProposals() behaviour.
698
+ *
699
+ * Results are ordered by `created_at ASC` (matching the historical
700
+ * `listProposals()` sort), with `rowid` as a deterministic tiebreak so two
701
+ * proposals created in the same millisecond list in insertion order.
660
702
  */
661
703
  export function listStateProposals(db, options = {}) {
662
704
  const conditions = [];
@@ -677,21 +719,72 @@ export function listStateProposals(db, options = {}) {
677
719
  const rows = db
678
720
  .prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
679
721
  content, frontmatter_json, metadata_json
680
- FROM proposals ${where} ORDER BY created_at ASC`)
722
+ FROM proposals ${where} ORDER BY created_at ASC, rowid ASC`)
681
723
  .all(...params);
682
724
  return rows.map(proposalRowToProposal);
683
725
  }
684
726
  /**
685
- * Look up a single proposal by id. Returns undefined when not found.
727
+ * Look up a single proposal by id, optionally scoped to one stash root.
728
+ * Returns undefined when not found.
686
729
  */
687
- export function getStateProposal(db, id) {
688
- const row = db
689
- .prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
730
+ export function getStateProposal(db, id, stashDir) {
731
+ const sql = `SELECT id, stash_dir, ref, status, source, created_at, updated_at,
690
732
  content, frontmatter_json, metadata_json
691
- FROM proposals WHERE id = ?`)
692
- .get(id);
733
+ FROM proposals WHERE id = ?${stashDir ? " AND stash_dir = ?" : ""}`;
734
+ const row = (stashDir ? db.prepare(sql).get(id, stashDir) : db.prepare(sql).get(id));
693
735
  return row ? proposalRowToProposal(row) : undefined;
694
736
  }
737
/**
 * List the ids of PENDING proposals in one stash whose id begins with
 * `idPrefix`, sorted by id ascending.
 *
 * This backs the UUID-prefix form of `akm proposal show/accept/... <prefix>`;
 * only rows with `status = 'pending'` are considered.
 *
 * The prefix is matched literally: `%`, `_` and `\` are backslash-escaped
 * before being used in the LIKE pattern (the statement declares `\` as its
 * ESCAPE character).
 *
 * @param db - Open state.db handle.
 * @param {string} stashDir - Stash root the proposals belong to.
 * @param {string} idPrefix - Leading characters of the proposal id.
 * @returns {string[]} Matching proposal ids.
 */
export function listStateProposalIdsByPrefix(db, stashDir, idPrefix) {
    // "$&" re-inserts the matched character after the added backslash.
    const literalPrefix = idPrefix.replace(/[\\%_]/g, "\\$&");
    const likePattern = `${literalPrefix}%`;
    const stmt = db.prepare(`SELECT id FROM proposals
       WHERE stash_dir = ? AND status = 'pending' AND id LIKE ? ESCAPE '\\'
       ORDER BY id ASC`);
    const matches = stmt.all(stashDir, likePattern);
    return matches.map(({ id }) => id);
}
754
/**
 * Tell whether `proposal_fs_imports` (migration 005) already holds a row for
 * `stashDir`, i.e. whether the legacy filesystem proposal import has been
 * recorded for that stash.
 *
 * @param db - Open state.db handle.
 * @param {string} stashDir - Stash root to look up.
 * @returns {boolean} True when a row exists for `stashDir`.
 */
export function hasImportedFsProposals(db, stashDir) {
    const lookup = db.prepare("SELECT 1 FROM proposal_fs_imports WHERE stash_dir = ?");
    const marker = lookup.get(stashDir);
    // "No row" comes back as null from some drivers and undefined from
    // others; Boolean() maps both to false.
    return Boolean(marker);
}
763
/**
 * Mark the legacy filesystem proposal import as done for `stashDir`.
 *
 * Writes one `proposal_fs_imports` row stamped with the current time (ISO-8601)
 * and the number of imported proposals. INSERT OR REPLACE is used, so calling
 * this again for the same stash overwrites the earlier row instead of failing.
 *
 * @param db - Open state.db handle.
 * @param {string} stashDir - Stash root the import ran against.
 * @param {number} importedCount - Rows inserted by the import.
 */
export function recordFsProposalsImport(db, stashDir, importedCount) {
    const importedAt = new Date().toISOString();
    const upsert = db.prepare("INSERT OR REPLACE INTO proposal_fs_imports (stash_dir, imported_at, imported_count) VALUES (?, ?, ?)");
    upsert.run(stashDir, importedAt, importedCount);
}
771
/**
 * Insert a proposal row unless a row with the same id already exists.
 *
 * Uses INSERT OR IGNORE, so an existing row is left untouched — the legacy
 * filesystem import relies on this to avoid clobbering rows that have since
 * been changed through the canonical store.
 *
 * @param db - Open state.db handle.
 * @param proposal - Proposal to persist (converted by `proposalToRowValues`).
 * @param {string} stashDir - Stash root recorded on the row.
 * @returns {boolean} True when a new row was inserted, false when ignored.
 */
export function insertProposalIfAbsent(db, proposal, stashDir) {
    const row = proposalToRowValues(proposal, stashDir);
    const insert = db.prepare(`
      INSERT OR IGNORE INTO proposals
        (id, stash_dir, ref, status, source, created_at, updated_at, content, frontmatter_json, metadata_json)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    const { changes } = insert.run(
        row.id,
        row.stash_dir,
        row.ref,
        row.status,
        row.source,
        row.created_at,
        row.updated_at,
        row.content,
        row.frontmatter_json,
        row.metadata_json,
    );
    // `changes` may be a number, a bigint, or absent depending on the driver.
    const inserted = Number(changes ?? 0);
    return inserted > 0;
}
695
788
  // ── task_history table helpers ───────────────────────────────────────────────
696
789
  /**
697
790
  * Upsert a task history row.
@@ -28,7 +28,7 @@ export function openDatabase(dbPath, options) {
28
28
  }
29
29
  const db = openSqlite(resolvedPath);
30
30
  db.exec("PRAGMA journal_mode = WAL");
31
- db.exec("PRAGMA busy_timeout = 5000");
31
+ db.exec("PRAGMA busy_timeout = 30000");
32
32
  db.exec("PRAGMA foreign_keys = ON");
33
33
  // Try to load sqlite-vec extension
34
34
  loadVecExtension(db);
@@ -69,7 +69,7 @@ export function openExistingDatabase(dbPath) {
69
69
  const resolvedPath = dbPath ?? getDbPath();
70
70
  const db = openSqlite(resolvedPath);
71
71
  db.exec("PRAGMA journal_mode = WAL");
72
- db.exec("PRAGMA busy_timeout = 5000");
72
+ db.exec("PRAGMA busy_timeout = 30000");
73
73
  db.exec("PRAGMA foreign_keys = ON");
74
74
  // Existing-DB callers must not mutate schema or embedding metadata on open,
75
75
  // but some paths still need write access to usage_events and other tables.