npm - akm-cli - Versions diffs - 0.9.0-beta.2 → 0.9.0-beta.3 - Mend

akm-cli 0.9.0-beta.2 → 0.9.0-beta.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (36):

package/CHANGELOG.md +87 -0
package/dist/assets/templates/html/default.html +78 -0
package/dist/assets/templates/html/health.html +560 -0
package/dist/assets/templates/html/vendor/echarts.min.js +45 -0
package/dist/cli/shared.js +21 -5
package/dist/cli.js +36 -5
package/dist/commands/health/html-report.js +448 -0
package/dist/commands/health.js +97 -6
package/dist/commands/improve/extract.js +38 -2
package/dist/commands/improve/improve-auto-accept.js +27 -1
package/dist/commands/improve/improve.js +167 -53
package/dist/commands/improve/reflect-noise.js +0 -0
package/dist/commands/improve/reflect.js +25 -0
package/dist/commands/proposal/drain.js +73 -6
package/dist/commands/proposal/proposal-cli.js +22 -10
package/dist/commands/proposal/proposal.js +12 -1
package/dist/commands/proposal/validators/proposals.js +361 -338
package/dist/commands/remember.js +6 -2
package/dist/core/config/config-schema.js +5 -0
package/dist/core/logs-db.js +304 -0
package/dist/core/state-db.js +107 -14
package/dist/indexer/db/db.js +2 -2
package/dist/indexer/passes/memory-inference.js +61 -22
package/dist/integrations/harnesses/claude/session-log.js +16 -4
package/dist/llm/client.js +15 -0
package/dist/llm/usage-persist.js +77 -0
package/dist/llm/usage-telemetry.js +103 -0
package/dist/output/context.js +3 -2
package/dist/output/html-render.js +73 -0
package/dist/output/shapes/helpers.js +17 -1
package/dist/output/text/helpers.js +69 -1
package/dist/scripts/migrate-storage.js +65 -14
package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +14 -2
package/dist/tasks/runner.js +99 -16
package/dist/workflows/db.js +4 -0
package/package.json +1 -1

package/dist/commands/remember.js CHANGED Viewed

@@ -149,6 +149,10 @@ export async function runLlmEnrich(body) {
         return { tags: [] };
     }
     const { chatCompletion, parseEmbeddedJsonResponse: parseJsonResponse } = await import("../llm/client.js");
+    // #576: attribute this entry point's LLM call to the `remember` stage. The
+    // wrapper is ambient — if a usage sink is active it tags the record; if not,
+    // it is a no-op.
+    const { withLlmStage } = await import("../llm/usage-telemetry.js");
     const prompt = `You are a memory tagger for a developer knowledge base.
 Given the memory text below, return ONLY a JSON object with these fields:
 - "tags": array of 1-5 short lowercase keyword tags
@@ -164,10 +168,10 @@ Return ONLY the JSON object, no prose, no markdown fences.`;
         const result = await (async () => {
             try {
                 return await Promise.race([
-                    chatCompletion(llmConfig, [
+                    withLlmStage("remember", () => chatCompletion(llmConfig, [
                         { role: "system", content: "Return only valid JSON. No prose." },
                         { role: "user", content: prompt },
-                    ], { maxTokens: 256, temperature: 0.1 }),
+                    ], { maxTokens: 256, temperature: 0.1 })),
                     new Promise((_, reject) => {
                         timeoutHandle = setTimeout(() => reject(new Error("LLM enrichment timed out")), LLM_ENRICH_TIMEOUT_MS);
                     }),

package/dist/core/config/config-schema.js CHANGED Viewed

@@ -138,6 +138,11 @@ export const ImproveProcessConfigSchema = z
     // Extract process config (only meaningful for extract process)
     defaultSince: z.string().min(1).optional(),
     maxTotalChars: positiveInt.optional(),
+    // Extract process: minimum raw session size (pre-filter inputCount) below
+    // which the extract LLM call is skipped (#595/#596). 0 disables the gate.
+    // Absent = default 10 (skip only truly empty sessions). Only meaningful
+    // on the `extract` process.
+    minContentChars: z.number().int().min(0).optional(),
     maxChunkSize: z.number().int().min(1).max(50).optional(),
     // Extract process: minimum number of new (unseen, in-window) candidate
     // sessions below which the extract pass skips entirely (emits an

package/dist/core/logs-db.js ADDED Viewed

@@ -0,0 +1,304 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+/**
+ * logs.db — Dedicated SQLite database for task/run log lines (#579).
+ *
+ * Replaces grep-the-flat-file consumption of `<cacheDir>/tasks/logs/<id>/<ts>.log`
+ * with structured, indexed rows: `{ts, task_id, run_id, stream, level, line}`.
+ * The strategic direction (stop scattering data across files/folders) means
+ * every NEW log consumer queries this database; the per-run text file written
+ * by the task runner is retained only as a transitional tail for humans —
+ * see docs/technical/logs-audit.md for the full producer audit.
+ *
+ * ## Why a separate database from state.db
+ *
+ * Log lines are high-volume, append-only, and freely purgeable; state.db rows
+ * (events, proposals, task_history) are durable records. Separating them keeps
+ * state.db small and lets log retention be aggressive without touching durable
+ * state. Cross-db queries (e.g. "failed task_history row → its log lines") use
+ * SQLite ATTACH — see {@link attachStateDatabase}.
+ *
+ * ## run_id
+ *
+ * state.db's `task_history` identifies a run by the unique pair
+ * `(task_id, started_at)` (see migration 002 in state-db.ts). logs.db encodes
+ * that pair as a single string — {@link buildTaskRunId} — so log rows can be
+ * joined back to their history row:
+ *
+ *   l.run_id = th.task_id || '@' || th.started_at
+ *
+ * ## Schema evolution
+ *
+ * Same migration-safety contract as state.db: append-only `MIGRATIONS` applied
+ * through the shared runner in src/storage/engines/sqlite-migrations.ts.
+ *
+ * @module logs-db
+ */
+import fs from "node:fs";
+import path from "node:path";
+import { openDatabase } from "../storage/database.js";
+import { runMigrations as runSqliteMigrations } from "../storage/engines/sqlite-migrations.js";
+import { getDataDir } from "./paths.js";
+import { getStateDbPath } from "./state-db.js";
+// ── Path helper ──────────────────────────────────────────────────────────────
+/**
+ * Default path: `<dataDir>/logs.db` — alongside state.db so cooperating
+ * processes sharing a data root automatically share the same logs database
+ * (same `AKM_DATA_DIR` / XDG env-isolation as {@link getStateDbPath}).
+ */
+export function getLogsDbPath() {
+    return path.join(getDataDir(), "logs.db");
+}
+// ── Database open ────────────────────────────────────────────────────────────
+/**
+ * Open (and initialise / migrate) the logs database.
+ *
+ * @param dbPath - Override the database file path (tests pass a tmpdir path).
+ *
+ * PRAGMA rationale:
+ *
+ *   journal_mode = WAL
+ *     Readers never block writers and vice-versa; crashes are safe (the WAL is
+ *     replayed on next open). Required because the task runner writes log rows
+ *     while `akm health` may be reading them.
+ *
+ *   busy_timeout = 30000
+ *     Log writes happen at the end of scheduled task runs, which can pile up
+ *     (cron fan-out). 30 s of retry absorbs a slow concurrent writer instead of
+ *     surfacing SQLITE_BUSY and dropping log lines.
+ */
+export function openLogsDatabase(dbPath) {
+    const resolvedPath = dbPath ?? getLogsDbPath();
+    const dir = path.dirname(resolvedPath);
+    if (!fs.existsSync(dir)) {
+        fs.mkdirSync(dir, { recursive: true });
+    }
+    const db = openDatabase(resolvedPath);
+    // PRAGMAs must run before any DDL or DML.
+    db.exec("PRAGMA journal_mode = WAL");
+    db.exec("PRAGMA busy_timeout = 30000");
+    runMigrations(db);
+    return db;
+}
+// ── Migrations ───────────────────────────────────────────────────────────────
+/**
+ * All migrations in application order. APPEND only — never insert in the
+ * middle or reorder. Same contract as state.db's MIGRATIONS array.
+ */
+const MIGRATIONS = [
+    // ── Migration 001 — task_logs ───────────────────────────────────────────────
+    //
+    // One row per log line emitted by a task run.
+    //
+    // Indexed (query) columns:
+    //   ts       TEXT — ISO-8601 UTC; range queries ("logs in the last hour").
+    //   task_id  TEXT — task identifier; per-task log views.
+    //   run_id   TEXT — buildTaskRunId(task_id, started_at); per-run log views
+    //                   and the join key back to state.db task_history.
+    //
+    // Non-indexed columns:
+    //   stream   TEXT — 'stdout' | 'stderr'; which pipe the line came from.
+    //   level    TEXT — 'info' | 'warn' | 'error'; runner-assigned severity
+    //                   ('info' for captured stdout, 'error' for stderr and
+    //                   failure diagnostics).
+    //   line     TEXT — the log line itself (no trailing newline).
+    //
+    // ADD COLUMN extension points (future migrations):
+    //   ALTER TABLE task_logs ADD COLUMN seq INTEGER DEFAULT NULL;
+    //   ALTER TABLE task_logs ADD COLUMN source TEXT DEFAULT NULL;
+    //
+    // TTL: rows where ts < NOW() - retention can be deleted by purgeOldTaskLogs().
+    // No automatic deletion occurs here.
+    {
+        id: "001-task-logs",
+        up: `
+      CREATE TABLE IF NOT EXISTS task_logs (
+        id      INTEGER PRIMARY KEY AUTOINCREMENT,
+        ts      TEXT    NOT NULL,
+        task_id TEXT    NOT NULL,
+        run_id  TEXT    NOT NULL,
+        stream  TEXT    NOT NULL DEFAULT 'stdout',
+        level   TEXT    NOT NULL DEFAULT 'info',
+        line    TEXT    NOT NULL
+      );
+      -- Query patterns:
+      --   SELECT … WHERE ts >= ? AND ts <= ?   → idx_task_logs_ts (purge, windows)
+      --   SELECT … WHERE task_id = ?           → idx_task_logs_task_id
+      --   SELECT … WHERE run_id = ?            → idx_task_logs_run_id (per-run tail)
+      CREATE INDEX IF NOT EXISTS idx_task_logs_ts      ON task_logs(ts);
+      CREATE INDEX IF NOT EXISTS idx_task_logs_task_id ON task_logs(task_id);
+      CREATE INDEX IF NOT EXISTS idx_task_logs_run_id  ON task_logs(run_id);
+    `,
+    },
+];
+/**
+ * Apply every pending migration. Called automatically by
+ * {@link openLogsDatabase}; exported for the same test seams state-db exposes.
+ */
+export function runMigrations(db) {
+    runSqliteMigrations(db, MIGRATIONS);
+}
+// ── run_id ───────────────────────────────────────────────────────────────────
+/**
+ * Encode a task run's identity — the unique `(task_id, started_at)` pair from
+ * state.db `task_history` — as a single run_id string.
+ *
+ * The format MUST stay in sync with the SQL expression
+ * `task_id || '@' || started_at` used by {@link queryFailedRunLogLines}.
+ */
+export function buildTaskRunId(taskId, startedAtIso) {
+    return `${taskId}@${startedAtIso}`;
+}
+/**
+ * Insert a batch of log lines for one task run in a single transaction.
+ * Returns the number of rows inserted. Lines are stored in array order
+ * (ascending rowid), so reading back `ORDER BY id` reproduces emission order.
+ *
+ * Errors propagate — the task runner wraps this in its own best-effort
+ * handling (mirroring `appendHistory`) so an unwritable logs.db never fails
+ * a task run.
+ */
+export function insertTaskLogLines(db, input) {
+    if (input.lines.length === 0)
+        return 0;
+    const stmt = db.prepare(`INSERT INTO task_logs (ts, task_id, run_id, stream, level, line)
+     VALUES (?, ?, ?, ?, ?, ?)`);
+    db.transaction(() => {
+        for (const entry of input.lines) {
+            stmt.run(input.ts, input.taskId, input.runId, entry.stream ?? "stdout", entry.level ?? "info", entry.line);
+        }
+    })();
+    return input.lines.length;
+}
+/**
+ * Read log lines matching the filter, in emission order (ascending id).
+ *
+ * Connection-lifetime rule (WS5): `.all()` materializes a plain array before
+ * returning.
+ */
+export function queryTaskLogs(db, options = {}) {
+    const conditions = [];
+    const params = [];
+    if (options.taskId) {
+        conditions.push("task_id = ?");
+        params.push(options.taskId);
+    }
+    if (options.runId) {
+        conditions.push("run_id = ?");
+        params.push(options.runId);
+    }
+    if (options.stream) {
+        conditions.push("stream = ?");
+        params.push(options.stream);
+    }
+    if (options.since) {
+        conditions.push("ts >= ?");
+        params.push(options.since);
+    }
+    if (options.until) {
+        conditions.push("ts < ?");
+        params.push(options.until);
+    }
+    const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
+    const limit = options.limit !== undefined && options.limit >= 0 ? ` LIMIT ${Math.floor(options.limit)}` : "";
+    return db
+        .prepare(`SELECT id, ts, task_id, run_id, stream, level, line FROM task_logs ${where} ORDER BY id ASC${limit}`)
+        .all(...params);
+}
+/**
+ * Bulk membership check: which of `runIds` have at least one log row?
+ * Used by `akm health` to compute the log-backing rate from the database
+ * instead of `fs.existsSync` over scattered files. Chunked to stay under
+ * SQLite's bound-parameter ceiling.
+ */
+export function getLoggedRunIds(db, runIds) {
+    const out = new Set();
+    if (runIds.length === 0)
+        return out;
+    const CHUNK = 500;
+    for (let i = 0; i < runIds.length; i += CHUNK) {
+        const chunk = runIds.slice(i, i + CHUNK);
+        const placeholders = chunk.map(() => "?").join(",");
+        const rows = db
+            .prepare(`SELECT DISTINCT run_id FROM task_logs WHERE run_id IN (${placeholders})`)
+            .all(...chunk);
+        for (const row of rows)
+            out.add(row.run_id);
+    }
+    return out;
+}
+// ── Cross-db: ATTACH state.db ────────────────────────────────────────────────
+/**
+ * ATTACH state.db to an open logs.db handle under the schema name `state`,
+ * enabling cross-db joins like task_history × task_logs.
+ *
+ * The state.db file must already exist (callers always open state.db first in
+ * practice); attaching a non-existent path would silently create an empty,
+ * unmigrated database file, so this throws instead.
+ */
+export function attachStateDatabase(db, stateDbPath) {
+    const resolved = stateDbPath ?? getStateDbPath();
+    if (!fs.existsSync(resolved)) {
+        throw new Error(`Cannot ATTACH state.db: file does not exist at ${resolved}`);
+    }
+    // prepare().run() rather than db.run(): both drivers support parameterised
+    // ATTACH through a prepared statement, and no other call site uses db.run().
+    db.prepare("ATTACH DATABASE ? AS state").run(resolved);
+}
+/**
+ * Convenience: open logs.db with state.db attached as `state`. The returned
+ * handle supports cross-db queries such as {@link queryFailedRunLogLines}.
+ * Close it like any other handle (DETACH is implicit on close).
+ */
+export function openLogsDatabaseWithState(logsDbPath, stateDbPath) {
+    const db = openLogsDatabase(logsDbPath);
+    try {
+        attachStateDatabase(db, stateDbPath);
+    }
+    catch (err) {
+        db.close();
+        throw err;
+    }
+    return db;
+}
+/**
+ * Cross-db join: every log line belonging to a FAILED task_history run whose
+ * `started_at` is `>= since` (all failed runs when omitted). Requires a handle
+ * opened via {@link openLogsDatabaseWithState}.
+ *
+ * The join key is the run_id encoding documented on {@link buildTaskRunId}:
+ * `task_logs.run_id = task_history.task_id || '@' || task_history.started_at`.
+ */
+export function queryFailedRunLogLines(db, options = {}) {
+    const conditions = ["th.status = 'failed'"];
+    const params = [];
+    if (options.since) {
+        conditions.push("th.started_at >= ?");
+        params.push(options.since);
+    }
+    const limit = options.limit !== undefined && options.limit >= 0 ? ` LIMIT ${Math.floor(options.limit)}` : "";
+    return db
+        .prepare(`SELECT th.task_id, l.run_id, th.started_at, th.status, l.ts, l.stream, l.level, l.line
+       FROM state.task_history th
+       JOIN task_logs l ON l.run_id = th.task_id || '@' || th.started_at
+       WHERE ${conditions.join(" AND ")}
+       ORDER BY th.started_at DESC, l.id ASC${limit}`)
+        .all(...params);
+}
+// ── Retention ────────────────────────────────────────────────────────────────
+/**
+ * Delete task_logs rows older than `retentionDays` (default: 90). Mirrors
+ * `purgeOldEvents` / `purgeOldImproveRuns` in state-db.ts — same default, same
+ * return shape (rows deleted), same disabled-when-non-positive semantics.
+ * Wired into the improve maintenance pass alongside the state.db purges.
+ */
+export function purgeOldTaskLogs(db, retentionDays = 90) {
+    if (!Number.isFinite(retentionDays) || retentionDays <= 0)
+        return 0;
+    const cutoff = new Date(Date.now() - retentionDays * 86_400_000).toISOString();
+    const result = db.prepare("DELETE FROM task_logs WHERE ts < ?").run(cutoff);
+    const changes = result.changes ?? 0;
+    return typeof changes === "bigint" ? Number(changes) : changes;
+}

package/dist/core/state-db.js CHANGED Viewed

@@ -86,11 +86,12 @@ export function getStateDbPath() {
  *     backwards compatibility; enabling them prevents orphaned rows in tables
  *     that reference each other (not used in v1 schema but guards future ones).
  *
- *   busy_timeout = 5000
+ *   busy_timeout = 30000
  *     When another connection holds a write lock, SQLite retries for up to
- *     5 000 ms before returning SQLITE_BUSY. Without this, the default timeout
- *     is 0 ms — any concurrent writer causes an immediate error. 5 s matches
- *     the same value used in openDatabase() for index.db.
+ *     30 000 ms before returning SQLITE_BUSY. Without this, the default timeout
+ *     is 0 ms — any concurrent writer causes an immediate error. 30 s (#589)
+ *     matches the value used in openDatabase() for index.db; 5 s proved too
+ *     narrow when a post-inference reindex overlapped a parallel event write.
  */
 export function openStateDatabase(dbPath) {
     const resolvedPath = dbPath ?? getStateDbPath();
@@ -102,7 +103,7 @@ export function openStateDatabase(dbPath) {
     // PRAGMAs must run before any DDL or DML.
     db.exec("PRAGMA journal_mode = WAL");
     db.exec("PRAGMA foreign_keys = ON");
-    db.exec("PRAGMA busy_timeout = 5000");
+    db.exec("PRAGMA busy_timeout = 30000");
     runMigrations(db);
     return db;
 }
@@ -190,7 +191,9 @@ const MIGRATIONS = [
       --
       -- Extensible (metadata_json) columns:
       --   metadata_json TEXT      — JSON object for future proposal fields.
-      --                             Current fields stored here: sourceRun, review.
+      --                             Current fields stored here: sourceRun,
+      --                             review, confidence, gateDecision (#577),
+      --                             backupContent.
       --
       -- ADD COLUMN extension points (future migrations):
       --   ALTER TABLE proposals ADD COLUMN source_run TEXT DEFAULT NULL;
@@ -458,6 +461,33 @@ const MIGRATIONS = [
         ON extract_sessions_seen(processed_at);
     `,
     },
+    // ── Migration 005 — proposal_fs_imports ─────────────────────────────────────
+    //
+    // One-shot ledger for the legacy filesystem→SQLite proposal import (#578).
+    //
+    // Before 0.9.0 the proposal queue lived as per-uuid JSON directories under
+    // `<stashDir>/.akm/proposals/` and the `proposals` table (created in 001) was
+    // dead weight. 0.9.0 makes the table canonical; the first proposal operation
+    // against a stash imports any legacy `proposal.json` files it finds (INSERT
+    // OR IGNORE, so re-runs never duplicate) and records the stash here so later
+    // invocations skip the directory walk entirely.
+    //
+    // Indexed (query) columns:
+    //   stash_dir    TEXT PK  — absolute stash root the import ran against.
+    //
+    // Non-indexed columns:
+    //   imported_at    TEXT     — ISO-8601 UTC; when the import completed.
+    //   imported_count INTEGER  — rows actually inserted by the import.
+    {
+        id: "005-proposal-fs-imports",
+        up: `
+      CREATE TABLE IF NOT EXISTS proposal_fs_imports (
+        stash_dir      TEXT    PRIMARY KEY,
+        imported_at    TEXT    NOT NULL,
+        imported_count INTEGER NOT NULL DEFAULT 0
+      );
+    `,
+    },
 ];
 /**
  * Apply every pending migration in a single transaction per migration.
@@ -529,6 +559,9 @@ export function proposalRowToProposal(row) {
             ...(frontmatter !== undefined ? { frontmatter } : {}),
         },
         ...(meta.review !== undefined ? { review: meta.review } : {}),
+        ...(typeof meta.confidence === "number" ? { confidence: meta.confidence } : {}),
+        ...(meta.gateDecision !== undefined ? { gateDecision: meta.gateDecision } : {}),
+        ...(typeof meta.backupContent === "string" ? { backupContent: meta.backupContent } : {}),
     };
 }
 /**
@@ -542,6 +575,12 @@ export function proposalToRowValues(proposal, stashDir) {
         metaObj.sourceRun = proposal.sourceRun;
     if (proposal.review !== undefined)
         metaObj.review = proposal.review;
+    if (proposal.confidence !== undefined)
+        metaObj.confidence = proposal.confidence;
+    if (proposal.gateDecision !== undefined)
+        metaObj.gateDecision = proposal.gateDecision;
+    if (proposal.backupContent !== undefined)
+        metaObj.backupContent = proposal.backupContent;
     return {
         id: proposal.id,
         stash_dir: stashDir,
@@ -656,7 +695,10 @@ export function upsertProposal(db, proposal, stashDir) {
 }
 /**
  * List proposals, optionally filtered by stashDir, status, and/or ref.
- * Results are sorted by created_at ASC to match the existing listProposals() behaviour.
+ *
+ * Results are ordered by `created_at ASC` (matching the historical
+ * `listProposals()` sort), with `rowid` as a deterministic tiebreak so two
+ * proposals created in the same millisecond list in insertion order.
  */
 export function listStateProposals(db, options = {}) {
     const conditions = [];
@@ -677,21 +719,72 @@ export function listStateProposals(db, options = {}) {
     const rows = db
         .prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
               content, frontmatter_json, metadata_json
-       FROM proposals ${where} ORDER BY created_at ASC`)
+       FROM proposals ${where} ORDER BY created_at ASC, rowid ASC`)
         .all(...params);
     return rows.map(proposalRowToProposal);
 }
 /**
- * Look up a single proposal by id. Returns undefined when not found.
+ * Look up a single proposal by id, optionally scoped to one stash root.
+ * Returns undefined when not found.
  */
-export function getStateProposal(db, id) {
-    const row = db
-        .prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
+export function getStateProposal(db, id, stashDir) {
+    const sql = `SELECT id, stash_dir, ref, status, source, created_at, updated_at,
               content, frontmatter_json, metadata_json
-       FROM proposals WHERE id = ?`)
-        .get(id);
+       FROM proposals WHERE id = ?${stashDir ? " AND stash_dir = ?" : ""}`;
+    const row = (stashDir ? db.prepare(sql).get(id, stashDir) : db.prepare(sql).get(id));
     return row ? proposalRowToProposal(row) : undefined;
 }
+/**
+ * Find PENDING proposal ids in one stash whose id starts with `idPrefix`.
+ * Backs the UUID-prefix form of `akm proposal show/accept/... <prefix>` —
+ * prefix resolution is deliberately scoped to the live (pending) queue,
+ * mirroring the historical behaviour of scanning only the live directory.
+ *
+ * `%` / `_` / `\` in the prefix are escaped so the LIKE pattern is literal.
+ */
+export function listStateProposalIdsByPrefix(db, stashDir, idPrefix) {
+    const escaped = idPrefix.replace(/[\\%_]/g, (ch) => `\\${ch}`);
+    const rows = db
+        .prepare(`SELECT id FROM proposals
+       WHERE stash_dir = ? AND status = 'pending' AND id LIKE ? ESCAPE '\\'
+       ORDER BY id ASC`)
+        .all(stashDir, `${escaped}%`);
+    return rows.map((r) => r.id);
+}
+/**
+ * Whether the legacy filesystem proposal import has already run for `stashDir`.
+ * See migration 005 (`proposal_fs_imports`).
+ */
+export function hasImportedFsProposals(db, stashDir) {
+    // Drivers disagree on the no-row sentinel (bun:sqlite → null,
+    // better-sqlite3 → undefined) — Boolean() covers both.
+    return Boolean(db.prepare("SELECT 1 FROM proposal_fs_imports WHERE stash_dir = ?").get(stashDir));
+}
+/**
+ * Record that the legacy filesystem proposal import completed for `stashDir`
+ * so subsequent invocations skip the directory walk. INSERT OR REPLACE keeps
+ * the call idempotent.
+ */
+export function recordFsProposalsImport(db, stashDir, importedCount) {
+    db.prepare("INSERT OR REPLACE INTO proposal_fs_imports (stash_dir, imported_at, imported_count) VALUES (?, ?, ?)").run(stashDir, new Date().toISOString(), importedCount);
+}
+/**
+ * Insert a proposal row ONLY when the id is not already present (used by the
+ * legacy filesystem import so re-runs never clobber rows that have since been
+ * mutated through the canonical store). Returns true when a row was inserted.
+ */
+export function insertProposalIfAbsent(db, proposal, stashDir) {
+    const v = proposalToRowValues(proposal, stashDir);
+    const result = db
+        .prepare(`
+      INSERT OR IGNORE INTO proposals
+        (id, stash_dir, ref, status, source, created_at, updated_at, content, frontmatter_json, metadata_json)
+      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+    `)
+        .run(v.id, v.stash_dir, v.ref, v.status, v.source, v.created_at, v.updated_at, v.content, v.frontmatter_json, v.metadata_json);
+    const changes = result.changes ?? 0;
+    return Number(changes) > 0;
+}
 // ── task_history table helpers ───────────────────────────────────────────────
 /**
  * Upsert a task history row.

package/dist/indexer/db/db.js CHANGED Viewed

@@ -28,7 +28,7 @@ export function openDatabase(dbPath, options) {
     }
     const db = openSqlite(resolvedPath);
     db.exec("PRAGMA journal_mode = WAL");
-    db.exec("PRAGMA busy_timeout = 5000");
+    db.exec("PRAGMA busy_timeout = 30000");
     db.exec("PRAGMA foreign_keys = ON");
     // Try to load sqlite-vec extension
     loadVecExtension(db);
@@ -69,7 +69,7 @@ export function openExistingDatabase(dbPath) {
     const resolvedPath = dbPath ?? getDbPath();
     const db = openSqlite(resolvedPath);
     db.exec("PRAGMA journal_mode = WAL");
-    db.exec("PRAGMA busy_timeout = 5000");
+    db.exec("PRAGMA busy_timeout = 30000");
     db.exec("PRAGMA foreign_keys = ON");
     // Existing-DB callers must not mutate schema or embedding metadata on open,
     // but some paths still need write access to usage_events and other tables.

package/dist/indexer/passes/memory-inference.js CHANGED Viewed

@@ -119,6 +119,26 @@ export async function runMemoryInferencePass(ctx) {
         // 2026-05-26).
         if (signal?.aborted)
             return { aborted: true };
+        // Pre-check (#588): when `<parent>.derived.md` is already on disk the
+        // inference is by definition complete — the parent only looks pending
+        // because `markParentProcessed` never ran (process killed between the
+        // child write and the mark) or the child was created externally (e.g.
+        // consolidation). Skip the LLM/cache call entirely and mark the parent
+        // so it never re-pends. Before this check, production measurements
+        // showed ~55% of the pass's LLM budget re-deriving such parents only to
+        // discover the existing child after the fact.
+        if (fs.existsSync(derivedChildPath(record))) {
+            markParentProcessed(record);
+            return {
+                skipped: false,
+                splitParent: false,
+                written: 0,
+                fromCache: false,
+                retryAttempts: 0,
+                childExists: true,
+                precheck: true,
+            };
+        }
         // Incremental cache: skip LLM call when body hash is unchanged and
         // --re-enrich was not requested. The cache ref is the absolute file path.
         const validate = (raw) => {
@@ -171,23 +191,30 @@ export async function runMemoryInferencePass(ctx) {
             return { skipped: false, splitParent: true, written: writeOutcome.written, fromCache, retryAttempts };
         }
         // LLM produced a valid derived draft but no file was written — either
-        // because `<parent>.derived.md` already exists on disk or
-        // `writeAssetToSource` threw. Categorise as `childExists` so the
-        // attempt is accounted for in health metrics rather than vanishing
-        // into the freshAttempts denominator.
+        // because `<parent>.derived.md` appeared on disk after the pre-check
+        // above (a rare mid-flight race) or `writeAssetToSource` threw.
+        // Categorise as `childExists` so the consumed attempt is accounted for
+        // in health metrics rather than vanishing into the freshAttempts
+        // denominator.
         //
-        // When the child already exists on disk the inference is, by definition,
-        // already complete — so mark the parent processed here too (#550).
-        // Without this, `isPendingMemory()` re-queues the same parent every run
-        // (the `written > 0` path was previously the only site that marks it),
-        // causing permanent re-queueing and wasted LLM calls. A genuine write
-        // *failure* (`writeAssetToSource` threw) must NOT mark the parent — it
-        // should be retried next run — so we key off the explicit `childExists`
-        // outcome rather than the conflated `written === 0`.
+        // When the child exists the inference is, by definition, complete — so
+        // mark the parent processed here too (#550), otherwise
+        // `isPendingMemory()` re-queues the same parent every run. A genuine
+        // write *failure* (`writeAssetToSource` threw) must NOT mark the parent
+        // — it should be retried next run — so we key off the explicit
+        // `childExists` outcome rather than the conflated `written === 0`.
         if (writeOutcome.childExists) {
             markParentProcessed(record);
         }
-        return { skipped: false, splitParent: false, written: 0, fromCache, retryAttempts, childExists: true };
+        return {
+            skipped: false,
+            splitParent: false,
+            written: 0,
+            fromCache,
+            retryAttempts,
+            childExists: true,
+            precheck: false,
+        };
     },
     // Default concurrency of 4 for cloud APIs. Set `llm.concurrency: 1`
     // in config.json for local model servers (LM Studio, Ollama).
@@ -224,11 +251,16 @@ export async function runMemoryInferencePass(ctx) {
             result.writtenFacts += res.written;
         }
         else if ("childExists" in res && res.childExists) {
-            // LLM call was consumed but the derived file already existed (or the
-            // write threw). Track separately so this category is observable in
-            // health output and stops bleeding into the freshAttempts denominator.
+            // Derived child already on disk. Track separately so this category is
+            // observable in health output and stops bleeding into the
+            // freshAttempts denominator. Pre-check skips (#588) are the routine
+            // self-healing path — no LLM attempt was consumed and the parent has
+            // been marked processed — so only the rare post-LLM case (mid-flight
+            // race or write failure) warrants a per-ref warning.
             result.skippedChildExists += 1;
-            warn(`memory inference: derived child for ${pending[i]?.ref ?? "<unknown>"} already existed or write failed; counted as skippedChildExists`);
+            if (!res.precheck) {
+                warn(`memory inference: derived child for ${pending[i]?.ref ?? "<unknown>"} already existed or write failed; counted as skippedChildExists`);
+            }
         }
         else {
             // The per-record state machine should cover every outcome. A hit here
@@ -324,6 +356,14 @@ function toMemoryName(memoriesDir, filePath) {
     // user has organised under memories/.
     return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
 }
+/**
+ * Absolute path of the derived child for a parent memory. Single source of
+ * truth for the `<parent>.derived.md` naming convention — used both by the
+ * pre-LLM existence check (#588) and the write path.
+ */
+function derivedChildPath(parent) {
+    return path.join(parent.stashRoot, "memories", `${parent.name}.derived.md`);
+}
 async function writeDerivedMemory(parent, derived) {
     const writeTarget = {
         kind: "filesystem",
@@ -338,11 +378,10 @@ async function writeDerivedMemory(parent, derived) {
     };
     const childName = `${parent.name}.derived`;
     const childRefStr = `memory:${childName}`;
-    const childPath = path.join(parent.stashRoot, "memories", `${childName}.md`);
-    if (fs.existsSync(childPath)) {
-        // The derived child is already on disk — inference for this parent is
-        // complete. Report `childExists` so the caller marks the parent processed
-        // (#550) instead of re-queueing it forever.
+    if (fs.existsSync(derivedChildPath(parent))) {
+        // The derived child appeared on disk after the caller's pre-check (#588)
+        // — a rare mid-flight race. Report `childExists` so the caller marks the
+        // parent processed (#550) instead of re-queueing it forever.
         return { written: 0, childExists: true };
     }
     try {