akm-cli 0.9.0-beta.2 → 0.9.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +248 -0
- package/dist/assets/templates/html/default.html +78 -0
- package/dist/assets/templates/html/health.html +560 -0
- package/dist/assets/templates/html/vendor/echarts.min.js +45 -0
- package/dist/cli/shared.js +21 -5
- package/dist/cli.js +36 -5
- package/dist/commands/health/html-report.js +448 -0
- package/dist/commands/health.js +97 -6
- package/dist/commands/improve/consolidate.js +15 -2
- package/dist/commands/improve/extract.js +38 -2
- package/dist/commands/improve/improve-auto-accept.js +27 -1
- package/dist/commands/improve/improve.js +167 -53
- package/dist/commands/improve/reflect-noise.js +0 -0
- package/dist/commands/improve/reflect.js +25 -0
- package/dist/commands/proposal/drain.js +73 -6
- package/dist/commands/proposal/proposal-cli.js +22 -10
- package/dist/commands/proposal/proposal.js +12 -1
- package/dist/commands/proposal/validators/proposals.js +361 -338
- package/dist/commands/remember.js +6 -2
- package/dist/core/config/config-schema.js +5 -0
- package/dist/core/logs-db.js +304 -0
- package/dist/core/state-db.js +107 -14
- package/dist/indexer/db/db.js +2 -2
- package/dist/indexer/passes/memory-inference.js +61 -22
- package/dist/integrations/harnesses/claude/session-log.js +16 -4
- package/dist/llm/client.js +15 -0
- package/dist/llm/usage-persist.js +77 -0
- package/dist/llm/usage-telemetry.js +103 -0
- package/dist/output/context.js +3 -2
- package/dist/output/html-render.js +73 -0
- package/dist/output/shapes/helpers.js +17 -1
- package/dist/output/text/helpers.js +69 -1
- package/dist/scripts/migrate-storage.js +65 -14
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +14 -2
- package/dist/tasks/runner.js +99 -16
- package/dist/workflows/db.js +4 -0
- package/package.json +2 -1
|
@@ -149,6 +149,10 @@ export async function runLlmEnrich(body) {
|
|
|
149
149
|
return { tags: [] };
|
|
150
150
|
}
|
|
151
151
|
const { chatCompletion, parseEmbeddedJsonResponse: parseJsonResponse } = await import("../llm/client.js");
|
|
152
|
+
// #576: attribute this entry point's LLM call to the `remember` stage. The
|
|
153
|
+
// wrapper is ambient — if a usage sink is active it tags the record; if not,
|
|
154
|
+
// it is a no-op.
|
|
155
|
+
const { withLlmStage } = await import("../llm/usage-telemetry.js");
|
|
152
156
|
const prompt = `You are a memory tagger for a developer knowledge base.
|
|
153
157
|
Given the memory text below, return ONLY a JSON object with these fields:
|
|
154
158
|
- "tags": array of 1-5 short lowercase keyword tags
|
|
@@ -164,10 +168,10 @@ Return ONLY the JSON object, no prose, no markdown fences.`;
|
|
|
164
168
|
const result = await (async () => {
|
|
165
169
|
try {
|
|
166
170
|
return await Promise.race([
|
|
167
|
-
chatCompletion(llmConfig, [
|
|
171
|
+
withLlmStage("remember", () => chatCompletion(llmConfig, [
|
|
168
172
|
{ role: "system", content: "Return only valid JSON. No prose." },
|
|
169
173
|
{ role: "user", content: prompt },
|
|
170
|
-
], { maxTokens: 256, temperature: 0.1 }),
|
|
174
|
+
], { maxTokens: 256, temperature: 0.1 })),
|
|
171
175
|
new Promise((_, reject) => {
|
|
172
176
|
timeoutHandle = setTimeout(() => reject(new Error("LLM enrichment timed out")), LLM_ENRICH_TIMEOUT_MS);
|
|
173
177
|
}),
|
|
@@ -138,6 +138,11 @@ export const ImproveProcessConfigSchema = z
|
|
|
138
138
|
// Extract process config (only meaningful for extract process)
|
|
139
139
|
defaultSince: z.string().min(1).optional(),
|
|
140
140
|
maxTotalChars: positiveInt.optional(),
|
|
141
|
+
// Extract process: minimum raw session size (pre-filter inputCount) below
|
|
142
|
+
// which the extract LLM call is skipped (#595/#596). 0 disables the gate.
|
|
143
|
+
// Absent = default 10 (skip only truly empty sessions). Only meaningful
|
|
144
|
+
// on the `extract` process.
|
|
145
|
+
minContentChars: z.number().int().min(0).optional(),
|
|
141
146
|
maxChunkSize: z.number().int().min(1).max(50).optional(),
|
|
142
147
|
// Extract process: minimum number of new (unseen, in-window) candidate
|
|
143
148
|
// sessions below which the extract pass skips entirely (emits an
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* logs.db — Dedicated SQLite database for task/run log lines (#579).
|
|
6
|
+
*
|
|
7
|
+
* Replaces grep-the-flat-file consumption of `<cacheDir>/tasks/logs/<id>/<ts>.log`
|
|
8
|
+
* with structured, indexed rows: `{ts, task_id, run_id, stream, level, line}`.
|
|
9
|
+
* The strategic direction (stop scattering data across files/folders) means
|
|
10
|
+
* every NEW log consumer queries this database; the per-run text file written
|
|
11
|
+
* by the task runner is retained only as a transitional tail for humans —
|
|
12
|
+
* see docs/technical/logs-audit.md for the full producer audit.
|
|
13
|
+
*
|
|
14
|
+
* ## Why a separate database from state.db
|
|
15
|
+
*
|
|
16
|
+
* Log lines are high-volume, append-only, and freely purgeable; state.db rows
|
|
17
|
+
* (events, proposals, task_history) are durable records. Separating them keeps
|
|
18
|
+
* state.db small and lets log retention be aggressive without touching durable
|
|
19
|
+
* state. Cross-db queries (e.g. "failed task_history row → its log lines") use
|
|
20
|
+
* SQLite ATTACH — see {@link attachStateDatabase}.
|
|
21
|
+
*
|
|
22
|
+
* ## run_id
|
|
23
|
+
*
|
|
24
|
+
* state.db's `task_history` identifies a run by the unique pair
|
|
25
|
+
* `(task_id, started_at)` (see migration 002 in state-db.ts). logs.db encodes
|
|
26
|
+
* that pair as a single string — {@link buildTaskRunId} — so log rows can be
|
|
27
|
+
* joined back to their history row:
|
|
28
|
+
*
|
|
29
|
+
* l.run_id = th.task_id || '@' || th.started_at
|
|
30
|
+
*
|
|
31
|
+
* ## Schema evolution
|
|
32
|
+
*
|
|
33
|
+
* Same migration-safety contract as state.db: append-only `MIGRATIONS` applied
|
|
34
|
+
* through the shared runner in src/storage/engines/sqlite-migrations.ts.
|
|
35
|
+
*
|
|
36
|
+
* @module logs-db
|
|
37
|
+
*/
|
|
38
|
+
import fs from "node:fs";
|
|
39
|
+
import path from "node:path";
|
|
40
|
+
import { openDatabase } from "../storage/database.js";
|
|
41
|
+
import { runMigrations as runSqliteMigrations } from "../storage/engines/sqlite-migrations.js";
|
|
42
|
+
import { getDataDir } from "./paths.js";
|
|
43
|
+
import { getStateDbPath } from "./state-db.js";
|
|
44
|
+
// ── Path helper ──────────────────────────────────────────────────────────────
|
|
45
|
+
/**
 * Resolve the default location of the logs database.
 *
 * The file is always named `logs.db` and lives directly inside the directory
 * returned by `getDataDir()`, so whatever controls the data directory also
 * controls where log rows are stored.
 *
 * @returns {string} Path to `logs.db` inside the data directory.
 */
export function getLogsDbPath() {
    const dataDir = getDataDir();
    return path.join(dataDir, "logs.db");
}
|
|
53
|
+
// ── Database open ────────────────────────────────────────────────────────────
|
|
54
|
+
/**
 * Open (and initialise / migrate) the logs database.
 *
 * @param dbPath - Override the database file path (tests pass a tmpdir path).
 *   When null or undefined, the path from `getLogsDbPath()` is used.
 * @returns The open database handle with PRAGMAs applied and migrations run.
 * @throws Rethrows any error raised while applying PRAGMAs or migrations. The
 *   handle is closed first so a failed open never leaks a connection.
 *
 * PRAGMA rationale:
 *
 *   journal_mode = WAL
 *     Readers never block writers and vice-versa; crashes are safe (the WAL is
 *     replayed on next open). Required because the task runner writes log rows
 *     while `akm health` may be reading them.
 *
 *   busy_timeout = 30000
 *     Log writes happen at the end of scheduled task runs, which can pile up
 *     (cron fan-out). 30 s of retry absorbs a slow concurrent writer instead of
 *     surfacing SQLITE_BUSY and dropping log lines.
 */
export function openLogsDatabase(dbPath) {
    const resolvedPath = dbPath ?? getLogsDbPath();
    const dir = path.dirname(resolvedPath);
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
    }
    const db = openDatabase(resolvedPath);
    try {
        // PRAGMAs must run before any DDL or DML.
        db.exec("PRAGMA journal_mode = WAL");
        db.exec("PRAGMA busy_timeout = 30000");
        runMigrations(db);
    }
    catch (err) {
        // Same cleanup contract as openLogsDatabaseWithState: a handle that
        // could not be fully initialised is closed before the error escapes.
        db.close();
        throw err;
    }
    return db;
}
|
|
84
|
+
// ── Migrations ───────────────────────────────────────────────────────────────
|
|
85
|
+
/**
|
|
86
|
+
* All migrations in application order. APPEND only — never insert in the
|
|
87
|
+
* middle or reorder. Same contract as state.db's MIGRATIONS array.
|
|
88
|
+
*/
|
|
89
|
+
const MIGRATIONS = [
|
|
90
|
+
// ── Migration 001 — task_logs ───────────────────────────────────────────────
|
|
91
|
+
//
|
|
92
|
+
// One row per log line emitted by a task run.
|
|
93
|
+
//
|
|
94
|
+
// Indexed (query) columns:
|
|
95
|
+
// ts TEXT — ISO-8601 UTC; range queries ("logs in the last hour").
|
|
96
|
+
// task_id TEXT — task identifier; per-task log views.
|
|
97
|
+
// run_id TEXT — buildTaskRunId(task_id, started_at); per-run log views
|
|
98
|
+
// and the join key back to state.db task_history.
|
|
99
|
+
//
|
|
100
|
+
// Non-indexed columns:
|
|
101
|
+
// stream TEXT — 'stdout' | 'stderr'; which pipe the line came from.
|
|
102
|
+
// level TEXT — 'info' | 'warn' | 'error'; runner-assigned severity
|
|
103
|
+
// ('info' for captured stdout, 'error' for stderr and
|
|
104
|
+
// failure diagnostics).
|
|
105
|
+
// line TEXT — the log line itself (no trailing newline).
|
|
106
|
+
//
|
|
107
|
+
// ADD COLUMN extension points (future migrations):
|
|
108
|
+
// ALTER TABLE task_logs ADD COLUMN seq INTEGER DEFAULT NULL;
|
|
109
|
+
// ALTER TABLE task_logs ADD COLUMN source TEXT DEFAULT NULL;
|
|
110
|
+
//
|
|
111
|
+
// TTL: rows where ts < NOW() - retention can be deleted by purgeOldTaskLogs().
|
|
112
|
+
// No automatic deletion occurs here.
|
|
113
|
+
{
|
|
114
|
+
id: "001-task-logs",
|
|
115
|
+
up: `
|
|
116
|
+
CREATE TABLE IF NOT EXISTS task_logs (
|
|
117
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
118
|
+
ts TEXT NOT NULL,
|
|
119
|
+
task_id TEXT NOT NULL,
|
|
120
|
+
run_id TEXT NOT NULL,
|
|
121
|
+
stream TEXT NOT NULL DEFAULT 'stdout',
|
|
122
|
+
level TEXT NOT NULL DEFAULT 'info',
|
|
123
|
+
line TEXT NOT NULL
|
|
124
|
+
);
|
|
125
|
+
|
|
126
|
+
-- Query patterns:
|
|
127
|
+
-- SELECT … WHERE ts >= ? AND ts <= ? → idx_task_logs_ts (purge, windows)
|
|
128
|
+
-- SELECT … WHERE task_id = ? → idx_task_logs_task_id
|
|
129
|
+
-- SELECT … WHERE run_id = ? → idx_task_logs_run_id (per-run tail)
|
|
130
|
+
CREATE INDEX IF NOT EXISTS idx_task_logs_ts ON task_logs(ts);
|
|
131
|
+
CREATE INDEX IF NOT EXISTS idx_task_logs_task_id ON task_logs(task_id);
|
|
132
|
+
CREATE INDEX IF NOT EXISTS idx_task_logs_run_id ON task_logs(run_id);
|
|
133
|
+
`,
|
|
134
|
+
},
|
|
135
|
+
];
|
|
136
|
+
/**
 * Bring a logs database up to date by handing this module's `MIGRATIONS`
 * list to the shared SQLite migration runner.
 *
 * {@link openLogsDatabase} calls this on every open; it is exported so it can
 * also be invoked directly on a handle.
 *
 * @param db - Open database handle to migrate.
 */
export function runMigrations(db) {
    const migrationList = MIGRATIONS;
    runSqliteMigrations(db, migrationList);
}
|
|
143
|
+
// ── run_id ───────────────────────────────────────────────────────────────────
|
|
144
|
+
/**
 * Build the run_id string for a task run by joining the task id and the run's
 * start timestamp with an `@` separator: `<taskId>@<startedAtIso>`.
 *
 * The format MUST stay in sync with the SQL expression
 * `task_id || '@' || started_at` used by {@link queryFailedRunLogLines}.
 *
 * @param taskId - Task identifier.
 * @param startedAtIso - Start timestamp of the run.
 * @returns {string} The combined run id.
 */
export function buildTaskRunId(taskId, startedAtIso) {
    return "".concat(taskId, "@", startedAtIso);
}
|
|
154
|
+
/**
 * Write every entry of `input.lines` into `task_logs` inside one transaction.
 *
 * All rows share `input.ts`, `input.taskId` and `input.runId`; an entry that
 * omits `stream` or `level` is stored as `"stdout"` / `"info"`. Rows are
 * inserted in array order. Nothing is prepared or written when the batch is
 * empty.
 *
 * Errors are not caught here — they propagate to the caller.
 *
 * @param db - Open logs database handle.
 * @param input - `{ ts, taskId, runId, lines }`, where each line is
 *   `{ line, stream?, level? }`.
 * @returns {number} Number of entries in the batch.
 */
export function insertTaskLogLines(db, input) {
    const isEmptyBatch = input.lines.length === 0;
    if (isEmptyBatch) {
        return 0;
    }
    const insertRow = db.prepare(`INSERT INTO task_logs (ts, task_id, run_id, stream, level, line)
VALUES (?, ?, ?, ?, ?, ?)`);
    const writeBatch = db.transaction(() => {
        for (const { stream, level, line } of input.lines) {
            insertRow.run(input.ts, input.taskId, input.runId, stream ?? "stdout", level ?? "info", line);
        }
    });
    writeBatch();
    return input.lines.length;
}
|
|
175
|
+
/**
 * Read log lines matching the filter, in emission order (ascending id).
 *
 * Every filter is optional and a falsy value (including an empty string)
 * disables it. Active filters are combined with AND:
 *
 *   - `taskId` → `task_id = ?`
 *   - `runId`  → `run_id = ?`
 *   - `stream` → `stream = ?`
 *   - `since`  → `ts >= ?` (inclusive lower bound)
 *   - `until`  → `ts < ?`  (exclusive upper bound)
 *
 * `limit` is floored and applied only when the result is a finite,
 * non-negative number. Negative, NaN and infinite limits mean "no limit"
 * rather than producing invalid SQL such as `LIMIT Infinity`.
 *
 * Connection-lifetime rule (WS5): `.all()` materializes a plain array before
 * returning.
 *
 * @param db - Open logs database handle.
 * @param options - `{ taskId?, runId?, stream?, since?, until?, limit? }`.
 * @returns Array of `{ id, ts, task_id, run_id, stream, level, line }` rows.
 */
export function queryTaskLogs(db, options = {}) {
    const conditions = [];
    const params = [];
    const addFilter = (clause, value) => {
        if (value) {
            conditions.push(clause);
            params.push(value);
        }
    };
    addFilter("task_id = ?", options.taskId);
    addFilter("run_id = ?", options.runId);
    addFilter("stream = ?", options.stream);
    addFilter("ts >= ?", options.since);
    addFilter("ts < ?", options.until);
    const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
    // The limit is interpolated, so it is reduced to a finite integer first.
    const rowCap = Math.floor(Number(options.limit));
    const hasLimit = options.limit !== undefined && Number.isFinite(rowCap) && rowCap >= 0;
    const limit = hasLimit ? ` LIMIT ${rowCap}` : "";
    return db
        .prepare(`SELECT id, ts, task_id, run_id, stream, level, line FROM task_logs ${where} ORDER BY id ASC${limit}`)
        .all(...params);
}
|
|
210
|
+
/**
 * Report which of the given run ids have at least one row in `task_logs`.
 *
 * The ids are looked up in batches of 500 using one `IN (?, ?, …)` query per
 * batch, so the number of bound parameters per statement stays bounded.
 *
 * @param db - Open logs database handle.
 * @param runIds - Run ids to test for membership.
 * @returns {Set} The subset of `runIds` that the database returned rows for.
 */
export function getLoggedRunIds(db, runIds) {
    const BATCH_SIZE = 500;
    const found = new Set();
    let offset = 0;
    while (offset < runIds.length) {
        const batch = runIds.slice(offset, offset + BATCH_SIZE);
        const marks = batch.map(() => "?").join(",");
        const statement = db.prepare(`SELECT DISTINCT run_id FROM task_logs WHERE run_id IN (${marks})`);
        for (const hit of statement.all(...batch)) {
            found.add(hit.run_id);
        }
        offset += BATCH_SIZE;
    }
    return found;
}
|
|
232
|
+
// ── Cross-db: ATTACH state.db ────────────────────────────────────────────────
|
|
233
|
+
/**
 * ATTACH state.db to an already-open database handle under the schema name
 * `state`, so statements on that handle can reference `state.<table>`.
 *
 * The target file must already exist: per the original design note, attaching
 * a missing path would silently create an empty, unmigrated database file, so
 * a missing file is reported as an error instead.
 *
 * @param db - Open database handle to attach onto.
 * @param stateDbPath - Path to state.db; when null or undefined the path from
 *   `getStateDbPath()` is used.
 * @throws {Error} When no file exists at the resolved path.
 */
export function attachStateDatabase(db, stateDbPath) {
    const target = stateDbPath ?? getStateDbPath();
    const targetExists = fs.existsSync(target);
    if (targetExists) {
        // The path is bound as a parameter through a prepared statement
        // rather than being spliced into the SQL text.
        const attach = db.prepare("ATTACH DATABASE ? AS state");
        attach.run(target);
        return;
    }
    throw new Error(`Cannot ATTACH state.db: file does not exist at ${target}`);
}
|
|
250
|
+
/**
 * Open logs.db and attach state.db to it as `state` in one step, producing a
 * handle suitable for cross-db queries such as {@link queryFailedRunLogLines}.
 *
 * If the attach step fails, the freshly opened handle is closed before the
 * error is rethrown, so the caller never receives a half-initialised handle.
 *
 * @param logsDbPath - Optional logs.db path, forwarded to `openLogsDatabase`.
 * @param stateDbPath - Optional state.db path, forwarded to
 *   `attachStateDatabase`.
 * @returns The open logs database handle with state.db attached.
 */
export function openLogsDatabaseWithState(logsDbPath, stateDbPath) {
    const handle = openLogsDatabase(logsDbPath);
    let attached = false;
    try {
        attachStateDatabase(handle, stateDbPath);
        attached = true;
    }
    finally {
        // Only the failure path releases the handle; on success it is returned.
        if (!attached) {
            handle.close();
        }
    }
    return handle;
}
|
|
266
|
+
/**
 * Cross-db join: every log line belonging to a FAILED task_history run whose
 * `started_at` is `>= since` (all failed runs when `since` is omitted or
 * falsy). Requires a handle on which state.db is attached as `state`, e.g.
 * one opened via {@link openLogsDatabaseWithState}.
 *
 * The join key is the run_id encoding documented on {@link buildTaskRunId}:
 * `task_logs.run_id = task_history.task_id || '@' || task_history.started_at`.
 *
 * Rows are ordered newest run first, then by log id ascending within a run.
 * `limit` is floored and applied only when the result is a finite,
 * non-negative number. Negative, NaN and infinite limits mean "no limit"
 * rather than producing invalid SQL such as `LIMIT Infinity`.
 *
 * @param db - Logs database handle with state.db attached as `state`.
 * @param options - `{ since?, limit? }`.
 * @returns Array of `{ task_id, run_id, started_at, status, ts, stream, level,
 *   line }` rows.
 */
export function queryFailedRunLogLines(db, options = {}) {
    const conditions = ["th.status = 'failed'"];
    const params = [];
    if (options.since) {
        conditions.push("th.started_at >= ?");
        params.push(options.since);
    }
    // The limit is interpolated, so it is reduced to a finite integer first.
    const rowCap = Math.floor(Number(options.limit));
    const hasLimit = options.limit !== undefined && Number.isFinite(rowCap) && rowCap >= 0;
    const limit = hasLimit ? ` LIMIT ${rowCap}` : "";
    return db
        .prepare(`SELECT th.task_id, l.run_id, th.started_at, th.status, l.ts, l.stream, l.level, l.line
         FROM state.task_history th
         JOIN task_logs l ON l.run_id = th.task_id || '@' || th.started_at
         WHERE ${conditions.join(" AND ")}
         ORDER BY th.started_at DESC, l.id ASC${limit}`)
        .all(...params);
}
|
|
290
|
+
// ── Retention ────────────────────────────────────────────────────────────────
|
|
291
|
+
/**
 * Remove `task_logs` rows whose `ts` is older than the retention window.
 *
 * The cutoff is "now minus `retentionDays` days", expressed as an ISO-8601
 * string and compared against the `ts` column. A non-finite or non-positive
 * `retentionDays` disables purging: nothing is deleted and 0 is returned.
 *
 * @param db - Open logs database handle.
 * @param retentionDays - Age threshold in days (default: 90).
 * @returns {number} Number of rows deleted, normalised from bigint if the
 *   driver reports the change count that way.
 */
export function purgeOldTaskLogs(db, retentionDays = 90) {
    const purgeEnabled = Number.isFinite(retentionDays) && retentionDays > 0;
    if (!purgeEnabled) {
        return 0;
    }
    const MS_PER_DAY = 86_400_000;
    const cutoffIso = new Date(Date.now() - retentionDays * MS_PER_DAY).toISOString();
    const outcome = db.prepare("DELETE FROM task_logs WHERE ts < ?").run(cutoffIso);
    const deleted = outcome.changes ?? 0;
    if (typeof deleted === "bigint") {
        return Number(deleted);
    }
    return deleted;
}
|
package/dist/core/state-db.js
CHANGED
|
@@ -86,11 +86,12 @@ export function getStateDbPath() {
|
|
|
86
86
|
* backwards compatibility; enabling them prevents orphaned rows in tables
|
|
87
87
|
* that reference each other (not used in v1 schema but guards future ones).
|
|
88
88
|
*
|
|
89
|
-
* busy_timeout =
|
|
89
|
+
* busy_timeout = 30000
|
|
90
90
|
* When another connection holds a write lock, SQLite retries for up to
|
|
91
|
-
*
|
|
92
|
-
* is 0 ms — any concurrent writer causes an immediate error.
|
|
93
|
-
* the
|
|
91
|
+
* 30 000 ms before returning SQLITE_BUSY. Without this, the default timeout
|
|
92
|
+
* is 0 ms — any concurrent writer causes an immediate error. 30 s (#589)
|
|
93
|
+
* matches the value used in openDatabase() for index.db; 5 s proved too
|
|
94
|
+
* narrow when a post-inference reindex overlapped a parallel event write.
|
|
94
95
|
*/
|
|
95
96
|
export function openStateDatabase(dbPath) {
|
|
96
97
|
const resolvedPath = dbPath ?? getStateDbPath();
|
|
@@ -102,7 +103,7 @@ export function openStateDatabase(dbPath) {
|
|
|
102
103
|
// PRAGMAs must run before any DDL or DML.
|
|
103
104
|
db.exec("PRAGMA journal_mode = WAL");
|
|
104
105
|
db.exec("PRAGMA foreign_keys = ON");
|
|
105
|
-
db.exec("PRAGMA busy_timeout =
|
|
106
|
+
db.exec("PRAGMA busy_timeout = 30000");
|
|
106
107
|
runMigrations(db);
|
|
107
108
|
return db;
|
|
108
109
|
}
|
|
@@ -190,7 +191,9 @@ const MIGRATIONS = [
|
|
|
190
191
|
--
|
|
191
192
|
-- Extensible (metadata_json) columns:
|
|
192
193
|
-- metadata_json TEXT — JSON object for future proposal fields.
|
|
193
|
-
-- Current fields stored here: sourceRun,
|
|
194
|
+
-- Current fields stored here: sourceRun,
|
|
195
|
+
-- review, confidence, gateDecision (#577),
|
|
196
|
+
-- backupContent.
|
|
194
197
|
--
|
|
195
198
|
-- ADD COLUMN extension points (future migrations):
|
|
196
199
|
-- ALTER TABLE proposals ADD COLUMN source_run TEXT DEFAULT NULL;
|
|
@@ -458,6 +461,33 @@ const MIGRATIONS = [
|
|
|
458
461
|
ON extract_sessions_seen(processed_at);
|
|
459
462
|
`,
|
|
460
463
|
},
|
|
464
|
+
// ── Migration 005 — proposal_fs_imports ─────────────────────────────────────
|
|
465
|
+
//
|
|
466
|
+
// One-shot ledger for the legacy filesystem→SQLite proposal import (#578).
|
|
467
|
+
//
|
|
468
|
+
// Before 0.9.0 the proposal queue lived as per-uuid JSON directories under
|
|
469
|
+
// `<stashDir>/.akm/proposals/` and the `proposals` table (created in 001) was
|
|
470
|
+
// dead weight. 0.9.0 makes the table canonical; the first proposal operation
|
|
471
|
+
// against a stash imports any legacy `proposal.json` files it finds (INSERT
|
|
472
|
+
// OR IGNORE, so re-runs never duplicate) and records the stash here so later
|
|
473
|
+
// invocations skip the directory walk entirely.
|
|
474
|
+
//
|
|
475
|
+
// Indexed (query) columns:
|
|
476
|
+
// stash_dir TEXT PK — absolute stash root the import ran against.
|
|
477
|
+
//
|
|
478
|
+
// Non-indexed columns:
|
|
479
|
+
// imported_at TEXT — ISO-8601 UTC; when the import completed.
|
|
480
|
+
// imported_count INTEGER — rows actually inserted by the import.
|
|
481
|
+
{
|
|
482
|
+
id: "005-proposal-fs-imports",
|
|
483
|
+
up: `
|
|
484
|
+
CREATE TABLE IF NOT EXISTS proposal_fs_imports (
|
|
485
|
+
stash_dir TEXT PRIMARY KEY,
|
|
486
|
+
imported_at TEXT NOT NULL,
|
|
487
|
+
imported_count INTEGER NOT NULL DEFAULT 0
|
|
488
|
+
);
|
|
489
|
+
`,
|
|
490
|
+
},
|
|
461
491
|
];
|
|
462
492
|
/**
|
|
463
493
|
* Apply every pending migration in a single transaction per migration.
|
|
@@ -529,6 +559,9 @@ export function proposalRowToProposal(row) {
|
|
|
529
559
|
...(frontmatter !== undefined ? { frontmatter } : {}),
|
|
530
560
|
},
|
|
531
561
|
...(meta.review !== undefined ? { review: meta.review } : {}),
|
|
562
|
+
...(typeof meta.confidence === "number" ? { confidence: meta.confidence } : {}),
|
|
563
|
+
...(meta.gateDecision !== undefined ? { gateDecision: meta.gateDecision } : {}),
|
|
564
|
+
...(typeof meta.backupContent === "string" ? { backupContent: meta.backupContent } : {}),
|
|
532
565
|
};
|
|
533
566
|
}
|
|
534
567
|
/**
|
|
@@ -542,6 +575,12 @@ export function proposalToRowValues(proposal, stashDir) {
|
|
|
542
575
|
metaObj.sourceRun = proposal.sourceRun;
|
|
543
576
|
if (proposal.review !== undefined)
|
|
544
577
|
metaObj.review = proposal.review;
|
|
578
|
+
if (proposal.confidence !== undefined)
|
|
579
|
+
metaObj.confidence = proposal.confidence;
|
|
580
|
+
if (proposal.gateDecision !== undefined)
|
|
581
|
+
metaObj.gateDecision = proposal.gateDecision;
|
|
582
|
+
if (proposal.backupContent !== undefined)
|
|
583
|
+
metaObj.backupContent = proposal.backupContent;
|
|
545
584
|
return {
|
|
546
585
|
id: proposal.id,
|
|
547
586
|
stash_dir: stashDir,
|
|
@@ -656,7 +695,10 @@ export function upsertProposal(db, proposal, stashDir) {
|
|
|
656
695
|
}
|
|
657
696
|
/**
|
|
658
697
|
* List proposals, optionally filtered by stashDir, status, and/or ref.
|
|
659
|
-
*
|
|
698
|
+
*
|
|
699
|
+
* Results are ordered by `created_at ASC` (matching the historical
|
|
700
|
+
* `listProposals()` sort), with `rowid` as a deterministic tiebreak so two
|
|
701
|
+
* proposals created in the same millisecond list in insertion order.
|
|
660
702
|
*/
|
|
661
703
|
export function listStateProposals(db, options = {}) {
|
|
662
704
|
const conditions = [];
|
|
@@ -677,21 +719,72 @@ export function listStateProposals(db, options = {}) {
|
|
|
677
719
|
const rows = db
|
|
678
720
|
.prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
|
|
679
721
|
content, frontmatter_json, metadata_json
|
|
680
|
-
FROM proposals ${where} ORDER BY created_at ASC`)
|
|
722
|
+
FROM proposals ${where} ORDER BY created_at ASC, rowid ASC`)
|
|
681
723
|
.all(...params);
|
|
682
724
|
return rows.map(proposalRowToProposal);
|
|
683
725
|
}
|
|
684
726
|
/**
|
|
685
|
-
* Look up a single proposal by id
|
|
727
|
+
* Look up a single proposal by id, optionally scoped to one stash root.
|
|
728
|
+
* Returns undefined when not found.
|
|
686
729
|
*/
|
|
687
|
-
export function getStateProposal(db, id) {
|
|
688
|
-
const
|
|
689
|
-
.prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
|
|
730
|
+
export function getStateProposal(db, id, stashDir) {
|
|
731
|
+
const sql = `SELECT id, stash_dir, ref, status, source, created_at, updated_at,
|
|
690
732
|
content, frontmatter_json, metadata_json
|
|
691
|
-
FROM proposals WHERE id =
|
|
692
|
-
|
|
733
|
+
FROM proposals WHERE id = ?${stashDir ? " AND stash_dir = ?" : ""}`;
|
|
734
|
+
const row = (stashDir ? db.prepare(sql).get(id, stashDir) : db.prepare(sql).get(id));
|
|
693
735
|
return row ? proposalRowToProposal(row) : undefined;
|
|
694
736
|
}
|
|
737
|
+
/**
 * List the ids of PENDING proposals in one stash whose id begins with
 * `idPrefix`, sorted by id ascending.
 *
 * Only rows with `status = 'pending'` and a matching `stash_dir` are
 * considered. The prefix is matched literally: `%`, `_` and `\` in it are
 * backslash-escaped before being used in the LIKE pattern, which declares
 * `\` as its ESCAPE character.
 *
 * @param db - Open state database handle.
 * @param stashDir - Stash root the proposals belong to.
 * @param idPrefix - Leading characters of the proposal id.
 * @returns {string[]} Matching proposal ids.
 */
export function listStateProposalIdsByPrefix(db, stashDir, idPrefix) {
    // "$&" re-inserts the matched character, so each wildcard gains a leading "\".
    const literalPrefix = idPrefix.replace(/[\\%_]/g, "\\$&");
    const statement = db.prepare(`SELECT id FROM proposals
WHERE stash_dir = ? AND status = 'pending' AND id LIKE ? ESCAPE '\\'
ORDER BY id ASC`);
    const matches = statement.all(stashDir, `${literalPrefix}%`);
    return matches.map(({ id }) => id);
}
|
|
754
|
+
/**
 * Check whether `proposal_fs_imports` already holds a row for `stashDir`,
 * i.e. whether an import has been recorded for that stash.
 *
 * @param db - Open state database handle.
 * @param stashDir - Stash root to look up.
 * @returns {boolean} True when a row exists for `stashDir`.
 */
export function hasImportedFsProposals(db, stashDir) {
    const lookup = db.prepare("SELECT 1 FROM proposal_fs_imports WHERE stash_dir = ?");
    const marker = lookup.get(stashDir);
    // A missing row may come back as null or undefined depending on the
    // driver; truthiness treats both as "not imported".
    return marker ? true : false;
}
|
|
763
|
+
/**
 * Write (or overwrite) the `proposal_fs_imports` row for `stashDir`, stamping
 * it with the current time and the supplied import count.
 *
 * Uses INSERT OR REPLACE, so calling it again for the same stash replaces the
 * existing row instead of failing.
 *
 * @param db - Open state database handle.
 * @param stashDir - Stash root the import ran against.
 * @param importedCount - Number of rows the import inserted.
 */
export function recordFsProposalsImport(db, stashDir, importedCount) {
    const upsert = db.prepare("INSERT OR REPLACE INTO proposal_fs_imports (stash_dir, imported_at, imported_count) VALUES (?, ?, ?)");
    const importedAt = new Date().toISOString();
    upsert.run(stashDir, importedAt, importedCount);
}
|
|
771
|
+
/**
 * Insert a proposal row with INSERT OR IGNORE, so an existing row with the
 * same id is left exactly as it is.
 *
 * The proposal is first converted to column values by `proposalToRowValues`.
 *
 * @param db - Open state database handle.
 * @param proposal - Proposal object to persist.
 * @param stashDir - Stash root stored alongside the proposal.
 * @returns {boolean} True when the statement reports at least one changed
 *   row, i.e. the proposal was actually inserted.
 */
export function insertProposalIfAbsent(db, proposal, stashDir) {
    const row = proposalToRowValues(proposal, stashDir);
    // Bound in the same order as the column list in the statement below.
    const columnOrder = [
        "id",
        "stash_dir",
        "ref",
        "status",
        "source",
        "created_at",
        "updated_at",
        "content",
        "frontmatter_json",
        "metadata_json",
    ];
    const statement = db.prepare(`
INSERT OR IGNORE INTO proposals
(id, stash_dir, ref, status, source, created_at, updated_at, content, frontmatter_json, metadata_json)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
    const outcome = statement.run(...columnOrder.map((column) => row[column]));
    return Number(outcome.changes ?? 0) > 0;
}
|
|
695
788
|
// ── task_history table helpers ───────────────────────────────────────────────
|
|
696
789
|
/**
|
|
697
790
|
* Upsert a task history row.
|
package/dist/indexer/db/db.js
CHANGED
|
@@ -28,7 +28,7 @@ export function openDatabase(dbPath, options) {
|
|
|
28
28
|
}
|
|
29
29
|
const db = openSqlite(resolvedPath);
|
|
30
30
|
db.exec("PRAGMA journal_mode = WAL");
|
|
31
|
-
db.exec("PRAGMA busy_timeout =
|
|
31
|
+
db.exec("PRAGMA busy_timeout = 30000");
|
|
32
32
|
db.exec("PRAGMA foreign_keys = ON");
|
|
33
33
|
// Try to load sqlite-vec extension
|
|
34
34
|
loadVecExtension(db);
|
|
@@ -69,7 +69,7 @@ export function openExistingDatabase(dbPath) {
|
|
|
69
69
|
const resolvedPath = dbPath ?? getDbPath();
|
|
70
70
|
const db = openSqlite(resolvedPath);
|
|
71
71
|
db.exec("PRAGMA journal_mode = WAL");
|
|
72
|
-
db.exec("PRAGMA busy_timeout =
|
|
72
|
+
db.exec("PRAGMA busy_timeout = 30000");
|
|
73
73
|
db.exec("PRAGMA foreign_keys = ON");
|
|
74
74
|
// Existing-DB callers must not mutate schema or embedding metadata on open,
|
|
75
75
|
// but some paths still need write access to usage_events and other tables.
|
|
@@ -119,6 +119,26 @@ export async function runMemoryInferencePass(ctx) {
|
|
|
119
119
|
// 2026-05-26).
|
|
120
120
|
if (signal?.aborted)
|
|
121
121
|
return { aborted: true };
|
|
122
|
+
// Pre-check (#588): when `<parent>.derived.md` is already on disk the
|
|
123
|
+
// inference is by definition complete — the parent only looks pending
|
|
124
|
+
// because `markParentProcessed` never ran (process killed between the
|
|
125
|
+
// child write and the mark) or the child was created externally (e.g.
|
|
126
|
+
// consolidation). Skip the LLM/cache call entirely and mark the parent
|
|
127
|
+
// so it never re-pends. Before this check, production measurements
|
|
128
|
+
// showed ~55% of the pass's LLM budget re-deriving such parents only to
|
|
129
|
+
// discover the existing child after the fact.
|
|
130
|
+
if (fs.existsSync(derivedChildPath(record))) {
|
|
131
|
+
markParentProcessed(record);
|
|
132
|
+
return {
|
|
133
|
+
skipped: false,
|
|
134
|
+
splitParent: false,
|
|
135
|
+
written: 0,
|
|
136
|
+
fromCache: false,
|
|
137
|
+
retryAttempts: 0,
|
|
138
|
+
childExists: true,
|
|
139
|
+
precheck: true,
|
|
140
|
+
};
|
|
141
|
+
}
|
|
122
142
|
// Incremental cache: skip LLM call when body hash is unchanged and
|
|
123
143
|
// --re-enrich was not requested. The cache ref is the absolute file path.
|
|
124
144
|
const validate = (raw) => {
|
|
@@ -171,23 +191,30 @@ export async function runMemoryInferencePass(ctx) {
|
|
|
171
191
|
return { skipped: false, splitParent: true, written: writeOutcome.written, fromCache, retryAttempts };
|
|
172
192
|
}
|
|
173
193
|
// LLM produced a valid derived draft but no file was written — either
|
|
174
|
-
// because `<parent>.derived.md`
|
|
175
|
-
//
|
|
176
|
-
//
|
|
177
|
-
// into the freshAttempts
|
|
194
|
+
// because `<parent>.derived.md` appeared on disk after the pre-check
|
|
195
|
+
// above (a rare mid-flight race) or `writeAssetToSource` threw.
|
|
196
|
+
// Categorise as `childExists` so the consumed attempt is accounted for
|
|
197
|
+
// in health metrics rather than vanishing into the freshAttempts
|
|
198
|
+
// denominator.
|
|
178
199
|
//
|
|
179
|
-
// When the child
|
|
180
|
-
//
|
|
181
|
-
//
|
|
182
|
-
// (
|
|
183
|
-
//
|
|
184
|
-
//
|
|
185
|
-
// should be retried next run — so we key off the explicit `childExists`
|
|
186
|
-
// outcome rather than the conflated `written === 0`.
|
|
200
|
+
// When the child exists the inference is, by definition, complete — so
|
|
201
|
+
// mark the parent processed here too (#550), otherwise
|
|
202
|
+
// `isPendingMemory()` re-queues the same parent every run. A genuine
|
|
203
|
+
// write *failure* (`writeAssetToSource` threw) must NOT mark the parent
|
|
204
|
+
// — it should be retried next run — so we key off the explicit
|
|
205
|
+
// `childExists` outcome rather than the conflated `written === 0`.
|
|
187
206
|
if (writeOutcome.childExists) {
|
|
188
207
|
markParentProcessed(record);
|
|
189
208
|
}
|
|
190
|
-
return {
|
|
209
|
+
return {
|
|
210
|
+
skipped: false,
|
|
211
|
+
splitParent: false,
|
|
212
|
+
written: 0,
|
|
213
|
+
fromCache,
|
|
214
|
+
retryAttempts,
|
|
215
|
+
childExists: true,
|
|
216
|
+
precheck: false,
|
|
217
|
+
};
|
|
191
218
|
},
|
|
192
219
|
// Default concurrency of 4 for cloud APIs. Set `llm.concurrency: 1`
|
|
193
220
|
// in config.json for local model servers (LM Studio, Ollama).
|
|
@@ -224,11 +251,16 @@ export async function runMemoryInferencePass(ctx) {
|
|
|
224
251
|
result.writtenFacts += res.written;
|
|
225
252
|
}
|
|
226
253
|
else if ("childExists" in res && res.childExists) {
|
|
227
|
-
//
|
|
228
|
-
//
|
|
229
|
-
//
|
|
254
|
+
// Derived child already on disk. Track separately so this category is
|
|
255
|
+
// observable in health output and stops bleeding into the
|
|
256
|
+
// freshAttempts denominator. Pre-check skips (#588) are the routine
|
|
257
|
+
// self-healing path — no LLM attempt was consumed and the parent has
|
|
258
|
+
// been marked processed — so only the rare post-LLM case (mid-flight
|
|
259
|
+
// race or write failure) warrants a per-ref warning.
|
|
230
260
|
result.skippedChildExists += 1;
|
|
231
|
-
|
|
261
|
+
if (!res.precheck) {
|
|
262
|
+
warn(`memory inference: derived child for ${pending[i]?.ref ?? "<unknown>"} already existed or write failed; counted as skippedChildExists`);
|
|
263
|
+
}
|
|
232
264
|
}
|
|
233
265
|
else {
|
|
234
266
|
// The per-record state machine should cover every outcome. A hit here
|
|
@@ -324,6 +356,14 @@ function toMemoryName(memoriesDir, filePath) {
|
|
|
324
356
|
// user has organised under memories/.
|
|
325
357
|
return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
|
|
326
358
|
}
|
|
359
|
+
/**
|
|
360
|
+
* Absolute path of the derived child for a parent memory. Single source of
|
|
361
|
+
* truth for the `<parent>.derived.md` naming convention — used both by the
|
|
362
|
+
* pre-LLM existence check (#588) and the write path.
|
|
363
|
+
*/
|
|
364
|
+
function derivedChildPath(parent) {
|
|
365
|
+
return path.join(parent.stashRoot, "memories", `${parent.name}.derived.md`);
|
|
366
|
+
}
|
|
327
367
|
async function writeDerivedMemory(parent, derived) {
|
|
328
368
|
const writeTarget = {
|
|
329
369
|
kind: "filesystem",
|
|
@@ -338,11 +378,10 @@ async function writeDerivedMemory(parent, derived) {
|
|
|
338
378
|
};
|
|
339
379
|
const childName = `${parent.name}.derived`;
|
|
340
380
|
const childRefStr = `memory:${childName}`;
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
//
|
|
344
|
-
//
|
|
345
|
-
// (#550) instead of re-queueing it forever.
|
|
381
|
+
if (fs.existsSync(derivedChildPath(parent))) {
|
|
382
|
+
// The derived child appeared on disk after the caller's pre-check (#588)
|
|
383
|
+
// — a rare mid-flight race. Report `childExists` so the caller marks the
|
|
384
|
+
// parent processed (#550) instead of re-queueing it forever.
|
|
346
385
|
return { written: 0, childExists: true };
|
|
347
386
|
}
|
|
348
387
|
try {
|