akm-cli 0.9.0-beta.54 → 0.9.0-beta.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/dist/cli.js +5 -3
  2. package/dist/commands/agent/contribute-cli.js +2 -3
  3. package/dist/commands/env/env-cli.js +187 -202
  4. package/dist/commands/env/secret-cli.js +109 -121
  5. package/dist/commands/feedback-cli.js +152 -155
  6. package/dist/commands/health/advisories.js +151 -0
  7. package/dist/commands/health/improve-metrics.js +754 -0
  8. package/dist/commands/health/llm-usage.js +65 -0
  9. package/dist/commands/health/md-report.js +103 -0
  10. package/dist/commands/health/metrics.js +278 -0
  11. package/dist/commands/health/task-runs.js +135 -0
  12. package/dist/commands/health/types.js +18 -0
  13. package/dist/commands/health/windows.js +196 -0
  14. package/dist/commands/health.js +14 -1624
  15. package/dist/commands/improve/anti-collapse.js +170 -0
  16. package/dist/commands/improve/collapse-detector.js +3 -2
  17. package/dist/commands/improve/consolidate.js +636 -633
  18. package/dist/commands/improve/dedup.js +1 -1
  19. package/dist/commands/improve/distill/content-repair.js +202 -0
  20. package/dist/commands/improve/distill/promote-memory.js +228 -0
  21. package/dist/commands/improve/distill/quality-gate.js +233 -0
  22. package/dist/commands/improve/distill-guards.js +127 -0
  23. package/dist/commands/improve/distill.js +49 -575
  24. package/dist/commands/improve/extract-cli.js +74 -76
  25. package/dist/commands/improve/extract.js +6 -4
  26. package/dist/commands/improve/hot-probation.js +45 -0
  27. package/dist/commands/improve/improve-auto-accept.js +3 -2
  28. package/dist/commands/improve/improve-cli.js +14 -13
  29. package/dist/commands/improve/improve-result-file.js +2 -1
  30. package/dist/commands/improve/improve.js +6 -5
  31. package/dist/commands/improve/loop-stages.js +19 -21
  32. package/dist/commands/improve/preparation.js +4 -2
  33. package/dist/commands/improve/procedural.js +10 -31
  34. package/dist/commands/improve/recombine.js +19 -43
  35. package/dist/commands/improve/reflect.js +1 -1
  36. package/dist/commands/improve/schema-similarity-gate.js +168 -0
  37. package/dist/commands/improve/shared.js +48 -0
  38. package/dist/commands/observability-cli.js +4 -4
  39. package/dist/commands/proposal/drain-policies.js +2 -2
  40. package/dist/commands/proposal/drain.js +1 -1
  41. package/dist/commands/proposal/legacy-import.js +115 -0
  42. package/dist/commands/proposal/proposal-cli.js +3 -3
  43. package/dist/commands/proposal/proposal.js +2 -1
  44. package/dist/commands/proposal/propose.js +1 -1
  45. package/dist/commands/proposal/repository.js +829 -0
  46. package/dist/commands/proposal/validators/proposals.js +5 -920
  47. package/dist/commands/read/remember-cli.js +132 -137
  48. package/dist/commands/read/search-cli.js +1 -1
  49. package/dist/commands/registry-cli.js +76 -87
  50. package/dist/commands/sources/add-cli.js +90 -94
  51. package/dist/commands/sources/history.js +1 -1
  52. package/dist/commands/sources/schema-repair.js +1 -1
  53. package/dist/commands/sources/sources-cli.js +3 -3
  54. package/dist/commands/sources/stash-cli.js +1 -1
  55. package/dist/commands/tasks/tasks-cli.js +1 -2
  56. package/dist/commands/wiki-cli.js +2 -3
  57. package/dist/core/common.js +3 -3
  58. package/dist/core/config/config-schema.js +6 -0
  59. package/dist/core/deep-merge.js +38 -0
  60. package/dist/core/events.js +2 -1
  61. package/dist/core/logs-db.js +8 -13
  62. package/dist/core/paths.js +14 -14
  63. package/dist/core/state-db.js +13 -1140
  64. package/dist/indexer/db/db.js +66 -709
  65. package/dist/indexer/db/entry-mapper.js +41 -0
  66. package/dist/indexer/db/schema.js +516 -0
  67. package/dist/indexer/feedback/utility-policy.js +85 -0
  68. package/dist/indexer/graph/graph-extraction.js +2 -1
  69. package/dist/indexer/index-writer-lock.js +9 -0
  70. package/dist/indexer/indexer.js +78 -23
  71. package/dist/indexer/search/fts-query.js +51 -0
  72. package/dist/integrations/agent/spawn.js +15 -66
  73. package/dist/output/text/helpers.js +13 -0
  74. package/dist/scripts/migrate-storage.js +6891 -7436
  75. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +44 -43
  76. package/dist/setup/legacy-config.js +106 -0
  77. package/dist/setup/prompt.js +57 -0
  78. package/dist/setup/providers.js +14 -0
  79. package/dist/setup/semantic-assets.js +124 -0
  80. package/dist/setup/setup.js +24 -1607
  81. package/dist/setup/steps/connection.js +734 -0
  82. package/dist/setup/steps/output.js +31 -0
  83. package/dist/setup/steps/platforms.js +124 -0
  84. package/dist/setup/steps/semantic.js +27 -0
  85. package/dist/setup/steps/sources.js +222 -0
  86. package/dist/setup/steps/stashdir.js +42 -0
  87. package/dist/setup/steps/tasks.js +152 -0
  88. package/dist/storage/repositories/canaries-repository.js +107 -0
  89. package/dist/storage/repositories/consolidation-repository.js +38 -0
  90. package/dist/storage/repositories/embeddings-repository.js +72 -0
  91. package/dist/storage/repositories/events-repository.js +187 -0
  92. package/dist/storage/repositories/extract-sessions-repository.js +96 -0
  93. package/dist/storage/repositories/improve-runs-repository.js +130 -0
  94. package/dist/storage/repositories/index-db.js +4 -7
  95. package/dist/storage/repositories/proposals-repository.js +220 -0
  96. package/dist/storage/repositories/recombine-repository.js +213 -0
  97. package/dist/storage/repositories/task-history-repository.js +93 -0
  98. package/dist/storage/sqlite-pragmas.js +3 -3
  99. package/dist/tasks/runner.js +2 -1
  100. package/package.json +1 -1
  101. package/dist/commands/improve/homeostatic.js +0 -497
@@ -0,0 +1,107 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
/**
 * Persist a newly minted canary set. Every row is written with `active = 1`
 * and all rows share the same `canarySetId`.
 *
 * @param db - Database handle exposing `prepare()` and `transaction()`.
 * @param canarySetId - Set identifier stored on every inserted row.
 * @param canaries - Items carrying `anchorRef`, `query` and an optional
 *   `source` (stored as "auto" when null/undefined). Empty list is a no-op.
 * @param now - Optional `created_at` value; defaults to the current ISO time.
 */
export function insertCanaries(db, canarySetId, canaries, now) {
    if (canaries.length === 0) {
        return;
    }
    const createdAt = now ?? new Date().toISOString();
    const insertRow = db.prepare(`
    INSERT INTO canary_queries (canary_set_id, anchor_ref, query, source, active, created_at)
    VALUES (?, ?, ?, ?, 1, ?)
  `);
    // All rows of the set are written inside one transaction.
    const writeSet = db.transaction(() => {
        for (const canary of canaries) {
            const source = canary.source ?? "auto";
            insertRow.run(canarySetId, canary.anchorRef, canary.query, source, createdAt);
        }
    });
    writeSet();
}
19
/**
 * Fetch the rows of the currently active canary set, ordered by `id`.
 *
 * Only rows of the NEWEST active set are returned (latest `created_at`, ties
 * broken by highest `id`). If more than one set is ever left active, the older
 * sets' rows are never returned, so rows from different sets are not mixed.
 * An empty array means there are no active rows.
 */
export function getActiveCanaries(db) {
    const newestActiveSetSql = `SELECT * FROM canary_queries
       WHERE active = 1 AND canary_set_id = (
         SELECT canary_set_id FROM canary_queries WHERE active = 1
         ORDER BY created_at DESC, id DESC LIMIT 1
       )
       ORDER BY id`;
    const statement = db.prepare(newestActiveSetSql);
    return statement.all();
}
34
/**
 * Fetch every row of one canary set by exact set id, ordered by `id`.
 * The `active` flag is not filtered, so deactivated sets are returned too.
 */
export function getCanariesBySetId(db, canarySetId) {
    const bySetId = db.prepare(`SELECT * FROM canary_queries WHERE canary_set_id = ? ORDER BY id`);
    return bySetId.all(canarySetId);
}
40
/** Return each distinct `canary_set_id` that has at least one active row. */
export function listActiveCanarySetIds(db) {
    const statement = db.prepare(`SELECT DISTINCT canary_set_id FROM canary_queries WHERE active = 1`);
    const setIds = [];
    for (const record of statement.all()) {
        setIds.push(record.canary_set_id);
    }
    return setIds;
}
45
/**
 * Mark every still-active row of a canary set as inactive.
 *
 * Rows are kept (flipped to `active = 0`), not deleted, so that historical
 * improve_cycle_metrics rows keyed on the old canary_set_id stay
 * interpretable.
 *
 * @returns The number of rows updated, always as a plain number.
 */
export function deactivateCanarySet(db, canarySetId) {
    const update = db.prepare(`UPDATE canary_queries SET active = 0 WHERE canary_set_id = ? AND active = 1`);
    const outcome = update.run(canarySetId);
    const affected = outcome.changes ?? 0;
    // Some drivers report `changes` as a bigint; normalise it.
    if (typeof affected === "bigint") {
        return Number(affected);
    }
    return affected;
}
57
/**
 * Insert one row into `improve_cycle_metrics`.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param row - Object whose property names match the table's column names;
 *   the sixteen listed columns are read from it and bound positionally.
 */
export function insertCycleMetrics(db, row) {
    // Order here must mirror the column list of the INSERT statement below.
    const boundColumns = [
        "run_id",
        "ts",
        "pass",
        "canary_set_id",
        "mean_recall",
        "mean_ndcg",
        "mean_mrr",
        "canary_ranks_json",
        "store_total",
        "store_by_type_json",
        "distinct_content_ratio",
        "mean_bigram_diversity",
        "over_generation_count",
        "accepted_actions",
        "merge_floor_violations",
        "alerts_json",
    ];
    const insert = db.prepare(`
    INSERT INTO improve_cycle_metrics
      (run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
       canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
       mean_bigram_diversity, over_generation_count, accepted_actions,
       merge_floor_violations, alerts_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    insert.run(...boundColumns.map((column) => row[column]));
}
68
/**
 * Fetch up to `limit` of the most recent cycle rows for one canary set and
 * return them OLDEST-first.
 *
 * The query selects newest-first (`ts DESC, id DESC`) so that LIMIT keeps the
 * latest rows; the result is then reversed. Filtering on `canary_set_id`
 * keeps rows from different canary sets out of the same window.
 *
 * @param limit - Maximum row count; negative values are clamped to 0.
 */
export function queryRecentCycleMetrics(db, canarySetId, limit) {
    const statement = db.prepare(`SELECT run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
              canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
              mean_bigram_diversity, over_generation_count, accepted_actions,
              merge_floor_violations, alerts_json
       FROM improve_cycle_metrics WHERE canary_set_id = ?
       ORDER BY ts DESC, id DESC LIMIT ?`);
    const window = statement.all(canarySetId, Math.max(0, limit));
    // Flip newest-first into oldest-first (in place).
    window.reverse();
    return window;
}
84
/**
 * Fetch the single newest cycle row (by `ts`, then `id`) regardless of which
 * canary set it belongs to.
 *
 * @returns The row, or `undefined` when the table has no rows (a `null`
 *   driver result is normalised to `undefined`).
 */
export function getLatestCycleMetrics(db) {
    const statement = db.prepare(`SELECT run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
              canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
              mean_bigram_diversity, over_generation_count, accepted_actions,
              merge_floor_violations, alerts_json
       FROM improve_cycle_metrics ORDER BY ts DESC, id DESC LIMIT 1`);
    const latest = statement.get();
    return latest ?? undefined;
}
95
/**
 * Delete `improve_cycle_metrics` rows whose `ts` is older than
 * `retentionDays` days before now. Only this table is touched.
 *
 * @param retentionDays - Age limit in days (default 365). A non-finite or
 *   non-positive value disables the purge: nothing is queried and 0 is returned.
 * @returns Number of rows deleted, as a plain number.
 */
export function purgeOldCycleMetrics(db, retentionDays = 365) {
    const enabled = Number.isFinite(retentionDays) && retentionDays > 0;
    if (!enabled) {
        return 0;
    }
    const MS_PER_DAY = 86_400_000;
    const cutoffIso = new Date(Date.now() - retentionDays * MS_PER_DAY).toISOString();
    const { changes } = db.prepare("DELETE FROM improve_cycle_metrics WHERE ts < ?").run(cutoffIso);
    const deleted = changes ?? 0;
    // `changes` may arrive as a bigint depending on the driver.
    return typeof deleted === "bigint" ? Number(deleted) : deleted;
}
@@ -0,0 +1,38 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
/**
 * Load `consolidation_judged` rows for a list of entry keys and index them by
 * `entry_key`, so callers can look up a key's judged state without further
 * queries.
 *
 * Keys are queried in batches of 500 so a single statement never carries more
 * bound parameters than SQLite's (historically ~999) variable limit allows.
 * An empty key list issues no query and yields an empty map.
 *
 * @returns Map of `entry_key` → full table row.
 */
export function getConsolidationJudgedMap(db, entryKeys) {
    const judgedByKey = new Map();
    const BATCH_SIZE = 500;
    let offset = 0;
    while (offset < entryKeys.length) {
        const batch = entryKeys.slice(offset, offset + BATCH_SIZE);
        const marks = batch.map(() => "?").join(",");
        const statement = db.prepare(`SELECT * FROM consolidation_judged WHERE entry_key IN (${marks})`);
        for (const record of statement.all(...batch)) {
            judgedByKey.set(record.entry_key, record);
        }
        offset += BATCH_SIZE;
    }
    return judgedByKey;
}
27
/**
 * Write the judged state for one entry key using INSERT OR REPLACE, so an
 * existing row for the same key is overwritten by the newest judgement.
 *
 * @param input - `{ entryKey, contentHash, judgedAt, outcome }`.
 */
export function upsertConsolidationJudged(db, input) {
    const upsert = db.prepare(`
    INSERT OR REPLACE INTO consolidation_judged
      (entry_key, content_hash, judged_at, outcome)
    VALUES (?, ?, ?, ?)
  `);
    const { entryKey, contentHash, judgedAt, outcome } = input;
    upsert.run(entryKey, contentHash, judgedAt, outcome);
}
@@ -0,0 +1,72 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
/**
 * Encode an embedding vector as raw bytes for the
 * `body_embeddings.embedding` BLOB column.
 *
 * The numbers are narrowed to 32-bit floats and the resulting buffer is
 * exposed as a `Uint8Array` (4 bytes per element, platform byte order).
 *
 * @param vec - Embedding as `number[]`.
 * @returns Byte view over a freshly allocated Float32 buffer.
 */
export function embeddingToBlob(vec) {
    const { buffer } = new Float32Array(vec);
    return new Uint8Array(buffer);
}
12
/**
 * Decode the raw bytes of the `body_embeddings.embedding` BLOB column back
 * into a `number[]` embedding vector (inverse of `embeddingToBlob`).
 *
 * A `Float32Array` view can only be created at a byte offset that is a
 * multiple of 4. Blobs handed back by a driver may be views into a larger
 * shared buffer (e.g. pooled Node `Buffer`s) at an arbitrary offset, which
 * would make a direct re-interpretation throw a RangeError. In that case the
 * bytes are first copied into a fresh, aligned buffer.
 *
 * Trailing bytes that do not form a complete 4-byte float are ignored.
 *
 * @param blob - `Uint8Array` (or subclass) holding Float32 data.
 * @returns The decoded vector; empty array for an empty blob.
 */
export function blobToEmbedding(blob) {
    const BYTES_PER_FLOAT = Float32Array.BYTES_PER_ELEMENT;
    const floatCount = Math.floor(blob.byteLength / BYTES_PER_FLOAT);
    if (blob.byteOffset % BYTES_PER_FLOAT === 0) {
        // Aligned: re-interpret the existing bytes without copying.
        return Array.from(new Float32Array(blob.buffer, blob.byteOffset, floatCount));
    }
    // Misaligned view: copy the payload to offset 0 of a new buffer first.
    const aligned = new Uint8Array(floatCount * BYTES_PER_FLOAT);
    aligned.set(blob.subarray(0, aligned.byteLength));
    return Array.from(new Float32Array(aligned.buffer));
}
21
/**
 * Bulk-fetch cached body embeddings for a set of content hashes.
 *
 * Behaviour:
 *  - Empty `contentHashes` → empty map, no query issued.
 *  - If a sampled stored row carries a `model_id` different from
 *    `expectedModelId`, the whole table is cleared and an empty map is
 *    returned, so callers re-embed everything on this run.
 *  - Otherwise rows are fetched in batches of 500 hashes (keeps the bound
 *    parameter count below SQLite's historical ~999 variable limit).
 *
 * The model check above only samples ONE row, so on its own it cannot
 * guarantee the table is homogeneous (`upsertBodyEmbeddings` stores a
 * `model_id` per entry). The fetch therefore also filters on `model_id`, so a
 * vector produced by a different model is never returned even if the table
 * is mixed.
 *
 * @param db - Database handle exposing `prepare()` and `exec()`.
 * @param contentHashes - Hashes to look up.
 * @param expectedModelId - Model id the caller's vectors must belong to.
 * @returns Map of `content_hash` → decoded `number[]` vector.
 */
export function getBodyEmbeddings(db, contentHashes, expectedModelId) {
    const out = new Map();
    if (contentHashes.length === 0)
        return out;
    // Model-id mismatch: vectors are in the wrong metric space — drop all rows.
    const firstRow = db.prepare("SELECT model_id FROM body_embeddings LIMIT 1").get();
    if (firstRow && firstRow.model_id !== expectedModelId) {
        db.exec("DELETE FROM body_embeddings");
        return out;
    }
    // `IS` (not `=`) so a NULL model id compares equal to a NULL expectation;
    // an undefined expectation is bound as NULL.
    const modelParam = expectedModelId ?? null;
    const CHUNK = 500;
    for (let i = 0; i < contentHashes.length; i += CHUNK) {
        const chunk = contentHashes.slice(i, i + CHUNK);
        const placeholders = chunk.map(() => "?").join(",");
        const rows = db
            .prepare(`SELECT content_hash, embedding FROM body_embeddings WHERE content_hash IN (${placeholders}) AND model_id IS ?`)
            .all(...chunk, modelParam);
        for (const row of rows) {
            out.set(row.content_hash, blobToEmbedding(row.embedding));
        }
    }
    return out;
}
54
/**
 * Write a batch of body embeddings with INSERT OR REPLACE inside a single
 * transaction.
 *
 * @param db - Database handle exposing `prepare()` and `transaction()`.
 * @param entries - Items of shape `{ contentHash, embedding, modelId }`;
 *   `embedding` is a `number[]` that is encoded via `embeddingToBlob`.
 *   An empty list is a no-op.
 *
 * All rows of one call share the same `created_at` (a `Date.now()`
 * millisecond timestamp taken once up front).
 */
export function upsertBodyEmbeddings(db, entries) {
    if (entries.length === 0) {
        return;
    }
    const createdAt = Date.now();
    const upsert = db.prepare(`
    INSERT OR REPLACE INTO body_embeddings (content_hash, embedding, model_id, created_at)
    VALUES (?, ?, ?, ?)
  `);
    const writeAll = db.transaction(() => {
        for (const entry of entries) {
            const blob = embeddingToBlob(entry.embedding);
            upsert.run(entry.contentHash, blob, entry.modelId, createdAt);
        }
    });
    writeAll();
}
@@ -0,0 +1,187 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import { error } from "../../core/warn.js";
5
/**
 * Convert a raw `events` table row into the public event envelope shape.
 *
 * Field mapping: `id` → `id`, `ts` → `ts`, `event_type` → `eventType`;
 * `schemaVersion` is always 1. `ref` is included only when the column is not
 * `null`.
 *
 * `metadata` is attached only when `metadata_json` parses to a NON-EMPTY JSON
 * object/array, so consumers that test `envelope.metadata !== undefined` keep
 * working. Everything else is treated as "no metadata": unparsable JSON,
 * JSON `null`, empty objects, and JSON primitives. The explicit object check
 * matters for strings: `Object.keys("abc")` is non-empty, so without it a
 * stored JSON string would be surfaced as metadata.
 *
 * @param row - `{ id, ts, event_type, ref, metadata_json }`.
 * @returns The envelope object.
 */
export function eventRowToEnvelope(row) {
    let metadata;
    try {
        const parsed = JSON.parse(row.metadata_json);
        const isObjectLike = parsed !== null && typeof parsed === "object";
        if (isObjectLike && Object.keys(parsed).length > 0) {
            metadata = parsed;
        }
    }
    catch {
        // Corrupt JSON in the DB — treat as no metadata.
    }
    return {
        schemaVersion: 1,
        id: row.id,
        ts: row.ts,
        eventType: row.event_type,
        ...(row.ref !== null ? { ref: row.ref } : {}),
        ...(metadata !== undefined ? { metadata } : {}),
    };
}
31
/**
 * Insert one event row and return the id assigned by the database
 * (`RETURNING id`), which callers can keep as a `sinceId` cursor.
 *
 * Best-effort by design: any failure while preparing or running the insert
 * is caught, reported through `error()`, and turned into an `undefined`
 * return value rather than being propagated to the caller.
 *
 * @param input - `{ eventType, ts, ref?, metadata? }`. A missing `ref` is
 *   stored as NULL; missing `metadata` is stored as `{}`.
 * @returns The new row id, or `undefined` when the insert failed or no row
 *   was returned.
 */
export function insertEvent(db, input) {
    try {
        const insert = db.prepare(`INSERT INTO events (event_type, ts, ref, metadata_json)
         VALUES (?, ?, ?, ?)
         RETURNING id`);
        const refValue = input.ref ?? null;
        const metadataJson = JSON.stringify(input.metadata ?? {});
        const inserted = insert.get(input.eventType, input.ts, refValue, metadataJson);
        return inserted?.id;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        error(`akm: state.db event insert failed (${reason})`);
        return undefined;
    }
}
54
/**
 * Read events matching an optional filter, in ascending `id` order.
 *
 * Supported filters (all optional, combined with AND):
 *  - `sinceId` (> 0): only rows with `id > sinceId`
 *  - `since`: only rows with `ts >= since`
 *  - `type`: exact `event_type` match
 *  - `ref`: exact `ref` match
 *
 * @returns `{ events, nextId }` where `events` are the rows mapped through
 *   `eventRowToEnvelope` and `nextId` is the id of the last returned event,
 *   or `sinceId` (0 when absent) if nothing matched. `nextId` is meant to be
 *   passed back as the next `sinceId`.
 */
export function readStateEvents(db, options = {}) {
    // [enabled, SQL condition, bound value] — order defines parameter order.
    const candidates = [
        [options.sinceId !== undefined && options.sinceId > 0, "id > ?", options.sinceId],
        [Boolean(options.since), "ts >= ?", options.since],
        [Boolean(options.type), "event_type = ?", options.type],
        [Boolean(options.ref), "ref = ?", options.ref],
    ];
    const active = candidates.filter(([enabled]) => enabled);
    const clauses = active.map(([, clause]) => clause);
    const bindings = active.map(([, , value]) => value);
    const where = clauses.length > 0 ? `WHERE ${clauses.join(" AND ")}` : "";
    const statement = db.prepare(`SELECT id, event_type, ts, ref, metadata_json FROM events ${where} ORDER BY id ASC`);
    const events = statement.all(...bindings).map((record) => eventRowToEnvelope(record));
    const last = events[events.length - 1];
    const nextId = events.length > 0 ? last.id : (options.sinceId ?? 0);
    return { events, nextId };
}
88
/**
 * Delete `events` rows whose `ts` is older than `retentionDays` days before
 * now, using a single DELETE statement.
 *
 * @param retentionDays - Age limit in days (default 90). A non-finite or
 *   non-positive value means "disabled": no statement is run and 0 is returned.
 * @returns Number of rows deleted, as a plain number (so callers can report
 *   it, e.g. in an `events_purged` event).
 */
export function purgeOldEvents(db, retentionDays = 90) {
    const enabled = Number.isFinite(retentionDays) && retentionDays > 0;
    if (!enabled) {
        return 0;
    }
    const MS_PER_DAY = 86_400_000;
    const cutoffIso = new Date(Date.now() - retentionDays * MS_PER_DAY).toISOString();
    const { changes } = db.prepare("DELETE FROM events WHERE ts < ?").run(cutoffIso);
    // run() reports `changes` as number or bigint depending on the underlying
    // library; always hand back a number.
    const deleted = changes ?? 0;
    return typeof deleted === "bigint" ? Number(deleted) : deleted;
}
107
/**
 * Import all events from an `events.jsonl` file into the `events` table.
 *
 * Ids: the database assigns new ids; any `id` stored in the file is ignored.
 * Callers holding an old cursor should switch to the returned `maxId`.
 *
 * Idempotency: before inserting, each line is checked against the table by
 * the full `(event_type, ts, ref, metadata_json)` tuple. Lines whose exact
 * tuple already exists are counted in `skipped` and not re-inserted, which
 * makes re-running the import safe. The check is a SELECT on the import path
 * only — it does not rely on a UNIQUE constraint, because distinct runtime
 * events may legitimately share `(event_type, ts, ref)`.
 *
 * Line handling:
 *  - Blank lines and lines that are not valid JSON are ignored.
 *  - Lines that parse to something other than a JSON object (`null`,
 *    numbers, strings, booleans, arrays) are ignored as well. Dereferencing
 *    `null` would otherwise throw and abort the whole import, and a
 *    primitive would be stored as an "unknown" event stamped with the
 *    current time, defeating the duplicate check on every re-run.
 *  - Missing/non-string `eventType` → "unknown"; missing/non-string `ts` →
 *    current ISO time; missing/non-string `ref` → NULL.
 *  - `metadata` is stored as JSON when it is a non-null object, otherwise as
 *    "{}" (the same default the regular insert path uses).
 *
 * The whole import runs inside one transaction.
 *
 * @param db - Database handle exposing `prepare()` and `transaction()`.
 * @param jsonlPath - Path of the events.jsonl file to import.
 * @returns `{ imported, maxId, skipped }`: rows inserted, the highest id
 *   assigned during this call (0 if none), and rows skipped as duplicates.
 *   A missing file yields all zeros.
 */
export async function importEventsJsonl(db, jsonlPath) {
    const { readFileSync, existsSync } = await import("node:fs");
    if (!existsSync(jsonlPath)) {
        return { imported: 0, maxId: 0, skipped: 0 };
    }
    const text = readFileSync(jsonlPath, "utf8");
    const lines = text.split("\n").filter((l) => l.trim().length > 0);
    let imported = 0;
    let maxId = 0;
    let skipped = 0;
    const insertStmt = db.prepare(`INSERT INTO events (event_type, ts, ref, metadata_json)
     VALUES (?, ?, ?, ?)
     RETURNING id`);
    // Uses IS for ref so two NULL refs compare equal (a plain `=` would treat
    // NULL = NULL as NULL and the row would be re-inserted on every run).
    const existsStmt = db.prepare(`SELECT 1 FROM events
     WHERE event_type = ?
       AND ts = ?
       AND ref IS ?
       AND metadata_json = ?
     LIMIT 1`);
    db.transaction(() => {
        for (const line of lines) {
            let parsed;
            try {
                parsed = JSON.parse(line);
            }
            catch {
                continue; // skip malformed lines
            }
            // Valid JSON that is not an object cannot be an event envelope.
            if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
                continue;
            }
            const eventType = typeof parsed.eventType === "string" ? parsed.eventType : "unknown";
            const ts = typeof parsed.ts === "string" ? parsed.ts : new Date().toISOString();
            const ref = typeof parsed.ref === "string" ? parsed.ref : null;
            // typeof null === "object", so null must be excluded explicitly.
            const hasMetadata = parsed.metadata !== null && typeof parsed.metadata === "object";
            const metadata = hasMetadata ? JSON.stringify(parsed.metadata) : "{}";
            const duplicate = existsStmt.get(eventType, ts, ref, metadata);
            if (duplicate) {
                skipped++;
                continue;
            }
            const result = insertStmt.get(eventType, ts, ref, metadata);
            if (result) {
                imported++;
                if (result.id > maxId)
                    maxId = result.id;
            }
        }
    })();
    return { imported, maxId, skipped };
}
@@ -0,0 +1,96 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
/**
 * Write one session's extract outcome into `extract_sessions_seen` using
 * INSERT OR REPLACE, so the stored row always reflects the latest run.
 *
 * Normalisation applied before binding:
 *  - `sessionEndedAt`: a finite number is treated as epoch milliseconds and
 *    stored as an ISO string; anything else is stored as NULL.
 *  - `rationale`, `sourceRun`: null/undefined → NULL.
 *  - `metadata`: serialised as JSON, `{}` when absent.
 *
 * `contentHash` is stored as given. Per the module's notes it is what the
 * skip decision on a later run compares against, while `session_ended_at` is
 * informational only.
 */
export function upsertExtractedSession(db, input) {
    const { sessionEndedAt } = input;
    const hasEndTime = typeof sessionEndedAt === "number" && Number.isFinite(sessionEndedAt);
    let endedAtIso = null;
    if (hasEndTime) {
        endedAtIso = new Date(sessionEndedAt).toISOString();
    }
    const upsert = db.prepare(`
    INSERT OR REPLACE INTO extract_sessions_seen
      (harness, session_id, processed_at, session_ended_at, outcome,
       candidate_count, proposal_count, rationale, source_run, metadata_json,
       content_hash)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    const bindings = [
        input.harness,
        input.sessionId,
        input.processedAt,
        endedAtIso,
        input.outcome,
        input.candidateCount,
        input.proposalCount,
        input.rationale ?? null,
        input.sourceRun ?? null,
        JSON.stringify(input.metadata ?? {}),
        input.contentHash,
    ];
    upsert.run(...bindings);
}
24
/**
 * Look up the stored extract record for one `(harness, sessionId)` pair.
 *
 * @returns The row, or `undefined` when none exists. A `null` "no row"
 *   result from the driver is normalised to `undefined` so callers can treat
 *   both the same way.
 */
export function getExtractedSession(db, harness, sessionId) {
    const lookup = db.prepare("SELECT * FROM extract_sessions_seen WHERE harness = ? AND session_id = ?");
    const found = lookup.get(harness, sessionId);
    if (found == null) {
        return undefined;
    }
    return found;
}
36
/**
 * Load the stored extract records for many session ids of one harness and
 * index them by `session_id`, so callers can look sessions up without
 * issuing one query per id.
 *
 * Ids are queried in batches of 500 to keep each statement's bound-parameter
 * count below SQLite's (historically ~999) variable limit. An empty id list
 * issues no query.
 *
 * @returns Map of `session_id` → full table row.
 */
export function getExtractedSessionsMap(db, harness, sessionIds) {
    const seenById = new Map();
    const BATCH_SIZE = 500;
    let offset = 0;
    while (offset < sessionIds.length) {
        const batch = sessionIds.slice(offset, offset + BATCH_SIZE);
        const marks = batch.map(() => "?").join(",");
        const statement = db.prepare(`SELECT * FROM extract_sessions_seen
           WHERE harness = ? AND session_id IN (${marks})`);
        for (const record of statement.all(harness, ...batch)) {
            seenById.set(record.session_id, record);
        }
        offset += BATCH_SIZE;
    }
    return seenById;
}
59
/**
 * Return the time of the most recent extract run for a harness, taken as
 * `MAX(processed_at)` over its `extract_sessions_seen` rows and converted to
 * epoch milliseconds.
 *
 * @returns Epoch ms, or `null` when the harness has no rows (or the stored
 *   value is empty or cannot be parsed as a date). Per the module's notes
 *   this is used to default the session-discovery window to "since the last
 *   run".
 */
export function getLastExtractRunAt(db, harness) {
    const statement = db.prepare("SELECT MAX(processed_at) AS last FROM extract_sessions_seen WHERE harness = ?");
    const latest = statement.get(harness)?.last;
    if (!latest) {
        return null;
    }
    const epochMs = Date.parse(latest);
    if (!Number.isFinite(epochMs)) {
        return null;
    }
    return epochMs;
}
76
/**
 * Decide whether a session can be skipped because content with the same hash
 * has already been processed.
 *
 * The decision is based solely on the content hash — no timestamps are
 * consulted — so it does not depend on clocks or on the order in which
 * sessions ended.
 *
 * Outcomes:
 *  - no prior record                 → `false` (never seen: process it)
 *  - prior record without a hash     → `false` (process once so a hash gets
 *                                       recorded)
 *  - stored hash equals current hash → `true`  (unchanged: skip)
 *  - stored hash differs             → `false` (changed: re-process)
 *
 * @param prior - Previously stored row (with `content_hash`) or a falsy value.
 * @param currentContentHash - Hash of the session content as it is now.
 */
export function shouldSkipAlreadyExtractedSession(prior, currentContentHash) {
    const storedHash = prior ? prior.content_hash : null;
    if (storedHash == null) {
        return false;
    }
    return storedHash === currentContentHash;
}
@@ -0,0 +1,130 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import { classifyImproveAction } from "../../core/improve-types.js";
5
+ // ── Per-phase gate threshold store (Migration 012) ───────────────────────────
6
/**
 * Look up the stored threshold for a gate phase in
 * `improve_gate_thresholds`.
 *
 * @returns The `threshold` column value, or `undefined` when no row exists
 *   for the phase — callers are expected to apply their own fallback then.
 */
export function getPhaseThreshold(db, phase) {
    const lookup = db.prepare("SELECT threshold FROM improve_gate_thresholds WHERE phase = ?");
    const stored = lookup.get(phase);
    return stored?.threshold;
}
17
/**
 * Store the threshold for a gate phase with INSERT OR REPLACE (upsert), so
 * calling it again for the same phase overwrites the previous value.
 *
 * The threshold is rounded to the nearest integer before it is stored, and
 * `updated_at` is set to the current `Date.now()` millisecond timestamp.
 */
export function persistPhaseThreshold(db, phase, threshold) {
    const upsert = db.prepare(`INSERT OR REPLACE INTO improve_gate_thresholds (phase, threshold, updated_at)
     VALUES (?, ?, ?)`);
    const roundedThreshold = Math.round(threshold);
    upsert.run(phase, roundedThreshold, Date.now());
}
25
/**
 * Derive the aggregate counters for an improve run from its full result.
 * Pure function: reads `result` only and performs no I/O.
 *
 * Counting rules:
 *  - `plannedCount` / `actionsCount`: lengths of `result.plannedRefs` and
 *    `result.actions` (0 when the field is not an array).
 *  - Each action is bucketed by `classifyImproveAction(action.mode)` into
 *    accepted / rejected / skipped / error. The "noop" bucket — and any other
 *    value — is counted in none of them.
 *  - `autoAcceptedCount`: actions whose `result.autoAccepted === true`, plus
 *    `result.gateAutoAcceptedCount` (0 when absent).
 *
 * @returns `{ plannedCount, actionsCount, acceptedCount, rejectedCount,
 *   skippedCount, autoAcceptedCount, errorCount }`.
 */
export function computeImproveRunMetrics(result) {
    const actions = Array.isArray(result.actions) ? result.actions : [];
    const plannedCount = Array.isArray(result.plannedRefs) ? result.plannedRefs.length : 0;
    // Only these buckets are tallied; classifications outside this set
    // (including "noop") are deliberately ignored.
    const tally = new Map([
        ["accepted", 0],
        ["rejected", 0],
        ["skipped", 0],
        ["error", 0],
    ]);
    let flaggedAutoAccepted = 0;
    for (const action of actions) {
        const bucket = classifyImproveAction(action.mode);
        if (tally.has(bucket)) {
            tally.set(bucket, tally.get(bucket) + 1);
        }
        // Per-action flag carried on the action's own result object.
        if (action.result?.autoAccepted === true) {
            flaggedAutoAccepted += 1;
        }
    }
    return {
        plannedCount,
        actionsCount: actions.length,
        acceptedCount: tally.get("accepted"),
        rejectedCount: tally.get("rejected"),
        skippedCount: tally.get("skipped"),
        autoAcceptedCount: flaggedAutoAccepted + (result.gateAutoAcceptedCount ?? 0),
        errorCount: tally.get("error"),
    };
}
73
/**
 * Insert one row into `improve_runs` using a parameterised statement.
 *
 * This is a plain INSERT (not REPLACE): reusing an existing `id` is expected
 * to fail with a constraint error instead of silently overwriting the earlier
 * record, so callers should supply a fresh id per run.
 *
 * Stored representation:
 *  - `dryRun` and `ok` are stored as 1/0.
 *  - `result`, the metrics object and `metadata` (default `{}`) are stored as
 *    JSON text.
 *  - When `input.metrics` is not provided, the metrics are derived from
 *    `input.result` via `computeImproveRunMetrics`.
 */
export function recordImproveRun(db, input) {
    const metrics = input.metrics ?? computeImproveRunMetrics(input.result);
    const asFlag = (value) => (value ? 1 : 0);
    const insert = db.prepare(`
    INSERT INTO improve_runs
      (id, started_at, completed_at, stash_dir, dry_run, profile,
       scope_mode, scope_value, guidance, ok, result_json, metrics_json, metadata_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    const bindings = [
        input.id,
        input.startedAt,
        input.completedAt,
        input.stashDir,
        asFlag(input.dryRun),
        input.profile,
        input.scopeMode,
        input.scopeValue,
        input.guidance,
        asFlag(input.ok),
        JSON.stringify(input.result),
        JSON.stringify(metrics),
        JSON.stringify(input.metadata ?? {}),
    ];
    insert.run(...bindings);
}
95
/**
 * Read non-dry-run `improve_runs` rows whose `started_at` lies in the window
 * `[since, until)`, newest first (`ORDER BY started_at DESC`).
 *
 * When `until` is omitted (or otherwise falsy) the window has no upper bound
 * and only `started_at >= since` applies. The `dry_run = 0` filter is always
 * part of the query, so dry-run rows are never returned.
 *
 * The result is obtained with `.all()`, i.e. returned as an array of rows.
 *
 * @returns Rows with `id, started_at, completed_at, ok, scope_mode,
 *   scope_value, result_json`.
 */
export function queryImproveRuns(db, since, until) {
    const upperBound = until ? " AND started_at < ?" : "";
    const sql = `SELECT id, started_at, completed_at, ok, scope_mode, scope_value, result_json FROM improve_runs WHERE started_at >= ?${upperBound} AND dry_run = 0 ORDER BY started_at DESC`;
    const bindings = until ? [since, until] : [since];
    return db.prepare(sql).all(...bindings);
}
115
/**
 * Delete `improve_runs` rows whose `started_at` is older than
 * `retentionDays` days before now.
 *
 * Follows the same contract as the events purge in this codebase: default of
 * 90 days, returns the number of rows actually deleted, and is disabled
 * (returns 0 without running a statement) for a non-finite or non-positive
 * `retentionDays`.
 *
 * @returns Number of rows deleted, as a plain number.
 */
export function purgeOldImproveRuns(db, retentionDays = 90) {
    const enabled = Number.isFinite(retentionDays) && retentionDays > 0;
    if (!enabled) {
        return 0;
    }
    const MS_PER_DAY = 86_400_000;
    const cutoffIso = new Date(Date.now() - retentionDays * MS_PER_DAY).toISOString();
    const { changes } = db.prepare("DELETE FROM improve_runs WHERE started_at < ?").run(cutoffIso);
    const deleted = changes ?? 0;
    // `changes` may arrive as a bigint depending on the driver.
    return typeof deleted === "bigint" ? Number(deleted) : deleted;
}
@@ -1,8 +1,9 @@
1
1
  // This Source Code Form is subject to the terms of the Mozilla Public
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
- import { closeDatabase, openExistingDatabase } from "../../indexer/db/db.js";
4
+ import { openExistingDatabase } from "../../indexer/db/db.js";
5
5
  import { resolveStorageLocations } from "../locations.js";
6
+ import { withManagedDb } from "../managed-db.js";
6
7
  /**
7
8
  * Busy-timeout (ms) for read-path telemetry writers. Small on purpose: a
8
9
  * usage-event insert contending with a background reindex should be dropped,
@@ -39,14 +40,10 @@ export const TELEMETRY_BUSY_TIMEOUT_MS = 250;
39
40
  * @returns Whatever `fn` returns.
40
41
  */
41
42
  export function withIndexDb(fn, opts) {
42
- const db = openExistingDatabase(resolveStorageLocations().indexDb);
43
- try {
43
+ return withManagedDb(() => openExistingDatabase(resolveStorageLocations().indexDb), (db) => {
44
44
  if (opts?.busyTimeoutMs !== undefined) {
45
45
  db.exec(`PRAGMA busy_timeout = ${Math.max(0, Math.floor(opts.busyTimeoutMs))}`);
46
46
  }
47
47
  return fn(db);
48
- }
49
- finally {
50
- closeDatabase(db);
51
- }
48
+ });
52
49
  }