akm-cli 0.9.0-beta.54 → 0.9.0-beta.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/dist/cli.js +5 -3
  2. package/dist/commands/agent/contribute-cli.js +2 -3
  3. package/dist/commands/env/env-cli.js +187 -202
  4. package/dist/commands/env/secret-cli.js +109 -121
  5. package/dist/commands/feedback-cli.js +152 -155
  6. package/dist/commands/health/advisories.js +151 -0
  7. package/dist/commands/health/improve-metrics.js +754 -0
  8. package/dist/commands/health/llm-usage.js +65 -0
  9. package/dist/commands/health/md-report.js +103 -0
  10. package/dist/commands/health/metrics.js +278 -0
  11. package/dist/commands/health/task-runs.js +135 -0
  12. package/dist/commands/health/types.js +18 -0
  13. package/dist/commands/health/windows.js +196 -0
  14. package/dist/commands/health.js +14 -1624
  15. package/dist/commands/improve/anti-collapse.js +170 -0
  16. package/dist/commands/improve/collapse-detector.js +3 -2
  17. package/dist/commands/improve/consolidate.js +636 -633
  18. package/dist/commands/improve/dedup.js +1 -1
  19. package/dist/commands/improve/distill/content-repair.js +202 -0
  20. package/dist/commands/improve/distill/promote-memory.js +228 -0
  21. package/dist/commands/improve/distill/quality-gate.js +233 -0
  22. package/dist/commands/improve/distill-guards.js +127 -0
  23. package/dist/commands/improve/distill.js +49 -575
  24. package/dist/commands/improve/extract-cli.js +74 -76
  25. package/dist/commands/improve/extract.js +6 -4
  26. package/dist/commands/improve/hot-probation.js +45 -0
  27. package/dist/commands/improve/improve-auto-accept.js +3 -2
  28. package/dist/commands/improve/improve-cli.js +14 -13
  29. package/dist/commands/improve/improve-result-file.js +2 -1
  30. package/dist/commands/improve/improve.js +6 -5
  31. package/dist/commands/improve/loop-stages.js +19 -21
  32. package/dist/commands/improve/preparation.js +4 -2
  33. package/dist/commands/improve/procedural.js +10 -31
  34. package/dist/commands/improve/recombine.js +19 -43
  35. package/dist/commands/improve/reflect.js +1 -1
  36. package/dist/commands/improve/schema-similarity-gate.js +168 -0
  37. package/dist/commands/improve/shared.js +48 -0
  38. package/dist/commands/observability-cli.js +4 -4
  39. package/dist/commands/proposal/drain-policies.js +2 -2
  40. package/dist/commands/proposal/drain.js +1 -1
  41. package/dist/commands/proposal/legacy-import.js +115 -0
  42. package/dist/commands/proposal/proposal-cli.js +3 -3
  43. package/dist/commands/proposal/proposal.js +2 -1
  44. package/dist/commands/proposal/propose.js +1 -1
  45. package/dist/commands/proposal/repository.js +829 -0
  46. package/dist/commands/proposal/validators/proposals.js +5 -920
  47. package/dist/commands/read/remember-cli.js +132 -137
  48. package/dist/commands/read/search-cli.js +1 -1
  49. package/dist/commands/registry-cli.js +76 -87
  50. package/dist/commands/sources/add-cli.js +90 -94
  51. package/dist/commands/sources/history.js +1 -1
  52. package/dist/commands/sources/schema-repair.js +1 -1
  53. package/dist/commands/sources/sources-cli.js +3 -3
  54. package/dist/commands/sources/stash-cli.js +1 -1
  55. package/dist/commands/tasks/tasks-cli.js +1 -2
  56. package/dist/commands/wiki-cli.js +2 -3
  57. package/dist/core/common.js +3 -3
  58. package/dist/core/config/config-schema.js +6 -0
  59. package/dist/core/deep-merge.js +38 -0
  60. package/dist/core/events.js +2 -1
  61. package/dist/core/logs-db.js +8 -13
  62. package/dist/core/paths.js +14 -14
  63. package/dist/core/state-db.js +13 -1140
  64. package/dist/indexer/db/db.js +66 -709
  65. package/dist/indexer/db/entry-mapper.js +41 -0
  66. package/dist/indexer/db/schema.js +516 -0
  67. package/dist/indexer/feedback/utility-policy.js +85 -0
  68. package/dist/indexer/graph/graph-extraction.js +2 -1
  69. package/dist/indexer/index-writer-lock.js +9 -0
  70. package/dist/indexer/indexer.js +78 -23
  71. package/dist/indexer/search/fts-query.js +51 -0
  72. package/dist/integrations/agent/spawn.js +15 -66
  73. package/dist/output/text/helpers.js +13 -0
  74. package/dist/scripts/migrate-storage.js +6891 -7436
  75. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +44 -43
  76. package/dist/setup/legacy-config.js +106 -0
  77. package/dist/setup/prompt.js +57 -0
  78. package/dist/setup/providers.js +14 -0
  79. package/dist/setup/semantic-assets.js +124 -0
  80. package/dist/setup/setup.js +24 -1607
  81. package/dist/setup/steps/connection.js +734 -0
  82. package/dist/setup/steps/output.js +31 -0
  83. package/dist/setup/steps/platforms.js +124 -0
  84. package/dist/setup/steps/semantic.js +27 -0
  85. package/dist/setup/steps/sources.js +222 -0
  86. package/dist/setup/steps/stashdir.js +42 -0
  87. package/dist/setup/steps/tasks.js +152 -0
  88. package/dist/storage/repositories/canaries-repository.js +107 -0
  89. package/dist/storage/repositories/consolidation-repository.js +38 -0
  90. package/dist/storage/repositories/embeddings-repository.js +72 -0
  91. package/dist/storage/repositories/events-repository.js +187 -0
  92. package/dist/storage/repositories/extract-sessions-repository.js +96 -0
  93. package/dist/storage/repositories/improve-runs-repository.js +130 -0
  94. package/dist/storage/repositories/index-db.js +4 -7
  95. package/dist/storage/repositories/proposals-repository.js +220 -0
  96. package/dist/storage/repositories/recombine-repository.js +213 -0
  97. package/dist/storage/repositories/task-history-repository.js +93 -0
  98. package/dist/storage/sqlite-pragmas.js +3 -3
  99. package/dist/tasks/runner.js +2 -1
  100. package/package.json +1 -1
  101. package/dist/commands/improve/homeostatic.js +0 -497
@@ -0,0 +1,41 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * Shared `entries`-row projection + mapper, extracted from indexer/db/db.ts.
6
+ *
7
+ * Centralizes the one canonical `entries` SELECT column list and the
8
+ * JSON-parse-guarded row → {@link DbIndexedEntry} mapping that several queries
9
+ * used to reimplement. Corrupt `entry_json` rows are skipped (warn once) rather
10
+ * than crashing the caller.
11
+ */
12
+ import { warn } from "../../core/warn.js";
13
/**
 * The one canonical SELECT projection for reading a full indexed entry out of
 * the `entries` table. The columns are listed in the same order in which
 * {@link rowToIndexedEntry} reads them off a row.
 */
export const ENTRY_COLUMNS = [
    "id",
    "entry_key",
    "dir_path",
    "file_path",
    "stash_dir",
    "entry_json",
    "search_text",
].join(", ");
18
/**
 * Map one raw `entries` row to a {@link DbIndexedEntry}, parsing `entry_json`.
 *
 * A row counts as corrupt — a warning tagged with `context` is emitted and
 * `null` is returned — when `entry_json` is not valid JSON, or when it is valid
 * JSON that does not decode to an object (`null`, a primitive, or an array).
 * The second case matters because `JSON.parse` succeeds on such text; passing
 * the value through would only move the crash to the first property access in
 * the caller, which is exactly what this mapper exists to prevent.
 *
 * @param row raw row carrying `id`, `entry_key`, `dir_path`, `file_path`,
 *   `stash_dir`, `entry_json` and `search_text`
 * @param context short label naming the calling query; only used in the warning
 * @returns the mapped entry, or `null` when the row must be skipped
 */
export function rowToIndexedEntry(row, context) {
    let entry;
    try {
        entry = JSON.parse(row.entry_json);
    }
    catch {
        // Leave `entry` undefined; the shape check below reports it as corrupt.
        entry = undefined;
    }
    const isObject = entry !== null && typeof entry === "object" && !Array.isArray(entry);
    if (!isObject) {
        warn(`[db] ${context}: skipping entry id=${row.id} — corrupt entry_json`);
        return null;
    }
    return {
        id: row.id,
        entryKey: row.entry_key,
        dirPath: row.dir_path,
        filePath: row.file_path,
        stashDir: row.stash_dir,
        entry,
        searchText: row.search_text,
    };
}
@@ -0,0 +1,516 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * index.db schema, version stamps, and targeted migrations — extracted from
6
+ * indexer/db/db.ts. This isolates the one genuinely risky area (schema
7
+ * evolution) from the CRUD/FTS/vector queries that stay in db.ts.
8
+ *
9
+ * The meta accessors, embedding purge, and vec-availability probe that
10
+ * `ensureSchema` leans on remain in db.ts (they are part of the database
11
+ * lifecycle) and are imported back here.
12
+ */
13
+ import { bestEffort } from "../../core/best-effort.js";
14
+ import { warn } from "../../core/warn.js";
15
+ import { ensureUsageEventsSchema } from "../usage/usage-events.js";
16
+ import { getMeta, isVecAvailable, purgeEmbeddings, setMeta } from "./db.js";
17
+ // ── Constants ───────────────────────────────────────────────────────────────
18
// Schema evolution in index.db is additive. DB_VERSION is only a forensic
// stamp: ensureSchema writes it when the `version` meta key is missing and
// nothing in this module compares against it to trigger a destructive path.
// Older or partial databases are converged forward by the idempotent
// CREATE … IF NOT EXISTS statements plus targeted, in-place migrations
// (migrateGraphFilesSchema / migrateGraphDataFromLegacy are the model): migrate
// what changed, never wipe the whole index.
export const DB_VERSION = 17;
// Default embedding width. ensureSchema falls back to it when the caller passes
// no dimension and no `embeddingDim` is stored in index_meta.
export const EMBEDDING_DIM = 384;
// #624-P1: graph_files re-keyed to (stash_root, file_path, body_hash). Bumped
// 3→4 as a marker only; the migration itself is the rename-aside in
// migrateGraphFilesSchema followed by the copy in migrateGraphDataFromLegacy.
export const GRAPH_SCHEMA_VERSION = 4;
29
+ // ── Schema ──────────────────────────────────────────────────────────────────
30
/**
 * DDL for `registry_index_cache`, applied by `ensureSchema()`.
 *
 * The table lives in index.db, so its definition is kept in this module rather
 * than in state-db.ts. Both statements use IF NOT EXISTS and are therefore safe
 * to run on every open. The table caches fetched remote registry index
 * documents so `akm search` does not hit the network on every invocation.
 *
 * Columns:
 *   registry_url   TEXT PK  — canonical registry URL; the cache key.
 *   fetched_at     TEXT     — ISO-8601 fetch time for TTL/staleness checks;
 *                             the only column with a secondary index.
 *   etag           TEXT     — HTTP ETag for conditional GET (If-None-Match).
 *   last_modified  TEXT     — HTTP Last-Modified for conditional GET.
 *   index_json     TEXT     — payload: the fetched registry index document.
 *
 * Possible future additive columns:
 *   ALTER TABLE registry_index_cache ADD COLUMN schema_version INTEGER DEFAULT 1;
 *   ALTER TABLE registry_index_cache ADD COLUMN kit_count      INTEGER DEFAULT NULL;
 *   ALTER TABLE registry_index_cache ADD COLUMN error_message  TEXT    DEFAULT NULL;
 */
const REGISTRY_INDEX_CACHE_DDL = `
  CREATE TABLE IF NOT EXISTS registry_index_cache (
    registry_url TEXT PRIMARY KEY,
    fetched_at TEXT NOT NULL,
    etag TEXT,
    last_modified TEXT,
    index_json TEXT NOT NULL DEFAULT '{}'
  );

  CREATE INDEX IF NOT EXISTS idx_registry_cache_fetched
    ON registry_index_cache(fetched_at);
`;
65
/**
 * Create or converge the index.db schema on an open connection.
 *
 * Every table is created with IF NOT EXISTS and every incompatible change is
 * handled by a guarded ALTER or a targeted migration, so opening an older or
 * partial database moves it forward without dropping the index. The only data
 * this function removes is stored embeddings, and only when the caller passes
 * an explicit `embeddingDim` that differs from the stored one.
 *
 * @param db open SQLite handle; `exec` and `prepare(...).get()` are used here
 * @param embeddingDim optional embedding width. `undefined` means "caller did
 *   not ask for a specific dim": the stored dim is left untouched and no
 *   dim-change purge runs. A number means the caller owns the dim-change
 *   semantics.
 * @throws {Error} `Invalid embedding dimension: …` when sqlite-vec is available
 *   and the vec table has to be created or rebuilt at a width that is not an
 *   integer in 1..4096. The check runs BEFORE any embeddings are purged.
 */
export function ensureSchema(db, embeddingDim) {
    // Create meta table first so we can check version
    db.exec(`
    CREATE TABLE IF NOT EXISTS index_meta (
      key TEXT PRIMARY KEY,
      value TEXT NOT NULL
    );
  `);
    // index.db is a fully regenerable derived cache, so its schema is built
    // idempotently below: every table is CREATE … IF NOT EXISTS and column
    // additions go through guarded ALTERs (ensureDerivedFromColumn) and targeted
    // migrations (migrateGraphFilesSchema / migrateGraphDataFromLegacy). There is
    // intentionally no "drop the whole index on a DB_VERSION mismatch" path.
    db.exec(`
    CREATE TABLE IF NOT EXISTS entries (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      entry_key TEXT NOT NULL UNIQUE,
      dir_path TEXT NOT NULL,
      file_path TEXT NOT NULL,
      stash_dir TEXT NOT NULL,
      entry_json TEXT NOT NULL,
      search_text TEXT NOT NULL,
      entry_type TEXT NOT NULL,
      derived_from TEXT
    );

    CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
    CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
    CREATE INDEX IF NOT EXISTS idx_entries_file_path ON entries(file_path);
  `);
    // Phase 5A / DB v17: make sure `entries.derived_from` and its index exist.
    // A fresh DB already has the column from the CREATE above; a DB whose
    // `entries` table predates the column gets it through the PRAGMA-then-ALTER
    // guard. The guard runs unconditionally so both cases converge, and the
    // index on `derived_from` is created inside the helper, after the column is
    // known to exist.
    ensureDerivedFromColumn(db);
    // Validated WorkflowDocument JSON, one row per indexed workflow entry.
    // Pure index data — fully rebuilt on each `akm index`. ON DELETE CASCADE
    // means clearing entries (full rebuild or per-dir delete) drops these too.
    db.exec(`
    CREATE TABLE IF NOT EXISTS workflow_documents (
      entry_id INTEGER PRIMARY KEY REFERENCES entries(id) ON DELETE CASCADE,
      schema_version INTEGER NOT NULL,
      document_json TEXT NOT NULL,
      source_path TEXT NOT NULL,
      source_hash TEXT NOT NULL,
      updated_at TEXT NOT NULL
    );

    CREATE INDEX IF NOT EXISTS idx_workflow_documents_source_path
      ON workflow_documents(source_path);
  `);
    // Stamp the version right after the core tables exist so a crash later in
    // this function does not leave the database versionless on the next open.
    // An existing stamp is never overwritten.
    const versionAfterCreate = getMeta(db, "version");
    if (!versionAfterCreate) {
        setMeta(db, "version", String(DB_VERSION));
    }
    // BLOB-based embedding storage (always available, no sqlite-vec needed)
    db.exec(`
    CREATE TABLE IF NOT EXISTS embeddings (
      id INTEGER PRIMARY KEY,
      embedding BLOB NOT NULL,
      FOREIGN KEY (id) REFERENCES entries(id)
    );
  `);
    // FTS5 table — multi-column with per-field weighting via bm25().
    // Virtual tables are probed via sqlite_master before creation.
    const ftsExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_fts'").get();
    if (!ftsExists) {
        db.exec(`
      CREATE VIRTUAL TABLE entries_fts USING fts5(
        entry_id UNINDEXED,
        name,
        description,
        tags,
        hints,
        content,
        tokenize='porter unicode61'
      );
    `);
    }
    // Usage events table — created by ensureUsageEventsSchema() further down.
    // Utility scores table (aggregated per-entry utility metrics)
    db.exec(`
    CREATE TABLE IF NOT EXISTS utility_scores (
      entry_id INTEGER PRIMARY KEY,
      utility REAL NOT NULL DEFAULT 0,
      show_count INTEGER NOT NULL DEFAULT 0,
      search_count INTEGER NOT NULL DEFAULT 0,
      select_rate REAL NOT NULL DEFAULT 0,
      last_used_at TEXT,
      updated_at TEXT NOT NULL DEFAULT (datetime('now')),
      FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
    );
  `);
    // Per-project scoped utility scores — tracks usage per (entry, cwd-anchor)
    // so assets useful in project A don't pollute rankings in project B.
    // The global utility_scores table is preserved as a fallback / cold-start aid.
    db.exec(`
    CREATE TABLE IF NOT EXISTS utility_scores_scoped (
      entry_id INTEGER NOT NULL,
      scope_key TEXT NOT NULL,
      utility REAL NOT NULL DEFAULT 0,
      last_used_at INTEGER NOT NULL,
      PRIMARY KEY (entry_id, scope_key)
    );
    CREATE INDEX IF NOT EXISTS idx_utility_scores_scoped_entry_id
      ON utility_scores_scoped(entry_id);
  `);
    db.exec(`
    CREATE TABLE IF NOT EXISTS index_dir_state (
      dir_path TEXT PRIMARY KEY,
      file_set_hash TEXT NOT NULL,
      file_mtime_max_ms REAL NOT NULL,
      reason TEXT NOT NULL,
      updated_at TEXT NOT NULL
    );
  `);
    // LLM enrichment result cache. Stores a SHA-256 body hash and the JSON
    // result for each asset so that subsequent `akm index --enrich` runs can
    // skip the LLM call when the body hasn't changed. The cache is keyed by
    // a stable asset_ref string (e.g. the absolute file path for graph/memory
    // passes, or `entryKey:passId` for the metadata-enhance pass).
    // Entries are cleaned up when assets are removed or --re-enrich is used.
    db.exec(`
    CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
      asset_ref TEXT NOT NULL,
      cache_variant TEXT NOT NULL,
      body_hash TEXT NOT NULL,
      result_json TEXT NOT NULL,
      updated_at INTEGER NOT NULL,
      PRIMARY KEY (asset_ref, cache_variant)
    );

    CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
      ON llm_enrichment_cache(updated_at);
  `);
    // Graph extraction tables — schema v4 ((stash_root, file_path, body_hash) PK).
    //
    // graph_files is self-keyed on (stash_root, file_path, body_hash) and is NO
    // LONGER tied to entries.id, so deleting and re-inserting an entries row
    // during a reindex no longer cascade-wipes the extracted graph. body_hash is
    // part of the PK so a content change yields a distinct key; a UNIQUE index on
    // (stash_root, file_path) still enforces exactly one graph_files row per path.
    //
    // graph_file_entities and graph_file_relations carry the same composite key
    // and declare a composite FK -> graph_files ON DELETE CASCADE so child rows
    // are removed when a graph_files row is replaced.
    //
    // #624-P1 targeted migration: an existing DB may still hold the OLD
    // graph_files (entry_id PK). SQLite can't ALTER a primary key, so the 3 graph
    // tables are RENAMED aside (→ *_legacy) here, the CREATE block below builds
    // the new shape, and migrateGraphDataFromLegacy() copies the data across.
    migrateGraphFilesSchema(db);
    db.exec(`
    CREATE TABLE IF NOT EXISTS graph_meta (
      stash_root TEXT PRIMARY KEY,
      schema_version INTEGER NOT NULL,
      generated_at TEXT NOT NULL,
      considered_files INTEGER NOT NULL DEFAULT 0,
      extracted_files INTEGER NOT NULL DEFAULT 0,
      entity_count INTEGER NOT NULL DEFAULT 0,
      relation_count INTEGER NOT NULL DEFAULT 0,
      extraction_coverage REAL NOT NULL DEFAULT 0,
      density REAL NOT NULL DEFAULT 0,
      extractor_id TEXT,
      extraction_run_id TEXT,
      model TEXT,
      prompt_version TEXT,
      batch_size INTEGER,
      cache_hits INTEGER NOT NULL DEFAULT 0,
      cache_misses INTEGER NOT NULL DEFAULT 0,
      truncation_count INTEGER NOT NULL DEFAULT 0,
      failure_count INTEGER NOT NULL DEFAULT 0
    );

    CREATE TABLE IF NOT EXISTS graph_files (
      stash_root TEXT NOT NULL,
      file_path TEXT NOT NULL,
      file_order INTEGER NOT NULL,
      file_type TEXT NOT NULL,
      body_hash TEXT NOT NULL,
      confidence REAL,
      status TEXT NOT NULL DEFAULT 'extracted',
      reason TEXT,
      extraction_run_id TEXT,
      PRIMARY KEY (stash_root, file_path, body_hash)
    );

    CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_files_path
      ON graph_files(stash_root, file_path);

    CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
      ON graph_files(stash_root, file_order);

    CREATE TABLE IF NOT EXISTS graph_file_entities (
      stash_root TEXT NOT NULL,
      file_path TEXT NOT NULL,
      body_hash TEXT NOT NULL,
      entity_order INTEGER NOT NULL,
      entity_norm TEXT NOT NULL,
      entity TEXT NOT NULL,
      PRIMARY KEY (stash_root, file_path, body_hash, entity_order),
      FOREIGN KEY (stash_root, file_path, body_hash)
        REFERENCES graph_files(stash_root, file_path, body_hash) ON DELETE CASCADE
    );

    CREATE INDEX IF NOT EXISTS idx_graph_file_entities_entity_norm
      ON graph_file_entities(stash_root, entity_norm);

    CREATE TABLE IF NOT EXISTS graph_file_relations (
      stash_root TEXT NOT NULL,
      file_path TEXT NOT NULL,
      body_hash TEXT NOT NULL,
      relation_order INTEGER NOT NULL,
      from_entity_norm TEXT NOT NULL,
      from_entity TEXT NOT NULL,
      to_entity_norm TEXT NOT NULL,
      to_entity TEXT NOT NULL,
      relation_type TEXT,
      confidence REAL,
      PRIMARY KEY (stash_root, file_path, body_hash, relation_order),
      FOREIGN KEY (stash_root, file_path, body_hash)
        REFERENCES graph_files(stash_root, file_path, body_hash) ON DELETE CASCADE
    );

    -- #624-P3: lazy graph-extraction queue. Standalone table (NO FK to
    -- graph_files — a queued file by definition has no graph row yet).
    -- Idempotent on (stash_root, file_path); drained highest-priority-first.
    -- CREATE TABLE IF NOT EXISTS is the forward migration (no DB_VERSION bump).
    CREATE TABLE IF NOT EXISTS graph_extraction_queue (
      stash_root TEXT NOT NULL,
      file_path TEXT NOT NULL,
      body_hash TEXT NOT NULL,
      queued_at TEXT NOT NULL DEFAULT (datetime('now')),
      priority INTEGER NOT NULL DEFAULT 0,
      PRIMARY KEY (stash_root, file_path)
    );

    CREATE INDEX IF NOT EXISTS idx_graph_extraction_queue_drain
      ON graph_extraction_queue(stash_root, priority DESC, queued_at);
  `);
    // #624-P1 migration step 2: copy any renamed-aside legacy graph data into the
    // new-shape tables (just created above), then drop the legacy tables. No-op
    // unless a graph_files_legacy table exists.
    migrateGraphDataFromLegacy(db);
    // FTS-dirty queue. Created here (not lazily on first upsert) so the
    // per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
    // every call — that DDL would fire thousands of times during a full
    // index. See `markFtsDirty` and `rebuildFts({ incremental: true })`.
    db.exec(`
    CREATE TABLE IF NOT EXISTS entries_fts_dirty (
      entry_id INTEGER PRIMARY KEY
    );
  `);
    // sqlite-vec table
    //
    // Dimension contract:
    //   - `embeddingDim === undefined`: the caller did NOT request a specific
    //     dim. Do not touch `index_meta.embeddingDim` and do not run the
    //     dim-change purge; fall back to the stored dim (or the static default)
    //     only when the vec table has to be materialised for the first time.
    //     Without this, dim-unaware `openDatabase()` callers would overwrite the
    //     dim-aware value and oscillate the stored dim.
    //   - `embeddingDim` is a number: the caller explicitly asked for that dim
    //     and owns the dim-change/purge semantics.
    const dimExplicit = embeddingDim !== undefined;
    const effectiveDim = embeddingDim ?? (Number(getMeta(db, "embeddingDim")) || EMBEDDING_DIM);
    if (isVecAvailable(db)) {
        if (dimExplicit) {
            const storedDim = getMeta(db, "embeddingDim");
            if (storedDim && storedDim !== String(embeddingDim)) {
                // The vec table is about to be rebuilt at the new width. Validate
                // FIRST: purging is destructive, and an invalid dimension must not
                // cost the user their existing embeddings before the error is raised.
                assertValidEmbeddingDim(effectiveDim);
                // Stored vectors are incompatible with the new dimension. Drop the
                // vec table so the block below recreates it at the new width; the
                // BLOB rows go too. Re-embedded by the next index.
                purgeEmbeddings(db, { dropVecTable: true });
            }
            // NOTE(review): when no dim is stored yet but entries_vec already
            // exists, the table's width is not compared with `embeddingDim` —
            // confirm that combination cannot occur in practice.
        }
        const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
        if (!vecExists) {
            assertValidEmbeddingDim(effectiveDim);
            db.exec(`
        CREATE VIRTUAL TABLE entries_vec USING vec0(
          id INTEGER PRIMARY KEY,
          embedding FLOAT[${effectiveDim}]
        );
      `);
        }
        if (dimExplicit) {
            setMeta(db, "embeddingDim", String(embeddingDim));
        }
    }
    else {
        // JS fallback path: without sqlite-vec there is no entries_vec, but the
        // BLOB embeddings table still stores vectors. If the configured dimension
        // changes, those BLOBs become silently incompatible, so clear them.
        if (dimExplicit) {
            const storedDim = getMeta(db, "embeddingDim");
            if (storedDim && storedDim !== String(embeddingDim)) {
                purgeEmbeddings(db);
            }
            setMeta(db, "embeddingDim", String(embeddingDim));
        }
    }
    // Usage telemetry table
    ensureUsageEventsSchema(db);
    // Registry index cache table — caches remote registry index documents so
    // `akm search` does not hit the network on every invocation.
    db.exec(REGISTRY_INDEX_CACHE_DDL);
}
/**
 * Reject widths the vec table cannot be created with.
 *
 * @param dim candidate embedding width
 * @throws {Error} when `dim` is not an integer in the range 1..4096
 */
function assertValidEmbeddingDim(dim) {
    if (!Number.isInteger(dim) || dim <= 0 || dim > 4096) {
        throw new Error(`Invalid embedding dimension: ${dim}`);
    }
}
394
/**
 * Phase 5A / DB v17 schema guard for `entries.derived_from`.
 *
 * Looks the column up via `PRAGMA table_info(entries)` and issues the ALTER
 * only when it is missing, then (re)issues the idempotent CREATE INDEX. Called
 * by `ensureSchema()` right after the `entries` CREATE so databases whose
 * table predates the column gain it in place, without data loss.
 *
 * The whole step is handed to `bestEffort` — presumably so a failure here does
 * not abort schema setup; verify against core/best-effort.js.
 *
 * @param db open SQLite handle
 */
function ensureDerivedFromColumn(db) {
    const addColumnAndIndex = () => {
        const columnNames = db
            .prepare("PRAGMA table_info(entries)")
            .all()
            .map((column) => column.name);
        if (!columnNames.includes("derived_from")) {
            db.exec("ALTER TABLE entries ADD COLUMN derived_from TEXT");
        }
        // CREATE INDEX IF NOT EXISTS is safe to repeat on every open.
        db.exec("CREATE INDEX IF NOT EXISTS idx_entries_derived_from ON entries(derived_from)");
    };
    bestEffort(addColumnAndIndex, "entries table may not exist on a brand-new DB before CREATE — caller is responsible");
}
413
/**
 * Report whether `sqlite_master` lists a table called `name`.
 *
 * @param db open SQLite handle
 * @param name exact table name, bound as a statement parameter
 * @returns `true` when the lookup yields a row, `false` for `undefined`/`null`
 */
function tableExists(db, name) {
    const lookup = db.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1");
    const hit = lookup.get(name);
    // Loose comparison on purpose: matches both `undefined` and `null` results.
    return hit != null;
}
420
/**
 * #624-P1 targeted graph-schema migration — STEP 1 of 2 (rename).
 *
 * graph_files was re-keyed from an `entry_id` primary key to a self-contained
 * `(stash_root, file_path, body_hash)` PK. SQLite cannot ALTER a primary key,
 * so a DB carrying the OLD shape has its 3 graph tables RENAMED to `*_legacy`
 * here; ensureSchema's CREATE block then builds the new-shape tables, and
 * {@link migrateGraphDataFromLegacy} copies the data across before dropping the
 * legacy tables.
 *
 * GRAPH-SCOPED: only the graph tables are touched, never the index, the
 * embeddings or the enrichment cache. graph_meta keeps its stash_root key and
 * is left in place.
 *
 * Detection: the old schema has an `entry_id` column on graph_files. Fresh DBs
 * (no graph_files yet) and already-migrated DBs (no entry_id column) are no-ops.
 *
 * Atomicity: the drop-husks + rename sequence runs in ONE transaction. If it
 * stopped halfway — graph_files renamed, a child table not — the next open
 * would no longer see an `entry_id` column on graph_files, skip this step, and
 * `CREATE TABLE IF NOT EXISTS` would leave the old-shape child table in place
 * for good. With the transaction a failure rolls back to the untouched legacy
 * shape and the migration is retried on the next open.
 *
 * @param db open SQLite handle
 */
function migrateGraphFilesSchema(db) {
    bestEffort(() => {
        const columns = db.prepare("PRAGMA table_info(graph_files)").all();
        const isLegacyShape = columns.some((column) => column.name === "entry_id");
        if (!isLegacyShape)
            return;
        const renameAside = db.transaction(() => {
            // A previous interrupted migration may have left *_legacy behind — drop
            // those husks first so the renames below don't collide.
            db.exec("DROP TABLE IF EXISTS graph_file_relations_legacy");
            db.exec("DROP TABLE IF EXISTS graph_file_entities_legacy");
            db.exec("DROP TABLE IF EXISTS graph_files_legacy");
            // ALTER … RENAME auto-updates child FK refs in SQLite ≥3.25, which is
            // fine — the legacy children are dropped after the copy.
            db.exec("ALTER TABLE graph_files RENAME TO graph_files_legacy");
            if (tableExists(db, "graph_file_entities")) {
                db.exec("ALTER TABLE graph_file_entities RENAME TO graph_file_entities_legacy");
            }
            if (tableExists(db, "graph_file_relations")) {
                db.exec("ALTER TABLE graph_file_relations RENAME TO graph_file_relations_legacy");
            }
        });
        renameAside();
    }, "graph_files may not exist on a brand-new DB before CREATE — caller is responsible");
}
462
/**
 * #624-P1 targeted graph-schema migration — STEP 2 of 2 (copy + drop legacy).
 *
 * Runs AFTER the graph CREATE TABLE block, so the new-shape tables exist. Copies
 * legacy rows into the re-keyed tables: graph_files is a straight column copy,
 * and child rows JOIN back to graph_files_legacy on `entry_id` to resolve their
 * composite key. The `*_legacy` tables are then dropped.
 *
 * Best-effort: a copy failure (e.g. a pre-body_hash legacy schema) is tolerated,
 * and the legacy tables are dropped regardless so they never linger. Rows whose
 * body_hash is null/empty can't form the new PK and are skipped.
 *
 * Two guards keep the best-effort copy honest:
 *   - Child rows are copied only when their parent row actually landed in the
 *     new graph_files. `INSERT OR IGNORE` skips UNIQUE / NOT NULL conflicts but
 *     not foreign-key violations, so a child of a skipped parent would abort
 *     the whole transaction and discard every otherwise-good row.
 *   - The migrated-file count is taken from the transaction's return value, so
 *     the success notice is printed only when the copy really committed.
 *
 * @param db open SQLite handle
 */
function migrateGraphDataFromLegacy(db) {
    if (!tableExists(db, "graph_files_legacy"))
        return;
    let migratedFiles = 0;
    bestEffort(() => {
        const copyLegacyGraph = db.transaction(() => {
            const res = db
                .prepare(`INSERT OR IGNORE INTO graph_files
             (stash_root, file_path, body_hash, file_order, file_type, confidence, status, reason, extraction_run_id)
           SELECT stash_root, file_path, body_hash, file_order, file_type, confidence, status, reason, extraction_run_id
           FROM graph_files_legacy
           WHERE body_hash IS NOT NULL AND body_hash != ''`)
                .run();
            if (tableExists(db, "graph_file_entities_legacy")) {
                db.exec(`INSERT OR IGNORE INTO graph_file_entities
             (stash_root, file_path, body_hash, entity_order, entity_norm, entity)
           SELECT gf.stash_root, gf.file_path, gf.body_hash, e.entity_order, e.entity_norm, e.entity
           FROM graph_file_entities_legacy e
           JOIN graph_files_legacy gf ON gf.entry_id = e.entry_id
           JOIN graph_files nf
             ON nf.stash_root = gf.stash_root AND nf.file_path = gf.file_path AND nf.body_hash = gf.body_hash
           WHERE gf.body_hash IS NOT NULL AND gf.body_hash != ''`);
            }
            if (tableExists(db, "graph_file_relations_legacy")) {
                db.exec(`INSERT OR IGNORE INTO graph_file_relations
             (stash_root, file_path, body_hash, relation_order, from_entity_norm, from_entity, to_entity_norm, to_entity, relation_type, confidence)
           SELECT gf.stash_root, gf.file_path, gf.body_hash, r.relation_order, r.from_entity_norm, r.from_entity, r.to_entity_norm, r.to_entity, r.relation_type, r.confidence
           FROM graph_file_relations_legacy r
           JOIN graph_files_legacy gf ON gf.entry_id = r.entry_id
           JOIN graph_files nf
             ON nf.stash_root = gf.stash_root AND nf.file_path = gf.file_path AND nf.body_hash = gf.body_hash
           WHERE gf.body_hash IS NOT NULL AND gf.body_hash != ''`);
            }
            return Number(res.changes);
        });
        // Assigned only once the transaction has returned: if any statement above
        // throws, the copy is rolled back and the count must stay 0.
        migratedFiles = copyLegacyGraph();
    }, "graph data migration is best-effort; legacy tables are dropped regardless below");
    // Always drop the legacy tables (children first), migrated or not.
    bestEffort(() => {
        db.exec("DROP TABLE IF EXISTS graph_file_relations_legacy");
        db.exec("DROP TABLE IF EXISTS graph_file_entities_legacy");
        db.exec("DROP TABLE IF EXISTS graph_files_legacy");
    }, "drop legacy graph tables after migration");
    if (migratedFiles > 0) {
        warn(`[akm] graph index re-keyed (#624): migrated ${migratedFiles} extracted file(s) to the new schema — no re-extraction needed. Index + embeddings untouched.`);
    }
}
@@ -0,0 +1,85 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * MemRL feedback → utility policy, extracted from indexer/db/db.ts.
6
+ *
7
+ * This is the domain/policy math (arXiv:2601.03192) that decides how a batch of
8
+ * positive/negative feedback signals moves an asset's utility score. It is pure
9
+ * — no database access — so the bounded-step behaviour is unit-testable in
10
+ * isolation; the DB read/write stays with `applyFeedbackToUtilityScore` in db.ts.
11
+ */
12
/**
 * MemRL learning rate for feedback-driven utility updates (F-5 / #386).
 *
 * Follows the bounded-step formula from MemRL (arXiv:2601.03192):
 *   next = clamp(current + lr × (reward − current), 0, 1)
 *
 * This replaces the unbounded `-0.03 × negativeCount` delta that could
 * silently remove high-utility assets from the improvement loop.
 */
export const FEEDBACK_LR = 0.1;
/**
 * Reward assigned to a positive feedback event.
 * 1.0 means "fully correct / helpful" (highest MemRL signal).
 */
const FEEDBACK_REWARD_POSITIVE = 1.0;
/**
 * Reward assigned to a negative feedback event.
 * 0.0 means "not helpful" (lowest MemRL signal).
 */
const FEEDBACK_REWARD_NEGATIVE = 0.0;
/**
 * Largest downward utility move allowed in a single utility update,
 * regardless of how many negative signals the batch contains.
 *
 * The cap is applied per call, not per day: each call to
 * `computeNextUtility` can lower utility by at most this amount, so a burst
 * of negatives folded into one batch cannot silently destroy a high-utility
 * asset's ranking.
 *
 * NOTE: the raw step is `FEEDBACK_LR × (reward − current)`. With
 * `FEEDBACK_LR = 0.1`, rewards in [0, 1] and a current utility in [0, 1],
 * that step is never below −0.1, so this cap is a safety net: it only binds
 * when the stored utility is out of range (> 1.5) or when `FEEDBACK_LR` is
 * raised above this value.
 */
export const MAX_NEG_DELTA_PER_CALL = 0.15;
/**
 * Utility threshold below which a review-needed escalation is triggered.
 * When a previously high-utility asset (≥ HIGH_UTILITY_THRESHOLD) drops
 * strictly below this value, the caller should create an escalation proposal.
 */
export const UTILITY_REVIEW_THRESHOLD = 0.5;
/**
 * Utility level considered "high" — assets at or above this value (the check
 * is `>=`) are tracked for threshold-crossing escalation.
 *
 * Currently equal to UTILITY_REVIEW_THRESHOLD, so a "crossing" is any update
 * that moves an asset from ≥ 0.5 to < 0.5.
 */
export const HIGH_UTILITY_THRESHOLD = 0.5;
53
/**
 * Normalise a feedback count to a non-negative finite number.
 *
 * Missing (`undefined`/`null`), non-numeric, non-finite and negative values
 * all become 0 so that a malformed count can never turn the utility into NaN.
 */
function normalizeFeedbackCount(count) {
    const numeric = Number(count);
    return Number.isFinite(numeric) && numeric > 0 ? numeric : 0;
}
/**
 * Compute the next utility from accumulated feedback counts using the MemRL
 * bounded-step EMA formula (F-5 / #386, arXiv:2601.03192):
 *
 *   reward   = weighted average of positive and negative signals
 *   nextUtil = clamp(currentUtil + lr × (reward − currentUtil), 0, 1)
 *
 * The downward step is additionally capped at `MAX_NEG_DELTA_PER_CALL` so a
 * noisy feedback stream cannot silently erase a high-utility asset.
 *
 * Pure: no DB access. When the batch carries no usable feedback (both counts
 * zero, missing, non-finite or negative), utility is returned unchanged and
 * no threshold crossing is reported.
 *
 * @param {number} previousUtility - Current utility score. Assumed to be a
 *   finite number, nominally in [0, 1]; it is echoed back untouched.
 * @param {number} positiveCount - Number of positive feedback signals.
 * @param {number} negativeCount - Number of negative feedback signals.
 * @returns {{ previousUtility: number, nextUtility: number, crossedReviewThreshold: boolean }}
 *   The input utility, the updated utility clamped to [0, 1], and whether the
 *   update moved the asset from ≥ HIGH_UTILITY_THRESHOLD to below
 *   UTILITY_REVIEW_THRESHOLD.
 */
export function computeNextUtility(previousUtility, positiveCount, negativeCount) {
    const positives = normalizeFeedbackCount(positiveCount);
    const negatives = normalizeFeedbackCount(negativeCount);
    const total = positives + negatives;
    // No usable feedback: leave the score alone instead of dividing by zero.
    if (total === 0) {
        return { previousUtility, nextUtility: previousUtility, crossedReviewThreshold: false };
    }
    // Reward is the share of positive signals; single-sided batches use the
    // exact reward constants so no rounding can creep in.
    let reward;
    if (negatives === 0) {
        reward = FEEDBACK_REWARD_POSITIVE;
    }
    else if (positives === 0) {
        reward = FEEDBACK_REWARD_NEGATIVE;
    }
    else {
        reward = (positives * FEEDBACK_REWARD_POSITIVE + negatives * FEEDBACK_REWARD_NEGATIVE) / total;
    }
    // MemRL bounded-step EMA: lr × (reward − current). Math.max leaves upward
    // steps untouched and limits downward steps to the per-call negative cap.
    const step = Math.max(FEEDBACK_LR * (reward - previousUtility), -MAX_NEG_DELTA_PER_CALL);
    const nextUtility = Math.max(0, Math.min(1, previousUtility + step));
    const crossedReviewThreshold = previousUtility >= HIGH_UTILITY_THRESHOLD && nextUtility < UTILITY_REVIEW_THRESHOLD;
    return { previousUtility, nextUtility, crossedReviewThreshold };
}
@@ -46,8 +46,9 @@ import { warn, warnVerbose } from "../../core/warn.js";
46
46
  import { isProcessEnabled } from "../../llm/feature-gate.js";
47
47
  import * as graphExtract from "../../llm/graph-extract.js";
48
48
  import { resolveIndexPassLLM } from "../../llm/index-passes.js";
49
- import { computeBodyHash, GRAPH_SCHEMA_VERSION, getLlmCacheEntriesByRefs, getLlmCacheEntry, upsertLlmCacheEntry, } from "../db/db.js";
49
+ import { computeBodyHash, getLlmCacheEntriesByRefs, getLlmCacheEntry, upsertLlmCacheEntry, } from "../db/db.js";
50
50
  import { drainExtractionQueue, loadStoredGraphSnapshot, replaceStoredGraph } from "../db/graph-db.js";
51
+ import { GRAPH_SCHEMA_VERSION } from "../db/schema.js";
51
52
  import { walkMarkdownFiles } from "../walk/walker.js";
52
53
  import { deduplicateGraph } from "./graph-dedup.js";
53
54
  /** Schema version for the persisted artifact — bumps trigger a full rebuild. */