akm-cli 0.9.0-beta.54 → 0.9.0-beta.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/dist/cli.js +5 -3
  2. package/dist/commands/agent/contribute-cli.js +2 -3
  3. package/dist/commands/env/env-cli.js +187 -202
  4. package/dist/commands/env/secret-cli.js +109 -121
  5. package/dist/commands/feedback-cli.js +152 -155
  6. package/dist/commands/health/advisories.js +151 -0
  7. package/dist/commands/health/improve-metrics.js +754 -0
  8. package/dist/commands/health/llm-usage.js +65 -0
  9. package/dist/commands/health/md-report.js +103 -0
  10. package/dist/commands/health/metrics.js +278 -0
  11. package/dist/commands/health/task-runs.js +135 -0
  12. package/dist/commands/health/types.js +18 -0
  13. package/dist/commands/health/windows.js +196 -0
  14. package/dist/commands/health.js +14 -1624
  15. package/dist/commands/improve/anti-collapse.js +170 -0
  16. package/dist/commands/improve/collapse-detector.js +3 -2
  17. package/dist/commands/improve/consolidate.js +636 -633
  18. package/dist/commands/improve/dedup.js +1 -1
  19. package/dist/commands/improve/distill/content-repair.js +202 -0
  20. package/dist/commands/improve/distill/promote-memory.js +228 -0
  21. package/dist/commands/improve/distill/quality-gate.js +233 -0
  22. package/dist/commands/improve/distill-guards.js +127 -0
  23. package/dist/commands/improve/distill.js +49 -575
  24. package/dist/commands/improve/extract-cli.js +74 -76
  25. package/dist/commands/improve/extract.js +6 -4
  26. package/dist/commands/improve/hot-probation.js +45 -0
  27. package/dist/commands/improve/improve-auto-accept.js +3 -2
  28. package/dist/commands/improve/improve-cli.js +14 -13
  29. package/dist/commands/improve/improve-result-file.js +2 -1
  30. package/dist/commands/improve/improve.js +6 -5
  31. package/dist/commands/improve/loop-stages.js +19 -21
  32. package/dist/commands/improve/preparation.js +4 -2
  33. package/dist/commands/improve/procedural.js +10 -31
  34. package/dist/commands/improve/recombine.js +19 -43
  35. package/dist/commands/improve/reflect.js +1 -1
  36. package/dist/commands/improve/schema-similarity-gate.js +168 -0
  37. package/dist/commands/improve/shared.js +48 -0
  38. package/dist/commands/observability-cli.js +4 -4
  39. package/dist/commands/proposal/drain-policies.js +2 -2
  40. package/dist/commands/proposal/drain.js +1 -1
  41. package/dist/commands/proposal/legacy-import.js +115 -0
  42. package/dist/commands/proposal/proposal-cli.js +3 -3
  43. package/dist/commands/proposal/proposal.js +2 -1
  44. package/dist/commands/proposal/propose.js +1 -1
  45. package/dist/commands/proposal/repository.js +829 -0
  46. package/dist/commands/proposal/validators/proposals.js +5 -920
  47. package/dist/commands/read/remember-cli.js +132 -137
  48. package/dist/commands/read/search-cli.js +1 -1
  49. package/dist/commands/registry-cli.js +76 -87
  50. package/dist/commands/sources/add-cli.js +90 -94
  51. package/dist/commands/sources/history.js +1 -1
  52. package/dist/commands/sources/schema-repair.js +1 -1
  53. package/dist/commands/sources/sources-cli.js +3 -3
  54. package/dist/commands/sources/stash-cli.js +1 -1
  55. package/dist/commands/tasks/tasks-cli.js +1 -2
  56. package/dist/commands/wiki-cli.js +2 -3
  57. package/dist/core/common.js +3 -3
  58. package/dist/core/config/config-schema.js +6 -0
  59. package/dist/core/deep-merge.js +38 -0
  60. package/dist/core/events.js +2 -1
  61. package/dist/core/logs-db.js +8 -13
  62. package/dist/core/paths.js +14 -14
  63. package/dist/core/state-db.js +13 -1140
  64. package/dist/indexer/db/db.js +96 -723
  65. package/dist/indexer/db/entry-mapper.js +41 -0
  66. package/dist/indexer/db/schema.js +516 -0
  67. package/dist/indexer/feedback/utility-policy.js +75 -0
  68. package/dist/indexer/graph/graph-extraction.js +2 -1
  69. package/dist/indexer/index-writer-lock.js +9 -0
  70. package/dist/indexer/indexer.js +78 -23
  71. package/dist/indexer/search/fts-query.js +51 -0
  72. package/dist/integrations/agent/spawn.js +15 -66
  73. package/dist/llm/embedders/cache.js +3 -1
  74. package/dist/output/text/helpers.js +13 -0
  75. package/dist/registry/resolve.js +5 -0
  76. package/dist/scripts/migrate-storage.js +6908 -7447
  77. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +44 -43
  78. package/dist/setup/legacy-config.js +106 -0
  79. package/dist/setup/prompt.js +57 -0
  80. package/dist/setup/providers.js +14 -0
  81. package/dist/setup/semantic-assets.js +124 -0
  82. package/dist/setup/setup.js +24 -1607
  83. package/dist/setup/steps/connection.js +734 -0
  84. package/dist/setup/steps/output.js +31 -0
  85. package/dist/setup/steps/platforms.js +124 -0
  86. package/dist/setup/steps/semantic.js +27 -0
  87. package/dist/setup/steps/sources.js +222 -0
  88. package/dist/setup/steps/stashdir.js +42 -0
  89. package/dist/setup/steps/tasks.js +152 -0
  90. package/dist/storage/repositories/canaries-repository.js +107 -0
  91. package/dist/storage/repositories/consolidation-repository.js +38 -0
  92. package/dist/storage/repositories/embeddings-repository.js +72 -0
  93. package/dist/storage/repositories/events-repository.js +187 -0
  94. package/dist/storage/repositories/extract-sessions-repository.js +96 -0
  95. package/dist/storage/repositories/improve-runs-repository.js +130 -0
  96. package/dist/storage/repositories/index-db.js +4 -7
  97. package/dist/storage/repositories/proposals-repository.js +220 -0
  98. package/dist/storage/repositories/recombine-repository.js +213 -0
  99. package/dist/storage/repositories/task-history-repository.js +93 -0
  100. package/dist/storage/sqlite-pragmas.js +3 -3
  101. package/dist/tasks/runner.js +2 -1
  102. package/package.json +1 -1
  103. package/dist/commands/improve/homeostatic.js +0 -497
@@ -1,7 +1,6 @@
1
1
  // This Source Code Form is subject to the terms of the Mozilla Public
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
- import fs from "node:fs";
5
4
  import { createRequire } from "node:module";
6
5
  import path from "node:path";
7
6
  import { parseAssetRef } from "../../core/asset/asset-ref.js";
@@ -10,43 +9,31 @@ import { getDbPath } from "../../core/paths.js";
10
9
  import { warn } from "../../core/warn.js";
11
10
  import { cosineSimilarity } from "../../llm/embedders/types.js";
12
11
  import { sha256Hex } from "../../runtime.js";
13
- import { openDatabase } from "../../storage/database.js";
14
- import { applyStandardPragmas } from "../../storage/sqlite-pragmas.js";
12
+ import { openManagedDatabase } from "../../storage/managed-db.js";
13
+ import { computeNextUtility, HIGH_UTILITY_THRESHOLD, UTILITY_REVIEW_THRESHOLD, } from "../feedback/utility-policy.js";
14
+ import { buildPrefixQuery, sanitizeFtsQuery } from "../search/fts-query.js";
15
15
  import { buildSearchFields } from "../search/search-fields.js";
16
- import { ensureUsageEventsSchema } from "../usage/usage-events.js";
17
- // ── Constants ───────────────────────────────────────────────────────────────
18
- // NOTE: schema changes are additive. DB_VERSION is a forensic stamp only — it
19
- // no longer gates any destructive path (the old nuclear drop-and-rebuild was
20
- // removed; index.db's idempotent CREATE … IF NOT EXISTS schema converges any
21
- // older/partial DB forward without dropping data). Graph re-keying uses a
22
- // TARGETED, graph-only migration (migrateGraphFilesSchema) — the model for any
23
- // incompatible change: migrate in place, never wipe the whole index.
24
- export const DB_VERSION = 17;
25
- export const EMBEDDING_DIM = 384;
26
- // #624-P1: graph_files re-keyed to (stash_root, file_path, body_hash). Bumped 3→4
27
- // as a marker; the actual migration is the targeted drop in migrateGraphFilesSchema.
28
- export const GRAPH_SCHEMA_VERSION = 4;
16
+ import { ENTRY_COLUMNS, rowToIndexedEntry } from "./entry-mapper.js";
17
+ import { ensureSchema } from "./schema.js";
18
+ export { HIGH_UTILITY_THRESHOLD, sanitizeFtsQuery, UTILITY_REVIEW_THRESHOLD };
29
19
  // ── Database lifecycle ──────────────────────────────────────────────────────
30
20
  export function openIndexDatabase(dbPath, options) {
31
- const resolvedPath = dbPath ?? getDbPath();
32
- const dir = path.dirname(resolvedPath);
33
- if (!fs.existsSync(dir)) {
34
- fs.mkdirSync(dir, { recursive: true });
35
- }
36
- const db = openDatabase(resolvedPath);
37
- applyStandardPragmas(db, { dataDir: dir });
38
- // Try to load sqlite-vec extension
39
- loadVecExtension(db);
40
- // Dim resolution: explicit option wins; otherwise consult the on-disk
41
- // config so unparameterised opens (registry providers, graph helpers,
42
- // ad-hoc CLI subcommands) honour the operator-declared dimension. Only if
43
- // both are absent do we fall through to the no-clobber path, which keeps
44
- // ensureSchema from touching `index_meta.embeddingDim` at all.
45
- const resolvedDim = options?.embeddingDim ?? resolveConfiguredEmbeddingDim();
46
- ensureSchema(db, resolvedDim);
47
- // Warn once at init if using JS fallback with many entries
48
- warnIfVecMissing(db, { once: true });
49
- return db;
21
+ return openManagedDatabase({
22
+ path: dbPath ?? getDbPath(),
23
+ init: (db) => {
24
+ // Try to load sqlite-vec extension
25
+ loadVecExtension(db);
26
+ // Dim resolution: explicit option wins; otherwise consult the on-disk
27
+ // config so unparameterised opens (registry providers, graph helpers,
28
+ // ad-hoc CLI subcommands) honour the operator-declared dimension. Only if
29
+ // both are absent do we fall through to the no-clobber path, which keeps
30
+ // ensureSchema from touching `index_meta.embeddingDim` at all.
31
+ const resolvedDim = options?.embeddingDim ?? resolveConfiguredEmbeddingDim();
32
+ ensureSchema(db, resolvedDim);
33
+ // Warn once at init if using JS fallback with many entries
34
+ warnIfVecMissing(db, { once: true });
35
+ },
36
+ });
50
37
  }
51
38
  /**
52
39
  * Read the operator-configured embedding dimension from the on-disk config.
@@ -71,14 +58,10 @@ function resolveConfiguredEmbeddingDim() {
71
58
  }
72
59
  }
73
60
  export function openExistingDatabase(dbPath) {
74
- const resolvedPath = dbPath ?? getDbPath();
75
- const dir = path.dirname(resolvedPath);
76
- const db = openDatabase(resolvedPath);
77
- applyStandardPragmas(db, { dataDir: dir });
78
61
  // Existing-DB callers must not mutate schema or embedding metadata on open,
79
- // but some paths still need write access to usage_events and other tables.
80
- loadVecExtension(db);
81
- return db;
62
+ // but some paths still need write access to usage_events and other tables
63
+ // so init only loads the vec extension, it does not run ensureSchema.
64
+ return openManagedDatabase({ path: dbPath ?? getDbPath(), init: loadVecExtension });
82
65
  }
83
66
  export function closeDatabase(db) {
84
67
  db.close();
@@ -127,372 +110,6 @@ export function warnIfVecMissing(db, { once } = { once: false }) {
127
110
  }
128
111
  }, "embeddings table may not exist yet during init");
129
112
  }
130
- // ── Schema ──────────────────────────────────────────────────────────────────
131
- /**
132
- * DDL for the `registry_index_cache` table. This table lives in index.db
133
- * (managed by this module), so its DDL belongs here next to the `ensureSchema`
134
- * that applies it — not in state-db.ts.
135
- *
136
- * Created with CREATE TABLE IF NOT EXISTS so it is safe to call inside
137
- * `ensureSchema()`. Caches the result of resolving and fetching remote registry
138
- * stash indexes so `akm search` does not hit the network on every invocation.
139
- *
140
- * Indexed (query) columns:
141
- * registry_url TEXT PK — canonical URL of the registry; cache key.
142
- * fetched_at TEXT — ISO-8601; used to detect stale entries (TTL).
143
- * etag TEXT — HTTP ETag for conditional GET (If-None-Match).
144
- * last_modified TEXT — HTTP Last-Modified for conditional GET.
145
- *
146
- * Non-indexed payload:
147
- * index_json TEXT — JSON blob of the fetched registry index document.
148
- *
149
- * ADD COLUMN extension points (future migrations):
150
- * ALTER TABLE registry_index_cache ADD COLUMN schema_version INTEGER DEFAULT 1;
151
- * ALTER TABLE registry_index_cache ADD COLUMN kit_count INTEGER DEFAULT NULL;
152
- * ALTER TABLE registry_index_cache ADD COLUMN error_message TEXT DEFAULT NULL;
153
- */
154
- const REGISTRY_INDEX_CACHE_DDL = `
155
- CREATE TABLE IF NOT EXISTS registry_index_cache (
156
- registry_url TEXT PRIMARY KEY,
157
- fetched_at TEXT NOT NULL,
158
- etag TEXT,
159
- last_modified TEXT,
160
- index_json TEXT NOT NULL DEFAULT '{}'
161
- );
162
-
163
- CREATE INDEX IF NOT EXISTS idx_registry_cache_fetched
164
- ON registry_index_cache(fetched_at);
165
- `;
166
- /** A row backed up out of the legacy `usage_events` table during a version upgrade. */
167
- function ensureSchema(db, embeddingDim) {
168
- // Create meta table first so we can check version
169
- db.exec(`
170
- CREATE TABLE IF NOT EXISTS index_meta (
171
- key TEXT PRIMARY KEY,
172
- value TEXT NOT NULL
173
- );
174
- `);
175
- // index.db is a fully regenerable derived cache, so its schema is built
176
- // idempotently below: every table is CREATE … IF NOT EXISTS and column
177
- // additions go through guarded ALTERs (ensureDerivedFromColumn) and targeted
178
- // migrations (migrateGraphFilesSchema / migrateGraphDataFromLegacy). Opening a
179
- // database with an older or partial schema converges it forward WITHOUT ever
180
- // dropping data — there is intentionally no "nuclear drop the whole index on a
181
- // DB_VERSION mismatch" path (a destructive design the regenerable index never
182
- // needed, and whose pre-drop data-dir backup it required). A genuinely
183
- // incompatible change is handled by an additive/targeted migration; the few
184
- // derived tables that ever must be rebuilt are regenerated by `akm index`.
185
- db.exec(`
186
- CREATE TABLE IF NOT EXISTS entries (
187
- id INTEGER PRIMARY KEY AUTOINCREMENT,
188
- entry_key TEXT NOT NULL UNIQUE,
189
- dir_path TEXT NOT NULL,
190
- file_path TEXT NOT NULL,
191
- stash_dir TEXT NOT NULL,
192
- entry_json TEXT NOT NULL,
193
- search_text TEXT NOT NULL,
194
- entry_type TEXT NOT NULL,
195
- derived_from TEXT
196
- );
197
-
198
- CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
199
- CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
200
- CREATE INDEX IF NOT EXISTS idx_entries_file_path ON entries(file_path);
201
- `);
202
- // Phase 5A / DB v17: backfill `derived_from` column + index on databases
203
- // that were created at v17 fresh OR carry a partial v17 schema (a DB whose
204
- // `index_meta.version` was bumped to 17 but whose `entries` table still
205
- // lacks the column — this happens when a previous v17 binary opened a
206
- // pre-v17 DB without taking the upgrade path because no version mismatch
207
- // was seen at boot). The PRAGMA-then-ALTER guard runs unconditionally so
208
- // both fresh and partial schemas converge. The CREATE INDEX for
209
- // `derived_from` MUST run after this helper so we never reference a
210
- // column that has not yet been added on partial schemas.
211
- ensureDerivedFromColumn(db);
212
- // Validated WorkflowDocument JSON, one row per indexed workflow entry.
213
- // Pure index data — fully rebuilt on each `akm index`. ON DELETE CASCADE
214
- // means clearing entries (full rebuild or per-dir delete) drops these too.
215
- db.exec(`
216
- CREATE TABLE IF NOT EXISTS workflow_documents (
217
- entry_id INTEGER PRIMARY KEY REFERENCES entries(id) ON DELETE CASCADE,
218
- schema_version INTEGER NOT NULL,
219
- document_json TEXT NOT NULL,
220
- source_path TEXT NOT NULL,
221
- source_hash TEXT NOT NULL,
222
- updated_at TEXT NOT NULL
223
- );
224
-
225
- CREATE INDEX IF NOT EXISTS idx_workflow_documents_source_path
226
- ON workflow_documents(source_path);
227
- `);
228
- // Set version immediately after table creation so a crash before the end of
229
- // ensureSchema() does not leave the database in a versionless state on next open.
230
- const versionAfterCreate = getMeta(db, "version");
231
- if (!versionAfterCreate) {
232
- setMeta(db, "version", String(DB_VERSION));
233
- }
234
- // BLOB-based embedding storage (always available, no sqlite-vec needed)
235
- db.exec(`
236
- CREATE TABLE IF NOT EXISTS embeddings (
237
- id INTEGER PRIMARY KEY,
238
- embedding BLOB NOT NULL,
239
- FOREIGN KEY (id) REFERENCES entries(id)
240
- );
241
- `);
242
- // FTS5 table — multi-column with per-field weighting via bm25()
243
- const ftsExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_fts'").get();
244
- if (!ftsExists) {
245
- db.exec(`
246
- CREATE VIRTUAL TABLE entries_fts USING fts5(
247
- entry_id UNINDEXED,
248
- name,
249
- description,
250
- tags,
251
- hints,
252
- content,
253
- tokenize='porter unicode61'
254
- );
255
- `);
256
- }
257
- // Usage events table — created by ensureUsageEventsSchema() at runtime.
258
- // Utility scores table (aggregated per-entry utility metrics)
259
- db.exec(`
260
- CREATE TABLE IF NOT EXISTS utility_scores (
261
- entry_id INTEGER PRIMARY KEY,
262
- utility REAL NOT NULL DEFAULT 0,
263
- show_count INTEGER NOT NULL DEFAULT 0,
264
- search_count INTEGER NOT NULL DEFAULT 0,
265
- select_rate REAL NOT NULL DEFAULT 0,
266
- last_used_at TEXT,
267
- updated_at TEXT NOT NULL DEFAULT (datetime('now')),
268
- FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
269
- );
270
- `);
271
- // Per-project scoped utility scores — tracks usage per (entry, cwd-anchor)
272
- // so assets useful in project A don't pollute rankings in project B.
273
- // The global utility_scores table is preserved as a fallback / cold-start aid.
274
- db.exec(`
275
- CREATE TABLE IF NOT EXISTS utility_scores_scoped (
276
- entry_id INTEGER NOT NULL,
277
- scope_key TEXT NOT NULL,
278
- utility REAL NOT NULL DEFAULT 0,
279
- last_used_at INTEGER NOT NULL,
280
- PRIMARY KEY (entry_id, scope_key)
281
- );
282
- CREATE INDEX IF NOT EXISTS idx_utility_scores_scoped_entry_id
283
- ON utility_scores_scoped(entry_id);
284
- `);
285
- db.exec(`
286
- CREATE TABLE IF NOT EXISTS index_dir_state (
287
- dir_path TEXT PRIMARY KEY,
288
- file_set_hash TEXT NOT NULL,
289
- file_mtime_max_ms REAL NOT NULL,
290
- reason TEXT NOT NULL,
291
- updated_at TEXT NOT NULL
292
- );
293
- `);
294
- // LLM enrichment result cache. Stores a SHA-256 body hash and the JSON
295
- // result for each asset so that subsequent `akm index --enrich` runs can
296
- // skip the LLM call when the body hasn't changed. The cache is keyed by
297
- // a stable asset_ref string (e.g. the absolute file path for graph/memory
298
- // passes, or `entryKey:passId` for the metadata-enhance pass).
299
- // Entries are cleaned up when assets are removed or --re-enrich is used.
300
- db.exec(`
301
- CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
302
- asset_ref TEXT NOT NULL,
303
- cache_variant TEXT NOT NULL,
304
- body_hash TEXT NOT NULL,
305
- result_json TEXT NOT NULL,
306
- updated_at INTEGER NOT NULL,
307
- PRIMARY KEY (asset_ref, cache_variant)
308
- );
309
-
310
- CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
311
- ON llm_enrichment_cache(updated_at);
312
- `);
313
- // Graph extraction tables — schema v4 ((stash_root, file_path, body_hash) PK).
314
- //
315
- // graph_files is self-keyed on (stash_root, file_path, body_hash) and is NO
316
- // LONGER tied to entries.id. This is the #624-P1 win: deleting and
317
- // re-inserting an entries row during a reindex no longer cascade-wipes the
318
- // extracted graph — as long as the file's body_hash is unchanged, the graph
319
- // data survives. body_hash is part of the PK so a content change yields a
320
- // distinct key; a UNIQUE index on (stash_root, file_path) still enforces
321
- // exactly one graph_files row per path (delete-then-insert on a hash change).
322
- //
323
- // graph_file_entities and graph_file_relations carry (stash_root, file_path,
324
- // body_hash) and declare a composite FK -> graph_files ON DELETE CASCADE so
325
- // child rows are removed when a graph_files row is replaced.
326
- //
327
- // #624-P1 targeted migration: an existing DB may still hold the OLD graph_files
328
- // (entry_id PK). SQLite can't ALTER a primary key, so we RENAME the 3 graph
329
- // tables aside (→ *_legacy) here — ONLY the graph tables, never the index/
330
- // embeddings — then the CREATE block below builds the new shape, then
331
- // migrateGraphDataFromLegacy() copies the data across so the graph is PRESERVED
332
- // (not re-extracted).
333
- migrateGraphFilesSchema(db);
334
- db.exec(`
335
- CREATE TABLE IF NOT EXISTS graph_meta (
336
- stash_root TEXT PRIMARY KEY,
337
- schema_version INTEGER NOT NULL,
338
- generated_at TEXT NOT NULL,
339
- considered_files INTEGER NOT NULL DEFAULT 0,
340
- extracted_files INTEGER NOT NULL DEFAULT 0,
341
- entity_count INTEGER NOT NULL DEFAULT 0,
342
- relation_count INTEGER NOT NULL DEFAULT 0,
343
- extraction_coverage REAL NOT NULL DEFAULT 0,
344
- density REAL NOT NULL DEFAULT 0,
345
- extractor_id TEXT,
346
- extraction_run_id TEXT,
347
- model TEXT,
348
- prompt_version TEXT,
349
- batch_size INTEGER,
350
- cache_hits INTEGER NOT NULL DEFAULT 0,
351
- cache_misses INTEGER NOT NULL DEFAULT 0,
352
- truncation_count INTEGER NOT NULL DEFAULT 0,
353
- failure_count INTEGER NOT NULL DEFAULT 0
354
- );
355
-
356
- CREATE TABLE IF NOT EXISTS graph_files (
357
- stash_root TEXT NOT NULL,
358
- file_path TEXT NOT NULL,
359
- file_order INTEGER NOT NULL,
360
- file_type TEXT NOT NULL,
361
- body_hash TEXT NOT NULL,
362
- confidence REAL,
363
- status TEXT NOT NULL DEFAULT 'extracted',
364
- reason TEXT,
365
- extraction_run_id TEXT,
366
- PRIMARY KEY (stash_root, file_path, body_hash)
367
- );
368
-
369
- CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_files_path
370
- ON graph_files(stash_root, file_path);
371
-
372
- CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
373
- ON graph_files(stash_root, file_order);
374
-
375
- CREATE TABLE IF NOT EXISTS graph_file_entities (
376
- stash_root TEXT NOT NULL,
377
- file_path TEXT NOT NULL,
378
- body_hash TEXT NOT NULL,
379
- entity_order INTEGER NOT NULL,
380
- entity_norm TEXT NOT NULL,
381
- entity TEXT NOT NULL,
382
- PRIMARY KEY (stash_root, file_path, body_hash, entity_order),
383
- FOREIGN KEY (stash_root, file_path, body_hash)
384
- REFERENCES graph_files(stash_root, file_path, body_hash) ON DELETE CASCADE
385
- );
386
-
387
- CREATE INDEX IF NOT EXISTS idx_graph_file_entities_entity_norm
388
- ON graph_file_entities(stash_root, entity_norm);
389
-
390
- CREATE TABLE IF NOT EXISTS graph_file_relations (
391
- stash_root TEXT NOT NULL,
392
- file_path TEXT NOT NULL,
393
- body_hash TEXT NOT NULL,
394
- relation_order INTEGER NOT NULL,
395
- from_entity_norm TEXT NOT NULL,
396
- from_entity TEXT NOT NULL,
397
- to_entity_norm TEXT NOT NULL,
398
- to_entity TEXT NOT NULL,
399
- relation_type TEXT,
400
- confidence REAL,
401
- PRIMARY KEY (stash_root, file_path, body_hash, relation_order),
402
- FOREIGN KEY (stash_root, file_path, body_hash)
403
- REFERENCES graph_files(stash_root, file_path, body_hash) ON DELETE CASCADE
404
- );
405
-
406
- -- #624-P3: lazy graph-extraction queue. Standalone table (NO FK to
407
- -- graph_files — a queued file by definition has no graph row yet).
408
- -- Idempotent on (stash_root, file_path); drained highest-priority-first.
409
- -- CREATE TABLE IF NOT EXISTS is the forward migration (no DB_VERSION bump).
410
- CREATE TABLE IF NOT EXISTS graph_extraction_queue (
411
- stash_root TEXT NOT NULL,
412
- file_path TEXT NOT NULL,
413
- body_hash TEXT NOT NULL,
414
- queued_at TEXT NOT NULL DEFAULT (datetime('now')),
415
- priority INTEGER NOT NULL DEFAULT 0,
416
- PRIMARY KEY (stash_root, file_path)
417
- );
418
-
419
- CREATE INDEX IF NOT EXISTS idx_graph_extraction_queue_drain
420
- ON graph_extraction_queue(stash_root, priority DESC, queued_at);
421
- `);
422
- // #624-P1 migration step 2: copy any renamed-aside legacy graph data into the
423
- // new-shape tables (just created above), then drop the legacy tables. No-op
424
- // unless migrateGraphFilesSchema renamed a legacy graph_files this open.
425
- migrateGraphDataFromLegacy(db);
426
- // FTS-dirty queue. Created here (not lazily on first upsert) so the
427
- // per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
428
- // every call — that DDL would fire thousands of times during a full
429
- // index. See `markFtsDirty` and `rebuildFts({ incremental: true })`.
430
- db.exec(`
431
- CREATE TABLE IF NOT EXISTS entries_fts_dirty (
432
- entry_id INTEGER PRIMARY KEY
433
- );
434
- `);
435
- // sqlite-vec table
436
- //
437
- // Dimension contract:
438
- // - When `embeddingDim` is `undefined`, the caller did NOT request a
439
- // specific dim. Do not touch `index_meta.embeddingDim` and do not run
440
- // the dim-change wipe — fall back to the stored dim (or the static
441
- // default) only when we have to materialise the vec table for the
442
- // first time. Without this guard, registry-side and other dim-unaware
443
- // `openDatabase()` callers would silently overwrite the dim-aware
444
- // improve/index value and oscillate the stored dim.
445
- // - When `embeddingDim` is a number, the caller explicitly asked for
446
- // that dim and owns the dim-change/backup/wipe semantics.
447
- const dimExplicit = embeddingDim !== undefined;
448
- const effectiveDim = embeddingDim ?? (Number(getMeta(db, "embeddingDim")) || EMBEDDING_DIM);
449
- if (isVecAvailable(db)) {
450
- // Check if stored embedding dimension differs from configured one
451
- if (dimExplicit) {
452
- const storedDim = getMeta(db, "embeddingDim");
453
- if (storedDim && storedDim !== String(embeddingDim)) {
454
- // Stored vectors are incompatible with the new dimension. Drop the vec
455
- // table so the block below recreates it at the new width; the BLOB rows
456
- // go too. Regenerable from markdown — re-embedded by the next index.
457
- purgeEmbeddings(db, { dropVecTable: true });
458
- }
459
- }
460
- const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
461
- if (!vecExists) {
462
- if (!Number.isInteger(effectiveDim) || effectiveDim <= 0 || effectiveDim > 4096) {
463
- throw new Error(`Invalid embedding dimension: ${effectiveDim}`);
464
- }
465
- db.exec(`
466
- CREATE VIRTUAL TABLE entries_vec USING vec0(
467
- id INTEGER PRIMARY KEY,
468
- embedding FLOAT[${effectiveDim}]
469
- );
470
- `);
471
- }
472
- if (dimExplicit) {
473
- setMeta(db, "embeddingDim", String(embeddingDim));
474
- }
475
- }
476
- else {
477
- // Also purge BLOB embeddings on dimension change (JS fallback path).
478
- // When sqlite-vec is unavailable, entries_vec doesn't exist but the BLOB
479
- // embeddings table still stores vectors. If the configured dimension
480
- // changes, those stored BLOBs become silently incompatible.
481
- if (dimExplicit) {
482
- const storedDim = getMeta(db, "embeddingDim");
483
- if (storedDim && storedDim !== String(embeddingDim)) {
484
- // JS-fallback path: no vec table, just clear the stale BLOB vectors.
485
- purgeEmbeddings(db);
486
- }
487
- setMeta(db, "embeddingDim", String(embeddingDim));
488
- }
489
- }
490
- // Usage telemetry table
491
- ensureUsageEventsSchema(db);
492
- // Registry index cache table — caches remote registry index documents so
493
- // `akm search` does not hit the network on every invocation.
494
- db.exec(REGISTRY_INDEX_CACHE_DDL);
495
- }
496
113
  /**
497
114
  * Purge stored embeddings (BLOB rows in `embeddings`, plus the `entries_vec`
498
115
  * virtual table) and mark the index as embedding-free. The single place that
@@ -610,129 +227,6 @@ function getUpsertStmts(db) {
610
227
  upsertStmtsByDb.set(db, stmts);
611
228
  return stmts;
612
229
  }
613
- /**
614
- * Phase 5A / DB v17 schema guard.
615
- *
616
- * Ensures the `entries.derived_from` column + index exist on the open
617
- * connection. Called from `ensureSchema()` after the entries CREATE so that
618
- * legacy databases (created against a pre-v17 binary) still gain the new column
619
- * without data loss. Idempotent: a `PRAGMA table_info` lookup gates the ALTER.
620
- */
621
- function ensureDerivedFromColumn(db) {
622
- bestEffort(() => {
623
- const cols = db.prepare("PRAGMA table_info(entries)").all();
624
- const hasColumn = cols.some((c) => c.name === "derived_from");
625
- if (!hasColumn) {
626
- db.exec("ALTER TABLE entries ADD COLUMN derived_from TEXT");
627
- }
628
- // Index creation is idempotent on its own; safe to call unconditionally.
629
- db.exec("CREATE INDEX IF NOT EXISTS idx_entries_derived_from ON entries(derived_from)");
630
- }, "entries table may not exist on a brand-new DB before CREATE — caller is responsible");
631
- }
632
- /**
633
- * Returns true when a table exists in the current database.
634
- */
635
- function tableExists(db, name) {
636
- const row = db.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1").get(name);
637
- return row !== undefined && row !== null;
638
- }
639
- /**
640
- * #624-P1 targeted graph-schema migration — STEP 1 of 2 (rename).
641
- *
642
- * graph_files was re-keyed from `entry_id INTEGER PRIMARY KEY REFERENCES
643
- * entries(id)` to a self-contained `(stash_root, file_path, body_hash)` PK.
644
- * SQLite cannot ALTER a primary key, so an existing DB carrying the OLD shape
645
- * has its 3 graph tables RENAMED to `*_legacy` here; ensureSchema's CREATE block
646
- * then builds the new-shape tables, and {@link migrateGraphDataFromLegacy} COPIES
647
- * the data across before dropping the legacy tables. The graph is preserved —
648
- * NOT re-extracted (re-extraction is ~19s/file of LLM work).
649
- *
650
- * Crucially this is GRAPH-SCOPED: it touches ONLY the graph tables, never the
651
- * index / embeddings / enrichment cache. So users keep their (expensive)
652
- * embeddings instead of being forced into a full re-embed by a DB_VERSION bump.
653
- *
654
- * Detection: the old schema has an `entry_id` column on graph_files. Fresh DBs
655
- * (no graph_files yet) and already-migrated DBs (no entry_id column) are no-ops.
656
- * Idempotent.
657
- */
658
- function migrateGraphFilesSchema(db) {
659
- bestEffort(() => {
660
- const cols = db.prepare("PRAGMA table_info(graph_files)").all();
661
- const isLegacyShape = cols.some((c) => c.name === "entry_id");
662
- if (!isLegacyShape)
663
- return;
664
- // A previous interrupted migration may have left *_legacy behind — drop those
665
- // husks first so the rename below doesn't collide.
666
- db.exec("DROP TABLE IF EXISTS graph_file_relations_legacy");
667
- db.exec("DROP TABLE IF EXISTS graph_file_entities_legacy");
668
- db.exec("DROP TABLE IF EXISTS graph_files_legacy");
669
- // Rename the 3 entry_id-keyed tables aside. graph_meta is unchanged (stash_root
670
- // key) so it is left in place. ALTER … RENAME auto-updates child FK refs in
671
- // SQLite ≥3.25, which is fine — the legacy children are dropped after the copy.
672
- db.exec("ALTER TABLE graph_files RENAME TO graph_files_legacy");
673
- if (tableExists(db, "graph_file_entities")) {
674
- db.exec("ALTER TABLE graph_file_entities RENAME TO graph_file_entities_legacy");
675
- }
676
- if (tableExists(db, "graph_file_relations")) {
677
- db.exec("ALTER TABLE graph_file_relations RENAME TO graph_file_relations_legacy");
678
- }
679
- }, "graph_files may not exist on a brand-new DB before CREATE — caller is responsible");
680
- }
681
- /**
682
- * #624-P1 targeted graph-schema migration — STEP 2 of 2 (copy + drop legacy).
683
- *
684
- * Runs AFTER the graph CREATE TABLE block, so the new-shape tables exist. Copies
685
- * every legacy row into the re-keyed tables — the old tables already carry
686
- * (stash_root, file_path, body_hash) next to entry_id, so the projection is a
687
- * straight column copy (children JOIN back to graph_files_legacy to resolve the
688
- * composite key from their entry_id). Then drops the `*_legacy` tables.
689
- *
690
- * Best-effort: a copy failure (e.g. a pre-body_hash legacy schema) is tolerated,
691
- * and the legacy tables are dropped regardless so they never linger. Rows whose
692
- * body_hash is null/empty can't form the new PK and are skipped (they re-extract).
693
- */
694
- function migrateGraphDataFromLegacy(db) {
695
- if (!tableExists(db, "graph_files_legacy"))
696
- return;
697
- let migratedFiles = 0;
698
- bestEffort(() => {
699
- db.transaction(() => {
700
- const res = db
701
- .prepare(`INSERT OR IGNORE INTO graph_files
702
- (stash_root, file_path, body_hash, file_order, file_type, confidence, status, reason, extraction_run_id)
703
- SELECT stash_root, file_path, body_hash, file_order, file_type, confidence, status, reason, extraction_run_id
704
- FROM graph_files_legacy
705
- WHERE body_hash IS NOT NULL AND body_hash != ''`)
706
- .run();
707
- migratedFiles = Number(res.changes);
708
- if (tableExists(db, "graph_file_entities_legacy")) {
709
- db.exec(`INSERT OR IGNORE INTO graph_file_entities
710
- (stash_root, file_path, body_hash, entity_order, entity_norm, entity)
711
- SELECT gf.stash_root, gf.file_path, gf.body_hash, e.entity_order, e.entity_norm, e.entity
712
- FROM graph_file_entities_legacy e
713
- JOIN graph_files_legacy gf ON gf.entry_id = e.entry_id
714
- WHERE gf.body_hash IS NOT NULL AND gf.body_hash != ''`);
715
- }
716
- if (tableExists(db, "graph_file_relations_legacy")) {
717
- db.exec(`INSERT OR IGNORE INTO graph_file_relations
718
- (stash_root, file_path, body_hash, relation_order, from_entity_norm, from_entity, to_entity_norm, to_entity, relation_type, confidence)
719
- SELECT gf.stash_root, gf.file_path, gf.body_hash, r.relation_order, r.from_entity_norm, r.from_entity, r.to_entity_norm, r.to_entity, r.relation_type, r.confidence
720
- FROM graph_file_relations_legacy r
721
- JOIN graph_files_legacy gf ON gf.entry_id = r.entry_id
722
- WHERE gf.body_hash IS NOT NULL AND gf.body_hash != ''`);
723
- }
724
- })();
725
- }, "graph data migration is best-effort; legacy tables are dropped regardless below");
726
- // Always drop the legacy tables (children first), migrated or not.
727
- bestEffort(() => {
728
- db.exec("DROP TABLE IF EXISTS graph_file_relations_legacy");
729
- db.exec("DROP TABLE IF EXISTS graph_file_entities_legacy");
730
- db.exec("DROP TABLE IF EXISTS graph_files_legacy");
731
- }, "drop legacy graph tables after migration");
732
- if (migratedFiles > 0) {
733
- warn(`[akm] graph index re-keyed (#624): migrated ${migratedFiles} extracted file(s) to the new schema — no re-extraction needed. Index + embeddings untouched.`);
734
- }
735
- }
736
230
  /**
737
231
  * Phase 5A / Advantage D5: look up the derived-memory child row whose
738
232
  * `derived_from` column matches `parentRef` (e.g. `"memory:claude-prefs"`).
@@ -747,7 +241,7 @@ export function getDerivedForParent(db, parentRef) {
747
241
  return null;
748
242
  try {
749
243
  const row = db
750
- .prepare(`SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text
244
+ .prepare(`SELECT ${ENTRY_COLUMNS}
751
245
  FROM entries
752
246
  WHERE derived_from = ?
753
247
  ORDER BY id DESC
@@ -755,23 +249,7 @@ export function getDerivedForParent(db, parentRef) {
755
249
  .get(parentRef);
756
250
  if (!row)
757
251
  return null;
758
- let entry;
759
- try {
760
- entry = JSON.parse(row.entry_json);
761
- }
762
- catch {
763
- warn(`[db] getDerivedForParent: skipping entry id=${row.id} — corrupt entry_json`);
764
- return null;
765
- }
766
- return {
767
- id: row.id,
768
- entryKey: row.entry_key,
769
- dirPath: row.dir_path,
770
- filePath: row.file_path,
771
- stashDir: row.stash_dir,
772
- entry,
773
- searchText: row.search_text,
774
- };
252
+ return rowToIndexedEntry(row, "getDerivedForParent");
775
253
  }
776
254
  catch {
777
255
  /* `derived_from` column may not exist on legacy DBs that haven't been
@@ -1115,65 +593,38 @@ export function searchFts(db, query, limit, entryType, excludeTypes) {
1115
593
  return [];
1116
594
  return runFtsQuery(db, prefixQuery, limit, entryType, excludeTypes);
1117
595
  }
1118
- /**
1119
- * Build a prefix query from an FTS5 query string by appending `*` to each
1120
- * token that is 3+ characters long. Tokens shorter than 3 characters are
1121
- * kept as-is (no prefix expansion) to avoid overly broad matches.
1122
- *
1123
- * Returns null if no tokens qualify for prefix expansion.
1124
- */
1125
- function buildPrefixQuery(ftsQuery) {
1126
- const tokens = ftsQuery.split(/\s+/).filter(Boolean);
1127
- let hasPrefix = false;
1128
- const prefixTokens = tokens.map((t) => {
1129
- if (t.length >= 3) {
1130
- hasPrefix = true;
1131
- return `${t}*`;
1132
- }
1133
- return t;
1134
- });
1135
- if (!hasPrefix)
1136
- return null;
1137
- return prefixTokens.join(" ");
1138
- }
1139
596
  function runFtsQuery(db, ftsQuery, limit, entryType, excludeTypes) {
1140
- let sql;
1141
- let params;
1142
597
  // #627 — exclude-type clause. Only applies on the untyped ('any') path; an
1143
598
  // explicit include filter (entryType) already narrows to a single type, so
1144
599
  // exclusion is redundant there. An empty list skips the clause entirely
1145
600
  // (never emit `NOT IN ()`, which is a SQL error / always-false).
1146
601
  const excludes = excludeTypes && excludeTypes.length > 0 ? excludeTypes : [];
1147
- // Join on integer entry_id directly (no CAST needed; we store integer)
1148
- // Use bm25() with per-column weights: entry_id(0), name(10), description(5), tags(3), hints(2), content(1)
602
+ // The typed and untyped paths differ ONLY by one WHERE clause (an entry_type
603
+ // equality vs. an optional NOT IN exclusion) and their param order — the
604
+ // SELECT/JOIN/ORDER/LIMIT is shared, so build it once. Join on integer
605
+ // entry_id directly (no CAST; we store integer). bm25() per-column weights:
606
+ // entry_id(0), name(10), description(5), tags(3), hints(2), content(1).
607
+ let filterClause;
608
+ let params;
1149
609
  if (entryType && entryType !== "any") {
1150
- sql = `
1151
- SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
1152
- bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
1153
- FROM entries_fts f
1154
- JOIN entries e ON e.id = f.entry_id
1155
- WHERE entries_fts MATCH ?
1156
- AND e.entry_type = ?
1157
- ORDER BY bm25Score, e.id ASC
1158
- LIMIT ?
1159
- `;
610
+ filterClause = "AND e.entry_type = ?";
1160
611
  params = [ftsQuery, entryType, limit];
1161
612
  }
1162
613
  else {
1163
- const excludeClause = excludes.length > 0 ? `AND e.entry_type NOT IN (${excludes.map(() => "?").join(", ")})` : "";
1164
- sql = `
1165
- SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
1166
- bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
1167
- FROM entries_fts f
1168
- JOIN entries e ON e.id = f.entry_id
1169
- WHERE entries_fts MATCH ?
1170
- ${excludeClause}
1171
- ORDER BY bm25Score, e.id ASC
1172
- LIMIT ?
1173
- `;
614
+ filterClause = excludes.length > 0 ? `AND e.entry_type NOT IN (${excludes.map(() => "?").join(", ")})` : "";
1174
615
  // Param order: MATCH, then the NOT IN values, then LIMIT.
1175
616
  params = [ftsQuery, ...excludes, limit];
1176
617
  }
618
+ const sql = `
619
+ SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
620
+ bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
621
+ FROM entries_fts f
622
+ JOIN entries e ON e.id = f.entry_id
623
+ WHERE entries_fts MATCH ?
624
+ ${filterClause}
625
+ ORDER BY bm25Score, e.id ASC
626
+ LIMIT ?
627
+ `;
1177
628
  try {
1178
629
  const rows = db.prepare(sql).all(...params);
1179
630
  // Guard against corrupt JSON — skip the row rather than crashing
@@ -1201,43 +652,13 @@ function runFtsQuery(db, ftsQuery, limit, entryType, excludeTypes) {
1201
652
  return [];
1202
653
  }
1203
654
  }
1204
- export function sanitizeFtsQuery(query) {
1205
- // Allow only characters safe in FTS5 queries: letters, digits, underscores,
1206
- // and whitespace. Everything else (hyphens, dots, quotes, parens, asterisks,
1207
- // colons, carets, @, !, etc.) is replaced with a space so that compound
1208
- // identifiers like "code-review" or "k8s.setup" become AND-joined tokens
1209
- // ("code review", "k8s setup") rather than triggering FTS5 syntax errors.
1210
- let sanitized = query.replace(/[^a-zA-Z0-9_\s]/g, " ");
1211
- // Neutralize the NEAR operator (FTS5 proximity syntax)
1212
- sanitized = sanitized.replace(/\bNEAR\b/g, " ");
1213
- const tokens = sanitized.split(/\s+/).filter((t) => t.length >= 1);
1214
- if (tokens.length === 0)
1215
- return "";
1216
- // Use implicit AND (space-separated tokens) for precision. FTS5 treats
1217
- // space-separated tokens as an implicit AND, matching only rows that
1218
- // contain ALL terms.
1219
- return tokens.join(" ");
1220
- }
655
+ // ── All entries ─────────────────────────────────────────────────────────────
1221
656
  function parseEntryRows(rows, context) {
1222
657
  const entries = [];
1223
658
  for (const row of rows) {
1224
- let entry;
1225
- try {
1226
- entry = JSON.parse(row.entry_json);
1227
- }
1228
- catch {
1229
- warn(`[db] ${context}: skipping entry id=${row.id} — corrupt entry_json`);
1230
- continue;
1231
- }
1232
- entries.push({
1233
- id: row.id,
1234
- entryKey: row.entry_key,
1235
- dirPath: row.dir_path,
1236
- filePath: row.file_path,
1237
- stashDir: row.stash_dir,
1238
- entry,
1239
- searchText: row.search_text,
1240
- });
659
+ const mapped = rowToIndexedEntry(row, context);
660
+ if (mapped)
661
+ entries.push(mapped);
1241
662
  }
1242
663
  return entries;
1243
664
  }
@@ -1248,16 +669,15 @@ export function getAllEntries(db, entryType, excludeTypes) {
1248
669
  // list skips the clause (never `NOT IN ()`).
1249
670
  const excludes = excludeTypes && excludeTypes.length > 0 ? excludeTypes : [];
1250
671
  if (entryType && entryType !== "any") {
1251
- sql =
1252
- "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?";
672
+ sql = `SELECT ${ENTRY_COLUMNS} FROM entries WHERE entry_type = ?`;
1253
673
  params = [entryType];
1254
674
  }
1255
675
  else if (excludes.length > 0) {
1256
- sql = `SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type NOT IN (${excludes.map(() => "?").join(", ")})`;
676
+ sql = `SELECT ${ENTRY_COLUMNS} FROM entries WHERE entry_type NOT IN (${excludes.map(() => "?").join(", ")})`;
1257
677
  params = [...excludes];
1258
678
  }
1259
679
  else {
1260
- sql = "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries";
680
+ sql = `SELECT ${ENTRY_COLUMNS} FROM entries`;
1261
681
  params = [];
1262
682
  }
1263
683
  const rows = db.prepare(sql).all(...params);
@@ -1350,9 +770,7 @@ export function getEntryById(db, id) {
1350
770
  return { filePath: row.file_path, entry };
1351
771
  }
1352
772
  export function getEntriesByDir(db, dirPath) {
1353
- const rows = db
1354
- .prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?")
1355
- .all(dirPath);
773
+ const rows = db.prepare(`SELECT ${ENTRY_COLUMNS} FROM entries WHERE dir_path = ?`).all(dirPath);
1356
774
  return parseEntryRows(rows, "getEntriesByDir");
1357
775
  }
1358
776
  /**
@@ -1829,84 +1247,24 @@ export function getEntryByRef(db, type, name) {
1829
1247
  return db.prepare("SELECT id FROM entries WHERE entry_type = ? AND entry_key = ?").get(type, `${type}:${name}`);
1830
1248
  }
1831
1249
  /**
1832
- * MemRL learning rate for feedback-driven utility updates (F-5 / #386).
1833
- *
1834
- * Follows the bounded-step formula from MemRL (arXiv:2601.03192):
1835
- * next = clamp(current + lr × (reward − current), 0, 1)
1836
- *
1837
- * This replaces the unbounded `-0.03 × negativeCount` delta that could
1838
- * silently remove high-utility assets from the improvement loop.
1839
- */
1840
- const FEEDBACK_LR = 0.1;
1841
- /**
1842
- * Positive reward signal for a single positive feedback event.
1843
- * Reward 1.0 means "fully correct / helpful".
1844
- */
1845
- const FEEDBACK_REWARD_POSITIVE = 1.0;
1846
- /**
1847
- * Negative reward signal for a single negative feedback event.
1848
- * Reward 0.0 means "not helpful" (lowest MemRL signal).
1849
- */
1850
- const FEEDBACK_REWARD_NEGATIVE = 0.0;
1851
- /**
1852
- * Maximum total negative utility delta allowed in a single
1853
- * `applyFeedbackToUtilityScore` call regardless of negativeCount.
1854
- *
1855
- * This caps the per-day negative impact (the function is called once per
1856
- * feedback event — spamming 10 negatives in one session can move utility
1857
- * at most `MAX_NEG_DELTA_PER_CALL`). The cap prevents a noisy negative-
1858
- * feedback stream from silently destroying a high-utility asset's ranking.
1859
- */
1860
- const MAX_NEG_DELTA_PER_CALL = 0.15;
1861
- /**
1862
- * Utility threshold below which a review-needed escalation is triggered.
1863
- * When a previously high-utility asset (≥ HIGH_UTILITY_THRESHOLD) drops
1864
- * below this value, the caller should create an escalation proposal.
1865
- */
1866
- export const UTILITY_REVIEW_THRESHOLD = 0.5;
1867
- /**
1868
- * Utility level considered "high" — assets above this are tracked for
1869
- * threshold-crossing escalation.
1870
- */
1871
- export const HIGH_UTILITY_THRESHOLD = 0.5;
1872
- /**
1873
- * Apply accumulated feedback counts to the utility score of an entry using the
1874
- * MemRL bounded-step EMA formula (F-5 / #386, arXiv:2601.03192).
1875
- *
1876
- * Replaces the previous unbounded `-0.03 × negativeCount` formula with:
1877
- *
1878
- * reward = weighted average of positive and negative signals
1879
- * nextUtil = clamp(currentUtil + lr × (reward − currentUtil), 0, 1)
1880
- *
1881
- * The negative impact is additionally capped at {@link MAX_NEG_DELTA_PER_CALL}
1882
- * to prevent a noisy feedback stream from silently erasing a high-utility asset.
1250
+ * Apply accumulated feedback counts to the utility score of an entry, persisting
1251
+ * the result. The bounded-step EMA policy itself (MemRL, F-5 / #386,
1252
+ * arXiv:2601.03192) lives in {@link computeNextUtility} (feedback/utility-policy);
1253
+ * this function only reads the current utility, applies the policy, and writes
1254
+ * the new value.
1883
1255
  *
1884
1256
  * A new entry starts at 0.5 (neutral midpoint) before the EMA step is applied.
1885
- *
1886
- * Returns a {@link FeedbackUtilityResult} so the caller can detect when a
1887
- * previously high-utility asset crosses below the review threshold and create
1888
- * an escalation proposal.
1257
+ * When there is no feedback (both counts zero) the score is left untouched — no
1258
+ * DB write. Returns a {@link FeedbackUtilityResult} so the caller can detect a
1259
+ * previously high-utility asset crossing below the review threshold and escalate.
1889
1260
  */
1890
1261
  export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negativeCount) {
1891
1262
  const existing = getUtilityScore(db, entryId);
1892
1263
  const previousUtility = existing?.utility ?? 0.5;
1264
+ const result = computeNextUtility(previousUtility, positiveCount, negativeCount);
1893
1265
  if (positiveCount === 0 && negativeCount === 0) {
1894
- return { previousUtility, nextUtility: previousUtility, crossedReviewThreshold: false };
1895
- }
1896
- const total = positiveCount + negativeCount;
1897
- // Weighted reward: proportion of positive signals.
1898
- const reward = positiveCount > 0 && negativeCount === 0
1899
- ? FEEDBACK_REWARD_POSITIVE
1900
- : negativeCount > 0 && positiveCount === 0
1901
- ? FEEDBACK_REWARD_NEGATIVE
1902
- : (positiveCount * FEEDBACK_REWARD_POSITIVE + negativeCount * FEEDBACK_REWARD_NEGATIVE) / total;
1903
- // MemRL bounded-step EMA: lr × (reward − current)
1904
- let delta = FEEDBACK_LR * (reward - previousUtility);
1905
- // Per-call negative cap: if delta is negative (net negative feedback), cap it.
1906
- if (delta < 0) {
1907
- delta = Math.max(delta, -MAX_NEG_DELTA_PER_CALL);
1266
+ return result;
1908
1267
  }
1909
- const nextUtility = Math.max(0, Math.min(1, previousUtility + delta));
1910
1268
  const now = new Date().toISOString();
1911
1269
  db.prepare(`
1912
1270
  INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
@@ -1914,16 +1272,14 @@ export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negative
1914
1272
  ON CONFLICT(entry_id) DO UPDATE SET
1915
1273
  utility = ?,
1916
1274
  updated_at = ?
1917
- `).run(entryId, nextUtility, now, now, nextUtility, now);
1918
- const crossedReviewThreshold = previousUtility >= HIGH_UTILITY_THRESHOLD && nextUtility < UTILITY_REVIEW_THRESHOLD;
1919
- return { previousUtility, nextUtility, crossedReviewThreshold };
1275
+ `).run(entryId, result.nextUtility, now, now, result.nextUtility, now);
1276
+ return result;
1920
1277
  }
1921
1278
  /**
1922
1279
  * Re-link detached usage_events to their current entry_ids via entry_ref.
1923
1280
  *
1924
- * After a full rebuild, entry IDs change. This query matches events to their
1925
- * new entry rows using the stable `entry_ref` ("type:name") column so usage
1926
- * history survives a full reindex.
1281
+ * After a full rebuild, entry IDs change. This restores each event's link
1282
+ * using the stable `entry_ref` column so usage history survives a reindex.
1927
1283
  */
1928
1284
  export function relinkUsageEvents(db) {
1929
1285
  bestEffort(() => {
@@ -1940,17 +1296,34 @@ export function relinkUsageEvents(db) {
1940
1296
  WHERE entry_id IS NOT NULL
1941
1297
  AND entry_id NOT IN (SELECT id FROM entries)
1942
1298
  `);
1943
- // Step 2: re-resolve any null entry_id from entry_ref against the
1944
- // current entries table. Picks up entries that were re-created with
1945
- // the same ref (e.g. an asset moved between sources).
1946
- db.exec(`
1947
- UPDATE usage_events SET entry_id = (
1948
- SELECT e.id FROM entries e
1949
- WHERE substr(e.entry_key, length(e.entry_key) - length(usage_events.entry_ref)) = ':' || usage_events.entry_ref
1950
- LIMIT 1
1951
- )
1952
- WHERE entry_id IS NULL AND entry_ref IS NOT NULL
1953
- `);
1299
+ // Step 2: re-resolve any null entry_id from entry_ref against the current
1300
+ // entries table, reusing the SAME canonical resolver the read path uses at
1301
+ // insert time (`findEntryIdByRef` / `parseAssetRef`). Resolving per DISTINCT
1302
+ // ref keeps this O(distinct-refs) indexed lookups instead of the previous
1303
+ // O(events × entries) non-indexable `substr(entry_key, …)` scan. It also
1304
+ // fixes a silent correctness bug: the old suffix match compared the RAW
1305
+ // `entry_ref`, so origin-qualified refs ("source//type:name") never matched
1306
+ // an `entry_key` and lost their usage history on every full rebuild.
1307
+ const refs = db
1308
+ .prepare("SELECT DISTINCT entry_ref AS ref FROM usage_events WHERE entry_id IS NULL AND entry_ref IS NOT NULL")
1309
+ .all();
1310
+ const update = db.prepare("UPDATE usage_events SET entry_id = ? WHERE entry_ref = ? AND entry_id IS NULL");
1311
+ const relinkTx = db.transaction(() => {
1312
+ for (const { ref } of refs) {
1313
+ let id;
1314
+ try {
1315
+ id = findEntryIdByRef(db, ref);
1316
+ }
1317
+ catch (err) {
1318
+ if (err instanceof Error && err.name === "UsageError")
1319
+ continue;
1320
+ throw err;
1321
+ }
1322
+ if (id !== undefined)
1323
+ update.run(id, ref);
1324
+ }
1325
+ });
1326
+ relinkTx();
1954
1327
  }, "usage_events table may not exist yet during entry_id re-resolution");
1955
1328
  }
1956
1329
  // ── registry_index_cache helpers ─────────────────────────────────────────────