npm - akm-cli - Versions diffs - 0.7.5 → 0.8.0-rc.6 - Mend

akm-cli 0.7.5 → 0.8.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (236)

package/{.github/CHANGELOG.md → CHANGELOG.md} +113 -2
package/README.md +20 -4
package/SECURITY.md +93 -0
package/dist/cli/config-migrate.js +144 -0
package/dist/cli/config-validate.js +39 -0
package/dist/cli/confirm.js +73 -0
package/dist/cli/parse-args.js +133 -0
package/dist/cli.js +1995 -551
package/dist/commands/agent-dispatch.js +110 -0
package/dist/commands/agent-support.js +68 -0
package/dist/commands/completions.js +3 -0
package/dist/commands/config-cli.js +130 -534
package/dist/commands/consolidate.js +1531 -0
package/dist/commands/curate.js +44 -3
package/dist/commands/db-cli.js +23 -0
package/dist/commands/distill-promotion-policy.js +660 -0
package/dist/commands/distill.js +990 -75
package/dist/commands/eval-cases.js +43 -0
package/dist/commands/events.js +5 -23
package/dist/commands/graph.js +477 -0
package/dist/commands/health.js +400 -0
package/dist/commands/help/help-accept.md +9 -0
package/dist/commands/help/help-improve.md +77 -0
package/dist/commands/help/help-proposals.md +15 -0
package/dist/commands/help/help-propose.md +17 -0
package/dist/commands/help/help-reject.md +8 -0
package/dist/commands/history.js +54 -46
package/dist/commands/improve-profiles.js +146 -0
package/dist/commands/improve-result-file.js +103 -0
package/dist/commands/improve.js +2175 -0
package/dist/commands/info.js +5 -2
package/dist/commands/init.js +50 -2
package/dist/commands/installed-stashes.js +102 -139
package/dist/commands/knowledge.js +136 -0
package/dist/commands/lint/agent-linter.js +49 -0
package/dist/commands/lint/base-linter.js +479 -0
package/dist/commands/lint/command-linter.js +49 -0
package/dist/commands/lint/default-linter.js +16 -0
package/dist/commands/lint/index.js +183 -0
package/dist/commands/lint/knowledge-linter.js +16 -0
package/dist/commands/lint/markdown-insertion.js +343 -0
package/dist/commands/lint/memory-linter.js +61 -0
package/dist/commands/lint/registry.js +36 -0
package/dist/commands/lint/skill-linter.js +45 -0
package/dist/commands/lint/task-linter.js +50 -0
package/dist/commands/lint/types.js +4 -0
package/dist/commands/lint/vault-key-rules.js +139 -0
package/dist/commands/lint/workflow-linter.js +56 -0
package/dist/commands/lint.js +4 -0
package/dist/commands/migration-help.js +5 -2
package/dist/commands/proposal.js +66 -12
package/dist/commands/propose.js +86 -31
package/dist/commands/reflect.js +1119 -73
package/dist/commands/registry-search.js +5 -2
package/dist/commands/remember.js +69 -6
package/dist/commands/schema-repair.js +203 -0
package/dist/commands/search.js +115 -14
package/dist/commands/self-update.js +3 -0
package/dist/commands/show.js +144 -25
package/dist/commands/source-add.js +17 -45
package/dist/commands/source-clone.js +3 -0
package/dist/commands/source-manage.js +14 -19
package/dist/commands/tasks.js +438 -0
package/dist/commands/url-checker.js +42 -0
package/dist/commands/vault.js +130 -77
package/dist/core/action-contributors.js +28 -0
package/dist/core/asset-ref.js +7 -0
package/dist/core/asset-registry.js +7 -16
package/dist/core/asset-serialize.js +88 -0
package/dist/core/asset-spec.js +22 -0
package/dist/core/common.js +157 -0
package/dist/core/concurrent.js +25 -0
package/dist/core/config-io.js +347 -0
package/dist/core/config-migration.js +625 -0
package/dist/core/config-schema.js +501 -0
package/dist/core/config-sources.js +108 -0
package/dist/core/config-types.js +4 -0
package/dist/core/config-walker.js +337 -0
package/dist/core/config.js +327 -987
package/dist/core/errors.js +40 -19
package/dist/core/events.js +91 -138
package/dist/core/file-lock.js +104 -0
package/dist/core/frontmatter.js +3 -6
package/dist/core/lesson-lint.js +3 -0
package/dist/core/markdown.js +20 -0
package/dist/core/memory-belief.js +62 -0
package/dist/core/memory-contradiction-detect.js +274 -0
package/dist/core/memory-improve.js +806 -0
package/dist/core/parse.js +158 -0
package/dist/core/paths.js +326 -14
package/dist/core/proposal-quality-validators.js +364 -0
package/dist/core/proposal-validators.js +69 -0
package/dist/core/proposals.js +498 -42
package/dist/core/state-db.js +927 -0
package/dist/core/text-truncation.js +107 -0
package/dist/core/time.js +54 -0
package/dist/core/warn.js +62 -1
package/dist/core/write-source.js +3 -0
package/dist/indexer/db-backup.js +391 -0
package/dist/indexer/db-search.js +152 -253
package/dist/indexer/db.js +933 -103
package/dist/indexer/ensure-index.js +64 -0
package/dist/indexer/file-context.js +3 -0
package/dist/indexer/graph-boost.js +376 -101
package/dist/indexer/graph-db.js +391 -0
package/dist/indexer/graph-dedup.js +95 -0
package/dist/indexer/graph-extraction.js +550 -124
package/dist/indexer/index-context.js +4 -0
package/dist/indexer/indexer.js +506 -291
package/dist/indexer/llm-cache.js +47 -0
package/dist/indexer/manifest.js +3 -0
package/dist/indexer/matchers.js +148 -160
package/dist/indexer/memory-inference.js +99 -74
package/dist/indexer/metadata-contributors.js +29 -0
package/dist/indexer/metadata.js +255 -196
package/dist/indexer/path-resolver.js +92 -0
package/dist/indexer/project-context.js +192 -0
package/dist/indexer/ranking-contributors.js +331 -0
package/dist/indexer/ranking.js +81 -0
package/dist/indexer/search-fields.js +5 -9
package/dist/indexer/search-hit-enrichers.js +111 -0
package/dist/indexer/search-source.js +44 -10
package/dist/indexer/semantic-status.js +5 -16
package/dist/indexer/staleness-detect.js +447 -0
package/dist/indexer/usage-events.js +12 -9
package/dist/indexer/walker.js +28 -0
package/dist/integrations/agent/builders.js +135 -0
package/dist/integrations/agent/config.js +122 -230
package/dist/integrations/agent/detect.js +3 -0
package/dist/integrations/agent/index.js +7 -13
package/dist/integrations/agent/model-aliases.js +55 -0
package/dist/integrations/agent/profiles.js +70 -5
package/dist/integrations/agent/prompts.js +150 -74
package/dist/integrations/agent/runner.js +151 -0
package/dist/integrations/agent/sdk-runner.js +126 -0
package/dist/integrations/agent/spawn.js +118 -23
package/dist/integrations/github.js +3 -0
package/dist/integrations/lockfile.js +32 -69
package/dist/integrations/session-logs/index.js +68 -0
package/dist/integrations/session-logs/providers/claude-code.js +59 -0
package/dist/integrations/session-logs/providers/opencode.js +55 -0
package/dist/integrations/session-logs/types.js +4 -0
package/dist/llm/call-ai.js +62 -0
package/dist/llm/client.js +72 -124
package/dist/llm/embedder.js +3 -19
package/dist/llm/embedders/cache.js +3 -7
package/dist/llm/embedders/local.js +3 -0
package/dist/llm/embedders/remote.js +20 -8
package/dist/llm/embedders/types.js +3 -7
package/dist/llm/feature-gate.js +89 -48
package/dist/llm/graph-extract.js +676 -70
package/dist/llm/index-passes.js +9 -23
package/dist/llm/memory-infer.js +52 -71
package/dist/llm/metadata-enhance.js +42 -29
package/dist/llm/prompts/graph-extract-user-prompt.md +35 -0
package/dist/output/cli-hints-full.md +281 -0
package/dist/output/cli-hints-short.md +65 -0
package/dist/output/cli-hints.js +5 -318
package/dist/output/context.js +3 -0
package/dist/output/renderers.js +223 -256
package/dist/output/shapes.js +150 -105
package/dist/output/text.js +318 -30
package/dist/registry/build-index.js +3 -0
package/dist/registry/create-provider-registry.js +3 -0
package/dist/registry/factory.js +3 -0
package/dist/registry/origin-resolve.js +3 -0
package/dist/registry/providers/index.js +3 -0
package/dist/registry/providers/skills-sh.js +70 -49
package/dist/registry/providers/static-index.js +53 -48
package/dist/registry/providers/types.js +3 -24
package/dist/registry/resolve.js +11 -16
package/dist/registry/types.js +3 -0
package/dist/scripts/migrate-storage.js +17307 -0
package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8900 -0
package/dist/scripts/migrations/v16-to-v17.js +141 -0
package/dist/setup/detect.js +3 -0
package/dist/setup/ripgrep-install.js +3 -0
package/dist/setup/ripgrep-resolve.js +3 -0
package/dist/setup/setup.js +775 -37
package/dist/setup/steps.js +3 -15
package/dist/sources/include.js +3 -0
package/dist/sources/provider-factory.js +5 -12
package/dist/sources/provider.js +3 -20
package/dist/sources/providers/filesystem.js +19 -23
package/dist/sources/providers/git.js +7 -5
package/dist/sources/providers/index.js +3 -0
package/dist/sources/providers/install-types.js +3 -13
package/dist/sources/providers/npm.js +3 -4
package/dist/sources/providers/provider-utils.js +3 -0
package/dist/sources/providers/sync-from-ref.js +3 -11
package/dist/sources/providers/tar-utils.js +3 -0
package/dist/sources/providers/website.js +18 -22
package/dist/sources/resolve.js +3 -0
package/dist/sources/types.js +3 -0
package/dist/sources/website-ingest.js +7 -0
package/dist/tasks/backends/cron.js +203 -0
package/dist/tasks/backends/exec-utils.js +28 -0
package/dist/tasks/backends/index.js +24 -0
package/dist/tasks/backends/launchd-template.xml +19 -0
package/dist/tasks/backends/launchd.js +187 -0
package/dist/tasks/backends/schtasks-template.xml +29 -0
package/dist/tasks/backends/schtasks.js +215 -0
package/dist/tasks/parser.js +211 -0
package/dist/tasks/resolveAkmBin.js +87 -0
package/dist/tasks/runner.js +458 -0
package/dist/tasks/schedule.js +211 -0
package/dist/tasks/schema.js +15 -0
package/dist/tasks/validator.js +62 -0
package/dist/version.js +3 -0
package/dist/wiki/index-template.md +12 -0
package/dist/wiki/ingest-workflow-template.md +54 -0
package/dist/wiki/log-template.md +8 -0
package/dist/wiki/schema-template.md +61 -0
package/dist/wiki/wiki-templates.js +15 -0
package/dist/wiki/wiki.js +13 -61
package/dist/workflows/authoring.js +8 -25
package/dist/workflows/cli.js +3 -0
package/dist/workflows/db.js +140 -10
package/dist/workflows/document-cache.js +3 -10
package/dist/workflows/parser.js +3 -0
package/dist/workflows/renderer.js +11 -3
package/dist/workflows/runs.js +62 -91
package/dist/workflows/schema.js +3 -0
package/dist/workflows/scope-key.js +3 -0
package/dist/workflows/validator.js +4 -8
package/dist/workflows/workflow-template.md +24 -0
package/docs/README.md +9 -2
package/docs/data-and-telemetry.md +225 -0
package/docs/migration/release-notes/0.7.0.md +1 -1
package/docs/migration/release-notes/0.7.5.md +2 -2
package/docs/migration/release-notes/0.8.0.md +48 -0
package/docs/migration/v0.7-to-v0.8.md +1307 -0
package/package.json +20 -8
package/.github/LICENSE +0 -374
package/dist/commands/install-audit.js +0 -381
package/dist/templates/wiki-templates.js +0 -100

package/dist/indexer/db.js CHANGED

@@ -1,16 +1,22 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
 import { Database } from "bun:sqlite";
 import fs from "node:fs";
 import { createRequire } from "node:module";
 import path from "node:path";
 import { parseAssetRef } from "../core/asset-ref";
 import { getDbPath } from "../core/paths";
+import { REGISTRY_INDEX_CACHE_DDL } from "../core/state-db";
 import { warn } from "../core/warn";
 import { cosineSimilarity } from "../llm/embedders/types";
+import { backupDataDir, EMBEDDING_DIM_CHANGE_REASON } from "./db-backup";
 import { buildSearchFields } from "./search-fields";
 import { ensureUsageEventsSchema } from "./usage-events";
 // ── Constants ───────────────────────────────────────────────────────────────
-export const DB_VERSION = 10;
+export const DB_VERSION = 17;
 export const EMBEDDING_DIM = 384;
+export const GRAPH_SCHEMA_VERSION = 3;
 // ── Database lifecycle ──────────────────────────────────────────────────────
 export function openDatabase(dbPath, options) {
     const resolvedPath = dbPath ?? getDbPath();
@@ -24,11 +30,39 @@ export function openDatabase(dbPath, options) {
     db.exec("PRAGMA foreign_keys = ON");
     // Try to load sqlite-vec extension
     loadVecExtension(db);
-    ensureSchema(db, options?.embeddingDim ?? EMBEDDING_DIM);
+    // Dim resolution: explicit option wins; otherwise consult the on-disk
+    // config so unparameterised opens (registry providers, graph helpers,
+    // ad-hoc CLI subcommands) honour the operator-declared dimension. Only if
+    // both are absent do we fall through to the no-clobber path, which keeps
+    // ensureSchema from touching `index_meta.embeddingDim` at all.
+    const resolvedDim = options?.embeddingDim ?? resolveConfiguredEmbeddingDim();
+    ensureSchema(db, resolvedDim, { dataDir: dir });
     // Warn once at init if using JS fallback with many entries
     warnIfVecMissing(db, { once: true });
     return db;
 }
+/**
+ * Read the operator-configured embedding dimension from the on-disk config.
+ * Returns `undefined` when no config file is present, when the config has
+ * no `embedding.dimension` set, or when reading the config throws (e.g.
+ * inside isolated test fixtures with no XDG home). Failure is silent on
+ * purpose — every openDatabase() call would otherwise have to handle a
+ * config-not-found error path, and the fallback (no-clobber semantics) is
+ * already correct.
+ */
+function resolveConfiguredEmbeddingDim() {
+    try {
+        const { loadConfig } = require("../core/config");
+        const dim = loadConfig().embedding?.dimension;
+        if (typeof dim === "number" && Number.isInteger(dim) && dim > 0 && dim <= 4096) {
+            return dim;
+        }
+        return undefined;
+    }
+    catch {
+        return undefined;
+    }
+}
 export function openExistingDatabase(dbPath) {
     const resolvedPath = dbPath ?? getDbPath();
     const db = new Database(resolvedPath);
@@ -86,7 +120,7 @@ export function warnIfVecMissing(db, { once } = { once: false }) {
         /* embeddings table may not exist yet during init */
     }
 }
-function ensureSchema(db, embeddingDim) {
+function ensureSchema(db, embeddingDim, options) {
     // Create meta table first so we can check version
     db.exec(`
     CREATE TABLE IF NOT EXISTS index_meta (
@@ -94,6 +128,39 @@ function ensureSchema(db, embeddingDim) {
       value TEXT NOT NULL
     );
   `);
+    // MVP DB-backup hook (0.8.x): when the stored DB version differs from the
+    // running binary's DB_VERSION, snapshot the data directory BEFORE
+    // `handleVersionUpgrade()` drops tables. This is best-effort —
+    // `backupDataDir` returns null on opt-out, missing data dir, low free
+    // space, or copy errors, and we proceed with the upgrade in all cases.
+    // The proper migration framework lands in 0.9.0; until then this lets
+    // operators recover with `scripts/migrations/restore-data-dir.sh`.
+    if (options?.dataDir) {
+        const storedVersionRaw = getMeta(db, "version");
+        const storedVersion = storedVersionRaw !== undefined && storedVersionRaw !== "" ? Number.parseInt(storedVersionRaw, 10) : null;
+        const willUpgrade = storedVersionRaw !== undefined && storedVersionRaw !== "" && storedVersionRaw !== String(DB_VERSION);
+        if (willUpgrade) {
+            try {
+                // Pass env explicitly so tests can override AKM_DB_BACKUP / AKM_DB_BACKUP_RETAIN
+                // without mutating process.env. Production callers default to process.env.
+                const result = backupDataDir({
+                    dataDir: options.dataDir,
+                    sourceVersion: storedVersion !== null && !Number.isNaN(storedVersion) ? storedVersion : null,
+                    targetVersion: DB_VERSION,
+                    env: process.env,
+                });
+                if (result) {
+                    warn("[akm] data directory backed up to %s before v%s→v%d upgrade", result.path, storedVersionRaw, DB_VERSION);
+                }
+            }
+            catch (err) {
+                // Defensive — backupDataDir already swallows most errors, but if it
+                // throws for an unexpected reason we must still proceed with the
+                // upgrade so the user isn't locked out of their binary.
+                warn("[akm] pre-upgrade data dir backup raised an unexpected error — %s; upgrade will proceed without a snapshot", err instanceof Error ? err.message : String(err));
+            }
+        }
+    }
     // Check stored version — if it differs from DB_VERSION, drop and recreate all tables.
     // Usage events are preserved across version upgrades so that utility score
     // history is not silently lost. The backup is captured here and threaded
@@ -110,12 +177,24 @@ function ensureSchema(db, embeddingDim) {
       stash_dir   TEXT NOT NULL,
       entry_json  TEXT NOT NULL,
       search_text TEXT NOT NULL,
-      entry_type  TEXT NOT NULL
+      entry_type  TEXT NOT NULL,
+      derived_from TEXT
     );
     CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
     CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
+    CREATE INDEX IF NOT EXISTS idx_entries_file_path ON entries(file_path);
   `);
+    // Phase 5A / DB v17: backfill `derived_from` column + index on databases
+    // that were created at v17 fresh OR carry a partial v17 schema (a DB whose
+    // `index_meta.version` was bumped to 17 but whose `entries` table still
+    // lacks the column — this happens when a previous v17 binary opened a
+    // pre-v17 DB without taking the upgrade path because no version mismatch
+    // was seen at boot). The PRAGMA-then-ALTER guard runs unconditionally so
+    // both fresh and partial schemas converge. The CREATE INDEX for
+    // `derived_from` MUST run after this helper so we never reference a
+    // column that has not yet been added on partial schemas.
+    ensureDerivedFromColumn(db);
     // Validated WorkflowDocument JSON, one row per indexed workflow entry.
     // Pure index data — fully rebuilt on each `akm index`. ON DELETE CASCADE
     // means clearing entries (full rebuild or per-dir delete) drops these too.
@@ -174,6 +253,20 @@ function ensureSchema(db, embeddingDim) {
       updated_at   TEXT NOT NULL DEFAULT (datetime('now')),
       FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
     );
+  `);
+    // Per-project scoped utility scores — tracks usage per (entry, cwd-anchor)
+    // so assets useful in project A don't pollute rankings in project B.
+    // The global utility_scores table is preserved as a fallback / cold-start aid.
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS utility_scores_scoped (
+      entry_id     INTEGER NOT NULL,
+      scope_key    TEXT NOT NULL,
+      utility      REAL NOT NULL DEFAULT 0,
+      last_used_at INTEGER NOT NULL,
+      PRIMARY KEY (entry_id, scope_key)
+    );
+    CREATE INDEX IF NOT EXISTS idx_utility_scores_scoped_entry_id
+      ON utility_scores_scoped(entry_id);
   `);
     db.exec(`
     CREATE TABLE IF NOT EXISTS index_dir_state (
@@ -183,6 +276,97 @@ function ensureSchema(db, embeddingDim) {
       reason            TEXT NOT NULL,
       updated_at        TEXT NOT NULL
     );
+  `);
+    // LLM enrichment result cache. Stores a SHA-256 body hash and the JSON
+    // result for each asset so that subsequent `akm index --enrich` runs can
+    // skip the LLM call when the body hasn't changed. The cache is keyed by
+    // a stable asset_ref string (e.g. the absolute file path for graph/memory
+    // passes, or `entryKey:passId` for the metadata-enhance pass).
+    // Entries are cleaned up when assets are removed or --re-enrich is used.
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
+      asset_ref     TEXT NOT NULL,
+      cache_variant TEXT NOT NULL,
+      body_hash     TEXT NOT NULL,
+      result_json   TEXT NOT NULL,
+      updated_at    INTEGER NOT NULL,
+      PRIMARY KEY (asset_ref, cache_variant)
+    );
+     CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
+       ON llm_enrichment_cache(updated_at);
+  `);
+    // Graph extraction tables — schema v2 (entry_id PK).
+    //
+    // graph_files is keyed on entries.id so child tables cascade-delete cleanly
+    // when an entry is removed, and so JOINs from graph rows to entries are a
+    // direct PK lookup. (stash_root, file_path) is retained as UNIQUE so the
+    // extractor's path-based upsert still works.
+    //
+    // graph_file_entities and graph_file_relations no longer duplicate file_path;
+    // they reference entry_id and inherit stash scoping via graph_files.
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS graph_meta (
+      stash_root          TEXT PRIMARY KEY,
+      schema_version      INTEGER NOT NULL,
+      generated_at        TEXT NOT NULL,
+      considered_files    INTEGER NOT NULL DEFAULT 0,
+      extracted_files     INTEGER NOT NULL DEFAULT 0,
+      entity_count        INTEGER NOT NULL DEFAULT 0,
+      relation_count      INTEGER NOT NULL DEFAULT 0,
+      extraction_coverage REAL NOT NULL DEFAULT 0,
+      density             REAL NOT NULL DEFAULT 0,
+      extractor_id        TEXT,
+      extraction_run_id   TEXT,
+      model               TEXT,
+      prompt_version      TEXT,
+      batch_size          INTEGER,
+      cache_hits          INTEGER NOT NULL DEFAULT 0,
+      cache_misses        INTEGER NOT NULL DEFAULT 0,
+      truncation_count    INTEGER NOT NULL DEFAULT 0,
+      failure_count       INTEGER NOT NULL DEFAULT 0
+    );
+    CREATE TABLE IF NOT EXISTS graph_files (
+      entry_id          INTEGER PRIMARY KEY REFERENCES entries(id) ON DELETE CASCADE,
+      stash_root        TEXT NOT NULL,
+      file_path         TEXT NOT NULL,
+      file_order        INTEGER NOT NULL,
+      file_type         TEXT NOT NULL,
+      body_hash         TEXT NOT NULL,
+      confidence        REAL,
+      status            TEXT NOT NULL DEFAULT 'extracted',
+      reason            TEXT,
+      extraction_run_id TEXT,
+      UNIQUE(stash_root, file_path)
+    );
+    CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
+      ON graph_files(stash_root, file_order);
+    CREATE TABLE IF NOT EXISTS graph_file_entities (
+      entry_id     INTEGER NOT NULL REFERENCES graph_files(entry_id) ON DELETE CASCADE,
+      entity_order INTEGER NOT NULL,
+      stash_root   TEXT NOT NULL,
+      entity_norm  TEXT NOT NULL,
+      entity       TEXT NOT NULL,
+      PRIMARY KEY (entry_id, entity_order)
+    );
+    CREATE INDEX IF NOT EXISTS idx_graph_file_entities_entity_norm
+      ON graph_file_entities(stash_root, entity_norm);
+    CREATE TABLE IF NOT EXISTS graph_file_relations (
+      entry_id       INTEGER NOT NULL REFERENCES graph_files(entry_id) ON DELETE CASCADE,
+      relation_order INTEGER NOT NULL,
+      from_entity_norm TEXT NOT NULL,
+      from_entity    TEXT NOT NULL,
+      to_entity_norm TEXT NOT NULL,
+      to_entity      TEXT NOT NULL,
+      relation_type  TEXT,
+      confidence     REAL,
+      PRIMARY KEY (entry_id, relation_order)
+    );
   `);
     // FTS-dirty queue. Created here (not lazily on first upsert) so the
     // per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
@@ -194,59 +378,89 @@ function ensureSchema(db, embeddingDim) {
     );
   `);
     // sqlite-vec table
+    //
+    // Dimension contract:
+    //   - When `embeddingDim` is `undefined`, the caller did NOT request a
+    //     specific dim. Do not touch `index_meta.embeddingDim` and do not run
+    //     the dim-change wipe — fall back to the stored dim (or the static
+    //     default) only when we have to materialise the vec table for the
+    //     first time. Without this guard, registry-side and other dim-unaware
+    //     `openDatabase()` callers would silently overwrite the dim-aware
+    //     improve/index value and oscillate the stored dim.
+    //   - When `embeddingDim` is a number, the caller explicitly asked for
+    //     that dim and owns the dim-change/backup/wipe semantics.
+    const dimExplicit = embeddingDim !== undefined;
+    const effectiveDim = embeddingDim ?? (Number(getMeta(db, "embeddingDim")) || EMBEDDING_DIM);
     if (isVecAvailable(db)) {
         // Check if stored embedding dimension differs from configured one
-        const storedDim = getMeta(db, "embeddingDim");
-        if (storedDim && storedDim !== String(embeddingDim)) {
-            try {
-                db.exec("DROP TABLE IF EXISTS entries_vec");
-            }
-            catch {
-                /* ignore */
-            }
-            // Delete stale BLOB embeddings so they don't produce silently wrong
-            // similarity scores against the new-dimension vec table.
-            try {
-                db.exec("DELETE FROM embeddings");
-            }
-            catch {
-                /* ignore */
+        if (dimExplicit) {
+            const storedDim = getMeta(db, "embeddingDim");
+            if (storedDim && storedDim !== String(embeddingDim)) {
+                // Re-embedding the whole stash is expensive (LLM API calls + cache
+                // misses), so snapshot the data dir before we drop the vec table and
+                // wipe `embeddings`. This is the SAME hook the version-upgrade path
+                // uses earlier in this function, just gated on embedding-dim mismatch
+                // and tagged so operators can tell the two backup kinds apart.
+                backupBeforeEmbeddingDimChange(options?.dataDir, storedDim, String(embeddingDim));
+                try {
+                    db.exec("DROP TABLE IF EXISTS entries_vec");
+                }
+                catch {
+                    /* ignore */
+                }
+                // Delete stale BLOB embeddings so they don't produce silently wrong
+                // similarity scores against the new-dimension vec table.
+                try {
+                    db.exec("DELETE FROM embeddings");
+                }
+                catch {
+                    /* ignore */
+                }
+                setMeta(db, "hasEmbeddings", "0");
             }
-            setMeta(db, "hasEmbeddings", "0");
         }
         const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
         if (!vecExists) {
-            if (!Number.isInteger(embeddingDim) || embeddingDim <= 0 || embeddingDim > 4096) {
-                throw new Error(`Invalid embedding dimension: ${embeddingDim}`);
+            if (!Number.isInteger(effectiveDim) || effectiveDim <= 0 || effectiveDim > 4096) {
+                throw new Error(`Invalid embedding dimension: ${effectiveDim}`);
             }
             db.exec(`
         CREATE VIRTUAL TABLE entries_vec USING vec0(
           id       INTEGER PRIMARY KEY,
-          embedding FLOAT[${embeddingDim}]
+          embedding FLOAT[${effectiveDim}]
         );
       `);
         }
-        setMeta(db, "embeddingDim", String(embeddingDim));
+        if (dimExplicit) {
+            setMeta(db, "embeddingDim", String(embeddingDim));
+        }
     }
     else {
         // Also purge BLOB embeddings on dimension change (JS fallback path).
         // When sqlite-vec is unavailable, entries_vec doesn't exist but the BLOB
         // embeddings table still stores vectors. If the configured dimension
         // changes, those stored BLOBs become silently incompatible.
-        const storedDim = getMeta(db, "embeddingDim");
-        if (storedDim && storedDim !== String(embeddingDim)) {
-            try {
-                db.exec("DELETE FROM embeddings");
-            }
-            catch {
-                /* ignore */
+        if (dimExplicit) {
+            const storedDim = getMeta(db, "embeddingDim");
+            if (storedDim && storedDim !== String(embeddingDim)) {
+                backupBeforeEmbeddingDimChange(options?.dataDir, storedDim, String(embeddingDim));
+                try {
+                    db.exec("DELETE FROM embeddings");
+                }
+                catch {
+                    /* ignore */
+                }
+                setMeta(db, "hasEmbeddings", "0");
             }
-            setMeta(db, "hasEmbeddings", "0");
+            setMeta(db, "embeddingDim", String(embeddingDim));
         }
-        setMeta(db, "embeddingDim", String(embeddingDim));
     }
     // Usage telemetry table
     ensureUsageEventsSchema(db);
+    // Registry index cache table — caches remote registry index documents so
+    // `akm search` does not hit the network on every invocation. The DDL is
+    // defined in state-db.ts and shared here to avoid duplication.
+    db.exec(REGISTRY_INDEX_CACHE_DDL);
     // Restore usage_events backed up by the version-upgrade path above.
     restoreUsageEventsBackup(db, usageBackup);
 }
@@ -277,11 +491,23 @@ function handleVersionUpgrade(db) {
         /* table may not exist in older versions */
     }
     db.exec("DROP TABLE IF EXISTS utility_scores");
+    db.exec("DROP TABLE IF EXISTS utility_scores_scoped");
+    db.exec("DROP INDEX IF EXISTS idx_utility_scores_scoped_entry_id");
     db.exec("DROP TABLE IF EXISTS usage_events");
     db.exec("DROP TABLE IF EXISTS embeddings");
     db.exec("DROP TABLE IF EXISTS entries_vec");
     db.exec("DROP TABLE IF EXISTS entries_fts");
     db.exec("DROP TABLE IF EXISTS index_dir_state");
+    db.exec("DROP TABLE IF EXISTS llm_enrichment_cache");
+    db.exec("DROP INDEX IF EXISTS idx_llm_cache_updated");
+    db.exec("DROP TABLE IF EXISTS graph_file_relations");
+    db.exec("DROP TABLE IF EXISTS graph_file_entities");
+    db.exec("DROP TABLE IF EXISTS graph_files");
+    db.exec("DROP TABLE IF EXISTS graph_meta");
+    db.exec("DROP TABLE IF EXISTS graph_relations");
+    db.exec("DROP TABLE IF EXISTS graph_entities");
+    db.exec("DROP TABLE IF EXISTS graph_nodes");
+    db.exec("DROP TABLE IF EXISTS graph_stashes");
     db.exec("DROP INDEX IF EXISTS idx_entries_dir");
     db.exec("DROP INDEX IF EXISTS idx_entries_type");
     db.exec("DROP TABLE IF EXISTS entries");
@@ -289,6 +515,48 @@ function handleVersionUpgrade(db) {
     warn("[akm] Index rebuilt due to version upgrade. Run 'akm index' to repopulate.");
     return usageBackup;
 }
+/**
+ * Snapshot the data directory before the embedding-dimension drop path wipes
+ * `embeddings` and recreates `entries_vec`. Re-embedding a real-world stash
+ * is expensive (LLM calls + cache misses), so we capture the pre-drop state
+ * here using the same MVP backup helper the version-upgrade hook uses
+ * earlier in {@link ensureSchema}.
+ *
+ * The backup is tagged with the `embedding-dim-change` reason so it lands in
+ * `<dataDir>/backups/<timestamp>-embedding-dim-change/` instead of the
+ * version-upgrade-flavored `<timestamp>-pre-v<N>/` directory. Restoration
+ * works identically via `scripts/migrations/restore-data-dir.sh`.
+ *
+ * Failures are non-fatal — they downgrade to a warning and the destructive
+ * ops run anyway, matching the version-upgrade hook's behavior so a broken
+ * backup cannot brick a binary that bumped the configured dim. Likewise,
+ * `AKM_DB_BACKUP=0` opts out via the same path.
+ */
+function backupBeforeEmbeddingDimChange(dataDir, fromDim, toDim) {
+    if (!dataDir)
+        return;
+    try {
+        const result = backupDataDir({
+            dataDir,
+            // The DB version isn't changing here — pass the current DB_VERSION for
+            // both source and target so the metadata sidecar still records the
+            // running binary's version for forensic context.
+            sourceVersion: DB_VERSION,
+            targetVersion: DB_VERSION,
+            reason: EMBEDDING_DIM_CHANGE_REASON,
+            env: process.env,
+        });
+        if (result) {
+            warn("[akm] embedding dimension changed %s→%s; data directory backed up to %s; embeddings will be regenerated", fromDim, toDim, result.path);
+        }
+    }
+    catch (err) {
+        // Defensive — backupDataDir already swallows most errors, but if it
+        // throws for an unexpected reason we must still proceed with the drop
+        // so the user isn't locked out of their binary on a changed dim.
+        warn("[akm] pre-embedding-dim-change data dir backup raised an unexpected error — %s; embeddings will be regenerated without a snapshot", err instanceof Error ? err.message : String(err));
+    }
+}
 /**
  * Re-insert backed-up `usage_events` rows into the freshly-created table.
  *
@@ -383,6 +651,12 @@ export function deleteIndexDirStatesByStashDir(db, stashDir) {
     db.prepare("DELETE FROM index_dir_state WHERE dir_path = ? OR dir_path LIKE ?").run(stashDir, `${stashDir}${path.sep}%`);
 }
 // ── Entry operations ────────────────────────────────────────────────────────
+/**
+ * SQLite parameter chunk size — chosen well below SQLITE_MAX_VARIABLE_NUMBER
+ * (999 by default before SQLite 3.32.0, 32766 from 3.32.0 on) so multi-row
+ * `IN (?, ?, ...)` queries stay within bounds even on older builds. Shared by
+ * the chunked helpers below; helpers that bind one extra parameter next to
+ * the chunk (a scope key or cache variant) still stay under the older limit.
+ */
+const SQLITE_CHUNK_SIZE = 500;
 /**
  * Insert or update an entry in the `entries` table. Returns the row id.
  *
@@ -396,7 +670,11 @@ export function upsertEntry(db, entryKey, dirPath, filePath, stashDir, entry, se
     // every call. The dirty-mark INSERT and the upsert-with-RETURNING
     // share the same WeakMap so they live and die with the connection.
     const stmts = getUpsertStmts(db);
-    const result = stmts.upsert.get(entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type);
+    // Phase 5A / Advantage D5: surface derived memory parent ref into the
+    // dedicated `derived_from` column so retrieval-time lookup (parent→child)
+    // does not have to scan + JSON-decode every memory row.
+    const derivedFrom = typeof entry.derivedFrom === "string" && entry.derivedFrom.trim() ? entry.derivedFrom.trim() : null;
+    const result = stmts.upsert.get(entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type, derivedFrom);
     if (!result)
         throw new Error("upsertEntry: entry_key not found after upsert");
     // Mark this entry as FTS-dirty so `rebuildFts({ incremental: true })`
@@ -415,15 +693,16 @@ function getUpsertStmts(db) {
         // SELECT round-trip needed (last_insert_rowid() is unreliable for
         // ON CONFLICT). Use `.get()` so a single row comes back.
         upsert: db.prepare(`
-      INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type)
-      VALUES (?, ?, ?, ?, ?, ?, ?)
+      INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type, derived_from)
+      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
       ON CONFLICT(entry_key) DO UPDATE SET
         dir_path = excluded.dir_path,
         file_path = excluded.file_path,
         stash_dir = excluded.stash_dir,
         entry_json = excluded.entry_json,
         search_text = excluded.search_text,
-        entry_type = excluded.entry_type
+        entry_type = excluded.entry_type,
+        derived_from = excluded.derived_from
       RETURNING id
     `),
         markDirty: db.prepare("INSERT OR IGNORE INTO entries_fts_dirty (entry_id) VALUES (?)"),
@@ -431,21 +710,128 @@ function getUpsertStmts(db) {
     upsertStmtsByDb.set(db, stmts);
     return stmts;
 }
-export function deleteEntriesByDir(db, dirPath) {
+/**
+ * Phase 5A / DB v17 schema guard.
+ *
+ * Ensures the `entries.derived_from` column + index exist on the open
+ * connection. Called from `ensureSchema()` after the entries CREATE so that
+ * legacy databases (created against a pre-v17 binary but reopened without
+ * triggering `handleVersionUpgrade()`) still gain the new column without
+ * data loss. Idempotent: a `PRAGMA table_info` lookup gates the ALTER.
+ *
+ * Every error raised inside the `try` — not only a missing `entries` table —
+ * is swallowed, so a failed ALTER or CREATE INDEX is silent. Readers of
+ * `derived_from` must therefore tolerate the column being absent, as
+ * `getDerivedForParent` does.
+ *
+ * @param db Open database handle exposing `prepare()` and `exec()`.
+ */
+function ensureDerivedFromColumn(db) {
+    try {
+        const cols = db.prepare("PRAGMA table_info(entries)").all();
+        const hasColumn = cols.some((c) => c.name === "derived_from");
+        if (!hasColumn) {
+            db.exec("ALTER TABLE entries ADD COLUMN derived_from TEXT");
+        }
+        // Index creation is idempotent on its own; safe to call unconditionally.
+        db.exec("CREATE INDEX IF NOT EXISTS idx_entries_derived_from ON entries(derived_from)");
+    }
+    catch {
+        /* table may not exist on a brand-new DB before CREATE — caller is responsible.
+           Note that any other failure (ALTER, CREATE INDEX) is discarded here too. */
+    }
+}
+/**
+ * Phase 5A / Advantage D5: look up the derived-memory child row whose
+ * `derived_from` column matches `parentRef` (e.g. `"memory:claude-prefs"`).
+ *
+ * Returns the derived child with the highest `id` when multiple exist (one
+ * parent should yield exactly one `.derived` child in practice, but the
+ * `ORDER BY id DESC` keeps results deterministic). Returns `null` when
+ * `parentRef` is falsy, when no derived child has been indexed for this
+ * parent, when the matching row's `entry_json` cannot be parsed (a warning
+ * is logged), or when the query itself throws.
+ *
+ * @param db Open database handle exposing `prepare()`.
+ * @param parentRef Parent asset ref, compared exactly against `derived_from`.
+ * @returns `{ id, entryKey, dirPath, filePath, stashDir, entry, searchText }`
+ *   for the matching row, or `null`.
+ */
+export function getDerivedForParent(db, parentRef) {
+    if (!parentRef)
+        return null;
+    try {
+        const row = db
+            .prepare(`SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text
+         FROM entries
+         WHERE derived_from = ?
+         ORDER BY id DESC
+         LIMIT 1`)
+            .get(parentRef);
+        if (!row)
+            return null;
+        let entry;
+        try {
+            entry = JSON.parse(row.entry_json);
+        }
+        catch {
+            warn(`[db] getDerivedForParent: skipping entry id=${row.id} — corrupt entry_json`);
+            return null;
+        }
+        return {
+            id: row.id,
+            entryKey: row.entry_key,
+            dirPath: row.dir_path,
+            filePath: row.file_path,
+            stashDir: row.stash_dir,
+            entry,
+            searchText: row.search_text,
+        };
+    }
+    catch {
+        /* `derived_from` column may not exist on legacy DBs that haven't been
+           rebuilt; treat as "no derived child". Any other query error is
+           reported the same way. */
+        return null;
+    }
+}
+/**
+ * Phase 2A / Rec 5: bulk-load positive feedback event counts for the given
+ * entry ids. Used by the utility-decay forgetting curve to stabilize
+ * (extend the half-life of) memories that have repeatedly proven useful.
+ *
+ * Returns a `Map<entryId, count>` containing only entries with at least one
+ * positive feedback event — missing ids implicitly map to `0`. Chunks at
+ * `SQLITE_CHUNK_SIZE` (500) to respect `SQLITE_MAX_VARIABLE_NUMBER`.
+ *
+ * Cheap when called with zero ids, and silently empty when the
+ * `usage_events` table is missing.
+ */
+export function getPositiveFeedbackCountsByIds(db, ids) {
+    const result = new Map();
+    if (ids.length === 0)
+        return result;
+    for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) {
+        const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
+        const placeholders = chunk.map(() => "?").join(",");
+        try {
+            const rows = db
+                .prepare(`SELECT entry_id, COUNT(*) AS cnt
+             FROM usage_events
+             WHERE event_type = 'feedback'
+               AND signal = 'positive'
+               AND entry_id IN (${placeholders})
+             GROUP BY entry_id`)
+                .all(...chunk);
+            for (const row of rows) {
+                if (row.entry_id !== null && row.cnt > 0) {
+                    result.set(row.entry_id, row.cnt);
+                }
+            }
+        }
+        catch {
+            /* usage_events table may be missing on legacy DBs — treat as zero counts */
+        }
+    }
+    return result;
+}
+function deleteEntriesWhere(db, column, value) { // shared body of deleteEntriesByDir / deleteEntriesByStashDir
     db.transaction(() => {
-        const ids = db.prepare("SELECT id FROM entries WHERE dir_path = ?").all(dirPath);
+        const ids = db.prepare(`SELECT id FROM entries WHERE ${column} = ?`).all(value); // `column` is spliced into the SQL text: pass only trusted literal column names, never external input
         deleteRelatedRows(db, ids);
-        db.prepare("DELETE FROM entries WHERE dir_path = ?").run(dirPath);
+        db.prepare(`DELETE FROM entries WHERE ${column} = ?`).run(value); // runs after the related rows are gone, inside the same transaction
     })();
 }
+export function deleteEntriesByDir(db, dirPath) {
+    deleteEntriesWhere(db, "dir_path", dirPath); // removes every entry whose dir_path equals dirPath, plus its related rows
+}
 export function deleteEntriesByStashDir(db, stashDir) {
-    db.transaction(() => {
-        const ids = db.prepare("SELECT id FROM entries WHERE stash_dir = ?").all(stashDir);
-        deleteRelatedRows(db, ids);
-        db.prepare("DELETE FROM entries WHERE stash_dir = ?").run(stashDir);
-    })();
+    deleteEntriesWhere(db, "stash_dir", stashDir); // removes every entry whose stash_dir equals stashDir, plus its related rows
 }
-const SQLITE_CHUNK_SIZE = 500;
 function deleteRelatedRows(db, ids) {
     if (ids.length === 0)
         return;
@@ -480,13 +866,6 @@ function deleteRelatedRows(db, ids) {
         catch {
             /* ignore */
         }
-        // Also delete from FTS table so orphaned FTS rows don't remain
-        try {
-            db.prepare(`DELETE FROM entries_fts WHERE entry_id IN (${placeholders})`).run(...chunk);
-        }
-        catch {
-            /* ignore */
-        }
         if (vecAvail) {
             try {
                 db.prepare(`DELETE FROM entries_vec WHERE id IN (${placeholders})`).run(...chunk);
@@ -502,6 +881,12 @@ function deleteRelatedRows(db, ids) {
         catch {
             /* ignore */
         }
+        try {
+            db.prepare(`DELETE FROM utility_scores_scoped WHERE entry_id IN (${placeholders})`).run(...chunk);
+        }
+        catch {
+            /* ignore */
+        }
         // Clean up usage events before deleting entries
         try {
             db.prepare(`DELETE FROM usage_events WHERE entry_id IN (${placeholders})`).run(...chunk);
@@ -511,6 +896,26 @@ function deleteRelatedRows(db, ids) {
         }
     }
 }
+/**
+ * Delete entries by their primary key IDs, along with the related rows that
+ * `deleteRelatedRows` removes for them (among others `entries_vec`,
+ * `utility_scores_scoped` and `usage_events`).
+ *
+ * NOTE(review): the `entries_fts` delete that used to live in
+ * `deleteRelatedRows` is removed in this revision, so FTS rows for these
+ * entries are presumably cleaned up elsewhere — confirm.
+ *
+ * Used by the `--clean` post-pass to remove stale entries whose source files
+ * no longer exist on disk.
+ *
+ * @param db Open database handle exposing `transaction()` and `prepare()`.
+ * @param ids Entry primary keys; an empty list is a no-op.
+ */
+export function deleteEntriesByIds(db, ids) {
+    if (ids.length === 0)
+        return;
+    db.transaction(() => {
+        const idObjs = ids.map((id) => ({ id })); // mirror the `{ id }` row shape the sibling callers pass from `SELECT id`
+        deleteRelatedRows(db, idObjs);
+        for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) {
+            const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
+            const placeholders = chunk.map(() => "?").join(",");
+            db.prepare(`DELETE FROM entries WHERE id IN (${placeholders})`).run(...chunk);
+        }
+    })();
+}
 /**
  * Rebuild the FTS5 search index.
  *
@@ -585,19 +990,32 @@ export function rebuildFts(db, options) {
 }
 // ── Vector operations ───────────────────────────────────────────────────────
 export function upsertEmbedding(db, entryId, embedding) {
+    // Pre-flight FK guard: when an entry is deleted between when its id is queued
+    // for embedding and when this INSERT runs (e.g. consolidation deletes during
+    // a concurrent improve cycle), the INSERT throws "FOREIGN KEY constraint failed"
+    // and rolls back the entire batch transaction in the caller, losing every
+    // embedding for that run. A cheap SELECT here turns the race into a clean skip.
+    const exists = db.prepare("SELECT 1 FROM entries WHERE id = ?").get(entryId);
+    if (!exists)
+        return false; // entry is gone — nothing was written
     const buf = float32Buffer(embedding);
     // Always write to BLOB table (works without sqlite-vec)
     db.prepare("INSERT OR REPLACE INTO embeddings (id, embedding) VALUES (?, ?)").run(entryId, buf);
-    // Also write to sqlite-vec table when available (fast path)
+    // Also write to sqlite-vec table when available (fast path).
+    // Wrapped in a transaction so a crash between DELETE and INSERT does not
+    // leave the entry missing from the vec table.
     if (isVecAvailable(db)) {
         try {
-            db.prepare("DELETE FROM entries_vec WHERE id = ?").run(entryId);
+            db.transaction(() => {
+                db.prepare("DELETE FROM entries_vec WHERE id = ?").run(entryId);
+                db.prepare("INSERT INTO entries_vec (id, embedding) VALUES (?, ?)").run(entryId, buf);
+            })();
         }
         catch {
-            /* ignore */
+            /* ignore — vec table unavailable or constraint failure; the BLOB row above has already been written */
         }
-        db.prepare("INSERT INTO entries_vec (id, embedding) VALUES (?, ?)").run(entryId, buf);
     }
+    return true; // BLOB row written; the vec-table write may still have been skipped silently
 }
 export function searchVec(db, queryEmbedding, k) {
     // Fast path: use sqlite-vec when available
@@ -723,7 +1141,7 @@ function runFtsQuery(db, ftsQuery, limit, entryType) {
       JOIN entries e ON e.id = f.entry_id
       WHERE entries_fts MATCH ?
         AND e.entry_type = ?
-      ORDER BY bm25Score
+      ORDER BY bm25Score, e.id ASC
       LIMIT ?
     `;
         params = [ftsQuery, entryType, limit];
@@ -735,7 +1153,7 @@ function runFtsQuery(db, ftsQuery, limit, entryType) {
       FROM entries_fts f
       JOIN entries e ON e.id = f.entry_id
       WHERE entries_fts MATCH ?
-      ORDER BY bm25Score
+      ORDER BY bm25Score, e.id ASC
       LIMIT ?
     `;
         params = [ftsQuery, limit];
@@ -784,21 +1202,7 @@ export function sanitizeFtsQuery(query) {
     // contain ALL terms.
     return tokens.join(" ");
 }
-// ── All entries ─────────────────────────────────────────────────────────────
-export function getAllEntries(db, entryType) {
-    let sql;
-    let params;
-    if (entryType && entryType !== "any") {
-        sql =
-            "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?";
-        params = [entryType];
-    }
-    else {
-        sql = "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries";
-        params = [];
-    }
-    const rows = db.prepare(sql).all(...params);
-    // Guard against corrupt JSON — skip the row rather than crashing
+function parseEntryRows(rows, context) {
     const entries = [];
     for (const row of rows) {
         let entry;
@@ -806,7 +1210,7 @@ export function getAllEntries(db, entryType) {
             entry = JSON.parse(row.entry_json);
         }
         catch {
-            warn(`[db] getAllEntries: skipping entry id=${row.id} — corrupt entry_json`);
+            warn(`[db] ${context}: skipping entry id=${row.id} — corrupt entry_json`);
             continue;
         }
         entries.push({
@@ -821,6 +1225,21 @@ export function getAllEntries(db, entryType) {
     }
     return entries;
 }
+export function getAllEntries(db, entryType) {
+    let sql;
+    let params;
+    if (entryType && entryType !== "any") {
+        sql =
+            "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?";
+        params = [entryType];
+    }
+    else {
+        sql = "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries";
+        params = [];
+    }
+    const rows = db.prepare(sql).all(...params);
+    return parseEntryRows(rows, "getAllEntries");
+}
 export function findEntryIdByRef(db, ref) {
     const parsed = parseAssetRef(ref);
     const nameVariants = [parsed.name];
@@ -866,28 +1285,7 @@ export function getEntriesByDir(db, dirPath) {
     const rows = db
         .prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?")
         .all(dirPath);
-    // Guard against corrupt JSON — skip the row rather than crashing
-    const entries = [];
-    for (const row of rows) {
-        let entry;
-        try {
-            entry = JSON.parse(row.entry_json);
-        }
-        catch {
-            warn(`[db] getEntriesByDir: skipping entry id=${row.id} — corrupt entry_json`);
-            continue;
-        }
-        entries.push({
-            id: row.id,
-            entryKey: row.entry_key,
-            dirPath: row.dir_path,
-            filePath: row.file_path,
-            stashDir: row.stash_dir,
-            entry,
-            searchText: row.search_text,
-        });
-    }
-    return entries;
+    return parseEntryRows(rows, "getEntriesByDir");
 }
 /**
  * Get the utility score for an entry, or undefined if none exists.
@@ -910,12 +1308,17 @@ export function getUtilityScore(db, entryId) {
 }
 /**
  * Batch-load utility scores for multiple entry IDs in a single query.
- * Returns a Map keyed by entry_id for O(1) lookup.
+ * Returns a `{ global, scoped }` pair, both Maps keyed by entry_id.
+ *
+ * When `scopeKey` is provided a second query runs against
+ * `utility_scores_scoped` and the result is returned as `scoped`.
+ * Both maps are always present; `scoped` is empty when `scopeKey` is absent.
  */
-export function getUtilityScoresByIds(db, ids) {
+export function getUtilityScoresByIds(db, ids, scopeKey) {
+    const global = new Map();
+    const scoped = new Map();
     if (ids.length === 0)
-        return new Map();
-    const result = new Map();
+        return { global, scoped };
     // Process in chunks to stay within SQLITE_MAX_VARIABLE_NUMBER
     for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) {
         const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
@@ -924,7 +1327,7 @@ export function getUtilityScoresByIds(db, ids) {
             .prepare(`SELECT entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at FROM utility_scores WHERE entry_id IN (${placeholders})`)
             .all(...chunk);
         for (const row of rows) {
-            result.set(row.entry_id, {
+            global.set(row.entry_id, {
                 entryId: row.entry_id,
                 utility: row.utility,
                 showCount: row.show_count,
@@ -934,8 +1337,21 @@ export function getUtilityScoresByIds(db, ids) {
                 updatedAt: row.updated_at,
             });
         }
+        if (scopeKey) {
+            const scopedRows = db
+                .prepare(`SELECT entry_id, scope_key, utility, last_used_at FROM utility_scores_scoped WHERE scope_key = ? AND entry_id IN (${placeholders})`)
+                .all(scopeKey, ...chunk);
+            for (const row of scopedRows) {
+                scoped.set(row.entry_id, {
+                    entryId: row.entry_id,
+                    scopeKey: row.scope_key,
+                    utility: row.utility,
+                    lastUsedAt: row.last_used_at,
+                });
+            }
+        }
     }
-    return result;
+    return { global, scoped };
 }
 /**
  * Insert or update a utility score for an entry.
@@ -953,3 +1369,417 @@ export function upsertUtilityScore(db, entryId, data) {
       updated_at = datetime('now')
   `).run(entryId, data.utility, data.showCount, data.searchCount, data.selectRate, data.lastUsedAt ?? null);
 }
+/**
+ * Look up a cached LLM result for the given asset_ref.
+ *
+ * Returns `undefined` when no entry exists OR when the stored body_hash
+ * doesn't match `currentBodyHash` (body has changed since the result was
+ * cached). In both cases the caller should invoke the LLM and write a new
+ * cache entry.
+ */
+export function getLlmCacheEntry(db, assetRef, currentBodyHash, cacheVariant = "") {
+    const row = db
+        .prepare("SELECT asset_ref, cache_variant, body_hash, result_json, updated_at FROM llm_enrichment_cache WHERE asset_ref = ? AND cache_variant = ?")
+        .get(assetRef, cacheVariant);
+    if (!row)
+        return undefined;
+    // Hash mismatch → body changed, treat as cache miss.
+    if (row.body_hash !== currentBodyHash)
+        return undefined;
+    return {
+        assetRef: row.asset_ref,
+        cacheVariant: row.cache_variant,
+        bodyHash: row.body_hash,
+        resultJson: row.result_json,
+        updatedAt: row.updated_at,
+    };
+}
+/**
+ * Batched variant of {@link getLlmCacheEntry}. Fetches every cache row whose
+ * `asset_ref` is in `refs` with a single `IN (...)` query (chunked to respect
+ * SQLITE_MAX_VARIABLE_NUMBER), returning a `Map<assetRef, LlmCacheEntry>`.
+ *
+ * Unlike `getLlmCacheEntry`, this does NOT filter by body hash — callers must
+ * compare `entry.bodyHash` against the current body hash themselves. This lets
+ * the batch path issue one DB query per chunk instead of one per file.
+ */
+export function getLlmCacheEntriesByRefs(db, refs, cacheVariant = "") {
+    const result = new Map();
+    if (refs.length === 0)
+        return result;
+    for (let i = 0; i < refs.length; i += SQLITE_CHUNK_SIZE) {
+        const chunk = refs.slice(i, i + SQLITE_CHUNK_SIZE);
+        const placeholders = chunk.map(() => "?").join(", ");
+        const rows = db
+            .prepare(`SELECT asset_ref, cache_variant, body_hash, result_json, updated_at FROM llm_enrichment_cache
+         WHERE cache_variant = ? AND asset_ref IN (${placeholders})`)
+            .all(cacheVariant, ...chunk);
+        for (const row of rows) {
+            result.set(row.asset_ref, {
+                assetRef: row.asset_ref,
+                cacheVariant: row.cache_variant,
+                bodyHash: row.body_hash,
+                resultJson: row.result_json,
+                updatedAt: row.updated_at,
+            });
+        }
+    }
+    return result;
+}
+/**
+ * Insert or update a cached LLM result for the given asset_ref.
+ *
+ * Rows are keyed by `(asset_ref, cache_variant)`; on conflict the stored
+ * `body_hash`, `result_json` and `updated_at` are overwritten. `updated_at`
+ * is written as `Date.now()`, i.e. epoch milliseconds.
+ *
+ * @param db Open database handle exposing `prepare()`.
+ * @param assetRef Cache key identifying the asset.
+ * @param bodyHash Hash of the body the result was computed from.
+ * @param resultJson Result payload, stored as given.
+ * @param cacheVariant Variant discriminator; defaults to `""`.
+ */
+export function upsertLlmCacheEntry(db, assetRef, bodyHash, resultJson, cacheVariant = "") {
+    db.prepare(`INSERT INTO llm_enrichment_cache (asset_ref, cache_variant, body_hash, result_json, updated_at)
+     VALUES (?, ?, ?, ?, ?)
+     ON CONFLICT(asset_ref, cache_variant) DO UPDATE SET
+        body_hash   = excluded.body_hash,
+        result_json = excluded.result_json,
+        updated_at  = excluded.updated_at`).run(assetRef, cacheVariant, bodyHash, resultJson, Date.now());
+}
+/**
+ * Delete LLM cache entries whose asset_ref is no longer present in the
+ * `entries` table. Should be called during the cleanup phase of each index
+ * run to prevent the cache from growing unboundedly as assets are removed.
+ *
+ * A cache row survives when its `asset_ref` exactly equals a live
+ * `entries.file_path` or a live `entries.entry_key`; the statement uses two
+ * `NOT IN` subqueries, with no join or LIKE matching. Both columns are
+ * checked because graph/memory cache refs are absolute file paths, while
+ * enrichment cache refs are entry_key strings.
+ *
+ * NOTE(review): `NOT IN` never matches when its subquery yields a NULL, so
+ * this relies on `file_path` and `entry_key` being non-NULL — confirm
+ * against the schema.
+ *
+ * Any error raised by the statement is swallowed.
+ *
+ * @param db Open database handle exposing `exec()`.
+ */
+export function clearStaleCacheEntries(db) {
+    try {
+        db.exec(`
+      DELETE FROM llm_enrichment_cache
+      WHERE asset_ref NOT IN (SELECT file_path FROM entries)
+        AND asset_ref NOT IN (SELECT entry_key FROM entries)
+    `);
+    }
+    catch {
+        /* ignore — table may not exist in very old DBs opened without ensureSchema */
+    }
+}
+/**
+ * Compute a stable SHA-256 hex digest of a UTF-8 string using Bun's native
+ * hashing. Used as the body_hash key in `llm_enrichment_cache`.
+ *
+ * Bun.CryptoHasher is synchronous and allocation-free compared to Web Crypto,
+ * making it suitable for use inside tight per-asset loops.
+ *
+ * Depends on the `Bun` global, so this function only works under the Bun
+ * runtime.
+ *
+ * @param body Text to hash.
+ * @returns The digest as a hex string.
+ */
+export function computeBodyHash(body) {
+    const hasher = new Bun.CryptoHasher("sha256");
+    hasher.update(body);
+    return hasher.digest("hex");
+}
+/**
+ * Count search and show events for the given entry refs.
+ * Returns a Map<ref, count> with only refs that have at least one event.
+ * Used by the improve loop to find high-retrieval assets without feedback.
+ */
+export function getRetrievalCounts(db, refs) {
+    if (refs.length === 0)
+        return new Map();
+    const result = new Map();
+    // Chunk to stay within SQLITE_MAX_VARIABLE_NUMBER (same pattern as getUtilityScoresByIds).
+    for (let i = 0; i < refs.length; i += SQLITE_CHUNK_SIZE) {
+        const chunk = refs.slice(i, i + SQLITE_CHUNK_SIZE);
+        const placeholders = chunk.map(() => "?").join(", ");
+        const rows = db
+            .prepare(`SELECT entry_ref, COUNT(*) AS cnt FROM usage_events
+         WHERE event_type IN ('search','show') AND entry_ref IN (${placeholders})
+         GROUP BY entry_ref`)
+            .all(...chunk);
+        for (const r of rows)
+            result.set(r.entry_ref, r.cnt);
+    }
+    return result;
+}
+/**
+ * Apply a MemRL reward signal to a batch of entries via exponential moving
+ * average (EMA): next = clamp(current + lr * (reward - current), 0, 1).
+ *
+ * Wrapped in a single transaction so all bumps succeed or fail together.
+ * The indexer (`akm index`) will overwrite these values at next reindex run;
+ * bumps are intentionally temporary hints between index runs, not permanent
+ * overrides.
+ *
+ * When `scopeKey` is provided, also writes a scoped bump to
+ * `utility_scores_scoped` so per-project usage signals accumulate alongside
+ * the global ones. The global table is always updated regardless.
+ *
+ * Details worth knowing when changing this function:
+ * - Global utilities are loaded once, before the loop, while the scoped
+ *   utility is re-read for every id. An id that appears twice in `entryIds`
+ *   therefore compounds only in the scoped table.
+ * - An entry without a global (or scoped) row starts from 0 here, whereas
+ *   `applyFeedbackToUtilityScore` starts new entries at 0.5.
+ * - On conflict the global upsert changes only `utility` and `updated_at`;
+ *   counters and `last_used_at` of an existing row are kept.
+ * - Global timestamps are ISO-8601 strings, but the scoped `last_used_at`
+ *   is written as epoch milliseconds. NOTE(review): confirm that matches
+ *   the `utility_scores_scoped` schema and its readers.
+ *
+ * @param db Open database handle exposing `transaction()` and `prepare()`.
+ * @param entryIds Entry ids to bump; an empty list is a no-op.
+ * @param reward Target value the utility is moved towards.
+ * @param lr Learning rate (step fraction); defaults to 0.1.
+ * @param scopeKey Optional scope; enables the additional scoped write.
+ */
+export function bumpUtilityScoresBatch(db, entryIds, reward, lr = 0.1, scopeKey) {
+    if (entryIds.length === 0)
+        return;
+    db.transaction(() => {
+        const { global: scoreMap } = getUtilityScoresByIds(db, entryIds); // snapshot taken once, before any write in this batch
+        const now = new Date().toISOString();
+        const nowMs = Date.now();
+        const stmt = db.prepare(`INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
+       VALUES (?, ?, 0, 0, 0, ?, ?)
+       ON CONFLICT(entry_id) DO UPDATE SET
+         utility = excluded.utility,
+         updated_at = excluded.updated_at`);
+        // Prepare scoped upsert once outside the loop when scopeKey is present.
+        const scopedStmt = scopeKey
+            ? db.prepare(`INSERT INTO utility_scores_scoped (entry_id, scope_key, utility, last_used_at)
+           VALUES (?, ?, ?, ?)
+           ON CONFLICT(entry_id, scope_key) DO UPDATE SET
+             utility = excluded.utility,
+             last_used_at = excluded.last_used_at`)
+            : null;
+        for (const entryId of entryIds) {
+            const existing = scoreMap.get(entryId);
+            const current = existing?.utility ?? 0; // entries with no global row yet start from 0
+            const next = Math.max(0, Math.min(1, current + lr * (reward - current)));
+            stmt.run(entryId, next, now, now);
+            if (scopedStmt && scopeKey) {
+                // Retrieve the current scoped utility so we can apply the same EMA.
+                const scopedCurrent = getScopedUtility(db, entryId, scopeKey);
+                const scopedNext = Math.max(0, Math.min(1, scopedCurrent + lr * (reward - scopedCurrent)));
+                scopedStmt.run(entryId, scopeKey, scopedNext, nowMs); // last_used_at stored as epoch ms here
+            }
+        }
+    })();
+}
+/**
+ * Return the current utility value for a single (entry_id, scope_key) pair.
+ * Returns 0 when no row exists yet.
+ */
+function getScopedUtility(db, entryId, scopeKey) {
+    const row = db
+        .prepare("SELECT utility FROM utility_scores_scoped WHERE entry_id = ? AND scope_key = ?")
+        .get(entryId, scopeKey);
+    return row?.utility ?? 0;
+}
+// ── Indexer-phase helpers (moved from indexer.ts) ────────────────────────────
+/**
+ * Return the entries that do not yet have a row in `embeddings`, excluding
+ * entries whose `entry_type` is `'vault'`.
+ * Used by the embedding phase to determine which entries need vectors generated.
+ *
+ * @param db Open database handle exposing `prepare()`.
+ * @returns Rows shaped `{ id, searchText, entryKey, filePath }`.
+ */
+export function getAllEntriesForEmbedding(db) {
+    return db
+        .prepare(`
+      SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
+      WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
+        AND e.entry_type != 'vault'
+    `)
+        .all();
+}
+/**
+ * Upsert a workflow document record for an indexed entry.
+ * Persists the parsed workflow AST as JSON alongside a FNV-1a hash of the
+ * source content for future incremental fast-paths.
+ *
+ * One row is kept per `entry_id`; on conflict every stored column is
+ * replaced and `updated_at` is set to the current ISO-8601 timestamp.
+ *
+ * @param db Open database handle exposing `prepare()`.
+ * @param entryId Id of the entry the document belongs to.
+ * @param doc Parsed workflow document; `doc.schemaVersion` and
+ *   `doc.source.path` are read, and the whole object is stored as JSON.
+ * @param content Source content handed to `computeSourceHash` —
+ *   presumably the raw bytes of the source file; verify against the caller.
+ */
+export function upsertWorkflowDocument(db, entryId, doc, content) {
+    const sourceHash = computeSourceHash(content); // identity key only, not a security hash
+    db.prepare(`INSERT INTO workflow_documents (entry_id, schema_version, document_json, source_path, source_hash, updated_at)
+     VALUES (?, ?, ?, ?, ?, ?)
+     ON CONFLICT(entry_id) DO UPDATE SET
+       schema_version = excluded.schema_version,
+       document_json = excluded.document_json,
+       source_path = excluded.source_path,
+       source_hash = excluded.source_hash,
+       updated_at = excluded.updated_at`).run(entryId, doc.schemaVersion, JSON.stringify(doc), doc.source.path, sourceHash, new Date().toISOString());
+}
+/**
+ * Compute a cheap FNV-1a hash of a buffer for source-identity tracking.
+ * Not security-sensitive; used as an incremental fast-path skip key.
+ */
+export function computeSourceHash(content) {
+    let hash = 0x811c9dc5;
+    for (let i = 0; i < content.length; i++) {
+        hash ^= content[i];
+        hash = Math.imul(hash, 0x01000193);
+    }
+    return (hash >>> 0).toString(16);
+}
+/**
+ * Return distinct zero-result search queries from the `usage_events` table
+ * within the given lookback window.
+ *
+ * Reads from `usage_events` (event_type = 'search') where the metadata JSON
+ * blob contains `resultCount = 0`. The `search_events` table never existed;
+ * all errors are caught and an empty array is returned so callers never need
+ * to guard against DB schema differences.
+ *
+ * At most 20 queries are returned, and events whose metadata has no
+ * `query` value are dropped from the result.
+ *
+ * @param db Open database handle exposing `prepare()`.
+ * @param sinceDays Lookback window in days; defaults to 30.
+ * @returns Array of query values taken from the event metadata.
+ */
+export function getZeroResultSearches(db, sinceDays = 30) {
+    const since = new Date(Date.now() - sinceDays * 24 * 60 * 60 * 1000).toISOString(); // ISO-8601 cutoff compared against created_at
+    try {
+        const rows = db
+            .prepare(`SELECT DISTINCT json_extract(metadata, '$.query') AS query
+         FROM usage_events
+         WHERE event_type = 'search'
+           AND created_at >= ?
+           AND json_extract(metadata, '$.resultCount') = 0
+         ORDER BY created_at DESC LIMIT 20`)
+            .all(since);
+        return rows.map((r) => r.query).filter((q) => q !== null);
+    }
+    catch {
+        return []; // table may not exist in older DBs
+    }
+}
+/**
+ * Look up an entry's numeric id by asset type and name.
+ *
+ * Matches the row whose `entry_type` equals `type` and whose `entry_key`
+ * equals `"<type>:<name>"`.
+ *
+ * @param db Open database handle exposing `prepare()`.
+ * @param type Asset type, used both as the `entry_type` filter and as the key prefix.
+ * @param name Asset name, used as the key suffix.
+ * @returns The `{ id }` row, or the driver's "no row" value from `.get()`
+ *   (`null` or `undefined`) when nothing matches.
+ */
+export function getEntryByRef(db, type, name) {
+    return db.prepare("SELECT id FROM entries WHERE entry_type = ? AND entry_key = ?").get(type, `${type}:${name}`);
+}
+/**
+ * MemRL learning rate for feedback-driven utility updates (F-5 / #386).
+ *
+ * Follows the bounded-step formula from MemRL (arXiv:2601.03192):
+ *   next = clamp(current + lr × (reward − current), 0, 1)
+ *
+ * This replaces the unbounded `-0.03 × negativeCount` delta that could
+ * silently remove high-utility assets from the improvement loop.
+ */
+const FEEDBACK_LR = 0.1;
+/**
+ * Positive reward signal for a single positive feedback event.
+ * Reward 1.0 means "fully correct / helpful".
+ */
+const FEEDBACK_REWARD_POSITIVE = 1.0;
+/**
+ * Negative reward signal for a single negative feedback event.
+ * Reward 0.0 means "not helpful" (lowest MemRL signal).
+ */
+const FEEDBACK_REWARD_NEGATIVE = 0.0;
+/**
+ * Maximum total negative utility delta allowed in a single
+ * `applyFeedbackToUtilityScore` call regardless of negativeCount.
+ *
+ * The cap is per call, not per day or per session: each call can lower the
+ * utility by at most `MAX_NEG_DELTA_PER_CALL`, so a stream of calls can
+ * still lower it further. The cap prevents a noisy negative-feedback batch
+ * from silently destroying a high-utility asset's ranking in one step.
+ *
+ * With `FEEDBACK_LR = 0.1` and utilities kept within [0, 1], the EMA step
+ * never goes below -0.1, so this cap does not currently bind; it only
+ * takes effect if the learning rate is raised above 0.15.
+ */
+const MAX_NEG_DELTA_PER_CALL = 0.15;
+/**
+ * Utility threshold below which a review-needed escalation is triggered.
+ * When a previously high-utility asset (≥ HIGH_UTILITY_THRESHOLD) drops
+ * below this value, the caller should create an escalation proposal.
+ */
+export const UTILITY_REVIEW_THRESHOLD = 0.5;
+/**
+ * Utility level considered "high" — assets at or above this are tracked for
+ * threshold-crossing escalation. Currently the same value as
+ * `UTILITY_REVIEW_THRESHOLD`, so any drop from ≥ 0.5 to below 0.5 counts
+ * as a crossing.
+ */
+export const HIGH_UTILITY_THRESHOLD = 0.5;
+/**
+ * Apply accumulated feedback counts to the utility score of an entry using the
+ * MemRL bounded-step EMA formula (F-5 / #386, arXiv:2601.03192).
+ *
+ * Replaces the previous unbounded `-0.03 × negativeCount` formula with:
+ *
+ *   reward   = weighted average of positive and negative signals
+ *   nextUtil = clamp(currentUtil + lr × (reward − currentUtil), 0, 1)
+ *
+ * The negative impact is additionally capped at {@link MAX_NEG_DELTA_PER_CALL}
+ * to prevent a noisy feedback stream from silently erasing a high-utility asset.
+ *
+ * A new entry starts at 0.5 (neutral midpoint) before the EMA step is applied.
+ *
+ * Returns a {@link FeedbackUtilityResult} so the caller can detect when a
+ * previously high-utility asset crosses below the review threshold and create
+ * an escalation proposal.
+ *
+ * When both counts are zero nothing is written and the current (or default
+ * 0.5) utility is returned unchanged. Otherwise the row is upserted: a new
+ * row gets zeroed counters, an existing row has only `utility` and
+ * `updated_at` changed.
+ *
+ * @param db Open database handle exposing `prepare()`.
+ * @param entryId Entry whose utility is adjusted.
+ * @param positiveCount Number of positive feedback events to apply.
+ * @param negativeCount Number of negative feedback events to apply.
+ * @returns `{ previousUtility, nextUtility, crossedReviewThreshold }`.
+ */
+export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negativeCount) {
+    const existing = getUtilityScore(db, entryId);
+    const previousUtility = existing?.utility ?? 0.5;
+    if (positiveCount === 0 && negativeCount === 0) {
+        return { previousUtility, nextUtility: previousUtility, crossedReviewThreshold: false };
+    }
+    const total = positiveCount + negativeCount;
+    // Weighted reward: proportion of positive signals.
+    const reward = positiveCount > 0 && negativeCount === 0
+        ? FEEDBACK_REWARD_POSITIVE
+        : negativeCount > 0 && positiveCount === 0
+            ? FEEDBACK_REWARD_NEGATIVE
+            : (positiveCount * FEEDBACK_REWARD_POSITIVE + negativeCount * FEEDBACK_REWARD_NEGATIVE) / total;
+    // MemRL bounded-step EMA: lr × (reward − current)
+    let delta = FEEDBACK_LR * (reward - previousUtility);
+    // Per-call negative cap: if delta is negative (net negative feedback), cap it.
+    if (delta < 0) {
+        delta = Math.max(delta, -MAX_NEG_DELTA_PER_CALL); // with the current constants |delta| ≤ FEEDBACK_LR for utilities in [0, 1], so this is a safety net
+    }
+    const nextUtility = Math.max(0, Math.min(1, previousUtility + delta));
+    const now = new Date().toISOString();
+    db.prepare(`
+    INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
+    VALUES (?, ?, 0, 0, 0, ?, ?)
+    ON CONFLICT(entry_id) DO UPDATE SET
+      utility    = ?,
+      updated_at = ?
+  `).run(entryId, nextUtility, now, now, nextUtility, now);
+    const crossedReviewThreshold = previousUtility >= HIGH_UTILITY_THRESHOLD && nextUtility < UTILITY_REVIEW_THRESHOLD; // true only on the step that moves the score from at/above to below the threshold
+    return { previousUtility, nextUtility, crossedReviewThreshold };
+}
+/**
+ * Re-link detached usage_events to their current entry_ids via entry_ref.
+ *
+ * After a full rebuild, entry IDs change. This query matches events to their
+ * new entry rows using the stable `entry_ref` ("type:name") column so usage
+ * history survives a full reindex.
+ */
+export function relinkUsageEvents(db) {
+    try {
+        db.exec(`
+      UPDATE usage_events SET entry_id = (
+        SELECT e.id FROM entries e
+        WHERE substr(e.entry_key, length(e.entry_key) - length(usage_events.entry_ref)) = ':' || usage_events.entry_ref
+        LIMIT 1
+      )
+      WHERE entry_id IS NULL AND entry_ref IS NOT NULL
+    `);
+    }
+    catch {
+        /* ignore if table doesn't exist yet */
+    }
+}
+// ── registry_index_cache helpers ─────────────────────────────────────────────
+/**
+ * Upsert a registry index cache entry in index.db.
+ *
+ * @param db          - Open index.db connection (from openDatabase / openExistingDatabase).
+ * @param registryUrl - Canonical URL of the registry (used as primary key).
+ * @param indexJson   - Serialised registry index document (JSON string).
+ * @param opts.etag        - HTTP ETag from the response (optional).
+ * @param opts.lastModified - HTTP Last-Modified from the response (optional).
+ */
+export function upsertRegistryIndexCache(db, registryUrl, indexJson, opts) {
+    db.prepare(`
+    INSERT INTO registry_index_cache (registry_url, fetched_at, etag, last_modified, index_json)
+    VALUES (?, ?, ?, ?, ?)
+    ON CONFLICT(registry_url) DO UPDATE SET
+      fetched_at    = excluded.fetched_at,
+      etag          = excluded.etag,
+      last_modified = excluded.last_modified,
+      index_json    = excluded.index_json
+  `).run(registryUrl, new Date().toISOString(), opts?.etag ?? null, opts?.lastModified ?? null, indexJson);
+}
+/**
+ * Look up a cached registry index entry from index.db.
+ * Returns undefined when not found or when the entry is older than `maxAgeMs`.
+ *
+ * TTL check: if `Date.now() - new Date(fetched_at).getTime() > maxAgeMs` the
+ * entry is considered a cache miss and undefined is returned.
+ *
+ * @param db          - Open index.db connection.
+ * @param registryUrl - Canonical URL of the registry (primary key).
+ * @param maxAgeMs    - Maximum age in milliseconds before the entry is stale (default: 1 hour).
+ */
+export function getRegistryIndexCache(db, registryUrl, maxAgeMs = 3_600_000 /* 1 hour */) {
+    const row = db
+        .prepare(`SELECT fetched_at, etag, last_modified, index_json
+       FROM registry_index_cache WHERE registry_url = ?`)
+        .get(registryUrl);
+    if (!row)
+        return undefined;
+    const fetchedAt = Date.parse(row.fetched_at);
+    if (Number.isNaN(fetchedAt) || Date.now() - fetchedAt > maxAgeMs)
+        return undefined;
+    return { indexJson: row.index_json, etag: row.etag, lastModified: row.last_modified };
+}