akm-cli 0.8.0-rc1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +191 -3
  2. package/README.md +22 -6
  3. package/SECURITY.md +93 -0
  4. package/dist/cli/config-migrate.js +144 -0
  5. package/dist/cli/config-validate.js +39 -0
  6. package/dist/cli/confirm.js +73 -0
  7. package/dist/cli/parse-args.js +93 -3
  8. package/dist/cli/shared.js +129 -0
  9. package/dist/cli.js +2162 -1258
  10. package/dist/commands/add-cli.js +279 -0
  11. package/dist/commands/agent-dispatch.js +20 -12
  12. package/dist/commands/agent-support.js +11 -5
  13. package/dist/commands/completions.js +3 -0
  14. package/dist/commands/config-cli.js +129 -517
  15. package/dist/commands/consolidate.js +1533 -144
  16. package/dist/commands/curate.js +44 -3
  17. package/dist/commands/db-cli.js +23 -0
  18. package/dist/commands/distill-promotion-policy.js +5 -3
  19. package/dist/commands/distill.js +906 -100
  20. package/dist/commands/env.js +213 -0
  21. package/dist/commands/eval-cases.js +3 -0
  22. package/dist/commands/events.js +3 -0
  23. package/dist/commands/extract-cli.js +127 -0
  24. package/dist/commands/extract-prompt.js +204 -0
  25. package/dist/commands/extract.js +477 -0
  26. package/dist/commands/feedback-cli.js +331 -0
  27. package/dist/commands/graph.js +260 -5
  28. package/dist/commands/health.js +977 -51
  29. package/dist/commands/help/help-accept.md +6 -3
  30. package/dist/commands/help/help-improve.md +36 -8
  31. package/dist/commands/help/help-proposals.md +7 -4
  32. package/dist/commands/help/help-reject.md +5 -2
  33. package/dist/commands/history.js +51 -16
  34. package/dist/commands/improve-auto-accept.js +97 -0
  35. package/dist/commands/improve-cli.js +236 -0
  36. package/dist/commands/improve-profiles.js +184 -0
  37. package/dist/commands/improve-result-file.js +167 -0
  38. package/dist/commands/improve.js +1725 -332
  39. package/dist/commands/info.js +3 -0
  40. package/dist/commands/init.js +49 -1
  41. package/dist/commands/installed-stashes.js +6 -23
  42. package/dist/commands/knowledge.js +3 -0
  43. package/dist/commands/lint/agent-linter.js +3 -0
  44. package/dist/commands/lint/base-linter.js +233 -5
  45. package/dist/commands/lint/command-linter.js +3 -0
  46. package/dist/commands/lint/default-linter.js +3 -0
  47. package/dist/commands/lint/env-key-rules.js +154 -0
  48. package/dist/commands/lint/index.js +92 -3
  49. package/dist/commands/lint/knowledge-linter.js +3 -0
  50. package/dist/commands/lint/markdown-insertion.js +343 -0
  51. package/dist/commands/lint/memory-linter.js +3 -0
  52. package/dist/commands/lint/registry.js +3 -0
  53. package/dist/commands/lint/skill-linter.js +3 -0
  54. package/dist/commands/lint/task-linter.js +15 -12
  55. package/dist/commands/lint/types.js +3 -0
  56. package/dist/commands/lint/workflow-linter.js +3 -0
  57. package/dist/commands/lint.js +3 -0
  58. package/dist/commands/migration-help.js +5 -2
  59. package/dist/commands/proposal-drain-policies.js +128 -0
  60. package/dist/commands/proposal-drain.js +477 -0
  61. package/dist/commands/proposal.js +60 -6
  62. package/dist/commands/propose.js +24 -19
  63. package/dist/commands/reflect.js +1004 -94
  64. package/dist/commands/registry-cli.js +150 -0
  65. package/dist/commands/registry-search.js +3 -0
  66. package/dist/commands/remember-cli.js +257 -0
  67. package/dist/commands/remember.js +15 -6
  68. package/dist/commands/schema-repair.js +88 -15
  69. package/dist/commands/search.js +99 -14
  70. package/dist/commands/secret.js +173 -0
  71. package/dist/commands/self-update.js +3 -0
  72. package/dist/commands/show.js +32 -13
  73. package/dist/commands/source-add.js +7 -35
  74. package/dist/commands/source-clone.js +3 -0
  75. package/dist/commands/source-manage.js +3 -0
  76. package/dist/commands/tasks.js +161 -95
  77. package/dist/commands/url-checker.js +3 -0
  78. package/dist/core/action-contributors.js +3 -0
  79. package/dist/core/asset-ref.js +17 -2
  80. package/dist/core/asset-registry.js +9 -2
  81. package/dist/core/asset-serialize.js +88 -0
  82. package/dist/core/asset-spec.js +61 -5
  83. package/dist/core/common.js +93 -5
  84. package/dist/core/concurrent.js +3 -0
  85. package/dist/core/config-io.js +347 -0
  86. package/dist/core/config-migration.js +622 -0
  87. package/dist/core/config-schema.js +558 -0
  88. package/dist/core/config-sources.js +108 -0
  89. package/dist/core/config-types.js +4 -0
  90. package/dist/core/config-walker.js +337 -0
  91. package/dist/core/config.js +366 -1077
  92. package/dist/core/errors.js +42 -20
  93. package/dist/core/events.js +31 -25
  94. package/dist/core/file-lock.js +104 -0
  95. package/dist/core/frontmatter.js +75 -10
  96. package/dist/core/lesson-lint.js +3 -0
  97. package/dist/core/markdown.js +3 -0
  98. package/dist/core/memory-belief.js +62 -0
  99. package/dist/core/memory-contradiction-detect.js +274 -0
  100. package/dist/core/memory-improve.js +142 -14
  101. package/dist/core/parse.js +3 -0
  102. package/dist/core/paths.js +218 -50
  103. package/dist/core/proposal-quality-validators.js +380 -0
  104. package/dist/core/proposal-validators.js +11 -3
  105. package/dist/core/proposals.js +464 -5
  106. package/dist/core/state-db.js +349 -56
  107. package/dist/core/text-truncation.js +107 -0
  108. package/dist/core/time.js +3 -0
  109. package/dist/core/tty.js +59 -0
  110. package/dist/core/warn.js +7 -2
  111. package/dist/core/write-source.js +12 -0
  112. package/dist/indexer/db-backup.js +391 -0
  113. package/dist/indexer/db-search.js +136 -28
  114. package/dist/indexer/db.js +662 -166
  115. package/dist/indexer/ensure-index.js +3 -0
  116. package/dist/indexer/file-context.js +3 -0
  117. package/dist/indexer/graph-boost.js +162 -40
  118. package/dist/indexer/graph-db.js +241 -51
  119. package/dist/indexer/graph-dedup.js +3 -7
  120. package/dist/indexer/graph-extraction.js +242 -149
  121. package/dist/indexer/index-context.js +3 -9
  122. package/dist/indexer/indexer.js +84 -14
  123. package/dist/indexer/llm-cache.js +24 -19
  124. package/dist/indexer/manifest.js +3 -0
  125. package/dist/indexer/matchers.js +184 -11
  126. package/dist/indexer/memory-inference.js +94 -50
  127. package/dist/indexer/metadata-contributors.js +3 -0
  128. package/dist/indexer/metadata.js +114 -48
  129. package/dist/indexer/path-resolver.js +3 -0
  130. package/dist/indexer/project-context.js +192 -0
  131. package/dist/indexer/ranking-contributors.js +134 -7
  132. package/dist/indexer/ranking.js +8 -1
  133. package/dist/indexer/search-fields.js +5 -9
  134. package/dist/indexer/search-hit-enrichers.js +91 -2
  135. package/dist/indexer/search-source.js +20 -1
  136. package/dist/indexer/semantic-status.js +4 -1
  137. package/dist/indexer/staleness-detect.js +447 -0
  138. package/dist/indexer/usage-events.js +12 -9
  139. package/dist/indexer/walker.js +3 -0
  140. package/dist/integrations/agent/builders.js +135 -0
  141. package/dist/integrations/agent/config.js +121 -401
  142. package/dist/integrations/agent/detect.js +3 -0
  143. package/dist/integrations/agent/index.js +6 -14
  144. package/dist/integrations/agent/model-aliases.js +55 -0
  145. package/dist/integrations/agent/profiles.js +3 -0
  146. package/dist/integrations/agent/prompts.js +137 -8
  147. package/dist/integrations/agent/runner.js +208 -0
  148. package/dist/integrations/agent/sdk-runner.js +8 -2
  149. package/dist/integrations/agent/spawn.js +54 -14
  150. package/dist/integrations/github.js +3 -0
  151. package/dist/integrations/lockfile.js +22 -51
  152. package/dist/integrations/session-logs/index.js +4 -0
  153. package/dist/integrations/session-logs/inline-refs.js +35 -0
  154. package/dist/integrations/session-logs/pre-filter.js +152 -0
  155. package/dist/integrations/session-logs/providers/claude-code.js +226 -0
  156. package/dist/integrations/session-logs/providers/opencode.js +231 -25
  157. package/dist/integrations/session-logs/types.js +3 -0
  158. package/dist/llm/call-ai.js +14 -26
  159. package/dist/llm/client.js +16 -2
  160. package/dist/llm/embedder.js +20 -29
  161. package/dist/llm/embedders/cache.js +3 -7
  162. package/dist/llm/embedders/local.js +42 -1
  163. package/dist/llm/embedders/remote.js +20 -8
  164. package/dist/llm/embedders/types.js +3 -7
  165. package/dist/llm/feature-gate.js +92 -56
  166. package/dist/llm/graph-extract.js +401 -30
  167. package/dist/llm/index-passes.js +44 -29
  168. package/dist/llm/memory-infer.js +30 -2
  169. package/dist/llm/metadata-enhance.js +3 -7
  170. package/dist/llm/prompts/extract-session.md +80 -0
  171. package/dist/llm/prompts/graph-extract-user-prompt.md +24 -1
  172. package/dist/output/cli-hints-full.md +60 -32
  173. package/dist/output/cli-hints-short.md +10 -7
  174. package/dist/output/cli-hints.js +5 -2
  175. package/dist/output/context.js +60 -8
  176. package/dist/output/renderers.js +170 -194
  177. package/dist/output/shapes/curate.js +56 -0
  178. package/dist/output/shapes/distill.js +10 -0
  179. package/dist/output/shapes/env-list.js +19 -0
  180. package/dist/output/shapes/events.js +11 -0
  181. package/dist/output/shapes/helpers.js +424 -0
  182. package/dist/output/shapes/history.js +7 -0
  183. package/dist/output/shapes/passthrough.js +105 -0
  184. package/dist/output/shapes/proposal-accept.js +7 -0
  185. package/dist/output/shapes/proposal-diff.js +7 -0
  186. package/dist/output/shapes/proposal-list.js +7 -0
  187. package/dist/output/shapes/proposal-producer.js +11 -0
  188. package/dist/output/shapes/proposal-reject.js +7 -0
  189. package/dist/output/shapes/proposal-show.js +7 -0
  190. package/dist/output/shapes/registry-search.js +6 -0
  191. package/dist/output/shapes/registry.js +30 -0
  192. package/dist/output/shapes/search.js +6 -0
  193. package/dist/output/shapes/secret-list.js +19 -0
  194. package/dist/output/shapes/show.js +6 -0
  195. package/dist/output/shapes/vault-list.js +19 -0
  196. package/dist/output/shapes.js +51 -549
  197. package/dist/output/text/add.js +6 -0
  198. package/dist/output/text/clone.js +6 -0
  199. package/dist/output/text/config.js +6 -0
  200. package/dist/output/text/curate.js +6 -0
  201. package/dist/output/text/distill.js +7 -0
  202. package/dist/output/text/enable-disable.js +7 -0
  203. package/dist/output/text/events.js +10 -0
  204. package/dist/output/text/feedback.js +6 -0
  205. package/dist/output/text/helpers.js +1059 -0
  206. package/dist/output/text/history.js +7 -0
  207. package/dist/output/text/import.js +6 -0
  208. package/dist/output/text/index.js +6 -0
  209. package/dist/output/text/info.js +6 -0
  210. package/dist/output/text/init.js +6 -0
  211. package/dist/output/text/list.js +6 -0
  212. package/dist/output/text/proposal-producer.js +8 -0
  213. package/dist/output/text/proposal.js +12 -0
  214. package/dist/output/text/registry-commands.js +11 -0
  215. package/dist/output/text/registry.js +30 -0
  216. package/dist/output/text/remember.js +6 -0
  217. package/dist/output/text/remove.js +6 -0
  218. package/dist/output/text/save.js +6 -0
  219. package/dist/output/text/search.js +6 -0
  220. package/dist/output/text/show.js +6 -0
  221. package/dist/output/text/update.js +6 -0
  222. package/dist/output/text/upgrade.js +6 -0
  223. package/dist/output/text/vault.js +16 -0
  224. package/dist/output/text/wiki.js +15 -0
  225. package/dist/output/text/workflow.js +14 -0
  226. package/dist/output/text.js +44 -1329
  227. package/dist/registry/build-index.js +3 -0
  228. package/dist/registry/create-provider-registry.js +3 -0
  229. package/dist/registry/factory.js +4 -1
  230. package/dist/registry/origin-resolve.js +3 -0
  231. package/dist/registry/providers/index.js +3 -0
  232. package/dist/registry/providers/skills-sh.js +11 -2
  233. package/dist/registry/providers/static-index.js +10 -1
  234. package/dist/registry/providers/types.js +3 -24
  235. package/dist/registry/resolve.js +11 -16
  236. package/dist/registry/types.js +3 -0
  237. package/dist/scripts/migrate-storage.js +17767 -0
  238. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
  239. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  240. package/dist/setup/detect.js +3 -0
  241. package/dist/setup/ripgrep-install.js +3 -0
  242. package/dist/setup/ripgrep-resolve.js +3 -0
  243. package/dist/setup/setup.js +306 -67
  244. package/dist/setup/steps.js +3 -15
  245. package/dist/sources/include.js +3 -0
  246. package/dist/sources/provider-factory.js +3 -11
  247. package/dist/sources/provider.js +3 -20
  248. package/dist/sources/providers/filesystem.js +19 -23
  249. package/dist/sources/providers/git.js +171 -21
  250. package/dist/sources/providers/index.js +3 -0
  251. package/dist/sources/providers/install-types.js +3 -13
  252. package/dist/sources/providers/npm.js +3 -4
  253. package/dist/sources/providers/provider-utils.js +3 -0
  254. package/dist/sources/providers/sync-from-ref.js +3 -11
  255. package/dist/sources/providers/tar-utils.js +3 -0
  256. package/dist/sources/providers/website.js +18 -22
  257. package/dist/sources/resolve.js +3 -0
  258. package/dist/sources/types.js +3 -0
  259. package/dist/sources/website-ingest.js +3 -0
  260. package/dist/tasks/backends/cron.js +3 -0
  261. package/dist/tasks/backends/exec-utils.js +3 -0
  262. package/dist/tasks/backends/index.js +3 -11
  263. package/dist/tasks/backends/launchd.js +3 -0
  264. package/dist/tasks/backends/schtasks.js +3 -0
  265. package/dist/tasks/parser.js +51 -38
  266. package/dist/tasks/resolveAkmBin.js +3 -0
  267. package/dist/tasks/runner.js +35 -9
  268. package/dist/tasks/schedule.js +20 -1
  269. package/dist/tasks/schema.js +5 -3
  270. package/dist/tasks/validator.js +6 -3
  271. package/dist/version.js +3 -0
  272. package/dist/wiki/wiki-templates.js +3 -0
  273. package/dist/wiki/wiki.js +3 -0
  274. package/dist/workflows/authoring.js +3 -0
  275. package/dist/workflows/cli.js +3 -0
  276. package/dist/workflows/db.js +140 -10
  277. package/dist/workflows/document-cache.js +3 -10
  278. package/dist/workflows/parser.js +3 -0
  279. package/dist/workflows/renderer.js +3 -0
  280. package/dist/workflows/runs.js +18 -1
  281. package/dist/workflows/schema.js +3 -0
  282. package/dist/workflows/scope-key.js +3 -0
  283. package/dist/workflows/validator.js +5 -9
  284. package/docs/README.md +7 -2
  285. package/docs/data-and-telemetry.md +225 -0
  286. package/docs/migration/release-notes/0.7.5.md +2 -2
  287. package/docs/migration/release-notes/0.8.0.md +57 -5
  288. package/docs/migration/v0.7-to-v0.8.md +1378 -0
  289. package/package.json +28 -11
  290. package/.github/LICENSE +0 -374
  291. package/dist/commands/install-audit.js +0 -385
  292. package/dist/commands/vault.js +0 -307
  293. package/dist/indexer/match-contributors.js +0 -141
  294. package/dist/integrations/agent/pipeline.js +0 -39
  295. package/dist/integrations/agent/runners.js +0 -31
@@ -1,3 +1,6 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
1
4
  import { Database } from "bun:sqlite";
2
5
  import fs from "node:fs";
3
6
  import { createRequire } from "node:module";
@@ -7,12 +10,13 @@ import { getDbPath } from "../core/paths";
7
10
  import { REGISTRY_INDEX_CACHE_DDL } from "../core/state-db";
8
11
  import { warn } from "../core/warn";
9
12
  import { cosineSimilarity } from "../llm/embedders/types";
13
+ import { backupDataDir, EMBEDDING_DIM_CHANGE_REASON } from "./db-backup";
10
14
  import { buildSearchFields } from "./search-fields";
11
15
  import { ensureUsageEventsSchema } from "./usage-events";
12
16
  // ── Constants ───────────────────────────────────────────────────────────────
13
- export const DB_VERSION = 12;
17
+ export const DB_VERSION = 17;
14
18
  export const EMBEDDING_DIM = 384;
15
- export const GRAPH_SCHEMA_VERSION = 1;
19
+ export const GRAPH_SCHEMA_VERSION = 3;
16
20
  // ── Database lifecycle ──────────────────────────────────────────────────────
17
21
  export function openDatabase(dbPath, options) {
18
22
  const resolvedPath = dbPath ?? getDbPath();
@@ -26,11 +30,39 @@ export function openDatabase(dbPath, options) {
26
30
  db.exec("PRAGMA foreign_keys = ON");
27
31
  // Try to load sqlite-vec extension
28
32
  loadVecExtension(db);
29
- ensureSchema(db, options?.embeddingDim ?? EMBEDDING_DIM);
33
+ // Dim resolution: explicit option wins; otherwise consult the on-disk
34
+ // config so unparameterised opens (registry providers, graph helpers,
35
+ // ad-hoc CLI subcommands) honour the operator-declared dimension. Only if
36
+ // both are absent do we fall through to the no-clobber path, which keeps
37
+ // ensureSchema from touching `index_meta.embeddingDim` at all.
38
+ const resolvedDim = options?.embeddingDim ?? resolveConfiguredEmbeddingDim();
39
+ ensureSchema(db, resolvedDim, { dataDir: dir });
30
40
  // Warn once at init if using JS fallback with many entries
31
41
  warnIfVecMissing(db, { once: true });
32
42
  return db;
33
43
  }
44
+ /**
45
+ * Read the operator-configured embedding dimension from the on-disk config.
46
+ * Returns `undefined` when no config file is present, when the config has
47
+ * no `embedding.dimension` set, or when reading the config throws (e.g.
48
+ * inside isolated test fixtures with no XDG home). Failure is silent on
49
+ * purpose — every openDatabase() call would otherwise have to handle a
50
+ * config-not-found error path, and the fallback (no-clobber semantics) is
51
+ * already correct.
52
+ */
53
+ function resolveConfiguredEmbeddingDim() {
54
+ try {
55
+ const { loadConfig } = require("../core/config");
56
+ const dim = loadConfig().embedding?.dimension;
57
+ if (typeof dim === "number" && Number.isInteger(dim) && dim > 0 && dim <= 4096) {
58
+ return dim;
59
+ }
60
+ return undefined;
61
+ }
62
+ catch {
63
+ return undefined;
64
+ }
65
+ }
34
66
  export function openExistingDatabase(dbPath) {
35
67
  const resolvedPath = dbPath ?? getDbPath();
36
68
  const db = new Database(resolvedPath);
@@ -88,7 +120,7 @@ export function warnIfVecMissing(db, { once } = { once: false }) {
88
120
  /* embeddings table may not exist yet during init */
89
121
  }
90
122
  }
91
- function ensureSchema(db, embeddingDim) {
123
+ function ensureSchema(db, embeddingDim, options) {
92
124
  // Create meta table first so we can check version
93
125
  db.exec(`
94
126
  CREATE TABLE IF NOT EXISTS index_meta (
@@ -96,6 +128,39 @@ function ensureSchema(db, embeddingDim) {
96
128
  value TEXT NOT NULL
97
129
  );
98
130
  `);
131
+ // MVP DB-backup hook (0.8.x): when the stored DB version differs from the
132
+ // running binary's DB_VERSION, snapshot the data directory BEFORE
133
+ // `handleVersionUpgrade()` drops tables. This is best-effort —
134
+ // `backupDataDir` returns null on opt-out, missing data dir, low free
135
+ // space, or copy errors, and we proceed with the upgrade in all cases.
136
+ // The proper migration framework lands in 0.9.0; until then this lets
137
+ // operators recover with `scripts/migrations/restore-data-dir.sh`.
138
+ if (options?.dataDir) {
139
+ const storedVersionRaw = getMeta(db, "version");
140
+ const storedVersion = storedVersionRaw !== undefined && storedVersionRaw !== "" ? Number.parseInt(storedVersionRaw, 10) : null;
141
+ const willUpgrade = storedVersionRaw !== undefined && storedVersionRaw !== "" && storedVersionRaw !== String(DB_VERSION);
142
+ if (willUpgrade) {
143
+ try {
144
+ // Pass env explicitly so tests can override AKM_DB_BACKUP / AKM_DB_BACKUP_RETAIN
145
+ // without mutating process.env. Production callers default to process.env.
146
+ const result = backupDataDir({
147
+ dataDir: options.dataDir,
148
+ sourceVersion: storedVersion !== null && !Number.isNaN(storedVersion) ? storedVersion : null,
149
+ targetVersion: DB_VERSION,
150
+ env: process.env,
151
+ });
152
+ if (result) {
153
+ warn("[akm] data directory backed up to %s before v%s→v%d upgrade", result.path, storedVersionRaw, DB_VERSION);
154
+ }
155
+ }
156
+ catch (err) {
157
+ // Defensive — backupDataDir already swallows most errors, but if it
158
+ // throws for an unexpected reason we must still proceed with the
159
+ // upgrade so the user isn't locked out of their binary.
160
+ warn("[akm] pre-upgrade data dir backup raised an unexpected error — %s; upgrade will proceed without a snapshot", err instanceof Error ? err.message : String(err));
161
+ }
162
+ }
163
+ }
99
164
  // Check stored version — if it differs from DB_VERSION, drop and recreate all tables.
100
165
  // Usage events are preserved across version upgrades so that utility score
101
166
  // history is not silently lost. The backup is captured here and threaded
@@ -112,12 +177,24 @@ function ensureSchema(db, embeddingDim) {
112
177
  stash_dir TEXT NOT NULL,
113
178
  entry_json TEXT NOT NULL,
114
179
  search_text TEXT NOT NULL,
115
- entry_type TEXT NOT NULL
180
+ entry_type TEXT NOT NULL,
181
+ derived_from TEXT
116
182
  );
117
183
 
118
184
  CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
119
185
  CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
186
+ CREATE INDEX IF NOT EXISTS idx_entries_file_path ON entries(file_path);
120
187
  `);
188
+ // Phase 5A / DB v17: backfill `derived_from` column + index on databases
189
+ // that were created at v17 fresh OR carry a partial v17 schema (a DB whose
190
+ // `index_meta.version` was bumped to 17 but whose `entries` table still
191
+ // lacks the column — this happens when a previous v17 binary opened a
192
+ // pre-v17 DB without taking the upgrade path because no version mismatch
193
+ // was seen at boot). The PRAGMA-then-ALTER guard runs unconditionally so
194
+ // both fresh and partial schemas converge. The CREATE INDEX for
195
+ // `derived_from` MUST run after this helper so we never reference a
196
+ // column that has not yet been added on partial schemas.
197
+ ensureDerivedFromColumn(db);
121
198
  // Validated WorkflowDocument JSON, one row per indexed workflow entry.
122
199
  // Pure index data — fully rebuilt on each `akm index`. ON DELETE CASCADE
123
200
  // means clearing entries (full rebuild or per-dir delete) drops these too.
@@ -176,6 +253,20 @@ function ensureSchema(db, embeddingDim) {
176
253
  updated_at TEXT NOT NULL DEFAULT (datetime('now')),
177
254
  FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
178
255
  );
256
+ `);
257
+ // Per-project scoped utility scores — tracks usage per (entry, cwd-anchor)
258
+ // so assets useful in project A don't pollute rankings in project B.
259
+ // The global utility_scores table is preserved as a fallback / cold-start aid.
260
+ db.exec(`
261
+ CREATE TABLE IF NOT EXISTS utility_scores_scoped (
262
+ entry_id INTEGER NOT NULL,
263
+ scope_key TEXT NOT NULL,
264
+ utility REAL NOT NULL DEFAULT 0,
265
+ last_used_at INTEGER NOT NULL,
266
+ PRIMARY KEY (entry_id, scope_key)
267
+ );
268
+ CREATE INDEX IF NOT EXISTS idx_utility_scores_scoped_entry_id
269
+ ON utility_scores_scoped(entry_id);
179
270
  `);
180
271
  db.exec(`
181
272
  CREATE TABLE IF NOT EXISTS index_dir_state (
@@ -194,15 +285,26 @@ function ensureSchema(db, embeddingDim) {
194
285
  // Entries are cleaned up when assets are removed or --re-enrich is used.
195
286
  db.exec(`
196
287
  CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
197
- asset_ref TEXT PRIMARY KEY,
198
- body_hash TEXT NOT NULL,
199
- result_json TEXT NOT NULL,
200
- updated_at INTEGER NOT NULL
288
+ asset_ref TEXT NOT NULL,
289
+ cache_variant TEXT NOT NULL,
290
+ body_hash TEXT NOT NULL,
291
+ result_json TEXT NOT NULL,
292
+ updated_at INTEGER NOT NULL,
293
+ PRIMARY KEY (asset_ref, cache_variant)
201
294
  );
202
295
 
203
296
  CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
204
297
  ON llm_enrichment_cache(updated_at);
205
298
  `);
299
+ // Graph extraction tables — schema v2 (entry_id PK).
300
+ //
301
+ // graph_files is keyed on entries.id so child tables cascade-delete cleanly
302
+ // when an entry is removed, and so JOINs from graph rows to entries are a
303
+ // direct PK lookup. (stash_root, file_path) is retained as UNIQUE so the
304
+ // extractor's path-based upsert still works.
305
+ //
306
+ // graph_file_entities and graph_file_relations no longer duplicate file_path;
307
+ // they reference entry_id and inherit stash scoping via graph_files.
206
308
  db.exec(`
207
309
  CREATE TABLE IF NOT EXISTS graph_meta (
208
310
  stash_root TEXT PRIMARY KEY,
@@ -213,53 +315,58 @@ function ensureSchema(db, embeddingDim) {
213
315
  entity_count INTEGER NOT NULL DEFAULT 0,
214
316
  relation_count INTEGER NOT NULL DEFAULT 0,
215
317
  extraction_coverage REAL NOT NULL DEFAULT 0,
216
- density REAL NOT NULL DEFAULT 0
318
+ density REAL NOT NULL DEFAULT 0,
319
+ extractor_id TEXT,
320
+ extraction_run_id TEXT,
321
+ model TEXT,
322
+ prompt_version TEXT,
323
+ batch_size INTEGER,
324
+ cache_hits INTEGER NOT NULL DEFAULT 0,
325
+ cache_misses INTEGER NOT NULL DEFAULT 0,
326
+ truncation_count INTEGER NOT NULL DEFAULT 0,
327
+ failure_count INTEGER NOT NULL DEFAULT 0
217
328
  );
218
329
 
219
330
  CREATE TABLE IF NOT EXISTS graph_files (
220
- stash_root TEXT NOT NULL,
221
- file_path TEXT NOT NULL,
222
- file_order INTEGER NOT NULL,
223
- file_type TEXT NOT NULL,
224
- body_hash TEXT,
225
- confidence REAL,
226
- PRIMARY KEY (stash_root, file_path),
227
- FOREIGN KEY (stash_root) REFERENCES graph_meta(stash_root) ON DELETE CASCADE
331
+ entry_id INTEGER PRIMARY KEY REFERENCES entries(id) ON DELETE CASCADE,
332
+ stash_root TEXT NOT NULL,
333
+ file_path TEXT NOT NULL,
334
+ file_order INTEGER NOT NULL,
335
+ file_type TEXT NOT NULL,
336
+ body_hash TEXT NOT NULL,
337
+ confidence REAL,
338
+ status TEXT NOT NULL DEFAULT 'extracted',
339
+ reason TEXT,
340
+ extraction_run_id TEXT,
341
+ UNIQUE(stash_root, file_path)
228
342
  );
229
343
 
230
344
  CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
231
345
  ON graph_files(stash_root, file_order);
232
346
 
233
347
  CREATE TABLE IF NOT EXISTS graph_file_entities (
234
- stash_root TEXT NOT NULL,
235
- file_path TEXT NOT NULL,
348
+ entry_id INTEGER NOT NULL REFERENCES graph_files(entry_id) ON DELETE CASCADE,
236
349
  entity_order INTEGER NOT NULL,
350
+ stash_root TEXT NOT NULL,
351
+ entity_norm TEXT NOT NULL,
237
352
  entity TEXT NOT NULL,
238
- PRIMARY KEY (stash_root, file_path, entity_order),
239
- FOREIGN KEY (stash_root, file_path)
240
- REFERENCES graph_files(stash_root, file_path)
241
- ON DELETE CASCADE
353
+ PRIMARY KEY (entry_id, entity_order)
242
354
  );
243
355
 
244
- CREATE INDEX IF NOT EXISTS idx_graph_file_entities_lookup
245
- ON graph_file_entities(stash_root, file_path, entity_order);
356
+ CREATE INDEX IF NOT EXISTS idx_graph_file_entities_entity_norm
357
+ ON graph_file_entities(stash_root, entity_norm);
246
358
 
247
359
  CREATE TABLE IF NOT EXISTS graph_file_relations (
248
- stash_root TEXT NOT NULL,
249
- file_path TEXT NOT NULL,
250
- relation_order INTEGER NOT NULL,
251
- from_entity TEXT NOT NULL,
252
- to_entity TEXT NOT NULL,
253
- relation_type TEXT,
254
- confidence REAL,
255
- PRIMARY KEY (stash_root, file_path, relation_order),
256
- FOREIGN KEY (stash_root, file_path)
257
- REFERENCES graph_files(stash_root, file_path)
258
- ON DELETE CASCADE
360
+ entry_id INTEGER NOT NULL REFERENCES graph_files(entry_id) ON DELETE CASCADE,
361
+ relation_order INTEGER NOT NULL,
362
+ from_entity_norm TEXT NOT NULL,
363
+ from_entity TEXT NOT NULL,
364
+ to_entity_norm TEXT NOT NULL,
365
+ to_entity TEXT NOT NULL,
366
+ relation_type TEXT,
367
+ confidence REAL,
368
+ PRIMARY KEY (entry_id, relation_order)
259
369
  );
260
-
261
- CREATE INDEX IF NOT EXISTS idx_graph_file_relations_lookup
262
- ON graph_file_relations(stash_root, file_path, relation_order);
263
370
  `);
264
371
  // FTS-dirty queue. Created here (not lazily on first upsert) so the
265
372
  // per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
@@ -271,56 +378,82 @@ function ensureSchema(db, embeddingDim) {
271
378
  );
272
379
  `);
273
380
  // sqlite-vec table
381
+ //
382
+ // Dimension contract:
383
+ // - When `embeddingDim` is `undefined`, the caller did NOT request a
384
+ // specific dim. Do not touch `index_meta.embeddingDim` and do not run
385
+ // the dim-change wipe — fall back to the stored dim (or the static
386
+ // default) only when we have to materialise the vec table for the
387
+ // first time. Without this guard, registry-side and other dim-unaware
388
+ // `openDatabase()` callers would silently overwrite the dim-aware
389
+ // improve/index value and oscillate the stored dim.
390
+ // - When `embeddingDim` is a number, the caller explicitly asked for
391
+ // that dim and owns the dim-change/backup/wipe semantics.
392
+ const dimExplicit = embeddingDim !== undefined;
393
+ const effectiveDim = embeddingDim ?? (Number(getMeta(db, "embeddingDim")) || EMBEDDING_DIM);
274
394
  if (isVecAvailable(db)) {
275
395
  // Check if stored embedding dimension differs from configured one
276
- const storedDim = getMeta(db, "embeddingDim");
277
- if (storedDim && storedDim !== String(embeddingDim)) {
278
- try {
279
- db.exec("DROP TABLE IF EXISTS entries_vec");
280
- }
281
- catch {
282
- /* ignore */
283
- }
284
- // Delete stale BLOB embeddings so they don't produce silently wrong
285
- // similarity scores against the new-dimension vec table.
286
- try {
287
- db.exec("DELETE FROM embeddings");
288
- }
289
- catch {
290
- /* ignore */
396
+ if (dimExplicit) {
397
+ const storedDim = getMeta(db, "embeddingDim");
398
+ if (storedDim && storedDim !== String(embeddingDim)) {
399
+ // Re-embedding the whole stash is expensive (LLM API calls + cache
400
+ // misses), so snapshot the data dir before we drop the vec table and
401
+ // wipe `embeddings`. This is the SAME hook the version-upgrade path
402
+ // uses earlier in this function, just gated on embedding-dim mismatch
403
+ // and tagged so operators can tell the two backup kinds apart.
404
+ backupBeforeEmbeddingDimChange(options?.dataDir, storedDim, String(embeddingDim));
405
+ try {
406
+ db.exec("DROP TABLE IF EXISTS entries_vec");
407
+ }
408
+ catch {
409
+ /* ignore */
410
+ }
411
+ // Delete stale BLOB embeddings so they don't produce silently wrong
412
+ // similarity scores against the new-dimension vec table.
413
+ try {
414
+ db.exec("DELETE FROM embeddings");
415
+ }
416
+ catch {
417
+ /* ignore */
418
+ }
419
+ setMeta(db, "hasEmbeddings", "0");
291
420
  }
292
- setMeta(db, "hasEmbeddings", "0");
293
421
  }
294
422
  const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
295
423
  if (!vecExists) {
296
- if (!Number.isInteger(embeddingDim) || embeddingDim <= 0 || embeddingDim > 4096) {
297
- throw new Error(`Invalid embedding dimension: ${embeddingDim}`);
424
+ if (!Number.isInteger(effectiveDim) || effectiveDim <= 0 || effectiveDim > 4096) {
425
+ throw new Error(`Invalid embedding dimension: ${effectiveDim}`);
298
426
  }
299
427
  db.exec(`
300
428
  CREATE VIRTUAL TABLE entries_vec USING vec0(
301
429
  id INTEGER PRIMARY KEY,
302
- embedding FLOAT[${embeddingDim}]
430
+ embedding FLOAT[${effectiveDim}]
303
431
  );
304
432
  `);
305
433
  }
306
- setMeta(db, "embeddingDim", String(embeddingDim));
434
+ if (dimExplicit) {
435
+ setMeta(db, "embeddingDim", String(embeddingDim));
436
+ }
307
437
  }
308
438
  else {
309
439
  // Also purge BLOB embeddings on dimension change (JS fallback path).
310
440
  // When sqlite-vec is unavailable, entries_vec doesn't exist but the BLOB
311
441
  // embeddings table still stores vectors. If the configured dimension
312
442
  // changes, those stored BLOBs become silently incompatible.
313
- const storedDim = getMeta(db, "embeddingDim");
314
- if (storedDim && storedDim !== String(embeddingDim)) {
315
- try {
316
- db.exec("DELETE FROM embeddings");
317
- }
318
- catch {
319
- /* ignore */
443
+ if (dimExplicit) {
444
+ const storedDim = getMeta(db, "embeddingDim");
445
+ if (storedDim && storedDim !== String(embeddingDim)) {
446
+ backupBeforeEmbeddingDimChange(options?.dataDir, storedDim, String(embeddingDim));
447
+ try {
448
+ db.exec("DELETE FROM embeddings");
449
+ }
450
+ catch {
451
+ /* ignore */
452
+ }
453
+ setMeta(db, "hasEmbeddings", "0");
320
454
  }
321
- setMeta(db, "hasEmbeddings", "0");
455
+ setMeta(db, "embeddingDim", String(embeddingDim));
322
456
  }
323
- setMeta(db, "embeddingDim", String(embeddingDim));
324
457
  }
325
458
  // Usage telemetry table
326
459
  ensureUsageEventsSchema(db);
@@ -358,6 +491,8 @@ function handleVersionUpgrade(db) {
358
491
  /* table may not exist in older versions */
359
492
  }
360
493
  db.exec("DROP TABLE IF EXISTS utility_scores");
494
+ db.exec("DROP TABLE IF EXISTS utility_scores_scoped");
495
+ db.exec("DROP INDEX IF EXISTS idx_utility_scores_scoped_entry_id");
361
496
  db.exec("DROP TABLE IF EXISTS usage_events");
362
497
  db.exec("DROP TABLE IF EXISTS embeddings");
363
498
  db.exec("DROP TABLE IF EXISTS entries_vec");
@@ -380,6 +515,48 @@ function handleVersionUpgrade(db) {
380
515
  warn("[akm] Index rebuilt due to version upgrade. Run 'akm index' to repopulate.");
381
516
  return usageBackup;
382
517
  }
518
+ /**
519
+ * Snapshot the data directory before the embedding-dimension drop path wipes
520
+ * `embeddings` and recreates `entries_vec`. Re-embedding a real-world stash
521
+ * is expensive (LLM calls + cache misses), so we capture the pre-drop state
522
+ * here using the same MVP backup helper the version-upgrade hook uses
523
+ * earlier in {@link ensureSchema}.
524
+ *
525
+ * The backup is tagged with the `embedding-dim-change` reason so it lands in
526
+ * `<dataDir>/backups/<timestamp>-embedding-dim-change/` instead of the
527
+ * version-upgrade-flavored `<timestamp>-pre-v<N>/` directory. Restoration
528
+ * works identically via `scripts/migrations/restore-data-dir.sh`.
529
+ *
530
+ * Failures are non-fatal — they downgrade to a warning and the destructive
531
+ * ops run anyway, matching the version-upgrade hook's behavior so a broken
532
+ * backup cannot brick a binary that bumped the configured dim. Likewise,
533
+ * `AKM_DB_BACKUP=0` opts out via the same path.
534
+ */
535
+ function backupBeforeEmbeddingDimChange(dataDir, fromDim, toDim) {
536
+ if (!dataDir)
537
+ return;
538
+ try {
539
+ const result = backupDataDir({
540
+ dataDir,
541
+ // The DB version isn't changing here — pass the current DB_VERSION for
542
+ // both source and target so the metadata sidecar still records the
543
+ // running binary's version for forensic context.
544
+ sourceVersion: DB_VERSION,
545
+ targetVersion: DB_VERSION,
546
+ reason: EMBEDDING_DIM_CHANGE_REASON,
547
+ env: process.env,
548
+ });
549
+ if (result) {
550
+ warn("[akm] embedding dimension changed %s→%s; data directory backed up to %s; embeddings will be regenerated", fromDim, toDim, result.path);
551
+ }
552
+ }
553
+ catch (err) {
554
+ // Defensive — backupDataDir already swallows most errors, but if it
555
+ // throws for an unexpected reason we must still proceed with the drop
556
+ // so the user isn't locked out of their binary on a changed dim.
557
+ warn("[akm] pre-embedding-dim-change data dir backup raised an unexpected error — %s; embeddings will be regenerated without a snapshot", err instanceof Error ? err.message : String(err));
558
+ }
559
+ }
383
560
  /**
384
561
  * Re-insert backed-up `usage_events` rows into the freshly-created table.
385
562
  *
@@ -474,6 +651,12 @@ export function deleteIndexDirStatesByStashDir(db, stashDir) {
474
651
  db.prepare("DELETE FROM index_dir_state WHERE dir_path = ? OR dir_path LIKE ?").run(stashDir, `${stashDir}${path.sep}%`);
475
652
  }
476
653
  // ── Entry operations ────────────────────────────────────────────────────────
654
+ /**
655
+ * SQLite parameter chunk size — chosen well below SQLITE_MAX_VARIABLE_NUMBER
656
+ * (999 by default before SQLite 3.32.0; 32766 since) so multi-row `IN (?, ?, ...)` queries stay
657
+ * within bounds. Shared by helpers below.
658
+ */
659
+ const SQLITE_CHUNK_SIZE = 500;
477
660
  /**
478
661
  * Insert or update an entry in the `entries` table. Returns the row id.
479
662
  *
@@ -487,7 +670,11 @@ export function upsertEntry(db, entryKey, dirPath, filePath, stashDir, entry, se
487
670
  // every call. The dirty-mark INSERT and the upsert-with-RETURNING
488
671
  // share the same WeakMap so they live and die with the connection.
489
672
  const stmts = getUpsertStmts(db);
490
- const result = stmts.upsert.get(entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type);
673
+ // Phase 5A / Advantage D5: surface derived memory parent ref into the
674
+ // dedicated `derived_from` column so retrieval-time lookup (parent→child)
675
+ // does not have to scan + JSON-decode every memory row.
676
+ const derivedFrom = typeof entry.derivedFrom === "string" && entry.derivedFrom.trim() ? entry.derivedFrom.trim() : null;
677
+ const result = stmts.upsert.get(entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type, derivedFrom);
491
678
  if (!result)
492
679
  throw new Error("upsertEntry: entry_key not found after upsert");
493
680
  // Mark this entry as FTS-dirty so `rebuildFts({ incremental: true })`
@@ -506,15 +693,16 @@ function getUpsertStmts(db) {
506
693
  // SELECT round-trip needed (last_insert_rowid() is unreliable for
507
694
  // ON CONFLICT). Use `.get()` so a single row comes back.
508
695
  upsert: db.prepare(`
509
- INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type)
510
- VALUES (?, ?, ?, ?, ?, ?, ?)
696
+ INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type, derived_from)
697
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
511
698
  ON CONFLICT(entry_key) DO UPDATE SET
512
699
  dir_path = excluded.dir_path,
513
700
  file_path = excluded.file_path,
514
701
  stash_dir = excluded.stash_dir,
515
702
  entry_json = excluded.entry_json,
516
703
  search_text = excluded.search_text,
517
- entry_type = excluded.entry_type
704
+ entry_type = excluded.entry_type,
705
+ derived_from = excluded.derived_from
518
706
  RETURNING id
519
707
  `),
520
708
  markDirty: db.prepare("INSERT OR IGNORE INTO entries_fts_dirty (entry_id) VALUES (?)"),
@@ -522,21 +710,128 @@ function getUpsertStmts(db) {
522
710
  upsertStmtsByDb.set(db, stmts);
523
711
  return stmts;
524
712
  }
525
- export function deleteEntriesByDir(db, dirPath) {
713
+ /**
714
+ * Phase 5A / DB v17 schema guard.
715
+ *
716
+ * Ensures the `entries.derived_from` column + index exist on the open
717
+ * connection. Called from `ensureSchema()` after the entries CREATE so that
718
+ * legacy databases (created against a pre-v17 binary but reopened without
719
+ * triggering `handleVersionUpgrade()`) still gain the new column without
720
+ * data loss. Idempotent: a `PRAGMA table_info` lookup gates the ALTER.
721
+ */
722
+ function ensureDerivedFromColumn(db) {
723
+ try {
724
+ const cols = db.prepare("PRAGMA table_info(entries)").all();
725
+ const hasColumn = cols.some((c) => c.name === "derived_from");
726
+ if (!hasColumn) {
727
+ db.exec("ALTER TABLE entries ADD COLUMN derived_from TEXT");
728
+ }
729
+ // Index creation is idempotent on its own; safe to call unconditionally.
730
+ db.exec("CREATE INDEX IF NOT EXISTS idx_entries_derived_from ON entries(derived_from)");
731
+ }
732
+ catch {
733
+ /* table may not exist on a brand-new DB before CREATE — caller is responsible */
734
+ }
735
+ }
736
+ /**
737
+ * Phase 5A / Advantage D5: look up the derived-memory child row whose
738
+ * `derived_from` column matches `parentRef` (e.g. `"memory:claude-prefs"`).
739
+ *
740
+ * Returns the most-recently-updated derived child when multiple exist (one
741
+ * parent should yield exactly one `.derived` child in practice, but the
742
+ * ordering keeps results deterministic). Returns `null` when no derived
743
+ * child has been indexed for this parent.
744
+ */
745
+ export function getDerivedForParent(db, parentRef) {
746
+ if (!parentRef)
747
+ return null;
748
+ try {
749
+ const row = db
750
+ .prepare(`SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text
751
+ FROM entries
752
+ WHERE derived_from = ?
753
+ ORDER BY id DESC
754
+ LIMIT 1`)
755
+ .get(parentRef);
756
+ if (!row)
757
+ return null;
758
+ let entry;
759
+ try {
760
+ entry = JSON.parse(row.entry_json);
761
+ }
762
+ catch {
763
+ warn(`[db] getDerivedForParent: skipping entry id=${row.id} — corrupt entry_json`);
764
+ return null;
765
+ }
766
+ return {
767
+ id: row.id,
768
+ entryKey: row.entry_key,
769
+ dirPath: row.dir_path,
770
+ filePath: row.file_path,
771
+ stashDir: row.stash_dir,
772
+ entry,
773
+ searchText: row.search_text,
774
+ };
775
+ }
776
+ catch {
777
+ /* `derived_from` column may not exist on legacy DBs that haven't been
778
+ rebuilt; treat as "no derived child". */
779
+ return null;
780
+ }
781
+ }
782
+ /**
783
+ * Phase 2A / Rec 5: bulk-load positive feedback event counts for the given
784
+ * entry ids. Used by the utility-decay forgetting curve to stabilize
785
+ * (extend the half-life of) memories that have repeatedly proven useful.
786
+ *
787
+ * Returns a `Map<entryId, count>` containing only entries with at least one
788
+ * positive feedback event — missing ids implicitly map to `0`. Chunks at
789
+ * `SQLITE_CHUNK_SIZE` (500) to respect `SQLITE_MAX_VARIABLE_NUMBER`.
790
+ *
791
+ * Cheap when called with zero ids, and silently empty when the
792
+ * `usage_events` table is missing.
793
+ */
794
+ export function getPositiveFeedbackCountsByIds(db, ids) {
795
+ const result = new Map();
796
+ if (ids.length === 0)
797
+ return result;
798
+ for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) {
799
+ const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
800
+ const placeholders = chunk.map(() => "?").join(",");
801
+ try {
802
+ const rows = db
803
+ .prepare(`SELECT entry_id, COUNT(*) AS cnt
804
+ FROM usage_events
805
+ WHERE event_type = 'feedback'
806
+ AND signal = 'positive'
807
+ AND entry_id IN (${placeholders})
808
+ GROUP BY entry_id`)
809
+ .all(...chunk);
810
+ for (const row of rows) {
811
+ if (row.entry_id !== null && row.cnt > 0) {
812
+ result.set(row.entry_id, row.cnt);
813
+ }
814
+ }
815
+ }
816
+ catch {
817
+ /* usage_events table may be missing on legacy DBs — treat as zero counts */
818
+ }
819
+ }
820
+ return result;
821
+ }
822
+ function deleteEntriesWhere(db, column, value) {
526
823
  db.transaction(() => {
527
- const ids = db.prepare("SELECT id FROM entries WHERE dir_path = ?").all(dirPath);
824
+ const ids = db.prepare(`SELECT id FROM entries WHERE ${column} = ?`).all(value);
528
825
  deleteRelatedRows(db, ids);
529
- db.prepare("DELETE FROM entries WHERE dir_path = ?").run(dirPath);
826
+ db.prepare(`DELETE FROM entries WHERE ${column} = ?`).run(value);
530
827
  })();
531
828
  }
829
+ export function deleteEntriesByDir(db, dirPath) {
830
+ deleteEntriesWhere(db, "dir_path", dirPath);
831
+ }
532
832
  export function deleteEntriesByStashDir(db, stashDir) {
533
- db.transaction(() => {
534
- const ids = db.prepare("SELECT id FROM entries WHERE stash_dir = ?").all(stashDir);
535
- deleteRelatedRows(db, ids);
536
- db.prepare("DELETE FROM entries WHERE stash_dir = ?").run(stashDir);
537
- })();
833
+ deleteEntriesWhere(db, "stash_dir", stashDir);
538
834
  }
539
- const SQLITE_CHUNK_SIZE = 500;
540
835
  function deleteRelatedRows(db, ids) {
541
836
  if (ids.length === 0)
542
837
  return;
@@ -571,13 +866,6 @@ function deleteRelatedRows(db, ids) {
571
866
  catch {
572
867
  /* ignore */
573
868
  }
574
- // Also delete from FTS table so orphaned FTS rows don't remain
575
- try {
576
- db.prepare(`DELETE FROM entries_fts WHERE entry_id IN (${placeholders})`).run(...chunk);
577
- }
578
- catch {
579
- /* ignore */
580
- }
581
869
  if (vecAvail) {
582
870
  try {
583
871
  db.prepare(`DELETE FROM entries_vec WHERE id IN (${placeholders})`).run(...chunk);
@@ -593,6 +881,12 @@ function deleteRelatedRows(db, ids) {
593
881
  catch {
594
882
  /* ignore */
595
883
  }
884
+ try {
885
+ db.prepare(`DELETE FROM utility_scores_scoped WHERE entry_id IN (${placeholders})`).run(...chunk);
886
+ }
887
+ catch {
888
+ /* ignore */
889
+ }
596
890
  // Clean up usage events before deleting entries
597
891
  try {
598
892
  db.prepare(`DELETE FROM usage_events WHERE entry_id IN (${placeholders})`).run(...chunk);
@@ -602,6 +896,26 @@ function deleteRelatedRows(db, ids) {
602
896
  }
603
897
  }
604
898
  }
899
+ /**
900
+ * Delete entries by their primary key IDs, along with all related rows
901
+ * (embeddings, entries_vec, entries_fts, utility_scores, utility_scores_scoped, usage_events).
902
+ *
903
+ * Used by the `--clean` post-pass to remove stale entries whose source files
904
+ * no longer exist on disk.
905
+ */
906
+ export function deleteEntriesByIds(db, ids) {
907
+ if (ids.length === 0)
908
+ return;
909
+ db.transaction(() => {
910
+ const idObjs = ids.map((id) => ({ id }));
911
+ deleteRelatedRows(db, idObjs);
912
+ for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) {
913
+ const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
914
+ const placeholders = chunk.map(() => "?").join(",");
915
+ db.prepare(`DELETE FROM entries WHERE id IN (${placeholders})`).run(...chunk);
916
+ }
917
+ })();
918
+ }
605
919
  /**
606
920
  * Rebuild the FTS5 search index.
607
921
  *
@@ -676,19 +990,32 @@ export function rebuildFts(db, options) {
676
990
  }
677
991
  // ── Vector operations ───────────────────────────────────────────────────────
678
992
  export function upsertEmbedding(db, entryId, embedding) {
993
+ // Pre-flight FK guard: when an entry is deleted between when its id is queued
994
+ // for embedding and when this INSERT runs (e.g. consolidation deletes during
995
+ // a concurrent improve cycle), the INSERT throws "FOREIGN KEY constraint failed"
996
+ // and rolls back the entire batch transaction in the caller, losing every
997
+ // embedding for that run. A cheap SELECT here turns the race into a clean skip.
998
+ const exists = db.prepare("SELECT 1 FROM entries WHERE id = ?").get(entryId);
999
+ if (!exists)
1000
+ return false;
679
1001
  const buf = float32Buffer(embedding);
680
1002
  // Always write to BLOB table (works without sqlite-vec)
681
1003
  db.prepare("INSERT OR REPLACE INTO embeddings (id, embedding) VALUES (?, ?)").run(entryId, buf);
682
- // Also write to sqlite-vec table when available (fast path)
1004
+ // Also write to sqlite-vec table when available (fast path).
1005
+ // Wrapped in a transaction so a crash between DELETE and INSERT does not
1006
+ // leave the entry missing from the vec table.
683
1007
  if (isVecAvailable(db)) {
684
1008
  try {
685
- db.prepare("DELETE FROM entries_vec WHERE id = ?").run(entryId);
1009
+ db.transaction(() => {
1010
+ db.prepare("DELETE FROM entries_vec WHERE id = ?").run(entryId);
1011
+ db.prepare("INSERT INTO entries_vec (id, embedding) VALUES (?, ?)").run(entryId, buf);
1012
+ })();
686
1013
  }
687
1014
  catch {
688
- /* ignore */
1015
+ /* ignore — vec table unavailable or constraint failure */
689
1016
  }
690
- db.prepare("INSERT INTO entries_vec (id, embedding) VALUES (?, ?)").run(entryId, buf);
691
1017
  }
1018
+ return true;
692
1019
  }
693
1020
  export function searchVec(db, queryEmbedding, k) {
694
1021
  // Fast path: use sqlite-vec when available
@@ -708,6 +1035,23 @@ export function searchVec(db, queryEmbedding, k) {
708
1035
  // Fallback: JS-based cosine similarity over BLOB table
709
1036
  return searchBlobVec(db, queryEmbedding, k);
710
1037
  }
1038
+ /**
1039
+ * Return the k nearest neighbours of an already-indexed entry using its
1040
+ * persisted embedding — no re-embedding, no network. Decodes the stored BLOB by
1041
+ * byte length (dim = bytes / 4) and reuses searchVec (sqlite-vec fast path or
1042
+ * JS-cosine fallback). Returns [] when the entry has no stored embedding or the
1043
+ * BLOB is corrupt. The query entry itself is typically returned with distance
1044
+ * ~0 — callers should filter it out by id.
1045
+ */
1046
+ export function getNeighborsByEntryId(db, id, k) {
1047
+ const row = db.prepare("SELECT embedding FROM embeddings WHERE id = ?").get(id);
1048
+ if (!row)
1049
+ return [];
1050
+ const queryEmbedding = bufferToFloat32(row.embedding, Math.floor(row.embedding.byteLength / 4));
1051
+ if (!queryEmbedding)
1052
+ return [];
1053
+ return searchVec(db, queryEmbedding, k);
1054
+ }
711
1055
  function float32Buffer(vec) {
712
1056
  const f32 = new Float32Array(vec);
713
1057
  return Buffer.from(f32.buffer);
@@ -814,7 +1158,7 @@ function runFtsQuery(db, ftsQuery, limit, entryType) {
814
1158
  JOIN entries e ON e.id = f.entry_id
815
1159
  WHERE entries_fts MATCH ?
816
1160
  AND e.entry_type = ?
817
- ORDER BY bm25Score
1161
+ ORDER BY bm25Score, e.id ASC
818
1162
  LIMIT ?
819
1163
  `;
820
1164
  params = [ftsQuery, entryType, limit];
@@ -826,7 +1170,7 @@ function runFtsQuery(db, ftsQuery, limit, entryType) {
826
1170
  FROM entries_fts f
827
1171
  JOIN entries e ON e.id = f.entry_id
828
1172
  WHERE entries_fts MATCH ?
829
- ORDER BY bm25Score
1173
+ ORDER BY bm25Score, e.id ASC
830
1174
  LIMIT ?
831
1175
  `;
832
1176
  params = [ftsQuery, limit];
@@ -875,21 +1219,7 @@ export function sanitizeFtsQuery(query) {
875
1219
  // contain ALL terms.
876
1220
  return tokens.join(" ");
877
1221
  }
878
- // ── All entries ─────────────────────────────────────────────────────────────
879
- export function getAllEntries(db, entryType) {
880
- let sql;
881
- let params;
882
- if (entryType && entryType !== "any") {
883
- sql =
884
- "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?";
885
- params = [entryType];
886
- }
887
- else {
888
- sql = "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries";
889
- params = [];
890
- }
891
- const rows = db.prepare(sql).all(...params);
892
- // Guard against corrupt JSON — skip the row rather than crashing
1222
+ function parseEntryRows(rows, context) {
893
1223
  const entries = [];
894
1224
  for (const row of rows) {
895
1225
  let entry;
@@ -897,7 +1227,7 @@ export function getAllEntries(db, entryType) {
897
1227
  entry = JSON.parse(row.entry_json);
898
1228
  }
899
1229
  catch {
900
- warn(`[db] getAllEntries: skipping entry id=${row.id} — corrupt entry_json`);
1230
+ warn(`[db] ${context}: skipping entry id=${row.id} — corrupt entry_json`);
901
1231
  continue;
902
1232
  }
903
1233
  entries.push({
@@ -912,6 +1242,21 @@ export function getAllEntries(db, entryType) {
912
1242
  }
913
1243
  return entries;
914
1244
  }
1245
+ export function getAllEntries(db, entryType) {
1246
+ let sql;
1247
+ let params;
1248
+ if (entryType && entryType !== "any") {
1249
+ sql =
1250
+ "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?";
1251
+ params = [entryType];
1252
+ }
1253
+ else {
1254
+ sql = "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries";
1255
+ params = [];
1256
+ }
1257
+ const rows = db.prepare(sql).all(...params);
1258
+ return parseEntryRows(rows, "getAllEntries");
1259
+ }
915
1260
  export function findEntryIdByRef(db, ref) {
916
1261
  const parsed = parseAssetRef(ref);
917
1262
  const nameVariants = [parsed.name];
@@ -957,28 +1302,7 @@ export function getEntriesByDir(db, dirPath) {
957
1302
  const rows = db
958
1303
  .prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?")
959
1304
  .all(dirPath);
960
- // Guard against corrupt JSON — skip the row rather than crashing
961
- const entries = [];
962
- for (const row of rows) {
963
- let entry;
964
- try {
965
- entry = JSON.parse(row.entry_json);
966
- }
967
- catch {
968
- warn(`[db] getEntriesByDir: skipping entry id=${row.id} — corrupt entry_json`);
969
- continue;
970
- }
971
- entries.push({
972
- id: row.id,
973
- entryKey: row.entry_key,
974
- dirPath: row.dir_path,
975
- filePath: row.file_path,
976
- stashDir: row.stash_dir,
977
- entry,
978
- searchText: row.search_text,
979
- });
980
- }
981
- return entries;
1305
+ return parseEntryRows(rows, "getEntriesByDir");
982
1306
  }
983
1307
  /**
984
1308
  * Get the utility score for an entry, or undefined if none exists.
@@ -1001,12 +1325,17 @@ export function getUtilityScore(db, entryId) {
1001
1325
  }
1002
1326
  /**
1003
1327
  * Batch-load utility scores for multiple entry IDs, one query per chunk of ids.
1004
- * Returns a Map keyed by entry_id for O(1) lookup.
1328
+ * Returns a `{ global, scoped }` pair, both Maps keyed by entry_id.
1329
+ *
1330
+ * When `scopeKey` is provided a second query runs against
1331
+ * `utility_scores_scoped` and the result is returned as `scoped`.
1332
+ * Both maps are always present; `scoped` is empty when `scopeKey` is absent.
1005
1333
  */
1006
- export function getUtilityScoresByIds(db, ids) {
1334
+ export function getUtilityScoresByIds(db, ids, scopeKey) {
1335
+ const global = new Map();
1336
+ const scoped = new Map();
1007
1337
  if (ids.length === 0)
1008
- return new Map();
1009
- const result = new Map();
1338
+ return { global, scoped };
1010
1339
  // Process in chunks to stay within SQLITE_MAX_VARIABLE_NUMBER
1011
1340
  for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) {
1012
1341
  const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
@@ -1015,7 +1344,7 @@ export function getUtilityScoresByIds(db, ids) {
1015
1344
  .prepare(`SELECT entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at FROM utility_scores WHERE entry_id IN (${placeholders})`)
1016
1345
  .all(...chunk);
1017
1346
  for (const row of rows) {
1018
- result.set(row.entry_id, {
1347
+ global.set(row.entry_id, {
1019
1348
  entryId: row.entry_id,
1020
1349
  utility: row.utility,
1021
1350
  showCount: row.show_count,
@@ -1025,13 +1354,34 @@ export function getUtilityScoresByIds(db, ids) {
1025
1354
  updatedAt: row.updated_at,
1026
1355
  });
1027
1356
  }
1357
+ if (scopeKey) {
1358
+ const scopedRows = db
1359
+ .prepare(`SELECT entry_id, scope_key, utility, last_used_at FROM utility_scores_scoped WHERE scope_key = ? AND entry_id IN (${placeholders})`)
1360
+ .all(scopeKey, ...chunk);
1361
+ for (const row of scopedRows) {
1362
+ scoped.set(row.entry_id, {
1363
+ entryId: row.entry_id,
1364
+ scopeKey: row.scope_key,
1365
+ utility: row.utility,
1366
+ lastUsedAt: row.last_used_at,
1367
+ });
1368
+ }
1369
+ }
1028
1370
  }
1029
- return result;
1371
+ return { global, scoped };
1030
1372
  }
1031
1373
  /**
1032
1374
  * Insert or update a utility score for an entry.
1033
1375
  */
1034
1376
  export function upsertUtilityScore(db, entryId, data) {
1377
+ // Pre-flight FK guard (mirrors `upsertEmbedding`): when an entry is
1378
+ // deleted between when its id is aggregated from usage_events and when
1379
+ // this INSERT runs, the FK constraint fails and rolls back the entire
1380
+ // finalize transaction. A cheap SELECT here turns the race into a
1381
+ // clean skip. Returns false when the entry no longer exists.
1382
+ const exists = db.prepare("SELECT 1 FROM entries WHERE id = ?").get(entryId);
1383
+ if (!exists)
1384
+ return false;
1035
1385
  db.prepare(`
1036
1386
  INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
1037
1387
  VALUES (?, ?, ?, ?, ?, ?, datetime('now'))
@@ -1043,6 +1393,7 @@ export function upsertUtilityScore(db, entryId, data) {
1043
1393
  last_used_at = excluded.last_used_at,
1044
1394
  updated_at = datetime('now')
1045
1395
  `).run(entryId, data.utility, data.showCount, data.searchCount, data.selectRate, data.lastUsedAt ?? null);
1396
+ return true;
1046
1397
  }
1047
1398
  /**
1048
1399
  * Look up a cached LLM result for the given asset_ref.
@@ -1052,10 +1403,10 @@ export function upsertUtilityScore(db, entryId, data) {
1052
1403
  * cached). In both cases the caller should invoke the LLM and write a new
1053
1404
  * cache entry.
1054
1405
  */
1055
- export function getLlmCacheEntry(db, assetRef, currentBodyHash) {
1406
+ export function getLlmCacheEntry(db, assetRef, currentBodyHash, cacheVariant = "") {
1056
1407
  const row = db
1057
- .prepare("SELECT asset_ref, body_hash, result_json, updated_at FROM llm_enrichment_cache WHERE asset_ref = ?")
1058
- .get(assetRef);
1408
+ .prepare("SELECT asset_ref, cache_variant, body_hash, result_json, updated_at FROM llm_enrichment_cache WHERE asset_ref = ? AND cache_variant = ?")
1409
+ .get(assetRef, cacheVariant);
1059
1410
  if (!row)
1060
1411
  return undefined;
1061
1412
  // Hash mismatch → body changed, treat as cache miss.
@@ -1063,21 +1414,54 @@ export function getLlmCacheEntry(db, assetRef, currentBodyHash) {
1063
1414
  return undefined;
1064
1415
  return {
1065
1416
  assetRef: row.asset_ref,
1417
+ cacheVariant: row.cache_variant,
1066
1418
  bodyHash: row.body_hash,
1067
1419
  resultJson: row.result_json,
1068
1420
  updatedAt: row.updated_at,
1069
1421
  };
1070
1422
  }
1423
+ /**
1424
+ * Batched variant of {@link getLlmCacheEntry}. Fetches every cache row whose
1425
+ * `asset_ref` is in `refs` with a single `IN (...)` query (chunked to respect
1426
+ * SQLITE_MAX_VARIABLE_NUMBER), returning a `Map<assetRef, LlmCacheEntry>`.
1427
+ *
1428
+ * Unlike `getLlmCacheEntry`, this does NOT filter by body hash — callers must
1429
+ * compare `entry.bodyHash` against the current body hash themselves. This lets
1430
+ * the batch path issue one DB query per chunk instead of one per file.
1431
+ */
1432
+ export function getLlmCacheEntriesByRefs(db, refs, cacheVariant = "") {
1433
+ const result = new Map();
1434
+ if (refs.length === 0)
1435
+ return result;
1436
+ for (let i = 0; i < refs.length; i += SQLITE_CHUNK_SIZE) {
1437
+ const chunk = refs.slice(i, i + SQLITE_CHUNK_SIZE);
1438
+ const placeholders = chunk.map(() => "?").join(", ");
1439
+ const rows = db
1440
+ .prepare(`SELECT asset_ref, cache_variant, body_hash, result_json, updated_at FROM llm_enrichment_cache
1441
+ WHERE cache_variant = ? AND asset_ref IN (${placeholders})`)
1442
+ .all(cacheVariant, ...chunk);
1443
+ for (const row of rows) {
1444
+ result.set(row.asset_ref, {
1445
+ assetRef: row.asset_ref,
1446
+ cacheVariant: row.cache_variant,
1447
+ bodyHash: row.body_hash,
1448
+ resultJson: row.result_json,
1449
+ updatedAt: row.updated_at,
1450
+ });
1451
+ }
1452
+ }
1453
+ return result;
1454
+ }
1071
1455
  /**
1072
1456
  * Insert or update a cached LLM result for the given asset_ref.
1073
1457
  */
1074
- export function upsertLlmCacheEntry(db, assetRef, bodyHash, resultJson) {
1075
- db.prepare(`INSERT INTO llm_enrichment_cache (asset_ref, body_hash, result_json, updated_at)
1076
- VALUES (?, ?, ?, ?)
1077
- ON CONFLICT(asset_ref) DO UPDATE SET
1078
- body_hash = excluded.body_hash,
1079
- result_json = excluded.result_json,
1080
- updated_at = excluded.updated_at`).run(assetRef, bodyHash, resultJson, Date.now());
1458
+ export function upsertLlmCacheEntry(db, assetRef, bodyHash, resultJson, cacheVariant = "") {
1459
+ db.prepare(`INSERT INTO llm_enrichment_cache (asset_ref, cache_variant, body_hash, result_json, updated_at)
1460
+ VALUES (?, ?, ?, ?, ?)
1461
+ ON CONFLICT(asset_ref, cache_variant) DO UPDATE SET
1462
+ body_hash = excluded.body_hash,
1463
+ result_json = excluded.result_json,
1464
+ updated_at = excluded.updated_at`).run(assetRef, cacheVariant, bodyHash, resultJson, Date.now());
1081
1465
  }
1082
1466
  /**
1083
1467
  * Delete LLM cache entries whose asset_ref is no longer present in the
@@ -1145,26 +1529,55 @@ export function getRetrievalCounts(db, refs) {
1145
1529
  * The indexer (`akm index`) will overwrite these values at next reindex run;
1146
1530
  * bumps are intentionally temporary hints between index runs, not permanent
1147
1531
  * overrides.
1532
+ *
1533
+ * When `scopeKey` is provided, also writes a scoped bump to
1534
+ * `utility_scores_scoped` so per-project usage signals accumulate alongside
1535
+ * the global ones. The global table is always updated regardless.
1148
1536
  */
1149
- export function bumpUtilityScoresBatch(db, entryIds, reward, lr = 0.1) {
1537
+ export function bumpUtilityScoresBatch(db, entryIds, reward, lr = 0.1, scopeKey) {
1150
1538
  if (entryIds.length === 0)
1151
1539
  return;
1152
1540
  db.transaction(() => {
1153
- const scoreMap = getUtilityScoresByIds(db, entryIds);
1541
+ const { global: scoreMap } = getUtilityScoresByIds(db, entryIds);
1154
1542
  const now = new Date().toISOString();
1543
+ const nowMs = Date.now();
1155
1544
  const stmt = db.prepare(`INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
1156
1545
  VALUES (?, ?, 0, 0, 0, ?, ?)
1157
1546
  ON CONFLICT(entry_id) DO UPDATE SET
1158
1547
  utility = excluded.utility,
1159
1548
  updated_at = excluded.updated_at`);
1549
+ // Prepare scoped upsert once outside the loop when scopeKey is present.
1550
+ const scopedStmt = scopeKey
1551
+ ? db.prepare(`INSERT INTO utility_scores_scoped (entry_id, scope_key, utility, last_used_at)
1552
+ VALUES (?, ?, ?, ?)
1553
+ ON CONFLICT(entry_id, scope_key) DO UPDATE SET
1554
+ utility = excluded.utility,
1555
+ last_used_at = excluded.last_used_at`)
1556
+ : null;
1160
1557
  for (const entryId of entryIds) {
1161
1558
  const existing = scoreMap.get(entryId);
1162
1559
  const current = existing?.utility ?? 0;
1163
1560
  const next = Math.max(0, Math.min(1, current + lr * (reward - current)));
1164
1561
  stmt.run(entryId, next, now, now);
1562
+ if (scopedStmt && scopeKey) {
1563
+ // Retrieve the current scoped utility so we can apply the same EMA.
1564
+ const scopedCurrent = getScopedUtility(db, entryId, scopeKey);
1565
+ const scopedNext = Math.max(0, Math.min(1, scopedCurrent + lr * (reward - scopedCurrent)));
1566
+ scopedStmt.run(entryId, scopeKey, scopedNext, nowMs);
1567
+ }
1165
1568
  }
1166
1569
  })();
1167
1570
  }
1571
+ /**
1572
+ * Return the current utility value for a single (entry_id, scope_key) pair.
1573
+ * Returns 0 when no row exists yet.
1574
+ */
1575
+ function getScopedUtility(db, entryId, scopeKey) {
1576
+ const row = db
1577
+ .prepare("SELECT utility FROM utility_scores_scoped WHERE entry_id = ? AND scope_key = ?")
1578
+ .get(entryId, scopeKey);
1579
+ return row?.utility ?? 0;
1580
+ }
1168
1581
  // ── Indexer-phase helpers (moved from indexer.ts) ────────────────────────────
1169
1582
  /**
1170
1583
  * Return all entries that do not yet have an embedding row.
@@ -1175,6 +1588,7 @@ export function getAllEntriesForEmbedding(db) {
1175
1588
  .prepare(`
1176
1589
  SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
1177
1590
  WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
1591
+ AND e.entry_type != 'vault'
1178
1592
  `)
1179
1593
  .all();
1180
1594
  }
@@ -1237,31 +1651,97 @@ export function getZeroResultSearches(db, sinceDays = 30) {
1237
1651
  * Returns null when no matching row is found.
1238
1652
  */
1239
1653
  export function getEntryByRef(db, type, name) {
1240
- return db
1241
- .prepare("SELECT id FROM entries WHERE entry_type = ? AND entry_key LIKE ?")
1242
- .get(type, `%${type}:${name}`);
1654
+ return db.prepare("SELECT id FROM entries WHERE entry_type = ? AND entry_key = ?").get(type, `${type}:${name}`);
1243
1655
  }
1244
1656
  /**
1245
- * Upsert a utility score adjustment derived from accumulated feedback events.
1657
+ * MemRL learning rate for feedback-driven utility updates (F-5 / #386).
1658
+ *
1659
+ * Follows the bounded-step formula from MemRL (arXiv:2601.03192):
1660
+ * next = clamp(current + lr × (reward − current), 0, 1)
1661
+ *
1662
+ * This replaces the unbounded `-0.03 × negativeCount` delta that could
1663
+ * silently remove high-utility assets from the improvement loop.
1664
+ */
1665
+ const FEEDBACK_LR = 0.1;
1666
+ /**
1667
+ * Positive reward signal for a single positive feedback event.
1668
+ * Reward 1.0 means "fully correct / helpful".
1669
+ */
1670
+ const FEEDBACK_REWARD_POSITIVE = 1.0;
1671
+ /**
1672
+ * Negative reward signal for a single negative feedback event.
1673
+ * Reward 0.0 means "not helpful" (lowest MemRL signal).
1674
+ */
1675
+ const FEEDBACK_REWARD_NEGATIVE = 0.0;
1676
+ /**
1677
+ * Maximum total negative utility delta allowed in a single
1678
+ * `applyFeedbackToUtilityScore` call regardless of negativeCount.
1246
1679
  *
1247
- * - positiveDelta: +0.05 per positive event
1248
- * - negativeDelta: -0.03 per negative event
1249
- * - Score is clamped to [0.0, 1.0]
1250
- * - A new row starts at 0.5 + delta so the first positive feedback immediately
1251
- * lifts the entry above the neutral midpoint.
1680
+ * This caps the negative impact of a single call: however many negatives
1681
+ * are batched into `negativeCount` (e.g. spamming 10 negatives in one
1682
+ * session), utility drops by at most `MAX_NEG_DELTA_PER_CALL`. The cap prevents a noisy negative-
1683
+ * feedback stream from silently destroying a high-utility asset's ranking.
1684
+ */
1685
+ const MAX_NEG_DELTA_PER_CALL = 0.15;
1686
+ /**
1687
+ * Utility threshold below which a review-needed escalation is triggered.
1688
+ * When a previously high-utility asset (≥ HIGH_UTILITY_THRESHOLD) drops
1689
+ * below this value, the caller should create an escalation proposal.
1690
+ */
1691
+ export const UTILITY_REVIEW_THRESHOLD = 0.5;
1692
+ /**
1693
+ * Utility level considered "high" — assets at or above this are tracked for
1694
+ * threshold-crossing escalation.
1695
+ */
1696
+ export const HIGH_UTILITY_THRESHOLD = 0.5;
1697
+ /**
1698
+ * Apply accumulated feedback counts to the utility score of an entry using the
1699
+ * MemRL bounded-step EMA formula (F-5 / #386, arXiv:2601.03192).
1700
+ *
1701
+ * Replaces the previous unbounded `-0.03 × negativeCount` formula with:
1702
+ *
1703
+ * reward = weighted average of positive and negative signals
1704
+ * nextUtil = clamp(currentUtil + lr × (reward − currentUtil), 0, 1)
1705
+ *
1706
+ * The negative impact is additionally capped at {@link MAX_NEG_DELTA_PER_CALL}
1707
+ * to prevent a noisy feedback stream from silently erasing a high-utility asset.
1708
+ *
1709
+ * A new entry starts at 0.5 (neutral midpoint) before the EMA step is applied.
1710
+ *
1711
+ * Returns a {@link FeedbackUtilityResult} so the caller can detect when a
1712
+ * previously high-utility asset crosses below the review threshold and create
1713
+ * an escalation proposal.
1252
1714
  */
1253
1715
  export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negativeCount) {
1254
- if (positiveCount === 0 && negativeCount === 0)
1255
- return;
1256
- const delta = positiveCount * 0.05 - negativeCount * 0.03;
1716
+ const existing = getUtilityScore(db, entryId);
1717
+ const previousUtility = existing?.utility ?? 0.5;
1718
+ if (positiveCount === 0 && negativeCount === 0) {
1719
+ return { previousUtility, nextUtility: previousUtility, crossedReviewThreshold: false };
1720
+ }
1721
+ const total = positiveCount + negativeCount;
1722
+ // Weighted reward: proportion of positive signals.
1723
+ const reward = positiveCount > 0 && negativeCount === 0
1724
+ ? FEEDBACK_REWARD_POSITIVE
1725
+ : negativeCount > 0 && positiveCount === 0
1726
+ ? FEEDBACK_REWARD_NEGATIVE
1727
+ : (positiveCount * FEEDBACK_REWARD_POSITIVE + negativeCount * FEEDBACK_REWARD_NEGATIVE) / total;
1728
+ // MemRL bounded-step EMA: lr × (reward − current)
1729
+ let delta = FEEDBACK_LR * (reward - previousUtility);
1730
+ // Per-call negative cap: if delta is negative (net negative feedback), cap it.
1731
+ if (delta < 0) {
1732
+ delta = Math.max(delta, -MAX_NEG_DELTA_PER_CALL);
1733
+ }
1734
+ const nextUtility = Math.max(0, Math.min(1, previousUtility + delta));
1257
1735
  const now = new Date().toISOString();
1258
1736
  db.prepare(`
1259
1737
  INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
1260
- VALUES (?, MAX(0.0, MIN(1.0, 0.5 + ?)), 0, 0, 0, ?, ?)
1738
+ VALUES (?, ?, 0, 0, 0, ?, ?)
1261
1739
  ON CONFLICT(entry_id) DO UPDATE SET
1262
- utility = MAX(0.0, MIN(1.0, utility + ?)),
1740
+ utility = ?,
1263
1741
  updated_at = ?
1264
- `).run(entryId, delta, now, now, delta, now);
1742
+ `).run(entryId, nextUtility, now, now, nextUtility, now);
1743
+ const crossedReviewThreshold = previousUtility >= HIGH_UTILITY_THRESHOLD && nextUtility < UTILITY_REVIEW_THRESHOLD;
1744
+ return { previousUtility, nextUtility, crossedReviewThreshold };
1265
1745
  }
1266
1746
  /**
1267
1747
  * Re-link detached usage_events to their current entry_ids via entry_ref.
@@ -1272,6 +1752,22 @@ export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negative
1272
1752
  */
1273
1753
  export function relinkUsageEvents(db) {
1274
1754
  try {
1755
+ // Step 1: null out stale entry_ids (entry was deleted, re-keyed, etc.).
1756
+ // Leaving them in place would let `recomputeUtilityScores` aggregate
1757
+ // by an entry_id that no longer exists in `entries`, then trip the FK
1758
+ // constraint on the utility_scores INSERT and roll back the entire
1759
+ // finalize transaction. Nulled rows can be re-resolved by step 2 below;
1760
+ // events whose entry is permanently gone simply stay null and age out
1761
+ // via the 90-day retention policy.
1762
+ db.exec(`
1763
+ UPDATE usage_events
1764
+ SET entry_id = NULL
1765
+ WHERE entry_id IS NOT NULL
1766
+ AND entry_id NOT IN (SELECT id FROM entries)
1767
+ `);
1768
+ // Step 2: re-resolve any null entry_id from entry_ref against the
1769
+ // current entries table. Picks up entries that were re-created with
1770
+ // the same ref (e.g. an asset moved between sources).
1275
1771
  db.exec(`
1276
1772
  UPDATE usage_events SET entry_id = (
1277
1773
  SELECT e.id FROM entries e