akm-cli 0.8.0-rc2 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (313)
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +238 -3
  2. package/README.md +22 -6
  3. package/SECURITY.md +93 -0
  4. package/dist/assets/help/help-accept.md +12 -0
  5. package/dist/assets/help/help-improve.md +81 -0
  6. package/dist/{commands → assets}/help/help-proposals.md +7 -4
  7. package/dist/assets/help/help-reject.md +11 -0
  8. package/dist/{output → assets/hints}/cli-hints-full.md +60 -32
  9. package/dist/{output → assets/hints}/cli-hints-short.md +10 -7
  10. package/dist/assets/profiles/default.json +15 -0
  11. package/dist/assets/profiles/graph-refresh.json +13 -0
  12. package/dist/assets/profiles/memory-focus.json +12 -0
  13. package/dist/assets/profiles/quick.json +15 -0
  14. package/dist/assets/profiles/thorough.json +15 -0
  15. package/dist/assets/prompts/extract-session.md +80 -0
  16. package/dist/assets/prompts/graph-extract-user-prompt.md +35 -0
  17. package/dist/assets/tasks/graph-refresh-weekly.yml +10 -0
  18. package/dist/cli/config-migrate.js +144 -0
  19. package/dist/cli/config-validate.js +39 -0
  20. package/dist/cli/confirm.js +73 -0
  21. package/dist/cli/parse-args.js +93 -3
  22. package/dist/cli/shared.js +129 -0
  23. package/dist/cli.js +2141 -1268
  24. package/dist/commands/add-cli.js +279 -0
  25. package/dist/commands/agent-dispatch.js +20 -12
  26. package/dist/commands/agent-support.js +11 -5
  27. package/dist/commands/completions.js +3 -0
  28. package/dist/commands/config-cli.js +129 -517
  29. package/dist/commands/consolidate.js +1557 -147
  30. package/dist/commands/curate.js +44 -3
  31. package/dist/commands/db-cli.js +23 -0
  32. package/dist/commands/distill-promotion-policy.js +5 -3
  33. package/dist/commands/distill.js +906 -100
  34. package/dist/commands/env.js +213 -0
  35. package/dist/commands/eval-cases.js +3 -0
  36. package/dist/commands/events.js +3 -0
  37. package/dist/commands/extract-cli.js +127 -0
  38. package/dist/commands/extract-prompt.js +217 -0
  39. package/dist/commands/extract.js +477 -0
  40. package/dist/commands/feedback-cli.js +331 -0
  41. package/dist/commands/graph.js +260 -5
  42. package/dist/commands/health.js +1042 -55
  43. package/dist/commands/history.js +51 -16
  44. package/dist/commands/improve-auto-accept.js +97 -0
  45. package/dist/commands/improve-cli.js +236 -0
  46. package/dist/commands/improve-profiles.js +138 -0
  47. package/dist/commands/improve-result-file.js +167 -0
  48. package/dist/commands/improve.js +1736 -346
  49. package/dist/commands/info.js +26 -28
  50. package/dist/commands/init.js +49 -1
  51. package/dist/commands/installed-stashes.js +6 -23
  52. package/dist/commands/knowledge.js +3 -0
  53. package/dist/commands/lint/agent-linter.js +3 -0
  54. package/dist/commands/lint/base-linter.js +199 -5
  55. package/dist/commands/lint/command-linter.js +3 -0
  56. package/dist/commands/lint/default-linter.js +3 -0
  57. package/dist/commands/lint/env-key-rules.js +154 -0
  58. package/dist/commands/lint/index.js +92 -3
  59. package/dist/commands/lint/knowledge-linter.js +3 -0
  60. package/dist/commands/lint/markdown-insertion.js +343 -0
  61. package/dist/commands/lint/memory-linter.js +3 -0
  62. package/dist/commands/lint/registry.js +3 -0
  63. package/dist/commands/lint/skill-linter.js +3 -0
  64. package/dist/commands/lint/task-linter.js +15 -12
  65. package/dist/commands/lint/types.js +3 -0
  66. package/dist/commands/lint/workflow-linter.js +3 -0
  67. package/dist/commands/lint.js +3 -0
  68. package/dist/commands/migration-help.js +5 -2
  69. package/dist/commands/proposal-drain-policies.js +128 -0
  70. package/dist/commands/proposal-drain.js +477 -0
  71. package/dist/commands/proposal.js +60 -6
  72. package/dist/commands/propose.js +24 -19
  73. package/dist/commands/reflect.js +1004 -94
  74. package/dist/commands/registry-cli.js +150 -0
  75. package/dist/commands/registry-search.js +3 -0
  76. package/dist/commands/remember-cli.js +257 -0
  77. package/dist/commands/remember.js +15 -6
  78. package/dist/commands/schema-repair.js +88 -15
  79. package/dist/commands/search.js +99 -14
  80. package/dist/commands/secret.js +173 -0
  81. package/dist/commands/self-update.js +3 -0
  82. package/dist/commands/show.js +32 -13
  83. package/dist/commands/source-add.js +7 -35
  84. package/dist/commands/source-clone.js +3 -0
  85. package/dist/commands/source-manage.js +3 -0
  86. package/dist/commands/tasks.js +161 -95
  87. package/dist/commands/url-checker.js +3 -0
  88. package/dist/core/action-contributors.js +3 -0
  89. package/dist/core/asset-ref.js +13 -2
  90. package/dist/core/asset-registry.js +9 -2
  91. package/dist/core/asset-serialize.js +88 -0
  92. package/dist/core/asset-spec.js +61 -5
  93. package/dist/core/common.js +93 -5
  94. package/dist/core/concurrent.js +3 -0
  95. package/dist/core/config-io.js +347 -0
  96. package/dist/core/config-migration.js +622 -0
  97. package/dist/core/config-schema.js +558 -0
  98. package/dist/core/config-sources.js +108 -0
  99. package/dist/core/config-types.js +4 -0
  100. package/dist/core/config-walker.js +337 -0
  101. package/dist/core/config.js +366 -1077
  102. package/dist/core/errors.js +42 -20
  103. package/dist/core/events.js +31 -25
  104. package/dist/core/file-lock.js +104 -0
  105. package/dist/core/frontmatter.js +75 -10
  106. package/dist/core/lesson-lint.js +3 -0
  107. package/dist/core/markdown.js +3 -0
  108. package/dist/core/memory-belief.js +62 -0
  109. package/dist/core/memory-contradiction-detect.js +274 -0
  110. package/dist/core/memory-improve.js +142 -14
  111. package/dist/core/parse.js +3 -0
  112. package/dist/core/paths.js +218 -50
  113. package/dist/core/proposal-quality-validators.js +380 -0
  114. package/dist/core/proposal-validators.js +11 -3
  115. package/dist/core/proposals.js +464 -5
  116. package/dist/core/state-db.js +349 -56
  117. package/dist/core/text-truncation.js +107 -0
  118. package/dist/core/time.js +3 -0
  119. package/dist/core/tty.js +59 -0
  120. package/dist/core/warn.js +7 -2
  121. package/dist/core/write-source.js +12 -0
  122. package/dist/indexer/db-backup.js +391 -0
  123. package/dist/indexer/db-search.js +136 -28
  124. package/dist/indexer/db.js +661 -166
  125. package/dist/indexer/ensure-index.js +3 -0
  126. package/dist/indexer/file-context.js +3 -0
  127. package/dist/indexer/graph-boost.js +162 -40
  128. package/dist/indexer/graph-db.js +241 -51
  129. package/dist/indexer/graph-dedup.js +3 -7
  130. package/dist/indexer/graph-extraction.js +242 -149
  131. package/dist/indexer/index-context.js +3 -9
  132. package/dist/indexer/indexer.js +86 -16
  133. package/dist/indexer/llm-cache.js +24 -19
  134. package/dist/indexer/manifest.js +3 -0
  135. package/dist/indexer/matchers.js +184 -11
  136. package/dist/indexer/memory-inference.js +94 -50
  137. package/dist/indexer/metadata-contributors.js +3 -0
  138. package/dist/indexer/metadata.js +110 -50
  139. package/dist/indexer/path-resolver.js +3 -0
  140. package/dist/indexer/project-context.js +192 -0
  141. package/dist/indexer/ranking-contributors.js +134 -7
  142. package/dist/indexer/ranking.js +8 -1
  143. package/dist/indexer/search-fields.js +5 -9
  144. package/dist/indexer/search-hit-enrichers.js +91 -2
  145. package/dist/indexer/search-source.js +20 -1
  146. package/dist/indexer/semantic-status.js +4 -1
  147. package/dist/indexer/staleness-detect.js +447 -0
  148. package/dist/indexer/usage-events.js +12 -9
  149. package/dist/indexer/walker.js +3 -0
  150. package/dist/integrations/agent/builders.js +135 -0
  151. package/dist/integrations/agent/config.js +121 -401
  152. package/dist/integrations/agent/detect.js +3 -0
  153. package/dist/integrations/agent/index.js +6 -14
  154. package/dist/integrations/agent/model-aliases.js +55 -0
  155. package/dist/integrations/agent/profiles.js +3 -0
  156. package/dist/integrations/agent/prompts.js +137 -8
  157. package/dist/integrations/agent/runner.js +208 -0
  158. package/dist/integrations/agent/sdk-runner.js +8 -2
  159. package/dist/integrations/agent/spawn.js +54 -14
  160. package/dist/integrations/github.js +3 -0
  161. package/dist/integrations/lockfile.js +22 -51
  162. package/dist/integrations/session-logs/index.js +4 -0
  163. package/dist/integrations/session-logs/inline-refs.js +35 -0
  164. package/dist/integrations/session-logs/pre-filter.js +152 -0
  165. package/dist/integrations/session-logs/providers/claude-code.js +226 -0
  166. package/dist/integrations/session-logs/providers/opencode.js +231 -25
  167. package/dist/integrations/session-logs/types.js +3 -0
  168. package/dist/llm/call-ai.js +14 -26
  169. package/dist/llm/client.js +16 -2
  170. package/dist/llm/embedder.js +20 -29
  171. package/dist/llm/embedders/cache.js +3 -7
  172. package/dist/llm/embedders/local.js +42 -1
  173. package/dist/llm/embedders/remote.js +20 -8
  174. package/dist/llm/embedders/types.js +3 -7
  175. package/dist/llm/feature-gate.js +92 -56
  176. package/dist/llm/graph-extract.js +402 -31
  177. package/dist/llm/index-passes.js +44 -29
  178. package/dist/llm/memory-infer.js +30 -2
  179. package/dist/llm/metadata-enhance.js +3 -7
  180. package/dist/output/cli-hints.js +7 -4
  181. package/dist/output/context.js +60 -8
  182. package/dist/output/renderers.js +170 -194
  183. package/dist/output/shapes/curate.js +56 -0
  184. package/dist/output/shapes/distill.js +10 -0
  185. package/dist/output/shapes/env-list.js +19 -0
  186. package/dist/output/shapes/events.js +11 -0
  187. package/dist/output/shapes/helpers.js +424 -0
  188. package/dist/output/shapes/history.js +7 -0
  189. package/dist/output/shapes/passthrough.js +105 -0
  190. package/dist/output/shapes/proposal-accept.js +7 -0
  191. package/dist/output/shapes/proposal-diff.js +7 -0
  192. package/dist/output/shapes/proposal-list.js +7 -0
  193. package/dist/output/shapes/proposal-producer.js +11 -0
  194. package/dist/output/shapes/proposal-reject.js +7 -0
  195. package/dist/output/shapes/proposal-show.js +7 -0
  196. package/dist/output/shapes/registry-search.js +6 -0
  197. package/dist/output/shapes/registry.js +30 -0
  198. package/dist/output/shapes/search.js +6 -0
  199. package/dist/output/shapes/secret-list.js +19 -0
  200. package/dist/output/shapes/show.js +6 -0
  201. package/dist/output/shapes/vault-list.js +19 -0
  202. package/dist/output/shapes.js +51 -549
  203. package/dist/output/text/add.js +6 -0
  204. package/dist/output/text/clone.js +6 -0
  205. package/dist/output/text/config.js +6 -0
  206. package/dist/output/text/curate.js +6 -0
  207. package/dist/output/text/distill.js +7 -0
  208. package/dist/output/text/enable-disable.js +7 -0
  209. package/dist/output/text/events.js +10 -0
  210. package/dist/output/text/feedback.js +6 -0
  211. package/dist/output/text/helpers.js +1059 -0
  212. package/dist/output/text/history.js +7 -0
  213. package/dist/output/text/import.js +6 -0
  214. package/dist/output/text/index.js +6 -0
  215. package/dist/output/text/info.js +6 -0
  216. package/dist/output/text/init.js +6 -0
  217. package/dist/output/text/list.js +6 -0
  218. package/dist/output/text/proposal-producer.js +8 -0
  219. package/dist/output/text/proposal.js +12 -0
  220. package/dist/output/text/registry-commands.js +11 -0
  221. package/dist/output/text/registry.js +30 -0
  222. package/dist/output/text/remember.js +6 -0
  223. package/dist/output/text/remove.js +6 -0
  224. package/dist/output/text/save.js +6 -0
  225. package/dist/output/text/search.js +6 -0
  226. package/dist/output/text/show.js +6 -0
  227. package/dist/output/text/update.js +6 -0
  228. package/dist/output/text/upgrade.js +6 -0
  229. package/dist/output/text/vault.js +16 -0
  230. package/dist/output/text/wiki.js +15 -0
  231. package/dist/output/text/workflow.js +14 -0
  232. package/dist/output/text.js +44 -1329
  233. package/dist/registry/build-index.js +3 -0
  234. package/dist/registry/create-provider-registry.js +3 -0
  235. package/dist/registry/factory.js +4 -1
  236. package/dist/registry/origin-resolve.js +3 -0
  237. package/dist/registry/providers/index.js +3 -0
  238. package/dist/registry/providers/skills-sh.js +11 -2
  239. package/dist/registry/providers/static-index.js +10 -1
  240. package/dist/registry/providers/types.js +3 -24
  241. package/dist/registry/resolve.js +11 -16
  242. package/dist/registry/types.js +3 -0
  243. package/dist/scripts/migrate-storage.js +17767 -0
  244. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
  245. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  246. package/dist/setup/detect.js +3 -0
  247. package/dist/setup/ripgrep-install.js +3 -0
  248. package/dist/setup/ripgrep-resolve.js +3 -0
  249. package/dist/setup/setup.js +306 -67
  250. package/dist/setup/steps.js +3 -15
  251. package/dist/sources/include.js +3 -0
  252. package/dist/sources/provider-factory.js +3 -11
  253. package/dist/sources/provider.js +3 -20
  254. package/dist/sources/providers/filesystem.js +19 -23
  255. package/dist/sources/providers/git.js +171 -21
  256. package/dist/sources/providers/index.js +3 -0
  257. package/dist/sources/providers/install-types.js +3 -13
  258. package/dist/sources/providers/npm.js +3 -4
  259. package/dist/sources/providers/provider-utils.js +3 -0
  260. package/dist/sources/providers/sync-from-ref.js +3 -11
  261. package/dist/sources/providers/tar-utils.js +3 -0
  262. package/dist/sources/providers/website.js +18 -22
  263. package/dist/sources/resolve.js +3 -0
  264. package/dist/sources/types.js +3 -0
  265. package/dist/sources/website-ingest.js +3 -0
  266. package/dist/tasks/backends/cron.js +3 -0
  267. package/dist/tasks/backends/exec-utils.js +3 -0
  268. package/dist/tasks/backends/index.js +3 -11
  269. package/dist/tasks/backends/launchd.js +4 -1
  270. package/dist/tasks/backends/schtasks.js +4 -1
  271. package/dist/tasks/parser.js +51 -38
  272. package/dist/tasks/resolveAkmBin.js +3 -0
  273. package/dist/tasks/runner.js +35 -9
  274. package/dist/tasks/schedule.js +20 -1
  275. package/dist/tasks/schema.js +5 -3
  276. package/dist/tasks/validator.js +6 -3
  277. package/dist/version.js +3 -0
  278. package/dist/wiki/wiki-templates.js +6 -3
  279. package/dist/wiki/wiki.js +4 -1
  280. package/dist/workflows/authoring.js +4 -1
  281. package/dist/workflows/cli.js +3 -0
  282. package/dist/workflows/db.js +140 -10
  283. package/dist/workflows/document-cache.js +3 -10
  284. package/dist/workflows/parser.js +3 -0
  285. package/dist/workflows/renderer.js +3 -0
  286. package/dist/workflows/runs.js +18 -1
  287. package/dist/workflows/schema.js +3 -0
  288. package/dist/workflows/scope-key.js +3 -0
  289. package/dist/workflows/validator.js +5 -9
  290. package/docs/README.md +7 -2
  291. package/docs/data-and-telemetry.md +225 -0
  292. package/docs/migration/release-notes/0.7.5.md +2 -2
  293. package/docs/migration/release-notes/0.8.0.md +57 -5
  294. package/docs/migration/v0.7-to-v0.8.md +1378 -0
  295. package/package.json +28 -11
  296. package/.github/LICENSE +0 -374
  297. package/dist/commands/help/help-accept.md +0 -9
  298. package/dist/commands/help/help-improve.md +0 -53
  299. package/dist/commands/help/help-reject.md +0 -8
  300. package/dist/commands/install-audit.js +0 -385
  301. package/dist/commands/vault.js +0 -310
  302. package/dist/indexer/match-contributors.js +0 -141
  303. package/dist/integrations/agent/pipeline.js +0 -39
  304. package/dist/integrations/agent/runners.js +0 -31
  305. package/dist/llm/prompts/graph-extract-user-prompt.md +0 -12
  306. /package/dist/{tasks → assets}/backends/launchd-template.xml +0 -0
  307. /package/dist/{tasks → assets}/backends/schtasks-template.xml +0 -0
  308. /package/dist/{commands → assets}/help/help-propose.md +0 -0
  309. /package/dist/{wiki → assets/wiki}/index-template.md +0 -0
  310. /package/dist/{wiki → assets/wiki}/ingest-workflow-template.md +0 -0
  311. /package/dist/{wiki → assets/wiki}/log-template.md +0 -0
  312. /package/dist/{wiki → assets/wiki}/schema-template.md +0 -0
  313. /package/dist/{workflows → assets/workflows}/workflow-template.md +0 -0
@@ -1,26 +1,43 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
1
4
  import fs from "node:fs";
2
5
  import path from "node:path";
3
6
  import { makeAssetRef, parseAssetRef } from "../core/asset-ref";
4
- import { loadConfig } from "../core/config";
5
- import { ConfigError, NotFoundError } from "../core/errors";
7
+ import { daysToMs, isAssetType } from "../core/common";
8
+ import { getDefaultLlmConfig, loadConfig } from "../core/config";
9
+ import { ConfigError, NotFoundError, rethrowIfTestIsolationError, UsageError } from "../core/errors";
6
10
  import { appendEvent, readEvents } from "../core/events";
11
+ import { probeLock, releaseLock, tryAcquireLockSync } from "../core/file-lock";
7
12
  import { parseFrontmatter } from "../core/frontmatter";
13
+ import { detectAndWriteContradictions } from "../core/memory-contradiction-detect";
8
14
  import { analyzeMemoryCleanup, applyMemoryCleanup, } from "../core/memory-improve";
9
15
  import { getDbPath } from "../core/paths";
10
- import { listProposals } from "../core/proposals";
16
+ import { createProposal, expireStaleProposals, getProposal, isProposalSkipped, listProposals, purgeOrphanProposals, } from "../core/proposals";
17
+ import { openStateDatabase, purgeOldEvents, purgeOldImproveRuns } from "../core/state-db";
11
18
  import { info, warn } from "../core/warn";
12
- import { closeDatabase, getAllEntries, getRetrievalCounts, getUtilityScoresByIds, getZeroResultSearches, openDatabase, openExistingDatabase, } from "../indexer/db";
19
+ import { closeDatabase, getAllEntries, getEntryCount, getRetrievalCounts, getUtilityScoresByIds, getZeroResultSearches, openDatabase, openExistingDatabase, } from "../indexer/db";
13
20
  import { ensureIndex } from "../indexer/ensure-index";
14
21
  import { runGraphExtractionPass } from "../indexer/graph-extraction";
15
22
  import { akmIndex } from "../indexer/indexer";
16
23
  import { runMemoryInferencePass, } from "../indexer/memory-inference";
17
24
  import { resolveAssetPath } from "../indexer/path-resolver";
18
25
  import { getWritableStashDirs, resolveSourceEntries } from "../indexer/search-source";
19
- import { getExecutionLogCandidates } from "../integrations/session-logs";
26
+ import { runStalenessDetectionPass } from "../indexer/staleness-detect";
27
+ import { resolveImproveProcessRunnerFromProfile, resolveTriageJudgmentRunner } from "../integrations/agent/runner";
28
+ import { getAvailableHarnesses } from "../integrations/session-logs";
29
+ import { isLlmFeatureEnabled, isProcessEnabled } from "../llm/feature-gate";
30
+ import { isGitBackedStash, resolveWritableOverride, saveGitStash } from "../sources/providers/git";
20
31
  import { akmConsolidate } from "./consolidate";
21
- import { akmDistill, deriveLessonRef } from "./distill";
32
+ import { akmDistill, deriveLessonRef, isDistillRefusedInputType } from "./distill";
33
+ import { deriveKnowledgeRef } from "./distill-promotion-policy";
22
34
  import { countEvalCases, writeEvalCase } from "./eval-cases";
35
+ import { akmExtract } from "./extract";
36
+ import { makeGateConfig, resolveExtractConfidence, runAutoAcceptGate } from "./improve-auto-accept";
37
+ import { isProfileFilteredForAllPasses, resolveImproveProfile, resolveProcessEnabled, shouldSkipRef, } from "./improve-profiles";
23
38
  import { akmLint } from "./lint/index";
39
+ import { drainProposals } from "./proposal-drain";
40
+ import { resolveDrainPolicy } from "./proposal-drain-policies";
24
41
  import { akmReflect } from "./reflect";
25
42
  import { runSchemaRepairPass } from "./schema-repair";
26
43
  import { checkDeadUrls } from "./url-checker";
@@ -33,10 +50,47 @@ function resolveImproveScope(scope) {
33
50
  return { mode: "ref", value: trimmed };
34
51
  }
35
52
  catch {
53
+ if (!isAssetType(trimmed)) {
54
+ throw new UsageError(`Unknown asset type: "${trimmed}". Valid types: memory, knowledge, skill, lesson, workflow, agent, command, script, wiki, env, vault, task.\n` +
55
+ `If you passed --format to akm improve, that flag is not supported — use it with akm search or akm show instead.`, "INVALID_FLAG_VALUE");
56
+ }
36
57
  return { mode: "type", value: trimmed };
37
58
  }
38
59
  }
39
- async function collectEligibleRefs(scope, stashDir) {
60
+ /**
61
+ * Render the end-of-run stash-sync commit message, expanding `{token}`
62
+ * placeholders against this run's results. Unknown tokens are passed through
63
+ * verbatim so adding new tokens later never breaks an existing template, and so
64
+ * a literal brace in a message is harmless.
65
+ *
66
+ * Supported tokens (the "free" set — derived from data already on the result):
67
+ * {timestamp} `YYYY-MM-DD HH:MM:SS` (UTC)
68
+ * {date} `YYYY-MM-DD` (UTC)
69
+ * {time} `HH:MM:SS` (UTC)
70
+ * {scope} scope value (e.g. a ref/type) or the scope mode (`all`)
71
+ * {refs} number of planned refs this run processed
72
+ * {accepted} number of proposals auto-accepted by the confidence gate
73
+ *
74
+ * The result is still passed through `sanitizeCommitMessage` downstream in
75
+ * `saveGitStash`, so token values never widen the commit-message attack surface
76
+ * (newlines/control chars are collapsed there).
77
+ *
78
+ * `nowMs` is injected (not read from `Date.now()`) so the function is pure and
79
+ * deterministically testable.
80
+ */
81
+ export function renderSyncCommitMessage(template, result, nowMs) {
82
+ const iso = new Date(nowMs).toISOString();
83
+ const tokens = {
84
+ timestamp: `${iso.slice(0, 10)} ${iso.slice(11, 19)}`,
85
+ date: iso.slice(0, 10),
86
+ time: iso.slice(11, 19),
87
+ scope: result.scope.value ?? result.scope.mode,
88
+ refs: String(result.plannedRefs.length),
89
+ accepted: String(result.gateAutoAcceptedCount ?? 0),
90
+ };
91
+ return template.replace(/\{(\w+)\}/g, (match, key) => (Object.hasOwn(tokens, key) ? tokens[key] : match));
92
+ }
93
+ async function collectEligibleRefs(scope, stashDir, improveProfile) {
40
94
  if (scope.mode === "ref" && scope.value) {
41
95
  const parsed = parseAssetRef(scope.value);
42
96
  const writableDirs = new Set(getWritableStashDirs(stashDir).map((dir) => path.resolve(dir)));
@@ -45,6 +99,7 @@ async function collectEligibleRefs(scope, stashDir) {
45
99
  return {
46
100
  plannedRefs: [],
47
101
  memorySummary: { eligible: 0, derived: 0 },
102
+ profileFilteredRefs: [],
48
103
  };
49
104
  }
50
105
  return {
@@ -53,6 +108,7 @@ async function collectEligibleRefs(scope, stashDir) {
53
108
  eligible: parsed.type === "memory" ? 1 : 0,
54
109
  derived: parsed.type === "memory" && parsed.name.endsWith(".derived") ? 1 : 0,
55
110
  },
111
+ profileFilteredRefs: [],
56
112
  };
57
113
  }
58
114
  let sources;
@@ -60,10 +116,10 @@ async function collectEligibleRefs(scope, stashDir) {
60
116
  sources = resolveSourceEntries(stashDir);
61
117
  }
62
118
  catch {
63
- return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 } };
119
+ return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 }, profileFilteredRefs: [] };
64
120
  }
65
121
  if (sources.length === 0) {
66
- return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 } };
122
+ return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 }, profileFilteredRefs: [] };
67
123
  }
68
124
  // Only operate on writable sources — never mutate read-only registry caches
69
125
  // or remote stashes that the user did not mark writable.
@@ -86,30 +142,57 @@ async function collectEligibleRefs(scope, stashDir) {
86
142
  return isEntryInWritableSource(indexed.stashDir, indexed.filePath, writableDirSet);
87
143
  });
88
144
  const planned = new Map();
145
+ const profileFiltered = new Map();
89
146
  let memoryEligible = 0;
90
147
  let memoryDerived = 0;
91
148
  for (const indexed of entries) {
92
149
  const ref = makeAssetRef(indexed.entry.type, indexed.entry.name);
93
- if (!planned.has(ref)) {
94
- planned.set(ref, {
95
- ref,
96
- reason: scope.mode === "type" ? "scope-type" : indexed.entry.type === "memory" ? "memory-cleanup" : "scope-type",
97
- });
150
+ const isDerived = indexed.entry.name.endsWith(".derived");
151
+ // `.derived` memories are LLM-inferred and intentionally skip reflect
152
+ // (see the synthetic `derived-memory-reflect-skipped` branch in the
153
+ // improve loop). Enqueueing them here just produced one synthetic skip
154
+ // per derived memory per hour with no real work — pure churn observed
155
+ // 2026-05-21: 11 derived refs re-planned every hour during idle periods.
156
+ // The cleanup phase (analyzeMemoryCleanup) inspects derived memories
157
+ // independently of `plannedRefs`, so dropping them here loses nothing.
158
+ if (!isDerived && !planned.has(ref) && !profileFiltered.has(ref)) {
159
+ // 2026-05-27: extend the .derived precedent to profile-incompatible
160
+ // refs. If every per-ref pass (reflect + distill) on the active
161
+ // profile would refuse this ref, drop it from `plannedRefs`. The
162
+ // caller emits `improve_skipped { reason: profile_filtered_all_passes }`
163
+ // once `eventsCtx` is available so the audit trail is preserved in a
164
+ // single event per ref instead of 2× synthetic actions per run.
165
+ // Background: see /tmp/akm-health-investigations/planner-profile-metrics-deep-analysis.md
166
+ if (improveProfile && isProfileFilteredForAllPasses(ref, improveProfile)) {
167
+ profileFiltered.set(ref, {
168
+ ref,
169
+ reason: "profile_filtered_all_passes",
170
+ });
171
+ }
172
+ else {
173
+ planned.set(ref, {
174
+ ref,
175
+ reason: scope.mode === "type" ? "scope-type" : indexed.entry.type === "memory" ? "memory-cleanup" : "scope-type",
176
+ });
177
+ }
98
178
  }
99
179
  if (indexed.entry.type === "memory") {
100
180
  memoryEligible += 1;
101
- if (indexed.entry.name.endsWith(".derived"))
181
+ if (isDerived)
102
182
  memoryDerived += 1;
103
183
  }
104
184
  }
105
185
  return {
106
186
  plannedRefs: [...planned.values()],
107
187
  memorySummary: { eligible: memoryEligible, derived: memoryDerived },
188
+ profileFilteredRefs: [...profileFiltered.values()],
108
189
  };
109
190
  }
110
191
  catch (error) {
192
+ // The bun-test isolation guard must never be downgraded to "empty plan".
193
+ rethrowIfTestIsolationError(error);
111
194
  if (error instanceof NotFoundError || error instanceof Error) {
112
- return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 } };
195
+ return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 }, profileFilteredRefs: [] };
113
196
  }
114
197
  throw error;
115
198
  }
@@ -172,15 +255,37 @@ function memoryCleanupParentRef(scope, stashDir) {
172
255
  }
173
256
  return makeAssetRef("memory", parsed.name.slice(0, -".derived".length));
174
257
  }
175
- function filterRemovedPlannedRefs(plannedRefs, archivedRefs) {
176
- if (archivedRefs.length === 0)
177
- return plannedRefs;
178
- const removed = new Set(archivedRefs);
179
- return plannedRefs.filter((planned) => !removed.has(planned.ref));
180
- }
181
258
  function isLessonCandidate(ref) {
259
+ // Only lesson assets need lesson-schema validation (description + when_to_use).
260
+ // Memories have their own distill path via shouldDistillMemoryRef.
261
+ // All other types go through reflect, not distill.
262
+ return parseAssetRef(ref).type === "lesson";
263
+ }
264
+ /**
265
+ * Planner-side check: should this ref enter the distill queue?
266
+ *
267
+ * Distill produces lessons from non-lesson sources. Two cases are eligible:
268
+ *
269
+ * 1. Memory refs that pass {@link shouldDistillMemoryRef} (the existing
270
+ * memory→lesson/knowledge promotion path).
271
+ *
272
+ * Refs whose `type` is in {@link DISTILL_REFUSED_INPUT_TYPES} (currently
273
+ * `lesson:*`) are explicitly excluded — distill refuses them at runtime and
274
+ * queuing them just produces a no-op `skipped` outcome per ref per hour. That
275
+ * planner waste was the bug fixed in commit
276
+ * fix(improve): drop distill-refused types from planner.
277
+ *
278
+ * Note: prior to this fix the gate used `isLessonCandidate(ref)` directly,
279
+ * which was true *only* for `lesson:*` refs — exactly the set distill refuses.
280
+ * The result: every hourly run re-queued the same lesson refs, the same skip
281
+ * message returned, and no work was ever done. See
282
+ * `tests/commands/improve-distill-planner-skip-lessons.test.ts`.
283
+ */
284
+ function isDistillCandidateRef(ref, stashDir) {
182
285
  const parsed = parseAssetRef(ref);
183
- return parsed.type !== "lesson" && parsed.type !== "memory";
286
+ if (isDistillRefusedInputType(parsed.type))
287
+ return false;
288
+ return shouldDistillMemoryRef(ref, stashDir);
184
289
  }
185
290
  function shouldDistillMemoryRef(ref, stashDir) {
186
291
  const parsed = parseAssetRef(ref);
@@ -200,13 +305,115 @@ function shouldDistillMemoryRef(ref, stashDir) {
200
305
  }
201
306
  return !parsed.name.endsWith(".derived");
202
307
  }
308
+ // ── Signal-delta eligibility helpers (0.8.0) ────────────────────────────────
309
+ //
310
+ // The 0.8.0 redesign replaced flat time-based cooldowns for reflect/distill
311
+ // with a *signal-delta* gate: a ref is re-eligible iff new feedback has
312
+ // landed since the last proposal was generated for it. These helpers build
313
+ // the two timestamp maps the gate needs in bulk, so the planner avoids
314
+ // N+1 queries across the full postCleanupRefs set.
315
+ /**
316
+ * Latest feedback event timestamp per ref in the active window. Reads all
317
+ * `feedback` events newer than `sinceIso` in one query and indexes by ref,
318
+ * keeping the maximum `ts` per ref.
319
+ *
320
+ * Only events with a meaningful payload count as "signal" — `metadata.signal`
321
+ * (positive/negative) OR `metadata.note` (a free-form annotation). Empty
322
+ * metadata events are ignored so a stray `akm feedback <ref>` invocation
323
+ * without a flag doesn't trigger downstream re-processing.
324
+ */
325
+ function buildLatestFeedbackTsMap(refs, sinceIso) {
326
+ const out = new Map();
327
+ if (refs.length === 0)
328
+ return out;
329
+ const refSet = new Set(refs);
330
+ const { events } = readEvents({ type: "feedback", since: sinceIso });
331
+ for (const e of events) {
332
+ const ref = e.ref;
333
+ if (!ref || !refSet.has(ref))
334
+ continue;
335
+ const meta = e.metadata;
336
+ const hasSignal = meta !== undefined && (typeof meta.signal === "string" || typeof meta.note === "string");
337
+ if (!hasSignal)
338
+ continue;
339
+ const ts = e.ts ?? "";
340
+ if (ts > (out.get(ref) ?? ""))
341
+ out.set(ref, ts);
342
+ }
343
+ return out;
344
+ }
345
+ /**
346
+ * Latest proposal timestamp per input-ref, filtered by source ('reflect' or
347
+ * 'distill'). Reads the corresponding `*_invoked` events from state.db —
348
+ * these events are emitted at proposal creation time and carry the *input*
349
+ * asset ref (memory:foo, skill:bar, etc.) directly. We use them rather than
350
+ * `listProposals` because distill proposals are keyed by the derived
351
+ * lesson/knowledge ref, not the source memory — joining back through the
352
+ * payload would be fragile.
353
+ */
354
+ function buildLatestProposalTsMap(refs, source) {
355
+ const out = new Map();
356
+ if (refs.length === 0)
357
+ return out;
358
+ const refSet = new Set(refs);
359
+ const eventType = source === "reflect" ? "reflect_invoked" : "distill_invoked";
360
+ const { events } = readEvents({ type: eventType });
361
+ for (const e of events) {
362
+ const ref = e.ref;
363
+ if (!ref || !refSet.has(ref))
364
+ continue;
365
+ // For distill_invoked we only count attempts that produced (or attempted
366
+ // to produce) a real proposal — config_disabled / parse-error outcomes
367
+ // should not move the signal-delta cursor forward.
368
+ if (eventType === "distill_invoked") {
369
+ const outcome = e.metadata?.outcome;
370
+ if (outcome !== "queued" && outcome !== "skipped" && outcome !== "validation_failed")
371
+ continue;
372
+ }
373
+ const ts = e.ts ?? "";
374
+ if (ts > (out.get(ref) ?? ""))
375
+ out.set(ref, ts);
376
+ }
377
+ return out;
378
+ }
379
+ /**
380
+ * Signal-delta eligibility predicate.
381
+ *
382
+ * True iff `latestFeedback[ref]` is defined AND either no prior proposal
383
+ * exists for this (ref, source) OR `latestFeedback[ref] > lastProposal[ref]`.
384
+ *
385
+ * Refs with no feedback signal at all are ineligible by definition — the
386
+ * high-retrieval fallback path (see `noFeedbackCandidates` later in the
387
+ * planner) handles never-touched-but-frequently-read assets separately.
388
+ */
389
+ function isSignalDeltaEligible(ref, latestFeedback, lastProposal) {
390
+ const fb = latestFeedback.get(ref);
391
+ if (!fb)
392
+ return false;
393
+ const lp = lastProposal.get(ref);
394
+ if (!lp)
395
+ return true;
396
+ return fb > lp;
397
+ }
203
398
  export async function akmImprove(options = {}) {
204
399
  const scope = resolveImproveScope(options.scope);
205
- const { plannedRefs, memorySummary } = await collectEligibleRefs(scope, options.stashDir);
206
400
  const reflectFn = options.reflectFn ?? akmReflect;
207
401
  const distillFn = options.distillFn ?? akmDistill;
208
402
  const ensureIndexFn = options.ensureIndexFn ?? ensureIndex;
209
403
  const reindexFn = options.reindexFn ?? akmIndex;
404
+ const drainProposalsFn = options.drainProposalsFn ?? drainProposals;
405
+ // Resolve the improve profile for this run. Profile drives type filtering,
406
+ // process gating, and default autoAccept/limit values.
407
+ const _earlyConfig = options.config ?? loadConfig();
408
+ const improveProfile = resolveImproveProfile(options.profile, _earlyConfig);
409
+ // Apply profile defaults — CLI flags take precedence over profile defaults.
410
+ // Rebuild options with effective values so all downstream stage functions
411
+ // automatically pick up the profile-driven defaults.
412
+ options = {
413
+ ...options,
414
+ autoAccept: options.autoAccept ?? improveProfile.autoAccept,
415
+ limit: options.limit ?? improveProfile.limit,
416
+ };
210
417
  let primaryStashDir;
211
418
  try {
212
419
  primaryStashDir = resolveSourceEntries(options.stashDir)[0]?.path;
@@ -214,63 +421,289 @@ export async function akmImprove(options = {}) {
214
421
  catch {
215
422
  primaryStashDir = undefined;
216
423
  }
217
- const cleanupParentRef = memoryCleanupParentRef(scope, options.stashDir);
218
- const memoryCleanupPlan = shouldAnalyzeMemoryCleanup(scope, memorySummary.eligible, primaryStashDir)
219
- ? analyzeMemoryCleanup(primaryStashDir, cleanupParentRef ? { parentRef: cleanupParentRef } : undefined)
220
- : undefined;
221
- const guidance = memorySummary.eligible > 0
222
- ? "Improve folds memory cleanup into the same proposal queue: speculative promotions still go through reflect/distill proposals, while high-confidence redundant derived memories are moved into a recoverable cleanup archive instead of being left active in the stash."
223
- : undefined;
224
- if (options.dryRun) {
225
- const result = {
226
- schemaVersion: 1,
227
- ok: true,
228
- scope,
229
- dryRun: true,
230
- ...(guidance ? { guidance } : {}),
231
- memorySummary,
232
- ...(memoryCleanupPlan ? { memoryCleanup: shapeMemoryCleanup(memoryCleanupPlan) } : {}),
233
- plannedRefs,
234
- };
235
- return result;
236
- }
424
+ // Phase 4 lock hoist (§7): the `improve.lock` setup is hoisted ABOVE
425
+ // ensureIndex/collectEligibleRefs so the triage pre-pass (and improve's own
426
+ // queue writes) run fully serialized under the lock. The dry-run early-return
427
+ // below still skips the lock and triage (the lock+triage block is gated on
428
+ // `!options.dryRun`); contradiction-detection and memory-cleanup analysis,
429
+ // which previously ran before the lock, now sit after it for free.
237
430
  const resolvedLockPath = primaryStashDir
238
431
  ? path.join(primaryStashDir, ".akm", "improve.lock")
239
432
  : path.join(options.stashDir ?? ".", ".akm", "improve.lock");
240
- let staleLock = false;
241
- if (fs.existsSync(resolvedLockPath)) {
242
- let lock = null;
433
+ const MAX_LOCK_AGE_MS = 4 * 60 * 60 * 1000; // 4 hours
434
+ const acquireLock = () => {
435
+ fs.mkdirSync(path.dirname(resolvedLockPath), { recursive: true });
436
+ const lockPayload = () => JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() });
437
+ if (tryAcquireLockSync(resolvedLockPath, lockPayload()))
438
+ return;
439
+ // Lock file already exists — probe to determine whether it's still held
440
+ // or whether the prior run died without cleaning up.
441
+ const probe = probeLock(resolvedLockPath, { staleAfterMs: MAX_LOCK_AGE_MS });
442
+ const rawContent = probe.state === "absent" ? undefined : probe.rawContent;
443
+ const lock = rawContent
444
+ ? (() => {
445
+ try {
446
+ return JSON.parse(rawContent);
447
+ }
448
+ catch {
449
+ return null;
450
+ }
451
+ })()
452
+ : null;
453
+ if (probe.state === "stale") {
454
+ // O-7 / #394: Emit improve_lock_recovered event before recovery so the
455
+ // audit trail records the abnormal prior-run exit (Temporal/Airflow pattern).
456
+ try {
457
+ appendEvent({
458
+ eventType: "improve_lock_recovered",
459
+ metadata: {
460
+ stalePid: lock?.pid ?? null,
461
+ lockedAt: lock?.startedAt ?? null,
462
+ recoveredAt: new Date().toISOString(),
463
+ lockAgeMs: probe.ageMs ?? null,
464
+ reason: probe.reason === "pid_dead" ? "pid_not_alive" : probe.reason,
465
+ },
466
+ });
467
+ }
468
+ catch {
469
+ /* event emission is best-effort; never block lock recovery */
470
+ }
471
+ releaseLock(resolvedLockPath);
472
+ if (tryAcquireLockSync(resolvedLockPath, lockPayload()))
473
+ return;
474
+ throw new ConfigError(`akm improve is already running. Delete ${resolvedLockPath} to force.`, "INVALID_CONFIG_FILE");
475
+ }
476
+ throw new ConfigError(`akm improve is already running (PID ${lock?.pid}, started ${lock?.startedAt}). Delete ${resolvedLockPath} to force.`, "INVALID_CONFIG_FILE");
477
+ };
478
+ // Phase 4 lock-leak guard (§7 ordering hazard): hoisting `improve.lock` above
479
+ // the pre-index region (so the triage pre-pass runs under it) means the lock is
480
+ // held while ensureIndex / collectEligibleRefs / contradiction-detection /
481
+ // memory-cleanup analysis run — but the main protecting `try { … } finally {
482
+ // unlinkSync(resolvedLockPath) }` does not begin until after them. A throw in
483
+ // any of those steps would leak the lock. We close that window by wrapping the
484
+ // whole region in a try whose catch releases the lock (when held) and
485
+ // re-throws. The values this region computes are declared in the outer scope so
486
+ // they remain visible to the main run below. The dry-run path never sets
487
+ // `lockAcquired`, so its early return releases nothing.
488
+ let lockAcquired = false;
489
+ const releaseLockOnError = () => {
490
+ if (!lockAcquired)
491
+ return;
243
492
  try {
244
- lock = JSON.parse(fs.readFileSync(resolvedLockPath, "utf8"));
493
+ fs.unlinkSync(resolvedLockPath);
245
494
  }
246
495
  catch {
247
- staleLock = true;
496
+ // best-effort release on the error path
497
+ }
498
+ lockAcquired = false;
499
+ };
500
+ const preEnsureCleanupWarnings = [];
501
+ let plannedRefs;
502
+ let memorySummary;
503
+ let profileFilteredRefs;
504
+ let memoryCleanupPlan;
505
+ let guidance;
506
+ try {
507
+ // Acquire the lock and run the triage pre-pass for non-dry-run executions.
508
+ // The dry-run branch below produces plannedRefs/memorySummary WITHOUT the lock
509
+ // or triage (decision: dry-run never mutates the queue).
510
+ if (!options.dryRun) {
511
+ acquireLock();
512
+ lockAcquired = true;
513
+ // Phase 4 triage pre-pass (§7, §13): drain the standing pending backlog
514
+ // BEFORE ensureIndex so improve generates fresh proposals against a cleared
515
+ // queue (no `duplicate_pending` collisions) and ensureIndex absorbs triage's
516
+ // promotions for free. Gated on the triage process being enabled (opt-in,
517
+ // defaults off) and on a whole-stash / type-scoped run — a single-ref
518
+ // `akm improve skill:x` must never drain the whole queue. Best-effort: a
519
+ // triage failure is a non-fatal warning, never an abort (mirrors the
520
+ // contradiction-detection pass below).
521
+ if (primaryStashDir && resolveProcessEnabled("triage", improveProfile)) {
522
+ if (scope.mode === "ref") {
523
+ warn("[improve] triage pre-pass skipped (single-ref scope never drains the whole queue)");
524
+ }
525
+ else {
526
+ try {
527
+ const triageConfig = improveProfile.processes?.triage;
528
+ const policy = resolveDrainPolicy(triageConfig?.policy);
529
+ const applyMode = triageConfig?.applyMode ?? "queue";
530
+ const maxAccepts = triageConfig?.maxAcceptsPerRun ?? 25;
531
+ const judgment = triageConfig?.judgment
532
+ ? resolveTriageJudgmentRunner(triageConfig.judgment, _earlyConfig)
533
+ : null;
534
+ await drainProposalsFn({
535
+ stashDir: primaryStashDir,
536
+ policy,
537
+ applyMode,
538
+ maxAccepts,
539
+ dryRun: false,
540
+ // No fresh ids exist yet — triage runs before improve generates any.
541
+ excludeIds: new Set(),
542
+ ...(triageConfig?.maxDiffLines !== undefined ? { maxDiffLines: triageConfig.maxDiffLines } : {}),
543
+ judgment,
544
+ });
545
+ }
546
+ catch (err) {
547
+ // Non-fatal: triage is a best-effort pre-pass and must never abort improve.
548
+ warn(`[improve] triage pre-pass failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
549
+ }
550
+ }
551
+ }
248
552
  }
249
- if (lock !== null) {
553
+ // #339 fix: ensureIndex MUST run BEFORE collectEligibleRefs. The eligible-ref
554
+ // query reads the `entries` table; if a DB version upgrade just dropped that
555
+ // table (or the index is otherwise empty), the prior run order silently
556
+ // returned plannedRefs=[] and the improve loop no-op'd. Hoisting the call
557
+ // here repopulates the index first so the subsequent query sees fresh data.
558
+ if (primaryStashDir) {
559
+ // Probe pre-ensureIndex entry count to drive the loud-fail warning below.
560
+ // Best-effort: a missing DB / unreadable schema is the fresh-install case
561
+ // and not a bug — we silently skip the probe.
562
+ let preEnsureEntryCount;
563
+ try {
564
+ const dbPath = getDbPath();
565
+ if (fs.existsSync(dbPath)) {
566
+ const probeDb = openExistingDatabase();
567
+ try {
568
+ preEnsureEntryCount = getEntryCount(probeDb);
569
+ }
570
+ finally {
571
+ closeDatabase(probeDb);
572
+ }
573
+ }
574
+ }
575
+ catch (err) {
576
+ rethrowIfTestIsolationError(err);
577
+ // best-effort; leave preEnsureEntryCount undefined
578
+ }
250
579
  try {
251
- process.kill(lock.pid, 0);
252
- throw new ConfigError(`akm improve is already running (pid ${lock.pid}, started ${lock.startedAt}). Use SIGTERM to stop it.`, "INVALID_CONFIG_FILE");
580
+ await ensureIndexFn(primaryStashDir);
253
581
  }
254
582
  catch (err) {
255
- if (err instanceof ConfigError)
256
- throw err;
257
- staleLock = true;
583
+ preEnsureCleanupWarnings.push(`ensureIndex failed: ${err instanceof Error ? err.message : String(err)}`);
584
+ }
585
+ // #339 loud-fail: if the index was empty pre-ensureIndex but is now
586
+ // populated, a version-upgrade-triggered rebuild just happened. Surface
587
+ // that on stderr so the improve run is not silently masked by stale
588
+ // index state. Zero-before AND zero-after is the empty-stash case and
589
+ // is intentionally not warned (not a bug).
590
+ if (preEnsureEntryCount === 0) {
591
+ try {
592
+ const probeDb = openExistingDatabase();
593
+ let postCount = 0;
594
+ try {
595
+ postCount = getEntryCount(probeDb);
596
+ }
597
+ finally {
598
+ closeDatabase(probeDb);
599
+ }
600
+ if (postCount > 0) {
601
+ warn("[improve] index was empty after DB version upgrade — repopulating before continuing");
602
+ }
603
+ }
604
+ catch (err) {
605
+ rethrowIfTestIsolationError(err);
606
+ // best-effort
607
+ }
258
608
  }
259
609
  }
260
- if (staleLock) {
610
+ ({ plannedRefs, memorySummary, profileFilteredRefs } = await collectEligibleRefs(scope, options.stashDir, improveProfile));
611
+ const cleanupParentRef = memoryCleanupParentRef(scope, options.stashDir);
612
+ // M-1 (#367): Run contradiction-detection BEFORE analyzeMemoryCleanup so
613
+ // the SCC resolver in resolveFamilyContradictions has edges to work on.
614
+ // Best-effort: failures are warnings, never fatal.
615
+ if (primaryStashDir && shouldAnalyzeMemoryCleanup(scope, memorySummary.eligible, primaryStashDir)) {
261
616
  try {
262
- fs.unlinkSync(resolvedLockPath);
617
+ // Reuse the config resolved at the top of the run instead of a second load.
618
+ await detectAndWriteContradictions(primaryStashDir, _earlyConfig);
263
619
  }
264
- catch {
265
- // ignore
620
+ catch (err) {
621
+ // Non-fatal: contradiction detection is a best-effort pass.
622
+ warn(`[improve] contradiction detection failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
266
623
  }
267
624
  }
625
+ memoryCleanupPlan = shouldAnalyzeMemoryCleanup(scope, memorySummary.eligible, primaryStashDir)
626
+ ? analyzeMemoryCleanup(primaryStashDir, cleanupParentRef ? { parentRef: cleanupParentRef } : undefined)
627
+ : undefined;
628
+ guidance =
629
+ memorySummary.eligible > 0
630
+ ? "Improve folds memory cleanup into the same proposal queue: speculative promotions still go through reflect/distill proposals, while high-confidence redundant derived memories are moved into a recoverable cleanup archive instead of being left active in the stash."
631
+ : undefined;
632
+ if (options.dryRun) {
633
+ const result = {
634
+ schemaVersion: 1,
635
+ ok: true,
636
+ scope,
637
+ dryRun: true,
638
+ ...(guidance ? { guidance } : {}),
639
+ memorySummary,
640
+ ...(memoryCleanupPlan ? { memoryCleanup: shapeMemoryCleanup(memoryCleanupPlan) } : {}),
641
+ plannedRefs,
642
+ ...(profileFilteredRefs.length > 0 ? { profileFilteredRefs } : {}),
643
+ };
644
+ return result;
645
+ }
268
646
  }
269
- fs.mkdirSync(path.dirname(resolvedLockPath), { recursive: true });
270
- fs.writeFileSync(resolvedLockPath, JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() }));
271
- const budgetMs = options.timeoutMs ?? 2 * 60 * 60 * 1000; // default 2 hours
647
+ catch (err) {
648
+ releaseLockOnError();
649
+ throw err;
650
+ }
651
+ // FIX 2 (lock-leak window): everything from here on runs UNDER the lock that
652
+ // `acquireLock()` just took. The single `try { … } finally { unlinkSync(lock) }`
653
+ // below now spans the budget-timer setup, `openStateDatabase()`, and the
654
+ // `profileFilteredRefs` audit-event loop too — regions that previously sat in
655
+ // the gap between the lock-acquire catch (above) and the main try. A throw in
656
+ // any of them used to leak the lock (blocking the next improve up to 4h);
657
+ // now the finally releases it exactly once. The dry-run path already returned
658
+ // above without acquiring the lock, so it never reaches this finally; the
659
+ // best-effort `unlinkSync` is a no-op when no lock file exists.
272
660
  const startMs = Date.now();
661
+ const budgetMs = options.timeoutMs ?? 2 * 60 * 60 * 1000; // default 2 hours
662
+ // O-1 (#364): Create a shared AbortController derived from startMs + budgetMs.
663
+ // Every async seam receives this signal so a hung sub-call cannot extend the
664
+ // run past the declared budget.
665
+ // References: Anthropic *Building Effective Agents* (2024); CoALA §5 (arXiv:2309.02427).
666
+ const budgetAbortController = new AbortController();
667
+ // Declared in the outer scope so the `finally` can clear the timer even if a
668
+ // throw occurs before/after it is armed. Defaults to a no-op until armed.
669
+ let clearBudgetTimer = () => { };
670
+ // I1: open a single state.db connection for the entire improve run so all
671
+ // appendEvent calls reuse one handle instead of open/migrate/close per call.
672
+ let eventsDb;
673
+ // `eventsCtx` is read by the main catch (improve_failed) and finally, so it
674
+ // lives in the outer scope. It is always assigned at the top of the try.
675
+ let eventsCtx = {};
273
676
  try {
677
+ const budgetTimer = setTimeout(() => {
678
+ budgetAbortController.abort("improve budget exhausted");
679
+ // Grace period: let finally run to release improve.lock, then hard-exit
680
+ // to prevent the process outliving the task timeout window (lock-cascade fix).
681
+ setTimeout(() => process.exit(1), 5_000);
682
+ }, budgetMs);
683
+ // Clear the timer when the run ends to avoid keeping the event loop alive.
684
+ clearBudgetTimer = () => clearTimeout(budgetTimer);
685
+ try {
686
+ eventsDb = openStateDatabase();
687
+ eventsCtx = { db: eventsDb };
688
+ }
689
+ catch (err) {
690
+ rethrowIfTestIsolationError(err);
691
+ // If we cannot open state.db up-front, fall back to per-call opens.
692
+ eventsCtx = {};
693
+ }
694
+ // 2026-05-27: emit `improve_skipped` audit events for refs the planner
695
+ // pre-filtered (reflect AND distill both refuse them under the active
696
+ // profile). One event per ref so the existing improve_skipped histogram in
697
+ // `health.ts#improveSummary.skipReasons` accumulates the right count under
698
+ // the new `profile_filtered_all_passes` reason code. See
699
+ // `/tmp/akm-health-investigations/planner-profile-metrics-deep-analysis.md`.
700
+ for (const filtered of profileFilteredRefs) {
701
+ appendEvent({
702
+ eventType: "improve_skipped",
703
+ ref: filtered.ref,
704
+ metadata: { reason: "profile_filtered_all_passes" },
705
+ }, eventsCtx);
706
+ }
274
707
  const preparation = await runImprovePreparationStage({
275
708
  scope,
276
709
  options,
@@ -278,12 +711,25 @@ export async function akmImprove(options = {}) {
278
711
  memoryCleanupPlan,
279
712
  primaryStashDir,
280
713
  memorySummary,
281
- ensureIndexFn,
282
714
  reindexFn,
283
715
  startMs,
284
716
  budgetMs,
717
+ eventsCtx,
718
+ initialCleanupWarnings: preEnsureCleanupWarnings,
719
+ improveProfile,
285
720
  });
286
- const { crossStepErrorsInjected, memoryRefsForInference } = await runImproveLoopStage({
721
+ // D6: pre-load all proposal_rejected events from the last 30 days once,
722
+ // so the per-asset loop can use a Map lookup instead of N DB round trips.
723
+ const REJECTED_PROPOSAL_WINDOW_MS = daysToMs(30);
724
+ const rejectedProposalSince = new Date(Date.now() - REJECTED_PROPOSAL_WINDOW_MS).toISOString();
725
+ const allRejectedProposalEvents = readEvents({ type: "proposal_rejected", since: rejectedProposalSince }).events;
726
+ const rejectedProposalsByRef = new Map();
727
+ for (const e of allRejectedProposalEvents) {
728
+ if (e.ref && (!rejectedProposalsByRef.has(e.ref) || e.ts > (rejectedProposalsByRef.get(e.ref)?.ts ?? ""))) {
729
+ rejectedProposalsByRef.set(e.ref, e);
730
+ }
731
+ }
732
+ const { reflectsWithErrorContext, memoryRefsForInference, gateAutoAcceptedCount: loopGateCount, } = await runImproveLoopStage({
287
733
  scope,
288
734
  options,
289
735
  primaryStashDir,
@@ -293,11 +739,16 @@ export async function akmImprove(options = {}) {
293
739
  actions: preparation.actions,
294
740
  signalBearingSet: preparation.signalBearingSet,
295
741
  distillCooledRefs: preparation.distillCooledRefs,
742
+ distillOnlyRefs: preparation.distillOnlyRefs,
296
743
  recentErrors: preparation.recentErrors,
744
+ rejectedProposalsByRef,
745
+ utilityMap: preparation.utilityMap,
297
746
  startMs,
298
747
  budgetMs,
748
+ eventsCtx,
749
+ improveProfile,
299
750
  });
300
- const { allWarnings, consolidation, deadUrls, memoryInference, graphExtraction, maintenanceActions } = await runImprovePostLoopStage({
751
+ const { allWarnings, consolidation, deadUrls, memoryInference, graphExtraction, stalenessDetection, maintenanceActions, memoryInferenceDurationMs, graphExtractionDurationMs, orphansPurged, proposalsExpired, gateAutoAcceptedCount: postLoopGateCount, } = await runImprovePostLoopStage({
301
752
  scope,
302
753
  options,
303
754
  primaryStashDir,
@@ -307,6 +758,10 @@ export async function akmImprove(options = {}) {
307
758
  memorySummary,
308
759
  memoryRefsForInference,
309
760
  reindexFn,
761
+ eventsCtx,
762
+ // O-1 (#364): propagate wall-clock budget signal to post-loop maintenance.
763
+ budgetSignal: budgetAbortController.signal,
764
+ improveProfile,
310
765
  });
311
766
  const finalActions = maintenanceActions && maintenanceActions.length > 0
312
767
  ? [...preparation.actions, ...maintenanceActions]
@@ -340,39 +795,143 @@ export async function akmImprove(options = {}) {
340
795
  }
341
796
  : {}),
342
797
  plannedRefs: preparation.actionableRefs,
798
+ ...(profileFilteredRefs.length > 0 ? { profileFilteredRefs } : {}),
343
799
  actions: finalActions,
344
800
  ...(preparation.validationFailures.length > 0 ? { validationFailures: preparation.validationFailures } : {}),
345
801
  ...(preparation.schemaRepairs.length > 0 ? { schemaRepairs: preparation.schemaRepairs } : {}),
346
802
  ...(consolidation.processed > 0 || consolidation.warnings.length > 0 ? { consolidation } : {}),
347
803
  ...(preparation.lintSummary !== undefined ? { lintSummary: preparation.lintSummary } : {}),
348
804
  ...(preparation.memoryIndexHealth !== undefined ? { memoryIndexHealth: preparation.memoryIndexHealth } : {}),
349
- feedbackRatioUsed: preparation.feedbackRatioUsed,
350
805
  ...(preparation.coverageGaps.length > 0 ? { coverageGaps: preparation.coverageGaps } : {}),
351
- ...(preparation.executionLogCandidates.length > 0
352
- ? { executionLogCandidates: preparation.executionLogCandidates }
353
- : {}),
806
+ ...(preparation.extract && preparation.extract.length > 0 ? { extract: preparation.extract } : {}),
354
807
  ...(primaryStashDir !== undefined ? { evalCasesWritten: countEvalCases(primaryStashDir) } : {}),
355
808
  ...(deadUrls !== undefined && deadUrls.length > 0 ? { deadUrls } : {}),
356
- ...(crossStepErrorsInjected > 0 ? { crossStepErrorsInjected } : {}),
809
+ ...(reflectsWithErrorContext > 0 ? { reflectsWithErrorContext } : {}),
357
810
  ...(memoryInference ? { memoryInference } : {}),
358
811
  ...(graphExtraction ? { graphExtraction } : {}),
812
+ // Per-phase wall-clock durations. Surfaced at the top level of the
813
+ // envelope (not nested) because `health.ts`'s `wallTime.byPhase`
814
+ // aggregator and the existing `memoryInference.durationMs` /
815
+ // `graphExtraction.durationMs` health buckets all read
816
+ // `result.{memoryInferenceDurationMs,graphExtractionDurationMs}`
817
+ // directly. Mirrors how `consolidation.durationMs` is surfaced inside
818
+ // the consolidation sub-object (different convention because the
819
+ // consolidation result type already owns that field). Phases that did
820
+ // not run (zero duration) are omitted so the aggregator's
821
+ // "phase actually ran" filter (`> 0`) excludes them from the median/p95
822
+ // sample. Plumbed in d1273d0's follow-up — see
823
+ // `/tmp/akm-health-investigations/metrics-taxonomy-review.md` §1k / §3.
824
+ ...(memoryInferenceDurationMs > 0 ? { memoryInferenceDurationMs } : {}),
825
+ ...(graphExtractionDurationMs > 0 ? { graphExtractionDurationMs } : {}),
826
+ ...(stalenessDetection ? { stalenessDetection } : {}),
827
+ ...(orphansPurged !== undefined ? { orphansPurged } : {}),
828
+ ...(proposalsExpired !== undefined && proposalsExpired > 0 ? { proposalsExpired } : {}),
829
+ reflectCooldownActions: finalActions.filter((a) => a.mode === "reflect-cooldown").length,
830
+ reflectSkippedActions: finalActions.filter((a) => a.mode === "reflect-skipped").length,
831
+ reflectGuardRejectedActions: finalActions.filter((a) => a.mode === "reflect-guard-rejected").length,
832
+ ...(() => {
833
+ const t = preparation.gateAutoAcceptedCount + loopGateCount + postLoopGateCount;
834
+ return t > 0 ? { gateAutoAcceptedCount: t } : {};
835
+ })(),
359
836
  };
360
837
  if (!result.dryRun)
361
- emitImproveCompletedEvent(result);
838
+ emitImproveCompletedEvent(result, {
839
+ memoryInferenceDurationMs,
840
+ graphExtractionDurationMs,
841
+ totalDurationMs: Date.now() - startMs,
842
+ warningCount: allWarnings.length,
843
+ orphansPurged: orphansPurged ?? 0,
844
+ }, eventsCtx);
845
+ // End-of-run BATCH auto-sync. Recognition is decoupled from the per-write
846
+ // path (see write-source.ts case-3): the primary stash writes as a
847
+ // filesystem source during the run, then is committed in one shot here via
848
+ // the same `saveGitStash` that `akm sync` calls. Gated on a non-dry-run, a
849
+ // git-backed primary stash (by `.git`, not by remote), and sync not
850
+ // disabled. A sync failure is NON-FATAL — it never fails a successful run
851
+ // (mirrors the contradiction-detection best-effort pattern).
852
+ const effectiveSync = { ...improveProfile.sync, ...options.sync };
853
+ if (!result.dryRun && primaryStashDir && effectiveSync.enabled !== false && isGitBackedStash(primaryStashDir)) {
854
+ const saveGitStashFn = options.saveGitStashFn ?? saveGitStash;
855
+ // Reuse the config resolved at the top of the run (`_earlyConfig`) instead
856
+ // of a second loadConfig(); the writable derivation is shared with
857
+ // `akm sync` via resolveWritableOverride().
858
+ const writableOverride = resolveWritableOverride(_earlyConfig);
859
+ const push = effectiveSync.push !== false;
860
+ // `sync.message` may contain `{token}` placeholders (timestamp/date/time/
861
+ // scope/refs/accepted) expanded against this run's results; the default
862
+ // template has no tokens so it renders verbatim.
863
+ const message = renderSyncCommitMessage(effectiveSync.message ?? "akm improve auto-sync", result, Date.now());
864
+ try {
865
+ // Pass primaryStashDir as the explicit commit target so the gate above
866
+ // (which validated primaryStashDir via isGitBackedStash) and the commit
867
+ // operate on the SAME directory — avoids divergence when a caller passes
868
+ // a non-default options.stashDir (FIX 9).
869
+ const syncResult = saveGitStashFn(undefined, message, writableOverride, { push, repoDir: primaryStashDir });
870
+ result.sync = {
871
+ committed: syncResult.committed,
872
+ pushed: syncResult.pushed,
873
+ skipped: syncResult.skipped,
874
+ ...(syncResult.reason !== undefined ? { reason: syncResult.reason } : {}),
875
+ };
876
+ appendEvent({
877
+ eventType: "stash_synced",
878
+ metadata: {
879
+ committed: syncResult.committed,
880
+ pushed: syncResult.pushed,
881
+ skipped: syncResult.skipped,
882
+ reason: syncResult.reason ?? null,
883
+ },
884
+ }, eventsCtx);
885
+ }
886
+ catch (syncErr) {
887
+ const reason = syncErr instanceof Error ? syncErr.message : String(syncErr);
888
+ warn(`improve: end-of-run stash sync failed (non-fatal): ${reason}`);
889
+ result.sync = { committed: false, pushed: false, skipped: true, reason };
890
+ appendEvent({
891
+ eventType: "stash_synced",
892
+ metadata: { committed: false, pushed: false, skipped: true, reason },
893
+ }, eventsCtx);
894
+ }
895
+ }
362
896
  return result;
363
897
  }
898
+ catch (err) {
899
+ // D3: emit improve_failed on unexpected crash so dashboards can detect failures.
900
+ appendEvent({
901
+ eventType: "improve_failed",
902
+ ref: scope.mode === "ref" ? scope.value : `improve:${scope.mode}:${scope.value ?? "all"}`,
903
+ metadata: {
904
+ error: err instanceof Error ? err.message : String(err),
905
+ durationMs: Date.now() - startMs,
906
+ },
907
+ }, eventsCtx);
908
+ throw err;
909
+ }
364
910
  finally {
911
+ // O-1 (#364): Clear the budget abort timer so it does not keep the event
912
+ // loop alive after the run completes.
913
+ clearBudgetTimer();
365
914
  try {
366
915
  fs.unlinkSync(resolvedLockPath);
367
916
  }
368
917
  catch {
369
918
  // ignore
370
919
  }
920
+ // I1: close the long-lived state.db connection opened at the top of the run.
921
+ try {
922
+ eventsDb?.close();
923
+ }
924
+ catch {
925
+ // ignore — DB may already be closed
926
+ }
371
927
  }
372
928
  }
373
- function emitImproveCompletedEvent(result) {
929
+ function emitImproveCompletedEvent(result, durations, eventsCtx) {
374
930
  const actionCounts = {
375
931
  reflect: 0,
932
+ reflectFailed: 0,
933
+ reflectCooldown: 0,
934
+ reflectSkipped: 0,
376
935
  distill: 0,
377
936
  distillSkipped: 0,
378
937
  memoryPrune: 0,
@@ -385,6 +944,15 @@ function emitImproveCompletedEvent(result) {
385
944
  case "reflect":
386
945
  actionCounts.reflect += 1;
387
946
  break;
947
+ case "reflect-failed":
948
+ actionCounts.reflectFailed += 1;
949
+ break;
950
+ case "reflect-cooldown":
951
+ actionCounts.reflectCooldown += 1;
952
+ break;
953
+ case "reflect-skipped":
954
+ actionCounts.reflectSkipped += 1;
955
+ break;
388
956
  case "distill":
389
957
  actionCounts.distill += 1;
390
958
  break;
@@ -407,7 +975,9 @@ function emitImproveCompletedEvent(result) {
407
975
  }
408
976
  appendEvent({
409
977
  eventType: "improve_completed",
410
- ref: result.scope.mode === "ref" ? result.scope.value : `improve:${result.scope.mode}:${result.scope.value ?? "all"}`,
978
+ ref: result.scope.mode === "ref"
979
+ ? result.scope.value
980
+ : `improve:${result.scope.mode}:${result.scope.value ?? "all"}`,
411
981
  metadata: {
412
982
  plannedRefs: result.plannedRefs.length,
413
983
  reflectActions: actionCounts.reflect,
@@ -417,10 +987,11 @@ function emitImproveCompletedEvent(result) {
417
987
  memoryInferenceActions: actionCounts.memoryInference,
418
988
  graphExtractionActions: actionCounts.graphExtraction,
419
989
  errorActions: actionCounts.error,
420
- crossStepErrorsInjected: result.crossStepErrorsInjected ?? 0,
421
- feedbackRatioUsed: result.feedbackRatioUsed,
990
+ reflectFailedActions: actionCounts.reflectFailed,
991
+ reflectCooldownActions: actionCounts.reflectCooldown,
992
+ reflectSkippedActions: actionCounts.reflectSkipped,
993
+ reflectsWithErrorContext: result.reflectsWithErrorContext ?? 0,
422
994
  coverageGapCount: result.coverageGaps?.length ?? 0,
423
- executionLogCandidateCount: result.executionLogCandidates?.length ?? 0,
424
995
  evalCasesWritten: result.evalCasesWritten ?? 0,
425
996
  deadUrlCount: result.deadUrls?.length ?? 0,
426
997
  memoryEligible: result.memorySummary.eligible,
@@ -434,16 +1005,31 @@ function emitImproveCompletedEvent(result) {
434
1005
  consolidationProcessed: result.consolidation?.processed ?? 0,
435
1006
  consolidationDurationMs: result.consolidation?.durationMs ?? 0,
436
1007
  memoryInferenceWrites: result.memoryInference?.writtenFacts ?? 0,
437
- memoryInferenceDurationMs: 0,
1008
+ memoryInferenceDurationMs: durations.memoryInferenceDurationMs,
438
1009
  graphExtractionExtractedFiles: result.graphExtraction?.quality.extractedFiles ?? 0,
439
- graphExtractionDurationMs: 0,
1010
+ graphExtractionDurationMs: durations.graphExtractionDurationMs,
1011
+ // New metrics for tuning the improve loop.
1012
+ ...(durations.totalDurationMs !== undefined ? { durationMs: durations.totalDurationMs } : {}),
1013
+ ...(durations.warningCount !== undefined ? { warningCount: durations.warningCount } : {}),
1014
+ ...(durations.orphansPurged !== undefined ? { orphansPurged: durations.orphansPurged } : {}),
1015
+ ...(result.graphExtraction?.quality
1016
+ ? {
1017
+ graphCoverage: result.graphExtraction.quality.extractionCoverage,
1018
+ graphDensity: result.graphExtraction.quality.density,
1019
+ graphEntities: result.graphExtraction.quality.entityCount,
1020
+ }
1021
+ : {}),
440
1022
  },
441
- });
1023
+ }, eventsCtx);
442
1024
  }
443
1025
  async function runImprovePreparationStage(args) {
444
- const { scope, options, plannedRefs, memoryCleanupPlan, primaryStashDir, ensureIndexFn, reindexFn, startMs, budgetMs, } = args;
1026
+ const { scope, options, plannedRefs, memoryCleanupPlan, primaryStashDir, reindexFn, startMs, budgetMs, eventsCtx, initialCleanupWarnings,
1027
+ // improveProfile is part of the preparation-stage signature for future use
1028
+ // (per-process gating moved into the in-loop stage). Kept here so the
1029
+ // signature does not drift away from the rest of the planner stack.
1030
+ improveProfile: _improveProfile, } = args;
445
1031
  const actions = [];
446
- const cleanupWarnings = [];
1032
+ const cleanupWarnings = initialCleanupWarnings ? [...initialCleanupWarnings] : [];
447
1033
  // Phase 0 — MEMORY.md budget check (200-line cap; warn at 180)
448
1034
  let memoryIndexHealth;
449
1035
  if (primaryStashDir) {
@@ -462,28 +1048,90 @@ async function runImprovePreparationStage(args) {
462
1048
  }
463
1049
  }
464
1050
  }
465
- // Phase 0 — execution log synthesis
466
- let executionLogCandidates = [];
467
- try {
468
- const logEntries = getExecutionLogCandidates(7);
469
- executionLogCandidates = logEntries.filter((e) => e.isFailurePattern).map((e) => e.topic);
1051
+ // Phase 0.4 — session-extract pass.
1052
+ //
1053
+ // Reads native session files (claude-code JSONL, opencode storage tree)
1054
+ // through the SessionLogHarness registry, pre-filters noise, and asks a
1055
+ // bounded in-tree LLM to produce candidate memory/lesson/knowledge
1056
+ // proposals for content the agent did NOT preserve via inline `akm remember`
1057
+ // / `akm feedback` invocations. Replaces the akm-plugin session-checkpoint
1058
+ // hook with an on-demand pull pipeline.
1059
+ //
1060
+ // Default-on; opt out via `profiles.improve.default.processes.extract.enabled: false`.
1061
+ // Each available harness gets one call with the default --since window;
1062
+ // already-seen sessions (tracked in state.db.extract_sessions_seen) are
1063
+ // skipped automatically so re-runs don't burn LLM calls on unchanged data.
1064
+ //
1065
+ // Failures are non-fatal — one harness throwing doesn't abort improve.
1066
+ // The extract envelope's own `warnings` field surfaces what went wrong.
1067
+ let extractResults;
1068
+ let gateAutoAcceptedCount = 0;
1069
+ const extractConfig = options.config ?? loadConfig();
1070
+ const extractGateCfg = makeGateConfig("extract", {
1071
+ globalThreshold: options.autoAccept,
1072
+ dryRun: options.dryRun ?? false,
1073
+ stashDir: primaryStashDir,
1074
+ config: extractConfig,
1075
+ eventsCtx,
1076
+ });
1077
+ if (isLlmFeatureEnabled(extractConfig, "session_extraction")) {
1078
+ const availableHarnesses = getAvailableHarnesses();
1079
+ if (availableHarnesses.length > 0) {
1080
+ extractResults = [];
1081
+ for (const h of availableHarnesses) {
1082
+ try {
1083
+ const result = await akmExtract({
1084
+ type: h.name,
1085
+ ...(primaryStashDir !== undefined ? { stashDir: primaryStashDir } : {}),
1086
+ config: extractConfig,
1087
+ dryRun: options.dryRun ?? false,
1088
+ });
1089
+ extractResults.push(result);
1090
+ gateAutoAcceptedCount += (await runAutoAcceptGate(primaryStashDir
1091
+ ? result.proposals.map((proposalId) => {
1092
+ const proposal = getProposal(primaryStashDir, proposalId);
1093
+ return { proposalId, confidence: resolveExtractConfidence(proposal) };
1094
+ })
1095
+ : [], extractGateCfg)).promoted.length;
1096
+ }
1097
+ catch (err) {
1098
+ const msg = err instanceof Error ? err.message : String(err);
1099
+ cleanupWarnings.push(`extract(${h.name}) failed: ${msg}`);
1100
+ }
1101
+ }
1102
+ if (extractResults.length === 0) {
1103
+ // All harnesses threw — clear so the envelope's `extract` field is
1104
+ // absent rather than misleadingly empty.
1105
+ extractResults = undefined;
1106
+ }
1107
+ }
470
1108
  }
471
- catch {
472
- // best-effort
1109
+ // Backlog drain: gate any pending extract proposals that weren't created in
1110
+ // this run (i.e. pre-date the gate or were produced by a run that timed out
1111
+ // before the gate fired). Without this, eligible proposals accumulate
1112
+ // indefinitely — the fresh-gate only covers the current run's output.
1113
+ if (primaryStashDir && !options.dryRun && options.autoAccept !== undefined) {
1114
+ const freshIds = new Set((extractResults ?? []).flatMap((r) => r.proposals));
1115
+ const backlog = listProposals(primaryStashDir, { status: "pending" }).filter((p) => p.source === "extract" && !freshIds.has(p.id));
1116
+ if (backlog.length > 0) {
1117
+ const backlogCandidates = backlog.map((p) => ({
1118
+ proposalId: p.id,
1119
+ confidence: resolveExtractConfidence(p),
1120
+ }));
1121
+ gateAutoAcceptedCount += (await runAutoAcceptGate(backlogCandidates, extractGateCfg)).promoted.length;
1122
+ }
473
1123
  }
1124
+ // eligibleCount = raw pre-filter count (before cooldown/signal/cleanup filters).
1125
+ // improve_completed.plannedRefs = post-filter count of refs that actually entered the loop.
474
1126
  appendEvent({
475
1127
  eventType: "improve_invoked",
476
1128
  ref: scope.mode === "ref" ? scope.value : `improve:${scope.mode}:${scope.value ?? "all"}`,
477
- metadata: { scope, dryRun: options.dryRun ?? false, assetCount: plannedRefs.length },
478
- });
479
- if (primaryStashDir) {
480
- try {
481
- await ensureIndexFn(primaryStashDir);
482
- }
483
- catch (err) {
484
- cleanupWarnings.push(`ensureIndex failed: ${err instanceof Error ? err.message : String(err)}`);
485
- }
486
- }
1129
+ metadata: { scope, dryRun: options.dryRun ?? false, eligibleCount: plannedRefs.length },
1130
+ }, eventsCtx);
1131
+ // ensureIndex now runs in akmImprove() BEFORE collectEligibleRefs so the
1132
+ // eligible-ref query sees a populated `entries` table on the very first
1133
+ // pass after a DB version upgrade (#339). Any failure messages from that
1134
+ // earlier call were threaded in via args.initialCleanupWarnings.
487
1135
  let appliedCleanup;
488
1136
  try {
489
1137
  appliedCleanup =
@@ -493,94 +1141,12 @@ async function runImprovePreparationStage(args) {
493
1141
  cleanupWarnings.push(`applyMemoryCleanup failed: ${err instanceof Error ? err.message : String(err)}`);
494
1142
  }
495
1143
  const archivedRefs = appliedCleanup?.archived.map((record) => record.ref) ?? [];
496
- const postCleanupRefs = filterRemovedPlannedRefs(plannedRefs, archivedRefs);
497
- // Gap 6: only surface feedback signals from the last 30 days so that
498
- // ancient one-off feedback events don't permanently lock an asset into
499
- // every improve run. Assets with only stale signals fall through to the
500
- // high-retrieval path (P0-A) or are skipped until new signals arrive.
501
- const FEEDBACK_SIGNAL_WINDOW_DAYS = 30;
502
- const feedbackSinceCutoff = new Date(Date.now() - FEEDBACK_SIGNAL_WINDOW_DAYS * 24 * 60 * 60 * 1000).toISOString();
503
- const signalFiltered = postCleanupRefs.filter((candidate) => {
504
- const { events } = readEvents({ type: "feedback", ref: candidate.ref });
505
- return events.some((e) => (e.ts ?? "") >= feedbackSinceCutoff &&
506
- ((e.metadata !== undefined && typeof e.metadata.signal === "string") ||
507
- (e.metadata !== undefined && typeof e.metadata.note === "string")));
508
- });
509
- // P0-A: also surface zero-feedback assets that have been retrieved many times.
510
- const RETRIEVAL_COUNT_THRESHOLD = options.minRetrievalCount ?? 5;
511
- const signalBearingSet = new Set(signalFiltered.map((r) => r.ref));
512
- const noFeedbackCandidates = postCleanupRefs.filter((r) => !signalBearingSet.has(r.ref));
513
- let highRetrievalRefs = [];
514
- let dbForRetrieval;
515
- try {
516
- dbForRetrieval = openExistingDatabase();
517
- const showEventCount = dbForRetrieval.prepare("SELECT COUNT(*) AS cnt FROM usage_events WHERE event_type = 'show'").get().cnt;
518
- if (showEventCount === 0) {
519
- warn("Warning: show events not yet in usage_events — zero-feedback fallback will match only search-retrieved assets.");
520
- }
521
- const retrievalCounts = getRetrievalCounts(dbForRetrieval, noFeedbackCandidates.map((r) => r.ref));
522
- highRetrievalRefs = noFeedbackCandidates.filter((r) => (retrievalCounts.get(r.ref) ?? 0) >= RETRIEVAL_COUNT_THRESHOLD);
523
- }
524
- catch {
525
- // best-effort: if DB unavailable, highRetrievalRefs stays empty
526
- }
527
- finally {
528
- if (dbForRetrieval)
529
- closeDatabase(dbForRetrieval);
530
- }
531
- // If the user explicitly scoped to a single ref, always act on it —
532
- // skip the signal/retrieval filter entirely. The filter exists to avoid
533
- // noisy "improve everything" runs; it should not gate an intentional
534
- // per-ref invocation where the user's explicit choice is the signal.
535
- //
536
- // For type/all scope with no signals yet (fresh environment), fall back
537
- // to all postCleanupRefs so that the first improve run is not a no-op.
538
- const signalAndRetrievalRefs = [...signalFiltered, ...highRetrievalRefs];
539
- const mergedRefs = scope.mode === "ref"
540
- ? postCleanupRefs
541
- : options.requireFeedbackSignal
542
- ? signalFiltered
543
- : signalAndRetrievalRefs.length === 0
544
- ? postCleanupRefs
545
- : signalAndRetrievalRefs;
546
- const utilityMap = buildUtilityMap(mergedRefs);
547
- // Load feedback ratio per ref and blend into sort key
548
- const feedbackRatios = new Map();
549
- for (const ref of mergedRefs) {
550
- const { events } = readEvents({ type: "feedback", ref: ref.ref });
551
- const positive = events.filter((e) => e.metadata?.signal === "positive").length;
552
- const negative = events.filter((e) => e.metadata?.signal === "negative").length;
553
- const total = positive + negative;
554
- // ratio = negative proportion (high = needs more improvement)
555
- feedbackRatios.set(ref.ref, total > 0 ? negative / total : 0);
556
- }
557
- // Sort: combine utility (desc) with feedback negativity (desc) — high-negative assets rank higher
558
- const sorted = [...mergedRefs].sort((a, b) => {
559
- const utilA = utilityMap.get(a.ref) ?? 0;
560
- const utilB = utilityMap.get(b.ref) ?? 0;
561
- const ratioA = feedbackRatios.get(a.ref) ?? 0;
562
- const ratioB = feedbackRatios.get(b.ref) ?? 0;
563
- // Combined score: 70% utility, 30% negative ratio
564
- const scoreA = utilA * 0.7 + ratioA * 0.3;
565
- const scoreB = utilB * 0.7 + ratioB * 0.3;
566
- return scoreB - scoreA;
567
- });
568
- const feedbackRatioUsed = true;
569
- // Phase 0: surface coverage gaps from zero-result search queries
570
- let coverageGaps = [];
571
- try {
572
- const dbForGaps = openExistingDatabase();
573
- try {
574
- coverageGaps = getZeroResultSearches(dbForGaps);
575
- }
576
- finally {
577
- closeDatabase(dbForGaps);
578
- }
579
- }
580
- catch {
581
- // best-effort
582
- }
583
- const actionableRefs = options.limit ? sorted.slice(0, options.limit) : sorted;
1144
+ const removed = new Set(archivedRefs);
1145
+ const postCleanupRefs = archivedRefs.length === 0 ? plannedRefs : plannedRefs.filter((r) => !removed.has(r.ref));
1146
+ // ── Phase 1: validation pass + schema repair (run on full postCleanupRefs) ──
1147
+ // Identifies refs whose on-disk asset has structural problems. Validation
1148
+ // failures are excluded from every downstream bucket. Run early so the
1149
+ // cooldown partition operates on a clean set.
584
1150
  if (appliedCleanup) {
585
1151
  for (const candidate of memoryCleanupPlan?.pruneCandidates ?? []) {
586
1152
  const archived = appliedCleanup.archived.find((record) => record.ref === candidate.ref);
@@ -602,13 +1168,16 @@ async function runImprovePreparationStage(args) {
602
1168
  }
603
1169
  }
604
1170
  const validationFailures = [];
605
- for (const candidate of actionableRefs) {
1171
+ for (const candidate of postCleanupRefs) {
606
1172
  try {
607
1173
  const filePath = await findAssetFilePath(candidate.ref, options.stashDir);
608
1174
  if (!filePath) {
609
1175
  validationFailures.push({ ref: candidate.ref, reason: "file not found on disk" });
610
1176
  continue;
611
1177
  }
1178
+ if (path.extname(filePath).toLowerCase() !== ".md") {
1179
+ continue;
1180
+ }
612
1181
  if (isLessonCandidate(candidate.ref)) {
613
1182
  const raw = fs.readFileSync(filePath, "utf8");
614
1183
  const fm = parseFrontmatter(raw).data;
@@ -621,7 +1190,7 @@ async function runImprovePreparationStage(args) {
621
1190
  }
622
1191
  }
623
1192
  if (validationFailures.length > 0) {
624
- info(`[improve] ${validationFailures.length} assets have validation issues (will be skipped):`);
1193
+ info(`[improve] ${validationFailures.length} assets have validation issues (will attempt schema repair):`);
625
1194
  for (const f of validationFailures)
626
1195
  info(` ${f.ref}: ${f.reason}`);
627
1196
  }
@@ -630,7 +1199,7 @@ async function runImprovePreparationStage(args) {
630
1199
  // Schema repair pass: attempt to fix validation failures via LLM before skipping.
631
1200
  if (validationFailures.length > 0 && options.repairValidationFailures !== false) {
632
1201
  const baseConfigForRepair = options.config ?? loadConfig();
633
- const llmCfg = baseConfigForRepair.llm;
1202
+ const llmCfg = getDefaultLlmConfig(baseConfigForRepair);
634
1203
  if (llmCfg) {
635
1204
  const result = await runSchemaRepairPass(validationFailures, {
636
1205
  startMs,
@@ -645,6 +1214,9 @@ async function runImprovePreparationStage(args) {
645
1214
  }
646
1215
  }
647
1216
  const validationFailureRefs = new Set(validationFailures.filter((f) => !repairedRefs.has(f.ref)).map((f) => f.ref));
1217
+ if (repairedRefs.size > 0) {
1218
+ info(`[improve] schema repair fixed ${repairedRefs.size}/${validationFailures.length} validation failures; ${validationFailureRefs.size} remain`);
1219
+ }
648
1220
  // Phase 0.5 — structural hygiene pass
649
1221
  let lintSummary;
650
1222
  if (primaryStashDir) {
@@ -656,106 +1228,310 @@ async function runImprovePreparationStage(args) {
656
1228
  // lint is best-effort; never block improve
657
1229
  }
658
1230
  }
659
- const recentErrors = []; // rolling window, last 3 failures
1231
+ // O-5 / #378: Per-originator rolling error windows.
1232
+ // Reflexion (arXiv:2303.11366) warns that cross-task verbal critique
1233
+ // contamination degrades below single-shot baseline. Each originator key
1234
+ // ("schema-repair", "reflect") maintains its own rolling window so that
1235
+ // schema-repair failures are not injected as avoidPatterns into reflect calls.
1236
+ const recentErrors = {};
660
1237
  const RECENT_ERRORS_CAP = 3;
661
- // Seed the rolling window from any schema repair errors that occurred before the main loop.
1238
+ // Helper: push an error onto an originator's rolling window.
1239
+ function pushRecentError(originator, msg) {
1240
+ if (!recentErrors[originator])
1241
+ recentErrors[originator] = [];
1242
+ recentErrors[originator].push(msg);
1243
+ if (recentErrors[originator].length > RECENT_ERRORS_CAP)
1244
+ recentErrors[originator].shift();
1245
+ }
1246
+ // Seed schema-repair originator window from any schema-repair errors.
662
1247
  for (const repair of schemaRepairs) {
663
1248
  if (repair.outcome === "error") {
664
1249
  const errMsg = repair.error ?? `schema repair error: ${repair.reason}`;
665
- recentErrors.push(errMsg);
666
- if (recentErrors.length > RECENT_ERRORS_CAP)
667
- recentErrors.shift();
1250
+ pushRecentError("schema-repair", errMsg);
668
1251
  }
669
1252
  }
670
- // ── Cooldown pre-filter ───────────────────────────────────────────────────
671
- // Read all cooldown-relevant events in 4 bulk queries and materialise two
672
- // Sets that the loop checks with O(1) Set.has() instead of N per-ref
673
- // readEvents() + listProposals() calls. This eliminates the N×3 DB/FS
674
- // round trips that caused per-asset "reflect cooldown" noise for every
675
- // asset in the stash.
1253
+ // ── Phase 2: signal-delta eligibility sets built EARLY ────────────────────
1254
+ // 0.8.0 replaces the flat time-based cooldowns (which produced synchronised
1255
+ // waves whenever many refs cooled at the same instant — see the 2026-05-26
1256
+ // 54-ref simultaneous-reflect incident) with a *signal-delta* gate:
1257
+ //
1258
+ // reflectEligible(ref) ≡ latestFeedbackTs(ref) > lastReflectProposalTs(ref)
1259
+ // distillEligible(ref) ≡ latestFeedbackTs(ref) > lastDistillProposalTs(ref)
1260
+ //
1261
+ // i.e. a ref is re-eligible iff new feedback has landed since the last
1262
+ // proposal was generated for it. Stable content with no new signal stays
1263
+ // out of the queue regardless of clock time; a sudden burst of feedback
1264
+ // surfaces only the refs that the burst actually touches.
676
1265
  //
677
- // SM-2 tier for reflect uses promoted/rejected events (recorded by
678
- // `akm proposal accept/reject`) rather than the per-ref listProposals()
679
- // filesystem scan, giving identical tier logic without touching the disk.
680
- const REFLECT_COOLDOWN_DAYS = options.reflectCooldownDays ?? 7;
681
- const DISTILL_COOLDOWN_DAYS = options.distillCooldownDays ?? 30;
682
- const reflectCooledRefs = new Set();
1266
+ // The 30-day FEEDBACK_SIGNAL_WINDOW_DAYS bound still applies — only feedback
1267
+ // events newer than that count as "current signal". Ancient one-off
1268
+ // negatives don't permanently lock a ref into every run.
1269
+ //
1270
+ // High-retrieval refs (P0-A path) use a simpler "eligible once" rule: a
1271
+ // ref with no feedback signal but retrievalCount ≥ threshold is eligible
1272
+ // exactly once (no prior reflect proposal). Subsequent re-eligibility for
1273
+ // those refs requires either a new feedback event (then the normal
1274
+ // signal-delta gate applies) or human action. Documented limitation: this
1275
+ // path does not re-fire on retrieval-count growth alone in 0.8.0; storing
1276
+ // the retrieval count in proposal metadata for proper delta-tracking is
1277
+ // captured as future work.
1278
+ const FEEDBACK_SIGNAL_WINDOW_DAYS = 30;
1279
+ const feedbackSinceCutoff = new Date(Date.now() - daysToMs(FEEDBACK_SIGNAL_WINDOW_DAYS)).toISOString();
1280
+ // Build the three timestamp maps once across the entire postCleanupRefs set.
1281
+ // Per-ref queries would be N+1 and the planner is already the hottest path
1282
+ // in `akm improve`.
1283
+ const candidateRefs = postCleanupRefs.filter((r) => !validationFailureRefs.has(r.ref)).map((r) => r.ref);
1284
+ const latestFeedbackTs = buildLatestFeedbackTsMap(candidateRefs, feedbackSinceCutoff);
1285
+ const lastReflectProposalTs = buildLatestProposalTsMap(candidateRefs, "reflect");
1286
+ const lastDistillProposalTs = buildLatestProposalTsMap(candidateRefs, "distill");
1287
+ // Refs the distill signal-delta gate rejected at planning time. The main
1288
+ // loop reads this to skip distill for these refs without re-checking
1289
+ // eligibility per iteration.
683
1290
  const distillCooledRefs = new Set();
684
- if (REFLECT_COOLDOWN_DAYS > 0 || DISTILL_COOLDOWN_DAYS > 0) {
685
- const bulkWindowMs = Math.max(REFLECT_COOLDOWN_DAYS, DISTILL_COOLDOWN_DAYS, 14) * 24 * 60 * 60 * 1000;
686
- const bulkSince = new Date(Date.now() - bulkWindowMs).toISOString();
687
- const bulkReflects = readEvents({ type: "reflect_invoked", since: bulkSince }).events;
688
- const bulkDistills = readEvents({ type: "distill_invoked", since: bulkSince }).events;
689
- const bulkPromoted = readEvents({ type: "promoted", since: bulkSince }).events;
690
- const bulkRejected = readEvents({ type: "rejected", since: bulkSince }).events;
691
- const promotedTs = new Map();
692
- for (const e of bulkPromoted) {
693
- if (e.ref && (e.ts ?? "") > (promotedTs.get(e.ref) ?? ""))
694
- promotedTs.set(e.ref, e.ts ?? "");
695
- }
696
- const rejectedTs = new Map();
697
- for (const e of bulkRejected) {
698
- if (e.ref && (e.ts ?? "") > (rejectedTs.get(e.ref) ?? ""))
699
- rejectedTs.set(e.ref, e.ts ?? "");
700
- }
701
- if (REFLECT_COOLDOWN_DAYS > 0) {
702
- const latestReflect = new Map();
703
- for (const e of bulkReflects) {
704
- if (e.ref && (e.ts ?? "") > (latestReflect.get(e.ref) ?? ""))
705
- latestReflect.set(e.ref, e.ts ?? "");
706
- }
707
- for (const [ref, lastTs] of latestReflect) {
708
- if (!lastTs)
709
- continue;
710
- const hasAccepted = (promotedTs.get(ref) ?? "") > lastTs;
711
- const hasRejected = (rejectedTs.get(ref) ?? "") > lastTs;
712
- let effectiveCooldownDays = REFLECT_COOLDOWN_DAYS;
713
- if (hasAccepted)
714
- continue;
715
- else if (hasRejected)
716
- effectiveCooldownDays = Math.min(REFLECT_COOLDOWN_DAYS, 3);
717
- if (Date.now() - new Date(lastTs).getTime() < effectiveCooldownDays * 24 * 60 * 60 * 1000) {
718
- reflectCooledRefs.add(ref);
719
- }
720
- }
1291
+ const preCooldownCount = postCleanupRefs.length;
1292
+ // ── Phase 3: partition postCleanupRefs by signal-delta eligibility ────────
1293
+ // Three buckets (validation failures are excluded entirely):
1294
+ // eligibleRefs — reflect signal-delta passes (full reflect+distill
1295
+ // loop path; distill guard remains in the loop for
1296
+ // refs that fail the distill signal-delta gate).
1297
+ // distillOnlyRefs — reflect blocked but distill signal-delta passes
1298
+ // AND ref is a distill candidate.
1299
+ // fullySkippedCount — neither gate passes → synthetic skip action
1300
+ // + improve_skipped event, excluded from sort.
1301
+ const eligibleRefs = [];
1302
+ const distillOnlyRefs = [];
1303
+ let fullySkippedCount = 0;
1304
+ // O-2 (#365): explicit --scope <ref> bypasses every gate (user intent wins).
1305
+ const scopeRefBypass = scope.mode === "ref";
1306
+ for (const r of postCleanupRefs) {
1307
+ if (validationFailureRefs.has(r.ref))
1308
+ continue;
1309
+ if (scopeRefBypass) {
1310
+ eligibleRefs.push(r);
1311
+ continue;
721
1312
  }
722
- if (DISTILL_COOLDOWN_DAYS > 0) {
723
- const distillCooldownMs = DISTILL_COOLDOWN_DAYS * 24 * 60 * 60 * 1000;
724
- const latestQueuedDistill = new Map();
725
- for (const e of bulkDistills) {
726
- if (e.ref && e.metadata?.outcome === "queued" && (e.ts ?? "") > (latestQueuedDistill.get(e.ref) ?? "")) {
727
- latestQueuedDistill.set(e.ref, e.ts ?? "");
728
- }
1313
+ const reflectOk = isSignalDeltaEligible(r.ref, latestFeedbackTs, lastReflectProposalTs);
1314
+ const distillOk = isSignalDeltaEligible(r.ref, latestFeedbackTs, lastDistillProposalTs);
1315
+ const isDistillCandidate = isDistillCandidateRef(r.ref, options.stashDir);
1316
+ if (reflectOk) {
1317
+ if (!distillOk && isDistillCandidate) {
1318
+ // Reflect passes the gate, distill does not — emit the synthetic
1319
+ // distill-skipped action and event up-front so the in-loop guard
1320
+ // does not have to re-derive eligibility.
1321
+ distillCooledRefs.add(r.ref);
1322
+ actions.push({ ref: r.ref, mode: "distill-skipped", result: { ok: true, reason: "distill signal-delta" } });
1323
+ appendEvent({
1324
+ eventType: "improve_skipped",
1325
+ ref: r.ref,
1326
+ metadata: { reason: "distill_no_new_signal" },
1327
+ }, eventsCtx);
729
1328
  }
730
- for (const [ref, lastTs] of latestQueuedDistill) {
731
- if (lastTs && Date.now() - new Date(lastTs).getTime() < distillCooldownMs) {
732
- distillCooledRefs.add(ref);
733
- }
1329
+ else if (!distillOk) {
1330
+ // Not a distill candidate AND distill gate doesn't pass — just mark
1331
+ // distillCooled so the loop's distill section is a no-op.
1332
+ distillCooledRefs.add(r.ref);
734
1333
  }
1334
+ eligibleRefs.push(r);
735
1335
  }
736
- }
737
- const loopRefs = actionableRefs.filter((r) => !reflectCooledRefs.has(r.ref) && !validationFailureRefs.has(r.ref));
738
- const reflectCooledLoop = actionableRefs.filter((r) => reflectCooledRefs.has(r.ref));
739
- if (reflectCooledLoop.length > 0) {
740
- info(`[improve] ${reflectCooledLoop.length}/${actionableRefs.length} assets on reflect cooldown — skipping`);
741
- for (const r of reflectCooledLoop) {
1336
+ else if (distillOk && isDistillCandidate) {
1337
+ // Reflect blocked but distill passes → distill-only bucket.
1338
+ distillOnlyRefs.push(r);
1339
+ }
1340
+ else {
1341
+ // Neither gate passes → fully skipped.
1342
+ fullySkippedCount++;
742
1343
  actions.push({
743
1344
  ref: r.ref,
744
1345
  mode: "distill-skipped",
745
- result: { ok: true, reason: "reflect cooldown (pre-filtered)" },
1346
+ result: { ok: true, reason: "no new signal since last proposal" },
746
1347
  });
747
- appendEvent({ eventType: "improve_skipped", ref: r.ref, metadata: { reason: "reflect_cooldown" } });
1348
+ appendEvent({ eventType: "improve_skipped", ref: r.ref, metadata: { reason: "no_new_signal" } }, eventsCtx);
1349
+ }
1350
+ }
1351
+ // ── Phase 4: signal/feedback/utility/sort on the reduced set ──────────────
1352
+ // Everything from here works only on (eligibleRefs ∪ distillOnlyRefs). The
1353
+ // fully-skipped bucket has already been routed and emitted; we deliberately
1354
+ // avoid spending DB/CPU on refs that cannot enter the loop.
1355
+ const processableRefs = [...eligibleRefs, ...distillOnlyRefs];
1356
+ // Gap 6: only surface feedback signals from the last 30 days so that
1357
+ // ancient one-off feedback events don't permanently lock an asset into
1358
+ // every improve run. Assets with only stale signals fall through to the
1359
+ // high-retrieval path (P0-A) or are skipped until new signals arrive.
1360
+ // (FEEDBACK_SIGNAL_WINDOW_DAYS / feedbackSinceCutoff are already defined in
1361
+ // Phase 2 above for the signal-delta gate; we reuse them here.)
1362
+ // Pre-compute feedback summary per ref in a single pass so we don't issue
1363
+ // two readEvents({type:"feedback", ref}) per asset (one for signal filtering,
1364
+ // one for ratio computation).
1365
+ const feedbackSummary = new Map();
1366
+ for (const candidate of processableRefs) {
1367
+ const { events } = readEvents({ type: "feedback", ref: candidate.ref });
1368
+ let hasSignal = false;
1369
+ let positive = 0;
1370
+ let negative = 0;
1371
+ for (const e of events) {
1372
+ if (!hasSignal &&
1373
+ (e.ts ?? "") >= feedbackSinceCutoff &&
1374
+ e.metadata !== undefined &&
1375
+ (typeof e.metadata.signal === "string" || typeof e.metadata.note === "string")) {
1376
+ hasSignal = true;
1377
+ }
1378
+ if (e.metadata?.signal === "positive")
1379
+ positive++;
1380
+ else if (e.metadata?.signal === "negative")
1381
+ negative++;
748
1382
  }
1383
+ feedbackSummary.set(candidate.ref, { hasSignal, positive, negative });
1384
+ }
1385
+ const signalFiltered = processableRefs.filter((candidate) => feedbackSummary.get(candidate.ref)?.hasSignal === true);
1386
+ // P0-A: also surface zero-feedback assets that have been retrieved many times.
1387
+ const RETRIEVAL_COUNT_THRESHOLD = options.minRetrievalCount ?? 5;
1388
+ const signalBearingSet = new Set(signalFiltered.map((r) => r.ref));
1389
+ const noFeedbackCandidates = processableRefs.filter((r) => !signalBearingSet.has(r.ref));
1390
+ let highRetrievalRefs = [];
1391
+ let dbForRetrieval;
1392
+ try {
1393
+ dbForRetrieval = openExistingDatabase();
1394
+ const showEventCount = dbForRetrieval.prepare("SELECT COUNT(*) AS cnt FROM usage_events WHERE event_type = 'show'").get().cnt;
1395
+ if (showEventCount === 0) {
1396
+ warn("Warning: show events not yet in usage_events — zero-feedback fallback will match only search-retrieved assets.");
1397
+ }
1398
+ const retrievalCounts = getRetrievalCounts(dbForRetrieval, noFeedbackCandidates.map((r) => r.ref));
1399
+ // High-retrieval signal-delta (simplified rule, 0.8.0): a no-feedback
1400
+ // ref qualifies exactly once — when retrievalCount ≥ threshold AND no
1401
+ // prior reflect proposal exists for it. Once a reflect proposal is on
1402
+ // record, subsequent re-eligibility requires explicit feedback (which
1403
+ // flows through the normal signal-delta gate above). Tracking growth in
1404
+ // retrieval count would require persisting the count in proposal
1405
+ // metadata; deferred to a follow-up.
1406
+ highRetrievalRefs = noFeedbackCandidates.filter((r) => (retrievalCounts.get(r.ref) ?? 0) >= RETRIEVAL_COUNT_THRESHOLD && !lastReflectProposalTs.has(r.ref));
1407
+ }
1408
+ catch (err) {
1409
+ rethrowIfTestIsolationError(err);
1410
+ // best-effort: if DB unavailable, highRetrievalRefs stays empty
1411
+ }
1412
+ finally {
1413
+ if (dbForRetrieval)
1414
+ closeDatabase(dbForRetrieval);
1415
+ }
1416
+ // If the user explicitly scoped to a single ref, always act on it —
1417
+ // skip the signal/retrieval filter entirely. The filter exists to avoid
1418
+ // noisy "improve everything" runs; it should not gate an intentional
1419
+ // per-ref invocation where the user's explicit choice is the signal.
1420
+ //
1421
+ // For type/all scope: only process refs with usage signals (recent feedback
1422
+ // or sufficient retrievals). A stash with no signals has 0 eligible refs —
1423
+ // usage is the gate. Run `akm feedback <ref> --positive` or retrieve assets
1424
+ // to bring them into the eligible pool.
1425
+ const signalAndRetrievalRefs = [...signalFiltered, ...highRetrievalRefs];
1426
+ const mergedRefs = scope.mode === "ref" ? processableRefs : options.requireFeedbackSignal ? signalFiltered : signalAndRetrievalRefs;
1427
+ const utilityMap = buildUtilityMap(mergedRefs);
1428
+ // Load feedback ratio per ref from the pre-computed summary (no extra DB pass).
1429
+ const feedbackRatios = new Map();
1430
+ for (const ref of mergedRefs) {
1431
+ const summary = feedbackSummary.get(ref.ref);
1432
+ const positive = summary?.positive ?? 0;
1433
+ const negative = summary?.negative ?? 0;
1434
+ const total = positive + negative;
1435
+ // ratio = negative proportion (high = needs more improvement)
1436
+ feedbackRatios.set(ref.ref, total > 0 ? negative / total : 0);
1437
+ }
1438
+ // Sort: combine utility (desc) with feedback negativity (desc) — high-negative assets rank higher
1439
+ const sorted = [...mergedRefs].sort((a, b) => {
1440
+ const utilA = utilityMap.get(a.ref) ?? 0;
1441
+ const utilB = utilityMap.get(b.ref) ?? 0;
1442
+ const ratioA = feedbackRatios.get(a.ref) ?? 0;
1443
+ const ratioB = feedbackRatios.get(b.ref) ?? 0;
1444
+ // Combined score: 70% utility, 30% negative ratio
1445
+ const scoreA = utilA * 0.7 + ratioA * 0.3;
1446
+ const scoreB = utilB * 0.7 + ratioB * 0.3;
1447
+ return scoreB - scoreA;
1448
+ });
1449
+ // Phase 0: surface coverage gaps from zero-result search queries
1450
+ let coverageGaps = [];
1451
+ try {
1452
+ const dbForGaps = openExistingDatabase();
1453
+ try {
1454
+ coverageGaps = getZeroResultSearches(dbForGaps);
1455
+ }
1456
+ finally {
1457
+ closeDatabase(dbForGaps);
1458
+ }
1459
+ }
1460
+ catch (err) {
1461
+ rethrowIfTestIsolationError(err);
1462
+ // best-effort
1463
+ }
1464
+ // actionableRefs is the post-cooldown, post-validation, post-signal, post-sort
1465
+ // set — i.e. the genuinely processable refs in priority order. Note: this is
1466
+ // a semantic shift from earlier code where actionableRefs was the pre-cooldown
1467
+ // sorted set; the new meaning matches reality and is documented on
1468
+ // ImprovePreparationResult.actionableRefs.
1469
+ //
1470
+ // Final guard: drop any candidate whose backing file is no longer on disk.
1471
+ // Phase 1 validation captures missing files at the start of preparation, but
1472
+ // the gap between that check and dispatch can be minutes on large stashes —
1473
+ // long enough for a checkpoint / git checkout / external cleanup to delete
1474
+ // the asset. Empirically (improve-critical-review 2026-05-20) the single
1475
+ // biggest reject category was "Asset no longer exists on disk" (604/1407 =
1476
+ // 43%), meaning reflect/distill was producing proposals against deleted refs.
1477
+ // A cheap existsSync per surviving candidate eliminates that wasted work.
1478
+ const assetMissingOnDisk = [];
1479
+ const existsCheckedActionable = [];
1480
+ for (const candidate of sorted) {
1481
+ const filePath = await findAssetFilePath(candidate.ref, options.stashDir);
1482
+ if (filePath && fs.existsSync(filePath)) {
1483
+ existsCheckedActionable.push(candidate);
1484
+ }
1485
+ else {
1486
+ assetMissingOnDisk.push(candidate.ref);
1487
+ appendEvent({ eventType: "improve_skipped", ref: candidate.ref, metadata: { reason: "asset_missing_on_disk" } }, eventsCtx);
1488
+ }
1489
+ }
1490
+ const actionableRefs = existsCheckedActionable;
1491
+ // Re-split actionableRefs (sorted) into reflect-path vs distill-only-path while
1492
+ // preserving sort order. distillOnlyRefs participate in the sort so --limit
1493
+ // picks them by score, not by arbitrary position.
1494
+ const distillOnlyRefSetForSort = new Set(distillOnlyRefs.map((r) => r.ref));
1495
+ const reflectAndDistillRefsAfterSort = [];
1496
+ const distillOnlyRefsAfterSort = [];
1497
+ for (const r of actionableRefs) {
1498
+ if (distillOnlyRefSetForSort.has(r.ref)) {
1499
+ distillOnlyRefsAfterSort.push(r);
1500
+ }
1501
+ else {
1502
+ reflectAndDistillRefsAfterSort.push(r);
1503
+ }
1504
+ }
1505
+ // ── Phase 5: --limit applies to the post-cooldown actionable set ──────────
1506
+ const allLoopRefs = [...reflectAndDistillRefsAfterSort, ...distillOnlyRefsAfterSort];
1507
+ const loopRefs = options.limit ? allLoopRefs.slice(0, options.limit) : allLoopRefs;
1508
+ // Update the returned distillOnlyRefs to the sorted order so callers see the
1509
+ // ranked view (loop stage uses it as a Set so order is irrelevant, but the
1510
+ // shape change keeps downstream consumers consistent).
1511
+ const distillOnlyRefsResult = distillOnlyRefsAfterSort;
1512
+ const totalReflectBlocked = fullySkippedCount + distillOnlyRefs.length;
1513
+ if (totalReflectBlocked > 0) {
1514
+ info(`[improve] ${totalReflectBlocked} of ${preCooldownCount} indexed refs blocked by reflect signal-delta ` +
1515
+ `(${fullySkippedCount} fully skipped, ${distillOnlyRefs.length} routed to distill-only)`);
1516
+ }
1517
+ if (signalAndRetrievalRefs.length > 0) {
1518
+ info(`[improve] ${signalAndRetrievalRefs.length} refs with usage signals (${signalFiltered.length} feedback, ${highRetrievalRefs.length} high-retrieval)`);
749
1519
  }
750
1520
  if (validationFailureRefs.size > 0) {
751
- info(`[improve] ${validationFailureRefs.size} assets with validation failures excluded from loop`);
1521
+ info(`[improve] ${validationFailureRefs.size} with validation failures excluded`);
1522
+ }
1523
+ if (assetMissingOnDisk.length > 0) {
1524
+ info(`[improve] ${assetMissingOnDisk.length} candidates dropped — file not on disk`);
752
1525
  }
1526
+ const deferredCount = actionableRefs.length - loopRefs.length;
1527
+ info(`[improve] ${actionableRefs.length} actionable; ${loopRefs.length} will be processed` +
1528
+ (options.limit && deferredCount > 0 ? ` (--limit ${options.limit} applied; ${deferredCount} deferred)` : ""));
753
1529
  return {
754
1530
  actions,
755
1531
  cleanupWarnings,
756
1532
  appliedCleanup,
757
1533
  memoryIndexHealth,
758
- executionLogCandidates,
1534
+ extract: extractResults,
759
1535
  actionableRefs,
760
1536
  signalBearingSet,
761
1537
  validationFailures,
@@ -763,18 +1539,108 @@ async function runImprovePreparationStage(args) {
763
1539
  lintSummary,
764
1540
  loopRefs,
765
1541
  distillCooledRefs,
766
- feedbackRatioUsed,
1542
+ distillOnlyRefs: distillOnlyRefsResult,
767
1543
  coverageGaps,
768
1544
  recentErrors,
1545
+ utilityMap,
1546
+ gateAutoAcceptedCount,
769
1547
  };
770
1548
  }
1549
+ // TODO(refactor): 17 destructured args, including the `actions`/`recentErrors` mutation channels. Restructure into an immutable plan object plus a mutable context object — deferred to a dedicated refactor with isolated testing.
771
1550
  async function runImproveLoopStage(args) {
772
- const { scope, options, primaryStashDir, reflectFn, distillFn, loopRefs, actions, signalBearingSet, distillCooledRefs, recentErrors, startMs, budgetMs, } = args;
1551
+ const { scope, options, primaryStashDir, reflectFn, distillFn, loopRefs, actions, signalBearingSet, distillCooledRefs, distillOnlyRefs, recentErrors, rejectedProposalsByRef, utilityMap, startMs, budgetMs, eventsCtx, improveProfile, } = args;
1552
+ // O-1 (#364): compute remaining budget at call time so each sub-call
1553
+ // receives only its fair share of the wall-clock budget.
1554
+ const remainingBudgetMs = () => Math.max(0, budgetMs - (Date.now() - startMs));
773
1555
  const RECENT_ERRORS_CAP = 3;
774
- const DISTILL_COOLDOWN_DAYS = options.distillCooldownDays ?? 30;
1556
+ // R-2 / #389: Self-Consistency multi-sample voting helpers.
1557
+ // Wang et al. arXiv:2203.11171 — N=3 samples beat single-shot on reasoning tasks.
1558
+ const SC_THRESHOLD = options.selfConsistencyThreshold ?? 0.7;
1559
+ const SC_N = Math.min(Math.max(2, options.selfConsistencyN ?? 3), 5);
1560
/**
 * Compute the Jaccard token overlap between two strings.
 *
 * Each input is split on runs of whitespace and de-duplicated into a token
 * set; the score is |intersection| / |union|. Matching is case-sensitive.
 *
 * @param {string} a - First text to compare.
 * @param {string} b - Second text to compare.
 * @returns {number} Similarity in [0, 1]. Two inputs with no tokens (empty or
 *   whitespace-only) are treated as identical and score 1; an input with no
 *   tokens compared against one that has tokens scores 0.
 */
function jaccardSimilarity(a, b) {
    const tokensA = new Set(a.split(/\s+/).filter(Boolean));
    const tokensB = new Set(b.split(/\s+/).filter(Boolean));
    // Two token-less inputs are identical. Handling them here also guarantees
    // the union computed below is non-zero, so no divide-by-zero guard is needed.
    if (tokensA.size === 0 && tokensB.size === 0)
        return 1;
    let intersection = 0;
    for (const token of tokensA) {
        if (tokensB.has(token))
            intersection++;
    }
    const union = tokensA.size + tokensB.size - intersection;
    return intersection / union;
}
1577
/**
 * Pick the consensus sample out of a batch of reflect results.
 *
 * Only results with a truthy `ok` compete. Each competitor is scored by its
 * mean Jaccard similarity (over `proposal.payload.content`, with a missing
 * content treated as "") to every other competitor; the highest mean wins and
 * ties go to the earliest sample.
 *
 * @param {Array<object>} results - Reflect results, successful or not.
 * @returns {object} The winning successful result. With exactly one success
 *   that result is returned as-is. With no successes the first result is
 *   returned, or a synthetic "all samples failed" failure when `results` is
 *   empty.
 */
function pickMajorityVote(results) {
    const candidates = results.filter((sample) => sample.ok);
    if (candidates.length === 0) {
        const allFailed = {
            schemaVersion: 1,
            ok: false,
            reason: "non_zero_exit",
            error: "all samples failed",
            exitCode: null,
        };
        return results[0] ?? allFailed;
    }
    if (candidates.length === 1)
        return candidates[0];
    const contentOf = (sample) => sample.proposal.payload.content ?? "";
    const peerCount = candidates.length - 1;
    // Mean similarity of each candidate against all of its peers.
    const meanSimilarity = candidates.map((candidate, i) => {
        let sum = 0;
        candidates.forEach((peer, j) => {
            if (j !== i)
                sum += jaccardSimilarity(contentOf(candidate), contentOf(peer));
        });
        return sum / peerCount;
    });
    // Strict ">" keeps the earliest candidate on ties; the -1 seed sits below
    // any possible similarity so the first candidate always registers.
    const best = meanSimilarity.reduce((acc, score, idx) => (score > acc.score ? { idx, score } : acc), { idx: 0, score: -1 });
    return candidates[best.idx] ?? candidates[0];
}
1611
// O-5 / #378: record an error message in the rolling window kept for one
// originator. Each originator has its own list inside `recentErrors`; the list
// is created on first use and trimmed from the front once it grows past
// RECENT_ERRORS_CAP, so only the newest messages are retained.
function pushRecentError(originator, msg) {
    let bucket = recentErrors[originator];
    if (!bucket) {
        bucket = [];
        recentErrors[originator] = bucket;
    }
    bucket.push(msg);
    if (bucket.length > RECENT_ERRORS_CAP) {
        bucket.shift();
    }
}
1619
+ // Build a Set for O(1) membership test — these refs skip the reflect call (Bug D2).
1620
+ const distillOnlyRefSet = new Set(distillOnlyRefs.map((r) => r.ref));
775
1621
  let completedCount = 0;
776
- let crossStepErrorsInjected = 0;
1622
+ let reflectsWithErrorContext = 0;
777
1623
  const memoryRefsForInference = new Set();
1624
+ // Pre-load all pending proposals once instead of querying per asset in the loop.
1625
+ const dedupeStashDirForProposals = primaryStashDir ?? options.stashDir;
1626
+ const pendingProposalRefSet = new Set(dedupeStashDirForProposals
1627
+ ? listProposals(dedupeStashDirForProposals, { status: "pending" }).map((p) => p.ref)
1628
+ : []);
1629
+ let gateAutoAcceptedCount = 0;
1630
+ const reflectGateCfg = makeGateConfig("reflect", {
1631
+ globalThreshold: options.autoAccept,
1632
+ dryRun: options.dryRun ?? false,
1633
+ stashDir: primaryStashDir,
1634
+ config: options.config ?? loadConfig(),
1635
+ eventsCtx,
1636
+ });
1637
+ const distillGateCfg = makeGateConfig("distill", {
1638
+ globalThreshold: options.autoAccept,
1639
+ dryRun: options.dryRun ?? false,
1640
+ stashDir: primaryStashDir,
1641
+ config: options.config ?? loadConfig(),
1642
+ eventsCtx,
1643
+ });
778
1644
  for (const planned of loopRefs) {
779
1645
  if (Date.now() - startMs >= budgetMs) {
780
1646
  const remaining = loopRefs.length - completedCount;
@@ -786,7 +1652,15 @@ async function runImproveLoopStage(args) {
786
1652
  reason: "budget_exhausted",
787
1653
  remaining,
788
1654
  },
789
- });
1655
+ }, eventsCtx);
1656
+ // B11: Emit improve_skipped for all remaining assets that will not be processed.
1657
+ for (const remainingRef of loopRefs.slice(completedCount + 1)) {
1658
+ appendEvent({
1659
+ eventType: "improve_skipped",
1660
+ ref: remainingRef.ref,
1661
+ metadata: { reason: "budget_exhausted_batch", remaining: loopRefs.length - completedCount - 1 },
1662
+ }, eventsCtx);
1663
+ }
790
1664
  actions.push({
791
1665
  ref: planned.ref,
792
1666
  mode: "error",
@@ -795,59 +1669,243 @@ async function runImproveLoopStage(args) {
795
1669
  break;
796
1670
  }
797
1671
  try {
798
- if (DISTILL_COOLDOWN_DAYS > 0 &&
799
- distillCooledRefs.has(planned.ref) &&
800
- (isLessonCandidate(planned.ref) || shouldDistillMemoryRef(planned.ref, options.stashDir))) {
1672
+ // Bug D2: distillOnlyRefs skip the reflect call but still run the distill path.
1673
+ // Bug D1: in-loop distill-cooldown check removed — distill-cooled candidates
1674
+ // have their synthetic actions emitted in runImprovePreparationStage.
1675
+ const isDistillOnly = distillOnlyRefSet.has(planned.ref);
1676
+ const parsedPlannedRef = parseAssetRef(planned.ref);
1677
+ // B6: derived memories are machine-generated; skip reflect to avoid noisy proposals.
1678
+ // shouldDistillMemoryRef already returns false for .derived refs, so the distill
1679
+ // path is also a no-op for them — we just avoid unnecessary agent spawns.
1680
+ // D2: distillOnlyRefs also skip the reflect call (reflect-cooled, distill path only).
1681
+ if (!isDistillOnly && !planned.ref.endsWith(".derived")) {
1682
+ // Type guard: skip reflect for unsupported types (script, vault, task, etc.)
1683
+ // and raw wiki directories, driven by the active improve profile.
1684
+ const reflectSkip = shouldSkipRef(planned.ref, "reflect", improveProfile);
1685
+ if (reflectSkip.skip) {
1686
+ actions.push({
1687
+ ref: planned.ref,
1688
+ mode: "reflect-skipped",
1689
+ result: { ok: true, reason: reflectSkip.reason },
1690
+ });
1691
+ }
1692
+ else {
1693
+ // O-5 / #378: only inject reflect-originator errors into the reflect call.
1694
+ // Cross-task errors (e.g. schema-repair) must NOT contaminate reflect prompts.
1695
+ const reflectErrors = recentErrors.reflect ?? [];
1696
+ if (reflectErrors.length > 0)
1697
+ reflectsWithErrorContext++;
1698
+ // O-1 (#364): pass remaining budget as timeoutMs so the agent spawn is
1699
+ // bounded by the wall-clock deadline rather than the default per-profile timeout.
1700
+ const reflectBudgetMs = remainingBudgetMs();
1701
+ // Wire profile.processes.reflect.{mode, profile, timeoutMs} into the reflect
1702
+ // dispatch when present. Falls back to akmReflect's own config-based resolution
1703
+ // (profiles.improve.<name>.processes.reflect → defaults.llm) when the profile
1704
+ // does not specify.
1705
+ const reflectProfileRunner = resolveImproveProcessRunnerFromProfile(improveProfile.processes?.reflect, options.config ?? loadConfig());
1706
+ const reflectCallArgs = {
1707
+ ref: planned.ref,
1708
+ task: options.task,
1709
+ ...(options.stashDir ? { stashDir: options.stashDir } : {}),
1710
+ ...(reflectErrors.length > 0 ? { avoidPatterns: [...reflectErrors] } : {}),
1711
+ agentProcess: options.agentProcess ?? "reflect",
1712
+ eventSource: "improve",
1713
+ ...(reflectBudgetMs > 0 ? { timeoutMs: reflectBudgetMs } : {}),
1714
+ ...(reflectProfileRunner ? { runner: reflectProfileRunner } : {}),
1715
+ };
1716
+ // R-2 / #389: Self-consistency multi-sample voting for high-utility refs.
1717
+ // Self-Consistency arXiv:2203.11171 — N=3 samples beat single-shot quality.
1718
+ const refUtility = utilityMap.get(planned.ref) ?? 0;
1719
+ const useConsistency = refUtility >= SC_THRESHOLD && SC_N >= 2;
1720
+ let reflectResult;
1721
+ if (useConsistency) {
1722
+ const samples = [];
1723
+ for (let s = 0; s < SC_N; s++) {
1724
+ if (remainingBudgetMs() <= 0)
1725
+ break;
1726
+ // draftMode: skip DB write so each sample doesn't create a proposal.
1727
+ samples.push(await reflectFn({ ...reflectCallArgs, draftMode: true }));
1728
+ }
1729
+ const winner = pickMajorityVote(samples.length > 0 ? samples : [await reflectFn({ ...reflectCallArgs, draftMode: true })]);
1730
+ // Persist only the majority-vote winner as a single real proposal.
1731
+ if (winner.ok && primaryStashDir) {
1732
+ const persistResult = createProposal(primaryStashDir, {
1733
+ ref: winner.proposal.ref,
1734
+ source: "reflect",
1735
+ sourceRun: `reflect-sc-${Date.now()}`,
1736
+ payload: winner.proposal.payload,
1737
+ });
1738
+ reflectResult = isProposalSkipped(persistResult)
1739
+ ? {
1740
+ schemaVersion: 1,
1741
+ ok: false,
1742
+ reason: "cooldown",
1743
+ error: `SC proposal skipped: ${persistResult.message}`,
1744
+ ref: winner.ref,
1745
+ exitCode: null,
1746
+ }
1747
+ : { ...winner, proposal: persistResult };
1748
+ }
1749
+ else {
1750
+ reflectResult = winner;
1751
+ }
1752
+ }
1753
+ else {
1754
+ reflectResult = await reflectFn(reflectCallArgs);
1755
+ }
1756
+ const isCooldown = !reflectResult.ok && reflectResult.reason === "cooldown";
1757
+ // Content-policy guard hits (reflect size-rail rejections) are NOT
1758
+ // LLM faults — the agent responded fine, the downstream guard
1759
+ // blocked the output. Route them to a distinct `reflect-guard-rejected`
1760
+ // mode so health metrics can split deterministic guard hits out of
1761
+ // true LLM failures. See
1762
+ // `/tmp/akm-health-investigations/metrics-taxonomy-review.md` §1a.
1763
+ const isGuardReject = !reflectResult.ok && reflectResult.reason === "content_policy_reject";
1764
+ // Type-guard rejection (reflect refused a script/vault/task ref) is
1765
+ // also NOT an LLM failure — the LLM is never invoked. Route to the
1766
+ // existing `reflect-skipped` bucket so it does not inflate the
1767
+ // failure-rate numerator. ~9% of `reflect-failed` events in the
1768
+ // user's stack were this case; see review §1a row "Reflect refused
1769
+ // asset type".
1770
+ const isTypeRefused = !reflectResult.ok && reflectResult.reason === "unsupported_type";
1771
+ actions.push({
1772
+ ref: planned.ref,
1773
+ mode: reflectResult.ok
1774
+ ? "reflect"
1775
+ : isCooldown
1776
+ ? "reflect-cooldown"
1777
+ : isGuardReject
1778
+ ? "reflect-guard-rejected"
1779
+ : isTypeRefused
1780
+ ? "reflect-skipped"
1781
+ : "reflect-failed",
1782
+ result: reflectResult,
1783
+ });
1784
+ // Cooldown skips, guard rejects, and type-refused skips are not
1785
+ // failures — do not pollute recentErrors with them (those get
1786
+ // injected as `avoidPatterns` into the next reflect prompt). Guard
1787
+ // rejects ARE worth showing the LLM as a learn-signal so the next
1788
+ // iteration sees "your last expansion was too large"; type-refused
1789
+ // is deterministic and adds no learning signal.
1790
+ if (!reflectResult.ok && !isCooldown && !isTypeRefused) {
1791
+ const errMsg = reflectResult.error ?? reflectResult.reason ?? "unknown reflect error";
1792
+ pushRecentError("reflect", errMsg);
1793
+ }
1794
+ // improve_reflect_outcome — per-asset metric for tuning the reflect path.
1795
+ appendEvent({
1796
+ eventType: "improve_reflect_outcome",
1797
+ ref: planned.ref,
1798
+ metadata: {
1799
+ ok: reflectResult.ok,
1800
+ durationMs: reflectResult.ok ? reflectResult.durationMs : undefined,
1801
+ agentProfile: reflectResult.ok ? reflectResult.agentProfile : undefined,
1802
+ reason: reflectResult.ok ? undefined : reflectResult.reason,
1803
+ },
1804
+ }, eventsCtx);
1805
+ if (reflectResult.ok) {
1806
+ gateAutoAcceptedCount += (await runAutoAcceptGate([{ proposalId: reflectResult.proposal.id, confidence: reflectResult.proposal.confidence }], reflectGateCfg)).promoted.length;
1807
+ }
1808
+ } // end else (reflect type/profile check)
1809
+ }
1810
+ else if (!isDistillOnly && planned.ref.endsWith(".derived")) {
1811
+ // B6: .derived refs skip reflect; record synthetic skip action.
801
1812
  actions.push({
802
1813
  ref: planned.ref,
803
1814
  mode: "distill-skipped",
804
- result: { ok: true, reason: "distill cooldown" },
1815
+ result: { ok: true, reason: "derived-memory-reflect-skipped" },
805
1816
  });
806
- completedCount++;
807
1817
  appendEvent({
808
1818
  eventType: "improve_skipped",
809
1819
  ref: planned.ref,
810
- metadata: { reason: "distill_cooldown", cooldownDays: DISTILL_COOLDOWN_DAYS },
811
- });
812
- info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref} (distill cooldown)`);
813
- continue;
814
- }
815
- if (recentErrors.length > 0)
816
- crossStepErrorsInjected++;
817
- const reflectResult = await reflectFn({
818
- ref: planned.ref,
819
- task: options.task,
820
- ...(options.stashDir ? { stashDir: options.stashDir } : {}),
821
- ...(recentErrors.length > 0 ? { avoidPatterns: [...recentErrors] } : {}),
822
- agentProcess: options.agentProcess ?? "reflect",
823
- });
824
- actions.push({ ref: planned.ref, mode: "reflect", result: reflectResult });
825
- if (!reflectResult.ok) {
826
- const errMsg = reflectResult.error ?? reflectResult.reason ?? "unknown reflect error";
827
- recentErrors.push(errMsg);
828
- if (recentErrors.length > RECENT_ERRORS_CAP)
829
- recentErrors.shift();
1820
+ metadata: { reason: "derived_memory_reflect_skipped" },
1821
+ }, eventsCtx);
830
1822
  }
831
- const parsedPlannedRef = parseAssetRef(planned.ref);
1823
+ // isDistillOnly refs: no reflect action emitted — proceed directly to distill path below.
832
1824
  const hasRecentFeedbackSignal = signalBearingSet.has(planned.ref);
833
1825
  const explicitRefScope = scope.mode === "ref";
834
- const shouldAttemptDistill = isLessonCandidate(planned.ref) || shouldDistillMemoryRef(planned.ref, options.stashDir);
835
- const skipMemoryDistillForWeakSignal = parsedPlannedRef.type === "memory" && !hasRecentFeedbackSignal && !explicitRefScope;
836
- if (shouldAttemptDistill && !skipMemoryDistillForWeakSignal) {
1826
+ // Profile gate: apply the full type-filter / raw-wiki / disabled rules to
1827
+ // distill so callers who configure `profile.processes.distill.allowedTypes`
1828
+ // or land on raw-wiki refs get a recorded skip action instead of silently
1829
+ // proceeding.
1830
+ const distillSkip = shouldSkipRef(planned.ref, "distill", improveProfile);
1831
+ if (distillSkip.skip) {
1832
+ actions.push({
1833
+ ref: planned.ref,
1834
+ mode: "distill-skipped",
1835
+ result: { ok: true, reason: distillSkip.reason },
1836
+ });
1837
+ completedCount++;
1838
+ info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
1839
+ continue;
1840
+ }
1841
+ // See `isDistillCandidateRef` — excludes `lesson:*` (and anything else in
1842
+ // DISTILL_REFUSED_INPUT_TYPES) so distill never gets queued for an input
1843
+ // it will refuse.
1844
+ const shouldAttemptDistill = isDistillCandidateRef(planned.ref, options.stashDir);
1845
+ const skipMemoryDistillForWeakSignal = !isDistillOnly && parsedPlannedRef.type === "memory" && !hasRecentFeedbackSignal && !explicitRefScope;
1846
+ // distillCooledRefs guard: pre-filter emitted synthetic actions for distill-candidate
1847
+ // refs; non-candidate refs in the set are blocked here.
1848
+ // O-2 (#365): bypass the distill cooldown when the user explicitly targeted
1849
+ // this ref via --scope — their intent overrides unattended-run policies.
1850
+ if (shouldAttemptDistill &&
1851
+ !skipMemoryDistillForWeakSignal &&
1852
+ (!distillCooledRefs.has(planned.ref) || explicitRefScope)) {
1853
+ // TODO(refactor): single call site needs both lesson+knowledge refs for proposal dedup. If a third target ref type is added, extract deriveAllTargetRefs(inputRef): string[].
837
1854
  const lessonRef = deriveLessonRef(planned.ref);
1855
+ const knowledgeRef = deriveKnowledgeRef(planned.ref);
838
1856
  const dedupeStashDir = primaryStashDir ?? options.stashDir;
839
1857
  if (dedupeStashDir) {
840
- const existingProposals = listProposals(dedupeStashDir, { ref: lessonRef });
841
- if (existingProposals.some((p) => p.status === "pending")) {
1858
+ // B2: check both lesson ref and knowledge ref since auto-promoted memories
1859
+ // create knowledge: proposals, not lesson: proposals.
1860
+ const hasExistingPending = pendingProposalRefSet.has(lessonRef) || pendingProposalRefSet.has(knowledgeRef);
1861
+ if (hasExistingPending) {
842
1862
  actions.push({
843
1863
  ref: planned.ref,
844
1864
  mode: "distill-skipped",
845
1865
  result: { ok: true, reason: "pending proposal exists" },
846
1866
  });
1867
+ appendEvent({
1868
+ eventType: "improve_skipped",
1869
+ ref: planned.ref,
1870
+ metadata: { reason: "pending_proposal_exists" },
1871
+ }, eventsCtx);
847
1872
  completedCount++;
848
1873
  info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
849
1874
  continue;
850
1875
  }
1876
+ // D-2 (#370): reject-aware cooldown for distill. When the reviewer
1877
+ // recently rejected a distilled lesson or knowledge proposal for this
1878
+ // asset, skip re-distillation for a 1-day grace window. Prevents the
1879
+ // same rejected proposal from being regenerated immediately. The
1880
+ // window is fixed (the 0.8.0 redesign moved per-ref cooldowns to
1881
+ // signal-delta gates and dropped --distill-cooldown-days; a short
1882
+ // reject grace is preserved here so a fresh rejection isn't
1883
+ // overridden by the same run).
1884
+ // References: ExpeL arXiv:2308.10144, STaR arXiv:2203.14465.
1885
+ const DISTILL_REJECT_COOLDOWN_MS = daysToMs(1);
1886
+ const recentlyRejectedLesson = !explicitRefScope && // O-2: bypass when --scope <ref> is explicit
1887
+ (rejectedProposalsByRef.has(lessonRef) || rejectedProposalsByRef.has(knowledgeRef));
1888
+ if (recentlyRejectedLesson) {
1889
+ const rejectedEntry = rejectedProposalsByRef.get(lessonRef) ?? rejectedProposalsByRef.get(knowledgeRef);
1890
+ const rejectedAgeMs = rejectedEntry ? Date.now() - new Date(rejectedEntry.ts).getTime() : 0;
1891
+ if (rejectedAgeMs < DISTILL_REJECT_COOLDOWN_MS) {
1892
+ actions.push({
1893
+ ref: planned.ref,
1894
+ mode: "distill-skipped",
1895
+ result: { ok: true, reason: "distill reject grace window" },
1896
+ });
1897
+ appendEvent({
1898
+ eventType: "improve_skipped",
1899
+ ref: planned.ref,
1900
+ metadata: {
1901
+ reason: "distill_reject_grace_window",
1902
+ },
1903
+ }, eventsCtx);
1904
+ completedCount++;
1905
+ info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
1906
+ continue;
1907
+ }
1908
+ }
851
1909
  }
852
1910
  const distillResult = await distillFn({
853
1911
  ref: planned.ref,
@@ -855,6 +1913,9 @@ async function runImproveLoopStage(args) {
855
1913
  ...(options.stashDir ? { stashDir: options.stashDir } : {}),
856
1914
  });
857
1915
  actions.push({ ref: planned.ref, mode: "distill", result: distillResult });
1916
+ if (distillResult.outcome === "queued" && distillResult.proposal) {
1917
+ gateAutoAcceptedCount += (await runAutoAcceptGate([{ proposalId: distillResult.proposal.id, confidence: distillResult.proposal.confidence }], distillGateCfg)).promoted.length;
1918
+ }
858
1919
  if (parsedPlannedRef.type === "memory") {
859
1920
  const promotedToKnowledge = distillResult.outcome === "queued" && distillResult.proposalKind === "knowledge";
860
1921
  if (!promotedToKnowledge)
@@ -874,17 +1935,18 @@ async function runImproveLoopStage(args) {
874
1935
  slug: `${slug}-${Date.now()}`,
875
1936
  });
876
1937
  }
877
- const rejectedProposals = readEvents({ type: "proposal_rejected", ref: planned.ref }).events.filter((e) => new Date(e.ts).getTime() >= Date.now() - 30 * 24 * 60 * 60 * 1000);
878
- if (rejectedProposals.length > 0 && primaryStashDir) {
1938
+ // D6: use pre-loaded map instead of per-iteration DB query
1939
+ const rejectedProposalEvent = rejectedProposalsByRef.get(planned.ref);
1940
+ if (rejectedProposalEvent && primaryStashDir) {
879
1941
  const slug = planned.ref
880
1942
  .replace(/[^a-z0-9]/gi, "-")
881
1943
  .toLowerCase()
882
1944
  .slice(0, 60);
883
1945
  writeEvalCase(primaryStashDir, {
884
1946
  ref: planned.ref,
885
- failureReason: rejectedProposals[0].metadata?.reason ?? "proposal rejected",
1947
+ failureReason: rejectedProposalEvent.metadata?.reason ?? "proposal rejected",
886
1948
  assetType: parseAssetRef(planned.ref).type ?? "unknown",
887
- rejectedAt: new Date(rejectedProposals[0].ts).getTime(),
1949
+ rejectedAt: new Date(rejectedProposalEvent.ts).getTime(),
888
1950
  source: "proposal_rejected",
889
1951
  slug: `${slug}-rejected`,
890
1952
  });
@@ -900,51 +1962,111 @@ async function runImproveLoopStage(args) {
900
1962
  eventType: "improve_skipped",
901
1963
  ref: planned.ref,
902
1964
  metadata: { reason: "memory_distill_requires_feedback" },
903
- });
1965
+ }, eventsCtx);
904
1966
  }
905
1967
  }
906
1968
  catch (err) {
907
- actions.push({
908
- ref: planned.ref,
909
- mode: "error",
910
- result: { ok: false, error: err instanceof Error ? err.message : String(err) },
911
- });
1969
+ // B7: UsageError thrown by akmDistill on validation_failed should be recorded
1970
+ // as mode:"distill" with outcome:"validation_failed", NOT as a generic error.
1971
+ // The distill_invoked event was already emitted inside akmDistill before the throw.
1972
+ if (err instanceof UsageError) {
1973
+ actions.push({
1974
+ ref: planned.ref,
1975
+ mode: "distill",
1976
+ result: { ok: false, outcome: "validation_failed", error: err.message },
1977
+ });
1978
+ }
1979
+ else {
1980
+ actions.push({
1981
+ ref: planned.ref,
1982
+ mode: "error",
1983
+ result: { ok: false, error: err instanceof Error ? err.message : String(err) },
1984
+ });
1985
+ }
912
1986
  }
913
1987
  completedCount++;
914
1988
  info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
915
1989
  }
916
- return { crossStepErrorsInjected, memoryRefsForInference };
1990
+ return { reflectsWithErrorContext, memoryRefsForInference, gateAutoAcceptedCount };
917
1991
  }
918
1992
  async function runImprovePostLoopStage(args) {
919
- const { scope, options, primaryStashDir, actionableRefs, appliedCleanup, cleanupWarnings, memorySummary, memoryRefsForInference, reindexFn, } = args;
1993
+ const { scope, options, primaryStashDir, actionableRefs, appliedCleanup, cleanupWarnings, memorySummary, memoryRefsForInference, reindexFn, eventsCtx, budgetSignal, improveProfile, } = args;
920
1994
  const allWarnings = [...cleanupWarnings, ...(appliedCleanup?.warnings ?? [])];
921
1995
  const baseConfig = options.config ?? loadConfig();
922
1996
  const MEMORY_VOLUME_THRESHOLD = options.memoryVolumeConsolidationThreshold ?? 100;
923
- const hasLlm = !!(baseConfig.llm || baseConfig.agent);
1997
+ const hasLlm = !!(baseConfig.defaults?.llm || baseConfig.defaults?.agent);
924
1998
  const volumeTriggered = typeof memorySummary.eligible === "number" && memorySummary.eligible > MEMORY_VOLUME_THRESHOLD && hasLlm;
1999
+ // When volume triggers a consolidation pass, force-enable the consolidate
2000
+ // process on the default improve profile so the gate accepts the run even
2001
+ // if the user's config disabled it. We synthesise a new profile override
2002
+ // rather than mutating connection settings.
925
2003
  const consolidationConfig = volumeTriggered
926
2004
  ? {
927
2005
  ...baseConfig,
928
- ...(baseConfig.llm
929
- ? {
930
- llm: {
931
- ...baseConfig.llm,
932
- features: { ...baseConfig.llm.features, memory_consolidation: true },
2006
+ profiles: {
2007
+ ...(baseConfig.profiles ?? {}),
2008
+ improve: {
2009
+ ...(baseConfig.profiles?.improve ?? {}),
2010
+ default: {
2011
+ ...(baseConfig.profiles?.improve?.default ?? {}),
2012
+ processes: {
2013
+ ...(baseConfig.profiles?.improve?.default?.processes ?? {}),
2014
+ consolidate: {
2015
+ ...(baseConfig.profiles?.improve?.default?.processes?.consolidate ?? {}),
2016
+ enabled: true,
2017
+ },
2018
+ },
933
2019
  },
934
- }
935
- : {}),
2020
+ },
2021
+ },
936
2022
  }
937
2023
  : baseConfig;
938
- const consolidateCooldownDays = options.consolidateCooldownDays ?? 14;
939
- const CONSOLIDATE_COOLDOWN_MS = consolidateCooldownDays * 24 * 60 * 60 * 1000;
2024
+ // 0.8.0 pool-delta gate for consolidate: re-eligible iff at least one
2025
+ // memory file has been updated since the most recent successful
2026
+ // consolidate_completed event. Time-based cooldowns produced the same
2027
+ // synchronised-wave failure mode the reflect/distill cooldowns did; the
2028
+ // pool-delta gate ties consolidation to actual work-to-do.
940
2029
  const recentConsolidations = readEvents({ type: "consolidate_completed" });
941
2030
  const lastConsolidation = recentConsolidations.events
942
2031
  .filter((e) => e.metadata?.processed && Number(e.metadata.processed) > 0)
943
2032
  .sort((a, b) => new Date(b.ts ?? 0).getTime() - new Date(a.ts ?? 0).getTime())[0];
944
- const consolidationOnCooldown = !volumeTriggered &&
945
- consolidateCooldownDays > 0 &&
946
- lastConsolidation?.ts &&
947
- Date.now() - new Date(lastConsolidation.ts).getTime() < CONSOLIDATE_COOLDOWN_MS;
2033
+ const lastConsolidateTs = lastConsolidation?.ts;
2034
+ // Pool-delta: any memory file with mtime > lastConsolidateTs flags work to do.
2035
+ // Using file mtime keeps this query DB-free and matches what the indexer
2036
+ // already uses as the canonical `memory.updated_at` proxy.
2037
+ //
2038
+ // Bootstrap: when no successful consolidate_completed event has ever been
2039
+ // recorded, we cannot evaluate the pool-delta — treat as eligible so a
2040
+ // fresh stash runs consolidate once before the steady-state gate kicks in.
2041
+ const memoryUpdatedAfterLastConsolidate = (() => {
2042
+ if (volumeTriggered)
2043
+ return true; // volume override forces the run regardless.
2044
+ if (!lastConsolidateTs)
2045
+ return true; // bootstrap path: never consolidated.
2046
+ if (!primaryStashDir)
2047
+ return false;
2048
+ const memoriesDir = path.join(primaryStashDir, "memories");
2049
+ if (!fs.existsSync(memoriesDir))
2050
+ return false;
2051
+ try {
2052
+ return fs.readdirSync(memoriesDir).some((f) => {
2053
+ if (!f.endsWith(".md"))
2054
+ return false;
2055
+ try {
2056
+ return fs.statSync(path.join(memoriesDir, f)).mtime.toISOString() > lastConsolidateTs;
2057
+ }
2058
+ catch {
2059
+ return false;
2060
+ }
2061
+ });
2062
+ }
2063
+ catch {
2064
+ return false;
2065
+ }
2066
+ })();
2067
+ const consolidationOnCooldown = !volumeTriggered && !memoryUpdatedAfterLastConsolidate;
2068
+ // Profile gate: if profile explicitly disables consolidate, skip the entire pass.
2069
+ const consolidateDisabledByProfile = improveProfile?.processes?.consolidate?.enabled === false;
948
2070
  let consolidation = {
949
2071
  schemaVersion: 1,
950
2072
  ok: true,
@@ -956,38 +2078,81 @@ async function runImprovePostLoopStage(args) {
956
2078
  merged: 0,
957
2079
  deleted: 0,
958
2080
  promoted: [],
2081
+ contradicted: 0,
959
2082
  warnings: [],
960
2083
  durationMs: 0,
961
2084
  };
962
- if (!consolidationOnCooldown) {
2085
+ let gateAutoAcceptedCount = 0;
2086
+ const consolidateGateCfg = makeGateConfig("consolidate", {
2087
+ globalThreshold: options.autoAccept,
2088
+ dryRun: options.dryRun ?? false,
2089
+ stashDir: primaryStashDir,
2090
+ config: consolidationConfig,
2091
+ eventsCtx,
2092
+ }, { minimumThreshold: 95 });
2093
+ if (consolidateDisabledByProfile) {
2094
+ info("[improve] consolidation skipped (disabled by improve profile)");
2095
+ }
2096
+ else if (!consolidationOnCooldown) {
963
2097
  consolidation = await akmConsolidate({
964
2098
  ...options.consolidateOptions,
965
2099
  config: consolidationConfig,
966
2100
  stashDir: options.stashDir,
967
2101
  autoTriggered: volumeTriggered,
968
- autoAccept: "safe",
2102
+ // Tie consolidate proposals back to this improve invocation so
2103
+ // accept-rate-per-run aggregation works. Mirrors reflect/propose/extract.
2104
+ sourceRun: `consolidate-${Date.now()}`,
2105
+ // Incremental consolidation: pass the last-consolidation timestamp so
2106
+ // akmConsolidate skips chunks with no memory changed since then. Converts
2107
+ // consolidation cost from O(pool) to O(changed clusters) — the fix for
2108
+ // the rising p95 tail where full-pool re-judging produced 5–10 min runs
2109
+ // that promoted ~0. undefined → full pass on first-ever run (bootstrap).
2110
+ // volumeTriggered correctly forces the run past cooldown but must NOT
2111
+ // override incrementalSince — the stash has ~1400 eligible memories, so
2112
+ // volumeTriggered is true on every run and would otherwise permanently force
2113
+ // full 12-chunk scans (~264s) instead of the intended 1-2 chunk incremental path (~44s).
2114
+ incrementalSince: lastConsolidateTs,
2115
+ maxChunkSize: improveProfile?.processes?.consolidate?.maxChunkSize,
2116
+ // Honor profile.autoAccept (already merged into options.autoAccept at the
2117
+ // top of akmImprove). The CLI parser always supplies 90 when --auto-accept
2118
+ // is absent, so ?? 90 is not needed here and would prevent --auto-accept=false
2119
+ // (which maps to undefined) from disabling consolidation auto-accept.
2120
+ // options.consolidateOptions.autoAccept (if explicitly provided by caller)
2121
+ // still wins: the autoAccept key below overrides the spread above, so the `??` re-applies the caller's value.
2122
+ autoAccept: options.consolidateOptions?.autoAccept ?? options.autoAccept,
969
2123
  });
2124
+ gateAutoAcceptedCount += (await runAutoAcceptGate(consolidation.promoted.map((proposalId) => {
2125
+ try {
2126
+ if (!primaryStashDir)
2127
+ return { proposalId, confidence: undefined };
2128
+ const proposal = getProposal(primaryStashDir, proposalId);
2129
+ return { proposalId, confidence: proposal.confidence };
2130
+ }
2131
+ catch {
2132
+ return { proposalId, confidence: undefined };
2133
+ }
2134
+ }), consolidateGateCfg)).promoted.length;
970
2135
  if (consolidation.processed > 0) {
971
2136
  appendEvent({
972
2137
  eventType: "consolidate_completed",
973
2138
  ref: "memory:_consolidation",
974
2139
  metadata: { processed: consolidation.processed, merged: consolidation.merged },
975
- });
2140
+ }, eventsCtx);
976
2141
  }
977
2142
  }
978
2143
  else {
979
- const daysAgo = Math.round((Date.now() - new Date(lastConsolidation?.ts ?? 0).getTime()) / 86400000);
980
2144
  appendEvent({
981
2145
  eventType: "improve_skipped",
982
2146
  ref: "memory:_consolidation",
983
2147
  metadata: {
984
- reason: "consolidation_cooldown",
985
- cooldownDays: 14,
2148
+ reason: "consolidation_no_memory_updates",
986
2149
  lastEventTs: lastConsolidation?.ts ?? null,
987
2150
  },
988
- });
989
- info(`[improve] consolidation skipped (last ran ${daysAgo}d ago, cooldown 14d)`);
2151
+ }, eventsCtx);
2152
+ info("[improve] consolidation skipped (no memory updates since last run)");
990
2153
  }
2154
+ // D9: track whether consolidation wrote any data so graph extraction can reindex if needed
2155
+ const consolidationRan = !consolidateDisabledByProfile && !consolidationOnCooldown && consolidation.processed > 0;
991
2156
  info("[improve] post-loop maintenance starting");
992
2157
  const maintenanceResult = await runImproveMaintenancePasses({
993
2158
  options,
@@ -996,6 +2161,11 @@ async function runImprovePostLoopStage(args) {
996
2161
  memoryRefsForInference,
997
2162
  allWarnings,
998
2163
  reindexFn,
2164
+ consolidationRan,
2165
+ // O-1 (#364): forward the budget signal to memory inference + graph extraction.
2166
+ budgetSignal,
2167
+ eventsCtx,
2168
+ improveProfile,
999
2169
  });
1000
2170
  let deadUrls;
1001
2171
  if (scope.mode === "all" && primaryStashDir && actionableRefs.length > 0) {
@@ -1027,39 +2197,74 @@ async function runImprovePostLoopStage(args) {
1027
2197
  deadUrls,
1028
2198
  ...(maintenanceResult.memoryInference ? { memoryInference: maintenanceResult.memoryInference } : {}),
1029
2199
  ...(maintenanceResult.graphExtraction ? { graphExtraction: maintenanceResult.graphExtraction } : {}),
2200
+ ...(maintenanceResult.stalenessDetection ? { stalenessDetection: maintenanceResult.stalenessDetection } : {}),
1030
2201
  ...(maintenanceResult.actions && maintenanceResult.actions.length > 0
1031
2202
  ? { maintenanceActions: maintenanceResult.actions }
1032
2203
  : {}),
2204
+ memoryInferenceDurationMs: maintenanceResult.memoryInferenceDurationMs,
2205
+ graphExtractionDurationMs: maintenanceResult.graphExtractionDurationMs,
2206
+ orphansPurged: maintenanceResult.orphansPurged,
2207
+ proposalsExpired: maintenanceResult.proposalsExpired,
2208
+ gateAutoAcceptedCount,
1033
2209
  };
1034
2210
  }
2211
+ // TODO(refactor): mutates the passed-in `allWarnings` array as a hidden side channel. Return warnings in ImproveMaintenanceResult and merge in caller — invasive signature change deferred to next refactor pass.
1035
2212
  async function runImproveMaintenancePasses(args) {
1036
- const { options, primaryStashDir, memoryRefsForInference, allWarnings, reindexFn } = args;
2213
+ const { options, primaryStashDir, memoryRefsForInference, allWarnings, reindexFn, consolidationRan, budgetSignal, eventsCtx, improveProfile, } = args;
1037
2214
  if (!primaryStashDir)
1038
- return {};
2215
+ return { memoryInferenceDurationMs: 0, graphExtractionDurationMs: 0 };
1039
2216
  const config = options.config ?? loadConfig();
1040
2217
  const sources = resolveSourceEntries(options.stashDir, config);
1041
2218
  const memoryInferenceFn = options.memoryInferenceFn ?? runMemoryInferencePass;
1042
2219
  const graphExtractionFn = options.graphExtractionFn ?? runGraphExtractionPass;
2220
+ const stalenessDetectionFn = options.stalenessDetectionFn ?? runStalenessDetectionPass;
1043
2221
  let db;
1044
2222
  let memoryInference;
1045
2223
  let graphExtraction;
2224
+ let stalenessDetection;
1046
2225
  let reindexedAfterInference = false;
1047
2226
  const actions = [];
2227
+ let memoryInferenceDurationMs = 0;
2228
+ let graphExtractionDurationMs = 0;
2229
+ let orphansPurged = 0;
2230
+ let proposalsExpired = 0;
1048
2231
  try {
1049
2232
  db = openDatabase(getDbPath(), config.embedding?.dimension ? { embeddingDim: config.embedding.dimension } : undefined);
1050
- if (memoryRefsForInference.size > 0) {
1051
- info(`[improve] memory inference starting (${memoryRefsForInference.size} candidate refs)`);
2233
+ // Memory inference candidate-discovery (post-Item 9 fix from
2234
+ // memory:akm-improve-critical-review-2026-05-20). Previously this pass
2235
+ // was gated on memoryRefsForInference.size > 0 AND passed those refs as a
2236
+ // candidateRefs filter. But memoryRefsForInference is populated from refs
2237
+ // distilled THIS RUN — by the time that happens, those parents are
2238
+ // already split (`inferenceProcessed: true`) and `isPendingMemory` excludes
2239
+ // them. The genuinely-pending parents in the stash never entered the
2240
+ // filter. Result: 0/0/0 for 25 consecutive runs.
2241
+ //
2242
+ // Fix: always run the pass when the feature is enabled; let the pass's
2243
+ // own `collectPendingMemories` + `isPendingMemory` predicate find
2244
+ // candidates from the filesystem-of-truth. The this-run set is still
2245
+ // logged as a hint but no longer used as a filter.
2246
+ const memoryInferenceDisabledByProfile = improveProfile?.processes?.memoryInference?.enabled === false;
2247
+ if (memoryInferenceDisabledByProfile) {
2248
+ info("[improve] memory inference skipped (disabled by improve profile)");
2249
+ }
2250
+ else {
2251
+ const hintRefs = memoryRefsForInference.size;
2252
+ info(hintRefs > 0
2253
+ ? `[improve] memory inference starting (${hintRefs} hint refs touched this run; pass discovers all pending)`
2254
+ : "[improve] memory inference starting (discovering pending parents)");
2255
+ const inferenceStart = Date.now();
1052
2256
  try {
1053
- memoryInference = await memoryInferenceFn(config, sources, undefined, db, false, (event) => {
2257
+ // O-1 (#364): pass budget signal so a hung inference call is cancelled.
2258
+ memoryInference = await memoryInferenceFn(config, sources, budgetSignal, db, false, (event) => {
1054
2259
  const current = event.currentRef ? ` ${event.currentRef}` : "";
1055
2260
  info(`[improve] memory inference ${event.processed}/${event.total}${current} (written ${event.writtenFacts}, skipped ${event.skippedNoFacts})`);
1056
- }, {
1057
- candidateRefs: memoryRefsForInference,
1058
2261
  });
2262
+ memoryInferenceDurationMs = Date.now() - inferenceStart;
1059
2263
  actions.push({ ref: "memory:_inference", mode: "memory-inference", result: memoryInference });
1060
2264
  info(`[improve] memory inference complete (${memoryInference.writtenFacts} facts written from ${memoryInference.splitParents} parents)`);
1061
2265
  }
1062
2266
  catch (err) {
2267
+ memoryInferenceDurationMs = Date.now() - inferenceStart;
1063
2268
  allWarnings.push(`memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
1064
2269
  }
1065
2270
  }
@@ -1074,24 +2279,203 @@ async function runImproveMaintenancePasses(args) {
1074
2279
  allWarnings.push(`reindex after memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
1075
2280
  }
1076
2281
  }
1077
- if (sources.length > 0) {
1078
- info("[improve] graph extraction starting");
2282
+ const graphEnabled = isProcessEnabled("index", "graph_extraction", config);
2283
+ const graphExtractionDisabledByProfile = improveProfile?.processes?.graphExtraction?.enabled === false;
2284
+ const graphExtractionFullScan = improveProfile?.processes?.graphExtraction?.fullScan === true;
2285
+ // Build the set of refs actually touched this run.
2286
+ const touchedRefs = new Set();
2287
+ for (const r of args.actionableRefs)
2288
+ touchedRefs.add(r.ref);
2289
+ for (const r of memoryRefsForInference)
2290
+ touchedRefs.add(r);
2291
+ // INVARIANT: graph extraction normally runs only on files touched by
2292
+ // actionable refs (candidatePaths). Full-corpus scans are opt-in via
2293
+ // profile.processes.graphExtraction.fullScan = true (used by the
2294
+ // `graph-refresh` built-in profile and its weekly scheduled task).
2295
+ // The empty-Set fallback is intentional when no refs were touched —
2296
+ // the extractor's filter rejects every file and returns empty, keeping
2297
+ // the pass invoked so the action is recorded and tests stay exercised.
2298
+ if (graphExtractionDisabledByProfile) {
2299
+ info("[improve] graph extraction skipped (disabled by improve profile)");
2300
+ }
2301
+ else if (sources.length > 0 && graphEnabled) {
2302
+ info(`[improve] graph extraction starting${graphExtractionFullScan ? " (full-corpus scan)" : ""}`);
2303
+ const extractionStart = Date.now();
1079
2304
  try {
2305
+ // D9: if consolidation ran but memory inference did not reindex, force a reindex
2306
+ // so graph extraction sees current DB state after consolidation writes.
2307
+ if (consolidationRan && !reindexedAfterInference) {
2308
+ info("[improve] reindexing after consolidation (graph extraction needs current state)");
2309
+ try {
2310
+ await reindexFn({ stashDir: primaryStashDir });
2311
+ reindexedAfterInference = true;
2312
+ info("[improve] reindex after consolidation complete");
2313
+ }
2314
+ catch (err) {
2315
+ allWarnings.push(`reindex after consolidation failed: ${err instanceof Error ? err.message : String(err)}`);
2316
+ }
2317
+ }
1080
2318
  if (db && reindexedAfterInference) {
1081
2319
  closeDatabase(db);
1082
2320
  db = openDatabase(getDbPath(), config.embedding?.dimension ? { embeddingDim: config.embedding.dimension } : undefined);
1083
2321
  }
1084
- graphExtraction = await graphExtractionFn(config, sources, undefined, db, false, (event) => {
2322
+ // Resolve touched refs to absolute file paths. Skipped for fullScan
2323
+ // (candidatePaths stays undefined → extractor processes all files).
2324
+ let candidatePaths;
2325
+ if (!graphExtractionFullScan) {
2326
+ candidatePaths = new Set();
2327
+ if (primaryStashDir && touchedRefs.size > 0) {
2328
+ const writableDirSet = new Set(getWritableStashDirs(primaryStashDir).map((d) => path.resolve(d)));
2329
+ const resolved = await Promise.all([...touchedRefs].map((ref) => findAssetFilePath(ref, primaryStashDir, writableDirSet).catch(() => null)));
2330
+ for (const p of resolved) {
2331
+ if (typeof p === "string" && p.length > 0)
2332
+ candidatePaths.add(p);
2333
+ }
2334
+ }
2335
+ }
2336
+ const progressHandler = (event) => {
1085
2337
  const current = event.currentPath ? ` ${path.basename(event.currentPath)}` : "";
1086
2338
  info(`[improve] graph extraction ${event.processed}/${event.total}${current} (extracted ${event.extracted}, entities ${event.totalEntities}, relations ${event.totalRelations})`);
2339
+ };
2340
+ // O-1 (#364): pass budget signal so a hung graph extraction call is cancelled.
2341
+ graphExtraction = await graphExtractionFn(config, sources, budgetSignal, db, false, progressHandler, {
2342
+ candidatePaths,
1087
2343
  });
2344
+ graphExtractionDurationMs = Date.now() - extractionStart;
1088
2345
  actions.push({ ref: "graph:_artifact", mode: "graph-extraction", result: graphExtraction });
1089
2346
  info(`[improve] graph extraction complete (${graphExtraction.quality.extractedFiles} files, ${graphExtraction.quality.entityCount} entities, ${graphExtraction.quality.relationCount} relations)`);
1090
2347
  }
1091
2348
  catch (err) {
2349
+ graphExtractionDurationMs = Date.now() - extractionStart;
1092
2350
  allWarnings.push(`graph extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1093
2351
  }
1094
2352
  }
2353
+ else if (sources.length > 0 && !graphEnabled) {
2354
+ info("[improve] graph extraction skipped (features.index.graph_extraction is disabled)");
2355
+ }
2356
+ // Orphan proposal purge — reject pending reflect proposals whose target
2357
+ // asset no longer exists on disk. Runs after graph extraction so newly
2358
+ // promoted assets from accept flows during this run are already present.
2359
+ if (primaryStashDir) {
2360
+ try {
2361
+ const purgeResult = purgeOrphanProposals(primaryStashDir, sources.map((s) => s.path));
2362
+ orphansPurged = purgeResult.rejected;
2363
+ if (purgeResult.rejected > 0) {
2364
+ info(`[improve] orphan purge: ${purgeResult.rejected}/${purgeResult.checked} orphaned proposals rejected (${purgeResult.durationMs}ms)`);
2365
+ }
2366
+ appendEvent({
2367
+ eventType: "proposal_orphan_purge",
2368
+ ref: "proposals:_orphan-purge",
2369
+ metadata: {
2370
+ checked: purgeResult.checked,
2371
+ rejected: purgeResult.rejected,
2372
+ durationMs: purgeResult.durationMs,
2373
+ byType: purgeResult.byType,
2374
+ orphans: purgeResult.orphans.map((o) => o.ref),
2375
+ },
2376
+ }, eventsCtx);
2377
+ }
2378
+ catch (err) {
2379
+ allWarnings.push(`orphan purge failed: ${err instanceof Error ? err.message : String(err)}`);
2380
+ }
2381
+ // Phase 6B (Advantage D6b): expire pending proposals that have aged past
2382
+ // the retention window. Runs AFTER orphan purge so we never double-archive
2383
+ // a proposal that orphan-purge already moved. `expireStaleProposals` emits
2384
+ // its own per-proposal `proposal_expired` events; we additionally emit a
2385
+ // single roll-up event here for parity with the orphan-purge surface.
2386
+ try {
2387
+ const expireResult = expireStaleProposals(primaryStashDir, config);
2388
+ proposalsExpired = expireResult.expired;
2389
+ if (expireResult.expired > 0) {
2390
+ info(`[improve] expiration: ${expireResult.expired}/${expireResult.checked} pending proposals expired ` +
2391
+ `(retention=${expireResult.retentionDays}d, ${expireResult.durationMs}ms)`);
2392
+ }
2393
+ appendEvent({
2394
+ eventType: "proposal_expiration_pass",
2395
+ ref: "proposals:_expiration",
2396
+ metadata: {
2397
+ checked: expireResult.checked,
2398
+ expired: expireResult.expired,
2399
+ durationMs: expireResult.durationMs,
2400
+ retentionDays: expireResult.retentionDays,
2401
+ expiredProposals: expireResult.expiredProposals,
2402
+ },
2403
+ }, eventsCtx);
2404
+ }
2405
+ catch (err) {
2406
+ allWarnings.push(`proposal expiration failed: ${err instanceof Error ? err.message : String(err)}`);
2407
+ }
2408
+ }
2409
+ // Fix #2 (observability 0.8.0): trim the events table in state.db so it
2410
+ // doesn't grow unbounded. `akm health` writes a `health_probe` row on every
2411
+ // invocation, and every command surface emits at least one event besides —
2412
+ // without this trim, state.db is a permanent append-only log. Config key
2413
+ // `improve.eventRetentionDays` (default 90, set 0 to disable) controls the
2414
+ // window. `purgeOldEvents()` opens its own state.db handle separate from
2415
+ // the index `db` above (different SQLite file).
2416
+ {
2417
+ const retentionDays = typeof config.improve?.eventRetentionDays === "number" ? config.improve.eventRetentionDays : 90;
2418
+ if (retentionDays > 0) {
2419
+ let stateDb;
2420
+ try {
2421
+ stateDb = openStateDatabase();
2422
+ const purgedCount = purgeOldEvents(stateDb, retentionDays);
2423
+ if (purgedCount > 0) {
2424
+ info(`[improve] events purge: ${purgedCount} event(s) older than ${retentionDays}d removed from state.db`);
2425
+ }
2426
+ appendEvent({
2427
+ eventType: "events_purged",
2428
+ ref: "events:_purge",
2429
+ metadata: { purgedCount, retentionDays },
2430
+ }, eventsCtx);
2431
+ // improve_runs uses the same retention window as events — both are
2432
+ // observability/audit data, both grow append-only, both have a
2433
+ // dedicated purge helper. Mirroring the events purge here means a
2434
+ // single retention knob (improve.eventRetentionDays) governs both.
2435
+ const improveRunsPurged = purgeOldImproveRuns(stateDb, retentionDays);
2436
+ if (improveRunsPurged > 0) {
2437
+ info(`[improve] improve_runs purge: ${improveRunsPurged} run(s) older than ${retentionDays}d removed from state.db`);
2438
+ }
2439
+ appendEvent({
2440
+ eventType: "improve_runs_purged",
2441
+ ref: "improve_runs:_purge",
2442
+ metadata: { purgedCount: improveRunsPurged, retentionDays },
2443
+ }, eventsCtx);
2444
+ }
2445
+ catch (err) {
2446
+ allWarnings.push(`events purge failed: ${err instanceof Error ? err.message : String(err)}`);
2447
+ }
2448
+ finally {
2449
+ if (stateDb) {
2450
+ try {
2451
+ stateDb.close();
2452
+ }
2453
+ catch {
2454
+ // best-effort
2455
+ }
2456
+ }
2457
+ }
2458
+ }
2459
+ }
2460
+ // Phase 4A (staleness detection). Activates the `deprecated` belief-state
2461
+ // machinery shipped in Phase 1A. Default OFF — gated by
2462
+ // `features.index.staleness_detection.enabled`. Runs after orphan purge
2463
+ // and before the URL check (which lives in the outer caller).
2464
+ if (sources.length > 0) {
2465
+ try {
2466
+ stalenessDetection = await stalenessDetectionFn(config, sources, budgetSignal, db);
2467
+ if (stalenessDetection.considered > 0) {
2468
+ info(`[improve] staleness detection complete (considered ${stalenessDetection.considered}, ` +
2469
+ `deprecated ${stalenessDetection.deprecated}, confirmed ${stalenessDetection.confirmed}, ` +
2470
+ `skipped ${stalenessDetection.skipped}, ${stalenessDetection.durationMs}ms)`);
2471
+ }
2472
+ for (const w of stalenessDetection.warnings)
2473
+ allWarnings.push(`[improve] staleness detection: ${w}`);
2474
+ }
2475
+ catch (err) {
2476
+ allWarnings.push(`staleness detection failed: ${err instanceof Error ? err.message : String(err)}`);
2477
+ }
2478
+ }
1095
2479
  }
1096
2480
  finally {
1097
2481
  if (db)
@@ -1100,7 +2484,12 @@ async function runImproveMaintenancePasses(args) {
1100
2484
  return {
1101
2485
  ...(memoryInference ? { memoryInference } : {}),
1102
2486
  ...(graphExtraction ? { graphExtraction } : {}),
2487
+ ...(stalenessDetection ? { stalenessDetection } : {}),
1103
2488
  ...(actions.length > 0 ? { actions } : {}),
2489
+ memoryInferenceDurationMs,
2490
+ graphExtractionDurationMs,
2491
+ orphansPurged,
2492
+ proposalsExpired,
1104
2493
  };
1105
2494
  }
1106
2495
  function shouldAnalyzeMemoryCleanup(scope, eligibleMemories, primaryStashDir) {
@@ -1141,7 +2530,7 @@ function buildUtilityMap(refs) {
1141
2530
  }
1142
2531
  const ids = [...idToRef.keys()];
1143
2532
  if (ids.length > 0) {
1144
- const scores = getUtilityScoresByIds(db, ids);
2533
+ const { global: scores } = getUtilityScoresByIds(db, ids);
1145
2534
  for (const [id, score] of scores) {
1146
2535
  const ref = idToRef.get(id);
1147
2536
  if (ref)
@@ -1149,7 +2538,8 @@ function buildUtilityMap(refs) {
1149
2538
  }
1150
2539
  }
1151
2540
  }
1152
- catch {
2541
+ catch (err) {
2542
+ rethrowIfTestIsolationError(err);
1153
2543
  // best-effort: if DB unavailable, all utilities default to 0
1154
2544
  }
1155
2545
  finally {