akm-cli 0.7.5 → 0.8.0-rc.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300) hide show
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +192 -2
  2. package/README.md +22 -6
  3. package/SECURITY.md +93 -0
  4. package/dist/cli/config-migrate.js +144 -0
  5. package/dist/cli/config-validate.js +39 -0
  6. package/dist/cli/confirm.js +73 -0
  7. package/dist/cli/parse-args.js +133 -0
  8. package/dist/cli/shared.js +129 -0
  9. package/dist/cli.js +2569 -1449
  10. package/dist/commands/add-cli.js +279 -0
  11. package/dist/commands/agent-dispatch.js +110 -0
  12. package/dist/commands/agent-support.js +68 -0
  13. package/dist/commands/completions.js +3 -0
  14. package/dist/commands/config-cli.js +130 -534
  15. package/dist/commands/consolidate.js +2122 -0
  16. package/dist/commands/curate.js +44 -3
  17. package/dist/commands/db-cli.js +23 -0
  18. package/dist/commands/distill-promotion-policy.js +660 -0
  19. package/dist/commands/distill.js +1075 -77
  20. package/dist/commands/env.js +213 -0
  21. package/dist/commands/eval-cases.js +43 -0
  22. package/dist/commands/events.js +5 -23
  23. package/dist/commands/extract-cli.js +127 -0
  24. package/dist/commands/extract-prompt.js +204 -0
  25. package/dist/commands/extract.js +477 -0
  26. package/dist/commands/feedback-cli.js +331 -0
  27. package/dist/commands/graph.js +477 -0
  28. package/dist/commands/health.js +1302 -0
  29. package/dist/commands/help/help-accept.md +12 -0
  30. package/dist/commands/help/help-improve.md +69 -0
  31. package/dist/commands/help/help-proposals.md +18 -0
  32. package/dist/commands/help/help-propose.md +17 -0
  33. package/dist/commands/help/help-reject.md +11 -0
  34. package/dist/commands/history.js +54 -46
  35. package/dist/commands/improve-auto-accept.js +97 -0
  36. package/dist/commands/improve-cli.js +217 -0
  37. package/dist/commands/improve-profiles.js +166 -0
  38. package/dist/commands/improve-result-file.js +167 -0
  39. package/dist/commands/improve.js +2373 -0
  40. package/dist/commands/info.js +5 -2
  41. package/dist/commands/init.js +50 -2
  42. package/dist/commands/installed-stashes.js +102 -139
  43. package/dist/commands/knowledge.js +136 -0
  44. package/dist/commands/lint/agent-linter.js +49 -0
  45. package/dist/commands/lint/base-linter.js +479 -0
  46. package/dist/commands/lint/command-linter.js +49 -0
  47. package/dist/commands/lint/default-linter.js +16 -0
  48. package/dist/commands/lint/env-key-rules.js +154 -0
  49. package/dist/commands/lint/index.js +196 -0
  50. package/dist/commands/lint/knowledge-linter.js +16 -0
  51. package/dist/commands/lint/markdown-insertion.js +343 -0
  52. package/dist/commands/lint/memory-linter.js +61 -0
  53. package/dist/commands/lint/registry.js +36 -0
  54. package/dist/commands/lint/skill-linter.js +45 -0
  55. package/dist/commands/lint/task-linter.js +50 -0
  56. package/dist/commands/lint/types.js +4 -0
  57. package/dist/commands/lint/workflow-linter.js +56 -0
  58. package/dist/commands/lint.js +4 -0
  59. package/dist/commands/migration-help.js +5 -2
  60. package/dist/commands/proposal.js +67 -12
  61. package/dist/commands/propose.js +86 -31
  62. package/dist/commands/reflect.js +1091 -73
  63. package/dist/commands/registry-cli.js +150 -0
  64. package/dist/commands/registry-search.js +5 -2
  65. package/dist/commands/remember-cli.js +257 -0
  66. package/dist/commands/remember.js +69 -6
  67. package/dist/commands/schema-repair.js +203 -0
  68. package/dist/commands/search.js +115 -14
  69. package/dist/commands/secret.js +173 -0
  70. package/dist/commands/self-update.js +3 -0
  71. package/dist/commands/show.js +148 -25
  72. package/dist/commands/source-add.js +17 -45
  73. package/dist/commands/source-clone.js +3 -0
  74. package/dist/commands/source-manage.js +14 -19
  75. package/dist/commands/tasks.js +437 -0
  76. package/dist/commands/url-checker.js +42 -0
  77. package/dist/core/action-contributors.js +28 -0
  78. package/dist/core/asset-ref.js +17 -2
  79. package/dist/core/asset-registry.js +12 -17
  80. package/dist/core/asset-serialize.js +88 -0
  81. package/dist/core/asset-spec.js +67 -1
  82. package/dist/core/common.js +182 -0
  83. package/dist/core/concurrent.js +25 -0
  84. package/dist/core/config-io.js +347 -0
  85. package/dist/core/config-migration.js +622 -0
  86. package/dist/core/config-schema.js +534 -0
  87. package/dist/core/config-sources.js +108 -0
  88. package/dist/core/config-types.js +4 -0
  89. package/dist/core/config-walker.js +337 -0
  90. package/dist/core/config.js +364 -981
  91. package/dist/core/errors.js +42 -20
  92. package/dist/core/events.js +91 -138
  93. package/dist/core/file-lock.js +104 -0
  94. package/dist/core/frontmatter.js +75 -8
  95. package/dist/core/lesson-lint.js +3 -0
  96. package/dist/core/markdown.js +20 -0
  97. package/dist/core/memory-belief.js +62 -0
  98. package/dist/core/memory-contradiction-detect.js +274 -0
  99. package/dist/core/memory-improve.js +806 -0
  100. package/dist/core/parse.js +158 -0
  101. package/dist/core/paths.js +280 -14
  102. package/dist/core/proposal-quality-validators.js +380 -0
  103. package/dist/core/proposal-validators.js +69 -0
  104. package/dist/core/proposals.js +512 -42
  105. package/dist/core/state-db.js +1068 -0
  106. package/dist/core/text-truncation.js +107 -0
  107. package/dist/core/time.js +54 -0
  108. package/dist/core/tty.js +59 -0
  109. package/dist/core/warn.js +64 -1
  110. package/dist/core/write-source.js +3 -0
  111. package/dist/indexer/db-backup.js +391 -0
  112. package/dist/indexer/db-search.js +178 -256
  113. package/dist/indexer/db.js +975 -103
  114. package/dist/indexer/ensure-index.js +64 -0
  115. package/dist/indexer/file-context.js +3 -0
  116. package/dist/indexer/graph-boost.js +376 -101
  117. package/dist/indexer/graph-db.js +391 -0
  118. package/dist/indexer/graph-dedup.js +95 -0
  119. package/dist/indexer/graph-extraction.js +550 -124
  120. package/dist/indexer/index-context.js +4 -0
  121. package/dist/indexer/indexer.js +523 -301
  122. package/dist/indexer/llm-cache.js +52 -0
  123. package/dist/indexer/manifest.js +3 -0
  124. package/dist/indexer/matchers.js +167 -160
  125. package/dist/indexer/memory-inference.js +152 -74
  126. package/dist/indexer/metadata-contributors.js +29 -0
  127. package/dist/indexer/metadata.js +275 -196
  128. package/dist/indexer/path-resolver.js +92 -0
  129. package/dist/indexer/project-context.js +192 -0
  130. package/dist/indexer/ranking-contributors.js +331 -0
  131. package/dist/indexer/ranking.js +81 -0
  132. package/dist/indexer/search-fields.js +5 -9
  133. package/dist/indexer/search-hit-enrichers.js +111 -0
  134. package/dist/indexer/search-source.js +44 -10
  135. package/dist/indexer/semantic-status.js +6 -17
  136. package/dist/indexer/staleness-detect.js +447 -0
  137. package/dist/indexer/usage-events.js +12 -9
  138. package/dist/indexer/walker.js +28 -0
  139. package/dist/integrations/agent/builders.js +135 -0
  140. package/dist/integrations/agent/config.js +122 -230
  141. package/dist/integrations/agent/detect.js +3 -0
  142. package/dist/integrations/agent/index.js +7 -13
  143. package/dist/integrations/agent/model-aliases.js +55 -0
  144. package/dist/integrations/agent/profiles.js +70 -5
  145. package/dist/integrations/agent/prompts.js +214 -80
  146. package/dist/integrations/agent/runner.js +151 -0
  147. package/dist/integrations/agent/sdk-runner.js +126 -0
  148. package/dist/integrations/agent/spawn.js +118 -23
  149. package/dist/integrations/github.js +3 -0
  150. package/dist/integrations/lockfile.js +32 -69
  151. package/dist/integrations/session-logs/index.js +69 -0
  152. package/dist/integrations/session-logs/inline-refs.js +35 -0
  153. package/dist/integrations/session-logs/pre-filter.js +152 -0
  154. package/dist/integrations/session-logs/providers/claude-code.js +282 -0
  155. package/dist/integrations/session-logs/providers/opencode.js +258 -0
  156. package/dist/integrations/session-logs/types.js +4 -0
  157. package/dist/llm/call-ai.js +62 -0
  158. package/dist/llm/client.js +77 -124
  159. package/dist/llm/embedder.js +20 -29
  160. package/dist/llm/embedders/cache.js +3 -7
  161. package/dist/llm/embedders/local.js +42 -1
  162. package/dist/llm/embedders/remote.js +20 -8
  163. package/dist/llm/embedders/types.js +3 -7
  164. package/dist/llm/feature-gate.js +95 -48
  165. package/dist/llm/graph-extract.js +676 -70
  166. package/dist/llm/index-passes.js +44 -29
  167. package/dist/llm/memory-infer.js +77 -71
  168. package/dist/llm/metadata-enhance.js +42 -29
  169. package/dist/llm/prompts/extract-session.md +80 -0
  170. package/dist/llm/prompts/graph-extract-user-prompt.md +35 -0
  171. package/dist/output/cli-hints-full.md +292 -0
  172. package/dist/output/cli-hints-short.md +66 -0
  173. package/dist/output/cli-hints.js +7 -320
  174. package/dist/output/context.js +60 -8
  175. package/dist/output/renderers.js +300 -257
  176. package/dist/output/shapes/curate.js +56 -0
  177. package/dist/output/shapes/distill.js +10 -0
  178. package/dist/output/shapes/env-list.js +19 -0
  179. package/dist/output/shapes/events.js +11 -0
  180. package/dist/output/shapes/helpers.js +424 -0
  181. package/dist/output/shapes/history.js +7 -0
  182. package/dist/output/shapes/passthrough.js +102 -0
  183. package/dist/output/shapes/proposal-accept.js +7 -0
  184. package/dist/output/shapes/proposal-diff.js +7 -0
  185. package/dist/output/shapes/proposal-list.js +7 -0
  186. package/dist/output/shapes/proposal-producer.js +11 -0
  187. package/dist/output/shapes/proposal-reject.js +7 -0
  188. package/dist/output/shapes/proposal-show.js +7 -0
  189. package/dist/output/shapes/registry-search.js +6 -0
  190. package/dist/output/shapes/registry.js +30 -0
  191. package/dist/output/shapes/search.js +6 -0
  192. package/dist/output/shapes/secret-list.js +19 -0
  193. package/dist/output/shapes/show.js +6 -0
  194. package/dist/output/shapes/vault-list.js +19 -0
  195. package/dist/output/shapes.js +51 -516
  196. package/dist/output/text/add.js +6 -0
  197. package/dist/output/text/clone.js +6 -0
  198. package/dist/output/text/config.js +6 -0
  199. package/dist/output/text/curate.js +6 -0
  200. package/dist/output/text/distill.js +7 -0
  201. package/dist/output/text/enable-disable.js +7 -0
  202. package/dist/output/text/events.js +10 -0
  203. package/dist/output/text/feedback.js +6 -0
  204. package/dist/output/text/helpers.js +1039 -0
  205. package/dist/output/text/history.js +7 -0
  206. package/dist/output/text/import.js +6 -0
  207. package/dist/output/text/index.js +6 -0
  208. package/dist/output/text/info.js +6 -0
  209. package/dist/output/text/init.js +6 -0
  210. package/dist/output/text/list.js +6 -0
  211. package/dist/output/text/proposal-producer.js +8 -0
  212. package/dist/output/text/proposal.js +11 -0
  213. package/dist/output/text/registry-commands.js +11 -0
  214. package/dist/output/text/registry.js +30 -0
  215. package/dist/output/text/remember.js +6 -0
  216. package/dist/output/text/remove.js +6 -0
  217. package/dist/output/text/save.js +6 -0
  218. package/dist/output/text/search.js +6 -0
  219. package/dist/output/text/show.js +6 -0
  220. package/dist/output/text/update.js +6 -0
  221. package/dist/output/text/upgrade.js +6 -0
  222. package/dist/output/text/vault.js +16 -0
  223. package/dist/output/text/wiki.js +15 -0
  224. package/dist/output/text/workflow.js +14 -0
  225. package/dist/output/text.js +44 -1092
  226. package/dist/registry/build-index.js +3 -0
  227. package/dist/registry/create-provider-registry.js +3 -0
  228. package/dist/registry/factory.js +4 -1
  229. package/dist/registry/origin-resolve.js +3 -0
  230. package/dist/registry/providers/index.js +3 -0
  231. package/dist/registry/providers/skills-sh.js +71 -50
  232. package/dist/registry/providers/static-index.js +53 -48
  233. package/dist/registry/providers/types.js +3 -24
  234. package/dist/registry/resolve.js +11 -16
  235. package/dist/registry/types.js +3 -0
  236. package/dist/scripts/migrate-storage.js +17750 -0
  237. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
  238. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  239. package/dist/setup/detect.js +3 -0
  240. package/dist/setup/ripgrep-install.js +3 -0
  241. package/dist/setup/ripgrep-resolve.js +3 -0
  242. package/dist/setup/setup.js +775 -37
  243. package/dist/setup/steps.js +3 -15
  244. package/dist/sources/include.js +3 -0
  245. package/dist/sources/provider-factory.js +5 -12
  246. package/dist/sources/provider.js +3 -20
  247. package/dist/sources/providers/filesystem.js +19 -23
  248. package/dist/sources/providers/git.js +138 -21
  249. package/dist/sources/providers/index.js +3 -0
  250. package/dist/sources/providers/install-types.js +3 -13
  251. package/dist/sources/providers/npm.js +3 -4
  252. package/dist/sources/providers/provider-utils.js +3 -0
  253. package/dist/sources/providers/sync-from-ref.js +3 -11
  254. package/dist/sources/providers/tar-utils.js +3 -0
  255. package/dist/sources/providers/website.js +18 -22
  256. package/dist/sources/resolve.js +3 -0
  257. package/dist/sources/types.js +3 -0
  258. package/dist/sources/website-ingest.js +7 -0
  259. package/dist/tasks/backends/cron.js +203 -0
  260. package/dist/tasks/backends/exec-utils.js +28 -0
  261. package/dist/tasks/backends/index.js +24 -0
  262. package/dist/tasks/backends/launchd-template.xml +19 -0
  263. package/dist/tasks/backends/launchd.js +187 -0
  264. package/dist/tasks/backends/schtasks-template.xml +29 -0
  265. package/dist/tasks/backends/schtasks.js +215 -0
  266. package/dist/tasks/parser.js +211 -0
  267. package/dist/tasks/resolveAkmBin.js +87 -0
  268. package/dist/tasks/runner.js +458 -0
  269. package/dist/tasks/schedule.js +227 -0
  270. package/dist/tasks/schema.js +15 -0
  271. package/dist/tasks/validator.js +62 -0
  272. package/dist/version.js +3 -0
  273. package/dist/wiki/index-template.md +12 -0
  274. package/dist/wiki/ingest-workflow-template.md +54 -0
  275. package/dist/wiki/log-template.md +8 -0
  276. package/dist/wiki/schema-template.md +61 -0
  277. package/dist/wiki/wiki-templates.js +15 -0
  278. package/dist/wiki/wiki.js +13 -61
  279. package/dist/workflows/authoring.js +8 -25
  280. package/dist/workflows/cli.js +3 -0
  281. package/dist/workflows/db.js +140 -10
  282. package/dist/workflows/document-cache.js +3 -10
  283. package/dist/workflows/parser.js +3 -0
  284. package/dist/workflows/renderer.js +11 -3
  285. package/dist/workflows/runs.js +77 -92
  286. package/dist/workflows/schema.js +3 -0
  287. package/dist/workflows/scope-key.js +3 -0
  288. package/dist/workflows/validator.js +4 -8
  289. package/dist/workflows/workflow-template.md +24 -0
  290. package/docs/README.md +10 -2
  291. package/docs/data-and-telemetry.md +225 -0
  292. package/docs/migration/release-notes/0.7.0.md +1 -1
  293. package/docs/migration/release-notes/0.7.5.md +2 -2
  294. package/docs/migration/release-notes/0.8.0.md +48 -0
  295. package/docs/migration/v0.7-to-v0.8.md +1307 -0
  296. package/package.json +30 -12
  297. package/.github/LICENSE +0 -374
  298. package/dist/commands/install-audit.js +0 -381
  299. package/dist/commands/vault.js +0 -328
  300. package/dist/templates/wiki-templates.js +0 -100
@@ -0,0 +1,2122 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import { createHash } from "node:crypto";
5
+ import fs from "node:fs";
6
+ import path from "node:path";
7
+ import readline from "node:readline";
8
+ import { parse as yamlParse, stringify as yamlStringify } from "yaml";
9
+ import { parseAssetRef } from "../core/asset-ref";
10
+ import { assembleAssetFromString } from "../core/asset-serialize";
11
+ import { resolveStashDir, timestampForFilename } from "../core/common";
12
+ import { getDefaultLlmConfig, loadConfig } from "../core/config";
13
+ import { ConfigError } from "../core/errors";
14
+ import { appendEvent } from "../core/events";
15
+ import { parseFrontmatter } from "../core/frontmatter";
16
+ import { writeContradictEdge } from "../core/memory-belief";
17
+ import { parseEmbeddedJsonResponse } from "../core/parse";
18
+ import { hasHotCaptureMode, hasSupersededStatus, MERGE_ABSOLUTE_FLOOR_CHARS, MERGE_SHRINK_RATIO_MIN, validateProposalFrontmatter, } from "../core/proposal-quality-validators";
19
+ import { createProposal, isProposalSkipped, listProposals } from "../core/proposals";
20
+ import { detectTruncatedDescription } from "../core/text-truncation";
21
+ // Re-export the moved helpers so existing test imports continue to resolve.
22
+ export { hasSupersededStatus, validateProposalFrontmatter };
23
+ import { warn } from "../core/warn";
24
+ import { deleteAssetFromSource, resolveWriteTarget, writeAssetToSource } from "../core/write-source";
25
+ import { closeDatabase, findEntryIdByRef, getAllEntries, getEntryById, getNeighborsByEntryId, openExistingDatabase, } from "../indexer/db";
26
+ import { resolveImproveProcessRunnerFromProfile } from "../integrations/agent/runner";
27
+ import { chatCompletion } from "../llm/client";
28
+ import { cosineSimilarity, embedBatch } from "../llm/embedder";
29
+ import { isLlmFeatureEnabled, tryLlmFeature } from "../llm/feature-gate";
30
+ // ── Prompts ─────────────────────────────────────────────────────────────────
31
// System prompt text for the consolidate planner. It spells out the five
// verdicts a memory can receive (merge / delete / promote / contradict / keep)
// and the JSON reply shape ({ operations: [...], warnings: [...] }) that
// CONSOLIDATE_PLAN_JSON_SCHEMA is documented as mirroring.
// The text is a runtime string: backticks inside it are escaped (\`) because
// the whole prompt is a template literal, and its lines are deliberately not
// indented so no extra whitespace leaks into the prompt.
const CONSOLIDATE_SYSTEM_PROMPT = `You are the akm consolidate assistant analyzing memory assets.

Rules:
1. MERGE: Two or more memories are substantially duplicated or closely related → propose merging. Return the primary ref to keep and secondary refs to delete. Do NOT include mergedContent — the merge will be executed in a separate step.
2. DELETE: Memory is clearly outdated, contradicted, or redundant → propose deletion. NEVER propose delete for memories annotated \`(captureMode: hot)\` — they are user-explicit and only the user can retire them. The downstream guard will refuse these regardless, so proposing them just wastes tokens.
3. PROMOTE: Memory expresses a stable, reusable fact suitable as a \`knowledge:\` asset → propose promotion. Do NOT delete the source memory. NEVER propose promote / merge / contradict for memories annotated \`(already queued)\` — they have a pending proposal whose body matches; a duplicate will be deterministically dropped, so proposing them just wastes tokens.
4. CONTRADICT: Two memories make mutually exclusive factual claims about the same subject (e.g. "always use VPN" vs "VPN is optional") → mark the older or less authoritative one as contradicted. This writes a contradictedBy edge so the belief-resolution SCC algorithm can resolve the conflict. Do NOT delete contradicted memories — let the belief resolver decide.
5. KEEP: Memory is unique and current → omit from output.

Return ONLY JSON (no prose, no code fences):
{
"operations": [
{ "op": "merge", "primary": "memory:<name>", "secondaries": ["memory:<name>", ...], "mergeStrategy": "synthesize", "confidence": 0.95 },
{ "op": "delete", "ref": "memory:<name>", "reason": "<brief reason>", "confidence": 0.90 },
{ "op": "promote", "ref": "memory:<name>", "knowledgeRef": "knowledge:<suggested-slug>", "reason": "<brief reason>", "description": "<one sentence describing the new knowledge asset>", "confidence": 0.92 },
{ "op": "contradict", "ref": "memory:<name>", "contradictedByRef": "memory:<name>", "reason": "<brief reason>", "confidence": 0.88 }
],
"warnings": ["<optional concerns>"]
}

For every operation, emit a \`confidence\` field in [0, 1] expressing your certainty that the operation is correct and safe. Use 0.95+ only when evidence is unambiguous. Omit the field rather than guessing if you are uncertain.

When the merged content includes an \`updated\` frontmatter field, the value MUST be a real ISO date string (e.g. \`updated: 2026-05-20\`). NEVER emit \`updated: today\`, \`updated: {today}\`, \`updated: {today: null}\`, \`updated: now\`, or any other literal placeholder/template-variable. If you do not have a real source-of-truth date, OMIT the \`updated\` field entirely — the post-processor will not invent one for you.`;
54
/**
 * JSON Schema for structured consolidate plans (PR 1 of the asset-writers
 * decision — see knowledge:projects/akm/asset-writers-investigation/00-synthesis).
 * Mirrors the {operations[], warnings?[]} shape currently described in
 * CONSOLIDATE_SYSTEM_PROMPT. Providers with `supportsJsonSchema: true` enforce
 * the shape upstream so the chunk-level "invalid plan from AI — skipping"
 * branch in `runConsolidate` becomes unreachable on schema-honouring providers.
 *
 * The four operation variants (merge / delete / promote / contradict) are
 * modeled as a oneOf so a structured-output provider can still tell them apart
 * by the required `op` discriminator. `parseEmbeddedJsonResponse` keeps
 * working as a fallback parser for providers that ignore the schema.
 *
 * Shape notes:
 *   - `operations` is the only required top-level key; `warnings` is optional.
 *   - Every variant sets `additionalProperties: false`, so keys not listed
 *     under its `properties` are rejected by a validating provider.
 *   - `confidence` is optional on every variant and bounded to [0, 1].
 */
export const CONSOLIDATE_PLAN_JSON_SCHEMA = {
    type: "object",
    required: ["operations"],
    additionalProperties: false,
    properties: {
        operations: {
            type: "array",
            description: "Ordered list of consolidate operations the planner proposes.",
            items: {
                oneOf: [
                    // merge: keep `primary`, fold one or more `secondaries` into it.
                    {
                        type: "object",
                        required: ["op", "primary", "secondaries", "mergeStrategy"],
                        additionalProperties: false,
                        properties: {
                            op: { type: "string", enum: ["merge"] },
                            primary: { type: "string", minLength: 1 },
                            secondaries: {
                                type: "array",
                                minItems: 1,
                                items: { type: "string", minLength: 1 },
                            },
                            mergeStrategy: { type: "string", minLength: 1 },
                            confidence: { type: "number", minimum: 0, maximum: 1 },
                        },
                    },
                    // delete: retire a single memory `ref`, with a mandatory reason.
                    {
                        type: "object",
                        required: ["op", "ref", "reason"],
                        additionalProperties: false,
                        properties: {
                            op: { type: "string", enum: ["delete"] },
                            ref: { type: "string", minLength: 1 },
                            reason: { type: "string", minLength: 1 },
                            confidence: { type: "number", minimum: 0, maximum: 1 },
                        },
                    },
                    // promote: propose `knowledgeRef` as a new asset derived from `ref`.
                    {
                        type: "object",
                        required: ["op", "ref", "knowledgeRef", "reason"],
                        additionalProperties: false,
                        properties: {
                            op: { type: "string", enum: ["promote"] },
                            ref: { type: "string", minLength: 1 },
                            knowledgeRef: { type: "string", minLength: 1 },
                            reason: { type: "string", minLength: 1 },
                            // Optional, and unlike the other strings it has no minLength.
                            description: { type: "string" },
                            confidence: { type: "number", minimum: 0, maximum: 1 },
                        },
                    },
                    // contradict: mark `ref` as contradicted by `contradictedByRef`.
                    {
                        type: "object",
                        required: ["op", "ref", "contradictedByRef", "reason"],
                        additionalProperties: false,
                        properties: {
                            op: { type: "string", enum: ["contradict"] },
                            ref: { type: "string", minLength: 1 },
                            contradictedByRef: { type: "string", minLength: 1 },
                            reason: { type: "string", minLength: 1 },
                            confidence: { type: "number", minimum: 0, maximum: 1 },
                        },
                    },
                ],
            },
        },
        warnings: {
            type: "array",
            description: "Optional list of human-readable concerns the planner wants to surface.",
            items: { type: "string" },
        },
    },
};
139
/**
 * Reports whether a memory name may take part in consolidation.
 *
 * Names ending in ".derived" are excluded; every other name is eligible.
 *
 * @param {string} name - Memory name to check.
 * @returns {boolean} `false` for names with the ".derived" suffix, else `true`.
 */
export function isConsolidationEligibleMemoryName(name) {
    const hasDerivedSuffix = name.endsWith(".derived");
    return hasDerivedSuffix ? false : true;
}
142
/**
 * Tells whether the memory file at `filePath` carries `captureMode: hot` in
 * its frontmatter.
 *
 * Hot memories are USER-EXPLICIT (written via `akm remember` on the hot path):
 * the consolidate LLM must not delete or auto-merge them, because only the
 * user can decide to retire something they wrote on purpose.
 *
 * The file is read once per call. Consolidate works on ~10 memories per chunk,
 * so the IO cost is negligible. The check is fail-safe: a missing file, a read
 * error, or a frontmatter parse error all yield `false` (treated as not-hot);
 * unparseable memories are guarded against elsewhere in the consolidate flow.
 *
 * Guards against four observed defect classes (see
 * `memory:akm-improve-critical-review-2026-05-20`):
 *   - LLM marks a memory contradicted then deletes it (dangling contradictedBy)
 *   - LLM merges two unrelated memories that share a topic keyword
 *   - LLM judges a recent durable design memo as "redundant"
 *   - Cascade deletes (LLM uses ref:X as `contradictedBy` for ref:Y then deletes both)
 *
 * @param {string} filePath - Path of the memory file to inspect.
 * @returns Result of `hasHotCaptureMode` on the parsed frontmatter, or `false`
 *   when the file is absent or cannot be read/parsed.
 */
export function isHotCapturedMemory(filePath) {
    let verdict = false;
    try {
        if (fs.existsSync(filePath)) {
            const raw = fs.readFileSync(filePath, "utf8");
            const frontmatter = parseFrontmatter(raw);
            verdict = hasHotCaptureMode(frontmatter.data);
        }
    }
    catch {
        // Any read or parse failure is deliberately reported as "not hot".
        verdict = false;
    }
    return verdict;
}
173
/**
 * Classifies a memory file for the consolidate guard.
 *
 * @param {string} filePath - Path of the memory file to inspect.
 * @returns {"missing" | "unparseable" | "hot" | "safe"}
 *   - "missing"     — nothing exists at `filePath`.
 *   - "unparseable" — the file cannot be read, its frontmatter cannot be
 *                     parsed, or the parsed frontmatter is absent/empty.
 *   - "hot"         — `hasHotCaptureMode` accepts the frontmatter.
 *   - "safe"        — frontmatter is present and not hot.
 */
export function consolidateGuardStatus(filePath) {
    if (!fs.existsSync(filePath)) {
        return "missing";
    }
    let frontmatter;
    try {
        // A failed read and a failed parse lead to the same verdict.
        frontmatter = parseFrontmatter(fs.readFileSync(filePath, "utf8"));
    }
    catch {
        return "unparseable";
    }
    const fields = frontmatter.data;
    const hasFields = Boolean(fields) && Object.keys(fields).length > 0;
    if (!hasFields) {
        return "unparseable";
    }
    if (hasHotCaptureMode(fields)) {
        return "hot";
    }
    return "safe";
}
195
+ // ── Chunk sizing ─────────────────────────────────────────────────────────────
196
/**
 * Conservative chars-per-token estimate used when computing prompt budgets.
 * English text averages roughly 4 chars/token for most LLM tokenizers. We use
 * 3 to stay conservative (shorter tokens = more tokens per char).
 */
const CHARS_PER_TOKEN = 3;
/**
 * Overhead budget reserved for the system prompt, chunk header lines, and per-
 * memory metadata lines (name, description, tags, separator). Measured at
 * roughly 600 chars for the system prompt + ~100 chars of header + ~50 chars
 * per memory × chunk size. We round up to 2 000 tokens to leave room for the
 * model's own output.
 */
const PROMPT_OVERHEAD_TOKENS = 2_000;
/**
 * Default effective token budget used when the default LLM profile's
 * `contextLength` is not set. This is intentionally conservative (4 096)
 * rather than being set to the model's actual context window, because:
 *
 *   - When the agent path is used, the agent CLI (e.g. opencode)
 *     prepends its own large system prompt + conversation history before
 *     forwarding to the model. That overhead easily consumes 30K+ tokens on
 *     a model with a 16K context window, leaving very little room for
 *     chunk content.
 *   - When the HTTP path is used (an LLM profile is selected), only the akm
 *     system prompt and user prompt are sent, so the budget can be set to the
 *     model's actual context length via profiles.llm[defaults.llm].contextLength.
 *
 * Set profiles.llm[defaults.llm].contextLength in your config file to the
 * model's actual context window to allow larger chunks on the HTTP path.
 */
export const DEFAULT_CONTEXT_LENGTH_TOKENS = 4_096;
/**
 * Given the model's context window and the per-memory body truncation limit,
 * return the maximum number of memories that can safely fit in one chunk
 * without the prompt overflowing the context window.
 *
 * The formula is:
 *   usableTokens    = contextLength - PROMPT_OVERHEAD_TOKENS
 *   tokensPerMemory = ceil(bodyTruncation / CHARS_PER_TOKEN)
 *   chunkSize       = floor(usableTokens / tokensPerMemory)
 *
 * The result is never below 1 and never above the cap (50 unless
 * `maxChunkSize` says otherwise).
 *
 * Non-numeric input never produces NaN (which would otherwise propagate
 * through Math.max / Math.min into the caller's chunking arithmetic):
 *   - `contextLength` that is missing/NaN falls back to
 *     DEFAULT_CONTEXT_LENGTH_TOKENS.
 *   - `bodyTruncation` that is missing/NaN yields the smallest chunk size (1),
 *     since the per-memory cost is unknown.
 *   - `maxChunkSize` that is NaN is ignored in favour of the default cap.
 *
 * @param contextLength  - Model context window in tokens.
 * @param bodyTruncation - Max chars per memory body included in the prompt.
 * @param maxChunkSize   - Optional cap replacing the default of 50. Fractional
 *                         values are floored so the result is always an
 *                         integer. The value is not itself clamped to 50.
 * @returns Whole number of memories per chunk, at least 1.
 */
export function computeSafeChunkSize(contextLength, bodyTruncation, maxChunkSize) {
    const defaultCap = 50;
    const requestedContext = Number(contextLength);
    const contextTokens = Number.isNaN(requestedContext) ? DEFAULT_CONTEXT_LENGTH_TOKENS : requestedContext;
    const usableTokens = Math.max(contextTokens - PROMPT_OVERHEAD_TOKENS, 0);
    const tokensPerMemory = Math.max(Math.ceil(bodyTruncation / CHARS_PER_TOKEN), 1);
    const raw = Math.floor(usableTokens / tokensPerMemory);
    if (Number.isNaN(raw)) {
        // Unknown per-memory cost (or ∞/∞): fall back to the safest chunk size.
        return 1;
    }
    const requestedCap = Math.floor(Number(maxChunkSize ?? defaultCap));
    const cap = Number.isNaN(requestedCap) ? defaultCap : requestedCap;
    return Math.max(1, Math.min(cap, raw));
}
250
+ // ── Similarity clustering (C-1 / #380) ──────────────────────────────────────
251
/**
 * Re-order memories so that similar ones are placed adjacent to each other
 * before the memories are sliced into chunks. This ensures high-similarity
 * memories land in the same LLM context window, allowing the consolidate
 * model to detect and merge duplicates that would otherwise be split across
 * chunks and survive indefinitely.
 *
 * Algorithm: greedy nearest-neighbour chain starting from the first memory.
 * Each step selects the unused memory with the highest cosine similarity to
 * the last-placed memory. O(n²) — acceptable for the expected N < 200.
 *
 * mem0 arXiv:2504.19413 — every candidate compared against whole store.
 * A-MEM arXiv:2502.12110 — atomic notes linked by similarity.
 *
 * Returns the original order unchanged when:
 *   - The embedding config is not present.
 *   - Embedding requests fail (fail-open).
 *   - The embedding result is empty or its length differs from the input.
 *   - There are fewer than 3 memories (no benefit to reordering).
 *
 * The result is always a permutation of `memories`: if no similarity for a
 * step is comparable (e.g. every score is NaN), the chain continues with the
 * first not-yet-placed memory instead of stopping early, so no memory is
 * dropped from the consolidation run.
 *
 * @param memories - Memory records; each is read for `description`, `tags`
 *                   and `name` to build the text that gets embedded.
 * @param config   - Config object; `config.embedding` is passed to `embedBatch`.
 * @returns The re-ordered list (new array), or `memories` itself on fallback.
 */
async function clusterMemoriesBySimilarity(memories, config) {
    if (memories.length < 3 || !config.embedding)
        return memories;
    // Embed "description. tags" per memory, falling back to the bare name.
    const texts = memories.map((m) => {
        const parts = [];
        if (m.description)
            parts.push(m.description);
        if (m.tags.length > 0)
            parts.push(m.tags.join(" "));
        return parts.join(". ") || m.name;
    });
    let embeddings = null;
    try {
        embeddings = await embedBatch(texts, config.embedding);
    }
    catch {
        // Fail open: embedding failures degrade gracefully to original order.
        return memories;
    }
    if (!embeddings || embeddings.length !== memories.length)
        return memories;
    // Greedy nearest-neighbour chain.
    const used = new Array(memories.length).fill(false);
    const ordered = [];
    let current = 0; // start from the first memory
    ordered.push(memories[current]);
    used[current] = true;
    for (let step = 1; step < memories.length; step++) {
        const currentEmb = embeddings[current];
        let bestIdx = -1;
        let bestSim = -Infinity;
        let firstUnused = -1;
        for (let j = 0; j < memories.length; j++) {
            if (used[j])
                continue;
            if (firstUnused === -1)
                firstUnused = j;
            const sim = cosineSimilarity(currentEmb, embeddings[j]);
            // NaN never compares greater, so it can never become the best.
            if (sim > bestSim) {
                bestSim = sim;
                bestIdx = j;
            }
        }
        // No comparable score this step: keep original relative order rather
        // than truncating the list.
        const nextIdx = bestIdx !== -1 ? bestIdx : firstUnused;
        if (nextIdx === -1)
            break;
        ordered.push(memories[nextIdx]);
        used[nextIdx] = true;
        current = nextIdx;
    }
    return ordered;
}
318
+ // ── Chunk helpers ────────────────────────────────────────────────────────────
319
+ /**
320
+ * Build the per-chunk user prompt fed to the consolidate LLM.
321
+ *
322
+ * Each memory is annotated with two flags that drive the system-prompt
323
+ * rules (2 and 3 in CONSOLIDATE_SYSTEM_PROMPT):
324
+ * - `(captureMode: hot)` — user-explicit memory; system prompt rule 2
325
+ * forbids proposing delete. ~60 wasted LLM verdicts/4h on this user's
326
+ * stack before this annotation.
327
+ * - `(already queued)` — the memory's body hash matches a pending
328
+ * consolidate proposal; system prompt rule 3 forbids proposing
329
+ * promote/merge/contradict. ~107/4h before this annotation.
330
+ *
331
+ * Both annotations are visible to the LLM. `pendingProposalBodyHashes`
332
+ * is precomputed once per run by `loadPendingConsolidateProposalHashes`
333
+ * so the cost stays O(memories) inside the chunk loop.
334
+ */
335
+ export function buildChunkPrompt(sourceName, memories, chunkIndex, totalChunks, bodyTruncation, pendingProposalBodyHashes = new Set()) {
336
+ const start = memories[0] ? `memory:${memories[0].name}` : "";
337
+ const end = memories[memories.length - 1] ? `memory:${memories[memories.length - 1].name}` : "";
338
+ const annotationsByIndex = [];
339
+ const hotRefs = [];
340
+ for (const m of memories) {
341
+ let body = "";
342
+ try {
343
+ body = fs.readFileSync(m.filePath, "utf8");
344
+ }
345
+ catch {
346
+ body = "(unreadable)";
347
+ }
348
+ const parsed = parseFrontmatter(body);
349
+ const isHot = parsed.data.captureMode === "hot";
350
+ const bodyHash = createHash("sha256").update(parsed.content.trim(), "utf8").digest("hex");
351
+ const isAlreadyQueued = pendingProposalBodyHashes.has(bodyHash);
352
+ annotationsByIndex.push({ isHot, isAlreadyQueued, body });
353
+ if (isHot)
354
+ hotRefs.push(`memory:${m.name}`);
355
+ }
356
+ const lines = [
357
+ `Source: ${sourceName}`,
358
+ `Chunk ${chunkIndex + 1} of ${totalChunks}, memories ${start}–${end}:`,
359
+ "",
360
+ ];
361
+ // Top-of-prompt protection block for hot refs. Neutral phrasing — avoid
362
+ // op-words like "promote", "merge", "contradict" so the model doesn't
363
+ // accidentally treat the warning as a hint to use that op elsewhere
364
+ // (variant B leaked the word "contradict" into the control sample
365
+ // during the diagnostic).
366
+ if (hotRefs.length > 0) {
367
+ lines.push("⛔ DO NOT propose any `delete` operation for these refs — they are user-explicit (captureMode: hot) and the downstream guard refuses them regardless. Proposing delete for any of these only wastes tokens.");
368
+ for (const ref of hotRefs)
369
+ lines.push(` - ${ref}`);
370
+ lines.push("");
371
+ }
372
+ for (let i = 0; i < memories.length; i++) {
373
+ const m = memories[i];
374
+ const { isHot, isAlreadyQueued, body } = annotationsByIndex[i];
375
+ const annotations = [];
376
+ if (isHot)
377
+ annotations.push("captureMode: hot");
378
+ if (isAlreadyQueued)
379
+ annotations.push("already queued");
380
+ const annotationSuffix = annotations.length > 0 ? ` (${annotations.join("; ")})` : "";
381
+ lines.push(`[${i + 1}] memory:${m.name}${annotationSuffix}`);
382
+ lines.push(`Description: ${m.description || "(none)"}`);
383
+ lines.push(`Tags: ${m.tags.length > 0 ? m.tags.join(", ") : "(none)"}`);
384
+ lines.push("---");
385
+ lines.push(body.slice(0, bodyTruncation));
386
+ lines.push("");
387
+ }
388
+ return lines.join("\n");
389
+ }
390
/**
 * Collect the body hashes of every pending proposal whose source is
 * "consolidate".
 *
 * The hash is sha256 (hex) over the proposal payload's post-frontmatter
 * content, trimmed — the same domain `buildChunkPrompt` hashes memory bodies
 * in, so the two can be compared directly.
 *
 * Failure handling is deliberately permissive: a proposal whose payload
 * cannot be parsed is skipped, and any error while listing proposals yields
 * whatever has been collected so far (normally an empty set). An empty set
 * just means nothing gets annotated as "already queued".
 *
 * @param stashDir stash directory handed to `listProposals`
 * @returns Set of hex sha256 digests
 */
export function loadPendingConsolidateProposalHashes(stashDir) {
    const digests = new Set();
    const sha256Hex = (text) => createHash("sha256").update(text, "utf8").digest("hex");
    try {
        const fromConsolidate = listProposals(stashDir, { status: "pending" })
            .filter((proposal) => proposal.source === "consolidate");
        fromConsolidate.forEach((proposal) => {
            try {
                const trimmedBody = parseFrontmatter(proposal.payload.content).content.trim();
                digests.add(sha256Hex(trimmedBody));
            }
            catch {
                // skip malformed payloads — they can't dedup anyway
            }
        });
    }
    catch {
        // listProposals throws on missing stash dir during tests — empty set is safe
    }
    return digests;
}
417
/**
 * Structural check for one operation object parsed from the LLM's plan.
 *
 * Accepted shapes:
 *   - merge:      `primary` is a string, `secondaries` is an array of strings
 *   - delete:     `ref` is a string
 *   - promote:    `ref` and `knowledgeRef` are strings
 *   - contradict: `ref` and `contradictedByRef` are strings
 *
 * Every entry of `secondaries` must be a string because each one is later
 * used as a ref (a Map/Set key and in string comparisons). A stray number,
 * null or nested object would otherwise pass validation and flow on as a
 * bogus ref.
 *
 * @param op value to check (may be anything — it comes from parsed JSON)
 * @returns true when `op` is one of the accepted shapes
 */
function isValidOp(op) {
    if (typeof op !== "object" || op === null)
        return false;
    const isString = (value) => typeof value === "string";
    switch (op.op) {
        case "merge":
            return isString(op.primary) && Array.isArray(op.secondaries) && op.secondaries.every(isString);
        case "delete":
            return isString(op.ref);
        case "promote":
            return isString(op.ref) && isString(op.knowledgeRef);
        case "contradict":
            return isString(op.ref) && isString(op.contradictedByRef);
        default:
            return false;
    }
}
435
/**
 * Combine the per-chunk operation lists into one deduplicated plan.
 *
 * Conflict rules:
 *   - merge and promote both win over delete for the same ref, regardless of
 *     which chunk came first;
 *   - a later merge with the same primary replaces the earlier one;
 *   - a later promote with the same ref replaces the earlier one;
 *   - contradict ops are deduplicated by the `ref|contradictedByRef` pair
 *     (first occurrence wins);
 *   - a ref may be a secondary of only one merge (the first in insertion
 *     order), and a ref that is itself a merge primary is removed from every
 *     secondary list.
 *
 * The input op objects are never modified. A merge op whose secondary list
 * needs filtering is returned as a shallow copy carrying the filtered list,
 * so callers that still hold the per-chunk arrays see them unchanged.
 *
 * @param chunks array of per-chunk op arrays (each op already validated)
 * @returns `{ ops, warnings }` — ops ordered promote, merge, delete,
 *          contradict; warnings describe every secondary that was dropped
 */
export function mergePlans(chunks) {
    const mergeOps = new Map();
    const deleteOps = new Map();
    const promoteOps = new Map();
    // C-3 / #382: contradict ops keyed by `ref|contradictedByRef` to deduplicate.
    const contradictOps = new Map();
    const warnings = [];
    for (const chunk of chunks) {
        for (const op of chunk) {
            if (op.op === "merge") {
                // merge wins over delete (Map#delete is a no-op for absent keys)
                deleteOps.delete(op.primary);
                for (const sec of op.secondaries)
                    deleteOps.delete(sec);
                mergeOps.set(op.primary, op);
            }
            else if (op.op === "delete") {
                // merge and promote both win over delete. A promote is
                // non-destructive (creates a proposal) but the source memory is
                // counted in `promoted`; if a delete also fired, the ref would be
                // accounted for twice.
                if (!mergeOps.has(op.ref) && !promoteOps.has(op.ref)) {
                    deleteOps.set(op.ref, op);
                }
            }
            else if (op.op === "promote") {
                // C-2 / #381: when both a promote and a merge target the same
                // ref, keep the promote rather than discarding it. It is emitted
                // ahead of the merges in the final plan (see below).
                promoteOps.set(op.ref, op);
            }
            else if (op.op === "contradict") {
                // Deduplicate by ref+contradictedByRef pair.
                const key = `${op.ref}|${op.contradictedByRef}`;
                if (!contradictOps.has(key)) {
                    contradictOps.set(key, op);
                }
            }
        }
    }
    // Second pass. The first pass only removes deletes that were already
    // recorded when the winning op arrived; a full sweep here handles the
    // other order as well.
    //
    // Drop delete ops for any ref claimed by a promote op.
    for (const ref of promoteOps.keys()) {
        deleteOps.delete(ref);
    }
    // Enforce merge-wins-over-delete and deduplicate secondaries:
    //   1. a ref that is a primary of one merge cannot be a secondary of another;
    //   2. a ref that is a secondary of several merges stays only in the first.
    // Either overlap would count the same memory twice downstream.
    const claimedSecondaries = new Set();
    const finalMergeOps = [];
    for (const mergeOp of mergeOps.values()) {
        deleteOps.delete(mergeOp.primary);
        const keptSecondaries = mergeOp.secondaries.filter((sec) => {
            if (mergeOps.has(sec)) {
                warnings.push(`Merge: secondary ${sec} is also a merge primary — removing from secondary list to avoid double-count.`);
                return false;
            }
            if (claimedSecondaries.has(sec)) {
                warnings.push(`Merge: secondary ${sec} appears in multiple merge ops — retaining in first op only.`);
                return false;
            }
            claimedSecondaries.add(sec);
            deleteOps.delete(sec);
            return true;
        });
        // Copy instead of assigning to mergeOp.secondaries so the caller's
        // op objects are left untouched.
        finalMergeOps.push({ ...mergeOp, secondaries: keptSecondaries });
    }
    // C-2 / #381: promote ops are ordered BEFORE merge ops so that the
    // proposal-queue entry is created before any destructive merge when the
    // plan is executed in array order.
    const ops = [
        ...promoteOps.values(),
        ...finalMergeOps,
        ...deleteOps.values(),
        ...contradictOps.values(),
    ];
    return { ops, warnings };
}
532
/** Path of the consolidate journal file inside the stash's `.akm` directory. */
function getJournalPath(stashDir) {
    const akmDir = path.join(stashDir, ".akm");
    return path.join(akmDir, "consolidate-journal.json");
}
535
/** Path of the per-run backup directory, `<stash>/.akm/consolidate-backup/<timestamp>`. */
function getBackupDir(stashDir, timestamp) {
    const backupRoot = path.join(stashDir, ".akm", "consolidate-backup");
    return path.join(backupRoot, timestamp);
}
538
/**
 * Delete a stale consolidate journal and, when it can be located, the backup
 * directory that belongs to the same run.
 *
 * The backup directory name comes from `journal.backupTimestamp` when that is
 * a non-blank string; otherwise it is derived from `journal.startedAt` by
 * replacing `:` and `.` with `-`. When neither is usable, or the directory
 * does not exist, only the journal file is handled.
 *
 * Nothing is thrown for filesystem failures: each outcome is reported by
 * pushing a message onto `warnings`. The "Cleared …" message is emitted only
 * when the removal actually succeeded, so a failed removal no longer
 * produces both a "Failed to remove" and a "Cleared" line.
 *
 * @param stashDir stash directory containing `.akm`
 * @param journal  parsed journal object
 * @param warnings array that receives human-readable outcome messages
 */
function removeStaleJournal(stashDir, journal, warnings) {
    const journalPath = getJournalPath(stashDir);
    try {
        fs.unlinkSync(journalPath);
    }
    catch {
        warnings.push(`Failed to remove stale consolidate journal at ${journalPath}.`);
    }
    const backupTimestamp = typeof journal.backupTimestamp === "string" && journal.backupTimestamp.trim().length > 0
        ? journal.backupTimestamp.trim()
        : typeof journal.startedAt === "string" && journal.startedAt.trim().length > 0
            ? journal.startedAt.replace(/[:.]/g, "-")
            : "";
    if (!backupTimestamp)
        return;
    const backupDir = getBackupDir(stashDir, backupTimestamp);
    if (!fs.existsSync(backupDir))
        return;
    let removed = false;
    try {
        fs.rmSync(backupDir, { recursive: true, force: true });
        removed = true;
    }
    catch {
        warnings.push(`Failed to remove stale consolidate backup at ${backupDir}.`);
    }
    if (removed)
        warnings.push(`Cleared stale consolidate backup at ${backupDir}.`);
}
564
/**
 * Guard against starting a new consolidate run on top of an unfinished one.
 *
 * Behaviour:
 *   - no journal file: returns immediately;
 *   - journal unreadable (I/O error, invalid JSON, or valid JSON that is not
 *     an object, e.g. `null`): in "clean" mode the file is removed and a
 *     warning recorded; otherwise a ConfigError is thrown;
 *   - journal readable and `completed.length >= operations.length`: returns
 *     (missing or non-array fields count as length 0);
 *   - journal readable but incomplete: in "clean" mode the journal and its
 *     backup are removed via `removeStaleJournal`; otherwise a ConfigError is
 *     thrown, including the backup directory when the journal names one.
 *
 * @param stashDir     stash directory containing `.akm`
 * @param recoveryMode "clean" removes stale artifacts; any other value aborts
 * @param warnings     array that receives human-readable messages
 * @throws ConfigError ("INVALID_CONFIG_FILE") when stale state is found and
 *         `recoveryMode` is not "clean"
 */
function checkForIncompleteJournal(stashDir, recoveryMode, warnings) {
    const journalPath = getJournalPath(stashDir);
    if (!fs.existsSync(journalPath))
        return;
    let journal;
    try {
        journal = JSON.parse(fs.readFileSync(journalPath, "utf8"));
        // JSON.parse accepts `null` and bare primitives. Treat those as an
        // unreadable journal instead of letting `journal.operations` below
        // blow up with a raw TypeError on `null`.
        if (journal === null || typeof journal !== "object")
            throw new TypeError("consolidate journal is not a JSON object");
    }
    catch {
        if (recoveryMode === "clean") {
            try {
                fs.unlinkSync(journalPath);
                warnings.push(`Removed unreadable consolidate journal at ${journalPath}.`);
            }
            catch {
                warnings.push(`Failed to remove unreadable consolidate journal at ${journalPath}.`);
            }
            return;
        }
        throw new ConfigError(`Incomplete consolidation state detected: unreadable journal at ${journalPath}. Re-run with --consolidate-recovery clean to remove stale journal artifacts, or remove the file manually.`, "INVALID_CONFIG_FILE");
    }
    const operationCount = Array.isArray(journal.operations) ? journal.operations.length : 0;
    const completedCount = Array.isArray(journal.completed) ? journal.completed.length : 0;
    // Every recorded operation finished: nothing to recover.
    if (completedCount >= operationCount)
        return;
    if (recoveryMode === "clean") {
        removeStaleJournal(stashDir, journal, warnings);
        warnings.push(`Removed stale consolidation journal at ${journalPath} (${completedCount}/${operationCount} operations completed).`);
        return;
    }
    const backupHint = typeof journal.backupTimestamp === "string" && journal.backupTimestamp.trim().length > 0
        ? ` Backup dir: ${getBackupDir(stashDir, journal.backupTimestamp.trim())}.`
        : "";
    throw new ConfigError(`Incomplete consolidation run detected at ${journalPath} (${completedCount}/${operationCount} operations completed). Re-run with --consolidate-recovery clean to remove stale journal artifacts.${backupHint}`, "INVALID_CONFIG_FILE");
}
599
/**
 * Create (or overwrite) the consolidate journal for a run that is about to
 * execute `ops`. The journal starts with an empty `completed` list and
 * records the backup timestamp used for this run. The parent directory is
 * created when missing.
 */
function writeJournal(stashDir, ops, backupTimestamp) {
    const target = getJournalPath(stashDir);
    fs.mkdirSync(path.dirname(target), { recursive: true });
    const serialized = JSON.stringify({
        startedAt: new Date().toISOString(),
        operations: ops,
        completed: [],
        backupTimestamp,
    }, null, 2);
    fs.writeFileSync(target, serialized, "utf8");
}
610
/**
 * Append `opRef` to the journal's `completed` list and rewrite the file.
 * Best-effort: a missing journal is a no-op and any read, parse or write
 * error is swallowed.
 */
function markJournalCompleted(stashDir, opRef) {
    const target = getJournalPath(stashDir);
    if (fs.existsSync(target)) {
        try {
            const state = JSON.parse(fs.readFileSync(target, "utf8"));
            state.completed.push(opRef);
            const serialized = JSON.stringify(state, null, 2);
            fs.writeFileSync(target, serialized, "utf8");
        }
        catch {
            // best-effort
        }
    }
}
623
/**
 * Remove the consolidate journal and the backup directory for `timestamp`.
 * Both removals are attempted independently and their failures are ignored.
 */
function cleanupJournal(stashDir, timestamp) {
    const ignoringErrors = (action) => {
        try {
            action();
        }
        catch {
            // ignore
        }
    };
    const journalPath = getJournalPath(stashDir);
    ignoringErrors(() => fs.unlinkSync(journalPath));
    const backupDir = getBackupDir(stashDir, timestamp);
    ignoringErrors(() => fs.rmSync(backupDir, { recursive: true, force: true }));
}
639
/**
 * Copy `filePath` to `<backupDir>/<name>.md`, creating `backupDir` first.
 * Best-effort: any filesystem error is swallowed and nothing is returned.
 */
function backupFile(filePath, backupDir, name) {
    try {
        fs.mkdirSync(backupDir, { recursive: true });
        const destination = path.join(backupDir, `${name}.md`);
        fs.copyFileSync(filePath, destination);
    }
    catch {
        // best-effort
    }
}
648
+ // ── Archive helper (P1-B: soft-invalidation) ─────────────────────────────────
649
/**
 * Write a copy of a memory asset into `<stash>/.akm/archive/` with
 * `status: superseded` frontmatter. The original file is not touched here;
 * any removal from the live stash is left to the caller.
 *
 * Archive filename: `<timestamp>-<opIndex>-<basename>.md`
 * Frontmatter fields added or overwritten: status, superseded_at,
 * superseded_by (only when `supersededBy` is truthy), superseded_reason.
 *
 * Failure handling: an unreadable source file skips the archive write; a
 * frontmatter parse/serialise failure archives the raw file content instead;
 * a failed write is reported. Each of these pushes a message onto `warnings`
 * when that array is provided. Creating the archive directory is not
 * guarded, so an error there propagates to the caller.
 */
function archiveMemory(filePath, stashDir, ref, reason, opIndex, supersededBy, warnings) {
    const note = (message) => {
        if (warnings)
            warnings.push(message);
    };
    const archiveDir = path.join(stashDir, ".akm", "archive");
    fs.mkdirSync(archiveDir, { recursive: true });
    let raw;
    try {
        raw = fs.readFileSync(filePath, "utf8");
    }
    catch {
        note(`archiveMemory: could not read ${ref} for archiving — skipping archive write`);
        return;
    }
    // Falls back to the raw text when the frontmatter cannot be rewritten.
    let content = raw;
    try {
        const parsed = parseFrontmatter(raw);
        const frontmatter = {
            ...parsed.data,
            status: "superseded",
            superseded_at: new Date().toISOString(),
        };
        if (supersededBy)
            frontmatter.superseded_by = supersededBy;
        frontmatter.superseded_reason = reason;
        const yamlText = yamlStringify(frontmatter).trimEnd();
        content = assembleAssetFromString(yamlText, parsed.content);
    }
    catch {
        note(`archiveMemory: could not parse frontmatter for ${ref} — archiving raw`);
    }
    const ts = timestampForFilename();
    const baseName = path.basename(filePath, ".md");
    const archivePath = path.join(archiveDir, `${ts}-${opIndex}-${baseName}.md`);
    try {
        fs.writeFileSync(archivePath, content, "utf8");
    }
    catch (e) {
        note(`archiveMemory: write failed for ${ref}: ${String(e)}`);
    }
}
697
+ // ── LLM resolution ──────────────────────────────────────────────────────────
698
/**
 * Pick the LLM connection used by the consolidate pass.
 *
 * Resolution order:
 *
 *   1. The process entry at
 *      `profiles.improve.default.processes.consolidate`, resolved through
 *      `resolveImproveProcessRunnerFromProfile`. When that yields a runner of
 *      kind "llm", its connection is used. This lets a user pin a dedicated
 *      model for consolidation instead of whatever the default LLM is.
 *   2. Otherwise `getDefaultLlmConfig(config)`.
 *
 * A resolved runner of any other kind (e.g. agent/sdk) does not apply to the
 * consolidate HTTP path, so it also falls through to the default LLM rather
 * than disabling the pass.
 *
 * Regression guard (2026-05-26): consolidation previously called
 * `getDefaultLlmConfig` directly and silently ignored a configured
 * `processes.consolidate.profile`, sending every chunk to the default LLM.
 */
function resolveConsolidateLlmConfig(config) {
    const processSpec = config.profiles?.improve?.default?.processes?.consolidate;
    const runner = resolveImproveProcessRunnerFromProfile(processSpec, config);
    const usesLlmRunner = Boolean(runner) && runner.kind === "llm";
    if (!usesLlmRunner) {
        // Non-LLM runner modes (agent/sdk) don't apply to consolidate's HTTP
        // path; use the default LLM profile instead.
        return getDefaultLlmConfig(config);
    }
    return runner.connection;
}
728
+ // ── Main entry point ─────────────────────────────────────────────────────────
729
+ export async function akmConsolidate(opts = {}) {
730
+ const startMs = Date.now();
731
+ const config = opts.config ?? loadConfig();
732
+ const stashDir = opts.stashDir ?? resolveStashDir();
733
+ if (!isLlmFeatureEnabled(config, "memory_consolidation")) {
734
+ return {
735
+ schemaVersion: 1,
736
+ ok: true,
737
+ shape: "consolidate-result",
738
+ dryRun: opts.dryRun ?? false,
739
+ previewOnly: false,
740
+ target: opts.target ?? stashDir,
741
+ processed: 0,
742
+ merged: 0,
743
+ deleted: 0,
744
+ promoted: [],
745
+ contradicted: 0,
746
+ warnings: [],
747
+ durationMs: Date.now() - startMs,
748
+ };
749
+ }
750
+ const warnings = [];
751
+ checkForIncompleteJournal(stashDir, opts.recoveryMode ?? "abort", warnings);
752
+ let memories = loadMemoriesForSource(opts.target, stashDir, warnings);
753
+ if (memories.length === 0) {
754
+ return {
755
+ schemaVersion: 1,
756
+ ok: true,
757
+ shape: "consolidate-result",
758
+ dryRun: opts.dryRun ?? false,
759
+ previewOnly: false,
760
+ target: opts.target ?? stashDir,
761
+ processed: 0,
762
+ merged: 0,
763
+ deleted: 0,
764
+ promoted: [],
765
+ contradicted: 0,
766
+ warnings,
767
+ durationMs: Date.now() - startMs,
768
+ };
769
+ }
770
+ if (opts.incrementalSince) {
771
+ memories = narrowToIncrementalCandidates(memories, opts.incrementalSince, warnings);
772
+ if (memories.length === 0) {
773
+ return {
774
+ schemaVersion: 1,
775
+ ok: true,
776
+ shape: "consolidate-result",
777
+ dryRun: opts.dryRun ?? false,
778
+ previewOnly: false,
779
+ target: opts.target ?? stashDir,
780
+ processed: 0,
781
+ merged: 0,
782
+ deleted: 0,
783
+ promoted: [],
784
+ contradicted: 0,
785
+ warnings,
786
+ durationMs: Date.now() - startMs,
787
+ };
788
+ }
789
+ }
790
+ // Consolidation always uses the HTTP LLM client directly — never the agent
791
+ // CLI. The agent CLI is for interactive agent sessions (reflect, propose);
792
+ // structured JSON generation works better and faster via HTTP.
793
+ //
794
+ // Honor `profiles.improve.default.processes.consolidate.profile` first; fall
795
+ // back to the default LLM. See {@link resolveConsolidateLlmConfig}.
796
+ const llmConfig = resolveConsolidateLlmConfig(config);
797
+ const isHttpPath = !!llmConfig;
798
+ // Chunk sizing: derive a safe chunk size from the configured model context
799
+ // window so that the full prompt (system prompt + chunk user prompt) never
800
+ // exceeds the model's n_ctx limit. When no context length is configured we
801
+ // fall back to DEFAULT_CONTEXT_LENGTH_TOKENS (8 000) which is conservative
802
+ // enough for most 8K–16K local models.
803
+ //
804
+ // bodyTruncation caps the body excerpt included per memory in the prompt.
805
+ // Reducing it further than 500 chars degrades consolidation quality, so we
806
+ // keep it fixed and let computeSafeChunkSize vary the number of memories
807
+ // per chunk instead.
808
+ const bodyTruncation = 500;
809
+ const modelContextLength = llmConfig?.contextLength ?? DEFAULT_CONTEXT_LENGTH_TOKENS;
810
+ const chunkSize = computeSafeChunkSize(modelContextLength, bodyTruncation, opts.maxChunkSize);
811
+ // -- Phase A: plan generation -----------------------------------------------
812
+ const sourceName = opts.target ?? stashDir;
813
+ // C-1 / #380: Pre-cluster memories by embedding similarity before chunking.
814
+ // This ensures that semantically similar memories land in the same LLM
815
+ // context window, allowing the model to detect and merge duplicates that
816
+ // would otherwise be split across chunks and survive indefinitely.
817
+ // mem0 arXiv:2504.19413, A-MEM arXiv:2502.12110.
818
+ // Fails open: if embeddings are unavailable or fail, original order is used.
819
+ const clusteredMemories = await clusterMemoriesBySimilarity(memories, config);
820
+ const chunks = [];
821
+ for (let i = 0; i < clusteredMemories.length; i += chunkSize) {
822
+ chunks.push(clusteredMemories.slice(i, i + chunkSize));
823
+ }
824
+ // 2026-05-27 prompt-context fix: precompute body-hashes of pending
825
+ // consolidate proposals once, so the per-chunk prompt can annotate
826
+ // memories whose body would just produce a deterministic
827
+ // `dedup_pending_proposal` skip. Cuts ~110 wasted LLM proposals per
828
+ // 4h on this user's stack. See
829
+ // /tmp/akm-health-investigations/tuning-reasons-investigation.md §Q3.
830
+ const pendingProposalBodyHashes = loadPendingConsolidateProposalHashes(stashDir);
831
+ warn(`[consolidate] ${memories.length} memories / ${chunks.length} chunk(s) / chunk_size=${chunkSize}` +
832
+ ` / pending-proposal hashes: ${pendingProposalBodyHashes.size}`);
833
+ const chunkOpsArrays = [];
834
+ // Structured skip-reason histogram (2026-05-26): every deterministic
835
+ // post-LLM op rejection site below also calls `pushSkipReason` so the
836
+ // health rollup can aggregate without regex-parsing English warning
837
+ // strings. See `/tmp/akm-health-investigations/tuning-reasons-investigation.md` §Q2.
838
+ const skipReasons = [];
839
+ // Tracks refs already emitted to skipReasons. A ref can only occupy one
840
+ // accounting bucket; subsequent skip ops for the same ref are recorded as
841
+ // warnings but must not push a second skipReasons entry (that would inflate
842
+ // Σ(skipReasons) and break the invariant by +1 per duplicate).
843
+ const skipReasonEmittedRefs = new Set();
844
+ const pushSkipReason = (op, ref, reason) => {
845
+ // 2026-05-27 cross-chunk double-count fix: if `ref` already contributed
846
+ // to judgedNoAction in its own chunk (a different chunk proposed an op
847
+ // for it that is now being rejected here), promote it from the
848
+ // judgedNoAction bucket into the more specific skipReason bucket.
849
+ // Preserves the invariant: processed == actioned + judgedNoAction +
850
+ // Σ(skipReasons) + failedChunkMemories.
851
+ if (judgedNoActionRefs.delete(ref))
852
+ judgedNoAction--;
853
+ if (skipReasonEmittedRefs.has(ref)) {
854
+ // Already counted once. Record the extra skip for observability but
855
+ // don't push to skipReasons — that would break the accounting invariant.
856
+ warnings.push(`Skip: ${ref} already in skipReasons (${reason} via ${op}); not re-counted.`);
857
+ return;
858
+ }
859
+ skipReasonEmittedRefs.add(ref);
860
+ skipReasons.push({ op, ref, reason });
861
+ };
862
+ // judgedNoAction tracks memories the LLM saw inside a chunk but proposed
863
+ // no op for. Computed per chunk as `chunk.length − unique(targetRefs in ops)`.
864
+ let judgedNoAction = 0;
865
+ // 2026-05-27 cross-chunk double-count fix: refs that contributed to
866
+ // judgedNoAction in their own chunk. When a different chunk's op references
867
+ // one of these as a secondary and that op later fails, the ref would land
868
+ // in BOTH judgedNoAction and skipReasons (delta +1 per occurrence). Track
869
+ // the set so the merge-failure path can decrement and re-bucket.
870
+ const judgedNoActionRefs = new Set();
871
+ // 2026-05-26 accounting-leak fix: memories that belong to a chunk whose
872
+ // LLM call failed before any per-chunk noAction calculation runs. They
873
+ // would otherwise vanish from the envelope's accounting (no judgedNoAction
874
+ // bump, no skipReasons entry, no actioned counter).
875
+ let failedChunkMemories = 0;
876
+ // 2026-05-26 accounting-leak fix: per-secondary tally so successful merges
877
+ // account for `1 + secondaries.length` memories instead of 1.
878
+ let mergedSecondaries = 0;
879
+ // C-6 / #392: Replace two-consecutive-failures abort with failure-rate threshold.
880
+ // Consecutive-count policies are brittle against transient LM Studio reloads:
881
+ // two transient failures abort the run even though the next chunk would succeed.
882
+ // Rate-based abort (≥50% failure over ≥4 chunks) is more robust.
883
+ // Tanenbaum, Distributed Systems §8 — rate-based policies with minimum sample sizes.
884
+ let totalChunksProcessed = 0;
885
+ let totalChunksFailed = 0;
886
+ const ABORT_MIN_CHUNKS = 4;
887
+ const ABORT_FAILURE_RATE = 0.5;
888
+ for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
889
+ // Abort if failure rate >= 50% over at least 4 processed chunks.
890
+ if (totalChunksProcessed >= ABORT_MIN_CHUNKS) {
891
+ const failureRate = totalChunksFailed / totalChunksProcessed;
892
+ if (failureRate >= ABORT_FAILURE_RATE) {
893
+ const skipped = chunks.length - chunkIdx;
894
+ const abortMsg = `Consolidation aborted — failure rate ${(failureRate * 100).toFixed(0)}% over ${totalChunksProcessed} chunks (>= ${ABORT_FAILURE_RATE * 100}% threshold). LLM may be unavailable. ${skipped} chunk(s) skipped.`;
895
+ warn(abortMsg);
896
+ warnings.push(abortMsg);
897
+ // Account for memories in chunks we never attempted: they are
898
+ // neither judgedNoAction (no plan parsed) nor skipReason (no op
899
+ // rejected). Without this, the accounting invariant fails by
900
+ // `Σ(unattempted_chunk.length)` whenever the abort fires.
901
+ for (let i = chunkIdx; i < chunks.length; i++) {
902
+ failedChunkMemories += chunks[i].length;
903
+ }
904
+ break;
905
+ }
906
+ }
907
+ const chunk = chunks[chunkIdx];
908
+ // All-hot chunk early-exit. The per-prompt hot-list block (see
909
+ // buildChunkPrompt) only *discourages* delete proposals on a mixed chunk;
910
+ // when EVERY memory in the chunk is captureMode: hot, the only ops the LLM
911
+ // could ever propose are deletes — all of which the downstream guard
912
+ // refuses unconditionally. Calling the model is therefore pure token waste.
913
+ // Skip the request entirely and bucket every memory as judgedNoAction (we
914
+ // judged "no action" without spending an LLM call), preserving the
915
+ // accounting invariant `processed == actioned + judgedNoAction +
916
+ // Σ(skipReasons) + failedChunkMemories`. Not counted toward the
917
+ // LLM-failure-rate abort policy — no request was attempted.
918
+ if (chunk.length > 0 && chunk.every((m) => isHotCapturedMemory(m.filePath))) {
919
+ for (const m of chunk)
920
+ judgedNoActionRefs.add(`memory:${m.name}`);
921
+ judgedNoAction += chunk.length;
922
+ warn(`[consolidate] chunk ${chunkIdx + 1}/${chunks.length}: all ${chunk.length} memories are captureMode: hot — skipping LLM (judged no-action).`);
923
+ continue;
924
+ }
925
+ warn(`[consolidate] chunk ${chunkIdx + 1}/${chunks.length} (${chunk.length} memories) …`);
926
+ const userPrompt = buildChunkPrompt(sourceName, chunk, chunkIdx, chunks.length, bodyTruncation, pendingProposalBodyHashes);
927
+ const raw = await tryLlmFeature("memory_consolidation", config, async () => {
928
+ if (!llmConfig)
929
+ return { ok: false, error: "No LLM configured for consolidation" };
930
+ try {
931
+ // responseSchema lift (PR 1, asset-writers-investigation §5): pass
932
+ // the consolidate plan schema so providers with
933
+ // `supportsJsonSchema: true` enforce shape upstream. Providers that
934
+ // ignore the option fall through to the existing
935
+ // `parseEmbeddedJsonResponse` path on the response side.
936
+ const content = await chatCompletion(llmConfig, [
937
+ { role: "system", content: CONSOLIDATE_SYSTEM_PROMPT },
938
+ { role: "user", content: userPrompt },
939
+ ], { responseSchema: CONSOLIDATE_PLAN_JSON_SCHEMA, enableThinking: false });
940
+ return { ok: true, content };
941
+ }
942
+ catch (e) {
943
+ return { ok: false, error: String(e) };
944
+ }
945
+ }, { ok: false, error: `chunk ${chunkIdx + 1} failed` });
946
+ if (!raw.ok) {
947
+ warn(raw.error ?? `chunk ${chunkIdx + 1} failed`);
948
+ warnings.push(raw.error ?? `chunk ${chunkIdx + 1} failed`);
949
+ totalChunksProcessed++;
950
+ totalChunksFailed++;
951
+ // Account for the chunk's memories under the failed-chunk bucket.
952
+ // judgedNoAction does NOT run on this path (it's after the success
953
+ // guards) so without this the accounting invariant breaks on every
954
+ // chunk-level transport/parse failure.
955
+ failedChunkMemories += chunk.length;
956
+ continue;
957
+ }
958
+ if (process.env.AKM_DEBUG_LLM) {
959
+ const preview = (raw.content ?? "").slice(0, 500);
960
+ warn(`[akm:consolidate] chunk ${chunkIdx + 1} raw response (first 500 chars): ${preview}`);
961
+ }
962
+ const parsed = parseEmbeddedJsonResponse(raw.content);
963
+ if (!parsed || !Array.isArray(parsed.operations)) {
964
+ const hint = raw.content !== undefined && raw.content.trim() === ""
965
+ ? " (empty response — if using a thinking model, disable thinking mode)"
966
+ : "";
967
+ warn(`Chunk ${chunkIdx + 1}: invalid plan from AI — skipping.${hint}`);
968
+ warnings.push(`Chunk ${chunkIdx + 1}: invalid plan from AI — skipping.${hint}`);
969
+ totalChunksProcessed++;
970
+ totalChunksFailed++;
971
+ failedChunkMemories += chunk.length;
972
+ continue;
973
+ }
974
+ totalChunksProcessed++; // success
975
+ const ops = [];
976
+ for (const op of parsed.operations) {
977
+ if (isValidOp(op)) {
978
+ ops.push(op);
979
+ }
980
+ else {
981
+ warnings.push(`Chunk ${chunkIdx + 1}: skipping invalid operation: ${JSON.stringify(op)}`);
982
+ }
983
+ }
984
+ if (Array.isArray(parsed.warnings)) {
985
+ for (const w of parsed.warnings) {
986
+ if (typeof w === "string")
987
+ warnings.push(w);
988
+ }
989
+ }
990
+ // Per-chunk judgedNoAction: count memories the LLM saw but proposed no
991
+ // op for. Membership is by `memory:<name>` ref against the targets of
992
+ // each op (primary + secondaries for merge; ref otherwise). 2026-05-26:
993
+ // pre-fix this was a 78/119 (66%) silent drop in the cron run — no
994
+ // warning, event, or counter. See tuning investigation §Q2.
995
+ const targetRefs = new Set();
996
+ for (const op of ops) {
997
+ if (op.op === "merge") {
998
+ targetRefs.add(op.primary);
999
+ for (const s of op.secondaries)
1000
+ targetRefs.add(s);
1001
+ }
1002
+ else {
1003
+ targetRefs.add(op.ref);
1004
+ }
1005
+ }
1006
+ let chunkNoAction = 0;
1007
+ for (const m of chunk) {
1008
+ const memRef = `memory:${m.name}`;
1009
+ if (!targetRefs.has(memRef)) {
1010
+ chunkNoAction++;
1011
+ judgedNoActionRefs.add(memRef);
1012
+ }
1013
+ }
1014
+ judgedNoAction += chunkNoAction;
1015
+ chunkOpsArrays.push(ops);
1016
+ }
1017
+ const { ops: allOps, warnings: mergeWarnings } = mergePlans(chunkOpsArrays);
1018
+ warnings.push(...mergeWarnings);
1019
+ // -- Dry-run: show AI plan without executing any writes --------------------
1020
+ if (opts.dryRun) {
1021
+ return {
1022
+ schemaVersion: 1,
1023
+ ok: true,
1024
+ shape: "consolidate-result",
1025
+ dryRun: true,
1026
+ previewOnly: true,
1027
+ target: sourceName,
1028
+ processed: memories.length,
1029
+ merged: 0,
1030
+ deleted: 0,
1031
+ promoted: [],
1032
+ contradicted: 0,
1033
+ failedChunks: totalChunksFailed,
1034
+ totalChunks: chunks.length,
1035
+ judgedNoAction,
1036
+ skipReasons,
1037
+ mergedSecondaries,
1038
+ failedChunkMemories,
1039
+ planned: allOps,
1040
+ warnings,
1041
+ durationMs: Date.now() - startMs,
1042
+ };
1043
+ }
1044
+ warn(`[consolidate] plan: ${allOps.length} operation(s)`);
1045
+ // -- HTTP path: warn about quality and confirm unless auto-accepted --------
1046
+ if (isHttpPath) {
1047
+ warnings.push("Running on HTTP path — plan generated from truncated memory excerpts; quality may vary.");
1048
+ // Per-proposal confidence gating is handled by the caller (improve.ts)
1049
+ // via runAutoAcceptGate after this function returns. The gate reads
1050
+ // proposal.confidence (forwarded from op.confidence above) and applies
1051
+ // a minimumThreshold floor of 95 for consolidate's destructive ops.
1052
+ // Here we only gate the interactive-confirm path for manual/HTTP invocations.
1053
+ if (opts.autoAccept === undefined && allOps.length > 0) {
1054
+ const n = allOps.length;
1055
+ // Non-interactive contexts (CI / test runners / piped stdin) must not
1056
+ // block on an unanswerable prompt. Default to a non-destructive "no"
1057
+ // so callers in those contexts get the same "aborted, preview only"
1058
+ // shape they'd get from explicit user dismissal. AKM_NON_INTERACTIVE
1059
+ // lets callers force this path even when stdin happens to be a TTY.
1060
+ const nonInteractive = process.stdin.isTTY === false || process.env.AKM_NON_INTERACTIVE === "1";
1061
+ const answer = nonInteractive ? false : await promptConfirm(`Apply ${n} operations? [y/N] `);
1062
+ if (!answer) {
1063
+ return {
1064
+ schemaVersion: 1,
1065
+ ok: true,
1066
+ shape: "consolidate-result",
1067
+ dryRun: false,
1068
+ previewOnly: true,
1069
+ target: sourceName,
1070
+ processed: memories.length,
1071
+ merged: 0,
1072
+ deleted: 0,
1073
+ promoted: [],
1074
+ contradicted: 0,
1075
+ failedChunks: totalChunksFailed,
1076
+ totalChunks: chunks.length,
1077
+ judgedNoAction,
1078
+ skipReasons,
1079
+ mergedSecondaries,
1080
+ failedChunkMemories,
1081
+ planned: allOps,
1082
+ warnings: [...warnings, nonInteractive ? "Non-interactive context: skipped apply." : "Aborted by user."],
1083
+ durationMs: Date.now() - startMs,
1084
+ };
1085
+ }
1086
+ }
1087
+ }
1088
+ // -- Phase B + writes -------------------------------------------------------
1089
+ const target = resolveWriteTarget(config);
1090
+ const timestamp = timestampForFilename();
1091
+ const backupDir = getBackupDir(stashDir, timestamp);
1092
+ // Write journal before any mutations
1093
+ writeJournal(stashDir, allOps, timestamp);
1094
+ let merged = 0;
1095
+ let deleted = 0;
1096
+ const promoted = [];
1097
+ let contradicted = 0; // C-3 / #382: count of contradiction edges written
1098
+ // Within-run dedup: track source refs for which a promote proposal was
1099
+ // already created this run. The LLM can return multiple promote ops for
1100
+ // different source memories that happen to have identical content (all are
1101
+ // duplicate memories), so we also need a content-hash guard below.
1102
+ const promotedSourceRefs = new Set();
1103
+ // Build a lookup map: ref → MemoryEntry
1104
+ const memoryByRef = new Map();
1105
+ for (const m of memories) {
1106
+ memoryByRef.set(`memory:${m.name}`, m);
1107
+ }
1108
+ for (let opIndex = 0; opIndex < allOps.length; opIndex++) {
1109
+ const op = allOps[opIndex];
1110
+ const opDisplayRef = op.op === "merge" ? op.primary : op.op === "contradict" ? `${op.ref} ↔ ${op.contradictedByRef}` : op.ref;
1111
+ warn(`[consolidate] ${opIndex + 1}/${allOps.length} ${op.op} ${opDisplayRef}`);
1112
+ if (op.op === "merge") {
1113
+ // Accounting helper: emit a per-participant skipReason for failed
1114
+ // merges so primary + every loaded-memory secondary land in the
1115
+ // structured skip histogram. Pre-2026-05-26 only the primary was
1116
+ // counted (1 skipReason per failed merge), leaving N secondaries
1117
+ // unaccounted for in the `processed == actioned + noAction + Σskips`
1118
+ // invariant — the source of the 4–11 silent leaks per run.
1119
+ const emitMergeFailureSkips = (reason) => {
1120
+ if (memoryByRef.has(op.primary))
1121
+ pushSkipReason("merge", op.primary, reason);
1122
+ for (const secRef of op.secondaries) {
1123
+ if (memoryByRef.has(secRef))
1124
+ pushSkipReason("merge", secRef, reason);
1125
+ }
1126
+ };
1127
+ const primaryEntry = memoryByRef.get(op.primary);
1128
+ if (!primaryEntry) {
1129
+ warnings.push(`Merge: primary ${op.primary} not found in loaded memories — skipping.`);
1130
+ emitMergeFailureSkips("merge_primary_missing");
1131
+ continue;
1132
+ }
1133
+ // Phase B: generate merged content
1134
+ const secondaryBodies = [];
1135
+ for (const secRef of op.secondaries) {
1136
+ const secEntry = memoryByRef.get(secRef);
1137
+ if (!secEntry) {
1138
+ warnings.push(`Merge: secondary ${secRef} not found — skipping merge op.`);
1139
+ // No accounting impact: a missing secondary is a phantom ref and
1140
+ // never contributed to any chunk's targetRefs reduction. We still
1141
+ // continue the loop to gather the remaining valid secondaries.
1142
+ continue;
1143
+ }
1144
+ secondaryBodies.push(secRef);
1145
+ }
1146
+ if (secondaryBodies.length === 0) {
1147
+ warnings.push(`Merge: ${op.primary} has no valid secondaries — skipping.`);
1148
+ emitMergeFailureSkips("merge_no_valid_secondaries");
1149
+ continue;
1150
+ }
1151
+ let primaryBody = "";
1152
+ try {
1153
+ primaryBody = fs.readFileSync(primaryEntry.filePath, "utf8");
1154
+ }
1155
+ catch {
1156
+ warnings.push(`Merge: could not read primary ${op.primary} — skipping.`);
1157
+ emitMergeFailureSkips("merge_read_failed");
1158
+ continue;
1159
+ }
1160
+ const mergeResult = await generateMergedContent(config, op.primary, primaryBody, op.secondaries, memoryByRef);
1161
+ if ("error" in mergeResult) {
1162
+ warnings.push(`Merge: ${mergeResult.error} for ${mergeResult.detail}.`);
1163
+ emitMergeFailureSkips(mergeResult.error);
1164
+ continue;
1165
+ }
1166
+ const mergedContent = mergeResult.content;
1167
+ // Validate frontmatter of merged content — must have a `---` block
1168
+ // with at minimum a `description` field. We parse via the hand-rolled
1169
+ // parser (cheap) AND require non-empty description. This guards against
1170
+ // the historical defect where merged memories were written back with
1171
+ // empty `description` and later polluted the promote path.
1172
+ let parsedMerged;
1173
+ try {
1174
+ parsedMerged = parseFrontmatter(mergedContent);
1175
+ }
1176
+ catch {
1177
+ warnings.push(`Merge: merged content for ${op.primary} has invalid frontmatter — skipping.`);
1178
+ emitMergeFailureSkips("merge_invalid_frontmatter");
1179
+ continue;
1180
+ }
1181
+ if (parsedMerged.frontmatter === null) {
1182
+ warnings.push(`Merge: merged content for ${op.primary} has no frontmatter block — skipping.`);
1183
+ emitMergeFailureSkips("merge_invalid_frontmatter");
1184
+ continue;
1185
+ }
1186
+ const mergedDesc = parsedMerged.data.description;
1187
+ if (typeof mergedDesc !== "string" || mergedDesc.trim().length === 0) {
1188
+ warnings.push(`Merge: merged content for ${op.primary} missing description — skipping.`);
1189
+ emitMergeFailureSkips("merge_missing_description");
1190
+ continue;
1191
+ }
1192
+ const truncReason = detectTruncatedDescription(mergedDesc);
1193
+ if (truncReason) {
1194
+ warnings.push(`Merge: merged content for ${op.primary} has truncated description (${truncReason}) — skipping.`);
1195
+ emitMergeFailureSkips("merge_truncated_description");
1196
+ continue;
1197
+ }
1198
+ // captureMode:hot guard — refuse the merge if ANY participating memory
1199
+ // (primary or secondary) was user-captured or has unparseable frontmatter
1200
+ // (could have hidden a hot flag). Hot memories are user-explicit and
1201
+ // must not be deleted/overwritten by the consolidate LLM. 14 user
1202
+ // memories were silent-deleted by consolidate before this guard landed;
1203
+ // recovery required copying from .akm/archive/ by hand.
1204
+ const mergeParticipants = [op.primary, ...op.secondaries];
1205
+ const blockedParticipants = mergeParticipants.flatMap((ref) => {
1206
+ const e = memoryByRef.get(ref);
1207
+ if (!e)
1208
+ return [];
1209
+ const verdict = consolidateGuardStatus(e.filePath);
1210
+ if (verdict === "hot" || verdict === "unparseable")
1211
+ return [{ ref, verdict }];
1212
+ return [];
1213
+ });
1214
+ if (blockedParticipants.length > 0) {
1215
+ const detail = blockedParticipants.map((p) => `${p.ref} (${p.verdict})`).join(", ");
1216
+ warnings.push(`Merge: refused for ${op.primary} — ${blockedParticipants.length} participant(s) blocked by hot/unparseable frontmatter guard: ${detail}`);
1217
+ emitMergeFailureSkips("merge_participant_blocked");
1218
+ continue;
1219
+ }
1220
+ // Backup secondaries before deleting
1221
+ for (const secRef of op.secondaries) {
1222
+ const secEntry = memoryByRef.get(secRef);
1223
+ if (secEntry && fs.existsSync(secEntry.filePath)) {
1224
+ backupFile(secEntry.filePath, backupDir, secEntry.name);
1225
+ }
1226
+ }
1227
+ // Write merged primary
1228
+ try {
1229
+ const parsedPrimary = parseAssetRef(op.primary);
1230
+ await writeAssetToSource(target.source, target.config, parsedPrimary, mergedContent);
1231
+ }
1232
+ catch (e) {
1233
+ warnings.push(`Merge: write failed for ${op.primary}: ${String(e)}`);
1234
+ emitMergeFailureSkips("merge_write_failed");
1235
+ continue;
1236
+ }
1237
+ // Archive and delete secondaries (P1-B: soft-invalidation)
1238
+ for (const secRef of op.secondaries) {
1239
+ const secEntry = memoryByRef.get(secRef);
1240
+ if (!secEntry)
1241
+ continue;
1242
+ if (fs.existsSync(secEntry.filePath)) {
1243
+ archiveMemory(secEntry.filePath, stashDir, secRef, "merged into primary", opIndex, op.primary, warnings);
1244
+ }
1245
+ try {
1246
+ const parsedSec = parseAssetRef(secRef);
1247
+ await deleteAssetFromSource(target.source, target.config, parsedSec);
1248
+ markJournalCompleted(stashDir, secRef);
1249
+ }
1250
+ catch (e) {
1251
+ warnings.push(`Merge: delete failed for ${secRef}: ${String(e)}`);
1252
+ }
1253
+ }
1254
+ markJournalCompleted(stashDir, op.primary);
1255
+ merged++;
1256
+ // 2026-05-26 accounting-leak fix: `merged` is op-level, but each
1257
+ // successful merge actions `1 + secondaries.length` memories. Without
1258
+ // this counter the accounting invariant breaks by `secondaries.length`
1259
+ // per successful merge (chunk loop excluded all secondaries from
1260
+ // judgedNoAction via targetRefs, but only the primary is credited to
1261
+ // `merged`). Count only loaded-memory secondaries; phantom secondary
1262
+ // refs never affected any chunk's targetRefs in the first place.
1263
+ for (const secRef of op.secondaries) {
1264
+ if (memoryByRef.has(secRef))
1265
+ mergedSecondaries++;
1266
+ }
1267
+ }
1268
+ else if (op.op === "delete") {
1269
+ const entry = memoryByRef.get(op.ref);
1270
+ if (!entry) {
1271
+ warnings.push(`Delete: ${op.ref} not found in loaded memories — skipping.`);
1272
+ // Phantom ref: not in the batch so not in processed. Pushing to
1273
+ // skipReasons would inflate Σ(skipReasons) without a matching processed
1274
+ // entry, breaking the accounting invariant. Visibility is preserved via
1275
+ // the warnings array above.
1276
+ continue;
1277
+ }
1278
+ // captureMode:hot guard — refuse to delete user-captured memories OR
1279
+ // memories whose frontmatter is unparseable (could have hidden the hot
1280
+ // flag). The consolidate LLM was deleting hot-captured user memos as
1281
+ // "redundant" — 14 such deletes were silently archived between
1282
+ // 2026-05-19 and 2026-05-20 before this guard. Hot memories are
1283
+ // user-explicit and may only be deleted by the user.
1284
+ const guard = consolidateGuardStatus(entry.filePath);
1285
+ if (guard === "hot" || guard === "unparseable") {
1286
+ warnings.push(`Delete: refused for ${op.ref} — ${guard === "hot" ? "captureMode:hot (user-explicit; never auto-delete)" : "frontmatter unparseable (cannot verify hot flag absent)"}. Reason from LLM: "${op.reason ?? "n/a"}"`);
1287
+ pushSkipReason("delete", op.ref, "captureMode_hot_refused");
1288
+ continue;
1289
+ }
1290
+ if (fs.existsSync(entry.filePath)) {
1291
+ backupFile(entry.filePath, backupDir, entry.name);
1292
+ // P1-B: soft-invalidation archive before hard delete
1293
+ archiveMemory(entry.filePath, stashDir, op.ref, op.reason, opIndex, undefined, warnings);
1294
+ }
1295
+ try {
1296
+ const parsedRef = parseAssetRef(op.ref);
1297
+ await deleteAssetFromSource(target.source, target.config, parsedRef);
1298
+ markJournalCompleted(stashDir, op.ref);
1299
+ deleted++;
1300
+ }
1301
+ catch (e) {
1302
+ // Distinguish "file already absent" from genuine failures. A prior run
1303
+ // may have deleted the file but the DB was not yet re-indexed, so the
1304
+ // ref still appeared in memoryByRef. The delete goal is already met.
1305
+ const msg = e instanceof Error ? e.message : String(e);
1306
+ if (msg.includes("not found in source")) {
1307
+ warnings.push(`Delete: ${op.ref} — file already absent (stale DB entry); skipping.`);
1308
+ pushSkipReason("delete", op.ref, "delete_already_gone");
1309
+ }
1310
+ else {
1311
+ warnings.push(`Delete: failed for ${op.ref}: ${String(e)}`);
1312
+ pushSkipReason("delete", op.ref, "delete_failed");
1313
+ }
1314
+ }
1315
+ }
1316
+ else if (op.op === "promote") {
1317
+ const entry = memoryByRef.get(op.ref);
1318
+ if (!entry) {
1319
+ warnings.push(`Promote: ${op.ref} not found in loaded memories — skipping.`);
1320
+ // Phantom ref: not in processed, so no skipReason (same rationale as
1321
+ // delete_ref_missing above).
1322
+ continue;
1323
+ }
1324
+ // Within-run source-ref dedup: skip if this source memory was already
1325
+ // promoted earlier in this run (safety belt — mergePlans already
1326
+ // deduplicates promote ops by source ref via Map, but this guard also
1327
+ // catches any future code paths that bypass mergePlans).
1328
+ if (promotedSourceRefs.has(op.ref)) {
1329
+ warnings.push(`Skipping promote: ${op.ref} already promoted in this run`);
1330
+ pushSkipReason("promote", op.ref, "promote_already_promoted_this_run");
1331
+ continue;
1332
+ }
1333
+ let knowledgeRef = op.knowledgeRef;
1334
+ try {
1335
+ parseAssetRef(knowledgeRef);
1336
+ }
1337
+ catch {
1338
+ const slug = op.knowledgeRef
1339
+ .replace(/^knowledge:/, "")
1340
+ .replace(/[^a-z0-9-]/gi, "-")
1341
+ .toLowerCase();
1342
+ knowledgeRef = `knowledge:${slug}`;
1343
+ warnings.push(`Normalized invalid ref "${op.knowledgeRef}" → "${knowledgeRef}"`);
1344
+ }
1345
+ // Idempotency: check pending proposals by target ref
1346
+ const existingProposals = listProposals(stashDir, { ref: knowledgeRef });
1347
+ if (existingProposals.some((p) => p.status === "pending")) {
1348
+ warnings.push(`Skipping promote: pending proposal already exists for ${knowledgeRef}`);
1349
+ pushSkipReason("promote", op.ref, "promote_pending_proposal_exists");
1350
+ continue;
1351
+ }
1352
+ // Idempotency: check if knowledge asset already exists
1353
+ const parsedKnowledgeRef = parseAssetRef(knowledgeRef);
1354
+ const destPath = path.join(target.source.path, "knowledge", `${parsedKnowledgeRef.name}.md`);
1355
+ if (fs.existsSync(destPath)) {
1356
+ warnings.push(`Skipping promote: ${knowledgeRef} already exists in source`);
1357
+ pushSkipReason("promote", op.ref, "promote_already_exists");
1358
+ continue;
1359
+ }
1360
+ let memoryContent = "";
1361
+ try {
1362
+ memoryContent = fs.readFileSync(entry.filePath, "utf8");
1363
+ }
1364
+ catch (e) {
1365
+ warnings.push(`Promote: could not read ${op.ref}: ${String(e)}`);
1366
+ pushSkipReason("promote", op.ref, "promote_read_failed");
1367
+ continue;
1368
+ }
1369
+ // Defensive sanitization: legacy memory files written by older
1370
+ // consolidate runs may still carry outer code fences or broken YAML.
1371
+ // Strip them here so we never propose a polluted asset.
1372
+ const promoteSanitized = sanitizeMergedContent(memoryContent);
1373
+ if (!promoteSanitized.ok) {
1374
+ warnings.push(`Promote: rejected ${op.ref} — source memory failed sanitization (${promoteSanitized.reason}).`);
1375
+ pushSkipReason("promote", op.ref, "promote_sanitization_failed");
1376
+ continue;
1377
+ }
1378
+ memoryContent = promoteSanitized.result.content;
1379
+ // SOURCE_SUPERSEDED guard: refuse to promote a memory whose source
1380
+ // frontmatter carries `status: superseded`. Predicate at module top
1381
+ // (`hasSupersededStatus`) so tests can exercise it directly.
1382
+ if (hasSupersededStatus(promoteSanitized.result.frontmatter)) {
1383
+ warnings.push(`Promote: refused for ${op.ref} → ${knowledgeRef} — source memory has status:superseded; superseded memories are not promotable knowledge.`);
1384
+ pushSkipReason("promote", op.ref, "promote_superseded");
1385
+ continue;
1386
+ }
1387
+ // Parse the source memory up-front so the body/frontmatter checks below
1388
+ // share the same parsed view.
1389
+ const parsedMemory = parseFrontmatter(memoryContent);
1390
+ // Reject sources whose body is too small to make useful knowledge.
1391
+ // Observed failure: memory files whose body is literally a tags string
1392
+ // ("discord,notification,send-notification") get promoted to knowledge
1393
+ // proposals that no reviewer would accept. Threshold is conservative —
1394
+ // 100 chars catches single-line tag dumps without rejecting genuinely
1395
+ // terse but valid notes.
1396
+ const PROMOTE_BODY_MIN_CHARS = 100;
1397
+ const sourceBody = parsedMemory.content.trim();
1398
+ if (sourceBody.length < PROMOTE_BODY_MIN_CHARS) {
1399
+ warnings.push(`Promote: rejected ${op.ref} → ${knowledgeRef} — source memory body is too small (${sourceBody.length} chars; need ≥${PROMOTE_BODY_MIN_CHARS}) to make useful knowledge.`);
1400
+ pushSkipReason("promote", op.ref, "promote_source_too_small");
1401
+ continue;
1402
+ }
1403
+ // Cross-run + within-run content dedup: if an identical body already
1404
+ // exists in ANY pending consolidate proposal (regardless of target ref),
1405
+ // skip. This prevents duplicate proposals when:
1406
+ // (a) Multiple source memories have identical bodies but differ only
1407
+ // in noise frontmatter (`inferenceProcessed: true` twin alongside
1408
+ // the original; differing `updated:` timestamps; etc.) — the body
1409
+ // is the load-bearing content, so dedup must hash on body only.
1410
+ // (b) A prior run created a proposal for the same body under a
1411
+ // different knowledgeRef slug.
1412
+ const bodyHash = createHash("sha256").update(sourceBody, "utf8").digest("hex");
1413
+ const allPendingConsolidateProposals = listProposals(stashDir, { status: "pending" }).filter((p) => p.source === "consolidate");
1414
+ const contentDupProposal = allPendingConsolidateProposals.find((p) => {
1415
+ const otherBody = parseFrontmatter(p.payload.content).content.trim();
1416
+ return createHash("sha256").update(otherBody, "utf8").digest("hex") === bodyHash;
1417
+ });
1418
+ if (contentDupProposal) {
1419
+ warnings.push(`Skipping promote: identical body already pending as proposal ${contentDupProposal.id} (ref: ${contentDupProposal.ref}); skipping duplicate for ${op.ref} → ${knowledgeRef}`);
1420
+ pushSkipReason("promote", op.ref, "dedup_pending_proposal");
1421
+ continue;
1422
+ }
1423
+ try {
1424
+ // Use LLM-provided description; fall back to memory's own description
1425
+ // (post-sanitization frontmatter is authoritative).
1426
+ const description = (typeof op.description === "string" && op.description.trim()
1427
+ ? op.description.trim()
1428
+ : parsedMemory.data?.description?.trim()) ?? "";
1429
+ // Validate the resolved frontmatter before emitting a proposal.
1430
+ // Required field: non-empty description. Reject obvious truncation
1431
+ // markers (description ends with `,`/`;`/`:`/`...`/hanging connector)
1432
+ // so the queue never sees half-formed metadata that the reviewer
1433
+ // would only reject.
1434
+ const fmCheck = validateProposalFrontmatter({ description });
1435
+ if (!fmCheck.ok) {
1436
+ warnings.push(`Promote: rejected ${op.ref} → ${knowledgeRef} — ${fmCheck.reason}.`);
1437
+ pushSkipReason("promote", op.ref, "promote_invalid_frontmatter");
1438
+ continue;
1439
+ }
1440
+ // Merge `description` INTO the body's YAML frontmatter so it lands in
1441
+ // the on-disk asset when the proposal is accepted. The descriptionQuality
1442
+ // validator parses `payload.content` body (not the envelope
1443
+ // `payload.frontmatter`), and a memory's native frontmatter has
1444
+ // `captureMode`/`beliefState`/etc. but never `description` — without
1445
+ // this merge, 60+ pending proposals were blocked at accept-time with
1446
+ // MISSING_FRONTMATTER_DESCRIPTION even though the envelope had it.
1447
+ // (The body-frontmatter assumption baked into the 2026-05-20 comment
1448
+ // below was wrong: body fm and envelope fm only converge when the
1449
+ // writer explicitly merges them, which it now does.)
1450
+ const mergedBodyFm = {
1451
+ ...(parsedMemory.data ?? {}),
1452
+ description,
1453
+ };
1454
+ const serializedMergedFm = yamlStringify(mergedBodyFm).trimEnd();
1455
+ const proposalContent = assembleAssetFromString(serializedMergedFm, parsedMemory.content);
1456
+ // Pre-emit dedup against pending consolidate proposals from the
1457
+ // same improve run (slug-variant match). The cross-run content-hash
1458
+ // dedup inside `mergePlans` handles duplicates against existing
1459
+ // stash assets — see commit history for the deletion of the
1460
+ // unbounded embedding + cross-type slug branches.
1461
+ const dedup = await checkPreEmitDedup({
1462
+ candidateRef: knowledgeRef,
1463
+ candidateText: `${description}. ${memoryContent}`,
1464
+ stashDir,
1465
+ config,
1466
+ });
1467
+ if (dedup.duplicate) {
1468
+ warnings.push(`Promote: skipped ${op.ref} → ${knowledgeRef} — ${dedup.reason}.`);
1469
+ pushSkipReason("promote", op.ref, "promote_dedup_window");
1470
+ continue;
1471
+ }
1472
+ const proposalResult = createProposal(stashDir, {
1473
+ ref: knowledgeRef,
1474
+ source: "consolidate",
1475
+ payload: {
1476
+ content: proposalContent,
1477
+ frontmatter: { description },
1478
+ },
1479
+ ...(typeof op.confidence === "number" ? { confidence: op.confidence } : {}),
1480
+ });
1481
+ if (isProposalSkipped(proposalResult)) {
1482
+ warnings.push(`Promote: skipped proposal for ${op.ref} (${proposalResult.reason}): ${proposalResult.message}`);
1483
+ pushSkipReason("promote", op.ref, `promote_proposal_${proposalResult.reason}`);
1484
+ }
1485
+ else {
1486
+ promoted.push(proposalResult.id);
1487
+ promotedSourceRefs.add(op.ref);
1488
+ markJournalCompleted(stashDir, op.ref);
1489
+ }
1490
+ }
1491
+ catch (e) {
1492
+ warnings.push(`Promote: createProposal failed for ${op.ref}: ${String(e)}`);
1493
+ pushSkipReason("promote", op.ref, "promote_create_failed");
1494
+ }
1495
+ }
1496
+ else if (op.op === "contradict") {
1497
+ // C-3 / #382: Write contradictedBy edges so resolveFamilyContradictions
1498
+ // (the SCC resolver in memory-improve.ts) has edges to work on.
1499
+ // Zep arXiv:2501.13956 §3 — unified belief-revision with contradiction edges.
1500
+ const entry = memoryByRef.get(op.ref);
1501
+ const contradictorEntry = memoryByRef.get(op.contradictedByRef);
1502
+ if (!entry) {
1503
+ warnings.push(`Contradict: ${op.ref} not found in loaded memories — skipping.`);
1504
+ // Phantom ref: not in processed, so no skipReason (same rationale as
1505
+ // delete_ref_missing).
1506
+ continue;
1507
+ }
1508
+ if (!contradictorEntry) {
1509
+ warnings.push(`Contradict: ${op.contradictedByRef} not found — skipping.`);
1510
+ // op.ref IS in the batch (entry found above) so the skipReason is
1511
+ // correctly charged against a real processed memory.
1512
+ pushSkipReason("contradict", op.ref, "contradict_target_missing");
1513
+ continue;
1514
+ }
1515
+ try {
1516
+ // Write the contradiction edge: op.ref is contradicted by op.contradictedByRef
1517
+ writeContradictEdge(entry.filePath, op.contradictedByRef);
1518
+ contradicted++;
1519
+ markJournalCompleted(stashDir, op.ref);
1520
+ }
1521
+ catch (e) {
1522
+ warnings.push(`Contradict: failed to write edge for ${op.ref}: ${String(e)}`);
1523
+ pushSkipReason("contradict", op.ref, "contradict_write_failed");
1524
+ }
1525
+ }
1526
+ }
1527
+ cleanupJournal(stashDir, timestamp);
1528
+ // TTL cleanup: remove archive entries older than archiveRetentionDays (default 90).
1529
+ // C-5 / #391: emit an `archive_cleanup` event before each deletion so the
1530
+ // audit trail records what was lost. Outbox pattern (EIP, Hohpe-Woolf) —
1531
+ // any event that is recorded must be queryable; silent deletes are an anti-pattern.
1532
+ const archiveDir = path.join(stashDir, ".akm", "archive");
1533
+ if (fs.existsSync(archiveDir)) {
1534
+ const retentionMs = (config.archiveRetentionDays ?? 90) * 86_400_000;
1535
+ const cutoff = Date.now() - retentionMs;
1536
+ for (const fname of fs.readdirSync(archiveDir)) {
1537
+ const fp = path.join(archiveDir, fname);
1538
+ try {
1539
+ const stat = fs.statSync(fp);
1540
+ if (stat.mtimeMs < cutoff) {
1541
+ // Emit event before deletion so the record survives the purge.
1542
+ appendEvent({
1543
+ eventType: "archive_cleanup",
1544
+ metadata: {
1545
+ file: fname,
1546
+ filePath: fp,
1547
+ ageMs: Date.now() - stat.mtimeMs,
1548
+ retentionMs,
1549
+ },
1550
+ });
1551
+ fs.unlinkSync(fp);
1552
+ }
1553
+ }
1554
+ catch {
1555
+ /* ignore race conditions */
1556
+ }
1557
+ }
1558
+ }
1559
+ return {
1560
+ schemaVersion: 1,
1561
+ ok: true,
1562
+ shape: "consolidate-result",
1563
+ dryRun: false,
1564
+ previewOnly: false,
1565
+ target: sourceName,
1566
+ processed: memories.length,
1567
+ merged,
1568
+ deleted,
1569
+ promoted,
1570
+ contradicted,
1571
+ failedChunks: totalChunksFailed,
1572
+ totalChunks: chunks.length,
1573
+ judgedNoAction,
1574
+ skipReasons,
1575
+ mergedSecondaries,
1576
+ failedChunkMemories,
1577
+ warnings,
1578
+ durationMs: Date.now() - startMs,
1579
+ };
1580
+ }
1581
+ // ── Helpers ─────────────────────────────────────────────────────────────────
1582
+ // ── LLM-output sanitization ─────────────────────────────────────────────────
1583
+ //
1584
+ // Three classes of LLM defect have been observed across hundreds of
1585
+ // consolidate proposals (see audit notes in this branch):
1586
+ //
1587
+ // 1. Code-fence leakage: the entire merged asset is wrapped in
1588
+ // ```markdown … ``` (or ```yaml … ```) despite the prompt forbidding
1589
+ // fences. The post-processor used to pass this through verbatim, so the
1590
+ // first character of the asset content became a backtick rather than
1591
+ // `---`, defeating the frontmatter parser.
1592
+ // 2. YAML quote-escaping bugs: descriptions like `'"Specialty intro...:`
1593
+ // with unbalanced quotes that break the YAML reader. The post-processor
1594
+ // historically passed the LLM's raw scalar straight into a manually
1595
+ // assembled `description: <raw>` line.
1596
+ // 3. Truncated descriptions hitting token cutoffs — the model's max_tokens
1597
+ // runs out mid-sentence, leaving things like
1598
+ // `description: "Tables in narrow column containers need max-width:100% +"`
1599
+ // with no closing context.
1600
+ //
1601
+ // `sanitizeMergedContent` and `validateProposalFrontmatter` defend against
1602
+ // all three at the point where LLM output is consumed.
1603
/**
 * Attempt to recover a frontmatter block that is missing its closing `---`.
 *
 * Scans the lines after the opening `---` for the first blank line or the
 * first line that cannot be YAML (i.e. not a `key:` line, an indented
 * continuation, a comment, or a sequence item) and injects a closing `---`
 * immediately before that line so the normal parser can proceed.
 *
 * The input is split on `\r?\n` and re-joined with `\n`, so the returned
 * string always uses LF line endings.
 *
 * @param {string} raw - Candidate asset text, expected to start with `---`.
 * @returns {string | null} The patched text, or `null` when recovery is not
 *   safe: no opening `---`, no terminating blank/body line after the
 *   frontmatter lines, or no frontmatter line at all between the opener and
 *   the first blank/body line (injecting a delimiter there would only
 *   fabricate an empty frontmatter block).
 */
function recoverMalformedFrontmatter(raw) {
    if (!raw.startsWith("---"))
        return null;
    const lines = raw.split(/\r?\n/);
    // Heuristic: does this line plausibly belong to a YAML mapping?
    const looksLikeYaml = (line) => /^\w[\w-]*\s*:/.test(line) || // key: value
        /^\s+\S/.test(line) || // indented continuation / nested
        /^\s*#/.test(line) || // YAML comment
        /^\s*-\s/.test(line); // sequence item
    // Index 0 is the opening `---`; the cut point is the first later line that
    // is blank (conventional end of the block) or clearly body content.
    const insertAt = lines.findIndex((line, idx) => idx > 0 && (line.trim() === "" || !looksLikeYaml(line)));
    // -1: every remaining line looked like YAML, so there is no body to split
    //     off and the structure is ambiguous.
    //  1: the line right after the opener is already blank/body, so there is
    //     no frontmatter to close — refuse instead of emitting `---\n---`.
    if (insertAt <= 1)
        return null;
    return [...lines.slice(0, insertAt), "---", ...lines.slice(insertAt)].join("\n");
}
1644
/**
 * Outer-fence stripper specific to consolidate.
 *
 * Recognises a wrapper fence opened by a bare ``` or one tagged `markdown`,
 * `md`, `yaml` or `yml` (case-insensitive) on the first line and closed by a
 * bare ``` on the last line of the trimmed input. An unbalanced wrapper is
 * treated as a malformed response and is never partially sanitized.
 *
 * A trailing ``` without a recognised leading fence is ambiguous: it is
 * either a dangling wrapper closer or simply the end of a fenced code block
 * inside the body. The two are told apart by counting fence lines: an even
 * count means every fence in the body is paired, so the content is returned
 * untouched; an odd count means one closer is dangling and the input is
 * refused.
 *
 * @param {string} raw - Raw text to inspect.
 * @returns {{ content: string, stripped: boolean } | null} The trimmed content
 *   (`stripped: true` when a wrapper was removed), or `null` when only one
 *   half of a wrapper fence is present (caller should reject the response
 *   entirely).
 */
export function stripOuterCodeFence(raw) {
    const trimmed = raw.trim();
    const leading = trimmed.match(/^```(?:markdown|md|yaml|yml)?\s*\r?\n/i);
    const trailing = trimmed.match(/\r?\n```\s*$/);
    if (!leading && !trailing)
        return { content: trimmed, stripped: false };
    if (leading && trailing) {
        const inner = trimmed.slice(leading[0].length, trimmed.length - trailing[0].length).trim();
        return { content: inner, stripped: true };
    }
    if (leading)
        return null; // opener without closer — refuse
    // Trailing fence only. If the fences in the text pair up, the last one
    // closes a body code block and there is no wrapper to strip.
    const fenceLineCount = trimmed.split(/\r?\n/).filter((line) => /^\s{0,3}```/.test(line)).length;
    if (fenceLineCount % 2 === 0)
        return { content: trimmed, stripped: false };
    return null; // dangling closer — refuse
}
1665
/**
 * Clean up raw LLM output for a merged asset so it can be written to disk.
 *
 * Pipeline:
 *   1. Remove `<think>…</think>` blocks.
 *   2. Strip an outer code fence (with a leading-fence-only recovery path).
 *   3. Locate the `---` frontmatter sentinel, tolerating a short preamble.
 *   4. Extract the frontmatter block, attempting recovery when malformed.
 *   5. Parse the frontmatter, normalise `updated`, and re-serialise it.
 *
 * @param {string} raw - Raw completion text returned by the model.
 * @returns {{ok: true, result: {content: string, frontmatter: object}} | {ok: false, reason: string}}
 *   On failure `reason` is one of `UNBALANCED_CODE_FENCE`,
 *   `MISSING_FRONTMATTER_SENTINEL`, `MALFORMED_FRONTMATTER_BLOCK`,
 *   `FRONTMATTER_NOT_OBJECT`, or a string starting with `INVALID_YAML: ` /
 *   `YAML_STRINGIFY_FAILED: `.
 */
export function sanitizeMergedContent(raw) {
    // Shared by the strict attempt and the post-recovery attempt.
    const frontmatterBlock = /^---\r?\n([\s\S]*?)\r?\n---(?:\r\n|\r|\n|$)([\s\S]*)$/;
    // Strip <think> blocks BEFORE looking for fences (some local models still
    // emit them despite system prompts). A think block sitting outside an
    // otherwise balanced fence would make the fence look unbalanced and either
    // reject a usable response or leave a stray closing fence in the body.
    const withoutThink = raw.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
    // Step 1: strip the outer code fence.
    // Recovery path: if only the leading fence is present, strip it and continue
    // provided the inner content starts with `---`. Trailing-only fences are NOT
    // recovered — a trailing ``` is more likely a body code block than a
    // forgotten wrapper, so recovering would silently corrupt the body.
    let body;
    const fenceResult = stripOuterCodeFence(withoutThink);
    if (fenceResult) {
        body = fenceResult.content;
    }
    else {
        const leadingMatch = withoutThink.match(/^```(?:markdown|md|yaml|yml)?\s*\r?\n([\s\S]*)$/i);
        const inner = leadingMatch ? leadingMatch[1].trim() : null;
        if (!inner?.startsWith("---")) {
            return { ok: false, reason: "UNBALANCED_CODE_FENCE" };
        }
        body = inner;
    }
    body = body.trim();
    // Step 2: verify the frontmatter sentinel.
    // Recovery path: the LLM sometimes emits 1-2 lines of preamble (e.g. "Here
    // is the merged content:") before the `---`. Accept if `---` starts a line
    // within the first 300 chars; beyond that it is more likely a body section
    // divider than a frontmatter start.
    if (!body.startsWith("---")) {
        const sentinelIdx = body.indexOf("\n---");
        if (sentinelIdx < 0 || sentinelIdx >= 300) {
            return { ok: false, reason: "MISSING_FRONTMATTER_SENTINEL" };
        }
        body = body.slice(sentinelIdx + 1);
    }
    // Step 3: extract the frontmatter block. If the strict pattern fails, give
    // recoverMalformedFrontmatter a chance to repair the text (e.g. a missing
    // closing `---`) and retry the same pattern on its output.
    let match = body.match(frontmatterBlock);
    if (!match) {
        const recovered = recoverMalformedFrontmatter(body);
        if (recovered) {
            match = recovered.match(frontmatterBlock);
        }
        if (!match) {
            return { ok: false, reason: "MALFORMED_FRONTMATTER_BLOCK" };
        }
    }
    // Step 4: re-parse via the yaml library so quote-escaping mistakes either
    // get normalised or surface as a parse error. If the strict parser throws,
    // fall back to the lenient parseFrontmatter; accept its result only when it
    // recovered at least one key, otherwise report the original yaml error.
    let parsedFm;
    try {
        parsedFm = yamlParse(match[1]);
    }
    catch (e) {
        const fallback = parseFrontmatter(`---\n${match[1]}\n---\n${match[2]}`);
        if (fallback.frontmatter !== null && Object.keys(fallback.data).length > 0) {
            parsedFm = fallback.data;
        }
        else {
            return { ok: false, reason: `INVALID_YAML: ${e instanceof Error ? e.message : String(e)}` };
        }
    }
    // Frontmatter must be a mapping: scalars, null and sequences are rejected.
    if (parsedFm === null || typeof parsedFm !== "object" || Array.isArray(parsedFm)) {
        return { ok: false, reason: "FRONTMATTER_NOT_OBJECT" };
    }
    const fm = parsedFm;
    // Normalise placeholder leaks such as `updated: today` / `updated: {today: null}`.
    normalizeUpdatedField(fm);
    // Step 5: re-serialise via yaml.stringify to fix any quoting quirks.
    let serialized;
    try {
        serialized = yamlStringify(fm).trimEnd();
    }
    catch (e) {
        return { ok: false, reason: `YAML_STRINGIFY_FAILED: ${e instanceof Error ? e.message : String(e)}` };
    }
    const cleaned = assembleAssetFromString(serialized, match[2]);
    return { ok: true, result: { content: cleaned, frontmatter: fm } };
}
1756
/**
 * Mutate `fm.updated` in place to normalise placeholder leaks emitted by the
 * LLM. The consolidate prompt forbids these, but small models still produce
 * literal `today` / `{today: null}` / `now` values.
 *
 * Rules:
 *   - A string starting with an ISO-style date (YYYY-MM-DD, optionally followed
 *     by a time) stays as-is.
 *   - A valid Date object is converted to its ISO yyyy-mm-dd form.
 *   - An invalid Date object cannot be formatted, so it is treated like any
 *     other non-date object and replaced with today's ISO date.
 *   - A placeholder string — `today` or `now`, bare or wrapped in `{…}`,
 *     `${…}` or `{{…}}` (case-insensitive, inner whitespace allowed) — is
 *     replaced with today's ISO date.
 *   - Any other object (e.g. the map `{today: null}`) is replaced with today's
 *     ISO date.
 *   - `null`, `undefined`, empty string, or a missing key are left alone (no
 *     field is added; reviewers should not silently gain metadata they didn't
 *     write).
 *   - Any other string, and non-string primitives, are left alone so they stay
 *     visible in the diff.
 *
 * Exported for unit testing.
 *
 * @param {object} fm - Parsed frontmatter mapping; mutated in place.
 * @returns {void}
 */
export function normalizeUpdatedField(fm) {
    if (!("updated" in fm))
        return;
    const value = fm.updated;
    if (value === null || value === undefined || value === "")
        return;
    const todayIso = new Date().toISOString().slice(0, 10);
    if (value instanceof Date) {
        // toISOString() throws a RangeError on an invalid Date, so check first.
        fm.updated = Number.isNaN(value.getTime()) ? todayIso : value.toISOString().slice(0, 10);
        return;
    }
    if (typeof value === "string") {
        const text = value.trim();
        if (/^\d{4}-\d{2}-\d{2}/.test(text))
            return; // already a real date
        // Optional `$`, up to two opening braces, the keyword, up to two closing
        // braces. Deliberately lenient about unbalanced braces: every such value
        // is a template leak, never a real date.
        const placeholder = /^\$?\{{0,2}\s*(?:today|now)\s*\}{0,2}$/;
        if (placeholder.test(text.toLowerCase())) {
            fm.updated = todayIso;
        }
        // Unknown string formats are left alone so they're visible in the diff.
        return;
    }
    if (typeof value === "object") {
        // Maps like `{today: null}`, `{now: null}` — clearly a template leak.
        fm.updated = todayIso;
    }
}
1807
/**
 * Normalise a knowledge slug for variant-aware deduplication.
 *
 * Steps:
 *   - drop the leading `type:` prefix of the ref, if any;
 *   - lowercase and split on `-`, discarding empty tokens;
 *   - drop purely numeric tokens (years, date parts, counter suffixes such as
 *     `-2` / `-3`);
 *   - drop month tokens (`jan`, `january`, `sept`, …), optionally fused with
 *     digits (`may2026`, `jan24`);
 *   - sort the remaining tokens alphabetically to absorb word reorderings.
 *
 * Two slugs that normalise to the same string are considered the same asset
 * for dedup purposes even if they don't share an exact ref.
 *
 * Month matching is anchored to the WHOLE token. An unanchored match would
 * also delete ordinary words that merely contain a month abbreviation
 * (`summary`, `primary`, `markdown`, `decision`, `september`), making
 * unrelated slugs collide. Words that are themselves month names (`may`,
 * `march`) are still dropped.
 *
 * @param {string} ref - Asset ref (`type:slug`) or bare slug.
 * @returns {string} Normalised, hyphen-joined token string (may be empty).
 */
export function normalizeSlugForDedup(ref) {
    const slug = ref.replace(/^[^:]+:/, "");
    const monthToken = /^\d*(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\d*$/;
    const tokens = slug
        .toLowerCase()
        .split("-")
        .filter((tok) => tok.length > 0)
        // Numbers carry no semantic information for dedup purposes — observed
        // slug variants differ only in dates or counters.
        .filter((tok) => !/^\d+$/.test(tok))
        .filter((tok) => !monthToken.test(tok));
    // Sort to absorb word reorderings.
    tokens.sort();
    return tokens.join("-");
}
1833
/**
 * Pre-emit dedup check. Compares the candidate ref against pending
 * consolidate proposals only, using normalizeSlugForDedup on both sides.
 *
 * Historical context (REMOVED 2026-05-20): this function previously also ran
 *   (a) a normalised-slug match against existing knowledge AND memory entries
 *       in the DB, and
 *   (b) an embedding cosine-similarity check (>= 0.85) against ALL knowledge
 *       and non-derived memory entries.
 * Both branches had ZERO observed fires across 30 sampled runs in the
 * post-fix window; the 29 actual dedup catches all came from the SEPARATE
 * content-hash dedup inside `mergePlans` (the older SHA-256 helper). The
 * embedding branch also had unbounded cost per promote (it embedded every
 * knowledge + non-derived memory entry, every time), so both were deleted.
 *
 * What remains catches duplicates queued back-to-back within a single improve
 * invocation — a different concern from the cross-run content-hash dedup, and
 * cheap (no embeddings, no DB query).
 *
 * @param {{candidateRef: string, stashDir: string}} opts
 * @returns {Promise<{duplicate: true, reason: string} | {duplicate: false}>}
 */
export async function checkPreEmitDedup(opts) {
    const wantedSlug = normalizeSlugForDedup(opts.candidateRef);
    const pending = listProposals(opts.stashDir, { status: "pending" });
    // Only proposals produced by consolidate are relevant here.
    const fromConsolidate = pending.filter((proposal) => proposal.source === "consolidate");
    const clash = fromConsolidate.find((proposal) => normalizeSlugForDedup(proposal.ref) === wantedSlug);
    if (clash) {
        return { duplicate: true, reason: `slug-variant of pending proposal ${clash.id} (${clash.ref})` };
    }
    return { duplicate: false };
}
1866
/**
 * Incremental candidate set: {changed} ∪ {top-k persisted-vector neighbours of
 * each changed memory}, intersected with the loaded pool.
 *
 * A memory counts as changed when its file mtime is later than `since`, or
 * when the file cannot be stat'ed (a memory is never silently dropped).
 *
 * `since` is compared as an instant, not as text: when it parses as a date,
 * the comparison uses epoch milliseconds, so valid ISO timestamps with a UTC
 * offset (`+02:00`) or without milliseconds order correctly. If `since` does
 * not parse, the function falls back to lexicographic comparison against
 * `mtime.toISOString()`.
 *
 * @param {Array<{name: string, filePath: string}>} memories - Loaded pool.
 * @param {string} since - ISO timestamp of the previous run.
 * @param {string[]} warnings - Receives informational messages (mutated).
 * @returns {Array} `[]` when nothing changed (caller emits a no-op envelope);
 *   the same `memories` array when everything changed or the index cannot
 *   answer (fail-open to preserve merge correctness); otherwise the filtered
 *   subset in original order.
 */
export function narrowToIncrementalCandidates(memories, since, warnings) {
    const sinceMs = typeof since === "string" ? Date.parse(since) : Number.NaN;
    const sinceIsInstant = !Number.isNaN(sinceMs);
    const isChanged = (m) => {
        try {
            const mtime = fs.statSync(m.filePath).mtime;
            const mtimeMs = mtime.getTime();
            if (sinceIsInstant && !Number.isNaN(mtimeMs)) {
                return mtimeMs > sinceMs;
            }
            // Fallback for an unparseable `since`: plain string comparison.
            return mtime.toISOString() > since;
        }
        catch {
            return true; // never silently drop a memory we cannot stat
        }
    };
    const changed = memories.filter(isChanged);
    if (changed.length === 0)
        return [];
    if (changed.length === memories.length)
        return memories;
    const NEIGHBORS_PER_CHANGED = 5;
    const byName = new Map(memories.map((m) => [m.name, m]));
    const keep = new Set(changed.map((m) => m.name));
    let db;
    try {
        db = openExistingDatabase();
        for (const m of changed) {
            const id = findEntryIdByRef(db, `memory:${m.name}`);
            if (id === undefined)
                continue; // changed memory is not indexed — no neighbours to add
            // Request one extra hit because the entry itself can appear in its
            // own neighbour list and is skipped below.
            for (const hit of getNeighborsByEntryId(db, id, NEIGHBORS_PER_CHANGED + 1)) {
                if (hit.id === id)
                    continue;
                const entry = getEntryById(db, hit.id);
                if (!entry)
                    continue;
                const name = entry.entry.name;
                if (byName.has(name))
                    keep.add(name); // only neighbours present in the loaded pool
            }
        }
    }
    catch {
        // Any index failure falls back to the full pool.
        warnings.push("Incremental consolidation: index unavailable — processing full pool.");
        return memories;
    }
    finally {
        if (db)
            closeDatabase(db);
    }
    const candidates = memories.filter((m) => keep.has(m.name));
    warnings.push(`Incremental consolidation: ${changed.length} changed + neighbours → ${candidates.length}/${memories.length} memories considered (since ${since}).`);
    return candidates;
}
1921
/**
 * Load the memories eligible for consolidation.
 *
 * The index database is tried first. If it cannot be read or yields no
 * eligible memories, the `memories/` directory under `source` (or `stashDir`
 * when `source` is nullish) is listed directly and a warning is recorded.
 *
 * @param {string|undefined} source - Optional stash directory to restrict to.
 * @param {string} stashDir - Default stash directory.
 * @param {string[]} warnings - Receives informational messages (mutated).
 * @returns {Array<{name: string, filePath: string, description: string, tags: Array, stashDir: string}>}
 */
function loadMemoriesForSource(source, stashDir, warnings) {
    let loaded = [];
    let handle;
    try {
        handle = openExistingDatabase();
        const fromSelectedSource = (row) => !source || path.resolve(row.stashDir) === path.resolve(source);
        loaded = getAllEntries(handle, "memory")
            .filter((row) => fromSelectedSource(row) &&
            isConsolidationEligibleMemoryName(row.entry.name) &&
            // Skip stale DB entries whose file was deleted by a prior run but not
            // yet re-indexed. Otherwise the deleted ref reaches the LLM, which
            // proposes a second delete that fails because the file is gone.
            fs.existsSync(row.filePath))
            .map((row) => ({
            name: row.entry.name,
            filePath: row.filePath,
            description: row.entry.description ?? "",
            tags: row.entry.tags ?? [],
            stashDir: row.stashDir,
        }));
    }
    catch {
        // Best-effort: any DB problem falls through to the filesystem walk.
        loaded = [];
    }
    finally {
        if (handle)
            closeDatabase(handle);
    }
    if (loaded.length > 0) {
        return loaded;
    }
    // DB fallback: walk the filesystem.
    const rootDir = source ?? stashDir;
    const memoriesDir = path.join(rootDir, "memories");
    if (fs.existsSync(memoriesDir)) {
        for (const fileName of fs.readdirSync(memoriesDir)) {
            if (!fileName.endsWith(".md"))
                continue;
            const memoryName = fileName.replace(/\.md$/, "");
            if (!isConsolidationEligibleMemoryName(memoryName))
                continue;
            loaded.push({
                name: memoryName,
                filePath: path.join(memoriesDir, fileName),
                description: "",
                tags: [],
                stashDir: rootDir,
            });
        }
    }
    if (loaded.length > 0) {
        warnings.push("DB not found or empty — loaded memories directly from filesystem.");
    }
    return loaded;
}
1977
/**
 * Ask the LLM to merge a primary memory with ONE secondary memory, then
 * sanitise and lint the result.
 *
 * @param {object} config - Application config; used to resolve the LLM profile.
 * @param {string} primaryRef - Ref of the primary memory (used in prompt and details).
 * @param {string} primaryBody - Full text of the primary memory, frontmatter included.
 * @param {string[]} secondaryRefs - Secondary refs; only the first is merged.
 * @param {Map<string, {filePath: string}>} memoryByRef - Lookup from ref to memory entry.
 * @returns {Promise<{content: string} | {error: string, detail: string}>}
 *   `error` is one of `merge_read_failed`, `merge_transport_failed`,
 *   `merge_fence_rejected`, `merge_yaml_invalid`, `merge_content_too_short`.
 */
async function generateMergedContent(config, primaryRef, primaryBody, secondaryRefs, memoryByRef) {
    // Only handle single-secondary merges per design (one call per merge op).
    const secRef = secondaryRefs[0];
    const secEntry = memoryByRef.get(secRef);
    if (!secEntry)
        return { error: "merge_read_failed", detail: `secondary ${secRef} not in memoryByRef` };
    let secBody = "";
    try {
        secBody = fs.readFileSync(secEntry.filePath, "utf8");
    }
    catch {
        return { error: "merge_read_failed", detail: `could not read secondary ${secRef}` };
    }
    // The union of both sources' frontmatter keys is named in the prompt so the
    // model is told explicitly which keys it must keep.
    const primaryFmKeys = Object.keys(parseFrontmatter(primaryBody).data);
    const secFmKeys = Object.keys(parseFrontmatter(secBody).data);
    const requiredFmKeys = [...new Set([...primaryFmKeys, ...secFmKeys])];
    const prompt = [
        "Merge these two memory assets into one. Output ONLY the merged markdown (with YAML frontmatter). Do not explain, do not use code fences.",
        "",
        "## OUTPUT FORMAT (MANDATORY)",
        "Return raw markdown content beginning DIRECTLY with the `---` frontmatter delimiter.",
        "DO NOT wrap your entire response in a code fence.",
        "",
        'GOOD: "---\\ndescription: ...\\n---\\nBody content."',
        'BAD: "```markdown\\n---\\ndescription: ...\\n---\\nBody content.\\n```"',
        'BAD: "```yaml\\n---\\ndescription: ...\\n---\\nBody content.\\n```"',
        "",
        "## FRONTMATTER RULES (MANDATORY)",
        "- The `updated:` field, if present, MUST be a real ISO date (e.g. `updated: 2026-05-20`). NEVER emit `updated: today`, `updated: now`, or `updated: {today: null}`. If you don't have a real date, OMIT the field — the post-processor will not invent one.",
        requiredFmKeys.length > 0
            ? `- CRITICAL: The merged frontmatter MUST include ALL of these keys from both source memories: ${requiredFmKeys.join(", ")}. Do NOT drop any of them.`
            : null,
        "",
        `=== Primary memory (${primaryRef}) ===`,
        primaryBody,
        "",
        `=== Secondary memory (${secRef}) ===`,
        secBody,
    ]
        .filter((line) => line !== null)
        .join("\n");
    // Resolve the consolidate-specific LLM profile so the merge generation step
    // doesn't silently revert to the default LLM.
    const llmConfig = resolveConsolidateLlmConfig(config);
    const result = await tryLlmFeature("memory_consolidation", config, async () => {
        if (!llmConfig)
            return { ok: false, error: "No LLM configured for consolidation" };
        try {
            const content = await chatCompletion(llmConfig, [{ role: "user", content: prompt }], {
                enableThinking: false,
            });
            return { ok: true, content };
        }
        catch (e) {
            return { ok: false, error: String(e) };
        }
    }, { ok: false, error: `merge content generation failed for ${primaryRef}` });
    if (!result.ok) {
        return {
            error: "merge_transport_failed",
            detail: result.error ?? `merge content generation failed for ${primaryRef}`,
        };
    }
    // Sanitize LLM output: strip outer code fences (defends against the
    // ```markdown … ``` leak observed in production), re-serialise frontmatter
    // through the yaml lib (fixes quote-escaping mistakes), and reject empty
    // or fence-only responses.
    const sanitized = sanitizeMergedContent(result.content ?? "");
    if (!sanitized.ok) {
        const reason = sanitized.reason;
        // Structural rejections share one histogram key; everything else
        // (INVALID_YAML / YAML_STRINGIFY_FAILED) is reported as invalid YAML.
        const isFenceError = reason === "UNBALANCED_CODE_FENCE" ||
            reason === "MISSING_FRONTMATTER_SENTINEL" ||
            reason === "MALFORMED_FRONTMATTER_BLOCK" ||
            reason === "FRONTMATTER_NOT_OBJECT";
        const mergeReason = isFenceError ? "merge_fence_rejected" : "merge_yaml_invalid";
        return { error: mergeReason, detail: `${primaryRef} — ${reason}` };
    }
    const mergedRaw = sanitized.result.content;
    // C-4 / #383: Content-preservation lint (mem0 §3.2, arXiv:2504.19413).
    // Guards against LLM-generated merged content that silently drops
    // information from the source assets. Two checks:
    //   1. Body size: merged body must reach a blended floor,
    //      max(MERGE_SHRINK_RATIO_MIN × larger source body, MERGE_ABSOLUTE_FLOOR_CHARS).
    //   2. Frontmatter superset: merged frontmatter must contain every key
    //      present in either source; missing keys are repaired, not rejected.
    // A size failure returns a discriminated error so the call site can emit a
    // specific skip-reason key in the histogram.
    try {
        const primaryFm = parseFrontmatter(primaryBody);
        const secFm = parseFrontmatter(secBody);
        const mergedFm = parseFrontmatter(mergedRaw);
        // Deduplication is expected to shrink the body, hence a ratio rather than
        // equality. The absolute floor protects very short memory pairs where the
        // ratio alone would produce a near-zero threshold.
        const primaryBodyLen = (primaryFm.content ?? "").trim().length;
        const secBodyLen = (secFm.content ?? "").trim().length;
        const mergedBodyLen = (mergedFm.content ?? "").trim().length;
        const largerBodyLen = Math.max(primaryBodyLen, secBodyLen);
        const mergeFloor = Math.max(MERGE_SHRINK_RATIO_MIN * largerBodyLen, MERGE_ABSOLUTE_FLOOR_CHARS);
        if (largerBodyLen > 0 && mergedBodyLen < mergeFloor) {
            return {
                error: "merge_content_too_short",
                detail: `${primaryRef} — merged body (${mergedBodyLen} chars) is less than floor (${Math.round(mergeFloor)} chars; max(${MERGE_SHRINK_RATIO_MIN}×${largerBodyLen}, ${MERGE_ABSOLUTE_FLOOR_CHARS}))`,
            };
        }
        // Check frontmatter superset — attempt repair before rejecting.
        const primaryKeys = Object.keys(primaryFm.data ?? {});
        const secKeys = Object.keys(secFm.data ?? {});
        const mergedKeys = new Set(Object.keys(mergedFm.data ?? {}));
        const missingKeys = [...new Set([...primaryKeys, ...secKeys])].filter((k) => !mergedKeys.has(k));
        if (missingKeys.length > 0) {
            // Inject missing keys from source FMs. Primary value wins on conflict.
            const repairedFmData = { ...mergedFm.data };
            for (const key of missingKeys) {
                // Own-property check: `in` would also match inherited members, so a
                // secondary-only key such as `constructor` or `toString` would be
                // filled from Object.prototype instead of the secondary's value.
                const primaryHasKey = Object.prototype.hasOwnProperty.call(primaryFm.data, key);
                repairedFmData[key] = primaryHasKey ? primaryFm.data[key] : secFm.data[key];
            }
            normalizeUpdatedField(repairedFmData);
            const repairedYaml = yamlStringify(repairedFmData).trimEnd();
            const bodyPart = mergedFm.content ?? "";
            return { content: `---\n${repairedYaml}\n---\n${bodyPart}` };
        }
    }
    catch {
        // Lint/repair failures are non-fatal — allow the merge to proceed with
        // the sanitized content.
    }
    return { content: mergedRaw };
}
2107
/**
 * Print `message` to stdout and wait for one line on stdin.
 *
 * @param {string} message - Prompt text, written verbatim (no newline added).
 * @returns {Promise<boolean>} `true` only when the trimmed, lowercased answer
 *   is exactly "y"; `false` for any other line or when the input closes
 *   before a line arrives.
 */
async function promptConfirm(message) {
    process.stdout.write(message);
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    return new Promise((resolve) => {
        let answered = false;
        // Resolve at most once: closing the interface emits "close", which
        // would otherwise call back into this function.
        const finish = (accepted) => {
            if (answered) {
                return;
            }
            answered = true;
            rl.close();
            resolve(accepted);
        };
        rl.once("line", (line) => {
            const reply = line.trim().toLowerCase();
            finish(reply === "y");
        });
        rl.once("close", () => {
            finish(false);
        });
    });
}