akm-cli 0.7.5 → 0.8.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +113 -2
  2. package/README.md +20 -4
  3. package/SECURITY.md +93 -0
  4. package/dist/cli/config-migrate.js +144 -0
  5. package/dist/cli/config-validate.js +39 -0
  6. package/dist/cli/confirm.js +73 -0
  7. package/dist/cli/parse-args.js +133 -0
  8. package/dist/cli.js +1995 -551
  9. package/dist/commands/agent-dispatch.js +110 -0
  10. package/dist/commands/agent-support.js +68 -0
  11. package/dist/commands/completions.js +3 -0
  12. package/dist/commands/config-cli.js +130 -534
  13. package/dist/commands/consolidate.js +1531 -0
  14. package/dist/commands/curate.js +44 -3
  15. package/dist/commands/db-cli.js +23 -0
  16. package/dist/commands/distill-promotion-policy.js +660 -0
  17. package/dist/commands/distill.js +990 -75
  18. package/dist/commands/eval-cases.js +43 -0
  19. package/dist/commands/events.js +5 -23
  20. package/dist/commands/graph.js +477 -0
  21. package/dist/commands/health.js +400 -0
  22. package/dist/commands/help/help-accept.md +9 -0
  23. package/dist/commands/help/help-improve.md +77 -0
  24. package/dist/commands/help/help-proposals.md +15 -0
  25. package/dist/commands/help/help-propose.md +17 -0
  26. package/dist/commands/help/help-reject.md +8 -0
  27. package/dist/commands/history.js +54 -46
  28. package/dist/commands/improve-profiles.js +146 -0
  29. package/dist/commands/improve-result-file.js +103 -0
  30. package/dist/commands/improve.js +2175 -0
  31. package/dist/commands/info.js +5 -2
  32. package/dist/commands/init.js +50 -2
  33. package/dist/commands/installed-stashes.js +102 -139
  34. package/dist/commands/knowledge.js +136 -0
  35. package/dist/commands/lint/agent-linter.js +49 -0
  36. package/dist/commands/lint/base-linter.js +479 -0
  37. package/dist/commands/lint/command-linter.js +49 -0
  38. package/dist/commands/lint/default-linter.js +16 -0
  39. package/dist/commands/lint/index.js +183 -0
  40. package/dist/commands/lint/knowledge-linter.js +16 -0
  41. package/dist/commands/lint/markdown-insertion.js +343 -0
  42. package/dist/commands/lint/memory-linter.js +61 -0
  43. package/dist/commands/lint/registry.js +36 -0
  44. package/dist/commands/lint/skill-linter.js +45 -0
  45. package/dist/commands/lint/task-linter.js +50 -0
  46. package/dist/commands/lint/types.js +4 -0
  47. package/dist/commands/lint/vault-key-rules.js +139 -0
  48. package/dist/commands/lint/workflow-linter.js +56 -0
  49. package/dist/commands/lint.js +4 -0
  50. package/dist/commands/migration-help.js +5 -2
  51. package/dist/commands/proposal.js +66 -12
  52. package/dist/commands/propose.js +86 -31
  53. package/dist/commands/reflect.js +1119 -73
  54. package/dist/commands/registry-search.js +5 -2
  55. package/dist/commands/remember.js +69 -6
  56. package/dist/commands/schema-repair.js +203 -0
  57. package/dist/commands/search.js +115 -14
  58. package/dist/commands/self-update.js +3 -0
  59. package/dist/commands/show.js +144 -25
  60. package/dist/commands/source-add.js +17 -45
  61. package/dist/commands/source-clone.js +3 -0
  62. package/dist/commands/source-manage.js +14 -19
  63. package/dist/commands/tasks.js +438 -0
  64. package/dist/commands/url-checker.js +42 -0
  65. package/dist/commands/vault.js +130 -77
  66. package/dist/core/action-contributors.js +28 -0
  67. package/dist/core/asset-ref.js +7 -0
  68. package/dist/core/asset-registry.js +7 -16
  69. package/dist/core/asset-serialize.js +88 -0
  70. package/dist/core/asset-spec.js +22 -0
  71. package/dist/core/common.js +157 -0
  72. package/dist/core/concurrent.js +25 -0
  73. package/dist/core/config-io.js +347 -0
  74. package/dist/core/config-migration.js +625 -0
  75. package/dist/core/config-schema.js +501 -0
  76. package/dist/core/config-sources.js +108 -0
  77. package/dist/core/config-types.js +4 -0
  78. package/dist/core/config-walker.js +337 -0
  79. package/dist/core/config.js +327 -987
  80. package/dist/core/errors.js +40 -19
  81. package/dist/core/events.js +91 -138
  82. package/dist/core/file-lock.js +104 -0
  83. package/dist/core/frontmatter.js +3 -6
  84. package/dist/core/lesson-lint.js +3 -0
  85. package/dist/core/markdown.js +20 -0
  86. package/dist/core/memory-belief.js +62 -0
  87. package/dist/core/memory-contradiction-detect.js +274 -0
  88. package/dist/core/memory-improve.js +806 -0
  89. package/dist/core/parse.js +158 -0
  90. package/dist/core/paths.js +326 -14
  91. package/dist/core/proposal-quality-validators.js +364 -0
  92. package/dist/core/proposal-validators.js +69 -0
  93. package/dist/core/proposals.js +498 -42
  94. package/dist/core/state-db.js +927 -0
  95. package/dist/core/text-truncation.js +107 -0
  96. package/dist/core/time.js +54 -0
  97. package/dist/core/warn.js +62 -1
  98. package/dist/core/write-source.js +3 -0
  99. package/dist/indexer/db-backup.js +391 -0
  100. package/dist/indexer/db-search.js +152 -253
  101. package/dist/indexer/db.js +933 -103
  102. package/dist/indexer/ensure-index.js +64 -0
  103. package/dist/indexer/file-context.js +3 -0
  104. package/dist/indexer/graph-boost.js +376 -101
  105. package/dist/indexer/graph-db.js +391 -0
  106. package/dist/indexer/graph-dedup.js +95 -0
  107. package/dist/indexer/graph-extraction.js +550 -124
  108. package/dist/indexer/index-context.js +4 -0
  109. package/dist/indexer/indexer.js +506 -291
  110. package/dist/indexer/llm-cache.js +47 -0
  111. package/dist/indexer/manifest.js +3 -0
  112. package/dist/indexer/matchers.js +148 -160
  113. package/dist/indexer/memory-inference.js +99 -74
  114. package/dist/indexer/metadata-contributors.js +29 -0
  115. package/dist/indexer/metadata.js +255 -196
  116. package/dist/indexer/path-resolver.js +92 -0
  117. package/dist/indexer/project-context.js +192 -0
  118. package/dist/indexer/ranking-contributors.js +331 -0
  119. package/dist/indexer/ranking.js +81 -0
  120. package/dist/indexer/search-fields.js +5 -9
  121. package/dist/indexer/search-hit-enrichers.js +111 -0
  122. package/dist/indexer/search-source.js +44 -10
  123. package/dist/indexer/semantic-status.js +5 -16
  124. package/dist/indexer/staleness-detect.js +447 -0
  125. package/dist/indexer/usage-events.js +12 -9
  126. package/dist/indexer/walker.js +28 -0
  127. package/dist/integrations/agent/builders.js +135 -0
  128. package/dist/integrations/agent/config.js +122 -230
  129. package/dist/integrations/agent/detect.js +3 -0
  130. package/dist/integrations/agent/index.js +7 -13
  131. package/dist/integrations/agent/model-aliases.js +55 -0
  132. package/dist/integrations/agent/profiles.js +70 -5
  133. package/dist/integrations/agent/prompts.js +150 -74
  134. package/dist/integrations/agent/runner.js +151 -0
  135. package/dist/integrations/agent/sdk-runner.js +126 -0
  136. package/dist/integrations/agent/spawn.js +118 -23
  137. package/dist/integrations/github.js +3 -0
  138. package/dist/integrations/lockfile.js +32 -69
  139. package/dist/integrations/session-logs/index.js +68 -0
  140. package/dist/integrations/session-logs/providers/claude-code.js +59 -0
  141. package/dist/integrations/session-logs/providers/opencode.js +55 -0
  142. package/dist/integrations/session-logs/types.js +4 -0
  143. package/dist/llm/call-ai.js +62 -0
  144. package/dist/llm/client.js +72 -124
  145. package/dist/llm/embedder.js +3 -19
  146. package/dist/llm/embedders/cache.js +3 -7
  147. package/dist/llm/embedders/local.js +3 -0
  148. package/dist/llm/embedders/remote.js +20 -8
  149. package/dist/llm/embedders/types.js +3 -7
  150. package/dist/llm/feature-gate.js +89 -48
  151. package/dist/llm/graph-extract.js +676 -70
  152. package/dist/llm/index-passes.js +9 -23
  153. package/dist/llm/memory-infer.js +52 -71
  154. package/dist/llm/metadata-enhance.js +42 -29
  155. package/dist/llm/prompts/graph-extract-user-prompt.md +35 -0
  156. package/dist/output/cli-hints-full.md +281 -0
  157. package/dist/output/cli-hints-short.md +65 -0
  158. package/dist/output/cli-hints.js +5 -318
  159. package/dist/output/context.js +3 -0
  160. package/dist/output/renderers.js +223 -256
  161. package/dist/output/shapes.js +150 -105
  162. package/dist/output/text.js +318 -30
  163. package/dist/registry/build-index.js +3 -0
  164. package/dist/registry/create-provider-registry.js +3 -0
  165. package/dist/registry/factory.js +3 -0
  166. package/dist/registry/origin-resolve.js +3 -0
  167. package/dist/registry/providers/index.js +3 -0
  168. package/dist/registry/providers/skills-sh.js +70 -49
  169. package/dist/registry/providers/static-index.js +53 -48
  170. package/dist/registry/providers/types.js +3 -24
  171. package/dist/registry/resolve.js +11 -16
  172. package/dist/registry/types.js +3 -0
  173. package/dist/scripts/migrate-storage.js +17307 -0
  174. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8900 -0
  175. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  176. package/dist/setup/detect.js +3 -0
  177. package/dist/setup/ripgrep-install.js +3 -0
  178. package/dist/setup/ripgrep-resolve.js +3 -0
  179. package/dist/setup/setup.js +775 -37
  180. package/dist/setup/steps.js +3 -15
  181. package/dist/sources/include.js +3 -0
  182. package/dist/sources/provider-factory.js +5 -12
  183. package/dist/sources/provider.js +3 -20
  184. package/dist/sources/providers/filesystem.js +19 -23
  185. package/dist/sources/providers/git.js +7 -5
  186. package/dist/sources/providers/index.js +3 -0
  187. package/dist/sources/providers/install-types.js +3 -13
  188. package/dist/sources/providers/npm.js +3 -4
  189. package/dist/sources/providers/provider-utils.js +3 -0
  190. package/dist/sources/providers/sync-from-ref.js +3 -11
  191. package/dist/sources/providers/tar-utils.js +3 -0
  192. package/dist/sources/providers/website.js +18 -22
  193. package/dist/sources/resolve.js +3 -0
  194. package/dist/sources/types.js +3 -0
  195. package/dist/sources/website-ingest.js +7 -0
  196. package/dist/tasks/backends/cron.js +203 -0
  197. package/dist/tasks/backends/exec-utils.js +28 -0
  198. package/dist/tasks/backends/index.js +24 -0
  199. package/dist/tasks/backends/launchd-template.xml +19 -0
  200. package/dist/tasks/backends/launchd.js +187 -0
  201. package/dist/tasks/backends/schtasks-template.xml +29 -0
  202. package/dist/tasks/backends/schtasks.js +215 -0
  203. package/dist/tasks/parser.js +211 -0
  204. package/dist/tasks/resolveAkmBin.js +87 -0
  205. package/dist/tasks/runner.js +458 -0
  206. package/dist/tasks/schedule.js +211 -0
  207. package/dist/tasks/schema.js +15 -0
  208. package/dist/tasks/validator.js +62 -0
  209. package/dist/version.js +3 -0
  210. package/dist/wiki/index-template.md +12 -0
  211. package/dist/wiki/ingest-workflow-template.md +54 -0
  212. package/dist/wiki/log-template.md +8 -0
  213. package/dist/wiki/schema-template.md +61 -0
  214. package/dist/wiki/wiki-templates.js +15 -0
  215. package/dist/wiki/wiki.js +13 -61
  216. package/dist/workflows/authoring.js +8 -25
  217. package/dist/workflows/cli.js +3 -0
  218. package/dist/workflows/db.js +140 -10
  219. package/dist/workflows/document-cache.js +3 -10
  220. package/dist/workflows/parser.js +3 -0
  221. package/dist/workflows/renderer.js +11 -3
  222. package/dist/workflows/runs.js +62 -91
  223. package/dist/workflows/schema.js +3 -0
  224. package/dist/workflows/scope-key.js +3 -0
  225. package/dist/workflows/validator.js +4 -8
  226. package/dist/workflows/workflow-template.md +24 -0
  227. package/docs/README.md +9 -2
  228. package/docs/data-and-telemetry.md +225 -0
  229. package/docs/migration/release-notes/0.7.0.md +1 -1
  230. package/docs/migration/release-notes/0.7.5.md +2 -2
  231. package/docs/migration/release-notes/0.8.0.md +48 -0
  232. package/docs/migration/v0.7-to-v0.8.md +1307 -0
  233. package/package.json +20 -8
  234. package/.github/LICENSE +0 -374
  235. package/dist/commands/install-audit.js +0 -381
  236. package/dist/templates/wiki-templates.js +0 -100
@@ -0,0 +1,1531 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import { createHash } from "node:crypto";
5
+ import fs from "node:fs";
6
+ import path from "node:path";
7
+ import readline from "node:readline";
8
+ import { parse as yamlParse, stringify as yamlStringify } from "yaml";
9
+ import { parseAssetRef } from "../core/asset-ref";
10
+ import { assembleAssetFromString } from "../core/asset-serialize";
11
+ import { resolveStashDir, timestampForFilename } from "../core/common";
12
+ import { getDefaultLlmConfig, loadConfig } from "../core/config";
13
+ import { ConfigError } from "../core/errors";
14
+ import { appendEvent } from "../core/events";
15
+ import { parseFrontmatter } from "../core/frontmatter";
16
+ import { writeContradictEdge } from "../core/memory-belief";
17
+ import { parseEmbeddedJsonResponse } from "../core/parse";
18
+ import { hasHotCaptureMode, hasSupersededStatus, validateProposalFrontmatter, } from "../core/proposal-quality-validators";
19
+ import { createProposal, isProposalSkipped, listProposals } from "../core/proposals";
20
+ import { detectTruncatedDescription } from "../core/text-truncation";
21
+ // Re-export the moved helpers so existing test imports continue to resolve.
22
+ export { hasSupersededStatus, validateProposalFrontmatter };
23
+ import { warn } from "../core/warn";
24
+ import { deleteAssetFromSource, resolveWriteTarget, writeAssetToSource } from "../core/write-source";
25
+ import { closeDatabase, getAllEntries, openExistingDatabase } from "../indexer/db";
26
+ import { chatCompletion } from "../llm/client";
27
+ import { cosineSimilarity, embedBatch } from "../llm/embedder";
28
+ import { isLlmFeatureEnabled, tryLlmFeature } from "../llm/feature-gate";
29
+ // ── Prompts ─────────────────────────────────────────────────────────────────
30
/**
 * System prompt for the consolidate planner.
 *
 * Describes the five decisions the model may take for each memory
 * (merge / delete / promote / contradict / keep) and pins the JSON reply
 * shape: an `operations` array plus an optional `warnings` array. The text
 * inside the template literal is runtime data, so any wording change alters
 * what the model is asked to do.
 */
const CONSOLIDATE_SYSTEM_PROMPT = `You are the akm consolidate assistant analyzing memory assets.

Rules:
1. MERGE: Two or more memories are substantially duplicated or closely related → propose merging. Return the primary ref to keep and secondary refs to delete. Do NOT include mergedContent — the merge will be executed in a separate step.
2. DELETE: Memory is clearly outdated, contradicted, or redundant → propose deletion.
3. PROMOTE: Memory expresses a stable, reusable fact suitable as a \`knowledge:\` asset → propose promotion. Do NOT delete the source memory.
4. CONTRADICT: Two memories make mutually exclusive factual claims about the same subject (e.g. "always use VPN" vs "VPN is optional") → mark the older or less authoritative one as contradicted. This writes a contradictedBy edge so the belief-resolution SCC algorithm can resolve the conflict. Do NOT delete contradicted memories — let the belief resolver decide.
5. KEEP: Memory is unique and current → omit from output.

Return ONLY JSON (no prose, no code fences):
{
"operations": [
{ "op": "merge", "primary": "memory:<name>", "secondaries": ["memory:<name>", ...], "mergeStrategy": "synthesize" },
{ "op": "delete", "ref": "memory:<name>", "reason": "<brief reason>" },
{ "op": "promote", "ref": "memory:<name>", "knowledgeRef": "knowledge:<suggested-slug>", "reason": "<brief reason>", "description": "<one sentence describing the new knowledge asset>" },
{ "op": "contradict", "ref": "memory:<name>", "contradictedByRef": "memory:<name>", "reason": "<brief reason>" }
],
"warnings": ["<optional concerns>"]
}

When the merged content includes an \`updated\` frontmatter field, the value MUST be a real ISO date string (e.g. \`updated: 2026-05-20\`). NEVER emit \`updated: today\`, \`updated: {today}\`, \`updated: {today: null}\`, \`updated: now\`, or any other literal placeholder/template-variable. If you do not have a real source-of-truth date, OMIT the \`updated\` field entirely — the post-processor will not invent one for you.`;
51
/**
 * JSON Schema for structured consolidate plans (PR 1 of the asset-writers
 * decision — see knowledge:projects/akm/asset-writers-investigation/00-synthesis).
 * Mirrors the {operations[], warnings?[]} shape currently described in
 * CONSOLIDATE_SYSTEM_PROMPT. Providers with `supportsJsonSchema: true` enforce
 * the shape upstream so the chunk-level "invalid plan from AI — skipping"
 * branch in `runConsolidate` becomes unreachable on schema-honouring providers.
 *
 * The four operation variants (merge / delete / promote / contradict) are
 * modeled as a oneOf so a structured-output provider can still tell them apart
 * by the required `op` discriminator. `parseEmbeddedJsonResponse` keeps
 * working as a fallback parser for providers that ignore the schema.
 *
 * Only `operations` is required at the top level; `warnings` is optional.
 * Within the promote variant, `description` is the one optional property.
 */
export const CONSOLIDATE_PLAN_JSON_SCHEMA = {
    type: "object",
    required: ["operations"],
    additionalProperties: false,
    properties: {
        operations: {
            type: "array",
            description: "Ordered list of consolidate operations the planner proposes.",
            items: {
                oneOf: [
                    // merge: keep `primary`, fold at least one secondary into it.
                    {
                        type: "object",
                        required: ["op", "primary", "secondaries", "mergeStrategy"],
                        additionalProperties: false,
                        properties: {
                            op: { type: "string", enum: ["merge"] },
                            primary: { type: "string", minLength: 1 },
                            secondaries: {
                                type: "array",
                                minItems: 1,
                                items: { type: "string", minLength: 1 },
                            },
                            mergeStrategy: { type: "string", minLength: 1 },
                        },
                    },
                    // delete: remove a single memory, with a reason.
                    {
                        type: "object",
                        required: ["op", "ref", "reason"],
                        additionalProperties: false,
                        properties: {
                            op: { type: "string", enum: ["delete"] },
                            ref: { type: "string", minLength: 1 },
                            reason: { type: "string", minLength: 1 },
                        },
                    },
                    // promote: propose a knowledge asset derived from a memory.
                    {
                        type: "object",
                        required: ["op", "ref", "knowledgeRef", "reason"],
                        additionalProperties: false,
                        properties: {
                            op: { type: "string", enum: ["promote"] },
                            ref: { type: "string", minLength: 1 },
                            knowledgeRef: { type: "string", minLength: 1 },
                            reason: { type: "string", minLength: 1 },
                            description: { type: "string" },
                        },
                    },
                    // contradict: mark `ref` as contradicted by `contradictedByRef`.
                    {
                        type: "object",
                        required: ["op", "ref", "contradictedByRef", "reason"],
                        additionalProperties: false,
                        properties: {
                            op: { type: "string", enum: ["contradict"] },
                            ref: { type: "string", minLength: 1 },
                            contradictedByRef: { type: "string", minLength: 1 },
                            reason: { type: "string", minLength: 1 },
                        },
                    },
                ],
            },
        },
        warnings: {
            type: "array",
            description: "Optional list of human-readable concerns the planner wants to surface.",
            items: { type: "string" },
        },
    },
};
132
/**
 * Reports whether a memory may take part in consolidation.
 *
 * @param {string} name - Memory name (without the `memory:` prefix).
 * @returns {boolean} `false` for names ending in ".derived", `true` otherwise.
 */
export function isConsolidationEligibleMemoryName(name) {
    const isDerived = name.endsWith(".derived");
    return isDerived === false;
}
135
/**
 * Tells whether the memory file at `filePath` declares `captureMode: hot`
 * in its frontmatter.
 *
 * Hot memories are USER-EXPLICIT (written via `akm remember` on the hot
 * path). The consolidate LLM must not delete or auto-merge them: the user
 * wrote them deliberately and only the user may retire them.
 *
 * The file is read on every call. Consolidate handles roughly ten memories
 * per chunk, so the IO cost is negligible. Any failure to read or parse
 * yields `false` (fail-safe: an unparseable file counts as not-hot; the
 * wider consolidate flow guards against unparseable memories elsewhere).
 *
 * This guard exists because of four observed defect classes (see
 * `memory:akm-improve-critical-review-2026-05-20`):
 *   - LLM marks a memory contradicted then deletes it (dangling contradictedBy)
 *   - LLM merges two unrelated memories that share a topic keyword
 *   - LLM judges a recent durable design memo as "redundant"
 *   - Cascade deletes (LLM uses ref:X as `contradictedBy` for ref:Y then deletes both)
 *
 * @param {string} filePath - Path of the memory file on disk.
 * @returns {boolean} `true` only when the frontmatter marks the memory as hot.
 */
export function isHotCapturedMemory(filePath) {
    try {
        const fileIsPresent = fs.existsSync(filePath);
        if (fileIsPresent === false) {
            return false;
        }
        const rawText = fs.readFileSync(filePath, "utf8");
        const { data: frontmatter } = parseFrontmatter(rawText);
        return hasHotCaptureMode(frontmatter);
    }
    catch {
        // Read and parse failures are deliberately treated as "not hot".
        return false;
    }
}
166
/**
 * Classifies a memory file for the consolidate guard.
 *
 * @param {string} filePath - Path of the memory file on disk.
 * @returns {"missing" | "unparseable" | "hot" | "safe"}
 *   - "missing"     the file does not exist
 *   - "unparseable" reading or frontmatter parsing threw, or the parsed
 *                   frontmatter is absent/empty
 *   - "hot"         the frontmatter carries the hot capture mode
 *   - "safe"        anything else
 */
export function consolidateGuardStatus(filePath) {
    if (fs.existsSync(filePath) === false) {
        return "missing";
    }
    let parsedFile;
    try {
        // A failed read and a failed parse are reported the same way.
        parsedFile = parseFrontmatter(fs.readFileSync(filePath, "utf8"));
    }
    catch {
        return "unparseable";
    }
    const frontmatter = parsedFile.data;
    const hasFrontmatter = Boolean(frontmatter) && Object.keys(frontmatter).length !== 0;
    if (!hasFrontmatter) {
        return "unparseable";
    }
    if (hasHotCaptureMode(frontmatter)) {
        return "hot";
    }
    return "safe";
}
188
+ // ── Chunk sizing ─────────────────────────────────────────────────────────────
189
/**
 * Conservative chars-per-token estimate used when computing prompt budgets.
 * English text averages roughly 4 chars/token for most LLM tokenizers. We use
 * 3 to stay conservative (shorter tokens = more tokens per char).
 */
const CHARS_PER_TOKEN = 3;
/**
 * Overhead budget reserved for the system prompt, chunk header lines, and per-
 * memory metadata lines (name, description, tags, separator). Measured at
 * roughly 600 chars for the system prompt + ~100 chars of header + ~50 chars
 * per memory × chunk size. We round up to 2 000 tokens to leave room for the
 * model's own output.
 *
 * NOTE(review): the 600-char figure looks stale — CONSOLIDATE_SYSTEM_PROMPT
 * in this file is considerably longer than that. Re-measure before relying
 * on this breakdown; the 2 000-token constant itself is what the code uses.
 */
const PROMPT_OVERHEAD_TOKENS = 2_000;
/**
 * Default effective token budget used when the default LLM profile's
 * `contextLength` is not set. This is intentionally conservative (4 096)
 * rather than being set to the model's actual context window, because:
 *
 * - When the agent path is used, the agent CLI (e.g. opencode)
 *   prepends its own large system prompt + conversation history before
 *   forwarding to the model. That overhead easily consumes 30K+ tokens on
 *   a model with a 16K context window, leaving very little room for
 *   chunk content.
 * - When the HTTP path is used (an LLM profile is selected), only the akm
 *   system prompt and user prompt are sent, so the budget can be set to the
 *   model's actual context length via profiles.llm[defaults.llm].contextLength.
 *
 * Set profiles.llm[defaults.llm].contextLength in your config file to the
 * model's actual context window to allow larger chunks on the HTTP path.
 */
export const DEFAULT_CONTEXT_LENGTH_TOKENS = 4_096;
221
/**
 * Given the model's context window and the per-memory body truncation limit,
 * return the maximum number of memories that can safely fit in one chunk
 * without the prompt overflowing the context window.
 *
 * The formula is:
 *   usableTokens    = contextLength - PROMPT_OVERHEAD_TOKENS
 *   tokensPerMemory = ceil(bodyTruncation / CHARS_PER_TOKEN)
 *   chunkSize       = floor(usableTokens / tokensPerMemory)
 *
 * Result is clamped between 1 and 50 to avoid degenerate values. A missing
 * or non-numeric input (which would make the arithmetic yield NaN) falls
 * back to the minimum chunk size of 1 instead of leaking NaN to the caller.
 *
 * @param contextLength - Model context window in tokens.
 * @param bodyTruncation - Max chars per memory body included in the prompt.
 * @returns Integer chunk size in the range 1..50.
 */
export function computeSafeChunkSize(contextLength, bodyTruncation) {
    const usableTokens = Math.max(contextLength - PROMPT_OVERHEAD_TOKENS, 0);
    const tokensPerMemory = Math.max(Math.ceil(bodyTruncation / CHARS_PER_TOKEN), 1);
    const raw = Math.floor(usableTokens / tokensPerMemory);
    // Math.max/Math.min return NaN as soon as any argument is NaN, so the
    // clamp below cannot rescue a NaN on its own.
    if (Number.isNaN(raw)) {
        return 1;
    }
    return Math.max(1, Math.min(50, raw));
}
242
+ // ── Similarity clustering (C-1 / #380) ──────────────────────────────────────
243
/**
 * Re-order memories so that similar ones are placed adjacent to each other
 * before the memories are sliced into chunks. This ensures high-similarity
 * memories land in the same LLM context window, allowing the consolidate
 * model to detect and merge duplicates that would otherwise be split across
 * chunks and survive indefinitely.
 *
 * Algorithm: greedy nearest-neighbour chain starting from the first memory.
 * Each step selects the unused memory with the highest cosine similarity to
 * the last-placed memory. O(n²) — acceptable for the expected N < 200.
 *
 * mem0 arXiv:2504.19413 — every candidate compared against whole store.
 * A-MEM arXiv:2502.12110 — atomic notes linked by similarity.
 *
 * Returns the original order unchanged when:
 *   - The embedding config is not present.
 *   - Embedding requests fail (fail-open).
 *   - There are fewer than 3 memories (no benefit to reordering).
 *
 * The result is always a permutation of `memories`: if the chain cannot be
 * extended (every remaining similarity is NaN, so no candidate ever beats
 * the running best), the memories not yet placed are appended in their
 * original order rather than being dropped.
 *
 * @param memories - Memory descriptors; `description`, `tags` and `name` are read.
 * @param config - Config object; `config.embedding` is handed to `embedBatch`.
 * @returns The same array (fallback cases) or a new, re-ordered array.
 */
async function clusterMemoriesBySimilarity(memories, config) {
    if (memories.length < 3 || !config.embedding)
        return memories;
    // Embed "description. tags"; fall back to the name when both are empty.
    const texts = memories.map((m) => {
        const parts = [];
        if (m.description)
            parts.push(m.description);
        if (m.tags.length > 0)
            parts.push(m.tags.join(" "));
        return parts.join(". ") || m.name;
    });
    let embeddings = null;
    try {
        embeddings = await embedBatch(texts, config.embedding);
    }
    catch {
        // Fail open: embedding failures degrade gracefully to original order.
        return memories;
    }
    if (!embeddings || embeddings.length !== memories.length)
        return memories;
    // Greedy nearest-neighbour chain, seeded with the first memory.
    const used = new Array(memories.length).fill(false);
    const ordered = [memories[0]];
    used[0] = true;
    let current = 0;
    while (ordered.length < memories.length) {
        const currentEmb = embeddings[current];
        let bestIdx = -1;
        let bestSim = -Infinity;
        for (let j = 0; j < memories.length; j++) {
            if (used[j])
                continue;
            const sim = cosineSimilarity(currentEmb, embeddings[j]);
            // Strict ">" keeps the earliest candidate on ties; NaN never wins.
            if (sim > bestSim) {
                bestSim = sim;
                bestIdx = j;
            }
        }
        if (bestIdx === -1)
            break;
        ordered.push(memories[bestIdx]);
        used[bestIdx] = true;
        current = bestIdx;
    }
    // The chain stalled early: keep the leftovers so no memory is skipped
    // by the consolidation pass.
    if (ordered.length < memories.length) {
        for (let j = 0; j < memories.length; j++) {
            if (!used[j])
                ordered.push(memories[j]);
        }
    }
    return ordered;
}
310
+ // ── Chunk helpers ────────────────────────────────────────────────────────────
311
/**
 * Renders the user prompt for one chunk of memories.
 *
 * The output starts with a two-line header (source name, chunk position and
 * the first/last memory refs) followed by a blank line, then one section per
 * memory: its 1-based index and ref, description, tags, a `---` separator,
 * the file content cut to `bodyTruncation` characters, and a blank line.
 * A file that cannot be read contributes the text "(unreadable)" (also
 * subject to truncation).
 *
 * @param sourceName - Label printed on the "Source:" line.
 * @param memories - Memory descriptors; `name`, `description`, `tags`, `filePath` are read.
 * @param chunkIndex - Zero-based chunk index (printed 1-based).
 * @param totalChunks - Total number of chunks.
 * @param bodyTruncation - Max characters of each file included.
 * @returns The prompt text, lines joined with "\n".
 */
export function buildChunkPrompt(sourceName, memories, chunkIndex, totalChunks, bodyTruncation) {
    const toRef = (memory) => (memory ? `memory:${memory.name}` : "");
    const firstRef = toRef(memories[0]);
    const lastRef = toRef(memories[memories.length - 1]);
    const out = [];
    out.push(`Source: ${sourceName}`);
    out.push(`Chunk ${chunkIndex + 1} of ${totalChunks}, memories ${firstRef}–${lastRef}:`);
    out.push("");
    for (const [position, memory] of memories.entries()) {
        const tagText = memory.tags.length > 0 ? memory.tags.join(", ") : "(none)";
        out.push(
            `[${position + 1}] memory:${memory.name}`,
            `Description: ${memory.description || "(none)"}`,
            `Tags: ${tagText}`,
            "---",
        );
        let fileText;
        try {
            fileText = fs.readFileSync(memory.filePath, "utf8");
        }
        catch {
            fileText = "(unreadable)";
        }
        out.push(fileText.slice(0, bodyTruncation), "");
    }
    return out.join("\n");
}
337
/**
 * Structural check for a single planner operation.
 *
 * Accepts the four known variants and verifies the ref-carrying fields each
 * one needs:
 *   - merge:      `primary` is a string and `secondaries` is an array whose
 *                 entries are all strings (an empty array is still accepted)
 *   - delete:     `ref` is a string
 *   - promote:    `ref` and `knowledgeRef` are strings
 *   - contradict: `ref` and `contradictedByRef` are strings
 *
 * Checking the entries of `secondaries` matters because they are later used
 * as refs (for example as Map keys in `mergePlans`); a number or object in
 * that array is not a usable ref.
 *
 * @param op - Candidate value, typically parsed from model output.
 * @returns {boolean} `true` when `op` is a well-formed operation.
 */
function isValidOp(op) {
    if (typeof op !== "object" || op === null)
        return false;
    const isString = (value) => typeof value === "string";
    switch (op.op) {
        case "merge":
            return (isString(op.primary) &&
                Array.isArray(op.secondaries) &&
                op.secondaries.every((ref) => isString(ref)));
        case "delete":
            return isString(op.ref);
        case "promote":
            return isString(op.ref) && isString(op.knowledgeRef);
        case "contradict":
            return isString(op.ref) && isString(op.contradictedByRef);
        default:
            return false;
    }
}
355
/**
 * Combines the per-chunk operation lists into one de-duplicated plan.
 *
 * Rules applied while folding the chunks, in order of appearance:
 *   - merge:      keyed by `primary` (a later merge with the same primary
 *                 replaces the earlier one). Merge wins over delete: any
 *                 delete op targeting the primary or one of the secondaries
 *                 is dropped, regardless of whether the delete was seen
 *                 before or after the merge.
 *   - delete:     keyed by `ref`; ignored when the ref already takes part in
 *                 a merge (as primary or secondary).
 *   - promote:    keyed by `ref`; kept even when the ref is also merged.
 *   - contradict: de-duplicated by the `ref|contradictedByRef` pair, first
 *                 occurrence wins.
 *
 * @param chunks - Array of operation arrays, one per chunk.
 * @returns `{ ops, warnings }` where `ops` is ordered promote → merge →
 *   delete → contradict. `warnings` is returned empty; this function never
 *   adds to it.
 */
export function mergePlans(chunks) {
    const mergeOps = new Map();
    const deleteOps = new Map();
    const promoteOps = new Map();
    // C-3 / #382: contradict ops keyed by `ref|contradictedByRef` to deduplicate.
    const contradictOps = new Map();
    // Every ref that has appeared in a merge (primary or secondary). Used so
    // that "merge wins over delete" does not depend on which op came first.
    const mergedRefs = new Set();
    const warnings = [];
    for (const chunk of chunks) {
        for (const op of chunk) {
            if (op.op === "merge") {
                // merge wins over delete
                for (const ref of [op.primary, ...op.secondaries]) {
                    deleteOps.delete(ref);
                    mergedRefs.add(ref);
                }
                mergeOps.set(op.primary, op);
            }
            else if (op.op === "delete") {
                if (!mergedRefs.has(op.ref)) {
                    deleteOps.set(op.ref, op);
                }
            }
            else if (op.op === "promote") {
                // C-2 / #381: when both a promote and a merge target the same ref,
                // queue the promote FIRST rather than discarding it. The promote op
                // routes through createProposal (the human-gated proposal queue), so
                // it is non-destructive. The merge follows after the proposal is
                // created. This preserves the human reviewer's ability to inspect the
                // promotion before the source memory is merged/deleted.
                // AGM K*8 — retain the maximally informative consistent subset.
                promoteOps.set(op.ref, op);
            }
            else if (op.op === "contradict") {
                // Deduplicate by ref+contradictedByRef pair.
                const key = `${op.ref}|${op.contradictedByRef}`;
                if (!contradictOps.has(key)) {
                    contradictOps.set(key, op);
                }
            }
        }
    }
    // C-2 / #381: promote ops are ordered BEFORE merge ops so that the
    // human-gated proposal queue entry is created before any destructive merge.
    // Phase B processes ops in array order, so promote executes first.
    const ops = [
        ...promoteOps.values(),
        ...mergeOps.values(),
        ...deleteOps.values(),
        ...contradictOps.values(),
    ];
    return { ops, warnings };
}
410
/**
 * Location of the consolidate journal file inside a stash.
 *
 * @param {string} stashDir - Root directory of the stash.
 * @returns {string} `<stashDir>/.akm/consolidate-journal.json`.
 */
function getJournalPath(stashDir) {
    const akmDir = path.join(stashDir, ".akm");
    return path.join(akmDir, "consolidate-journal.json");
}
413
/**
 * Directory that holds the consolidate backup for one run.
 *
 * @param {string} stashDir - Root directory of the stash.
 * @param {string} timestamp - Per-run directory name.
 * @returns {string} `<stashDir>/.akm/consolidate-backup/<timestamp>`.
 */
function getBackupDir(stashDir, timestamp) {
    const backupRoot = path.join(stashDir, ".akm", "consolidate-backup");
    return path.join(backupRoot, timestamp);
}
416
/**
 * Deletes a stale consolidate journal and, when it can be located, the
 * backup directory that belongs to it. Never throws for filesystem
 * failures; outcomes are reported by appending messages to `warnings`.
 *
 * The backup directory name is taken from `journal.backupTimestamp`
 * (trimmed) or, failing that, derived from `journal.startedAt` by replacing
 * every ":" and "." with "-". With neither field usable, or when the
 * directory does not exist, only the journal file is handled.
 *
 * Exactly one backup message is emitted per call: "Cleared …" when the
 * directory was removed, "Failed to remove …" when removal threw.
 *
 * @param {string} stashDir - Root directory of the stash.
 * @param {object} journal - Parsed journal content.
 * @param {string[]} warnings - Mutated: outcome messages are pushed here.
 */
function removeStaleJournal(stashDir, journal, warnings) {
    const journalPath = getJournalPath(stashDir);
    try {
        fs.unlinkSync(journalPath);
    }
    catch {
        warnings.push(`Failed to remove stale consolidate journal at ${journalPath}.`);
    }
    const backupTimestamp = typeof journal.backupTimestamp === "string" && journal.backupTimestamp.trim().length > 0
        ? journal.backupTimestamp.trim()
        : typeof journal.startedAt === "string" && journal.startedAt.trim().length > 0
            ? journal.startedAt.replace(/[:.]/g, "-")
            : "";
    if (!backupTimestamp)
        return;
    const backupDir = getBackupDir(stashDir, backupTimestamp);
    if (!fs.existsSync(backupDir))
        return;
    try {
        fs.rmSync(backupDir, { recursive: true, force: true });
        // Report success only after the removal actually went through.
        warnings.push(`Cleared stale consolidate backup at ${backupDir}.`);
    }
    catch {
        warnings.push(`Failed to remove stale consolidate backup at ${backupDir}.`);
    }
}
442
/**
 * Guards a new consolidate run against leftovers from an interrupted one.
 *
 * Behaviour, in order:
 *   1. No journal file → returns silently.
 *   2. Journal cannot be read, is not valid JSON, or does not contain a
 *      JSON object/array (e.g. the file holds `null`) → treated as
 *      unreadable. In "clean" recovery mode the file is deleted and a
 *      warning recorded; otherwise a ConfigError is thrown.
 *   3. Journal is complete (`completed.length >= operations.length`, with a
 *      non-array field counted as 0) → returns silently.
 *   4. Journal is incomplete → in "clean" mode it is removed via
 *      `removeStaleJournal` and a warning recorded; otherwise a ConfigError
 *      is thrown, including the backup directory when the journal names one.
 *
 * @param {string} stashDir - Root directory of the stash.
 * @param {string} recoveryMode - "clean" enables automatic cleanup.
 * @param {string[]} warnings - Mutated: cleanup messages are pushed here.
 * @throws {ConfigError} When stale state is found and `recoveryMode` is not "clean".
 */
function checkForIncompleteJournal(stashDir, recoveryMode, warnings) {
    const journalPath = getJournalPath(stashDir);
    if (!fs.existsSync(journalPath))
        return;
    let journal;
    try {
        journal = JSON.parse(fs.readFileSync(journalPath, "utf8"));
        // Valid JSON that is not an object (null, number, string…) cannot be a
        // journal; route it through the same handling as a corrupt file rather
        // than crashing on the property reads below.
        if (typeof journal !== "object" || journal === null) {
            throw new TypeError("consolidate journal is not a JSON object");
        }
    }
    catch {
        if (recoveryMode === "clean") {
            try {
                fs.unlinkSync(journalPath);
                warnings.push(`Removed unreadable consolidate journal at ${journalPath}.`);
            }
            catch {
                warnings.push(`Failed to remove unreadable consolidate journal at ${journalPath}.`);
            }
            return;
        }
        throw new ConfigError(`Incomplete consolidation state detected: unreadable journal at ${journalPath}. Re-run with --consolidate-recovery clean to remove stale journal artifacts, or remove the file manually.`, "INVALID_CONFIG_FILE");
    }
    const operationCount = Array.isArray(journal.operations) ? journal.operations.length : 0;
    const completedCount = Array.isArray(journal.completed) ? journal.completed.length : 0;
    if (completedCount >= operationCount)
        return;
    if (recoveryMode === "clean") {
        removeStaleJournal(stashDir, journal, warnings);
        warnings.push(`Removed stale consolidation journal at ${journalPath} (${completedCount}/${operationCount} operations completed).`);
        return;
    }
    const backupHint = typeof journal.backupTimestamp === "string" && journal.backupTimestamp.trim().length > 0
        ? ` Backup dir: ${getBackupDir(stashDir, journal.backupTimestamp.trim())}.`
        : "";
    throw new ConfigError(`Incomplete consolidation run detected at ${journalPath} (${completedCount}/${operationCount} operations completed). Re-run with --consolidate-recovery clean to remove stale journal artifacts.${backupHint}`, "INVALID_CONFIG_FILE");
}
477
/**
 * Persist a fresh journal describing the operations about to be applied.
 *
 * Creates the journal's parent directory if needed and writes a
 * pretty-printed JSON document with an empty `completed` list.
 *
 * @param {string} stashDir - Root directory of the stash.
 * @param {Array} ops - Planned operations, stored verbatim under `operations`.
 * @param {string} backupTimestamp - Name of this run's backup directory.
 * @returns {void}
 */
function writeJournal(stashDir, ops, backupTimestamp) {
    const target = getJournalPath(stashDir);
    const parentDir = path.dirname(target);
    fs.mkdirSync(parentDir, { recursive: true });
    const startedAt = new Date().toISOString();
    const serialized = JSON.stringify({ startedAt, operations: ops, completed: [], backupTimestamp }, null, 2);
    fs.writeFileSync(target, serialized, "utf8");
}
488
/**
 * Append `opRef` to the journal's `completed` list (best-effort).
 *
 * Does nothing when the journal file is absent or does not contain a JSON
 * object. A missing or malformed `completed` field is replaced by a fresh
 * array so progress is still recorded. I/O and parse errors are swallowed on
 * purpose: journal bookkeeping must never abort the operation it describes.
 *
 * @param {string} stashDir - Root directory of the stash.
 * @param {string} opRef - Reference of the asset whose operation finished.
 * @returns {void}
 */
function markJournalCompleted(stashDir, opRef) {
    const journalPath = getJournalPath(stashDir);
    if (!fs.existsSync(journalPath))
        return;
    try {
        const journal = JSON.parse(fs.readFileSync(journalPath, "utf8"));
        if (journal === null || typeof journal !== "object" || Array.isArray(journal))
            return;
        if (!Array.isArray(journal.completed)) {
            journal.completed = [];
        }
        journal.completed.push(opRef);
        fs.writeFileSync(journalPath, JSON.stringify(journal, null, 2), "utf8");
    }
    catch {
        // best-effort
    }
}
501
/**
 * Remove the journal and this run's backup directory after a run finishes.
 *
 * Both removals are best-effort; filesystem errors are ignored.
 *
 * @param {string} stashDir - Root directory of the stash.
 * @param {string} timestamp - Name of this run's backup directory. When it is
 *   not a non-empty string only the journal is removed, because an empty
 *   segment would make the backup path resolve to the directory holding every
 *   run's backups.
 * @returns {void}
 */
function cleanupJournal(stashDir, timestamp) {
    const journalPath = getJournalPath(stashDir);
    try {
        fs.unlinkSync(journalPath);
    }
    catch {
        // ignore
    }
    if (typeof timestamp !== "string" || timestamp.trim().length === 0)
        return;
    const backupDir = getBackupDir(stashDir, timestamp);
    try {
        fs.rmSync(backupDir, { recursive: true, force: true });
    }
    catch {
        // ignore
    }
}
517
/**
 * Copy a file into the run's backup directory as `<name>.md` (best-effort).
 *
 * The destination's parent directory is created first, so a `name` that
 * contains path separators is backed up under matching sub-directories
 * instead of failing silently.
 *
 * @param {string} filePath - File to back up.
 * @param {string} backupDir - Backup directory for the current run.
 * @param {string} name - Backup file name without the `.md` extension.
 * @returns {boolean} `true` when the copy was written, `false` on any error.
 */
function backupFile(filePath, backupDir, name) {
    const destination = path.join(backupDir, `${name}.md`);
    try {
        fs.mkdirSync(path.dirname(destination), { recursive: true });
        fs.copyFileSync(filePath, destination);
        return true;
    }
    catch {
        // best-effort
        return false;
    }
}
526
+ // ── Archive helper (P1-B: soft-invalidation) ─────────────────────────────────
527
/**
 * Write a copy of a memory asset into `<stashDir>/.akm/archive/` with
 * `status: superseded` frontmatter. The source file is left untouched; the
 * caller performs the live-stash delete afterwards, so this is
 * belt-and-suspenders archival that survives the hard delete.
 *
 * Archive filename: `<iso-ts>-<opIndex>-<basename>.md`
 * New frontmatter fields: status, superseded_at, superseded_by (optional),
 * superseded_reason.
 *
 * The whole function is best-effort: failing to create the archive directory,
 * read the source, or write the archive is reported through `warnings` and
 * never thrown. When the frontmatter cannot be parsed the raw file content is
 * archived unchanged.
 *
 * @param {string} filePath - Path of the memory file to archive.
 * @param {string} stashDir - Root directory of the stash.
 * @param {string} ref - Asset ref, used in warning messages.
 * @param {string} reason - Stored as `superseded_reason`.
 * @param {number} opIndex - Index of the operation, embedded in the filename.
 * @param {string} [supersededBy] - Ref stored as `superseded_by` when truthy.
 * @param {string[]} [warnings] - Optional collector for warning messages.
 * @returns {boolean} `true` when an archive file was written, otherwise `false`.
 */
function archiveMemory(filePath, stashDir, ref, reason, opIndex, supersededBy, warnings) {
    const note = (message) => {
        if (warnings)
            warnings.push(message);
    };
    const archiveDir = path.join(stashDir, ".akm", "archive");
    try {
        fs.mkdirSync(archiveDir, { recursive: true });
    }
    catch (e) {
        // Same policy as the read/write failures below: warn and let the caller continue.
        note(`archiveMemory: could not create archive directory for ${ref}: ${String(e)}`);
        return false;
    }
    let raw;
    try {
        raw = fs.readFileSync(filePath, "utf8");
    }
    catch {
        note(`archiveMemory: could not read ${ref} for archiving — skipping archive write`);
        return false;
    }
    let content = raw;
    try {
        const parsed = parseFrontmatter(raw);
        const newFm = {
            ...parsed.data,
            status: "superseded",
            superseded_at: new Date().toISOString(),
            ...(supersededBy ? { superseded_by: supersededBy } : {}),
            superseded_reason: reason,
        };
        content = assembleAssetFromString(yamlStringify(newFm).trimEnd(), parsed.content);
    }
    catch {
        note(`archiveMemory: could not parse frontmatter for ${ref} — archiving raw`);
    }
    const ts = timestampForFilename();
    const safeName = path.basename(filePath, ".md");
    const archivePath = path.join(archiveDir, `${ts}-${opIndex}-${safeName}.md`);
    try {
        fs.writeFileSync(archivePath, content, "utf8");
        return true;
    }
    catch (e) {
        note(`archiveMemory: write failed for ${ref}: ${String(e)}`);
        return false;
    }
}
575
+ // ── Main entry point ─────────────────────────────────────────────────────────
576
/**
 * Main entry point for memory consolidation.
 *
 * Flow, as implemented below:
 *   1. Returns an empty result right away when the `memory_consolidation` LLM
 *      feature is disabled, or when no memories are loaded for the target.
 *   2. Phase A: clusters the memories, slices them into chunks, requests one
 *      plan per chunk from the LLM and combines the per-chunk plans with
 *      `mergePlans`. The chunk loop stops early once at least 4 chunks were
 *      processed and half or more of them failed.
 *   3. With `opts.dryRun`, or when the confirmation prompt is declined (or
 *      skipped because the context is non-interactive), returns the plan in
 *      `planned` with `previewOnly: true` without applying anything.
 *   4. Phase B: writes the journal, applies merge / delete / promote /
 *      contradict operations in array order, then calls `cleanupJournal` and
 *      deletes archive files older than the retention window.
 *
 * @param {object} [opts] - Options read by this function: `config`,
 *   `stashDir`, `target`, `dryRun`, `recoveryMode` (defaults to "abort") and
 *   `autoAccept` (when `undefined`, a confirmation is required before apply).
 * @returns {Promise<object>} A result object with `shape: "consolidate-result"`
 *   carrying `processed`, `merged`, `deleted`, `promoted` (proposal ids),
 *   `contradicted`, `warnings`, `durationMs`, and `planned` on preview returns.
 * @throws Propagates the error raised by `checkForIncompleteJournal` when a
 *   stale journal is found and recovery mode is not "clean".
 */
+ export async function akmConsolidate(opts = {}) {
577
+ const startMs = Date.now();
578
+ const config = opts.config ?? loadConfig();
579
+ const stashDir = opts.stashDir ?? resolveStashDir();
580
+ if (!isLlmFeatureEnabled(config, "memory_consolidation")) {
581
+ return {
582
+ schemaVersion: 1,
583
+ ok: true,
584
+ shape: "consolidate-result",
585
+ dryRun: opts.dryRun ?? false,
586
+ previewOnly: false,
587
+ target: opts.target ?? stashDir,
588
+ processed: 0,
589
+ merged: 0,
590
+ deleted: 0,
591
+ promoted: [],
592
+ contradicted: 0,
593
+ warnings: [],
594
+ durationMs: Date.now() - startMs,
595
+ };
596
+ }
597
+ const warnings = [];
598
+ checkForIncompleteJournal(stashDir, opts.recoveryMode ?? "abort", warnings);
599
+ const memories = loadMemoriesForSource(opts.target, stashDir, warnings);
600
+ if (memories.length === 0) {
601
+ return {
602
+ schemaVersion: 1,
603
+ ok: true,
604
+ shape: "consolidate-result",
605
+ dryRun: opts.dryRun ?? false,
606
+ previewOnly: false,
607
+ target: opts.target ?? stashDir,
608
+ processed: 0,
609
+ merged: 0,
610
+ deleted: 0,
611
+ promoted: [],
612
+ contradicted: 0,
613
+ warnings,
614
+ durationMs: Date.now() - startMs,
615
+ };
616
+ }
617
+ // Consolidation always uses the HTTP LLM client directly — never the agent
618
+ // CLI. The agent CLI is for interactive agent sessions (reflect, propose);
619
+ // structured JSON generation works better and faster via HTTP.
620
+ const llmConfig = getDefaultLlmConfig(config);
621
// True whenever an LLM config was resolved; it gates the quality warning and
// the confirmation prompt further down.
+ const isHttpPath = !!llmConfig;
622
+ // Chunk sizing: derive a safe chunk size from the configured model context
623
+ // window so that the full prompt (system prompt + chunk user prompt) never
624
+ // exceeds the model's n_ctx limit. When no context length is configured we
625
+ // fall back to DEFAULT_CONTEXT_LENGTH_TOKENS (8 000) which is conservative
626
+ // enough for most 8K–16K local models.
627
+ //
628
+ // bodyTruncation caps the body excerpt included per memory in the prompt.
629
+ // Reducing it further than 500 chars degrades consolidation quality, so we
630
+ // keep it fixed and let computeSafeChunkSize vary the number of memories
631
+ // per chunk instead.
632
+ const bodyTruncation = 500;
633
+ const modelContextLength = llmConfig?.contextLength ?? DEFAULT_CONTEXT_LENGTH_TOKENS;
634
+ const chunkSize = computeSafeChunkSize(modelContextLength, bodyTruncation);
635
+ // -- Phase A: plan generation -----------------------------------------------
636
+ const sourceName = opts.target ?? stashDir;
637
+ // C-1 / #380: Pre-cluster memories by embedding similarity before chunking.
638
+ // This ensures that semantically similar memories land in the same LLM
639
+ // context window, allowing the model to detect and merge duplicates that
640
+ // would otherwise be split across chunks and survive indefinitely.
641
+ // mem0 arXiv:2504.19413, A-MEM arXiv:2502.12110.
642
+ // Fails open: if embeddings are unavailable or fail, original order is used.
643
+ const clusteredMemories = await clusterMemoriesBySimilarity(memories, config);
644
+ const chunks = [];
645
+ for (let i = 0; i < clusteredMemories.length; i += chunkSize) {
646
+ chunks.push(clusteredMemories.slice(i, i + chunkSize));
647
+ }
648
+ warn(`[consolidate] ${memories.length} memories / ${chunks.length} chunk(s) / chunk_size=${chunkSize}`);
649
+ const chunkOpsArrays = [];
650
+ // C-6 / #392: Replace two-consecutive-failures abort with failure-rate threshold.
651
+ // Consecutive-count policies are brittle against transient LM Studio reloads:
652
+ // two transient failures abort the run even though the next chunk would succeed.
653
+ // Rate-based abort (≥50% failure over ≥4 chunks) is more robust.
654
+ // Tanenbaum, Distributed Systems §8 — rate-based policies with minimum sample sizes.
655
+ let totalChunksProcessed = 0;
656
+ let totalChunksFailed = 0;
657
+ const ABORT_MIN_CHUNKS = 4;
658
+ const ABORT_FAILURE_RATE = 0.5;
659
+ for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
660
+ // Abort if failure rate >= 50% over at least 4 processed chunks.
661
+ if (totalChunksProcessed >= ABORT_MIN_CHUNKS) {
662
+ const failureRate = totalChunksFailed / totalChunksProcessed;
663
+ if (failureRate >= ABORT_FAILURE_RATE) {
664
+ const skipped = chunks.length - chunkIdx;
665
+ warnings.push(`Consolidation aborted — failure rate ${(failureRate * 100).toFixed(0)}% over ${totalChunksProcessed} chunks (>= ${ABORT_FAILURE_RATE * 100}% threshold). LLM may be unavailable. ${skipped} chunk(s) skipped.`);
666
+ break;
667
+ }
668
+ }
669
+ const chunk = chunks[chunkIdx];
670
+ warn(`[consolidate] chunk ${chunkIdx + 1}/${chunks.length} (${chunk.length} memories) …`);
671
+ const userPrompt = buildChunkPrompt(sourceName, chunk, chunkIdx, chunks.length, bodyTruncation);
672
+ const raw = await tryLlmFeature("memory_consolidation", config, async () => {
673
+ if (!llmConfig)
674
+ return { ok: false, error: "No LLM configured for consolidation" };
675
+ try {
676
+ // responseSchema lift (PR 1, asset-writers-investigation §5): pass
677
+ // the consolidate plan schema so providers with
678
+ // `supportsJsonSchema: true` enforce shape upstream. Providers that
679
+ // ignore the option fall through to the existing
680
+ // `parseEmbeddedJsonResponse` path on the response side.
681
+ const content = await chatCompletion(llmConfig, [
682
+ { role: "system", content: CONSOLIDATE_SYSTEM_PROMPT },
683
+ { role: "user", content: userPrompt },
684
+ ], { responseSchema: CONSOLIDATE_PLAN_JSON_SCHEMA });
685
+ return { ok: true, content };
686
+ }
687
+ catch (e) {
688
+ return { ok: false, error: String(e) };
689
+ }
690
+ }, { ok: false, error: `chunk ${chunkIdx + 1} failed` });
691
+ if (!raw.ok) {
692
+ warnings.push(raw.error ?? `chunk ${chunkIdx + 1} failed`);
693
+ totalChunksProcessed++;
694
+ totalChunksFailed++;
695
+ continue;
696
+ }
697
+ if (process.env.AKM_DEBUG_LLM) {
698
+ const preview = (raw.content ?? "").slice(0, 500);
699
+ warn(`[akm:consolidate] chunk ${chunkIdx + 1} raw response (first 500 chars): ${preview}`);
700
+ }
701
+ const parsed = parseEmbeddedJsonResponse(raw.content);
702
+ if (!parsed || !Array.isArray(parsed.operations)) {
703
+ const hint = raw.content !== undefined && raw.content.trim() === ""
704
+ ? " (empty response — if using a thinking model, disable thinking mode)"
705
+ : "";
706
+ warnings.push(`Chunk ${chunkIdx + 1}: invalid plan from AI — skipping.${hint}`);
707
+ totalChunksProcessed++;
708
+ totalChunksFailed++;
709
+ continue;
710
+ }
711
+ totalChunksProcessed++; // success
712
+ const ops = [];
713
+ for (const op of parsed.operations) {
714
+ if (isValidOp(op)) {
715
+ ops.push(op);
716
+ }
717
+ else {
718
+ warnings.push(`Chunk ${chunkIdx + 1}: skipping invalid operation: ${JSON.stringify(op)}`);
719
+ }
720
+ }
721
+ if (Array.isArray(parsed.warnings)) {
722
+ for (const w of parsed.warnings) {
723
+ if (typeof w === "string")
724
+ warnings.push(w);
725
+ }
726
+ }
727
+ chunkOpsArrays.push(ops);
728
+ }
729
+ const { ops: allOps, warnings: mergeWarnings } = mergePlans(chunkOpsArrays);
730
+ warnings.push(...mergeWarnings);
731
+ // -- Dry-run: show AI plan without executing any writes --------------------
732
+ if (opts.dryRun) {
733
+ return {
734
+ schemaVersion: 1,
735
+ ok: true,
736
+ shape: "consolidate-result",
737
+ dryRun: true,
738
+ previewOnly: true,
739
+ target: sourceName,
740
+ processed: memories.length,
741
+ merged: 0,
742
+ deleted: 0,
743
+ promoted: [],
744
+ contradicted: 0,
745
+ planned: allOps,
746
+ warnings,
747
+ durationMs: Date.now() - startMs,
748
+ };
749
+ }
750
+ warn(`[consolidate] plan: ${allOps.length} operation(s)`);
751
+ // -- HTTP path: warn about quality and confirm unless auto-accepted --------
752
+ if (isHttpPath) {
753
+ warnings.push("Running on HTTP path — plan generated from truncated memory excerpts; quality may vary.");
754
+ // TODO(confidence-scoring): once proposals expose a per-operation
755
+ // confidence score, compare it against `opts.autoAccept` instead of
756
+ // treating any defined threshold as a whole-batch accept. Until then,
757
+ // any non-undefined threshold behaves like the legacy `"safe"` mode.
758
+ if (opts.autoAccept === undefined && allOps.length > 0) {
759
+ const n = allOps.length;
760
+ // Non-interactive contexts (CI / test runners / piped stdin) must not
761
+ // block on an unanswerable prompt. Default to a non-destructive "no"
762
+ // so callers in those contexts get the same "aborted, preview only"
763
+ // shape they'd get from explicit user dismissal. AKM_NON_INTERACTIVE
764
+ // lets callers force this path even when stdin happens to be a TTY.
765
+ const nonInteractive = process.stdin.isTTY === false || process.env.AKM_NON_INTERACTIVE === "1";
766
+ const answer = nonInteractive ? false : await promptConfirm(`Apply ${n} operations? [y/N] `);
767
+ if (!answer) {
768
+ return {
769
+ schemaVersion: 1,
770
+ ok: true,
771
+ shape: "consolidate-result",
772
+ dryRun: false,
773
+ previewOnly: true,
774
+ target: sourceName,
775
+ processed: memories.length,
776
+ merged: 0,
777
+ deleted: 0,
778
+ promoted: [],
779
+ contradicted: 0,
780
+ planned: allOps,
781
+ warnings: [...warnings, nonInteractive ? "Non-interactive context: skipped apply." : "Aborted by user."],
782
+ durationMs: Date.now() - startMs,
783
+ };
784
+ }
785
+ }
786
+ }
787
+ // -- Phase B + writes -------------------------------------------------------
788
+ const target = resolveWriteTarget(config);
789
+ const timestamp = timestampForFilename();
790
+ const backupDir = getBackupDir(stashDir, timestamp);
791
+ // Write journal before any mutations
792
+ writeJournal(stashDir, allOps, timestamp);
793
+ let merged = 0;
794
+ let deleted = 0;
795
+ const promoted = [];
796
+ let contradicted = 0; // C-3 / #382: count of contradiction edges written
797
+ // Within-run dedup: track source refs for which a promote proposal was
798
+ // already created this run. The LLM can return multiple promote ops for
799
+ // different source memories that happen to have identical content (all are
800
+ // duplicate memories), so we also need a content-hash guard below.
801
+ const promotedSourceRefs = new Set();
802
+ // Build a lookup map: ref → MemoryEntry
803
+ const memoryByRef = new Map();
804
+ for (const m of memories) {
805
+ memoryByRef.set(`memory:${m.name}`, m);
806
+ }
807
+ for (let opIndex = 0; opIndex < allOps.length; opIndex++) {
808
+ const op = allOps[opIndex];
809
+ const opDisplayRef = op.op === "merge" ? op.primary : op.op === "contradict" ? `${op.ref} ↔ ${op.contradictedByRef}` : op.ref;
810
+ warn(`[consolidate] ${opIndex + 1}/${allOps.length} ${op.op} ${opDisplayRef}`);
811
+ if (op.op === "merge") {
812
+ const primaryEntry = memoryByRef.get(op.primary);
813
+ if (!primaryEntry) {
814
+ warnings.push(`Merge: primary ${op.primary} not found in loaded memories — skipping.`);
815
+ continue;
816
+ }
817
+ // Phase B: generate merged content
818
+ const secondaryBodies = [];
819
+ for (const secRef of op.secondaries) {
820
+ const secEntry = memoryByRef.get(secRef);
821
+ if (!secEntry) {
822
+ warnings.push(`Merge: secondary ${secRef} not found — skipping merge op.`);
823
// NOTE(review): this `continue` only advances the inner loop over secondaries.
// The merge op itself is skipped only when no secondary resolves at all, and
// the full `op.secondaries` list is still passed on below — confirm that this
// matches the "skipping merge op" wording of the warning.
+ continue;
824
+ }
825
+ secondaryBodies.push(secRef);
826
+ }
827
+ if (secondaryBodies.length === 0)
828
+ continue;
829
+ let primaryBody = "";
830
+ try {
831
+ primaryBody = fs.readFileSync(primaryEntry.filePath, "utf8");
832
+ }
833
+ catch {
834
+ warnings.push(`Merge: could not read primary ${op.primary} — skipping.`);
835
+ continue;
836
+ }
837
+ const mergedContent = await generateMergedContent(config, op.primary, primaryBody, op.secondaries, memoryByRef, warnings);
838
+ if (mergedContent === null)
839
+ continue;
840
+ // Validate frontmatter of merged content — must have a `---` block
841
+ // with at minimum a `description` field. We parse via the hand-rolled
842
+ // parser (cheap) AND require non-empty description. This guards against
843
+ // the historical defect where merged memories were written back with
844
+ // empty `description` and later polluted the promote path.
845
+ let parsedMerged;
846
+ try {
847
+ parsedMerged = parseFrontmatter(mergedContent);
848
+ }
849
+ catch {
850
+ warnings.push(`Merge: merged content for ${op.primary} has invalid frontmatter — skipping.`);
851
+ continue;
852
+ }
853
+ if (parsedMerged.frontmatter === null) {
854
+ warnings.push(`Merge: merged content for ${op.primary} has no frontmatter block — skipping.`);
855
+ continue;
856
+ }
857
+ const mergedDesc = parsedMerged.data.description;
858
+ if (typeof mergedDesc !== "string" || mergedDesc.trim().length === 0) {
859
+ warnings.push(`Merge: merged content for ${op.primary} missing description — skipping.`);
860
+ continue;
861
+ }
862
+ const truncReason = detectTruncatedDescription(mergedDesc);
863
+ if (truncReason) {
864
+ warnings.push(`Merge: merged content for ${op.primary} has truncated description (${truncReason}) — skipping.`);
865
+ continue;
866
+ }
867
+ // captureMode:hot guard — refuse the merge if ANY participating memory
868
+ // (primary or secondary) was user-captured or has unparseable frontmatter
869
+ // (could have hidden a hot flag). Hot memories are user-explicit and
870
+ // must not be deleted/overwritten by the consolidate LLM. 14 user
871
+ // memories were silent-deleted by consolidate before this guard landed;
872
+ // recovery required copying from .akm/archive/ by hand.
873
+ const mergeParticipants = [op.primary, ...op.secondaries];
874
+ const blockedParticipants = mergeParticipants.flatMap((ref) => {
875
+ const e = memoryByRef.get(ref);
876
+ if (!e)
877
+ return [];
878
+ const verdict = consolidateGuardStatus(e.filePath);
879
+ if (verdict === "hot" || verdict === "unparseable")
880
+ return [{ ref, verdict }];
881
+ return [];
882
+ });
883
+ if (blockedParticipants.length > 0) {
884
+ const detail = blockedParticipants.map((p) => `${p.ref} (${p.verdict})`).join(", ");
885
+ warnings.push(`Merge: refused for ${op.primary} — ${blockedParticipants.length} participant(s) blocked by hot/unparseable frontmatter guard: ${detail}`);
886
+ continue;
887
+ }
888
+ // Backup secondaries before deleting
889
+ for (const secRef of op.secondaries) {
890
+ const secEntry = memoryByRef.get(secRef);
891
+ if (secEntry && fs.existsSync(secEntry.filePath)) {
892
+ backupFile(secEntry.filePath, backupDir, secEntry.name);
893
+ }
894
+ }
895
+ // Write merged primary
896
+ try {
897
+ const parsedPrimary = parseAssetRef(op.primary);
898
+ await writeAssetToSource(target.source, target.config, parsedPrimary, mergedContent);
899
+ }
900
+ catch (e) {
901
+ warnings.push(`Merge: write failed for ${op.primary}: ${String(e)}`);
902
+ continue;
903
+ }
904
+ // Archive and delete secondaries (P1-B: soft-invalidation)
905
+ for (const secRef of op.secondaries) {
906
+ const secEntry = memoryByRef.get(secRef);
907
+ if (!secEntry)
908
+ continue;
909
+ if (fs.existsSync(secEntry.filePath)) {
910
+ archiveMemory(secEntry.filePath, stashDir, secRef, "merged into primary", opIndex, op.primary, warnings);
911
+ }
912
+ try {
913
+ const parsedSec = parseAssetRef(secRef);
914
+ await deleteAssetFromSource(target.source, target.config, parsedSec);
915
+ markJournalCompleted(stashDir, secRef);
916
+ }
917
+ catch (e) {
918
+ warnings.push(`Merge: delete failed for ${secRef}: ${String(e)}`);
919
+ }
920
+ }
921
+ markJournalCompleted(stashDir, op.primary);
922
+ merged++;
923
+ }
924
+ else if (op.op === "delete") {
925
+ const entry = memoryByRef.get(op.ref);
926
+ if (!entry) {
927
+ warnings.push(`Delete: ${op.ref} not found in loaded memories — skipping.`);
928
+ continue;
929
+ }
930
+ // captureMode:hot guard — refuse to delete user-captured memories OR
931
+ // memories whose frontmatter is unparseable (could have hidden the hot
932
+ // flag). The consolidate LLM was deleting hot-captured user memos as
933
+ // "redundant" — 14 such deletes were silently archived between
934
+ // 2026-05-19 and 2026-05-20 before this guard. Hot memories are
935
+ // user-explicit and may only be deleted by the user.
936
+ const guard = consolidateGuardStatus(entry.filePath);
937
+ if (guard === "hot" || guard === "unparseable") {
938
+ warnings.push(`Delete: refused for ${op.ref} — ${guard === "hot" ? "captureMode:hot (user-explicit; never auto-delete)" : "frontmatter unparseable (cannot verify hot flag absent)"}. Reason from LLM: "${op.reason ?? "n/a"}"`);
939
+ continue;
940
+ }
941
+ if (fs.existsSync(entry.filePath)) {
942
+ backupFile(entry.filePath, backupDir, entry.name);
943
+ // P1-B: soft-invalidation archive before hard delete
944
+ archiveMemory(entry.filePath, stashDir, op.ref, op.reason, opIndex, undefined, warnings);
945
+ }
946
+ try {
947
+ const parsedRef = parseAssetRef(op.ref);
948
+ await deleteAssetFromSource(target.source, target.config, parsedRef);
949
+ markJournalCompleted(stashDir, op.ref);
950
+ deleted++;
951
+ }
952
+ catch (e) {
953
+ warnings.push(`Delete: failed for ${op.ref}: ${String(e)}`);
954
+ }
955
+ }
956
+ else if (op.op === "promote") {
957
+ const entry = memoryByRef.get(op.ref);
958
+ if (!entry) {
959
+ warnings.push(`Promote: ${op.ref} not found in loaded memories — skipping.`);
960
+ continue;
961
+ }
962
+ // Within-run source-ref dedup: skip if this source memory was already
963
+ // promoted earlier in this run (safety belt — mergePlans already
964
+ // deduplicates promote ops by source ref via Map, but this guard also
965
+ // catches any future code paths that bypass mergePlans).
966
+ if (promotedSourceRefs.has(op.ref)) {
967
+ warnings.push(`Skipping promote: ${op.ref} already promoted in this run`);
968
+ continue;
969
+ }
970
+ let knowledgeRef = op.knowledgeRef;
971
+ try {
972
+ parseAssetRef(knowledgeRef);
973
+ }
974
+ catch {
975
+ const slug = op.knowledgeRef
976
+ .replace(/^knowledge:/, "")
977
+ .replace(/[^a-z0-9-]/gi, "-")
978
+ .toLowerCase();
979
+ knowledgeRef = `knowledge:${slug}`;
980
+ warnings.push(`Normalized invalid ref "${op.knowledgeRef}" → "${knowledgeRef}"`);
981
+ }
982
+ // Idempotency: check pending proposals by target ref
983
+ const existingProposals = listProposals(stashDir, { ref: knowledgeRef });
984
+ if (existingProposals.some((p) => p.status === "pending")) {
985
+ warnings.push(`Skipping promote: pending proposal already exists for ${knowledgeRef}`);
986
+ continue;
987
+ }
988
+ // Idempotency: check if knowledge asset already exists
989
// NOTE(review): this parseAssetRef call is not inside a try block. If the
// normalized ref above still fails to parse, the error would propagate out of
// akmConsolidate before cleanupJournal runs — confirm whether that is intended.
+ const parsedKnowledgeRef = parseAssetRef(knowledgeRef);
990
+ const destPath = path.join(target.source.path, "knowledge", `${parsedKnowledgeRef.name}.md`);
991
+ if (fs.existsSync(destPath)) {
992
+ warnings.push(`Skipping promote: ${knowledgeRef} already exists in source`);
993
+ continue;
994
+ }
995
+ let memoryContent = "";
996
+ try {
997
+ memoryContent = fs.readFileSync(entry.filePath, "utf8");
998
+ }
999
+ catch (e) {
1000
+ warnings.push(`Promote: could not read ${op.ref}: ${String(e)}`);
1001
+ continue;
1002
+ }
1003
+ // Defensive sanitization: legacy memory files written by older
1004
+ // consolidate runs may still carry outer code fences or broken YAML.
1005
+ // Strip them here so we never propose a polluted asset.
1006
+ const promoteSanitized = sanitizeMergedContent(memoryContent);
1007
+ if (!promoteSanitized.ok) {
1008
+ warnings.push(`Promote: rejected ${op.ref} — source memory failed sanitization (${promoteSanitized.reason}).`);
1009
+ continue;
1010
+ }
1011
+ memoryContent = promoteSanitized.result.content;
1012
+ // SOURCE_SUPERSEDED guard: refuse to promote a memory whose source
1013
+ // frontmatter carries `status: superseded`. Predicate at module top
1014
+ // (`hasSupersededStatus`) so tests can exercise it directly.
1015
+ if (hasSupersededStatus(promoteSanitized.result.frontmatter)) {
1016
+ warnings.push(`Promote: refused for ${op.ref} → ${knowledgeRef} — source memory has status:superseded; superseded memories are not promotable knowledge.`);
1017
+ continue;
1018
+ }
1019
+ // Cross-run + within-run content dedup: if an identical payload already
1020
+ // exists in ANY pending consolidate proposal (regardless of target ref),
1021
+ // skip. This prevents duplicate proposals when:
1022
+ // (a) Multiple source memories have identical content (duplicate memories
1023
+ // that were not merged) and each gets a different knowledgeRef from
1024
+ // the LLM in the same run.
1025
+ // (b) A prior run created a proposal for the same content under a
1026
+ // different knowledgeRef slug.
1027
+ // We use SHA-256 of the raw file content — same algorithm as createProposal's
1028
+ // internal contentHash so the comparison is consistent.
1029
+ const newContentHash = createHash("sha256").update(memoryContent, "utf8").digest("hex");
1030
+ const allPendingConsolidateProposals = listProposals(stashDir, { status: "pending" }).filter((p) => p.source === "consolidate");
1031
+ const contentDupProposal = allPendingConsolidateProposals.find((p) => createHash("sha256").update(p.payload.content, "utf8").digest("hex") === newContentHash);
1032
+ if (contentDupProposal) {
1033
+ warnings.push(`Skipping promote: identical content already pending as proposal ${contentDupProposal.id} (ref: ${contentDupProposal.ref}); skipping duplicate for ${op.ref} → ${knowledgeRef}`);
1034
+ continue;
1035
+ }
1036
+ try {
1037
+ // Use LLM-provided description; fall back to memory's own description
1038
+ // (post-sanitization frontmatter is authoritative).
1039
+ const parsedMemory = parseFrontmatter(memoryContent);
1040
+ const description = (typeof op.description === "string" && op.description.trim()
1041
+ ? op.description.trim()
1042
+ : parsedMemory.data?.description?.trim()) ?? "";
1043
+ // Validate the resolved frontmatter before emitting a proposal.
1044
+ // Required field: non-empty description. Reject obvious truncation
1045
+ // markers (description ends with `,`/`;`/`:`/`...`/hanging connector)
1046
+ // so the queue never sees half-formed metadata that the reviewer
1047
+ // would only reject.
1048
+ const fmCheck = validateProposalFrontmatter({ description });
1049
+ if (!fmCheck.ok) {
1050
+ warnings.push(`Promote: rejected ${op.ref} → ${knowledgeRef} — ${fmCheck.reason}.`);
1051
+ continue;
1052
+ }
1053
+ // (Body-frontmatter check REMOVED 2026-05-20: zero observed fires
1054
+ // across 17 sampled runs, and structurally redundant with
1055
+ // sanitizeMergedContent which already round-trips the body
1056
+ // frontmatter through the yaml library. The body and envelope
1057
+ // frontmatter come from the same `parsedMemory.data` object in this
1058
+ // scope, so the outer `validateProposalFrontmatter({ description })`
1059
+ // call above is sufficient.)
1060
+ // Pre-emit dedup against pending consolidate proposals from the
1061
+ // same improve run (slug-variant match). The cross-run content-hash
1062
+ // dedup inside `mergePlans` handles duplicates against existing
1063
+ // stash assets — see commit history for the deletion of the
1064
+ // unbounded embedding + cross-type slug branches.
1065
+ const dedup = await checkPreEmitDedup({
1066
+ candidateRef: knowledgeRef,
1067
+ candidateText: `${description}. ${memoryContent}`,
1068
+ stashDir,
1069
+ config,
1070
+ });
1071
+ if (dedup.duplicate) {
1072
+ warnings.push(`Promote: skipped ${op.ref} → ${knowledgeRef} — ${dedup.reason}.`);
1073
+ continue;
1074
+ }
1075
+ const proposalResult = createProposal(stashDir, {
1076
+ ref: knowledgeRef,
1077
+ source: "consolidate",
1078
+ payload: {
1079
+ content: memoryContent,
1080
+ frontmatter: { description },
1081
+ },
1082
+ });
1083
+ if (isProposalSkipped(proposalResult)) {
1084
+ warnings.push(`Promote: skipped proposal for ${op.ref} (${proposalResult.reason}): ${proposalResult.message}`);
1085
+ }
1086
+ else {
1087
+ promoted.push(proposalResult.id);
1088
+ promotedSourceRefs.add(op.ref);
1089
+ markJournalCompleted(stashDir, op.ref);
1090
+ }
1091
+ }
1092
+ catch (e) {
1093
+ warnings.push(`Promote: createProposal failed for ${op.ref}: ${String(e)}`);
1094
+ }
1095
+ }
1096
+ else if (op.op === "contradict") {
1097
+ // C-3 / #382: Write contradictedBy edges so resolveFamilyContradictions
1098
+ // (the SCC resolver in memory-improve.ts) has edges to work on.
1099
+ // Zep arXiv:2501.13956 §3 — unified belief-revision with contradiction edges.
1100
+ const entry = memoryByRef.get(op.ref);
1101
+ const contradictorEntry = memoryByRef.get(op.contradictedByRef);
1102
+ if (!entry) {
1103
+ warnings.push(`Contradict: ${op.ref} not found in loaded memories — skipping.`);
1104
+ continue;
1105
+ }
1106
+ if (!contradictorEntry) {
1107
+ warnings.push(`Contradict: ${op.contradictedByRef} not found — skipping.`);
1108
+ continue;
1109
+ }
1110
+ try {
1111
+ // Write the contradiction edge: op.ref is contradicted by op.contradictedByRef
1112
+ writeContradictEdge(entry.filePath, op.contradictedByRef);
1113
+ contradicted++;
1114
+ markJournalCompleted(stashDir, op.ref);
1115
+ }
1116
+ catch (e) {
1117
+ warnings.push(`Contradict: failed to write edge for ${op.ref}: ${String(e)}`);
1118
+ }
1119
+ }
1120
+ }
1121
+ cleanupJournal(stashDir, timestamp);
1122
+ // TTL cleanup: remove archive entries older than archiveRetentionDays (default 90).
1123
+ // C-5 / #391: emit an `archive_cleanup` event before each deletion so the
1124
+ // audit trail records what was lost. Outbox pattern (EIP, Hohpe-Woolf) —
1125
+ // any event that is recorded must be queryable; silent deletes are an anti-pattern.
1126
+ const archiveDir = path.join(stashDir, ".akm", "archive");
1127
+ if (fs.existsSync(archiveDir)) {
1128
+ const retentionMs = (config.archiveRetentionDays ?? 90) * 86_400_000;
1129
+ const cutoff = Date.now() - retentionMs;
1130
+ for (const fname of fs.readdirSync(archiveDir)) {
1131
+ const fp = path.join(archiveDir, fname);
1132
+ try {
1133
+ const stat = fs.statSync(fp);
1134
+ if (stat.mtimeMs < cutoff) {
1135
+ // Emit event before deletion so the record survives the purge.
1136
+ appendEvent({
1137
+ eventType: "archive_cleanup",
1138
+ metadata: {
1139
+ file: fname,
1140
+ filePath: fp,
1141
+ ageMs: Date.now() - stat.mtimeMs,
1142
+ retentionMs,
1143
+ },
1144
+ });
1145
+ fs.unlinkSync(fp);
1146
+ }
1147
+ }
1148
+ catch {
1149
+ /* ignore race conditions */
1150
+ }
1151
+ }
1152
+ }
1153
+ return {
1154
+ schemaVersion: 1,
1155
+ ok: true,
1156
+ shape: "consolidate-result",
1157
+ dryRun: false,
1158
+ previewOnly: false,
1159
+ target: sourceName,
1160
+ processed: memories.length,
1161
+ merged,
1162
+ deleted,
1163
+ promoted,
1164
+ contradicted,
1165
+ warnings,
1166
+ durationMs: Date.now() - startMs,
1167
+ };
1168
+ }
1169
+ // ── Helpers ─────────────────────────────────────────────────────────────────
1170
+ // ── LLM-output sanitization ─────────────────────────────────────────────────
1171
+ //
1172
+ // Three classes of LLM defect have been observed across hundreds of
1173
+ // consolidate proposals (see audit notes in this branch):
1174
+ //
1175
+ // 1. Code-fence leakage: the entire merged asset is wrapped in
1176
+ // ```markdown … ``` (or ```yaml … ```) despite the prompt forbidding
1177
+ // fences. The post-processor used to pass this through verbatim, so the
1178
+ // first character of the asset content became a backtick rather than
1179
+ // `---`, defeating the frontmatter parser.
1180
+ // 2. YAML quote-escaping bugs: descriptions like `'"Specialty intro...:`
1181
+ // with unbalanced quotes that break the YAML reader. The post-processor
1182
+ // historically passed the LLM's raw scalar straight into a manually
1183
+ // assembled `description: <raw>` line.
1184
+ // 3. Truncated descriptions hitting token cutoffs — the model's max_tokens
1185
+ // runs out mid-sentence, leaving things like
1186
+ // `description: "Tables in narrow column containers need max-width:100% +"`
1187
+ // with no closing context.
1188
+ //
1189
+ // `sanitizeMergedContent` and `validateProposalFrontmatter` defend against
1190
+ // all three at the point where LLM output is consumed.
1191
/**
 * Outer-fence stripper specific to consolidate. Recognises an opening fence
 * that is either bare (```) or tagged `markdown`, `md`, `yaml` or `yml`
 * (case-insensitive), paired with a closing ``` on the last line.
 *
 * Behaviour, evaluated on the trimmed input:
 *   - no opening and no closing fence → content returned untouched;
 *   - both present → the text between them is returned (trimmed);
 *   - opening fence only → `null` (malformed response, not partially sanitized);
 *   - closing fence only → `null` when the number of fence lines in the text
 *     is odd. When it is even, the final ``` is the close of an ordinary
 *     fenced code block inside the body (e.g. an asset that ends with a code
 *     sample), so the content is returned untouched instead of being rejected.
 *
 * @param {string} raw - Raw LLM response text.
 * @returns {{content: string, stripped: boolean} | null} The (possibly
 *   unwrapped) content, or `null` when only one half of an outer fence pair
 *   is present (caller should reject the response entirely).
 */
export function stripOuterCodeFence(raw) {
    const trimmed = raw.trim();
    const leading = trimmed.match(/^```(?:markdown|md|yaml|yml)?\s*\r?\n/i);
    const trailing = trimmed.match(/\r?\n```\s*$/);
    if (!leading && !trailing)
        return { content: trimmed, stripped: false };
    if (leading && trailing) {
        const inner = trimmed.slice(leading[0].length, trimmed.length - trailing[0].length).trim();
        return { content: inner, stripped: true };
    }
    if (trailing) {
        // No outer opening fence. An even number of fence lines means every
        // fence in the body is paired, so the last one closes an inner block.
        const fenceLines = trimmed.match(/^ {0,3}```/gm) ?? [];
        if (fenceLines.length % 2 === 0)
            return { content: trimmed, stripped: false };
    }
    return null; // unbalanced — refuse
}
1212
/**
 * Sanitize a raw LLM merge response into a well-formed asset string.
 *
 * Steps: strip an outer code fence, remove `<think>…</think>` blocks, require
 * a leading `---` frontmatter block, round-trip the frontmatter through the
 * yaml parser/serialiser (so quoting mistakes are either normalised or
 * rejected), normalise a placeholder `updated:` value, and reassemble the
 * asset via `assembleAssetFromString`.
 *
 * @param {string} raw - Raw LLM response text.
 * @returns {{ok: true, result: {content: string, frontmatter: object}} | {ok: false, reason: string}}
 *   On failure `reason` is one of UNBALANCED_CODE_FENCE,
 *   MISSING_FRONTMATTER_SENTINEL, MALFORMED_FRONTMATTER_BLOCK,
 *   FRONTMATTER_NOT_OBJECT, or an `INVALID_YAML: …` /
 *   `YAML_STRINGIFY_FAILED: …` message.
 */
export function sanitizeMergedContent(raw) {
    const thinkBlockRe = /<think>[\s\S]*?<\/think>/gi;
    let fenceResult = stripOuterCodeFence(raw);
    if (!fenceResult) {
        // A <think>…</think> preamble emitted before the opening fence hides
        // that fence from the stripper and makes a balanced pair look
        // unbalanced. Retry with the think blocks removed before giving up.
        fenceResult = stripOuterCodeFence(raw.replace(thinkBlockRe, ""));
    }
    if (!fenceResult) {
        return { ok: false, reason: "UNBALANCED_CODE_FENCE" };
    }
    // Strip <think> blocks (some local models still emit them despite system prompts).
    const body = fenceResult.content.replace(thinkBlockRe, "").trim();
    if (!body.startsWith("---")) {
        return { ok: false, reason: "MISSING_FRONTMATTER_SENTINEL" };
    }
    // Extract frontmatter block: match[1] is the YAML text, match[2] the body.
    const match = body.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r\n|\r|\n|$)([\s\S]*)$/);
    if (!match) {
        return { ok: false, reason: "MALFORMED_FRONTMATTER_BLOCK" };
    }
    // Re-parse via the yaml library so any quote-escaping mistakes either get
    // normalised or surface as a parse error we can reject.
    let parsedFm;
    try {
        parsedFm = yamlParse(match[1]);
    }
    catch (e) {
        return { ok: false, reason: `INVALID_YAML: ${e instanceof Error ? e.message : String(e)}` };
    }
    if (parsedFm === null || typeof parsedFm !== "object" || Array.isArray(parsedFm)) {
        return { ok: false, reason: "FRONTMATTER_NOT_OBJECT" };
    }
    const fm = parsedFm;
    // Normalise placeholder leaks like `updated: today`, `updated: {today: null}`,
    // `updated: now`, etc. The consolidate prompt instructs the LLM not to emit
    // these, but small models still do.
    normalizeUpdatedField(fm);
    // Re-serialise via yaml.stringify to fix any quoting quirks.
    let serialized;
    try {
        serialized = yamlStringify(fm).trimEnd();
    }
    catch (e) {
        return { ok: false, reason: `YAML_STRINGIFY_FAILED: ${e instanceof Error ? e.message : String(e)}` };
    }
    const cleaned = assembleAssetFromString(serialized, match[2]);
    return { ok: true, result: { content: cleaned, frontmatter: fm } };
}
1257
/**
 * Mutate `fm.updated` in place to normalise placeholder leaks emitted by the
 * LLM. The consolidate prompt forbids these, but small models still produce
 * literal `today` / `{today: null}` / `now` values.
 *
 * Rules:
 *   - A string starting with an ISO-style date (YYYY-MM-DD, optionally
 *     followed by a time) stays as-is.
 *   - A valid Date object (some YAML parsers materialise dates) is converted
 *     to its ISO yyyy-mm-dd form. An invalid Date cannot be serialised
 *     (`toISOString()` would throw), so it is treated like any other
 *     non-date object and replaced with today's ISO date.
 *   - A placeholder string — `today` or `now`, optionally wrapped in template
 *     syntax such as `{today}`, `${today}`, `{{ today }}`, `{now}` — is
 *     replaced with today's ISO date (case-insensitive).
 *   - Any other object (e.g. the map `{today: null}`, or an array) is
 *     replaced with today's ISO date.
 *   - `null`, `undefined`, empty string, missing key → left alone (no field
 *     added; reviewers should not silently gain metadata they didn't write).
 *   - Any other string, and non-string primitives, are left alone so they
 *     stay visible in the diff.
 *
 * Exported for unit testing.
 *
 * @param {object} fm - Parsed frontmatter object; mutated in place.
 * @returns {void}
 */
export function normalizeUpdatedField(fm) {
    if (!("updated" in fm))
        return;
    const v = fm.updated;
    if (v === null || v === undefined || v === "")
        return;
    const todayIso = new Date().toISOString().slice(0, 10);
    if (v instanceof Date) {
        // Guard: toISOString() throws RangeError on an invalid Date.
        fm.updated = Number.isNaN(v.getTime()) ? todayIso : v.toISOString().slice(0, 10);
        return;
    }
    if (typeof v === "string") {
        const trimmed = v.trim();
        if (/^\d{4}-\d{2}-\d{2}/.test(trimmed))
            return; // already a real date
        // Optional `$`, up to two opening braces, today|now, up to two closing braces.
        if (/^\$?\{{0,2}\s*(?:today|now)\s*\}{0,2}$/i.test(trimmed)) {
            fm.updated = todayIso;
        }
        // Unknown string format — leave alone so it's visible in the diff.
        return;
    }
    if (typeof v === "object") {
        // Maps like `{today: null}`, `{now: null}` — clearly a template leak.
        fm.updated = todayIso;
    }
}
1308
/**
 * Normalise a knowledge slug for variant-aware deduplication.
 *
 * The `type:` prefix (everything up to the first colon) is removed, the slug
 * is lower-cased and split on `-`, and then:
 *   - empty tokens are dropped;
 *   - purely numeric tokens are dropped (years, date parts, counter suffixes
 *     such as `-2026-05-03`, `-2026`, `-2`, `-3`);
 *   - tokens that ARE a month name or abbreviation (`may`, `sept`,
 *     `december`, …) are dropped, so `-may-2026` collapses away;
 *   - the remaining tokens are sorted alphabetically to absorb word
 *     reorderings.
 *
 * The month test matches whole tokens only. Ordinary words that merely
 * contain a month abbreviation (`markdown`, `summary`, `decision`,
 * `separate`) carry meaning and must be kept, otherwise unrelated slugs
 * collapse to the same key.
 *
 * Two slugs that normalise to the same string are considered the same asset
 * for dedup purposes even if they don't share an exact ref.
 *
 * @param {string} ref - Asset ref such as `knowledge:css-tables-may-2026`.
 * @returns {string} Normalised, `-`-joined token string (may be empty).
 */
export function normalizeSlugForDedup(ref) {
    const slug = ref.replace(/^[^:]+:/, "");
    // Anchored on both ends: only whole-token month names/abbreviations match.
    const monthRe = /^(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t|tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)$/;
    const tokens = slug
        .toLowerCase()
        .split("-")
        .filter((tok) => tok.length > 0)
        // Numbers carry no semantic information for our dedup purposes — every
        // observed defective slug variant differs only in dates or counters.
        .filter((tok) => !/^\d+$/.test(tok))
        .filter((tok) => !monthRe.test(tok));
    // Sort to absorb word reorderings.
    tokens.sort();
    return tokens.join("-");
}
1334
/**
 * Pre-emit dedup check: compare the candidate ref against pending consolidate
 * proposals only.
 *
 * Historical context (REMOVED 2026-05-20): this function previously also ran
 *   (a) a normalised-slug match against existing knowledge AND memory entries
 *       in the DB, and
 *   (b) an embedding cosine-similarity check (>= 0.85) against ALL knowledge
 *       and non-derived memory entries.
 * Both branches had ZERO observed fires across 30 sampled runs in the
 * post-fix window. The 29 actual dedup catches all came from the SEPARATE
 * content-hash dedup inside `mergePlans` (the older SHA-256 helper). The
 * embedding branch in particular had unbounded cost per promote (embedded
 * every knowledge + non-derived memory entry, every time) with no observed
 * benefit. Empirical signal → deleted.
 *
 * What remains: a check against pending proposals whose `source` is
 * "consolidate". This is intended to catch duplicates queued back-to-back
 * within a single improve invocation — a different concern from the
 * cross-run content-hash dedup, and cheap (no embeddings, no DB query).
 *
 * A ref whose normalised slug is empty (it consisted only of tokens the
 * normaliser discards) has no tokens left to compare, so it is only reported
 * as a duplicate of a pending proposal with the exact same ref. Otherwise
 * every such ref would collide with every other one.
 *
 * @param {{candidateRef: string, stashDir: string}} opts - Candidate ref and
 *   the stash directory whose pending proposals are listed.
 * @returns {Promise<{duplicate: true, reason: string} | {duplicate: false}>}
 *   `duplicate: true` with a human-readable reason when a slug-variant match
 *   is found, else `duplicate: false`.
 */
export async function checkPreEmitDedup(opts) {
    const normCandidate = normalizeSlugForDedup(opts.candidateRef);
    // Pending consolidate proposals (slug match) — within the same improve run.
    const pendingConsolidate = listProposals(opts.stashDir, { status: "pending" }).filter((p) => p.source === "consolidate");
    for (const p of pendingConsolidate) {
        if (normalizeSlugForDedup(p.ref) !== normCandidate)
            continue;
        // Empty normal form: nothing meaningful matched, so require identical refs.
        if (normCandidate === "" && p.ref !== opts.candidateRef)
            continue;
        return { duplicate: true, reason: `slug-variant of pending proposal ${p.id} (${p.ref})` };
    }
    return { duplicate: false };
}
1367
/**
 * Collect the memory entries eligible for consolidation.
 *
 * The index database is consulted first. When that yields nothing (the DB
 * could not be opened, threw, or had no eligible rows), the `memories`
 * directory under `source ?? stashDir` is scanned for `.md` files instead,
 * and a warning is recorded if that scan found anything.
 *
 * @param {string | undefined} source - Optional stash directory to restrict to.
 * @param {string} stashDir - Default stash directory for the filesystem scan.
 * @param {string[]} warnings - Mutable list that receives the fallback warning.
 * @returns {Array<{name: string, filePath: string, description: string, tags: Array, stashDir: string}>}
 */
function loadMemoriesForSource(source, stashDir, warnings) {
    const collected = [];
    let handle;
    try {
        handle = openExistingDatabase();
        // Resolve the requested source once; a falsy source means "no filter".
        const wantedDir = source ? path.resolve(source) : undefined;
        for (const row of getAllEntries(handle, "memory")) {
            if (wantedDir !== undefined && path.resolve(row.stashDir) !== wantedDir) {
                continue;
            }
            if (!isConsolidationEligibleMemoryName(row.entry.name)) {
                continue;
            }
            collected.push({
                name: row.entry.name,
                filePath: row.filePath,
                description: row.entry.description ?? "",
                tags: row.entry.tags ?? [],
                stashDir: row.stashDir,
            });
        }
    }
    catch {
        // Any DB failure discards partial results and defers to the filesystem scan.
        collected.length = 0;
    }
    finally {
        if (handle) {
            closeDatabase(handle);
        }
    }
    if (collected.length > 0) {
        return collected;
    }
    // DB fallback: walk filesystem
    const fsStashDir = source ?? stashDir;
    const memoriesDir = path.join(fsStashDir, "memories");
    if (fs.existsSync(memoriesDir)) {
        const markdownFiles = fs.readdirSync(memoriesDir).filter((fname) => fname.endsWith(".md"));
        for (const fname of markdownFiles) {
            const name = fname.replace(/\.md$/, "");
            if (isConsolidationEligibleMemoryName(name)) {
                collected.push({
                    name,
                    filePath: path.join(memoriesDir, fname),
                    description: "",
                    tags: [],
                    stashDir: fsStashDir,
                });
            }
        }
    }
    if (collected.length > 0) {
        warnings.push("DB not found or empty — loaded memories directly from filesystem.");
    }
    return collected;
}
1417
/**
 * Ask the configured LLM to merge a primary memory with ONE secondary memory
 * and return the sanitized merged asset text.
 *
 * Only `secondaryRefs[0]` is used (one call per merge op, by design). The
 * response is passed through `sanitizeMergedContent` and then a
 * content-preservation lint; any failure pushes a message onto `warnings`
 * and yields `null` so the caller can skip the merge.
 *
 * @param {object} config - Passed to `getDefaultLlmConfig` and `tryLlmFeature`.
 * @param {string} primaryRef - Ref of the primary memory (used in prompt and warnings).
 * @param {string} primaryBody - Full text of the primary memory, frontmatter included.
 * @param {string[]} secondaryRefs - Refs to merge in; only the first is consulted.
 * @param {Map<string, {filePath: string}>} memoryByRef - Lookup from ref to entry.
 * @param {string[]} warnings - Mutable list that receives failure messages.
 * @returns {Promise<string | null>} Merged asset text, or `null` when the merge
 *   should be skipped (including, silently, when the secondary ref is unknown).
 */
async function generateMergedContent(config, primaryRef, primaryBody, secondaryRefs, memoryByRef, warnings) {
    // Only handle single-secondary merges per design (one call per merge op)
    const secRef = secondaryRefs[0];
    const secEntry = memoryByRef.get(secRef);
    if (!secEntry)
        return null;
    let secBody = "";
    try {
        secBody = fs.readFileSync(secEntry.filePath, "utf8");
    }
    catch {
        warnings.push(`Merge: could not read secondary ${secRef} — skipping.`);
        return null;
    }
    const prompt = [
        "Merge these two memory assets into one. Output ONLY the merged markdown (with YAML frontmatter). Do not explain, do not use code fences.",
        "",
        "## OUTPUT FORMAT (MANDATORY)",
        "Return raw markdown content beginning DIRECTLY with the `---` frontmatter delimiter.",
        "DO NOT wrap your entire response in a code fence.",
        "",
        'GOOD: "---\\ndescription: ...\\n---\\nBody content."',
        'BAD: "```markdown\\n---\\ndescription: ...\\n---\\nBody content.\\n```"',
        'BAD: "```yaml\\n---\\ndescription: ...\\n---\\nBody content.\\n```"',
        "",
        "- The `updated:` field, if present, MUST be a real ISO date (e.g. `updated: 2026-05-20`). NEVER emit `updated: today`, `updated: now`, or `updated: {today: null}`. If you don't have a real date, OMIT the field — the post-processor will not invent one.",
        "",
        `=== Primary memory (${primaryRef}) ===`,
        primaryBody,
        "",
        `=== Secondary memory (${secRef}) ===`,
        secBody,
    ].join("\n");
    const llmConfig = getDefaultLlmConfig(config);
    // The last argument is the fallback result handed to tryLlmFeature.
    const result = await tryLlmFeature("memory_consolidation", config, async () => {
        if (!llmConfig)
            return { ok: false, error: "No LLM configured for consolidation" };
        try {
            const content = await chatCompletion(llmConfig, [{ role: "user", content: prompt }]);
            return { ok: true, content };
        }
        catch (e) {
            return { ok: false, error: String(e) };
        }
    }, { ok: false, error: `merge content generation failed for ${primaryRef}` });
    if (!result.ok) {
        warnings.push(result.error ?? `merge content generation failed for ${primaryRef}`);
        return null;
    }
    // Sanitize LLM output: strip outer code fences (defends against the
    // ```markdown … ``` leak observed in production), re-serialise frontmatter
    // through the yaml lib (fixes quote-escaping mistakes), and reject empty
    // or fence-only responses.
    const sanitized = sanitizeMergedContent(result.content ?? "");
    if (!sanitized.ok) {
        warnings.push(`Merge: rejected LLM output for ${primaryRef} — ${sanitized.reason}.`);
        return null;
    }
    const mergedRaw = sanitized.result.content;
    // C-4 / #383: Content-preservation lint (mem0 §3.2, arXiv:2504.19413).
    // Guards against LLM-generated merged content that silently drops information
    // from the source assets. Two checks:
    //   1. Body size: merged body must be >= 50% of the larger source body.
    //   2. Frontmatter superset: merged frontmatter must contain all keys present
    //      in both source frontmatters.
    // Failures emit a warning and return null so the merge op is skipped rather
    // than writing degraded content.
    try {
        const primaryFm = parseFrontmatter(primaryBody);
        const secFm = parseFrontmatter(secBody);
        const mergedFm = parseFrontmatter(mergedRaw);
        // Check body size
        const primaryBodyLen = (primaryFm.content ?? "").trim().length;
        const secBodyLen = (secFm.content ?? "").trim().length;
        const mergedBodyLen = (mergedFm.content ?? "").trim().length;
        const largerBodyLen = Math.max(primaryBodyLen, secBodyLen);
        if (largerBodyLen > 0 && mergedBodyLen < largerBodyLen * 0.5) {
            warnings.push(`Merge: content-preservation lint failed for ${primaryRef} — ` +
                `merged body (${mergedBodyLen} chars) is less than 50% of larger source (${largerBodyLen} chars). ` +
                `Skipping merge to prevent data loss.`);
            return null;
        }
        // Check frontmatter superset
        const primaryKeys = Object.keys(primaryFm.data ?? {});
        const secKeys = Object.keys(secFm.data ?? {});
        const mergedKeys = new Set(Object.keys(mergedFm.data ?? {}));
        // De-duplicate: a key present in BOTH sources is reported once, not twice.
        const sourceKeys = [...new Set([...primaryKeys, ...secKeys])];
        const missingKeys = sourceKeys.filter((k) => !mergedKeys.has(k));
        if (missingKeys.length > 0) {
            warnings.push(`Merge: content-preservation lint failed for ${primaryRef} — ` +
                `merged frontmatter missing keys from sources: ${missingKeys.join(", ")}. ` +
                `Skipping merge to prevent data loss.`);
            return null;
        }
    }
    catch {
        // parseFrontmatter failures are non-fatal — allow the merge to proceed.
    }
    return mergedRaw;
}
1516
/**
 * Print `message` to stdout and wait for one line on stdin.
 *
 * @param {string} message - Prompt text, written as-is (no newline appended).
 * @returns {Promise<boolean>} `true` only when the entered line, trimmed and
 *   lower-cased, is exactly "y"; `false` for any other line or when the input
 *   closes before a line arrives.
 */
async function promptConfirm(message) {
    process.stdout.write(message);
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    return new Promise((resolve) => {
        let answered = false;
        // First caller wins. rl.close() emits "close", which re-enters here
        // and is ignored thanks to the flag.
        const finish = (accepted) => {
            if (!answered) {
                answered = true;
                rl.close();
                resolve(accepted);
            }
        };
        rl.once("line", (line) => {
            const reply = line.trim().toLowerCase();
            finish(reply === "y");
        });
        rl.once("close", () => {
            finish(false);
        });
    });
}