akm-cli 0.9.0-beta.54 → 0.9.0-beta.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/dist/cli.js +5 -3
  2. package/dist/commands/agent/contribute-cli.js +2 -3
  3. package/dist/commands/env/env-cli.js +187 -202
  4. package/dist/commands/env/secret-cli.js +109 -121
  5. package/dist/commands/feedback-cli.js +152 -155
  6. package/dist/commands/health/advisories.js +151 -0
  7. package/dist/commands/health/improve-metrics.js +754 -0
  8. package/dist/commands/health/llm-usage.js +65 -0
  9. package/dist/commands/health/md-report.js +103 -0
  10. package/dist/commands/health/metrics.js +278 -0
  11. package/dist/commands/health/task-runs.js +135 -0
  12. package/dist/commands/health/types.js +18 -0
  13. package/dist/commands/health/windows.js +196 -0
  14. package/dist/commands/health.js +14 -1624
  15. package/dist/commands/improve/anti-collapse.js +170 -0
  16. package/dist/commands/improve/collapse-detector.js +3 -2
  17. package/dist/commands/improve/consolidate.js +636 -633
  18. package/dist/commands/improve/dedup.js +1 -1
  19. package/dist/commands/improve/distill/content-repair.js +202 -0
  20. package/dist/commands/improve/distill/promote-memory.js +228 -0
  21. package/dist/commands/improve/distill/quality-gate.js +233 -0
  22. package/dist/commands/improve/distill-guards.js +127 -0
  23. package/dist/commands/improve/distill.js +49 -575
  24. package/dist/commands/improve/extract-cli.js +74 -76
  25. package/dist/commands/improve/extract.js +6 -4
  26. package/dist/commands/improve/hot-probation.js +45 -0
  27. package/dist/commands/improve/improve-auto-accept.js +3 -2
  28. package/dist/commands/improve/improve-cli.js +14 -13
  29. package/dist/commands/improve/improve-result-file.js +2 -1
  30. package/dist/commands/improve/improve.js +6 -5
  31. package/dist/commands/improve/loop-stages.js +19 -21
  32. package/dist/commands/improve/preparation.js +4 -2
  33. package/dist/commands/improve/procedural.js +10 -31
  34. package/dist/commands/improve/recombine.js +19 -43
  35. package/dist/commands/improve/reflect.js +1 -1
  36. package/dist/commands/improve/schema-similarity-gate.js +168 -0
  37. package/dist/commands/improve/shared.js +48 -0
  38. package/dist/commands/observability-cli.js +4 -4
  39. package/dist/commands/proposal/drain-policies.js +2 -2
  40. package/dist/commands/proposal/drain.js +1 -1
  41. package/dist/commands/proposal/legacy-import.js +115 -0
  42. package/dist/commands/proposal/proposal-cli.js +3 -3
  43. package/dist/commands/proposal/proposal.js +2 -1
  44. package/dist/commands/proposal/propose.js +1 -1
  45. package/dist/commands/proposal/repository.js +829 -0
  46. package/dist/commands/proposal/validators/proposals.js +5 -920
  47. package/dist/commands/read/remember-cli.js +132 -137
  48. package/dist/commands/read/search-cli.js +1 -1
  49. package/dist/commands/registry-cli.js +76 -87
  50. package/dist/commands/sources/add-cli.js +90 -94
  51. package/dist/commands/sources/history.js +1 -1
  52. package/dist/commands/sources/schema-repair.js +1 -1
  53. package/dist/commands/sources/sources-cli.js +3 -3
  54. package/dist/commands/sources/stash-cli.js +1 -1
  55. package/dist/commands/tasks/tasks-cli.js +1 -2
  56. package/dist/commands/wiki-cli.js +2 -3
  57. package/dist/core/common.js +3 -3
  58. package/dist/core/config/config-schema.js +6 -0
  59. package/dist/core/deep-merge.js +38 -0
  60. package/dist/core/events.js +2 -1
  61. package/dist/core/logs-db.js +8 -13
  62. package/dist/core/paths.js +14 -14
  63. package/dist/core/state-db.js +13 -1140
  64. package/dist/indexer/db/db.js +96 -723
  65. package/dist/indexer/db/entry-mapper.js +41 -0
  66. package/dist/indexer/db/schema.js +516 -0
  67. package/dist/indexer/feedback/utility-policy.js +75 -0
  68. package/dist/indexer/graph/graph-extraction.js +2 -1
  69. package/dist/indexer/index-writer-lock.js +9 -0
  70. package/dist/indexer/indexer.js +78 -23
  71. package/dist/indexer/search/fts-query.js +51 -0
  72. package/dist/integrations/agent/spawn.js +15 -66
  73. package/dist/llm/embedders/cache.js +3 -1
  74. package/dist/output/text/helpers.js +13 -0
  75. package/dist/registry/resolve.js +5 -0
  76. package/dist/scripts/migrate-storage.js +6908 -7447
  77. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +44 -43
  78. package/dist/setup/legacy-config.js +106 -0
  79. package/dist/setup/prompt.js +57 -0
  80. package/dist/setup/providers.js +14 -0
  81. package/dist/setup/semantic-assets.js +124 -0
  82. package/dist/setup/setup.js +24 -1607
  83. package/dist/setup/steps/connection.js +734 -0
  84. package/dist/setup/steps/output.js +31 -0
  85. package/dist/setup/steps/platforms.js +124 -0
  86. package/dist/setup/steps/semantic.js +27 -0
  87. package/dist/setup/steps/sources.js +222 -0
  88. package/dist/setup/steps/stashdir.js +42 -0
  89. package/dist/setup/steps/tasks.js +152 -0
  90. package/dist/storage/repositories/canaries-repository.js +107 -0
  91. package/dist/storage/repositories/consolidation-repository.js +38 -0
  92. package/dist/storage/repositories/embeddings-repository.js +72 -0
  93. package/dist/storage/repositories/events-repository.js +187 -0
  94. package/dist/storage/repositories/extract-sessions-repository.js +96 -0
  95. package/dist/storage/repositories/improve-runs-repository.js +130 -0
  96. package/dist/storage/repositories/index-db.js +4 -7
  97. package/dist/storage/repositories/proposals-repository.js +220 -0
  98. package/dist/storage/repositories/recombine-repository.js +213 -0
  99. package/dist/storage/repositories/task-history-repository.js +93 -0
  100. package/dist/storage/sqlite-pragmas.js +3 -3
  101. package/dist/tasks/runner.js +2 -1
  102. package/package.json +1 -1
  103. package/dist/commands/improve/homeostatic.js +0 -497
@@ -16,14 +16,15 @@ import { ConfigError } from "../../core/errors.js";
16
16
  import { parseEmbeddedJsonResponse } from "../../core/parse.js";
17
17
  import { resolveStashStandards } from "../../core/standards/resolve-stash-standards.js";
18
18
  import { detectTruncatedDescription } from "../../core/text-truncation.js";
19
+ import { createProposal, isProposalSkipped, listProposals } from "../proposal/repository.js";
19
20
  import { hasSupersededStatus, MERGE_ABSOLUTE_FLOOR_CHARS, MERGE_SHRINK_RATIO_MIN, validateProposalFrontmatter, } from "../proposal/validators/proposal-quality-validators.js";
20
- import { createProposal, isProposalSkipped, listProposals } from "../proposal/validators/proposals.js";
21
+ import { checkGenerationGuard, checkLexicalDiversity, checkMergeInformationFloor, computeMergedGeneration, readAssetGeneration, } from "./anti-collapse.js";
21
22
  import { cacheHash, runDeterministicDedup, stripFrontmatterBody } from "./dedup.js";
22
- import { checkGenerationGuard, checkLexicalDiversity, checkMergeInformationFloor, computeMergedGeneration, readAssetGeneration, shouldSkipHotProbationInLlm, } from "./homeostatic.js";
23
+ import { shouldSkipHotProbationInLlm } from "./hot-probation.js";
23
24
  import { writeContradictEdge } from "./memory/memory-belief.js";
24
25
  // Re-export the moved helpers so existing test imports continue to resolve.
25
26
  export { hasSupersededStatus, validateProposalFrontmatter };
26
- import { getBodyEmbeddings, getConsolidationJudgedMap, openStateDatabase, upsertBodyEmbeddings, upsertConsolidationJudged, withStateDb, } from "../../core/state-db.js";
27
+ import { openStateDatabase, withStateDb } from "../../core/state-db.js";
27
28
  import { warn } from "../../core/warn.js";
28
29
  import { commitWriteTargetBoundary, deleteAssetFromSource, resolveWriteTarget, writeAssetToSource, } from "../../core/write-source.js";
29
30
  import { closeDatabase, findEntryIdByRef, getAllEntries, getEntryById, getNeighborsByEntryId, openExistingDatabase, } from "../../indexer/db/db.js";
@@ -31,6 +32,8 @@ import { resolveImproveProcessRunnerFromProfile, runnerIsLlm } from "../../integ
31
32
  import { chatCompletion } from "../../llm/client.js";
32
33
  import { cosineSimilarity, embedBatch, resolveEmbeddingModelId } from "../../llm/embedder.js";
33
34
  import { isLlmFeatureEnabled, tryLlmFeature } from "../../llm/feature-gate.js";
35
+ import { getConsolidationJudgedMap, upsertConsolidationJudged, } from "../../storage/repositories/consolidation-repository.js";
36
+ import { getBodyEmbeddings, upsertBodyEmbeddings } from "../../storage/repositories/embeddings-repository.js";
34
37
  // Chunk sizing + per-chunk prompt assembly live in ./consolidate/chunking.
35
38
  // Imported for internal use by the orchestrator and re-exported for importers.
36
39
  import { buildChunkPrompt, computeSafeChunkSize, DEFAULT_CONTEXT_LENGTH_TOKENS } from "./consolidate/chunking.js";
@@ -543,6 +546,29 @@ function computeMemoryContentHash(filePath) {
543
546
  return undefined;
544
547
  }
545
548
  }
549
+ /**
550
+ * Build a {@link ConsolidateResult} from partial overrides, filling the envelope
551
+ * defaults (schemaVersion / ok / shape + the zeroed counters). Collapses the
552
+ * ~7 near-identical result literals that previously appeared verbatim at every
553
+ * early-return site and the final return of `akmConsolidateInner`. Callers pass
554
+ * only the fields that differ from the all-zero, ok, non-preview baseline.
555
+ */
556
+ export function makeConsolidateResult(overrides) {
557
+ return {
558
+ schemaVersion: 1,
559
+ ok: true,
560
+ shape: "consolidate-result",
561
+ dryRun: false,
562
+ previewOnly: false,
563
+ processed: 0,
564
+ merged: 0,
565
+ deleted: 0,
566
+ promoted: [],
567
+ contradicted: 0,
568
+ warnings: [],
569
+ ...overrides,
570
+ };
571
+ }
546
572
  // ── Main entry point ─────────────────────────────────────────────────────────
547
573
  export async function akmConsolidate(opts = {}) {
548
574
  const startMs = Date.now();
@@ -553,21 +579,11 @@ export async function akmConsolidate(opts = {}) {
553
579
  const config = opts.config ?? loadConfig();
554
580
  const stashDir = opts.stashDir ?? resolveStashDir();
555
581
  if (!isLlmFeatureEnabled(config, "memory_consolidation")) {
556
- return {
557
- schemaVersion: 1,
558
- ok: true,
559
- shape: "consolidate-result",
582
+ return makeConsolidateResult({
560
583
  dryRun: opts.dryRun ?? false,
561
- previewOnly: false,
562
584
  target: opts.target ?? stashDir,
563
- processed: 0,
564
- merged: 0,
565
- deleted: 0,
566
- promoted: [],
567
- contradicted: 0,
568
- warnings: [],
569
585
  durationMs: Date.now() - startMs,
570
- };
586
+ });
571
587
  }
572
588
  const warnings = [];
573
589
  checkForIncompleteJournal(stashDir, opts.recoveryMode ?? "abort", warnings);
@@ -669,43 +685,26 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
669
685
  }
670
686
  }
671
687
  if (memories.length === 0) {
672
- return {
673
- schemaVersion: 1,
674
- ok: true,
675
- shape: "consolidate-result",
688
+ return makeConsolidateResult({
676
689
  dryRun: opts.dryRun ?? false,
677
- previewOnly: false,
678
690
  target: opts.target ?? stashDir,
679
- processed: 0,
680
- merged: 0,
681
691
  // #617: the deterministic dedup pre-pass may have emptied the pool by
682
692
  // collapsing every remaining memory into a canonical. Surface those
683
693
  // collapses in `deleted` so the run reports the work it actually did.
684
694
  deleted: dedupCollapsed,
685
- promoted: [],
686
- contradicted: 0,
687
695
  warnings,
688
696
  durationMs: Date.now() - startMs,
689
- };
697
+ });
690
698
  }
691
699
  if (opts.incrementalSince) {
692
700
  memories = narrowToIncrementalCandidates(memories, opts.incrementalSince, warnings, opts.neighborsPerChanged);
693
701
  if (memories.length === 0) {
694
- return {
695
- schemaVersion: 1,
696
- ok: true,
697
- shape: "consolidate-result",
702
+ return makeConsolidateResult({
698
703
  dryRun: opts.dryRun ?? false,
699
- previewOnly: false,
700
704
  target: opts.target ?? stashDir,
701
- processed: 0,
702
- merged: 0,
703
- deleted: 0,
704
- promoted: [],
705
- contradicted: 0,
706
705
  warnings,
707
706
  durationMs: Date.now() - startMs,
708
- };
707
+ });
709
708
  }
710
709
  }
711
710
  // WS-5 perf telemetry accumulators. These are collected throughout the run and
@@ -770,21 +769,13 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
770
769
  warnings.push(`Judged-state cache: skipped ${skipped} memor${skipped === 1 ? "y" : "ies"} judged-unchanged (no LLM); ${memories.length} remain for judging.`);
771
770
  }
772
771
  if (memories.length === 0) {
773
- return {
774
- schemaVersion: 1,
775
- ok: true,
776
- shape: "consolidate-result",
772
+ return makeConsolidateResult({
777
773
  dryRun: opts.dryRun ?? false,
778
- previewOnly: false,
779
774
  target: opts.target ?? stashDir,
780
- processed: 0,
781
- merged: 0,
782
775
  deleted: dedupCollapsed,
783
- promoted: [],
784
- contradicted: 0,
785
776
  warnings,
786
777
  durationMs: Date.now() - startMs,
787
- };
778
+ });
788
779
  }
789
780
  }
790
781
  if (opts.limit === undefined && memories.length > 150) {
@@ -994,9 +985,6 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
994
985
  // would otherwise vanish from the envelope's accounting (no judgedNoAction
995
986
  // bump, no skipReasons entry, no actioned counter).
996
987
  let failedChunkMemories = 0;
997
- // 2026-05-26 accounting-leak fix: per-secondary tally so successful merges
998
- // account for `1 + secondaries.length` memories instead of 1.
999
- let mergedSecondaries = 0;
1000
988
  // C-6 / #392: Replace two-consecutive-failures abort with failure-rate threshold.
1001
989
  // Consecutive-count policies are brittle against transient LM Studio reloads:
1002
990
  // two transient failures abort the run even though the next chunk would succeed.
@@ -1058,15 +1046,16 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1058
1046
  }
1059
1047
  warn(`[consolidate] chunk ${chunkIdx + 1}/${chunks.length} (${chunk.length} memories) …`);
1060
1048
  const userPrompt = buildChunkPrompt(sourceName, chunk, chunkIdx, chunks.length, bodyTruncation, pendingProposalBodyHashes, standardsContext);
1061
- let raw = await tryLlmFeature("memory_consolidation", config, async () => {
1049
+ // Single chunk LLM call, wrapped in the feature gate. Deduplicated across
1050
+ // the first attempt and the retry below (the two blocks were byte-identical
1051
+ // apart from their fallback error string). responseSchema lift (PR 1,
1052
+ // asset-writers-investigation §5): providers with `supportsJsonSchema: true`
1053
+ // enforce the shape upstream; others fall through to
1054
+ // `parseEmbeddedJsonResponse` on the response side.
1055
+ const callChunkLlm = (fallbackError) => tryLlmFeature("memory_consolidation", config, async () => {
1062
1056
  if (!llmConfig)
1063
1057
  return { ok: false, error: "No LLM configured for consolidation" };
1064
1058
  try {
1065
- // responseSchema lift (PR 1, asset-writers-investigation §5): pass
1066
- // the consolidate plan schema so providers with
1067
- // `supportsJsonSchema: true` enforce shape upstream. Providers that
1068
- // ignore the option fall through to the existing
1069
- // `parseEmbeddedJsonResponse` path on the response side.
1070
1059
  const content = await chatCompletion(llmConfig, [
1071
1060
  { role: "system", content: CONSOLIDATE_SYSTEM_PROMPT },
1072
1061
  { role: "user", content: userPrompt },
@@ -1076,26 +1065,14 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1076
1065
  catch (e) {
1077
1066
  return { ok: false, error: String(e) };
1078
1067
  }
1079
- }, { ok: false, error: `chunk ${chunkIdx + 1} failed` });
1068
+ }, { ok: false, error: fallbackError });
1069
+ let raw = await callChunkLlm(`chunk ${chunkIdx + 1} failed`);
1080
1070
  if (!raw.ok) {
1081
1071
  // Single retry with 2s backoff before recording chunk as lost.
1082
1072
  // Recovers transient Shredder LM Studio timeouts without significantly
1083
1073
  // extending run time. Only marks failed if both attempts fail.
1084
1074
  await new Promise((r) => setTimeout(r, 2_000));
1085
- const retry = await tryLlmFeature("memory_consolidation", config, async () => {
1086
- if (!llmConfig)
1087
- return { ok: false, error: "No LLM configured for consolidation" };
1088
- try {
1089
- const content = await chatCompletion(llmConfig, [
1090
- { role: "system", content: CONSOLIDATE_SYSTEM_PROMPT },
1091
- { role: "user", content: userPrompt },
1092
- ], { responseSchema: CONSOLIDATE_PLAN_JSON_SCHEMA, enableThinking: false });
1093
- return { ok: true, content };
1094
- }
1095
- catch (e) {
1096
- return { ok: false, error: String(e) };
1097
- }
1098
- }, { ok: false, error: `chunk ${chunkIdx + 1} retry failed` });
1075
+ const retry = await callChunkLlm(`chunk ${chunkIdx + 1} retry failed`);
1099
1076
  if (!retry.ok) {
1100
1077
  warn(retry.error ?? `chunk ${chunkIdx + 1} failed after retry`);
1101
1078
  warnings.push(retry.error ?? `chunk ${chunkIdx + 1} failed after retry`);
@@ -1224,28 +1201,23 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1224
1201
  warnings.push(...mergeWarnings);
1225
1202
  // -- Dry-run: show AI plan without executing any writes --------------------
1226
1203
  if (opts.dryRun) {
1227
- return {
1228
- schemaVersion: 1,
1229
- ok: true,
1230
- shape: "consolidate-result",
1204
+ return makeConsolidateResult({
1231
1205
  dryRun: true,
1232
1206
  previewOnly: true,
1233
1207
  target: sourceName,
1234
1208
  processed: memories.length,
1235
- merged: 0,
1236
- deleted: 0,
1237
- promoted: [],
1238
- contradicted: 0,
1239
1209
  failedChunks: totalChunksFailed,
1240
1210
  totalChunks: chunks.length,
1241
1211
  judgedNoAction,
1242
1212
  skipReasons,
1243
- mergedSecondaries,
1213
+ // No merge has executed on the preview path — the per-secondary tally is
1214
+ // provably still 0 here (it only increments in the op-execution loop).
1215
+ mergedSecondaries: 0,
1244
1216
  failedChunkMemories,
1245
1217
  planned: allOps,
1246
1218
  warnings,
1247
1219
  durationMs: Date.now() - startMs,
1248
- };
1220
+ });
1249
1221
  }
1250
1222
  warn(`[consolidate] plan: ${allOps.length} operation(s)`);
1251
1223
  // -- HTTP path: warn about quality and confirm unless auto-accepted --------
@@ -1266,28 +1238,21 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1266
1238
  const nonInteractive = process.stdin.isTTY === false || process.env.AKM_NON_INTERACTIVE === "1";
1267
1239
  const answer = nonInteractive ? false : await promptConfirm(`Apply ${n} operations? [y/N] `);
1268
1240
  if (!answer) {
1269
- return {
1270
- schemaVersion: 1,
1271
- ok: true,
1272
- shape: "consolidate-result",
1273
- dryRun: false,
1241
+ return makeConsolidateResult({
1274
1242
  previewOnly: true,
1275
1243
  target: sourceName,
1276
1244
  processed: memories.length,
1277
- merged: 0,
1278
- deleted: 0,
1279
- promoted: [],
1280
- contradicted: 0,
1281
1245
  failedChunks: totalChunksFailed,
1282
1246
  totalChunks: chunks.length,
1283
1247
  judgedNoAction,
1284
1248
  skipReasons,
1285
- mergedSecondaries,
1249
+ // No merge executed on the abort path — mergedSecondaries is still 0.
1250
+ mergedSecondaries: 0,
1286
1251
  failedChunkMemories,
1287
1252
  planned: allOps,
1288
1253
  warnings: [...warnings, nonInteractive ? "Non-interactive context: skipped apply." : "Aborted by user."],
1289
1254
  durationMs: Date.now() - startMs,
1290
- };
1255
+ });
1291
1256
  }
1292
1257
  }
1293
1258
  }
@@ -1297,11 +1262,14 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1297
1262
  const backupDir = getBackupDir(stashDir, timestamp);
1298
1263
  // Write journal before any mutations
1299
1264
  writeJournal(stashDir, allOps, timestamp);
1300
- let merged = 0;
1301
- let deleted = 0;
1265
+ const counts = {
1266
+ merged: 0,
1267
+ deleted: 0,
1268
+ contradicted: 0, // C-3 / #382: count of contradiction edges written
1269
+ mergeFloorViolations: 0, // R5 §4.2: advisory merge-information-floor failures
1270
+ mergedSecondaries: 0,
1271
+ };
1302
1272
  const promoted = [];
1303
- let contradicted = 0; // C-3 / #382: count of contradiction edges written
1304
- let mergeFloorViolations = 0; // R5 §4.2: advisory merge-information-floor failures
1305
1273
  // Within-run dedup: track source refs for which a promote proposal was
1306
1274
  // already created this run. The LLM can return multiple promote ops for
1307
1275
  // different source memories that happen to have identical content (all are
@@ -1312,550 +1280,41 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1312
1280
  for (const m of memories) {
1313
1281
  memoryByRef.set(`memory:${m.name}`, m);
1314
1282
  }
1283
+ const opCtx = {
1284
+ config,
1285
+ stashDir,
1286
+ sourceRun,
1287
+ target,
1288
+ backupDir,
1289
+ memoryByRef,
1290
+ promoted,
1291
+ promotedSourceRefs,
1292
+ warnings,
1293
+ counts,
1294
+ pushSkipReason,
1295
+ };
1296
+ // Thin dispatch over the op discriminator — each branch is now an isolated,
1297
+ // independently-testable handler that mutates `opCtx`.
1315
1298
  for (let opIndex = 0; opIndex < allOps.length; opIndex++) {
1316
1299
  const op = allOps[opIndex];
1317
1300
  const opDisplayRef = op.op === "merge" ? op.primary : op.op === "contradict" ? `${op.ref} ↔ ${op.contradictedByRef}` : op.ref;
1318
1301
  warn(`[consolidate] ${opIndex + 1}/${allOps.length} ${op.op} ${opDisplayRef}`);
1319
- if (op.op === "merge") {
1320
- // Accounting helper: emit a per-participant skipReason for failed
1321
- // merges so primary + every loaded-memory secondary land in the
1322
- // structured skip histogram. Pre-2026-05-26 only the primary was
1323
- // counted (1 skipReason per failed merge), leaving N secondaries
1324
- // unaccounted for in the `processed == actioned + noAction + Σskips`
1325
- // invariant — the source of the 4–11 silent leaks per run.
1326
- const emitMergeFailureSkips = (reason) => {
1327
- if (memoryByRef.has(op.primary))
1328
- pushSkipReason("merge", op.primary, reason);
1329
- for (const secRef of op.secondaries) {
1330
- if (memoryByRef.has(secRef))
1331
- pushSkipReason("merge", secRef, reason);
1332
- }
1333
- };
1334
- const primaryEntry = memoryByRef.get(op.primary);
1335
- if (!primaryEntry) {
1336
- // This fires when a prior op in the same run consumed this ref as a
1337
- // secondary and Fix-A pruned it from memoryByRef. It should NOT fire
1338
- // for hallucinated primaries (those are dropped by mergePlans() before
1339
- // reaching here). If this counter is non-zero, suspect an intra-run
1340
- // cross-chunk race, not a filter regression.
1341
- warnings.push(`Merge: primary ${op.primary} not found in loaded memories (pruned by prior op this run) — skipping.`);
1342
- emitMergeFailureSkips("merge_primary_missing");
1343
- continue;
1344
- }
1345
- // Defense-in-depth: even if the entry is in memoryByRef (pre-flight ran
1346
- // before this run's own ops), the file may have been deleted by a
1347
- // concurrent process or an edge case the pre-flight filter missed.
1348
- if (!fs.existsSync(primaryEntry.filePath)) {
1349
- warnings.push(`Merge: primary ${op.primary} file gone at execution time (stale entry) — skipping.`);
1350
- emitMergeFailureSkips("merge_primary_file_gone");
1351
- continue;
1352
- }
1353
- // Phase B: generate merged content
1354
- const secondaryBodies = [];
1355
- for (const secRef of op.secondaries) {
1356
- const secEntry = memoryByRef.get(secRef);
1357
- if (!secEntry) {
1358
- warnings.push(`Merge: secondary ${secRef} not found — skipping merge op.`);
1359
- // No accounting impact: a missing secondary is a phantom ref and
1360
- // never contributed to any chunk's targetRefs reduction. We still
1361
- // continue the loop to gather the remaining valid secondaries.
1362
- continue;
1363
- }
1364
- secondaryBodies.push(secRef);
1365
- }
1366
- if (secondaryBodies.length === 0) {
1367
- warnings.push(`Merge: ${op.primary} has no valid secondaries — skipping.`);
1368
- emitMergeFailureSkips("merge_no_valid_secondaries");
1369
- continue;
1370
- }
1371
- // Pre-flight hot guard — skip the LLM call entirely if any participant
1372
- // is hot or unparseable. Without this, mixed chunks still send hot merges
1373
- // to the planner which proposes them; generateMergedContent() is then
1374
- // called, produces output without `description`, and the skip is
1375
- // misattributed to merge_missing_description instead of the real cause.
1376
- const preflightParticipants = [op.primary, ...op.secondaries];
1377
- const preflightBlocked = preflightParticipants.flatMap((ref) => {
1378
- const e = memoryByRef.get(ref);
1379
- if (!e)
1380
- return [];
1381
- const verdict = consolidateGuardStatus(e.filePath);
1382
- if (verdict === "hot" || verdict === "unparseable")
1383
- return [{ ref, verdict }];
1384
- return [];
1385
- });
1386
- if (preflightBlocked.length > 0) {
1387
- const detail = preflightBlocked.map((p) => `${p.ref} (${p.verdict})`).join(", ");
1388
- warnings.push(`Merge: refused for ${op.primary} — ${preflightBlocked.length} participant(s) blocked by hot/unparseable frontmatter guard (pre-flight): ${detail}`);
1389
- emitMergeFailureSkips("merge_participant_blocked");
1390
- continue;
1391
- }
1392
- let primaryBody = "";
1393
- try {
1394
- primaryBody = fs.readFileSync(primaryEntry.filePath, "utf8");
1395
- }
1396
- catch {
1397
- warnings.push(`Merge: could not read primary ${op.primary} — skipping.`);
1398
- emitMergeFailureSkips("merge_read_failed");
1399
- continue;
1400
- }
1401
- const mergeResult = await generateMergedContent(config, op.primary, primaryBody, op.secondaries, memoryByRef);
1402
- if ("error" in mergeResult) {
1403
- warnings.push(`Merge: ${mergeResult.error} for ${mergeResult.detail}.`);
1404
- emitMergeFailureSkips(mergeResult.error);
1405
- continue;
1406
- }
1407
- let mergedContent = mergeResult.content;
1408
- // Validate frontmatter of merged content — must have a `---` block
1409
- // with at minimum a `description` field. We parse via the hand-rolled
1410
- // parser (cheap) AND require non-empty description. This guards against
1411
- // the historical defect where merged memories were written back with
1412
- // empty `description` and later polluted the promote path.
1413
- let parsedMerged;
1414
- try {
1415
- parsedMerged = parseFrontmatter(mergedContent);
1416
- }
1417
- catch {
1418
- warnings.push(`Merge: merged content for ${op.primary} has invalid frontmatter — skipping.`);
1419
- emitMergeFailureSkips("merge_invalid_frontmatter");
1420
- continue;
1421
- }
1422
- if (parsedMerged.frontmatter === null) {
1423
- warnings.push(`Merge: merged content for ${op.primary} has no frontmatter block — skipping.`);
1424
- emitMergeFailureSkips("merge_invalid_frontmatter");
1425
- continue;
1426
- }
1427
- const mergedDesc = parsedMerged.data.description;
1428
- if (typeof mergedDesc !== "string" || mergedDesc.trim().length === 0) {
1429
- warnings.push(`Merge: merged content for ${op.primary} missing description — skipping.`);
1430
- emitMergeFailureSkips("merge_missing_description");
1431
- continue;
1432
- }
1433
- const truncReason = detectTruncatedDescription(mergedDesc);
1434
- if (truncReason) {
1435
- warnings.push(`Merge: merged content for ${op.primary} has truncated description (${truncReason}) — skipping.`);
1436
- emitMergeFailureSkips("merge_truncated_description");
1437
- continue;
1438
- }
1439
- // captureMode:hot guard — refuse the merge if ANY participating memory
1440
- // (primary or secondary) was user-captured or has unparseable frontmatter
1441
- // (could have hidden a hot flag). Hot memories are user-explicit and
1442
- // must not be deleted/overwritten by the consolidate LLM. 14 user
1443
- // memories were silent-deleted by consolidate before this guard landed;
1444
- // recovery required copying from .akm/archive/ by hand.
1445
- const mergeParticipants = [op.primary, ...op.secondaries];
1446
- const blockedParticipants = mergeParticipants.flatMap((ref) => {
1447
- const e = memoryByRef.get(ref);
1448
- if (!e)
1449
- return [];
1450
- const verdict = consolidateGuardStatus(e.filePath);
1451
- if (verdict === "hot" || verdict === "unparseable")
1452
- return [{ ref, verdict }];
1453
- return [];
1454
- });
1455
- if (blockedParticipants.length > 0) {
1456
- const detail = blockedParticipants.map((p) => `${p.ref} (${p.verdict})`).join(", ");
1457
- warnings.push(`Merge: refused for ${op.primary} — ${blockedParticipants.length} participant(s) blocked by hot/unparseable frontmatter guard: ${detail}`);
1458
- emitMergeFailureSkips("merge_participant_blocked");
1459
- continue;
1460
- }
1461
- // WS-3b: Anti-collapse generation guard (step 8a).
1462
- // DEFAULT ON since R5 (opt out via antiCollapse.enabled: false). Refuses
1463
- // to merge two assets both above generation N (default 2) — prevents the
1464
- // pipeline from building ever-deeper LLM-merged trees that lose the
1465
- // source fidelity of the original episodes.
1466
- const antiCollapseConfig = config.profiles?.improve?.default?.processes?.consolidate?.antiCollapse ??
1467
- {};
1468
- if (antiCollapseConfig.enabled !== false) {
1469
- const allParticipants = [op.primary, ...op.secondaries];
1470
- // One read per participant: generation counter, stripped body (for the
1471
- // information floor), and existing source_refs (for the provenance union).
1472
- const participantInfo = allParticipants.map((ref) => {
1473
- const e = memoryByRef.get(ref);
1474
- if (!e)
1475
- return { ref, generation: 0, body: "", sourceRefs: [] };
1476
- try {
1477
- const raw = fs.readFileSync(e.filePath, "utf8");
1478
- const parsed = parseFrontmatter(raw);
1479
- const fm = parsed.data;
1480
- const sourceRefs = Array.isArray(fm.source_refs) ? fm.source_refs.map(String) : [];
1481
- return { ref, generation: readAssetGeneration(fm), body: stripFrontmatterBody(raw), sourceRefs };
1482
- }
1483
- catch {
1484
- return { ref, generation: 0, body: "", sourceRefs: [] };
1485
- }
1486
- });
1487
- const sourceGenerations = participantInfo.map((p) => p.generation);
1488
- const generationCheck = checkGenerationGuard(sourceGenerations, antiCollapseConfig);
1489
- if (generationCheck.refused) {
1490
- warnings.push(`Merge: ${generationCheck.reason}`);
1491
- emitMergeFailureSkips("merge_generation_guard");
1492
- continue;
1493
- }
1494
- // WS-3b: Lexical diversity check (step 8b).
1495
- // Low n-gram diversity ⇒ likely correlated-extraction artifact; raise merge threshold.
1496
- if (antiCollapseConfig.lexicalDiversityCheck !== false) {
1497
- const bodies = participantInfo.map((p) => p.body).filter((b) => b.length > 0);
1498
- const diversityCheck = checkLexicalDiversity(bodies, antiCollapseConfig);
1499
- if (diversityCheck.lowDiversity) {
1500
- // Low-diversity cluster: just warn (don't refuse merge since the dedup
1501
- // path handles exact twins). The warning surfaces in health telemetry.
1502
- warnings.push(`Merge: cluster around ${op.primary} has low lexical diversity (${diversityCheck.diversity?.toFixed(2) ?? "?"} < 0.30) — likely correlated extraction; merge proceeds but review is recommended.`);
1503
- }
1504
- }
1505
- // Inject generation counter into merged content frontmatter (step 8a).
1506
- // merged.generation = max(sourceGenerations) + 1. source_refs is the
1507
- // UNION of participants + everything they already cited (R5 §4.2 —
1508
- // the old set-if-absent behavior dropped second-generation provenance).
1509
- const provenanceUnion = [...new Set([...allParticipants, ...participantInfo.flatMap((p) => p.sourceRefs)])];
1510
- mergedContent = injectGenerationFrontmatter(mergedContent, sourceGenerations, provenanceUnion);
1511
- // R5 §4.2: merge-information floor — ADVISORY in v1. A merge that
1512
- // shrinks provenance or genericizes below the retention floor is
1513
- // counted + warned, never refused (promotion path: design doc §7).
1514
- try {
1515
- const mergedParsed = parseFrontmatter(mergedContent);
1516
- const mergedFm = mergedParsed.data;
1517
- const mergedSourceRefs = Array.isArray(mergedFm.source_refs) ? mergedFm.source_refs.map(String) : [];
1518
- const floorCheck = checkMergeInformationFloor(mergedParsed.content, mergedSourceRefs, participantInfo, antiCollapseConfig);
1519
- if (!floorCheck.passed) {
1520
- mergeFloorViolations++;
1521
- warnings.push(`Merge: information floor advisory for ${op.primary}: ${floorCheck.reason ?? "unspecified"} — merge proceeds (v1 observe-only).`);
1522
- }
1523
- }
1524
- catch {
1525
- // Floor measurement is best-effort; never blocks the merge path.
1526
- }
1527
- }
1528
- // Backup secondaries before deleting
1529
- for (const secRef of op.secondaries) {
1530
- const secEntry = memoryByRef.get(secRef);
1531
- if (secEntry && fs.existsSync(secEntry.filePath)) {
1532
- backupFile(secEntry.filePath, backupDir, secEntry.name);
1533
- }
1534
- }
1535
- // Write merged primary
1536
- try {
1537
- const parsedPrimary = parseAssetRef(op.primary);
1538
- await writeAssetToSource(target.source, target.config, parsedPrimary, mergedContent);
1539
- }
1540
- catch (e) {
1541
- warnings.push(`Merge: write failed for ${op.primary}: ${String(e)}`);
1542
- emitMergeFailureSkips("merge_write_failed");
1543
- continue;
1544
- }
1545
- // Archive and delete secondaries (P1-B: soft-invalidation)
1546
- for (const secRef of op.secondaries) {
1547
- const secEntry = memoryByRef.get(secRef);
1548
- if (!secEntry)
1549
- continue;
1550
- if (fs.existsSync(secEntry.filePath)) {
1551
- archiveMemory(secEntry.filePath, stashDir, secRef, "merged into primary", opIndex, op.primary, warnings);
1552
- }
1553
- try {
1554
- const parsedSec = parseAssetRef(secRef);
1555
- await deleteAssetFromSource(target.source, target.config, parsedSec);
1556
- markJournalCompleted(stashDir, secRef);
1557
- }
1558
- catch (e) {
1559
- warnings.push(`Merge: delete failed for ${secRef}: ${String(e)}`);
1560
- }
1561
- }
1562
- markJournalCompleted(stashDir, op.primary);
1563
- merged++;
1564
- // 2026-05-26 accounting-leak fix: `merged` is op-level, but each
1565
- // successful merge actions `1 + secondaries.length` memories. Without
1566
- // this counter the accounting invariant breaks by `secondaries.length`
1567
- // per successful merge (chunk loop excluded all secondaries from
1568
- // judgedNoAction via targetRefs, but only the primary is credited to
1569
- // `merged`). Count only loaded-memory secondaries; phantom secondary
1570
- // refs never affected any chunk's targetRefs in the first place.
1571
- for (const secRef of op.secondaries) {
1572
- if (memoryByRef.has(secRef))
1573
- mergedSecondaries++;
1574
- }
1575
- // Prune consumed refs from memoryByRef so later ops in this run cannot
1576
- // reference an absorbed secondary as a merge primary and proceed with a
1577
- // stale entry. Primary is rewritten (not deleted), so we only remove
1578
- // secondaries; the primary ref remains valid under its new content.
1579
- for (const secRef of op.secondaries) {
1580
- memoryByRef.delete(secRef);
1581
- }
1582
- }
1583
- else if (op.op === "delete") {
1584
- const entry = memoryByRef.get(op.ref);
1585
- if (!entry) {
1586
- warnings.push(`Delete: ${op.ref} not found in loaded memories — skipping.`);
1587
- // Phantom ref: not in the batch so not in processed. Pushing to
1588
- // skipReasons would inflate Σ(skipReasons) without a matching processed
1589
- // entry, breaking the accounting invariant. Visibility is preserved via
1590
- // the warnings array above.
1591
- continue;
1592
- }
1593
- // captureMode:hot guard — refuse to delete user-captured memories OR
1594
- // memories whose frontmatter is unparseable (could have hidden the hot
1595
- // flag). The consolidate LLM was deleting hot-captured user memos as
1596
- // "redundant" — 14 such deletes were silently archived between
1597
- // 2026-05-19 and 2026-05-20 before this guard. Hot memories are
1598
- // user-explicit and may only be deleted by the user.
1599
- const guard = consolidateGuardStatus(entry.filePath);
1600
- if (guard === "hot" || guard === "unparseable") {
1601
- warnings.push(`Delete: refused for ${op.ref} — ${guard === "hot" ? "captureMode:hot (user-explicit; never auto-delete)" : "frontmatter unparseable (cannot verify hot flag absent)"}. Reason from LLM: "${op.reason ?? "n/a"}"`);
1602
- pushSkipReason("delete", op.ref, "captureMode_hot_refused");
1603
- continue;
1604
- }
1605
- if (fs.existsSync(entry.filePath)) {
1606
- backupFile(entry.filePath, backupDir, entry.name);
1607
- // P1-B: soft-invalidation archive before hard delete
1608
- archiveMemory(entry.filePath, stashDir, op.ref, op.reason, opIndex, undefined, warnings);
1609
- }
1610
- try {
1611
- const parsedRef = parseAssetRef(op.ref);
1612
- await deleteAssetFromSource(target.source, target.config, parsedRef);
1613
- markJournalCompleted(stashDir, op.ref);
1614
- deleted++;
1615
- // Prune from memoryByRef so later ops in this run cannot reference a
1616
- // deleted memory as a merge primary or secondary.
1617
- memoryByRef.delete(op.ref);
1618
- }
1619
- catch (e) {
1620
- // Distinguish "file already absent" from genuine failures. A prior run
1621
- // may have deleted the file but the DB was not yet re-indexed, so the
1622
- // ref still appeared in memoryByRef. The delete goal is already met.
1623
- const msg = e instanceof Error ? e.message : String(e);
1624
- if (msg.includes("not found in source")) {
1625
- warnings.push(`Delete: ${op.ref} — file already absent (stale DB entry); skipping.`);
1626
- pushSkipReason("delete", op.ref, "delete_already_gone");
1627
- }
1628
- else {
1629
- warnings.push(`Delete: failed for ${op.ref}: ${String(e)}`);
1630
- pushSkipReason("delete", op.ref, "delete_failed");
1631
- }
1632
- }
1633
- }
1634
- else if (op.op === "promote") {
1635
- const entry = memoryByRef.get(op.ref);
1636
- if (!entry) {
1637
- warnings.push(`Promote: ${op.ref} not found in loaded memories — skipping.`);
1638
- // Phantom ref: not in processed, so no skipReason (same rationale as
1639
- // delete_ref_missing above).
1640
- continue;
1641
- }
1642
- // Within-run source-ref dedup: skip if this source memory was already
1643
- // promoted earlier in this run (safety belt — mergePlans already
1644
- // deduplicates promote ops by source ref via Map, but this guard also
1645
- // catches any future code paths that bypass mergePlans).
1646
- if (promotedSourceRefs.has(op.ref)) {
1647
- warnings.push(`Skipping promote: ${op.ref} already promoted in this run`);
1648
- pushSkipReason("promote", op.ref, "promote_already_promoted_this_run");
1649
- continue;
1650
- }
1651
- let knowledgeRef = op.knowledgeRef;
1652
- try {
1653
- parseAssetRef(knowledgeRef);
1654
- }
1655
- catch {
1656
- const slug = op.knowledgeRef
1657
- .replace(/^knowledge:/, "")
1658
- .replace(/[^a-z0-9-]/gi, "-")
1659
- .toLowerCase();
1660
- knowledgeRef = `knowledge:${slug}`;
1661
- warnings.push(`Normalized invalid ref "${op.knowledgeRef}" → "${knowledgeRef}"`);
1662
- }
1663
- // Idempotency: check pending proposals by target ref
1664
- const existingProposals = listProposals(stashDir, { ref: knowledgeRef });
1665
- if (existingProposals.some((p) => p.status === "pending")) {
1666
- warnings.push(`Skipping promote: pending proposal already exists for ${knowledgeRef}`);
1667
- pushSkipReason("promote", op.ref, "promote_pending_proposal_exists");
1668
- continue;
1669
- }
1670
- // Idempotency: check if knowledge asset already exists
1671
- const parsedKnowledgeRef = parseAssetRef(knowledgeRef);
1672
- const destPath = path.join(target.source.path, "knowledge", `${parsedKnowledgeRef.name}.md`);
1673
- if (fs.existsSync(destPath)) {
1674
- warnings.push(`Skipping promote: ${knowledgeRef} already exists in source`);
1675
- pushSkipReason("promote", op.ref, "promote_already_exists");
1676
- continue;
1677
- }
1678
- let memoryContent = "";
1679
- try {
1680
- memoryContent = fs.readFileSync(entry.filePath, "utf8");
1681
- }
1682
- catch (e) {
1683
- warnings.push(`Promote: could not read ${op.ref}: ${String(e)}`);
1684
- pushSkipReason("promote", op.ref, "promote_read_failed");
1685
- continue;
1686
- }
1687
- // Defensive sanitization: legacy memory files written by older
1688
- // consolidate runs may still carry outer code fences or broken YAML.
1689
- // Strip them here so we never propose a polluted asset.
1690
- const promoteSanitized = sanitizeMergedContent(memoryContent);
1691
- if (!promoteSanitized.ok) {
1692
- warnings.push(`Promote: rejected ${op.ref} — source memory failed sanitization (${promoteSanitized.reason}).`);
1693
- pushSkipReason("promote", op.ref, "promote_sanitization_failed");
1694
- continue;
1695
- }
1696
- memoryContent = promoteSanitized.result.content;
1697
- // SOURCE_SUPERSEDED guard: refuse to promote a memory whose source
1698
- // frontmatter carries `status: superseded`. Predicate at module top
1699
- // (`hasSupersededStatus`) so tests can exercise it directly.
1700
- if (hasSupersededStatus(promoteSanitized.result.frontmatter)) {
1701
- warnings.push(`Promote: refused for ${op.ref} → ${knowledgeRef} — source memory has status:superseded; superseded memories are not promotable knowledge.`);
1702
- pushSkipReason("promote", op.ref, "promote_superseded");
1703
- continue;
1704
- }
1705
- // Parse the source memory up-front so the body/frontmatter checks below
1706
- // share the same parsed view.
1707
- const parsedMemory = parseFrontmatter(memoryContent);
1708
- // Reject sources whose body is too small to make useful knowledge.
1709
- // Observed failure: memory files whose body is literally a tags string
1710
- // ("discord,notification,send-notification") get promoted to knowledge
1711
- // proposals that no reviewer would accept. Threshold is conservative —
1712
- // 100 chars catches single-line tag dumps without rejecting genuinely
1713
- // terse but valid notes.
1714
- const PROMOTE_BODY_MIN_CHARS = 100;
1715
- const sourceBody = parsedMemory.content.trim();
1716
- if (sourceBody.length < PROMOTE_BODY_MIN_CHARS) {
1717
- warnings.push(`Promote: rejected ${op.ref} → ${knowledgeRef} — source memory body is too small (${sourceBody.length} chars; need ≥${PROMOTE_BODY_MIN_CHARS}) to make useful knowledge.`);
1718
- pushSkipReason("promote", op.ref, "promote_source_too_small");
1719
- continue;
1720
- }
1721
- // Cross-run + within-run content dedup: if an identical body already
1722
- // exists in ANY pending consolidate proposal (regardless of target ref),
1723
- // skip. This prevents duplicate proposals when:
1724
- // (a) Multiple source memories have identical bodies but differ only
1725
- // in noise frontmatter (`inferenceProcessed: true` twin alongside
1726
- // the original; differing `updated:` timestamps; etc.) — the body
1727
- // is the load-bearing content, so dedup must hash on body only.
1728
- // (b) A prior run created a proposal for the same body under a
1729
- // different knowledgeRef slug.
1730
- // Use cacheHash (case-preserving stripped body) to match the canonical
1731
- // hash domain used by the body-embedding cache and pending-proposal set.
1732
- const bodyHash = cacheHash(sourceBody);
1733
- const allPendingConsolidateProposals = listProposals(stashDir, { status: "pending" }).filter((p) => p.source === "consolidate");
1734
- const contentDupProposal = allPendingConsolidateProposals.find((p) => {
1735
- return cacheHash(p.payload.content) === bodyHash;
1736
- });
1737
- if (contentDupProposal) {
1738
- warnings.push(`Skipping promote: identical body already pending as proposal ${contentDupProposal.id} (ref: ${contentDupProposal.ref}); skipping duplicate for ${op.ref} → ${knowledgeRef}`);
1739
- pushSkipReason("promote", op.ref, "dedup_pending_proposal");
1740
- continue;
1741
- }
1742
- try {
1743
- // Use LLM-provided description; fall back to memory's own description
1744
- // (post-sanitization frontmatter is authoritative).
1745
- const description = (typeof op.description === "string" && op.description.trim()
1746
- ? op.description.trim()
1747
- : parsedMemory.data?.description?.trim()) ?? "";
1748
- // Validate the resolved frontmatter before emitting a proposal.
1749
- // Required field: non-empty description. Reject obvious truncation
1750
- // markers (description ends with `,`/`;`/`:`/`...`/hanging connector)
1751
- // so the queue never sees half-formed metadata that the reviewer
1752
- // would only reject.
1753
- const fmCheck = validateProposalFrontmatter({ description });
1754
- if (!fmCheck.ok) {
1755
- warnings.push(`Promote: rejected ${op.ref} → ${knowledgeRef} — ${fmCheck.reason}.`);
1756
- pushSkipReason("promote", op.ref, "promote_invalid_frontmatter");
1757
- continue;
1758
- }
1759
- // Merge `description` INTO the body's YAML frontmatter so it lands in
1760
- // the on-disk asset when the proposal is accepted. The descriptionQuality
1761
- // validator parses `payload.content` body (not the envelope
1762
- // `payload.frontmatter`), and a memory's native frontmatter has
1763
- // `captureMode`/`beliefState`/etc. but never `description` — without
1764
- // this merge, 60+ pending proposals were blocked at accept-time with
1765
- // MISSING_FRONTMATTER_DESCRIPTION even though the envelope had it.
1766
- // (The body-frontmatter assumption baked into the 2026-05-20 comment
1767
- // below was wrong: body fm and envelope fm only converge when the
1768
- // writer explicitly merges them, which it now does.)
1769
- const mergedBodyFm = {
1770
- ...(parsedMemory.data ?? {}),
1771
- description,
1772
- };
1773
- const serializedMergedFm = serializeFrontmatter(mergedBodyFm);
1774
- const proposalContent = assembleAssetFromString(serializedMergedFm, parsedMemory.content);
1775
- // Pre-emit dedup against pending consolidate proposals from the
1776
- // same improve run (slug-variant match). The cross-run content-hash
1777
- // dedup inside `mergePlans` handles duplicates against existing
1778
- // stash assets — see commit history for the deletion of the
1779
- // unbounded embedding + cross-type slug branches.
1780
- const dedup = await checkPreEmitDedup({
1781
- candidateRef: knowledgeRef,
1782
- candidateText: `${description}. ${memoryContent}`,
1783
- stashDir,
1784
- config,
1785
- });
1786
- if (dedup.duplicate) {
1787
- warnings.push(`Promote: skipped ${op.ref} → ${knowledgeRef} — ${dedup.reason}.`);
1788
- pushSkipReason("promote", op.ref, "promote_dedup_window");
1789
- continue;
1790
- }
1791
- const proposalResult = createProposal(stashDir, {
1792
- ref: knowledgeRef,
1793
- source: "consolidate",
1794
- sourceRun,
1795
- payload: {
1796
- content: proposalContent,
1797
- frontmatter: { description },
1798
- },
1799
- ...(typeof op.confidence === "number" ? { confidence: op.confidence } : {}),
1800
- });
1801
- if (isProposalSkipped(proposalResult)) {
1802
- warnings.push(`Promote: skipped proposal for ${op.ref} (${proposalResult.reason}): ${proposalResult.message}`);
1803
- pushSkipReason("promote", op.ref, `promote_proposal_${proposalResult.reason}`);
1804
- }
1805
- else {
1806
- promoted.push(proposalResult.id);
1807
- promotedSourceRefs.add(op.ref);
1808
- markJournalCompleted(stashDir, op.ref);
1809
- }
1810
- }
1811
- catch (e) {
1812
- warnings.push(`Promote: createProposal failed for ${op.ref}: ${String(e)}`);
1813
- pushSkipReason("promote", op.ref, "promote_create_failed");
1814
- }
1815
- }
1816
- else if (op.op === "contradict") {
1817
- // Confidence gate: surface-level topic overlap causes false positives
1818
- // (investigation 2026-06-18). Require ≥0.92 confidence before writing
1819
- // contradiction edges. Missing confidence field defaults to 1.0 for
1820
- // backward compatibility with responses that predate this field.
1821
- const opConfidence = typeof op.confidence === "number"
1822
- ? op.confidence
1823
- : 1.0;
1824
- if (opConfidence < 0.92) {
1825
- warnings.push(`Contradict: confidence ${opConfidence.toFixed(2)} below 0.92 threshold for ${op.ref} <-> ${op.contradictedByRef} — skipping.`);
1826
- pushSkipReason("contradict", op.ref, "contradict_low_confidence");
1827
- continue;
1828
- }
1829
- // C-3 / #382: Write contradictedBy edges so resolveFamilyContradictions
1830
- // (the SCC resolver in memory-improve.ts) has edges to work on.
1831
- // Zep arXiv:2501.13956 §3 — unified belief-revision with contradiction edges.
1832
- const entry = memoryByRef.get(op.ref);
1833
- const contradictorEntry = memoryByRef.get(op.contradictedByRef);
1834
- if (!entry) {
1835
- warnings.push(`Contradict: ${op.ref} not found in loaded memories — skipping.`);
1836
- // Phantom ref: not in processed, so no skipReason (same rationale as
1837
- // delete_ref_missing).
1838
- continue;
1839
- }
1840
- if (!contradictorEntry) {
1841
- warnings.push(`Contradict: ${op.contradictedByRef} not found — skipping.`);
1842
- // op.ref IS in the batch (entry found above) so the skipReason is
1843
- // correctly charged against a real processed memory.
1844
- pushSkipReason("contradict", op.ref, "contradict_target_missing");
1845
- continue;
1846
- }
1847
- try {
1848
- // Write the contradiction edge: op.ref is contradicted by op.contradictedByRef
1849
- writeContradictEdge(entry.filePath, op.contradictedByRef);
1850
- contradicted++;
1851
- markJournalCompleted(stashDir, op.ref);
1852
- }
1853
- catch (e) {
1854
- warnings.push(`Contradict: failed to write edge for ${op.ref}: ${String(e)}`);
1855
- pushSkipReason("contradict", op.ref, "contradict_write_failed");
1856
- }
1302
+ switch (op.op) {
1303
+ case "merge":
1304
+ await handleMergeOp(op, opIndex, opCtx);
1305
+ break;
1306
+ case "delete":
1307
+ await handleDeleteOp(op, opIndex, opCtx);
1308
+ break;
1309
+ case "promote":
1310
+ await handlePromoteOp(op, opCtx);
1311
+ break;
1312
+ case "contradict":
1313
+ await handleContradictOp(op, opCtx);
1314
+ break;
1857
1315
  }
1858
1316
  }
1317
+ const { merged, deleted, contradicted, mergeFloorViolations, mergedSecondaries } = counts;
1859
1318
  // 0.9.0 (issue #507): batch-at-boundary commit. The merge/delete loop above
1860
1319
  // wrote one merged primary and deleted N secondaries to the resolved target
1861
1320
  // with NO per-asset commit. If the target is a writable git source and any
@@ -1913,6 +1372,550 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1913
1372
  },
1914
1373
  };
1915
1374
  }
1375
+ /**
 * Execute one `merge` op (behavior-identical to the former inlined branch).
 *
 * Steps, in execution order:
 *  1. Look up `op.primary` in `memoryByRef` and confirm its file still exists.
 *  2. Collect the secondaries that are present in `memoryByRef`; stop if none.
 *  3. Pre-flight hot/unparseable guard over primary + secondaries, run before
 *     the merged-content generation call.
 *  4. Read the primary file and await `generateMergedContent(...)`.
 *  5. Validate the merged content: frontmatter must parse, a frontmatter block
 *     must be present, and `description` must be a non-empty, non-truncated
 *     string.
 *  6. Run the hot/unparseable guard a second time.
 *  7. Unless `antiCollapse.enabled === false`: generation guard (may refuse),
 *     lexical-diversity check (warning only), generation/source_refs
 *     injection, and the information-floor check (warning + counter only).
 *  8. Back up secondaries, write the merged primary, archive and delete each
 *     loaded secondary, mark journal entries completed, bump `counts`, and
 *     prune the secondaries from `memoryByRef`.
 *
 * Every early `return` in this function first pushes a message onto
 * `warnings` and calls `emitMergeFailureSkips(reason)`. Only the primary read,
 * the merged-frontmatter parse, the floor check, the primary write and each
 * secondary delete are wrapped in try/catch here; errors thrown by the other
 * helper calls are not caught in this function.
 *
 * @param {object} op - Merge op; this function reads `op.primary` and iterates `op.secondaries`.
 * @param {*} opIndex - Forwarded unchanged to `archiveMemory`.
 * @param {object} ctx - Supplies `config`, `stashDir`, `target`, `backupDir`,
 *   `memoryByRef` (used via get/has/delete), `warnings` (used via push),
 *   `pushSkipReason`, and `counts` (its `merged`, `mergedSecondaries` and
 *   `mergeFloorViolations` fields are incremented in place).
 * @returns {Promise<void>} Results are reported only through `ctx` mutations.
 */
1376
+ export async function handleMergeOp(op, opIndex, ctx) {
1377
+ const { config, stashDir, target, backupDir, memoryByRef, warnings, pushSkipReason, counts } = ctx;
1378
+ // Accounting helper: emit a per-participant skipReason for failed
1379
+ // merges so primary + every loaded-memory secondary land in the
1380
+ // structured skip histogram. Pre-2026-05-26 only the primary was
1381
+ // counted (1 skipReason per failed merge), leaving N secondaries
1382
+ // unaccounted for in the `processed == actioned + noAction + Σskips`
1383
+ // invariant — the source of the 4–11 silent leaks per run.
1384
+ const emitMergeFailureSkips = (reason) => {
1385
+ if (memoryByRef.has(op.primary))
1386
+ pushSkipReason("merge", op.primary, reason);
1387
+ for (const secRef of op.secondaries) {
1388
+ if (memoryByRef.has(secRef))
1389
+ pushSkipReason("merge", secRef, reason);
1390
+ }
1391
+ };
1392
+ const primaryEntry = memoryByRef.get(op.primary);
1393
+ if (!primaryEntry) {
1394
+ // This fires when a prior op in the same run consumed this ref as a
1395
+ // secondary and Fix-A pruned it from memoryByRef. It should NOT fire
1396
+ // for hallucinated primaries (those are dropped by mergePlans() before
1397
+ // reaching here). If this counter is non-zero, suspect an intra-run
1398
+ // cross-chunk race, not a filter regression.
1399
+ warnings.push(`Merge: primary ${op.primary} not found in loaded memories (pruned by prior op this run) — skipping.`);
1400
+ emitMergeFailureSkips("merge_primary_missing");
1401
+ return;
1402
+ }
1403
+ // Defense-in-depth: even if the entry is in memoryByRef (pre-flight ran
1404
+ // before this run's own ops), the file may have been deleted by a
1405
+ // concurrent process or an edge case the pre-flight filter missed.
1406
+ if (!fs.existsSync(primaryEntry.filePath)) {
1407
+ warnings.push(`Merge: primary ${op.primary} file gone at execution time (stale entry) — skipping.`);
1408
+ emitMergeFailureSkips("merge_primary_file_gone");
1409
+ return;
1410
+ }
1411
+ // Phase B: generate merged content
1412
+ const secondaryBodies = []; // NOTE(review): despite the name this collects secondary refs; only its length is read below.
1413
+ for (const secRef of op.secondaries) {
1414
+ const secEntry = memoryByRef.get(secRef);
1415
+ if (!secEntry) {
1416
+ warnings.push(`Merge: secondary ${secRef} not found — skipping merge op.`);
1417
+ // No accounting impact: a missing secondary is a phantom ref and
1418
+ // never contributed to any chunk's targetRefs reduction. We still
1419
+ // continue the loop to gather the remaining valid secondaries.
1420
+ continue;
1421
+ }
1422
+ secondaryBodies.push(secRef);
1423
+ }
1424
+ if (secondaryBodies.length === 0) {
1425
+ warnings.push(`Merge: ${op.primary} has no valid secondaries — skipping.`);
1426
+ emitMergeFailureSkips("merge_no_valid_secondaries");
1427
+ return;
1428
+ }
1429
+ // Pre-flight hot guard — skip the LLM call entirely if any participant
1430
+ // is hot or unparseable. Without this, mixed chunks still send hot merges
1431
+ // to the planner which proposes them; generateMergedContent() is then
1432
+ // called, produces output without `description`, and the skip is
1433
+ // misattributed to merge_missing_description instead of the real cause.
1434
+ const preflightParticipants = [op.primary, ...op.secondaries];
1435
+ const preflightBlocked = preflightParticipants.flatMap((ref) => {
1436
+ const e = memoryByRef.get(ref);
1437
+ if (!e)
1438
+ return [];
1439
+ const verdict = consolidateGuardStatus(e.filePath);
1440
+ if (verdict === "hot" || verdict === "unparseable")
1441
+ return [{ ref, verdict }];
1442
+ return [];
1443
+ });
1444
+ if (preflightBlocked.length > 0) {
1445
+ const detail = preflightBlocked.map((p) => `${p.ref} (${p.verdict})`).join(", ");
1446
+ warnings.push(`Merge: refused for ${op.primary} — ${preflightBlocked.length} participant(s) blocked by hot/unparseable frontmatter guard (pre-flight): ${detail}`);
1447
+ emitMergeFailureSkips("merge_participant_blocked");
1448
+ return;
1449
+ }
1450
+ let primaryBody = "";
1451
+ try {
1452
+ primaryBody = fs.readFileSync(primaryEntry.filePath, "utf8");
1453
+ }
1454
+ catch {
1455
+ warnings.push(`Merge: could not read primary ${op.primary} — skipping.`);
1456
+ emitMergeFailureSkips("merge_read_failed");
1457
+ return;
1458
+ }
1459
+ const mergeResult = await generateMergedContent(config, op.primary, primaryBody, op.secondaries, memoryByRef); // receives the full op.secondaries list, not the filtered refs gathered above
1460
+ if ("error" in mergeResult) {
1461
+ warnings.push(`Merge: ${mergeResult.error} for ${mergeResult.detail}.`);
1462
+ emitMergeFailureSkips(mergeResult.error);
1463
+ return;
1464
+ }
1465
+ let mergedContent = mergeResult.content;
1466
+ // Validate frontmatter of merged content — must have a `---` block
1467
+ // with at minimum a `description` field. We parse via the hand-rolled
1468
+ // parser (cheap) AND require non-empty description. This guards against
1469
+ // the historical defect where merged memories were written back with
1470
+ // empty `description` and later polluted the promote path.
1471
+ let parsedMerged;
1472
+ try {
1473
+ parsedMerged = parseFrontmatter(mergedContent);
1474
+ }
1475
+ catch {
1476
+ warnings.push(`Merge: merged content for ${op.primary} has invalid frontmatter — skipping.`);
1477
+ emitMergeFailureSkips("merge_invalid_frontmatter");
1478
+ return;
1479
+ }
1480
+ if (parsedMerged.frontmatter === null) {
1481
+ warnings.push(`Merge: merged content for ${op.primary} has no frontmatter block — skipping.`);
1482
+ emitMergeFailureSkips("merge_invalid_frontmatter");
1483
+ return;
1484
+ }
1485
+ const mergedDesc = parsedMerged.data.description;
1486
+ if (typeof mergedDesc !== "string" || mergedDesc.trim().length === 0) {
1487
+ warnings.push(`Merge: merged content for ${op.primary} missing description — skipping.`);
1488
+ emitMergeFailureSkips("merge_missing_description");
1489
+ return;
1490
+ }
1491
+ const truncReason = detectTruncatedDescription(mergedDesc);
1492
+ if (truncReason) {
1493
+ warnings.push(`Merge: merged content for ${op.primary} has truncated description (${truncReason}) — skipping.`);
1494
+ emitMergeFailureSkips("merge_truncated_description");
1495
+ return;
1496
+ }
1497
+ // captureMode:hot guard — refuse the merge if ANY participating memory
1498
+ // (primary or secondary) was user-captured or has unparseable frontmatter
1499
+ // (could have hidden a hot flag). Hot memories are user-explicit and
1500
+ // must not be deleted/overwritten by the consolidate LLM. 14 user
1501
+ // memories were silent-deleted by consolidate before this guard landed;
1502
+ // recovery required copying from .akm/archive/ by hand. This is the same check as the pre-flight guard above, run again after the awaited generateMergedContent call.
1503
+ const mergeParticipants = [op.primary, ...op.secondaries];
1504
+ const blockedParticipants = mergeParticipants.flatMap((ref) => {
1505
+ const e = memoryByRef.get(ref);
1506
+ if (!e)
1507
+ return [];
1508
+ const verdict = consolidateGuardStatus(e.filePath);
1509
+ if (verdict === "hot" || verdict === "unparseable")
1510
+ return [{ ref, verdict }];
1511
+ return [];
1512
+ });
1513
+ if (blockedParticipants.length > 0) {
1514
+ const detail = blockedParticipants.map((p) => `${p.ref} (${p.verdict})`).join(", ");
1515
+ warnings.push(`Merge: refused for ${op.primary} — ${blockedParticipants.length} participant(s) blocked by hot/unparseable frontmatter guard: ${detail}`);
1516
+ emitMergeFailureSkips("merge_participant_blocked");
1517
+ return;
1518
+ }
1519
+ // WS-3b: Anti-collapse generation guard (step 8a).
1520
+ // DEFAULT ON since R5 (opt out via antiCollapse.enabled: false). Refuses
1521
+ // to merge two assets both above generation N (default 2) — prevents the
1522
+ // pipeline from building ever-deeper LLM-merged trees that lose the
1523
+ // source fidelity of the original episodes.
1524
+ const antiCollapseConfig = config.profiles?.improve?.default?.processes?.consolidate?.antiCollapse ?? {};
1525
+ if (antiCollapseConfig.enabled !== false) {
1526
+ const allParticipants = [op.primary, ...op.secondaries];
1527
+ // One read per participant: generation counter, stripped body (for the
1528
+ // information floor), and existing source_refs (for the provenance union).
1529
+ const participantInfo = allParticipants.map((ref) => {
1530
+ const e = memoryByRef.get(ref);
1531
+ if (!e)
1532
+ return { ref, generation: 0, body: "", sourceRefs: [] };
1533
+ try {
1534
+ const raw = fs.readFileSync(e.filePath, "utf8");
1535
+ const parsed = parseFrontmatter(raw);
1536
+ const fm = parsed.data;
1537
+ const sourceRefs = Array.isArray(fm.source_refs) ? fm.source_refs.map(String) : [];
1538
+ return { ref, generation: readAssetGeneration(fm), body: stripFrontmatterBody(raw), sourceRefs };
1539
+ }
1540
+ catch {
1541
+ return { ref, generation: 0, body: "", sourceRefs: [] };
1542
+ }
1543
+ });
1544
+ const sourceGenerations = participantInfo.map((p) => p.generation);
1545
+ const generationCheck = checkGenerationGuard(sourceGenerations, antiCollapseConfig);
1546
+ if (generationCheck.refused) {
1547
+ warnings.push(`Merge: ${generationCheck.reason}`);
1548
+ emitMergeFailureSkips("merge_generation_guard");
1549
+ return;
1550
+ }
1551
+ // WS-3b: Lexical diversity check (step 8b).
1552
+ // Low n-gram diversity ⇒ likely correlated-extraction artifact. This block only pushes a warning; no merge threshold is changed here.
1553
+ if (antiCollapseConfig.lexicalDiversityCheck !== false) {
1554
+ const bodies = participantInfo.map((p) => p.body).filter((b) => b.length > 0);
1555
+ const diversityCheck = checkLexicalDiversity(bodies, antiCollapseConfig);
1556
+ if (diversityCheck.lowDiversity) {
1557
+ // Low-diversity cluster: just warn (don't refuse merge since the dedup
1558
+ // path handles exact twins). The warning surfaces in health telemetry.
1559
+ warnings.push(`Merge: cluster around ${op.primary} has low lexical diversity (${diversityCheck.diversity?.toFixed(2) ?? "?"} < 0.30) — likely correlated extraction; merge proceeds but review is recommended.`);
1560
+ }
1561
+ }
1562
+ // Inject generation counter into merged content frontmatter (step 8a).
1563
+ // merged.generation = max(sourceGenerations) + 1. source_refs is the
1564
+ // UNION of participants + everything they already cited (R5 §4.2 —
1565
+ // the old set-if-absent behavior dropped second-generation provenance).
1566
+ const provenanceUnion = [...new Set([...allParticipants, ...participantInfo.flatMap((p) => p.sourceRefs)])];
1567
+ mergedContent = injectGenerationFrontmatter(mergedContent, sourceGenerations, provenanceUnion);
1568
+ // R5 §4.2: merge-information floor — ADVISORY in v1. A merge that
1569
+ // shrinks provenance or genericizes below the retention floor is
1570
+ // counted + warned, never refused (promotion path: design doc §7).
1571
+ try {
1572
+ const mergedParsed = parseFrontmatter(mergedContent);
1573
+ const mergedFm = mergedParsed.data;
1574
+ const mergedSourceRefs = Array.isArray(mergedFm.source_refs) ? mergedFm.source_refs.map(String) : [];
1575
+ const floorCheck = checkMergeInformationFloor(mergedParsed.content, mergedSourceRefs, participantInfo, antiCollapseConfig);
1576
+ if (!floorCheck.passed) {
1577
+ counts.mergeFloorViolations++;
1578
+ warnings.push(`Merge: information floor advisory for ${op.primary}: ${floorCheck.reason ?? "unspecified"} — merge proceeds (v1 observe-only).`);
1579
+ }
1580
+ }
1581
+ catch {
1582
+ // Floor measurement is best-effort; never blocks the merge path.
1583
+ }
1584
+ }
1585
+ // Backup secondaries before deleting. NOTE(review): no backupFile call is made for the primary in this function before it is overwritten below — confirm that is intended.
1586
+ for (const secRef of op.secondaries) {
1587
+ const secEntry = memoryByRef.get(secRef);
1588
+ if (secEntry && fs.existsSync(secEntry.filePath)) {
1589
+ backupFile(secEntry.filePath, backupDir, secEntry.name);
1590
+ }
1591
+ }
1592
+ // Write merged primary
1593
+ try {
1594
+ const parsedPrimary = parseAssetRef(op.primary);
1595
+ await writeAssetToSource(target.source, target.config, parsedPrimary, mergedContent);
1596
+ }
1597
+ catch (e) {
1598
+ warnings.push(`Merge: write failed for ${op.primary}: ${String(e)}`);
1599
+ emitMergeFailureSkips("merge_write_failed");
1600
+ return;
1601
+ }
1602
+ // Archive and delete secondaries (P1-B: soft-invalidation)
1603
+ for (const secRef of op.secondaries) {
1604
+ const secEntry = memoryByRef.get(secRef);
1605
+ if (!secEntry)
1606
+ continue;
1607
+ if (fs.existsSync(secEntry.filePath)) {
1608
+ archiveMemory(secEntry.filePath, stashDir, secRef, "merged into primary", opIndex, op.primary, warnings);
1609
+ }
1610
+ try {
1611
+ const parsedSec = parseAssetRef(secRef);
1612
+ await deleteAssetFromSource(target.source, target.config, parsedSec);
1613
+ markJournalCompleted(stashDir, secRef);
1614
+ }
1615
+ catch (e) {
1616
+ warnings.push(`Merge: delete failed for ${secRef}: ${String(e)}`);
1617
+ }
1618
+ }
1619
+ markJournalCompleted(stashDir, op.primary);
1620
+ counts.merged++;
1621
+ // 2026-05-26 accounting-leak fix: `merged` is op-level, but each
1622
+ // successful merge actions `1 + secondaries.length` memories. Without
1623
+ // this counter the accounting invariant breaks by `secondaries.length`
1624
+ // per successful merge (chunk loop excluded all secondaries from
1625
+ // judgedNoAction via targetRefs, but only the primary is credited to
1626
+ // `merged`). Count only loaded-memory secondaries; phantom secondary
1627
+ // refs never affected any chunk's targetRefs in the first place.
1628
+ for (const secRef of op.secondaries) {
1629
+ if (memoryByRef.has(secRef))
1630
+ counts.mergedSecondaries++; // counted even when the delete above failed (a failed delete only pushes a warning)
1631
+ }
1632
+ // Prune consumed refs from memoryByRef so later ops in this run cannot
1633
+ // reference an absorbed secondary as a merge primary and proceed with a
1634
+ // stale entry. Primary is rewritten (not deleted), so we only remove
1635
+ // secondaries; the primary ref remains valid under its new content.
1636
+ for (const secRef of op.secondaries) {
1637
+ memoryByRef.delete(secRef);
1638
+ }
1639
+ }
1640
/**
 * Execute a single consolidation `delete` op against a loaded memory.
 *
 * Steps, in order:
 *  1. Ref not present in `memoryByRef`: warn and return without recording a
 *     skip reason.
 *  2. Guard status is `hot` or `unparseable`: warn, record
 *     `captureMode_hot_refused`, return.
 *  3. If the memory file is on disk, back it up and archive it.
 *  4. Delete the asset from the source, mark the journal entry completed,
 *     increment `counts.deleted` and drop the ref from `memoryByRef`.
 *
 * A failure in step 4 is never rethrown; it becomes a warning plus a
 * `delete_already_gone` or `delete_failed` skip reason.
 *
 * @param {object} op - Delete op; `op.ref` and `op.reason` are read.
 * @param {number} opIndex - Op index, forwarded unchanged to `archiveMemory`.
 * @param {object} ctx - Run context supplying `stashDir`, `target`,
 *   `backupDir`, `memoryByRef`, `warnings`, `pushSkipReason` and `counts`.
 * @returns {Promise<void>}
 */
export async function handleDeleteOp(op, opIndex, ctx) {
    const { stashDir, target, backupDir, memoryByRef, warnings, pushSkipReason, counts } = ctx;
    const ref = op.ref;
    // Every skipped delete emits its warning first, then its skip reason.
    const skip = (warning, reason) => {
        warnings.push(warning);
        pushSkipReason("delete", ref, reason);
    };
    const memory = memoryByRef.get(ref);
    if (!memory) {
        // Phantom ref: it was never part of the batch, so it is not in
        // `processed`. Recording a skip reason here would inflate
        // Σ(skipReasons) without a matching processed entry and break the
        // accounting invariant; the warning alone keeps it visible.
        warnings.push(`Delete: ${ref} not found in loaded memories — skipping.`);
        return;
    }
    // captureMode:hot guard. User-captured memories may only be deleted by the
    // user, and a memory whose frontmatter cannot be parsed might be hiding
    // the hot flag, so both are refused. Before this guard existed the
    // consolidate LLM deleted hot-captured memos as "redundant" (14 such
    // deletes were silently archived between 2026-05-19 and 2026-05-20).
    const guardStatus = consolidateGuardStatus(memory.filePath);
    let refusalDetail = null;
    if (guardStatus === "hot") {
        refusalDetail = "captureMode:hot (user-explicit; never auto-delete)";
    }
    else if (guardStatus === "unparseable") {
        refusalDetail = "frontmatter unparseable (cannot verify hot flag absent)";
    }
    if (refusalDetail !== null) {
        skip(`Delete: refused for ${ref} — ${refusalDetail}. Reason from LLM: "${op.reason ?? "n/a"}"`, "captureMode_hot_refused");
        return;
    }
    const filePath = memory.filePath;
    if (fs.existsSync(filePath)) {
        backupFile(filePath, backupDir, memory.name);
        // P1-B: soft-invalidation archive happens before the hard delete.
        archiveMemory(filePath, stashDir, ref, op.reason, opIndex, undefined, warnings);
    }
    try {
        await deleteAssetFromSource(target.source, target.config, parseAssetRef(ref));
        markJournalCompleted(stashDir, ref);
        counts.deleted += 1;
        // Later ops in this run must not be able to use the deleted memory as
        // a merge primary or secondary.
        memoryByRef.delete(ref);
    }
    catch (err) {
        // "Already absent" is not a genuine failure: a prior run may have
        // removed the file before the DB was re-indexed, which is how the ref
        // still reached memoryByRef. The goal of the delete is already met.
        const detail = err instanceof Error ? err.message : String(err);
        if (detail.includes("not found in source")) {
            skip(`Delete: ${ref} — file already absent (stale DB entry); skipping.`, "delete_already_gone");
            return;
        }
        skip(`Delete: failed for ${ref}: ${String(err)}`, "delete_failed");
    }
}
1693
/**
 * Execute one `promote` op: turn a loaded memory into a pending knowledge
 * proposal.
 *
 * The op passes through a fixed sequence of guards. Each guard that rejects
 * pushes a warning and (except for a phantom source ref) a skip reason, then
 * returns; nothing is thrown to the caller for an individual bad op.
 * On success the new proposal id is appended to `ctx.promoted`, `op.ref` is
 * added to `ctx.promotedSourceRefs`, and the journal entry is marked completed.
 *
 * @param {object} op - Promote op; reads `op.ref`, `op.knowledgeRef`,
 *   `op.description` and `op.confidence`.
 * @param {object} ctx - Run context supplying `config`, `stashDir`,
 *   `sourceRun`, `target`, `memoryByRef`, `warnings`, `pushSkipReason`,
 *   `promoted` and `promotedSourceRefs`.
 * @returns {Promise<void>}
 */
export async function handlePromoteOp(op, ctx) {
    const { config, stashDir, sourceRun, target, memoryByRef, warnings, pushSkipReason, promoted, promotedSourceRefs } = ctx;
    const entry = memoryByRef.get(op.ref);
    if (!entry) {
        warnings.push(`Promote: ${op.ref} not found in loaded memories — skipping.`);
        // Phantom ref: it was never part of the batch, so it is not in
        // `processed`. Pushing a skip reason would inflate Σ(skipReasons)
        // without a matching processed entry and break the accounting
        // invariant; the warning keeps the event visible.
        return;
    }
    // Within-run source-ref dedup: skip if this source memory was already
    // promoted earlier in this run (safety belt — mergePlans already
    // deduplicates promote ops by source ref via Map, but this guard also
    // catches any future code paths that bypass mergePlans).
    if (promotedSourceRefs.has(op.ref)) {
        warnings.push(`Skipping promote: ${op.ref} already promoted in this run`);
        pushSkipReason("promote", op.ref, "promote_already_promoted_this_run");
        return;
    }
    let knowledgeRef = op.knowledgeRef;
    try {
        parseAssetRef(knowledgeRef);
    }
    catch {
        // Coerce before slugging: a missing or non-string knowledgeRef must be
        // normalized (and then rejected below), not crash with a TypeError.
        const slug = String(op.knowledgeRef ?? "")
            .replace(/^knowledge:/, "")
            .replace(/[^a-z0-9-]/gi, "-")
            .toLowerCase();
        knowledgeRef = `knowledge:${slug}`;
        warnings.push(`Normalized invalid ref "${op.knowledgeRef}" → "${knowledgeRef}"`);
    }
    // Idempotency: check pending proposals by target ref
    const existingProposals = listProposals(stashDir, { ref: knowledgeRef });
    if (existingProposals.some((p) => p.status === "pending")) {
        warnings.push(`Skipping promote: pending proposal already exists for ${knowledgeRef}`);
        pushSkipReason("promote", op.ref, "promote_pending_proposal_exists");
        return;
    }
    // Normalization is best-effort (e.g. it can yield an empty slug), so the
    // ref may still be unparseable. Treat that as a skipped op charged against
    // the real source memory instead of letting the exception escape.
    let parsedKnowledgeRef;
    try {
        parsedKnowledgeRef = parseAssetRef(knowledgeRef);
    }
    catch (e) {
        warnings.push(`Promote: rejected ${op.ref} — knowledge ref "${knowledgeRef}" is invalid even after normalization: ${String(e)}`);
        pushSkipReason("promote", op.ref, "promote_invalid_ref");
        return;
    }
    // Idempotency: check if knowledge asset already exists
    const destPath = path.join(target.source.path, "knowledge", `${parsedKnowledgeRef.name}.md`);
    if (fs.existsSync(destPath)) {
        warnings.push(`Skipping promote: ${knowledgeRef} already exists in source`);
        pushSkipReason("promote", op.ref, "promote_already_exists");
        return;
    }
    let memoryContent = "";
    try {
        memoryContent = fs.readFileSync(entry.filePath, "utf8");
    }
    catch (e) {
        warnings.push(`Promote: could not read ${op.ref}: ${String(e)}`);
        pushSkipReason("promote", op.ref, "promote_read_failed");
        return;
    }
    // Defensive sanitization: legacy memory files written by older
    // consolidate runs may still carry outer code fences or broken YAML.
    // Strip them here so we never propose a polluted asset.
    const promoteSanitized = sanitizeMergedContent(memoryContent);
    if (!promoteSanitized.ok) {
        warnings.push(`Promote: rejected ${op.ref} — source memory failed sanitization (${promoteSanitized.reason}).`);
        pushSkipReason("promote", op.ref, "promote_sanitization_failed");
        return;
    }
    memoryContent = promoteSanitized.result.content;
    // SOURCE_SUPERSEDED guard: refuse to promote a memory whose source
    // frontmatter carries `status: superseded`. Predicate at module top
    // (`hasSupersededStatus`) so tests can exercise it directly.
    if (hasSupersededStatus(promoteSanitized.result.frontmatter)) {
        warnings.push(`Promote: refused for ${op.ref} → ${knowledgeRef} — source memory has status:superseded; superseded memories are not promotable knowledge.`);
        pushSkipReason("promote", op.ref, "promote_superseded");
        return;
    }
    // Parse the source memory up-front so the body/frontmatter checks below
    // share the same parsed view.
    const parsedMemory = parseFrontmatter(memoryContent);
    // Reject sources whose body is too small to make useful knowledge.
    // Observed failure: memory files whose body is literally a tags string
    // ("discord,notification,send-notification") get promoted to knowledge
    // proposals that no reviewer would accept. Threshold is conservative —
    // 100 chars catches single-line tag dumps without rejecting genuinely
    // terse but valid notes.
    const PROMOTE_BODY_MIN_CHARS = 100;
    const sourceBody = parsedMemory.content.trim();
    if (sourceBody.length < PROMOTE_BODY_MIN_CHARS) {
        warnings.push(`Promote: rejected ${op.ref} → ${knowledgeRef} — source memory body is too small (${sourceBody.length} chars; need ≥${PROMOTE_BODY_MIN_CHARS}) to make useful knowledge.`);
        pushSkipReason("promote", op.ref, "promote_source_too_small");
        return;
    }
    // Cross-run + within-run content dedup: if an identical body already
    // exists in ANY pending consolidate proposal (regardless of target ref),
    // skip. This prevents duplicate proposals when:
    //   (a) Multiple source memories have identical bodies but differ only
    //       in noise frontmatter (`inferenceProcessed: true` twin alongside
    //       the original; differing `updated:` timestamps; etc.) — the body
    //       is the load-bearing content, so dedup must hash on body only.
    //   (b) A prior run created a proposal for the same body under a
    //       different knowledgeRef slug.
    // Use cacheHash (case-preserving stripped body) to match the canonical
    // hash domain used by the body-embedding cache and pending-proposal set.
    const bodyHash = cacheHash(sourceBody);
    const allPendingConsolidateProposals = listProposals(stashDir, { status: "pending" }).filter((p) => p.source === "consolidate");
    const contentDupProposal = allPendingConsolidateProposals.find((p) => {
        return cacheHash(p.payload.content) === bodyHash;
    });
    if (contentDupProposal) {
        warnings.push(`Skipping promote: identical body already pending as proposal ${contentDupProposal.id} (ref: ${contentDupProposal.ref}); skipping duplicate for ${op.ref} → ${knowledgeRef}`);
        pushSkipReason("promote", op.ref, "dedup_pending_proposal");
        return;
    }
    try {
        // Use LLM-provided description; fall back to memory's own description
        // (post-sanitization frontmatter is authoritative). Only strings are
        // accepted from either side: frontmatter such as `description: 42`
        // parses to a non-string, which must fall through to the validator
        // below as "missing" rather than blow up on `.trim()` and be
        // misreported as a createProposal failure.
        const llmDescription = typeof op.description === "string" ? op.description.trim() : "";
        const rawMemoryDescription = parsedMemory.data?.description;
        const memoryDescription = typeof rawMemoryDescription === "string" ? rawMemoryDescription.trim() : "";
        const description = llmDescription || memoryDescription;
        // Validate the resolved frontmatter before emitting a proposal.
        // Required field: non-empty description. Reject obvious truncation
        // markers (description ends with `,`/`;`/`:`/`...`/hanging connector)
        // so the queue never sees half-formed metadata that the reviewer
        // would only reject.
        const fmCheck = validateProposalFrontmatter({ description });
        if (!fmCheck.ok) {
            warnings.push(`Promote: rejected ${op.ref} → ${knowledgeRef} — ${fmCheck.reason}.`);
            pushSkipReason("promote", op.ref, "promote_invalid_frontmatter");
            return;
        }
        // Merge `description` INTO the body's YAML frontmatter so it lands in
        // the on-disk asset when the proposal is accepted. The descriptionQuality
        // validator parses `payload.content` body (not the envelope
        // `payload.frontmatter`), and a memory's native frontmatter has
        // `captureMode`/`beliefState`/etc. but never `description` — without
        // this merge, 60+ pending proposals were blocked at accept-time with
        // MISSING_FRONTMATTER_DESCRIPTION even though the envelope had it.
        // Body frontmatter and envelope frontmatter only converge because
        // this writer merges them explicitly.
        const mergedBodyFm = {
            ...(parsedMemory.data ?? {}),
            description,
        };
        const serializedMergedFm = serializeFrontmatter(mergedBodyFm);
        const proposalContent = assembleAssetFromString(serializedMergedFm, parsedMemory.content);
        // Pre-emit dedup against pending consolidate proposals from the
        // same improve run (slug-variant match). The cross-run content-hash
        // dedup inside `mergePlans` handles duplicates against existing
        // stash assets — see commit history for the deletion of the
        // unbounded embedding + cross-type slug branches.
        const dedup = await checkPreEmitDedup({
            candidateRef: knowledgeRef,
            candidateText: `${description}. ${memoryContent}`,
            stashDir,
            config,
        });
        if (dedup.duplicate) {
            warnings.push(`Promote: skipped ${op.ref} → ${knowledgeRef} — ${dedup.reason}.`);
            pushSkipReason("promote", op.ref, "promote_dedup_window");
            return;
        }
        const proposalResult = createProposal(stashDir, {
            ref: knowledgeRef,
            source: "consolidate",
            sourceRun,
            payload: {
                content: proposalContent,
                frontmatter: { description },
            },
            ...(typeof op.confidence === "number" ? { confidence: op.confidence } : {}),
        });
        if (isProposalSkipped(proposalResult)) {
            warnings.push(`Promote: skipped proposal for ${op.ref} (${proposalResult.reason}): ${proposalResult.message}`);
            pushSkipReason("promote", op.ref, `promote_proposal_${proposalResult.reason}`);
        }
        else {
            promoted.push(proposalResult.id);
            promotedSourceRefs.add(op.ref);
            markJournalCompleted(stashDir, op.ref);
        }
    }
    catch (e) {
        warnings.push(`Promote: createProposal failed for ${op.ref}: ${String(e)}`);
        pushSkipReason("promote", op.ref, "promote_create_failed");
    }
}
1877
/**
 * Execute one `contradict` op: record on the memory behind `op.ref` that it is
 * contradicted by `op.contradictedByRef`.
 *
 * Checks run in this order:
 *  1. `op.ref` must be a loaded memory; otherwise warn and return without a
 *     skip reason.
 *  2. Confidence gate (≥ 0.92); otherwise skip with `contradict_low_confidence`.
 *  3. `op.contradictedByRef` must be a loaded memory; otherwise skip with
 *     `contradict_target_missing`.
 *  4. Write the edge, increment `counts.contradicted`, mark the journal entry
 *     completed. A write failure becomes `contradict_write_failed`.
 *
 * @param {object} op - Contradict op; reads `op.ref`, `op.contradictedByRef`
 *   and the optional numeric `op.confidence`.
 * @param {object} ctx - Run context supplying `stashDir`, `memoryByRef`,
 *   `warnings`, `pushSkipReason` and `counts`.
 * @returns {Promise<void>}
 */
export async function handleContradictOp(op, ctx) {
    const { stashDir, memoryByRef, warnings, pushSkipReason, counts } = ctx;
    // Resolve the subject memory BEFORE any check that records a skip reason.
    // A phantom ref was never part of the batch, so it is not in `processed`;
    // charging it a skip reason (as the confidence gate would) inflates
    // Σ(skipReasons) without a matching processed entry and breaks the
    // accounting invariant. The warning alone keeps it visible.
    const entry = memoryByRef.get(op.ref);
    if (!entry) {
        warnings.push(`Contradict: ${op.ref} not found in loaded memories — skipping.`);
        return;
    }
    // Confidence gate: surface-level topic overlap causes false positives
    // (investigation 2026-06-18). Require ≥0.92 confidence before writing
    // contradiction edges. Missing confidence field defaults to 1.0 for
    // backward compatibility with responses that predate this field.
    const opConfidence = typeof op.confidence === "number" ? op.confidence : 1.0;
    // Written as a negated `>=` on purpose: every comparison with NaN is
    // false, so `opConfidence < 0.92` would wave a NaN confidence through.
    if (!(opConfidence >= 0.92)) {
        warnings.push(`Contradict: confidence ${opConfidence.toFixed(2)} below 0.92 threshold for ${op.ref} <-> ${op.contradictedByRef} — skipping.`);
        pushSkipReason("contradict", op.ref, "contradict_low_confidence");
        return;
    }
    // C-3 / #382: Write contradictedBy edges so resolveFamilyContradictions
    // (the SCC resolver in memory-improve.ts) has edges to work on.
    // Zep arXiv:2501.13956 §3 — unified belief-revision with contradiction edges.
    const contradictorEntry = memoryByRef.get(op.contradictedByRef);
    if (!contradictorEntry) {
        warnings.push(`Contradict: ${op.contradictedByRef} not found — skipping.`);
        // op.ref IS in the batch (entry found above) so the skipReason is
        // correctly charged against a real processed memory.
        pushSkipReason("contradict", op.ref, "contradict_target_missing");
        return;
    }
    try {
        // Write the contradiction edge: op.ref is contradicted by op.contradictedByRef
        writeContradictEdge(entry.filePath, op.contradictedByRef);
        counts.contradicted++;
        markJournalCompleted(stashDir, op.ref);
    }
    catch (e) {
        warnings.push(`Contradict: failed to write edge for ${op.ref}: ${String(e)}`);
        pushSkipReason("contradict", op.ref, "contradict_write_failed");
    }
}
1916
1919
  // ── Helpers ─────────────────────────────────────────────────────────────────
1917
1920
  /**
1918
1921
  * Normalise a knowledge slug for variant-aware deduplication. Collapses: