akm-cli 0.9.0-beta.52 → 0.9.0-beta.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/assets/hints/cli-hints-full.md +6 -5
  2. package/dist/cli/clack.js +56 -0
  3. package/dist/cli/confirm.js +1 -1
  4. package/dist/cli.js +0 -7
  5. package/dist/commands/env/env-cli.js +3 -2
  6. package/dist/commands/env/env.js +14 -67
  7. package/dist/commands/health/checks.js +28 -15
  8. package/dist/commands/health/html-report.js +33 -10
  9. package/dist/commands/health.js +222 -22
  10. package/dist/commands/improve/collapse-detector.js +419 -0
  11. package/dist/commands/improve/consolidate.js +72 -54
  12. package/dist/commands/improve/distill.js +79 -13
  13. package/dist/commands/improve/extract.js +13 -6
  14. package/dist/commands/improve/homeostatic.js +109 -79
  15. package/dist/commands/improve/improve-cli.js +67 -1
  16. package/dist/commands/improve/improve.js +10 -0
  17. package/dist/commands/improve/loop-stages.js +39 -1
  18. package/dist/commands/improve/outcome-loop.js +33 -19
  19. package/dist/commands/improve/preparation.js +36 -11
  20. package/dist/commands/improve/salience.js +49 -32
  21. package/dist/commands/read/curate.js +9 -13
  22. package/dist/commands/read/knowledge.js +4 -0
  23. package/dist/commands/read/search-cli.js +6 -4
  24. package/dist/commands/read/search.js +12 -5
  25. package/dist/commands/read/show.js +6 -8
  26. package/dist/commands/sources/add-cli.js +1 -1
  27. package/dist/commands/sources/init.js +12 -0
  28. package/dist/commands/sources/stash-cli.js +1 -1
  29. package/dist/commands/tasks/default-tasks.js +12 -0
  30. package/dist/core/asset/asset-spec.js +3 -2
  31. package/dist/core/config/config-schema.js +39 -17
  32. package/dist/core/config/config.js +12 -0
  33. package/dist/core/eval/rank-metrics.js +113 -0
  34. package/dist/core/state/migrations.js +56 -0
  35. package/dist/core/state-db.js +146 -19
  36. package/dist/core/warn.js +21 -0
  37. package/dist/indexer/db/db.js +6 -0
  38. package/dist/indexer/ensure-index.js +36 -92
  39. package/dist/indexer/index-writer-lock.js +9 -11
  40. package/dist/indexer/index-written-assets.js +105 -0
  41. package/dist/indexer/indexer.js +16 -4
  42. package/dist/indexer/passes/metadata.js +20 -0
  43. package/dist/indexer/read-preflight.js +23 -0
  44. package/dist/indexer/search/db-search.js +29 -1
  45. package/dist/indexer/search/ranking-contributors.js +33 -1
  46. package/dist/indexer/search/ranking.js +66 -0
  47. package/dist/indexer/search/search-fields.js +6 -0
  48. package/dist/indexer/walk/walker.js +21 -13
  49. package/dist/integrations/agent/detect.js +9 -0
  50. package/dist/integrations/agent/index.js +1 -1
  51. package/dist/llm/client.js +12 -0
  52. package/dist/llm/embedder.js +26 -2
  53. package/dist/llm/embedders/local.js +7 -1
  54. package/dist/llm/feature-gate.js +6 -2
  55. package/dist/output/renderers.js +8 -13
  56. package/dist/output/shapes/helpers.js +0 -3
  57. package/dist/output/shapes/passthrough.js +1 -0
  58. package/dist/scripts/migrate-storage.js +178 -35
  59. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +46 -19
  60. package/dist/setup/detect.js +9 -0
  61. package/dist/setup/registry-stash-loader.js +12 -0
  62. package/dist/setup/setup.js +1 -1
  63. package/dist/storage/repositories/index-db.js +10 -1
  64. package/dist/tasks/backends/index.js +9 -0
  65. package/dist/tasks/runner.js +9 -0
  66. package/package.json +2 -4
@@ -581,6 +581,43 @@ function writeQualityRejection(stash, inputRef, lessonRef, content, score, reaso
581
581
  ...extraMeta,
582
582
  };
583
583
  }
584
+ /**
585
+ * G4 — content-score a distilled OUTPUT (lesson/knowledge proposal body) and
586
+ * persist it to state.db :: asset_salience with `encoding_source: "content"`.
587
+ *
588
+ * Lessons are refused as distill INPUTS (`DISTILL_REFUSED_INPUT_TYPES`), so
589
+ * this creation-time write is their only chance to earn a real content-derived
590
+ * encoding score instead of sitting on the type-weight stub forever. Best-effort:
591
+ * never blocks or fails the proposal flow.
592
+ */
593
+ function persistOutputEncodingSalience(ref, body, existingRefVocabulary,
594
+ // Operator opt-out (improve.salience.outcomeWeightEnabled: false) must apply
595
+ // here too, or distill-written rank_score rows would use WS-2 weights while
596
+ // preparation uses parity weights — inconsistent salience semantics.
597
+ outcomeWeightEnabled) {
598
+ try {
599
+ const parsedRef = parseAssetRef(ref);
600
+ const salienceResult = scoreEncodingSalience({
601
+ body,
602
+ type: parsedRef.type,
603
+ existingRefVocabulary,
604
+ revisionCount: 0, // a freshly distilled output IS a first encounter
605
+ });
606
+ withStateDb((stateDb) => {
607
+ const vector = computeSalience({
608
+ ref,
609
+ type: parsedRef.type,
610
+ retrievalFreq: 0,
611
+ encodingSalience: salienceResult.score,
612
+ outcomeWeightEnabled,
613
+ });
614
+ upsertAssetSalience(stateDb, ref, vector);
615
+ });
616
+ }
617
+ catch {
618
+ // Best-effort — scoring must never block proposal creation.
619
+ }
620
+ }
584
621
  // ── Main entry point ────────────────────────────────────────────────────────
585
622
  /**
586
623
  * Run a single bounded distillation pass for `ref`. Always emits exactly one
@@ -639,6 +676,9 @@ export async function akmDistill(options) {
639
676
  const chat = options.chat ?? chatCompletion;
640
677
  const lookup = options.lookupFn ?? defaultLookup;
641
678
  const readEventsImpl = options.readEventsFn ?? readEvents;
679
+ // R1 opt-out must flow into every computeSalience call this command makes so
680
+ // distill-written rank_score rows use the same weights as preparation's.
681
+ const outcomeWeightEnabled = config.improve?.salience?.outcomeWeightEnabled !== false;
642
682
  // D-4 / #390: similar-lessons retrieval seam (test-injectable).
643
683
  const fetchSimilarLessonsFn = options.fetchSimilarLessonsFn ?? ((query, n) => fetchTopSimilarLessons(query, n, options.stashDir));
644
684
  // Best-effort load: when the asset is not yet indexed we still proceed —
@@ -661,30 +701,42 @@ export async function akmDistill(options) {
661
701
  // 1. The asset's frontmatter (human-readable mirror; idempotent delta gate).
662
702
  // 2. state.db :: asset_salience (canonical; feeds improve's high-salience gate).
663
703
  // Both writes are best-effort — a DB error never blocks distillation.
704
+ //
705
+ // The bigram ref vocabulary is built ONCE per invocation — the novelty signal
706
+ // reuses it when scoring the distilled OUTPUT at proposal creation (G4).
707
+ let existingRefVocabulary = new Set();
708
+ try {
709
+ const embCfg = config?.embedding;
710
+ const indexDb = openIndexDatabase(getDbPath(), embCfg?.dimension ? { embeddingDim: embCfg.dimension } : undefined);
711
+ try {
712
+ const allRefs = getAllEntries(indexDb).map((e) => e.entryKey);
713
+ existingRefVocabulary = buildRefVocabulary(allRefs);
714
+ }
715
+ finally {
716
+ closeDatabase(indexDb);
717
+ }
718
+ }
719
+ catch {
720
+ // Index not available — novelty defaults to type-floor.
721
+ }
664
722
  if (assetContent && assetFilePath) {
665
723
  try {
666
724
  const parsedRef = parseAssetRef(inputRef);
667
- // Build bigram vocabulary from currently-indexed refs for novelty signal.
668
- let existingRefVocabulary = new Set();
725
+ // G4: predictionError decays with revision count — the prior hardcoded
726
+ // `revisionCount: 0` made it a dead constant 1.0. Use the number of
727
+ // proposals ever raised against this ref as the revision proxy.
728
+ let revisionCount = 0;
669
729
  try {
670
- const embCfg = config?.embedding;
671
- const indexDb = openIndexDatabase(getDbPath(), embCfg?.dimension ? { embeddingDim: embCfg.dimension } : undefined);
672
- try {
673
- const allRefs = getAllEntries(indexDb).map((e) => e.entryKey);
674
- existingRefVocabulary = buildRefVocabulary(allRefs);
675
- }
676
- finally {
677
- closeDatabase(indexDb);
678
- }
730
+ revisionCount = listProposals(stash, { ref: inputRef, includeArchive: true }).length;
679
731
  }
680
732
  catch {
681
- // Index not available — novelty defaults to type-floor.
733
+ // best-effort: unknown history scores as a first encounter
682
734
  }
683
735
  const salienceResult = scoreEncodingSalience({
684
736
  body: assetContent,
685
737
  type: parsedRef.type,
686
738
  existingRefVocabulary,
687
- revisionCount: 0,
739
+ revisionCount,
688
740
  });
689
741
  // 1. Write salience to the source asset frontmatter (idempotent).
690
742
  const updatedContent = writeSalienceToFrontmatter(assetContent, salienceResult.score, salienceResult);
@@ -700,6 +752,7 @@ export async function akmDistill(options) {
700
752
  type: parsedRef.type,
701
753
  retrievalFreq: 0,
702
754
  encodingSalience: salienceResult.score,
755
+ outcomeWeightEnabled,
703
756
  });
704
757
  upsertAssetSalience(stateDb, inputRef, vector);
705
758
  });
@@ -892,6 +945,9 @@ export async function akmDistill(options) {
892
945
  };
893
946
  }
894
947
  const proposal = proposalResult;
948
+ // G4: content-score the distilled OUTPUT so it carries a real encoding
949
+ // salience (encoding_source='content') from creation.
950
+ persistOutputEncodingSalience(promotion.knowledgeRef, resolvedPromotionContent, existingRefVocabulary, outcomeWeightEnabled);
895
951
  appendEvent({
896
952
  eventType: "distill_invoked",
897
953
  ref: inputRef,
@@ -901,6 +957,9 @@ export async function akmDistill(options) {
901
957
  proposalRef: promotion.knowledgeRef,
902
958
  proposalKind: "knowledge",
903
959
  proposalId: proposal.id,
960
+ // R3: judge verdicts are longitudinally queryable, not just a one-shot
961
+ // proposal.confidence write (normalized 1–5 score / 5).
962
+ ...(knowledgeJudgeConfidence !== undefined ? { judgeConfidence: knowledgeJudgeConfidence } : {}),
904
963
  ...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
905
964
  ...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
906
965
  ...eligMeta,
@@ -1367,6 +1426,10 @@ export async function akmDistill(options) {
1367
1426
  };
1368
1427
  }
1369
1428
  const proposal2 = proposalResult2;
1429
+ // G4: content-score the distilled OUTPUT so it carries a real encoding
1430
+ // salience (encoding_source='content') from creation — lessons never get
1431
+ // another chance (they are refused as distill inputs).
1432
+ persistOutputEncodingSalience(effectiveLessonRef, content, existingRefVocabulary, outcomeWeightEnabled);
1370
1433
  appendEvent({
1371
1434
  eventType: "distill_invoked",
1372
1435
  ref: inputRef,
@@ -1376,6 +1439,9 @@ export async function akmDistill(options) {
1376
1439
  proposalRef: effectiveLessonRef,
1377
1440
  proposalKind: effectiveProposalKind,
1378
1441
  proposalId: proposal2.id,
1442
+ // R3: judge verdicts are longitudinally queryable, not just a one-shot
1443
+ // proposal.confidence write (normalized 1–5 score / 5).
1444
+ ...(lessonJudgeConfidence !== undefined ? { judgeConfidence: lessonJudgeConfidence } : {}),
1379
1445
  ...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
1380
1446
  ...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
1381
1447
  ...(descriptionSwapped > 0 ? { descriptionSwapped } : {}),
@@ -35,6 +35,7 @@ import { resolveStashStandards } from "../../core/standards/resolve-stash-standa
35
35
  import { getExtractedSessionsMap, getLastExtractRunAt, getStateDbPath, openStateDatabase, shouldSkipAlreadyExtractedSession, upsertExtractedSession, withStateDb, } from "../../core/state-db.js";
36
36
  import { repairTruncatedDescription } from "../../core/text-truncation.js";
37
37
  import { warn } from "../../core/warn.js";
38
+ import { indexWrittenAssets } from "../../indexer/index-written-assets.js";
38
39
  import { resolveImproveProcessRunnerFromProfile, runnerIsLlm } from "../../integrations/agent/runner.js";
39
40
  import { normalizeHarnessId } from "../../integrations/harnesses/index.js";
40
41
  import { getAvailableHarnesses } from "../../integrations/session-logs/index.js";
@@ -370,6 +371,10 @@ standardsContext) {
370
371
  try {
371
372
  const result = await writeSessionAsset(data, stashDir, sessionIndexing.generate);
372
373
  if (result.written) {
374
+ // Write-path indexing (itself fail-open): standalone `akm extract`
375
+ // (session-end hook) has no post-loop reindex to pick this file up.
376
+ if (result.filePath)
377
+ await indexWrittenAssets(stashDir, [result.filePath]);
373
378
  return {
374
379
  ...(result.ref ? { sessionAssetRef: result.ref } : {}),
375
380
  ...(result.logPath ? { sessionLogPath: result.logPath } : {}),
@@ -752,16 +757,18 @@ export async function akmExtract(options) {
752
757
  }
753
758
  }
754
759
  // WS-3b Step-0b: schema-similarity intake gate.
755
- // Load derived-layer (lesson/knowledge) embeddings once per run, but ONLY
756
- // when the gate is enabled in config. When disabled (the default) this block
757
- // is fully skipped and schemaSimilarityCtx stays null → byte-identical to
758
- // prior behaviour.
760
+ // DEFAULT ON since R3 (docs/design/improve-self-learning-analysis.md G5):
761
+ // extract is the highest-volume acquisition path with no LLM judge, so the
762
+ // cheap embedding-dedup check (one embed per lesson/knowledge candidate,
763
+ // fail-open) is the intake quality gate. Opt out via
764
+ // processes.extract.schemaSimilarity.enabled: false. The gate is inert in
765
+ // practice when no derived-layer embeddings exist (empty ctx → no penalty).
759
766
  const schemaSimilarityCfg = extractProcess?.schemaSimilarity;
760
767
  let schemaSimilarityCtx = null;
761
- if (schemaSimilarityCfg?.enabled === true) {
768
+ if (schemaSimilarityCfg?.enabled !== false) {
762
769
  const derivedEmbeddings = options.schemaSimilarityEmbeddings ?? loadDerivedLayerEmbeddings();
763
770
  schemaSimilarityCtx = {
764
- config: schemaSimilarityCfg,
771
+ config: { ...schemaSimilarityCfg, enabled: true },
765
772
  derivedEmbeddings,
766
773
  embeddingConfig: config.embedding,
767
774
  embedFn: options.schemaSimilarityEmbedFn,
@@ -1,13 +1,51 @@
1
1
  // This Source Code Form is subject to the terms of the Mozilla Public
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * WS-3b Step 0 — Intake + homeostatic tier.
6
+ *
7
+ * Sub-features (0b is default-ON for extract since R3; the rest default-OFF):
8
+ *
9
+ * (The former **0a homeostatic demotion** pass was removed (R4,
10
+ * docs/design/improve-self-learning-analysis.md G3): it was default-off and
11
+ * self-undoing — the next `upsertAssetSalience` recompute unconditionally
12
+ * overwrote the demoted values. SHY-style continuous downscaling now lives in
13
+ * `computeSalience`'s always-applied recency decay, whose 0.1 floor itself
14
+ * decays on a long half-life so unreviewed-forever assets keep drifting down.)
15
+ *
16
+ * **0b Schema-similarity gate**
17
+ * At intake, if a new candidate's body embedding is within ε of an existing
18
+ * derived-layer lesson/knowledge node, mark `schema-consistent` and lower
19
+ * its priority; only schema-inconsistent/contradicting candidates get full
20
+ * `encodingSalience`. One embedding lookup via body_embeddings cache; relieves
21
+ * dedup pressure before it accumulates.
22
+ *
23
+ * **0c Hot-probation intake buffer (#604)**
24
+ * New system-generated extractions enter `captureMode: hot-probation` and
25
+ * spend ONE consolidation cycle in probation before promotion to the main
26
+ * stash; dedup + quality second-pass runs against them. Stops noisy
27
+ * extractions from polluting the stash at the source. Reuses shared
28
+ * dedupHash + body_embeddings. Default OFF.
29
+ *
30
+ * **Anti-collapse guards (step 8)**
31
+ * (a) Generation counter: merged.generation = max(sources)+1; refuse merge
32
+ * of two assets both above generation N (default 2); merges cite sources.
33
+ * (b) Lexical-diversity check: low n-gram diversity ⇒ raise merge threshold.
34
+ * (c) Occasional random non-similar cluster in the pool.
35
+ *
36
+ * **CLS interleaving (step 9)**
37
+ * distill/memoryInference prompts include embedding-retrieved adjacent
38
+ * lessons/knowledge so the pipeline doesn't overwrite prior generalizations.
39
+ *
40
+ * **Distill→source fidelity (step 10)**
41
+ * After a distill proposal, check it against cited source memories; a
42
+ * contradiction flag routes to human review.
43
+ *
44
+ * @module homeostatic
45
+ */
4
46
  import { warn } from "../../core/warn.js";
5
47
  import { closeDatabase, openExistingDatabase } from "../../indexer/db/db.js";
6
48
  // ── Constants ─────────────────────────────────────────────────────────────────
7
- /** Default days-since-last-retrieval threshold to consider an asset stale. */
8
- export const DEFAULT_STALE_DAYS = 30;
9
- /** Default retrievalSalience demotion factor for stale assets. */
10
- export const DEFAULT_DEMOTION_FACTOR = 0.5;
11
49
  /** Default epsilon for schema-similarity gate (looser than dedup's 0.97). */
12
50
  export const DEFAULT_SCHEMA_SIMILARITY_EPSILON = 0.85;
13
51
  /** Default multiplicative confidence penalty applied to schema-consistent candidates. */
@@ -18,79 +56,6 @@ export const DEFAULT_MAX_GENERATION = 2;
18
56
  export const DEFAULT_RANDOM_CLUSTER_FRACTION = 0.05;
19
57
  /** Default number of adjacent lessons/knowledge for CLS interleaving. */
20
58
  export const DEFAULT_CLS_ADJACENT_COUNT = 3;
21
- /**
22
- * Demote `retrievalSalience` in state.db for stale/low-value assets.
23
- *
24
- * "Stale" = the asset has a salience row with `updated_at` older than
25
- * `staleDays` AND `retrieval_salience > 0`. Demotion multiplies the current
26
- * `retrieval_salience` by `demotionFactor` (default 0.5) and records
27
- * `homeostatic_demoted_at` so the pass can be observed.
28
- *
29
- * Pure state.db operation — no file I/O, no LLM calls. Idempotent: running
30
- * twice in the same run only demotes the already-demoted value a second time,
31
- * which is bounded (0.5 × 0.5 = 0.25) and corrected on re-retrieval.
32
- *
33
- * Called BEFORE the dedup/LLM-merge pool is assembled so the merge pool
34
- * already reflects the updated scores.
35
- */
36
- export function runHomeostaticDemotion(db, config, now) {
37
- const warnings = [];
38
- if (!config.enabled)
39
- return { demoted: 0, warnings };
40
- const staleDays = config.staleDays ?? DEFAULT_STALE_DAYS;
41
- const demotionFactor = config.demotionFactor ?? DEFAULT_DEMOTION_FACTOR;
42
- const nowMs = now ?? Date.now();
43
- const staleThresholdMs = nowMs - staleDays * 86_400_000;
44
- try {
45
- // Find assets whose salience row is stale AND has non-zero retrievalSalience.
46
- // updated_at reflects the last time salience was computed (i.e. the last run
47
- // that touched this asset). If the asset hasn't been seen recently, its
48
- // retrieval_salience is stale.
49
- const staleRows = db
50
- .prepare(`SELECT asset_ref, retrieval_salience, rank_score, encoding_salience, outcome_salience
51
- FROM asset_salience
52
- WHERE updated_at < ? AND retrieval_salience > 0`)
53
- .all(staleThresholdMs);
54
- if (staleRows.length === 0)
55
- return { demoted: 0, warnings };
56
- // Batch update in a transaction for atomicity and performance.
57
- const updateStmt = db.prepare(`UPDATE asset_salience
58
- SET retrieval_salience = ?,
59
- rank_score = ?,
60
- homeostatic_demoted_at = ?,
61
- updated_at = ?
62
- WHERE asset_ref = ?`);
63
- let demoted = 0;
64
- db.exec("BEGIN");
65
- try {
66
- for (const row of staleRows) {
67
- const newRetrieval = row.retrieval_salience * demotionFactor;
68
- // Recompute rank_score with the demoted retrieval value.
69
- // We use simplified WS-1 parity weights here (no outcome weight by
70
- // default) so the demotion is consistent with what salience.ts computes.
71
- // The next full computeSalience call will overwrite with the exact value.
72
- const newRank = Math.min(1, Math.max(0, (0.3 * row.encoding_salience + 0.0 * row.outcome_salience + 0.7 * newRetrieval) *
73
- // Apply a mild size penalty assumption (200 bytes floor gives 1/log10(200)≈0.43)
74
- 0.43));
75
- updateStmt.run(newRetrieval, newRank, nowMs, nowMs, row.asset_ref);
76
- demoted++;
77
- }
78
- db.exec("COMMIT");
79
- }
80
- catch (e) {
81
- db.exec("ROLLBACK");
82
- throw e;
83
- }
84
- warn(`[homeostatic] demoted retrievalSalience for ${demoted} stale asset(s) (staleDays=${staleDays}, factor=${demotionFactor})`);
85
- return { demoted, warnings };
86
- }
87
- catch (err) {
88
- const msg = `[homeostatic] demotion failed: ${err instanceof Error ? err.message : String(err)}`;
89
- warn(msg);
90
- warnings.push(msg);
91
- return { demoted: 0, warnings };
92
- }
93
- }
94
59
  /**
95
60
  * Check whether a candidate body embedding is schema-consistent with an existing
96
61
  * derived-layer lesson/knowledge node. Returns `true` when the candidate is
@@ -270,7 +235,8 @@ export function computeMergedGeneration(sourceGenerations) {
270
235
  * @param config - Anti-collapse config.
271
236
  */
272
237
  export function checkGenerationGuard(sourceGenerations, config) {
273
- if (!config.enabled)
238
+ // R5: default ON — only an explicit opt-out disables the guard.
239
+ if (config.enabled === false)
274
240
  return { refused: false };
275
241
  const maxGen = config.maxGeneration ?? DEFAULT_MAX_GENERATION;
276
242
  const highGenCount = sourceGenerations.filter((g) => g > maxGen).length;
@@ -282,6 +248,69 @@ export function checkGenerationGuard(sourceGenerations, config) {
282
248
  }
283
249
  return { refused: false };
284
250
  }
251
+ /** Distinct-token retention floor default (R5 §4.2). */
252
+ export const DEFAULT_MIN_SPECIFICITY_RETENTION = 0.6;
253
+ function distinctTokens(text) {
254
+ // Same lowercase whitespace tokenization computeBigramDiversity uses.
255
+ return new Set(text
256
+ .toLowerCase()
257
+ .split(/\s+/)
258
+ .filter((w) => w.length > 0));
259
+ }
260
+ /**
261
+ * A merge must strictly increase information (R5 §4.2):
262
+ * 1. Provenance: the merged asset's `source_refs` must be a superset of the
263
+ * union of all participants' `source_refs` plus the participant refs
264
+ * themselves — provenance never shrinks through a merge.
265
+ * 2. Specificity: distinctTokens(mergedBody) ≥ minSpecificityRetention ×
266
+ * |union(distinctTokens(participant bodies))| — a merge that only
267
+ * shortens/genericizes fails.
268
+ *
269
+ * Pure and deterministic; ADVISORY in v1 (the caller counts violations, it
270
+ * does not refuse the merge). Returns `passed: true` immediately when the
271
+ * anti-collapse suite or the floor itself is opted out.
272
+ */
273
+ export function checkMergeInformationFloor(mergedBody, mergedSourceRefs, participants, config) {
274
+ if (config.enabled === false || config.mergeInformationFloor === false || participants.length === 0) {
275
+ return { passed: true, provenanceBefore: 0, provenanceAfter: 0, specificityRetention: 1 };
276
+ }
277
+ // 1. Provenance union: participants + everything they already cited.
278
+ const required = new Set();
279
+ for (const p of participants) {
280
+ required.add(p.ref);
281
+ for (const sr of p.sourceRefs)
282
+ required.add(sr);
283
+ }
284
+ const after = new Set(mergedSourceRefs);
285
+ const missing = [...required].filter((r) => !after.has(r));
286
+ // 2. Specificity retention over the union of source tokens.
287
+ const sourceTokens = new Set();
288
+ for (const p of participants) {
289
+ for (const t of distinctTokens(p.body))
290
+ sourceTokens.add(t);
291
+ }
292
+ const mergedTokens = distinctTokens(mergedBody);
293
+ // Clamped at computation so the pass/fail decision, the reason string, and
294
+ // the reported field all describe the same value.
295
+ const specificityRetention = Math.min(1, sourceTokens.size === 0 ? 1 : mergedTokens.size / sourceTokens.size);
296
+ const minRetention = config.minSpecificityRetention ?? DEFAULT_MIN_SPECIFICITY_RETENTION;
297
+ const provenanceOk = missing.length === 0;
298
+ const specificityOk = specificityRetention >= minRetention;
299
+ const reasons = [];
300
+ if (!provenanceOk) {
301
+ reasons.push(`provenance shrank: merged source_refs missing ${missing.length} ref(s) (e.g. ${missing[0]})`);
302
+ }
303
+ if (!specificityOk) {
304
+ reasons.push(`specificity retention ${specificityRetention.toFixed(2)} < ${minRetention} (merge genericized/shortened)`);
305
+ }
306
+ return {
307
+ passed: provenanceOk && specificityOk,
308
+ provenanceBefore: required.size,
309
+ provenanceAfter: after.size,
310
+ specificityRetention,
311
+ ...(reasons.length > 0 ? { reason: reasons.join("; ") } : {}),
312
+ };
313
+ }
285
314
  /**
286
315
  * Compute the bigram n-gram diversity of a text string.
287
316
  * Returns a value in [0, 1] where 0 = all identical bigrams, 1 = all unique.
@@ -312,7 +341,8 @@ export function computeBigramDiversity(text) {
312
341
  * below the 0.3 threshold; `{ lowDiversity: false }` otherwise.
313
342
  */
314
343
  export function checkLexicalDiversity(bodies, config) {
315
- if (!config.enabled || config.lexicalDiversityCheck === false) {
344
+ // R5: default ON — only an explicit opt-out disables the check.
345
+ if (config.enabled === false || config.lexicalDiversityCheck === false) {
316
346
  return { lowDiversity: false };
317
347
  }
318
348
  if (bodies.length === 0)
@@ -8,16 +8,70 @@ import { output, runWithJsonErrors } from "../../cli/shared.js";
8
8
  import { loadConfig } from "../../core/config/config.js";
9
9
  import { UsageError } from "../../core/errors.js";
10
10
  import { getCacheDir } from "../../core/paths.js";
11
+ import { getActiveCanaries, queryRecentCycleMetrics, withStateDb } from "../../core/state-db.js";
11
12
  import { clearLogFile, setLogFile } from "../../core/warn.js";
13
+ import { closeDatabase, openExistingDatabase } from "../../indexer/db/db.js";
12
14
  import { resolveSourceEntries } from "../../indexer/search/search-source.js";
13
15
  import { getHyphenatedArg, getHyphenatedBoolean, parseFlagValue } from "../../output/context.js";
16
+ import { refreshCanarySet } from "./collapse-detector.js";
14
17
  import { akmImprove } from "./improve.js";
15
18
  import { buildImproveRunId, recordTerminatedImproveRun, relativeImproveResultPath, writeImproveResultFile, } from "./improve-result-file.js";
16
19
  import { runImproveSession } from "./improve-session.js";
20
+ // R5 — collapse-detector canary set inspection / explicit refresh. The
21
+ // detector NEVER auto-refreshes the canary set (silent re-baselining is how a
22
+ // slow collapse hides); this verb is the only refresh path.
23
+ //
24
+ // Dispatched from the parent improve run() on `scope === "canary"` — NOT a
25
+ // citty subCommand: registering subCommands makes citty treat EVERY first
26
+ // positional as a subcommand name, breaking `akm improve <type|ref>` outright
27
+ // (citty throws "Unknown command memory"), and citty also re-runs the parent
28
+ // run() after a matched subcommand.
29
+ async function runCanaryInspection(refresh) {
30
+ const config = loadConfig();
31
+ const cfg = config.improve?.collapseDetector ?? {};
32
+ const result = withStateDb((stateDb) => {
33
+ let refreshOutcome;
34
+ if (refresh) {
35
+ const indexDb = openExistingDatabase();
36
+ try {
37
+ // Mint-first, deactivate-after (refreshCanarySet): an empty/unreadable
38
+ // index keeps the old baseline instead of destroying it.
39
+ refreshOutcome = refreshCanarySet(stateDb, indexDb, cfg) === null ? "kept-old-set" : "refreshed";
40
+ }
41
+ finally {
42
+ closeDatabase(indexDb);
43
+ }
44
+ }
45
+ const canaries = getActiveCanaries(stateDb);
46
+ const canarySetId = canaries[0]?.canary_set_id;
47
+ const recentCycles = canarySetId ? queryRecentCycleMetrics(stateDb, canarySetId, cfg.windowCycles ?? 5) : [];
48
+ return {
49
+ schemaVersion: 1,
50
+ ok: true,
51
+ refreshed: refreshOutcome === "refreshed",
52
+ ...(refreshOutcome === "kept-old-set"
53
+ ? { warning: "refresh skipped: no mintable learning entries in the index — existing canary set kept" }
54
+ : {}),
55
+ canarySetId: canarySetId ?? null,
56
+ canaries: canaries.map((c) => ({ id: c.id, anchorRef: c.anchor_ref, query: c.query })),
57
+ recentCycles: recentCycles.map((r) => ({
58
+ ts: r.ts,
59
+ pass: r.pass,
60
+ meanRecall: r.mean_recall,
61
+ meanNdcg: r.mean_ndcg,
62
+ distinctContentRatio: r.distinct_content_ratio,
63
+ acceptedActions: r.accepted_actions,
64
+ mergeFloorViolations: r.merge_floor_violations,
65
+ alerts: JSON.parse(r.alerts_json),
66
+ })),
67
+ };
68
+ });
69
+ output("improve-canary", result);
70
+ }
17
71
  export const improveCommand = defineCommand({
18
72
  meta: {
19
73
  name: "improve",
20
- description: "Analyze existing AKM assets and generate improvement proposals; also consolidates memories when profiles.improve.default.processes.consolidate.enabled is true",
74
+ description: "Analyze existing AKM assets and generate improvement proposals; also consolidates memories when profiles.improve.default.processes.consolidate.enabled is true. `akm improve canary [--refresh]` inspects the collapse-detector canary set.",
21
75
  },
22
76
  args: {
23
77
  scope: {
@@ -26,6 +80,11 @@ export const improveCommand = defineCommand({
26
80
  required: false,
27
81
  },
28
82
  task: { type: "string", description: "Add extra guidance for this improvement pass" },
83
+ refresh: {
84
+ type: "boolean",
85
+ description: "(canary scope only) Mint a new collapse-detector canary set and deactivate the old one; old rows and their cycle history are retained",
86
+ default: false,
87
+ },
29
88
  "dry-run": { type: "boolean", description: "Show planned actions without writing", default: false },
30
89
  target: { type: "string", description: "Override the write target for accepted proposals" },
31
90
  "auto-accept": {
@@ -74,6 +133,13 @@ export const improveCommand = defineCommand({
74
133
  },
75
134
  },
76
135
  async run({ args }) {
136
+ // "canary" is a reserved scope word (never a valid asset type, and refs
137
+ // contain ":"): dispatch to the detector inspection verb instead of an
138
+ // improve run.
139
+ if (args.scope === "canary") {
140
+ await runWithJsonErrors(() => runCanaryInspection(getHyphenatedBoolean(args, "refresh") === true));
141
+ return;
142
+ }
77
143
  await runWithJsonErrors(async () => {
78
144
  const formatFlagValue = parseFlagValue(process.argv, "--format");
79
145
  if (formatFlagValue !== undefined) {
@@ -491,6 +491,7 @@ export async function akmImprove(options = {}) {
491
491
  let stalenessDetection;
492
492
  let recombination;
493
493
  let proceduralCompilation;
494
+ let cycleMetrics;
494
495
  // Summed counters/durations.
495
496
  let prepGateCount = 0;
496
497
  let prepGateFailedCount = 0;
@@ -655,6 +656,10 @@ export async function akmImprove(options = {}) {
655
656
  budgetSignal: budgetAbortController.signal,
656
657
  improveProfile,
657
658
  consolidationRan: preparation.consolidationRan,
659
+ // R5: floor violations from this run's consolidate pass + the
660
+ // auto-accepted volume so far (prep + loop gates) for churn detection.
661
+ consolidationMergeFloorViolations: preparation.consolidation.mergeFloorViolations ?? 0,
662
+ acceptedActions: preparation.gateAutoAcceptedCount + loopGateCountThisCycle,
658
663
  }));
659
664
  const postLoopGateCountThisCycle = postLoopResult.gateAutoAcceptedCount;
660
665
  // Last-wins point-in-time objects.
@@ -663,6 +668,10 @@ export async function akmImprove(options = {}) {
663
668
  stalenessDetection = postLoopResult.stalenessDetection;
664
669
  recombination = postLoopResult.recombination;
665
670
  proceduralCompilation = postLoopResult.proceduralCompilation;
671
+ // Keep the last QUALIFYING cycle's snapshot — a later non-qualifying
672
+ // cycle in a maxCycles>1 run must not clobber it with undefined.
673
+ if (postLoopResult.cycleMetrics)
674
+ cycleMetrics = postLoopResult.cycleMetrics;
666
675
  // Summed counters/durations.
667
676
  postLoopGateCount += postLoopResult.gateAutoAcceptedCount;
668
677
  postLoopGateFailedCount += postLoopResult.gateAutoAcceptFailedCount;
@@ -754,6 +763,7 @@ export async function akmImprove(options = {}) {
754
763
  ...(stalenessDetection ? { stalenessDetection } : {}),
755
764
  ...(recombination ? { recombination } : {}),
756
765
  ...(proceduralCompilation ? { proceduralCompilation } : {}),
766
+ ...(cycleMetrics ? { cycleMetrics } : {}),
757
767
  ...(orphansPurged !== undefined ? { orphansPurged } : {}),
758
768
  ...(proposalsExpired !== undefined && proposalsExpired > 0 ? { proposalsExpired } : {}),
759
769
  reflectCooldownActions: finalActions.filter((a) => a.mode === "reflect-cooldown").length,
@@ -9,7 +9,7 @@ import { UsageError } from "../../core/errors.js";
9
9
  import { appendEvent } from "../../core/events.js";
10
10
  import { openLogsDatabase, purgeOldTaskLogs } from "../../core/logs-db.js";
11
11
  import { getDbPath } from "../../core/paths.js";
12
- import { purgeOldEvents, purgeOldImproveRuns, withStateDb } from "../../core/state-db.js";
12
+ import { purgeOldCycleMetrics, purgeOldEvents, purgeOldImproveRuns, withStateDb, } from "../../core/state-db.js";
13
13
  import { info, warn } from "../../core/warn.js";
14
14
  import { closeDatabase, openIndexDatabase } from "../../indexer/db/db.js";
15
15
  import { runGraphExtractionPass } from "../../indexer/graph/graph-extraction.js";
@@ -22,6 +22,7 @@ import { isProcessEnabled } from "../../llm/feature-gate.js";
22
22
  import { withLlmStage } from "../../llm/usage-telemetry.js";
23
23
  import { createProposal, expireStaleProposals, isProposalSkipped, listProposals, purgeOrphanProposals, } from "../proposal/validators/proposals.js";
24
24
  import { checkDeadUrls } from "../url-checker.js";
25
+ import { DEFAULT_RETENTION_DAYS as CYCLE_METRICS_RETENTION_DAYS, runCollapseDetector } from "./collapse-detector.js";
25
26
  import { deriveLessonRef } from "./distill.js";
26
27
  import { deriveKnowledgeRef } from "./distill-promotion-policy.js";
27
28
  // Eligibility / candidate-selection predicates live in ./eligibility.
@@ -694,9 +695,31 @@ export async function runImprovePostLoopStage(args) {
694
695
  allWarnings.push(`procedural: ${String(e)}`);
695
696
  }
696
697
  }
698
+ // ── R5: collapse/churn detector ────────────────────────────────────────────
699
+ // One snapshot per QUALIFYING cycle: consolidate processed work and/or
700
+ // recombine formed clusters. Runs AFTER the maintenance reindex so FTS sees
701
+ // the post-merge index; one call site covers both passes. Deterministic,
702
+ // observe-only, fail-open (the orchestrator catches everything) — and inert
703
+ // on the ~9-in-10 default-profile runs that touch no merges.
704
+ let cycleMetrics;
705
+ const recombineWorked = (recombination?.clustersFormed ?? 0) > 0;
706
+ if (!options.dryRun && (consolidationRan || recombineWorked)) {
707
+ cycleMetrics = runCollapseDetector({
708
+ runId: options.runId ?? "improve-adhoc",
709
+ pass: consolidationRan && recombineWorked ? "both" : consolidationRan ? "consolidate" : "recombine",
710
+ // prep+loop gate accepts, PLUS recombine's confirmed-lesson promotions —
711
+ // recombine churn is the historically observed failure mode and its
712
+ // promotions never flow through the prep/loop gates.
713
+ acceptedActions: (args.acceptedActions ?? 0) + (recombination?.lessonsPromoted ?? 0),
714
+ mergeFloorViolations: args.consolidationMergeFloorViolations ?? 0,
715
+ config: options.config ?? loadConfig(),
716
+ ...(eventsCtx ? { eventsCtx } : {}),
717
+ });
718
+ }
697
719
  return {
698
720
  allWarnings,
699
721
  deadUrls,
722
+ ...(cycleMetrics ? { cycleMetrics } : {}),
700
723
  ...(recombination ? { recombination } : {}),
701
724
  ...(proceduralCompilation ? { proceduralCompilation } : {}),
702
725
  ...(maintenanceResult.memoryInference ? { memoryInference: maintenanceResult.memoryInference } : {}),
@@ -1007,6 +1030,21 @@ export async function runImproveMaintenancePasses(args) {
1007
1030
  ref: "improve_runs:_purge",
1008
1031
  metadata: { purgedCount: improveRunsPurged, retentionDays },
1009
1032
  }, eventsCtx);
1033
+ // R5: improve_cycle_metrics has its OWN retention window
1034
+ // (default 365d — a slow collapse needs a longer trend than
1035
+ // the 90d events window). canary_queries rows are never purged.
1036
+ const cycleRetention = config.improve?.collapseDetector?.retentionDays ?? CYCLE_METRICS_RETENTION_DAYS;
1037
+ const cycleMetricsPurged = purgeOldCycleMetrics(stateDb, cycleRetention);
1038
+ if (cycleMetricsPurged > 0) {
1039
+ info(`[improve] cycle-metrics purge: ${cycleMetricsPurged} row(s) older than ${cycleRetention}d removed from state.db`);
1040
+ appendEvent({
1041
+ // Dedicated type (mirrors improve_runs_purged) so consumers
1042
+ // never have to disambiguate purge targets via the ref string.
1043
+ eventType: "improve_cycle_metrics_purged",
1044
+ ref: "improve_cycle_metrics:_purge",
1045
+ metadata: { purgedCount: cycleMetricsPurged, retentionDays: cycleRetention },
1046
+ }, eventsCtx);
1047
+ }
1010
1048
  }, { path: eventsCtx?.dbPath, borrowed: eventsCtx?.db });
1011
1049
  }
1012
1050
  catch (err) {