akm-cli 0.9.0-beta.52 → 0.9.0-beta.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/assets/hints/cli-hints-full.md +6 -5
  2. package/dist/cli.js +0 -7
  3. package/dist/commands/env/env-cli.js +3 -2
  4. package/dist/commands/env/env.js +14 -67
  5. package/dist/commands/health/checks.js +28 -15
  6. package/dist/commands/health.js +68 -1
  7. package/dist/commands/improve/collapse-detector.js +419 -0
  8. package/dist/commands/improve/consolidate.js +72 -54
  9. package/dist/commands/improve/distill.js +79 -13
  10. package/dist/commands/improve/extract.js +13 -6
  11. package/dist/commands/improve/homeostatic.js +109 -79
  12. package/dist/commands/improve/improve-cli.js +67 -1
  13. package/dist/commands/improve/improve.js +10 -0
  14. package/dist/commands/improve/loop-stages.js +39 -1
  15. package/dist/commands/improve/outcome-loop.js +15 -3
  16. package/dist/commands/improve/preparation.js +17 -8
  17. package/dist/commands/improve/salience.js +49 -32
  18. package/dist/commands/read/curate.js +5 -9
  19. package/dist/commands/read/knowledge.js +4 -0
  20. package/dist/commands/read/search.js +5 -2
  21. package/dist/commands/read/show.js +3 -3
  22. package/dist/core/asset/asset-spec.js +3 -2
  23. package/dist/core/config/config-schema.js +39 -17
  24. package/dist/core/eval/rank-metrics.js +113 -0
  25. package/dist/core/state/migrations.js +56 -0
  26. package/dist/core/state-db.js +146 -19
  27. package/dist/indexer/ensure-index.js +33 -90
  28. package/dist/indexer/index-writer-lock.js +0 -11
  29. package/dist/indexer/index-written-assets.js +105 -0
  30. package/dist/indexer/passes/metadata.js +20 -0
  31. package/dist/indexer/search/db-search.js +29 -1
  32. package/dist/indexer/search/ranking-contributors.js +33 -1
  33. package/dist/indexer/search/ranking.js +66 -0
  34. package/dist/indexer/search/search-fields.js +6 -0
  35. package/dist/llm/feature-gate.js +6 -2
  36. package/dist/output/renderers.js +8 -13
  37. package/dist/output/shapes/helpers.js +0 -3
  38. package/dist/output/shapes/passthrough.js +1 -0
  39. package/dist/scripts/migrate-storage.js +152 -33
  40. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +41 -18
  41. package/dist/storage/repositories/index-db.js +10 -1
  42. package/package.json +2 -4
@@ -33,6 +33,15 @@ export const WARM_START_CAP = 0.3;
33
33
  * very-negative run can't send the score to −∞.
34
34
  */
35
35
  export const OUTCOME_SCORE_MIN = -1.0;
36
+ /**
37
+ * Saturation ceiling: the maximum outcome_score. Biological RPE saturates —
38
+ * a fully predicted reward produces zero response, not an ever-growing one —
39
+ * so a long-lived popular asset must not accrue unbounded outcome mass that
40
+ * would dominate ranking once the outcome weight is enabled (analysis G2).
41
+ * 1.5 comfortably exceeds the max plausible single-cycle raw update while
42
+ * keeping the normalised outcomeSalience spread meaningful.
43
+ */
44
+ export const OUTCOME_SCORE_MAX = 1.5;
36
45
  /**
37
46
  * Diversity floor: `outcomeSalience` for any asset is at least this fraction
38
47
  * of the maximum observed `outcome_score` in the table, so rare-but-correct
@@ -107,9 +116,12 @@ export function updateAssetOutcome(db, inputs) {
107
116
  // so the score tracks the moving signal, not the cumulative sum.
108
117
  const rawUpdate = predictionError - penalty + valence;
109
118
  const newScore = OUTCOME_EMA_ALPHA * rawUpdate + (1 - OUTCOME_EMA_ALPHA) * existing.outcome_score;
110
- // Clip to [OUTCOME_SCORE_MIN, +Infinity) — no upper cap so that very-active
111
- // useful assets can accumulate a high positive score.
112
- outcomeScore = Math.max(OUTCOME_SCORE_MIN, newScore);
119
+ // Clip to [OUTCOME_SCORE_MIN, OUTCOME_SCORE_MAX] — the ceiling is the RPE
120
+ // saturation analog (G2): without it, long-lived popular assets accumulate
121
+ // unbounded positive mass (live max was 3.13) and would dominate rank_score
122
+ // the moment the outcome weight is enabled. Stored legacy scores above the
123
+ // ceiling converge back under it on their next differential update.
124
+ outcomeScore = Math.min(OUTCOME_SCORE_MAX, Math.max(OUTCOME_SCORE_MIN, newScore));
113
125
  // ── review_pressure (#613) ─────────────────────────────────────────────
114
126
  // New negatives this cycle.
115
127
  const newNegatives = Math.max(0, inputs.negativeFeedbackCount - existing.negative_feedback_count);
@@ -1145,23 +1145,32 @@ export async function runImprovePreparationStage(args) {
1145
1145
  const proactiveAndRetrievalSet = new Set([...highRetrievalRefs, ...proactiveRefs].map((r) => r.ref));
1146
1146
  try {
1147
1147
  withStateDb((dbForHighSalience) => {
1148
- const effectiveLimit = options.limit ?? 10;
1148
+ // Derive the cap from the resolved reflect limit (mirrors improve.ts's
1149
+ // options.limit resolution) so an unbounded whole-stash run does not
1150
+ // collapse the lane to exactly 1 ref via the bare `?? 10` fallback.
1151
+ const effectiveLimit = options.limit ?? improveProfile?.processes?.reflect?.limit ?? improveProfile.limit ?? 10;
1149
1152
  const highSalienceCap = Math.max(1, Math.floor(effectiveLimit * 0.1));
1150
1153
  // #632/#4 — session-capture telemetry (checkpoints) must never consume
1151
1154
  // the scarce high-salience budget. Even with a content-scored row, these
1152
1155
  // are pipeline bookkeeping, not assets worth reflecting/rewriting.
1153
1156
  const candidates = noFeedbackCandidates.filter((r) => !proactiveAndRetrievalSet.has(r.ref) && !isSessionCaptureMemoryName(parseAssetRef(r.ref).name));
1157
+ // Collect ALL qualifying candidates, then take the top-N BY SCORE — the
1158
+ // previous first-N-in-scan-order break meant a higher-salience candidate
1159
+ // found later in the scan lost its slot to an earlier lower-scoring one.
1160
+ const qualifying = [];
1154
1161
  for (const r of candidates) {
1155
- if (highSalienceRefs.length >= highSalienceCap)
1156
- break;
1157
1162
  const row = getAssetSalience(dbForHighSalience, r.ref);
1158
1163
  if (row &&
1159
1164
  isContentEncodingRow(row, parseAssetRef(r.ref).type) &&
1160
1165
  row.encoding_salience >= salienceThreshold &&
1161
1166
  !lastReflectProposalTs.has(r.ref)) {
1162
- highSalienceRefs.push(r);
1167
+ qualifying.push({ ref: r, score: row.encoding_salience });
1163
1168
  }
1164
1169
  }
1170
+ qualifying.sort((a, b) => b.score - a.score);
1171
+ for (const q of qualifying.slice(0, highSalienceCap)) {
1172
+ highSalienceRefs.push(q.ref);
1173
+ }
1165
1174
  }, { path: eventsCtx?.dbPath });
1166
1175
  }
1167
1176
  catch (err) {
@@ -1392,11 +1401,11 @@ export async function runImprovePreparationStage(args) {
1392
1401
  // so feedback refs get their genuine retrieval frequency, not a 0-floor fallback.
1393
1402
  // outcomeSalienceByRef is populated by WS-2 above (or empty on first run).
1394
1403
  //
1395
- // Part-V gate: read the operator opt-in flag from config. Default false
1396
- // (WS-1 parity weights) until the maintainer runs scripts/akm-eval and sets
1397
- // improve.salience.outcomeWeightEnabled: true in the config.
1404
+ // R1 loop closure: the outcome weight is ON by default (the G2 saturation
1405
+ // cap makes it safe). Operators opt out with
1406
+ // improve.salience.outcomeWeightEnabled: false in the config.
1398
1407
  const salienceConfig = (options.config ?? loadConfig()).improve?.salience;
1399
- const outcomeWeightEnabled = salienceConfig?.outcomeWeightEnabled === true;
1408
+ const outcomeWeightEnabled = salienceConfig?.outcomeWeightEnabled !== false;
1400
1409
  const salienceMap = new Map();
1401
1410
  const nowForSalience = Date.now();
1402
1411
  // #644 — preserve content-derived encoding scores across runs.
@@ -21,12 +21,12 @@
21
21
  *
22
22
  * `rankScore = (w_e·encoding + w_o·outcome + w_r·retrieval) × sizePenalty`, normalized [0,1].
23
23
  *
24
- * **WS-2 default-off (Part-V gate):**
25
- * `w_o = 0.15` is the target but is applied only when `outcomeWeightEnabled=true`
26
- * (set via `improve.salience.outcomeWeightEnabled: true` in config after running
27
- * Part-V T0 baseline). Default: WS-1 parity weights `w_e=0.30, w_r=0.70, w_o=0`.
28
- * `outcomeSalience` is populated from `asset_outcome.outcome_score` (WS-2) for
29
- * observability regardless of the flag.
24
+ * **WS-2 default-ON (R1 loop closure):**
25
+ * `w_o = 0.15` is applied by default now that `outcome_score` saturates at
26
+ * `OUTCOME_SCORE_MAX` (G2). Operators can opt out via
27
+ * `improve.salience.outcomeWeightEnabled: false`, which restores the WS-1
28
+ * parity weights `w_e=0.30, w_r=0.70, w_o=0`. `outcomeSalience` is populated
29
+ * from `asset_outcome.outcome_score` regardless of the flag.
30
30
  *
31
31
  * ## Plasticity
32
32
  *
@@ -52,16 +52,30 @@ import { WARM_START_CAP } from "./outcome-loop.js";
52
52
  const DAY_MS = 86_400_000;
53
53
  // ── Recency decay half-life (mirrors the proactive-maintenance prototype) ─────
54
54
  const RECENCY_HALFLIFE_DAYS = 21;
55
+ // ── Recency-floor half-life (R4 — SHY-style continuous downscaling) ──────────
56
+ //
57
+ // The recency floor itself decays on this (much longer) half-life so an
58
+ // unreviewed-forever asset keeps drifting down instead of parking at the 0.1
59
+ // floor. This replaces the deleted homeostatic demotion pass (which was
60
+ // default-off and self-undoing — every salience recompute clobbered it);
61
+ // folding the decay into the always-applied recency term makes it persist by
62
+ // construction. At 180 days the floor halves; a 1-year-stale asset sits at
63
+ // ~0.025 instead of 0.1.
64
+ const RECENCY_FLOOR_HALFLIFE_DAYS = 180;
65
+ // Absolute epsilon under the decaying floor. Keeps the frequency term ordinal
66
+ // for assets whose last-use timestamp is unknown (utility_scores has no
67
+ // last_used_at) — without it their retrieval salience collapses to exactly 0
68
+ // and frequency ordering is lost for maintenance selection.
69
+ const RECENCY_EPSILON = 0.01;
55
70
  // ── Size proxy floor (avoids log10(0)) ────────────────────────────────────────
56
71
  const SIZE_FLOOR_BYTES = 200;
57
72
  // ── Projection weights ────────────────────────────────────────────────────────
58
73
  //
59
- // These constants reflect the WS-2 TARGET values (used when outcomeWeightEnabled=true).
60
- // Default ranking uses WS-1 parity weights (w_e=0.30, w_r=0.70, w_o=0) until the
61
- // maintainer opts in via `improve.salience.outcomeWeightEnabled: true` after running
62
- // the Part-V T0 baseline (scripts/akm-eval + health report).
74
+ // These constants are the DEFAULT ranking weights (R1 loop closure). Operators
75
+ // can opt back out to the WS-1 parity weights (w_e=0.30, w_r=0.70, w_o=0) via
76
+ // `improve.salience.outcomeWeightEnabled: false`.
63
77
  //
64
- // WS-2 opt-in split (w_e=0.25, w_o=0.15, w_r=0.60, sum = 1.0):
78
+ // WS-2 split (w_e=0.25, w_o=0.15, w_r=0.60, sum = 1.0):
65
79
  // [exp] Expert recommendation: encoding should be moderate so a type-importance
66
80
  // stub does not completely dominate; retrieval should be strong since it directly
67
81
  // measures use; outcome provides a quality signal proportional to usefulness.
@@ -78,10 +92,10 @@ if (Math.abs(W_ENCODING + W_OUTCOME + W_RETRIEVAL - 1.0) > 1e-9) {
78
92
  }
79
93
  // ── WS-1 parity weights ───────────────────────────────────────────────────────
80
94
  //
81
- // These constants reflect the default WS-1 parity weights used when
82
- // `outcomeWeightEnabled` is false/absent (the default). They preserve the
95
+ // These constants reflect the WS-1 parity weights used when the operator
96
+ // explicitly opts out (`outcomeWeightEnabled: false`). They preserve the
83
97
  // WS-1 two-way split (w_e=0.30, w_r=0.70) with w_o=0 so outcome does not
84
- // affect rankScore until the operator opts in after the Part-V baseline run.
98
+ // affect rankScore in the opt-out mode.
85
99
  //
86
100
  // Named here (rather than inline literals in the else branch) so a future
87
101
  // re-tune has a single source of truth and the sum-to-1 guard below catches
@@ -155,15 +169,19 @@ export function computeSalience(inputs) {
155
169
  //
156
170
  // Formula: log(1 + freq) × recencyDecay
157
171
  // log(1+freq): sub-linear frequency term (same as proactive-maintenance prototype).
158
- // recencyDecay: 0.1 + 0.5^(useAgeDays/halflife) — decays to 0.1 floor when stale.
159
- // lastUseMs=0/undefined → useAgeDays=9999 → recencyDecay≈0.1 (floor).
172
+ // recencyDecay: max(ε, 0.1·0.5^(useAgeDays/180) + 0.5^(useAgeDays/21)) —
173
+ // the fast term halves every 21 days; the 0.1 floor itself halves every
174
+ // 180 days (R4: SHY-style continuous downscaling — an unreviewed-forever
175
+ // asset keeps drifting down instead of parking at the floor). The ε=0.01
176
+ // epsilon keeps the frequency term ordinal for unknown-last-use assets.
177
+ // lastUseMs=0/undefined → useAgeDays=9999 → recencyDecay=ε.
160
178
  //
161
179
  // The recency term is MANDATORY (plan requirement §WS-1 step 2). Without it
162
- // retrievalSalience degenerates to a non-decaying frequency count and the WS-3
163
- // homeostatic step-0 demotion has nothing to act on.
180
+ // retrievalSalience degenerates to a non-decaying frequency count. This
181
+ // always-applied decay replaces the deleted homeostatic demotion pass.
164
182
  const lastUseMs = inputs.lastUseMs ?? 0;
165
183
  const useAgeDays = lastUseMs > 0 ? (now - lastUseMs) / DAY_MS : 9999;
166
- const recencyDecay = 0.1 + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS);
184
+ const recencyDecay = Math.max(RECENCY_EPSILON, 0.1 * 0.5 ** (useAgeDays / RECENCY_FLOOR_HALFLIFE_DAYS) + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS));
167
185
  const rawRetrieval = Math.log(1 + inputs.retrievalFreq) * recencyDecay;
168
186
  // ── Size penalty ─────────────────────────────────────────────────────────────
169
187
  // 1/log10(size): larger assets are slightly deprioritized (same as proactive prototype).
@@ -184,29 +202,28 @@ export function computeSalience(inputs) {
184
202
  // which asymptotes to 1 and equals 0.5 at rawRetrieval=1. This is the same
185
203
  // formula used for MemRL utility updates.
186
204
  const retrieval = rawRetrieval / (rawRetrieval + 1);
187
- // ── Weight selection (Part-V gate) ────────────────────────────────────────
188
- //
189
- // When `outcomeWeightEnabled` is false/absent (default): use WS-1 parity
190
- // weights (w_e=0.30, w_r=0.70, w_o=0) so ranking is unchanged from the WS-1
191
- // baseline. The `outcome` sub-score is still computed and stored in the
192
- // salience vector for observability, but it does not affect rankScore.
205
+ // ── Weight selection (R1 — outcome loop closed by default) ───────────────
193
206
  //
194
- // When `outcomeWeightEnabled` is true (operator opt-in after Part-V run):
195
- // use WS-2 weights (w_e=0.25, w_o=0.15, w_r=0.60).
207
+ // When `outcomeWeightEnabled` is true/absent (DEFAULT ON since the G2
208
+ // saturation cap landed): use WS-2 weights (w_e=0.25, w_o=0.15, w_r=0.60)
209
+ // so the prediction-error outcome signal actually shapes rankScore — this
210
+ // is the R1 loop-closure from docs/design/improve-self-learning-analysis.md.
196
211
  //
197
- // The constants W_ENCODING, W_OUTCOME, W_RETRIEVAL always reflect the
198
- // WS-2 target values for documentation and re-tune reference.
212
+ // When `outcomeWeightEnabled` is explicitly false (operator opt-out via
213
+ // `improve.salience.outcomeWeightEnabled: false`): fall back to the WS-1
214
+ // parity weights (w_e=0.30, w_r=0.70, w_o=0). The `outcome` sub-score is
215
+ // still computed and stored for observability in that mode.
199
216
  let we;
200
217
  let wo;
201
218
  let wr;
202
- if (inputs.outcomeWeightEnabled === true) {
203
- // WS-2 active: three-way split from Part-V operator opt-in.
219
+ if (inputs.outcomeWeightEnabled !== false) {
220
+ // WS-2 active (default): three-way split.
204
221
  we = W_ENCODING; // 0.25
205
222
  wo = W_OUTCOME; // 0.15
206
223
  wr = W_RETRIEVAL; // 0.60
207
224
  }
208
225
  else {
209
- // WS-1 parity (default): w_o=0, redistribute to WS-1 proportions.
226
+ // WS-1 parity (opt-out): w_o=0, redistribute to WS-1 proportions.
210
227
  // Original WS-1 split was w_e=0.30, w_r=0.70.
211
228
  we = W_ENCODING_PARITY;
212
229
  wo = W_OUTCOME_PARITY;
@@ -19,12 +19,12 @@ import { parseFrontmatter } from "../../core/asset/frontmatter.js";
19
19
  import { getIndexPassConfig, loadConfig } from "../../core/config/config.js";
20
20
  import { rethrowIfTestIsolationError, UsageError } from "../../core/errors.js";
21
21
  import { appendEvent } from "../../core/events.js";
22
- import { closeDatabase, computeBodyHash, openExistingDatabase } from "../../indexer/db/db.js";
22
+ import { computeBodyHash } from "../../indexer/db/db.js";
23
23
  import { enqueueGraphExtraction, hasGraphData } from "../../indexer/db/graph-db.js";
24
24
  import { findSourceForPath, resolveSourceEntries } from "../../indexer/search/search-source.js";
25
25
  import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
26
26
  import { truncateDescription } from "../../output/shapes.js";
27
- import { withIndexDb } from "../../storage/repositories/index-db.js";
27
+ import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
28
28
  import { akmSearch, parseSearchSource } from "./search.js";
29
29
  import { akmShowUnified } from "./show.js";
30
30
  const CURATE_FALLBACK_FILTER_WORDS = new Set([
@@ -65,8 +65,7 @@ function logCurateEvent(query, result) {
65
65
  metadata: { query, itemCount: result.items.length, itemRefs },
66
66
  });
67
67
  try {
68
- const db = openExistingDatabase();
69
- try {
68
+ withIndexDb((db) => {
70
69
  insertUsageEvent(db, {
71
70
  event_type: "curate",
72
71
  query,
@@ -86,10 +85,7 @@ function logCurateEvent(query, result) {
86
85
  source: "user",
87
86
  });
88
87
  }
89
- }
90
- finally {
91
- closeDatabase(db);
92
- }
88
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
93
89
  }
94
90
  catch (err) {
95
91
  rethrowIfTestIsolationError(err);
@@ -207,7 +203,7 @@ function maybeEnqueueLazyGraph(assetPath) {
207
203
  if (!hasGraphData(db, stashRoot, assetPath)) {
208
204
  enqueueGraphExtraction(db, stashRoot, assetPath, bodyHash, 0);
209
205
  }
210
- });
206
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
211
207
  }
212
208
  catch (err) {
213
209
  rethrowIfTestIsolationError(err);
@@ -16,6 +16,7 @@ import { isHttpUrl, isWithin, tryReadStdinText } from "../../core/common.js";
16
16
  import { loadConfig } from "../../core/config/config.js";
17
17
  import { UsageError } from "../../core/errors.js";
18
18
  import { commitWriteTargetBoundary, formatRefForMessage, resolveWriteTarget, writeAssetToSource, } from "../../core/write-source.js";
19
+ import { indexWrittenAssets } from "../../indexer/index-written-assets.js";
19
20
  import { fetchWebsiteMarkdownSnapshot, shouldAllowPrivateWebsiteUrlForTests } from "../../sources/website-ingest.js";
20
21
  const MAX_CAPTURED_ASSET_SLUG_LENGTH = 64;
21
22
  // ── Asset-name normalisation ─────────────────────────────────────────────────
@@ -144,6 +145,9 @@ export async function writeMarkdownAsset(options) {
144
145
  // 0.9.0 (issue #507): single batch commit at the write boundary for git
145
146
  // targets. No-op for filesystem/primary-stash targets.
146
147
  commitWriteTargetBoundary(target, `Update ${formatRefForMessage(ref)}`);
148
+ // Write-path indexing: the asset is searchable immediately. Fail-open; reads
149
+ // no longer trigger reindexes, so keeping the index current is the writer's job.
150
+ await indexWrittenAssets(source.path, [result.path]);
147
151
  return {
148
152
  ref: result.ref,
149
153
  path: result.path,
@@ -23,7 +23,7 @@ import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.
23
23
  // indexer or path-resolution code runs.
24
24
  import "../../sources/providers/index.js";
25
25
  import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
26
- import { withIndexDb } from "../../storage/repositories/index-db.js";
26
+ import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
27
27
  import { searchRegistry } from "./registry-search.js";
28
28
  const DEFAULT_LIMIT = 20;
29
29
  export async function akmSearch(input) {
@@ -227,6 +227,9 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
227
227
  metadata: { query, hitCount: stashHits.length, resultRefs: allResultRefs, mode },
228
228
  });
229
229
  try {
230
+ // Short busy timeout: telemetry must never stall the search result behind
231
+ // a background reindex holding the index.db write lock (30s default wait).
232
+ // Under contention these usage hints are skipped, not waited for.
230
233
  withIndexDb((db) => {
231
234
  const resolved = resolveEntryIds(db, stashHits.slice(0, 50));
232
235
  for (const { entryId, ref } of resolved) {
@@ -269,7 +272,7 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
269
272
  }),
270
273
  source: eventSource,
271
274
  });
272
- });
275
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
273
276
  }
274
277
  catch (err) {
275
278
  rethrowIfTestIsolationError(err);
@@ -39,7 +39,7 @@ import { resolveAssetPath } from "../../indexer/walk/path-resolver.js";
39
39
  import { resolveIndexPassLLM } from "../../llm/index-passes.js";
40
40
  import { resolveSourcesForOrigin } from "../../registry/origin-resolve.js";
41
41
  import { resolveStorageLocations } from "../../storage/locations.js";
42
- import { withIndexDb } from "../../storage/repositories/index-db.js";
42
+ import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
43
43
  // Eagerly import source providers to trigger self-registration.
44
44
  import "../../sources/providers/index.js";
45
45
  import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.js";
@@ -301,7 +301,7 @@ function logShowEvent(ref, eventSource = "user") {
301
301
  entry_id: findEntryIdByRef(db, ref),
302
302
  source: eventSource,
303
303
  });
304
- });
304
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
305
305
  }
306
306
  catch (err) {
307
307
  rethrowIfTestIsolationError(err);
@@ -431,7 +431,7 @@ async function maybeExtractGraphInline(config, sourceStashDir, assetPath) {
431
431
  }
432
432
  withIndexDb((db) => {
433
433
  alreadyGraphed = hasGraphData(db, sourceStashDir, assetPath);
434
- });
434
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
435
435
  if (alreadyGraphed)
436
436
  return;
437
437
  // Open the db for the async extraction ourselves: `withIndexDb` is
@@ -69,8 +69,9 @@ const ASSET_SPECS_INTERNAL = {
69
69
  script: { stashDir: "scripts", ...scriptSpec },
70
70
  memory: { stashDir: "memories", ...markdownSpec },
71
71
  // Environment assets — whole `.env` files sourced/injected wholesale. Replaced
72
- // the deprecated `vault` type (removed in 0.9.0). Key NAMES + start-of-line
73
- // comments are surfaced as metadata; values are never read for indexing.
72
+ // the deprecated `vault` type (removed in 0.9.0). Only key NAMES are surfaced
73
+ // as metadata; values and comment text are never read for indexing (comments
74
+ // routinely contain commented-out credentials).
74
75
  env: {
75
76
  stashDir: "env",
76
77
  isRelevantFile: (fileName) => fileName === ".env" || fileName.endsWith(".env"),
@@ -171,6 +171,9 @@ export const ImproveProcessConfigSchema = z
171
171
  // byte-identically to today (the incrementalSince path is unaffected). Only
172
172
  // meaningful on the `consolidate` process.
173
173
  judgedCache: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
174
+ // Distill process: LLM-as-judge lesson quality gate. Default ON (R3);
175
+ // fail-open — judge failure/timeout/parse errors pass through. Set
176
+ // `enabled: false` on the distill process to opt out.
174
177
  qualityGate: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
175
178
  contradictionDetection: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
176
179
  // Extract process config (only meaningful for extract process)
@@ -250,24 +253,13 @@ export const ImproveProcessConfigSchema = z
250
253
  // once sufficient history accumulates; this value is only used on the very
251
254
  // first run. Default 30 s. Only meaningful on the `consolidate` process.
252
255
  p90ChunkSecondsDefault: z.number().finite().positive().optional(),
253
- // WS-3b: Homeostatic demotion (step 0a). Before any LLM merge, demote
254
- // retrievalSalience for stale/low-value assets so the merge pool is bounded
255
- // and high-SNR. Demotion is state.db-only (file content untouched);
256
- // re-promotable on re-retrieval. Default OFF. Only meaningful on the
257
- // `consolidate` process.
258
- homeostaticDemotion: z
259
- .object({
260
- enabled: z.boolean().optional(),
261
- // Minimum days since last retrieval to consider an asset stale (default 30).
262
- staleDays: z.number().int().min(0).optional(),
263
- // Demotion factor: multiply retrievalSalience by this when stale (default 0.5).
264
- demotionFactor: z.number().min(0).max(1).optional(),
265
- })
266
- .passthrough()
267
- .optional(),
256
+ // (WS-3b step 0a `homeostaticDemotion` was removed in R4. The key is
257
+ // tolerated via passthrough if an old config still carries it; continuous
258
+ // decay is now part of the always-applied salience recency term.)
268
259
  // WS-3b: Schema-similarity gate (step 0b). At intake, if a new candidate's
269
260
  // body embedding is within epsilon of an existing derived-layer lesson/knowledge
270
- // node, mark it schema-consistent and lower its priority. Default OFF.
261
+ // node, mark it schema-consistent and lower its priority. Default ON for
262
+ // the `extract` process since R3 (fail-open; set `enabled: false` to opt out).
271
263
  // Only meaningful on the `consolidate` and `extract` processes.
272
264
  schemaSimilarity: z
273
265
  .object({
@@ -297,13 +289,19 @@ export const ImproveProcessConfigSchema = z
297
289
  // - maxGeneration: refuse to merge two assets both above this generation (default 2).
298
290
  // - lexicalDiversityCheck: low n-gram diversity ⇒ raise merge threshold.
299
291
  // - randomClusterFraction: occasional random (non-similar) cluster in pool (default 0.05).
300
- // Default OFF. Only meaningful on the `consolidate` process.
292
+ // - mergeInformationFloor: measure that merges keep provenance + specificity
293
+ // (R5 §4.2; ADVISORY in v1 — counted, never refused).
294
+ // - minSpecificityRetention: distinct-token retention floor for merges (default 0.6).
295
+ // Default ON since R5 (opt out via enabled: false). Only meaningful on the
296
+ // `consolidate` process.
301
297
  antiCollapse: z
302
298
  .object({
303
299
  enabled: z.boolean().optional(),
304
300
  maxGeneration: z.number().int().min(1).optional(),
305
301
  lexicalDiversityCheck: z.boolean().optional(),
306
302
  randomClusterFraction: z.number().min(0).max(1).optional(),
303
+ mergeInformationFloor: z.boolean().optional(),
304
+ minSpecificityRetention: z.number().min(0).max(1).optional(),
307
305
  })
308
306
  .passthrough()
309
307
  .optional(),
@@ -633,6 +631,29 @@ const ImproveSalienceSchema = z
633
631
  replayBudget: z.number().int().min(0).optional(),
634
632
  })
635
633
  .passthrough();
634
+ // R5 — longitudinal collapse/churn detector (observe-only in v1; deterministic,
635
+ // fail-open, runs only on cycles where consolidate/recombine did work).
636
+ // Default ON; opt out via `improve.collapseDetector.enabled: false`.
637
+ // See docs/design/improve-collapse-churn-detector-design.md.
638
+ const ImproveCollapseDetectorSchema = z
639
+ .object({
640
+ enabled: z.boolean().optional(),
641
+ // Canary set size minted on first run (owner-approved 30–50 range; default 40).
642
+ canaryCount: z.number().int().min(3).max(200).optional(),
643
+ // Top-K cutoff for canary recall/nDCG (default 10).
644
+ k: z.number().int().min(1).max(100).optional(),
645
+ // Trend window in qualifying cycles (default 5).
646
+ windowCycles: z.number().int().min(2).max(50).optional(),
647
+ // Absolute mean-recall drop vs window median that fires collapse (default 0.15).
648
+ recallDropThreshold: z.number().min(0).max(1).optional(),
649
+ // distinct-content-ratio decline over the window that fires collapse (default 0.05).
650
+ entropyDropThreshold: z.number().min(0).max(1).optional(),
651
+ // Accepted-action volume over the window below which churn never fires (default 25).
652
+ churnMinAcceptedActions: z.number().int().min(1).optional(),
653
+ // improve_cycle_metrics retention (default 365 days, owner-approved).
654
+ retentionDays: z.number().int().min(1).optional(),
655
+ })
656
+ .passthrough();
636
657
  export const ImproveConfigSchema = z
637
658
  .object({
638
659
  utilityDecay: ImproveUtilityDecaySchema.optional(),
@@ -640,6 +661,7 @@ export const ImproveConfigSchema = z
640
661
  calibration: ImproveCalibrationSchema.optional(),
641
662
  exploration: ImproveExplorationSchema.optional(),
642
663
  salience: ImproveSalienceSchema.optional(),
664
+ collapseDetector: ImproveCollapseDetectorSchema.optional(),
643
665
  })
644
666
  .passthrough();
645
667
  // ── Index / per-pass ────────────────────────────────────────────────────────
@@ -0,0 +1,113 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ export const DEFAULT_CURATE_WEIGHTS = {
5
+ ndcg: 0.5,
6
+ recall: 0.2,
7
+ mrr: 0.1,
8
+ noBannedAboveRequired: 0.2,
9
+ };
10
+ /** nDCG@k with binary relevance: gain 1 for relevant refs, 0 otherwise. */
11
+ export function ndcgAtK(returned, relevant, k) {
12
+ const top = returned.slice(0, k);
13
+ let dcg = 0;
14
+ for (let i = 0; i < top.length; i++) {
15
+ if (relevant.has(top[i]))
16
+ dcg += 1 / Math.log2(i + 2);
17
+ }
18
+ const idealCount = Math.min(k, relevant.size);
19
+ let idcg = 0;
20
+ for (let i = 0; i < idealCount; i++)
21
+ idcg += 1 / Math.log2(i + 2);
22
+ return idcg === 0 ? 1 : dcg / idcg;
23
+ }
24
+ export function recallAtK(returned, relevant, k) {
25
+ if (relevant.size === 0)
26
+ return 1;
27
+ const top = new Set(returned.slice(0, k));
28
+ let hit = 0;
29
+ for (const r of relevant)
30
+ if (top.has(r))
31
+ hit += 1;
32
+ return hit / relevant.size;
33
+ }
34
+ export function mrr(returned, relevant) {
35
+ for (let i = 0; i < returned.length; i++) {
36
+ if (relevant.has(returned[i]))
37
+ return 1 / (i + 1);
38
+ }
39
+ return 0;
40
+ }
41
+ /**
42
+ * Leapfrog gate. A banned ref "leapfrogs" when it appears ABOVE at least one
43
+ * present relevant ref. Returns the fraction of present banned refs that do
44
+ * NOT leapfrog (1.0 when no banned ref is present, or none leapfrog), plus the
45
+ * raw violation count.
46
+ */
47
+ export function noBannedAboveRequired(returned, relevant, banned) {
48
+ const rankOf = new Map();
49
+ returned.forEach((ref, i) => {
50
+ if (!rankOf.has(ref))
51
+ rankOf.set(ref, i);
52
+ });
53
+ const relevantRanks = returned.map((ref, i) => (relevant.has(ref) ? i : -1)).filter((i) => i >= 0);
54
+ if (relevantRanks.length === 0) {
55
+ // No relevant ref present to be leapfrogged — gate is vacuously satisfied.
56
+ return { score: 1, leapfrogCount: 0 };
57
+ }
58
+ const worstRelevantRank = Math.max(...relevantRanks);
59
+ const bannedPresent = returned.filter((ref) => banned.has(ref));
60
+ if (bannedPresent.length === 0)
61
+ return { score: 1, leapfrogCount: 0 };
62
+ let leapfrog = 0;
63
+ for (const b of bannedPresent) {
64
+ const rb = rankOf.get(b);
65
+ if (rb !== undefined && rb < worstRelevantRank)
66
+ leapfrog += 1;
67
+ }
68
+ return { score: 1 - leapfrog / bannedPresent.length, leapfrogCount: leapfrog };
69
+ }
70
+ /** Score a single curate result (ordered refs) against its judgment. */
71
+ export function scoreCurateCase(returned, judgment, weights = DEFAULT_CURATE_WEIGHTS) {
72
+ const k = judgment.limit;
73
+ const relevant = new Set(judgment.relevant);
74
+ const banned = new Set(judgment.banned);
75
+ const ndcg = ndcgAtK(returned, relevant, k);
76
+ const recall = recallAtK(returned, relevant, k);
77
+ const rr = mrr(returned, relevant);
78
+ const gate = noBannedAboveRequired(returned, relevant, banned);
79
+ const score = ndcg * weights.ndcg + recall * weights.recall + rr * weights.mrr + gate.score * weights.noBannedAboveRequired;
80
+ return {
81
+ ndcg,
82
+ recall,
83
+ mrr: rr,
84
+ noBannedAboveRequired: gate.score,
85
+ bannedLeapfrogCount: gate.leapfrogCount,
86
+ score,
87
+ };
88
+ }
89
+ /** Aggregate per-case metrics into a suite summary. */
90
+ export function summarizeCurateMetrics(metrics) {
91
+ const n = metrics.length;
92
+ if (n === 0) {
93
+ return {
94
+ caseCount: 0,
95
+ meanScore: 0,
96
+ meanNdcg: 0,
97
+ meanRecall: 0,
98
+ meanMrr: 0,
99
+ meanNoBannedAboveRequired: 1,
100
+ totalBannedLeapfrog: 0,
101
+ };
102
+ }
103
+ const sum = (sel) => metrics.reduce((a, m) => a + sel(m), 0);
104
+ return {
105
+ caseCount: n,
106
+ meanScore: sum((m) => m.score) / n,
107
+ meanNdcg: sum((m) => m.ndcg) / n,
108
+ meanRecall: sum((m) => m.recall) / n,
109
+ meanMrr: sum((m) => m.mrr) / n,
110
+ meanNoBannedAboveRequired: sum((m) => m.noBannedAboveRequired) / n,
111
+ totalBannedLeapfrog: sum((m) => m.bannedLeapfrogCount),
112
+ };
113
+ }
@@ -700,6 +700,62 @@ const MIGRATIONS = [
700
700
  ALTER TABLE asset_salience ADD COLUMN encoding_source TEXT DEFAULT NULL;
701
701
  `,
702
702
  },
703
+ // ── Migration 016 — collapse/churn detector (R5) ─────────────────────────────
704
+ //
705
+ // Longitudinal store-health history for the improve pipeline
706
+ // (docs/design/improve-collapse-churn-detector-design.md).
707
+ //
708
+ // canary_queries — the fixed canary set, minted deterministically from the
709
+ // live stash on first detector run and NEVER auto-refreshed (silent
710
+ // re-baselining is how a slow collapse hides). `canary_set_id` groups one
711
+ // mint; deactivated sets keep their rows (active = 0) so historical cycle
712
+ // rows stay interpretable. Tens of rows; never purged.
713
+ //
714
+ // improve_cycle_metrics — one row per qualifying improve cycle (a run where
715
+ // consolidate processed ≥1 op or recombine evaluated ≥1 cluster). Every
716
+ // column is a scalar or a size-capped JSON blob (< 2 KB/row by
717
+ // construction — the result_json lesson applied). Retention: 365 days via
718
+ // purgeOldCycleMetrics. Trend queries drive the collapse/churn alert
719
+ // evaluation and the health advisory; `canary_set_id` scoping prevents
720
+ // comparing across canary re-mints.
721
+ {
722
+ id: "016-collapse-churn-detector",
723
+ up: `
724
+ CREATE TABLE IF NOT EXISTS canary_queries (
725
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
726
+ canary_set_id TEXT NOT NULL,
727
+ anchor_ref TEXT NOT NULL,
728
+ query TEXT NOT NULL,
729
+ source TEXT NOT NULL DEFAULT 'auto',
730
+ active INTEGER NOT NULL DEFAULT 1,
731
+ created_at TEXT NOT NULL
732
+ );
733
+ CREATE INDEX IF NOT EXISTS idx_canary_queries_active
734
+ ON canary_queries(active, canary_set_id);
735
+
736
+ CREATE TABLE IF NOT EXISTS improve_cycle_metrics (
737
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
738
+ run_id TEXT NOT NULL,
739
+ ts TEXT NOT NULL,
740
+ pass TEXT NOT NULL,
741
+ canary_set_id TEXT NOT NULL,
742
+ mean_recall REAL NOT NULL,
743
+ mean_ndcg REAL NOT NULL,
744
+ mean_mrr REAL NOT NULL,
745
+ canary_ranks_json TEXT NOT NULL,
746
+ store_total INTEGER NOT NULL,
747
+ store_by_type_json TEXT NOT NULL,
748
+ distinct_content_ratio REAL NOT NULL,
749
+ mean_bigram_diversity REAL NOT NULL,
750
+ over_generation_count INTEGER NOT NULL,
751
+ accepted_actions INTEGER NOT NULL,
752
+ merge_floor_violations INTEGER NOT NULL DEFAULT 0,
753
+ alerts_json TEXT NOT NULL DEFAULT '[]'
754
+ );
755
+ CREATE INDEX IF NOT EXISTS idx_improve_cycle_metrics_ts
756
+ ON improve_cycle_metrics(ts);
757
+ `,
758
+ },
703
759
  ];
704
760
  /**
705
761
  * Apply every pending migration in a single transaction per migration.