akm-cli 0.9.0-beta.52 → 0.9.0-beta.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/assets/hints/cli-hints-full.md +6 -5
  2. package/dist/cli/clack.js +56 -0
  3. package/dist/cli/confirm.js +1 -1
  4. package/dist/cli.js +0 -7
  5. package/dist/commands/env/env-cli.js +3 -2
  6. package/dist/commands/env/env.js +14 -67
  7. package/dist/commands/health/checks.js +28 -15
  8. package/dist/commands/health/html-report.js +33 -10
  9. package/dist/commands/health.js +222 -22
  10. package/dist/commands/improve/collapse-detector.js +419 -0
  11. package/dist/commands/improve/consolidate.js +72 -54
  12. package/dist/commands/improve/distill.js +79 -13
  13. package/dist/commands/improve/extract.js +13 -6
  14. package/dist/commands/improve/homeostatic.js +109 -79
  15. package/dist/commands/improve/improve-cli.js +67 -1
  16. package/dist/commands/improve/improve.js +10 -0
  17. package/dist/commands/improve/loop-stages.js +39 -1
  18. package/dist/commands/improve/outcome-loop.js +33 -19
  19. package/dist/commands/improve/preparation.js +36 -11
  20. package/dist/commands/improve/salience.js +49 -32
  21. package/dist/commands/read/curate.js +9 -13
  22. package/dist/commands/read/knowledge.js +4 -0
  23. package/dist/commands/read/search-cli.js +6 -4
  24. package/dist/commands/read/search.js +12 -5
  25. package/dist/commands/read/show.js +6 -8
  26. package/dist/commands/sources/add-cli.js +1 -1
  27. package/dist/commands/sources/init.js +12 -0
  28. package/dist/commands/sources/stash-cli.js +1 -1
  29. package/dist/commands/tasks/default-tasks.js +12 -0
  30. package/dist/core/asset/asset-spec.js +3 -2
  31. package/dist/core/config/config-schema.js +39 -17
  32. package/dist/core/config/config.js +12 -0
  33. package/dist/core/eval/rank-metrics.js +113 -0
  34. package/dist/core/state/migrations.js +56 -0
  35. package/dist/core/state-db.js +146 -19
  36. package/dist/core/warn.js +21 -0
  37. package/dist/indexer/db/db.js +6 -0
  38. package/dist/indexer/ensure-index.js +36 -92
  39. package/dist/indexer/index-writer-lock.js +9 -11
  40. package/dist/indexer/index-written-assets.js +105 -0
  41. package/dist/indexer/indexer.js +16 -4
  42. package/dist/indexer/passes/metadata.js +20 -0
  43. package/dist/indexer/read-preflight.js +23 -0
  44. package/dist/indexer/search/db-search.js +29 -1
  45. package/dist/indexer/search/ranking-contributors.js +33 -1
  46. package/dist/indexer/search/ranking.js +66 -0
  47. package/dist/indexer/search/search-fields.js +6 -0
  48. package/dist/indexer/walk/walker.js +21 -13
  49. package/dist/integrations/agent/detect.js +9 -0
  50. package/dist/integrations/agent/index.js +1 -1
  51. package/dist/llm/client.js +12 -0
  52. package/dist/llm/embedder.js +26 -2
  53. package/dist/llm/embedders/local.js +7 -1
  54. package/dist/llm/feature-gate.js +6 -2
  55. package/dist/output/renderers.js +8 -13
  56. package/dist/output/shapes/helpers.js +0 -3
  57. package/dist/output/shapes/passthrough.js +1 -0
  58. package/dist/scripts/migrate-storage.js +178 -35
  59. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +46 -19
  60. package/dist/setup/detect.js +9 -0
  61. package/dist/setup/registry-stash-loader.js +12 -0
  62. package/dist/setup/setup.js +1 -1
  63. package/dist/storage/repositories/index-db.js +10 -1
  64. package/dist/tasks/backends/index.js +9 -0
  65. package/dist/tasks/runner.js +9 -0
  66. package/package.json +2 -4
@@ -2,12 +2,6 @@
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
4
  // ── Constants ─────────────────────────────────────────────────────────────────
5
- /**
6
- * Weight on the "retrieved-but-never-improved" penalty term. Setting this to
7
- * 0 degrades to a pure prediction-error score (no quality filter); setting it
8
- * to 1 heavily penalises assets whose retrievals never led to accepted changes.
9
- */
10
- export const OUTCOME_PENALTY_WEIGHT = 0.3;
11
5
  /**
12
6
  * EMA decay factor for the expected-retrieval rolling mean (α).
13
7
  * New expected = α × new_count + (1−α) × old_expected.
@@ -33,6 +27,15 @@ export const WARM_START_CAP = 0.3;
33
27
  * very-negative run can't send the score to −∞.
34
28
  */
35
29
  export const OUTCOME_SCORE_MIN = -1.0;
30
+ /**
31
+ * Saturation ceiling: the maximum outcome_score. Biological RPE saturates —
32
+ * a fully predicted reward produces zero response, not an ever-growing one —
33
+ * so a long-lived popular asset must not accrue unbounded outcome mass that
34
+ * would dominate ranking once the outcome weight is enabled (analysis G2).
35
+ * 1.5 comfortably exceeds the max plausible single-cycle raw update while
36
+ * keeping the normalised outcomeSalience spread meaningful.
37
+ */
38
+ export const OUTCOME_SCORE_MAX = 1.5;
36
39
  /**
37
40
  * Diversity floor: `outcomeSalience` for any asset is at least this fraction
38
41
  * of the maximum observed `outcome_score` in the table, so rare-but-correct
@@ -86,12 +89,8 @@ export function updateAssetOutcome(db, inputs) {
86
89
  //
87
90
  // retrieval_delta = current − stored (non-negative — we never go backwards)
88
91
  const retrievalDelta = Math.max(0, inputs.currentRetrievalCount - existing.retrieval_count);
89
- // accepted_change_rate = accepted_count / max(1, retrieval_count)
90
- const acceptedChangeRate = inputs.acceptedChangeCount / Math.max(1, inputs.currentRetrievalCount);
91
92
  // Differential prediction-error term:
92
- // outcome = (retrieval_delta − expected_delta)
93
- // − PENALTY × retrieval_delta × (1 − accepted_change_rate)
94
- // + valence
93
+ // outcome = (retrieval_delta − expected_delta) + valence
95
94
  //
96
95
  // Prediction error is computed against the PRIOR stored EMA (before folding
97
96
  // in this cycle's observation), so the current delta cannot leak into its own
@@ -102,14 +101,16 @@ export function updateAssetOutcome(db, inputs) {
102
101
  // expected' = α × delta + (1−α) × prior_expected
103
102
  expectedRetrievalRate =
104
103
  OUTCOME_EMA_ALPHA * retrievalDelta + (1 - OUTCOME_EMA_ALPHA) * existing.expected_retrieval_rate;
105
- const penalty = OUTCOME_PENALTY_WEIGHT * retrievalDelta * (1 - acceptedChangeRate);
106
104
  // Running sum (EMA approach): new score = α × update + (1−α) × old
107
105
  // so the score tracks the moving signal, not the cumulative sum.
108
- const rawUpdate = predictionError - penalty + valence;
106
+ const rawUpdate = predictionError + valence;
109
107
  const newScore = OUTCOME_EMA_ALPHA * rawUpdate + (1 - OUTCOME_EMA_ALPHA) * existing.outcome_score;
110
- // Clip to [OUTCOME_SCORE_MIN, +Infinity) — no upper cap so that very-active
111
- // useful assets can accumulate a high positive score.
112
- outcomeScore = Math.max(OUTCOME_SCORE_MIN, newScore);
108
+ // Clip to [OUTCOME_SCORE_MIN, OUTCOME_SCORE_MAX] — the ceiling is the RPE
109
+ // saturation analog (G2): without it, long-lived popular assets accumulate
110
+ // unbounded positive mass (live max was 3.13) and would dominate rank_score
111
+ // the moment the outcome weight is enabled. Stored legacy scores above the
112
+ // ceiling converge back under it on their next differential update.
113
+ outcomeScore = Math.min(OUTCOME_SCORE_MAX, Math.max(OUTCOME_SCORE_MIN, newScore));
113
114
  // ── review_pressure (#613) ─────────────────────────────────────────────
114
115
  // New negatives this cycle.
115
116
  const newNegatives = Math.max(0, inputs.negativeFeedbackCount - existing.negative_feedback_count);
@@ -211,6 +212,17 @@ export function outcomeScoreToSalience(outcomeScore, maxScore) {
211
212
  // Apply diversity floor.
212
213
  return Math.max(DIVERSITY_FLOOR_FRACTION, normalised);
213
214
  }
215
+ // ── Proxy-adequacy tripwire ───────────────────────────────────────────────────
216
+ /**
217
+ * Dead-proxy threshold: |corr| below this means outcome_score carries no
218
+ * information about improvement need (pure noise).
219
+ */
220
+ export const PROXY_DEAD_CORR_THRESHOLD = 0.1;
221
+ /**
222
+ * Minimum sample size before the dead-proxy check fires. Below this, a
223
+ * near-zero correlation is indistinguishable from small-sample noise.
224
+ */
225
+ export const PROXY_DEAD_MIN_N = 500;
214
226
  /**
215
227
  * Compute `corr(outcome_score, accepted_change_rate)` across all asset_outcome
216
228
  * rows. Returns `{correlation: NaN, n, isInverted: false}` when there is
@@ -226,7 +238,7 @@ export function outcomeScoreToSalience(outcomeScore, maxScore) {
226
238
  export function computeProxyAdequacy(rows) {
227
239
  const n = rows.length;
228
240
  if (n < 3)
229
- return { correlation: Number.NaN, n, isInverted: false };
241
+ return { correlation: Number.NaN, n, isInverted: false, isDead: false };
230
242
  // accepted_change_rate per row.
231
243
  const xs = rows.map((r) => r.outcome_score);
232
244
  const ys = rows.map((r) => r.accepted_change_count / Math.max(1, r.retrieval_count));
@@ -247,10 +259,12 @@ export function computeProxyAdequacy(rows) {
247
259
  varY /= n;
248
260
  const denom = Math.sqrt(varX) * Math.sqrt(varY);
249
261
  if (denom < 1e-12)
250
- return { correlation: Number.NaN, n, isInverted: false };
262
+ return { correlation: Number.NaN, n, isInverted: false, isDead: false };
251
263
  const correlation = covXY / denom;
252
264
  // Inverted proxy: negative correlation between outcome and accepted_change_rate
253
265
  // means high-outcome assets are also high-need — the opposite of "useful".
254
266
  const isInverted = correlation < -0.3;
255
- return { correlation, n, isInverted };
267
+ // Dead proxy: near-zero correlation at scale — the score is noise.
268
+ const isDead = n >= PROXY_DEAD_MIN_N && Math.abs(correlation) < PROXY_DEAD_CORR_THRESHOLD;
269
+ return { correlation, n, isInverted, isDead };
256
270
  }
@@ -27,7 +27,7 @@ import { computeValenceScore, FEEDBACK_WEIGHT, UTILITY_WEIGHT } from "./feedback
27
27
  import { makeGateConfig, resolveExtractConfidence, runAutoAcceptGate } from "./improve-auto-accept.js";
28
28
  import { resolveProcessEnabled } from "./improve-profiles.js";
29
29
  import { applyMemoryCleanup } from "./memory/memory-improve.js";
30
- import { computeProxyAdequacy, getAllAssetOutcomes, getOutcomeScoresByRef, outcomeScoreToSalience, updateAssetOutcome, } from "./outcome-loop.js";
30
+ import { computeProxyAdequacy, getAllAssetOutcomes, getOutcomeScoresByRef, OUTCOME_SCORE_MAX, outcomeScoreToSalience, updateAssetOutcome, } from "./outcome-loop.js";
31
31
  import { DEFAULT_DUE_DAYS, DEFAULT_MAX_PER_RUN, selectProactiveMaintenanceRefs } from "./proactive-maintenance.js";
32
32
  import { buildRankChangeReport, computeSalience, getAllRankScores, getAssetSalience, getConsecutiveNoOps, getLastUseMsByRef, isContentEncodingRow, SALIENCE_NO_OP_DAMPEN_FACTOR, SALIENCE_NO_OP_DAMPEN_THRESHOLD, upsertAssetSalience, } from "./salience.js";
33
33
  // ── improve preparation stage ───────────────────────
@@ -1145,23 +1145,32 @@ export async function runImprovePreparationStage(args) {
1145
1145
  const proactiveAndRetrievalSet = new Set([...highRetrievalRefs, ...proactiveRefs].map((r) => r.ref));
1146
1146
  try {
1147
1147
  withStateDb((dbForHighSalience) => {
1148
- const effectiveLimit = options.limit ?? 10;
1148
+ // Derive the cap from the resolved reflect limit (mirrors improve.ts's
1149
+ // options.limit resolution) so an unbounded whole-stash run does not
1150
+ // collapse the lane to exactly 1 ref via the bare `?? 10` fallback.
1151
+ const effectiveLimit = options.limit ?? improveProfile?.processes?.reflect?.limit ?? improveProfile.limit ?? 10;
1149
1152
  const highSalienceCap = Math.max(1, Math.floor(effectiveLimit * 0.1));
1150
1153
  // #632/#4 — session-capture telemetry (checkpoints) must never consume
1151
1154
  // the scarce high-salience budget. Even with a content-scored row, these
1152
1155
  // are pipeline bookkeeping, not assets worth reflecting/rewriting.
1153
1156
  const candidates = noFeedbackCandidates.filter((r) => !proactiveAndRetrievalSet.has(r.ref) && !isSessionCaptureMemoryName(parseAssetRef(r.ref).name));
1157
+ // Collect ALL qualifying candidates, then take the top-N BY SCORE — the
1158
+ // previous first-N-in-scan-order break meant a higher-salience candidate
1159
+ // found later in the scan lost its slot to an earlier lower-scoring one.
1160
+ const qualifying = [];
1154
1161
  for (const r of candidates) {
1155
- if (highSalienceRefs.length >= highSalienceCap)
1156
- break;
1157
1162
  const row = getAssetSalience(dbForHighSalience, r.ref);
1158
1163
  if (row &&
1159
1164
  isContentEncodingRow(row, parseAssetRef(r.ref).type) &&
1160
1165
  row.encoding_salience >= salienceThreshold &&
1161
1166
  !lastReflectProposalTs.has(r.ref)) {
1162
- highSalienceRefs.push(r);
1167
+ qualifying.push({ ref: r, score: row.encoding_salience });
1163
1168
  }
1164
1169
  }
1170
+ qualifying.sort((a, b) => b.score - a.score);
1171
+ for (const q of qualifying.slice(0, highSalienceCap)) {
1172
+ highSalienceRefs.push(q.ref);
1173
+ }
1165
1174
  }, { path: eventsCtx?.dbPath });
1166
1175
  }
1167
1176
  catch (err) {
@@ -1349,8 +1358,13 @@ export async function runImprovePreparationStage(args) {
1349
1358
  if (row.outcome_score > maxOutcomeScore)
1350
1359
  maxOutcomeScore = row.outcome_score;
1351
1360
  }
1352
- // Proxy-adequacy tripwire: emit a health event if outcome_score is
1353
- // negatively correlated with accepted_change_rate (inverted proxy).
1361
+ // Read-clip: legacy rows written before the OUTCOME_SCORE_MAX write-clip
1362
+ // existed can sit above the ceiling (live max was 3.13). Without this
1363
+ // clip they inflate the normalisation denominator and floor everyone
1364
+ // else's outcomeSalience (#691 follow-up).
1365
+ maxOutcomeScore = Math.min(maxOutcomeScore, OUTCOME_SCORE_MAX);
1366
+ // Proxy-adequacy tripwire (two-tailed): inverted (corr < −0.3) and
1367
+ // dead (|corr| < 0.1 at n ≥ 500) both emit health events.
1354
1368
  const adequacy = computeProxyAdequacy(allOutcomes);
1355
1369
  if (adequacy.isInverted) {
1356
1370
  appendEvent({
@@ -1363,6 +1377,17 @@ export async function runImprovePreparationStage(args) {
1363
1377
  },
1364
1378
  }, eventsCtx);
1365
1379
  }
1380
+ if (adequacy.isDead) {
1381
+ appendEvent({
1382
+ eventType: "outcome_proxy_dead",
1383
+ ref: undefined,
1384
+ metadata: {
1385
+ correlation: adequacy.correlation,
1386
+ n: adequacy.n,
1387
+ note: "|corr(outcome_score, accepted_change_rate)| < 0.1 at n ≥ 500: outcome_score is statistically unrelated to improvement outcomes — the proxy is noise, not signal. Rank contributions derived from it are not currently informative.",
1388
+ },
1389
+ }, eventsCtx);
1390
+ }
1366
1391
  }
1367
1392
  catch {
1368
1393
  // best-effort: tripwire failure never blocks ranking
@@ -1392,11 +1417,11 @@ export async function runImprovePreparationStage(args) {
1392
1417
  // so feedback refs get their genuine retrieval frequency, not a 0-floor fallback.
1393
1418
  // outcomeSalienceByRef is populated by WS-2 above (or empty on first run).
1394
1419
  //
1395
- // Part-V gate: read the operator opt-in flag from config. Default false
1396
- // (WS-1 parity weights) until the maintainer runs scripts/akm-eval and sets
1397
- // improve.salience.outcomeWeightEnabled: true in the config.
1420
+ // R1 loop closure: the outcome weight is ON by default (the G2 saturation
1421
+ // cap makes it safe). Operators opt out with
1422
+ // improve.salience.outcomeWeightEnabled: false in the config.
1398
1423
  const salienceConfig = (options.config ?? loadConfig()).improve?.salience;
1399
- const outcomeWeightEnabled = salienceConfig?.outcomeWeightEnabled === true;
1424
+ const outcomeWeightEnabled = salienceConfig?.outcomeWeightEnabled !== false;
1400
1425
  const salienceMap = new Map();
1401
1426
  const nowForSalience = Date.now();
1402
1427
  // #644 — preserve content-derived encoding scores across runs.
@@ -21,12 +21,12 @@
21
21
  *
22
22
  * `rankScore = (w_e·encoding + w_o·outcome + w_r·retrieval) × sizePenalty`, normalized [0,1].
23
23
  *
24
- * **WS-2 default-off (Part-V gate):**
25
- * `w_o = 0.15` is the target but is applied only when `outcomeWeightEnabled=true`
26
- * (set via `improve.salience.outcomeWeightEnabled: true` in config after running
27
- * Part-V T0 baseline). Default: WS-1 parity weights `w_e=0.30, w_r=0.70, w_o=0`.
28
- * `outcomeSalience` is populated from `asset_outcome.outcome_score` (WS-2) for
29
- * observability regardless of the flag.
24
+ * **WS-2 default-ON (R1 loop closure):**
25
+ * `w_o = 0.15` is applied by default now that `outcome_score` saturates at
26
+ * `OUTCOME_SCORE_MAX` (G2). Operators can opt out via
27
+ * `improve.salience.outcomeWeightEnabled: false`, which restores the WS-1
28
+ * parity weights `w_e=0.30, w_r=0.70, w_o=0`. `outcomeSalience` is populated
29
+ * from `asset_outcome.outcome_score` regardless of the flag.
30
30
  *
31
31
  * ## Plasticity
32
32
  *
@@ -52,16 +52,30 @@ import { WARM_START_CAP } from "./outcome-loop.js";
52
52
  const DAY_MS = 86_400_000;
53
53
  // ── Recency decay half-life (mirrors the proactive-maintenance prototype) ─────
54
54
  const RECENCY_HALFLIFE_DAYS = 21;
55
+ // ── Recency-floor half-life (R4 — SHY-style continuous downscaling) ──────────
56
+ //
57
+ // The recency floor itself decays on this (much longer) half-life so an
58
+ // unreviewed-forever asset keeps drifting down instead of parking at the 0.1
59
+ // floor. This replaces the deleted homeostatic demotion pass (which was
60
+ // default-off and self-undoing — every salience recompute clobbered it);
61
+ // folding the decay into the always-applied recency term makes it persist by
62
+ // construction. At 180 days the floor halves; a 1-year-stale asset sits at
63
+ // ~0.025 instead of 0.1.
64
+ const RECENCY_FLOOR_HALFLIFE_DAYS = 180;
65
+ // Absolute epsilon under the decaying floor. Keeps the frequency term ordinal
66
+ // for assets whose last-use timestamp is unknown (utility_scores has no
67
+ // last_used_at) — without it their retrieval salience collapses to exactly 0
68
+ // and frequency ordering is lost for maintenance selection.
69
+ const RECENCY_EPSILON = 0.01;
55
70
  // ── Size proxy floor (avoids log10(0)) ────────────────────────────────────────
56
71
  const SIZE_FLOOR_BYTES = 200;
57
72
  // ── Projection weights ────────────────────────────────────────────────────────
58
73
  //
59
- // These constants reflect the WS-2 TARGET values (used when outcomeWeightEnabled=true).
60
- // Default ranking uses WS-1 parity weights (w_e=0.30, w_r=0.70, w_o=0) until the
61
- // maintainer opts in via `improve.salience.outcomeWeightEnabled: true` after running
62
- // the Part-V T0 baseline (scripts/akm-eval + health report).
74
+ // These constants are the DEFAULT ranking weights (R1 loop closure). Operators
75
+ // can opt back out to the WS-1 parity weights (w_e=0.30, w_r=0.70, w_o=0) via
76
+ // `improve.salience.outcomeWeightEnabled: false`.
63
77
  //
64
- // WS-2 opt-in split (w_e=0.25, w_o=0.15, w_r=0.60, sum = 1.0):
78
+ // WS-2 split (w_e=0.25, w_o=0.15, w_r=0.60, sum = 1.0):
65
79
  // [exp] Expert recommendation: encoding should be moderate so a type-importance
66
80
  // stub does not completely dominate; retrieval should be strong since it directly
67
81
  // measures use; outcome provides a quality signal proportional to usefulness.
@@ -78,10 +92,10 @@ if (Math.abs(W_ENCODING + W_OUTCOME + W_RETRIEVAL - 1.0) > 1e-9) {
78
92
  }
79
93
  // ── WS-1 parity weights ───────────────────────────────────────────────────────
80
94
  //
81
- // These constants reflect the default WS-1 parity weights used when
82
- // `outcomeWeightEnabled` is false/absent (the default). They preserve the
95
+ // These constants reflect the WS-1 parity weights used when the operator
96
+ // explicitly opts out (`outcomeWeightEnabled: false`). They preserve the
83
97
  // WS-1 two-way split (w_e=0.30, w_r=0.70) with w_o=0 so outcome does not
84
- // affect rankScore until the operator opts in after the Part-V baseline run.
98
+ // affect rankScore in the opt-out mode.
85
99
  //
86
100
  // Named here (rather than inline literals in the else branch) so a future
87
101
  // re-tune has a single source of truth and the sum-to-1 guard below catches
@@ -155,15 +169,19 @@ export function computeSalience(inputs) {
155
169
  //
156
170
  // Formula: log(1 + freq) × recencyDecay
157
171
  // log(1+freq): sub-linear frequency term (same as proactive-maintenance prototype).
158
- // recencyDecay: 0.1 + 0.5^(useAgeDays/halflife) — decays to 0.1 floor when stale.
159
- // lastUseMs=0/undefined → useAgeDays=9999 → recencyDecay≈0.1 (floor).
172
+ // recencyDecay: max(ε, 0.1·0.5^(useAgeDays/180) + 0.5^(useAgeDays/21))
173
+ // the fast term halves every 21 days; the 0.1 floor itself halves every
174
+ // 180 days (R4: SHY-style continuous downscaling — an unreviewed-forever
175
+ // asset keeps drifting down instead of parking at the floor). The ε=0.01
176
+ // epsilon keeps the frequency term ordinal for unknown-last-use assets.
177
+ // lastUseMs=0/undefined → useAgeDays=9999 → recencyDecay=ε.
160
178
  //
161
179
  // The recency term is MANDATORY (plan requirement §WS-1 step 2). Without it
162
- // retrievalSalience degenerates to a non-decaying frequency count and the WS-3
163
- // homeostatic step-0 demotion has nothing to act on.
180
+ // retrievalSalience degenerates to a non-decaying frequency count. This
181
+ // always-applied decay replaces the deleted homeostatic demotion pass.
164
182
  const lastUseMs = inputs.lastUseMs ?? 0;
165
183
  const useAgeDays = lastUseMs > 0 ? (now - lastUseMs) / DAY_MS : 9999;
166
- const recencyDecay = 0.1 + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS);
184
+ const recencyDecay = Math.max(RECENCY_EPSILON, 0.1 * 0.5 ** (useAgeDays / RECENCY_FLOOR_HALFLIFE_DAYS) + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS));
167
185
  const rawRetrieval = Math.log(1 + inputs.retrievalFreq) * recencyDecay;
168
186
  // ── Size penalty ─────────────────────────────────────────────────────────────
169
187
  // 1/log10(size): larger assets are slightly deprioritized (same as proactive prototype).
@@ -184,29 +202,28 @@ export function computeSalience(inputs) {
184
202
  // which asymptotes to 1 and equals 0.5 at rawRetrieval=1. This is the same
185
203
  // formula used for MemRL utility updates.
186
204
  const retrieval = rawRetrieval / (rawRetrieval + 1);
187
- // ── Weight selection (Part-V gate) ────────────────────────────────────────
188
- //
189
- // When `outcomeWeightEnabled` is false/absent (default): use WS-1 parity
190
- // weights (w_e=0.30, w_r=0.70, w_o=0) so ranking is unchanged from the WS-1
191
- // baseline. The `outcome` sub-score is still computed and stored in the
192
- // salience vector for observability, but it does not affect rankScore.
205
+ // ── Weight selection (R1 — outcome loop closed by default) ───────────────
193
206
  //
194
- // When `outcomeWeightEnabled` is true (operator opt-in after Part-V run):
195
- // use WS-2 weights (w_e=0.25, w_o=0.15, w_r=0.60).
207
+ // When `outcomeWeightEnabled` is true/absent (DEFAULT ON since the G2
208
+ // saturation cap landed): use WS-2 weights (w_e=0.25, w_o=0.15, w_r=0.60)
209
+ // so the prediction-error outcome signal actually shapes rankScore — this
210
+ // is the R1 loop-closure from docs/design/improve-self-learning-analysis.md.
196
211
  //
197
- // The constants W_ENCODING, W_OUTCOME, W_RETRIEVAL always reflect the
198
- // WS-2 target values for documentation and re-tune reference.
212
+ // When `outcomeWeightEnabled` is explicitly false (operator opt-out via
213
+ // `improve.salience.outcomeWeightEnabled: false`): fall back to the WS-1
214
+ // parity weights (w_e=0.30, w_r=0.70, w_o=0). The `outcome` sub-score is
215
+ // still computed and stored for observability in that mode.
199
216
  let we;
200
217
  let wo;
201
218
  let wr;
202
- if (inputs.outcomeWeightEnabled === true) {
203
- // WS-2 active: three-way split from Part-V operator opt-in.
219
+ if (inputs.outcomeWeightEnabled !== false) {
220
+ // WS-2 active (default): three-way split.
204
221
  we = W_ENCODING; // 0.25
205
222
  wo = W_OUTCOME; // 0.15
206
223
  wr = W_RETRIEVAL; // 0.60
207
224
  }
208
225
  else {
209
- // WS-1 parity (default): w_o=0, redistribute to WS-1 proportions.
226
+ // WS-1 parity (opt-out): w_o=0, redistribute to WS-1 proportions.
210
227
  // Original WS-1 split was w_e=0.30, w_r=0.70.
211
228
  we = W_ENCODING_PARITY;
212
229
  wo = W_OUTCOME_PARITY;
@@ -19,12 +19,12 @@ import { parseFrontmatter } from "../../core/asset/frontmatter.js";
19
19
  import { getIndexPassConfig, loadConfig } from "../../core/config/config.js";
20
20
  import { rethrowIfTestIsolationError, UsageError } from "../../core/errors.js";
21
21
  import { appendEvent } from "../../core/events.js";
22
- import { closeDatabase, computeBodyHash, openExistingDatabase } from "../../indexer/db/db.js";
22
+ import { computeBodyHash } from "../../indexer/db/db.js";
23
23
  import { enqueueGraphExtraction, hasGraphData } from "../../indexer/db/graph-db.js";
24
24
  import { findSourceForPath, resolveSourceEntries } from "../../indexer/search/search-source.js";
25
25
  import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
26
26
  import { truncateDescription } from "../../output/shapes.js";
27
- import { withIndexDb } from "../../storage/repositories/index-db.js";
27
+ import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
28
28
  import { akmSearch, parseSearchSource } from "./search.js";
29
29
  import { akmShowUnified } from "./show.js";
30
30
  const CURATE_FALLBACK_FILTER_WORDS = new Set([
@@ -58,15 +58,14 @@ const CURATE_REFERENCE_QUERY_RE = /\b(?:reference|docs?|guide|how|explain|learn|
58
58
  * Fire-and-forget: log a curate event to the usage_events table and events.jsonl.
59
59
  * Never blocks the caller; errors are silently ignored.
60
60
  */
61
- function logCurateEvent(query, result) {
61
+ function logCurateEvent(query, result, eventSource = "user") {
62
62
  const itemRefs = result.items.map((item) => ("ref" in item ? item.ref : `registry:${item.id}`));
63
63
  appendEvent({
64
64
  eventType: "curate",
65
65
  metadata: { query, itemCount: result.items.length, itemRefs },
66
66
  });
67
67
  try {
68
- const db = openExistingDatabase();
69
- try {
68
+ withIndexDb((db) => {
70
69
  insertUsageEvent(db, {
71
70
  event_type: "curate",
72
71
  query,
@@ -74,7 +73,7 @@ function logCurateEvent(query, result) {
74
73
  itemCount: result.items.length,
75
74
  itemRefs,
76
75
  }),
77
- source: "user",
76
+ source: eventSource,
78
77
  });
79
78
  for (const item of result.items) {
80
79
  if (!("ref" in item) || typeof item.ref !== "string")
@@ -83,13 +82,10 @@ function logCurateEvent(query, result) {
83
82
  event_type: "curate",
84
83
  query,
85
84
  entry_ref: item.ref,
86
- source: "user",
85
+ source: eventSource,
87
86
  });
88
87
  }
89
- }
90
- finally {
91
- closeDatabase(db);
92
- }
88
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
93
89
  }
94
90
  catch (err) {
95
91
  rethrowIfTestIsolationError(err);
@@ -110,7 +106,7 @@ export async function akmCurate(options) {
110
106
  source,
111
107
  }));
112
108
  const result = await curateSearchResults(options.query, searchResponse, limit, options.type);
113
- logCurateEvent(options.query, result);
109
+ logCurateEvent(options.query, result, options.eventSource);
114
110
  return result;
115
111
  }
116
112
  export async function curateSearchResults(query, result, limit, selectedType) {
@@ -207,7 +203,7 @@ function maybeEnqueueLazyGraph(assetPath) {
207
203
  if (!hasGraphData(db, stashRoot, assetPath)) {
208
204
  enqueueGraphExtraction(db, stashRoot, assetPath, bodyHash, 0);
209
205
  }
210
- });
206
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
211
207
  }
212
208
  catch (err) {
213
209
  rethrowIfTestIsolationError(err);
@@ -16,6 +16,7 @@ import { isHttpUrl, isWithin, tryReadStdinText } from "../../core/common.js";
16
16
  import { loadConfig } from "../../core/config/config.js";
17
17
  import { UsageError } from "../../core/errors.js";
18
18
  import { commitWriteTargetBoundary, formatRefForMessage, resolveWriteTarget, writeAssetToSource, } from "../../core/write-source.js";
19
+ import { indexWrittenAssets } from "../../indexer/index-written-assets.js";
19
20
  import { fetchWebsiteMarkdownSnapshot, shouldAllowPrivateWebsiteUrlForTests } from "../../sources/website-ingest.js";
20
21
  const MAX_CAPTURED_ASSET_SLUG_LENGTH = 64;
21
22
  // ── Asset-name normalisation ─────────────────────────────────────────────────
@@ -144,6 +145,9 @@ export async function writeMarkdownAsset(options) {
144
145
  // 0.9.0 (issue #507): single batch commit at the write boundary for git
145
146
  // targets. No-op for filesystem/primary-stash targets.
146
147
  commitWriteTargetBoundary(target, `Update ${formatRefForMessage(ref)}`);
148
+ // Write-path indexing: the asset is searchable immediately. Fail-open; reads
149
+ // no longer trigger reindexes, so keeping the index current is the writer's job.
150
+ await indexWrittenAssets(source.path, [result.path]);
147
151
  return {
148
152
  ref: result.ref,
149
153
  path: result.path,
@@ -21,13 +21,15 @@ import { getHyphenatedBoolean, getOutputMode, parseFlagValue } from "../../outpu
21
21
  import { akmCurate } from "./curate.js";
22
22
  import { akmSearch, parseBeliefFilterMode, parseScopeFilterFlags, parseSearchSource } from "./search.js";
23
23
  import { akmShowUnified } from "./show.js";
24
- // AKM_EVENT_SOURCE attributes a query to a `user` invocation or the internal
25
- // `improve` loop so the event log can distinguish them; any other value is
26
- // treated as unset.
24
+ // AKM_EVENT_SOURCE attributes a query to a `user` invocation, the internal
25
+ // `improve` loop, or the `task` runner so the event log can distinguish
26
+ // genuine demand from machine traffic; any other value is treated as unset.
27
27
  function resolveEventSource() {
28
28
  const raw = process.env.AKM_EVENT_SOURCE;
29
29
  if (raw === "improve")
30
30
  return "improve";
31
+ if (raw === "task")
32
+ return "task";
31
33
  if (raw === "user")
32
34
  return "user";
33
35
  return undefined;
@@ -129,7 +131,7 @@ export const curateCommand = defineJsonCommand({
129
131
  const limitParsed = parsePositiveIntFlag(args.limit ?? undefined);
130
132
  const limit = limitParsed && limitParsed > 0 ? limitParsed : 4;
131
133
  const source = parseSearchSource(args.source ?? "stash");
132
- const curated = await akmCurate({ query: args.query, type, limit, source });
134
+ const curated = await akmCurate({ query: args.query, type, limit, source, eventSource: resolveEventSource() });
133
135
  output("curate", curated);
134
136
  },
135
137
  });
@@ -16,14 +16,14 @@ import { rethrowIfTestIsolationError, UsageError } from "../../core/errors.js";
16
16
  import { appendEvent } from "../../core/events.js";
17
17
  import { isTransientStashPath } from "../../core/paths.js";
18
18
  import { bumpUtilityScoresBatch, getEntryIdByFilePath } from "../../indexer/db/db.js";
19
+ import { resolveReadSources } from "../../indexer/read-preflight.js";
19
20
  import { searchLocal } from "../../indexer/search/db-search.js";
20
- import { resolveSourceEntries } from "../../indexer/search/search-source.js";
21
21
  import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.js";
22
22
  // Eagerly import source providers to trigger self-registration before the
23
23
  // indexer or path-resolution code runs.
24
24
  import "../../sources/providers/index.js";
25
25
  import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
26
- import { withIndexDb } from "../../storage/repositories/index-db.js";
26
+ import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
27
27
  import { searchRegistry } from "./registry-search.js";
28
28
  const DEFAULT_LIMIT = 20;
29
29
  export async function akmSearch(input) {
@@ -57,7 +57,7 @@ export async function akmSearch(input) {
57
57
  else {
58
58
  source = parsedSource;
59
59
  }
60
- let allSources = resolveSourceEntries(undefined, config);
60
+ let allSources = resolveReadSources(undefined, config).sources;
61
61
  // When a named source was requested, narrow the sources list to just that entry.
62
62
  // `resolveSourceEntries` sets `registryId` to `entry.name` for each config source.
63
63
  if (namedSourceName !== undefined) {
@@ -227,6 +227,9 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
227
227
  metadata: { query, hitCount: stashHits.length, resultRefs: allResultRefs, mode },
228
228
  });
229
229
  try {
230
+ // Short busy timeout: telemetry must never stall the search result behind
231
+ // a background reindex holding the index.db write lock (30s default wait).
232
+ // Under contention these usage hints are skipped, not waited for.
230
233
  withIndexDb((db) => {
231
234
  const resolved = resolveEntryIds(db, stashHits.slice(0, 50));
232
235
  for (const { entryId, ref } of resolved) {
@@ -240,7 +243,11 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
240
243
  }
241
244
  // Bump utility scores for all resolved entries (MemRL retrieval signal).
242
245
  // The indexer overwrites these at next reindex; bumps are temporary hints.
243
- const resolvedIds = resolved.map((r) => r.entryId).filter((id) => id !== undefined);
246
+ // Gated to user-sourced events: pipeline searches (improve probes, task
247
+ // runner) must not feed the utility signal (meta-review 05 DRIFT-6 —
248
+ // the bump previously fired unconditionally, so even correctly-tagged
249
+ // machine traffic inflated utility).
250
+ const resolvedIds = eventSource === "user" ? resolved.map((r) => r.entryId).filter((id) => id !== undefined) : [];
244
251
  if (resolvedIds.length > 0) {
245
252
  let scopeKey;
246
253
  try {
@@ -269,7 +276,7 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
269
276
  }),
270
277
  source: eventSource,
271
278
  });
272
- });
279
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
273
280
  }
274
281
  catch (err) {
275
282
  rethrowIfTestIsolationError(err);
@@ -28,10 +28,10 @@ import { NotFoundError, rethrowIfTestIsolationError, UsageError } from "../../co
28
28
  import { appendEvent, readEvents } from "../../core/events.js";
29
29
  import { closeDatabase, computeBodyHash, findEntryIdByRef, openExistingDatabase } from "../../indexer/db/db.js";
30
30
  import { hasGraphData } from "../../indexer/db/graph-db.js";
31
- import { ensureIndex } from "../../indexer/ensure-index.js";
32
31
  import { listRelatedPathsForFile } from "../../indexer/graph/graph-boost.js";
33
32
  import { extractGraphForSingleFile } from "../../indexer/graph/graph-extraction.js";
34
33
  import { lookup } from "../../indexer/indexer.js";
34
+ import { ensurePrimaryIndexForRead, resolveReadSources } from "../../indexer/read-preflight.js";
35
35
  import { buildEditHint, findSourceForPath, isEditable, resolveSourceEntries } from "../../indexer/search/search-source.js";
36
36
  import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
37
37
  import { buildFileContext, buildRenderContext, getRenderer, runMatchers } from "../../indexer/walk/file-context.js";
@@ -39,7 +39,7 @@ import { resolveAssetPath } from "../../indexer/walk/path-resolver.js";
39
39
  import { resolveIndexPassLLM } from "../../llm/index-passes.js";
40
40
  import { resolveSourcesForOrigin } from "../../registry/origin-resolve.js";
41
41
  import { resolveStorageLocations } from "../../storage/locations.js";
42
- import { withIndexDb } from "../../storage/repositories/index-db.js";
42
+ import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
43
43
  // Eagerly import source providers to trigger self-registration.
44
44
  import "../../sources/providers/index.js";
45
45
  import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.js";
@@ -147,10 +147,8 @@ export async function akmShowUnified(input) {
147
147
  }
148
148
  }
149
149
  // Auto-index when stale so the index is current before lookup.
150
- const allSources = resolveSourceEntries();
151
- if (allSources.length > 0) {
152
- await ensureIndex(allSources[0].path);
153
- }
150
+ const { primarySource } = resolveReadSources();
151
+ await ensurePrimaryIndexForRead(primarySource);
154
152
  // Try local filesystem (FTS5 index lookup)
155
153
  const result = await showLocal(input);
156
154
  // Scope filter narrows resolution: if --scope was supplied, the asset's
@@ -301,7 +299,7 @@ function logShowEvent(ref, eventSource = "user") {
301
299
  entry_id: findEntryIdByRef(db, ref),
302
300
  source: eventSource,
303
301
  });
304
- });
302
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
305
303
  }
306
304
  catch (err) {
307
305
  rethrowIfTestIsolationError(err);
@@ -431,7 +429,7 @@ async function maybeExtractGraphInline(config, sourceStashDir, assetPath) {
431
429
  }
432
430
  withIndexDb((db) => {
433
431
  alreadyGraphed = hasGraphData(db, sourceStashDir, assetPath);
434
- });
432
+ }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
435
433
  if (alreadyGraphed)
436
434
  return;
437
435
  // Open the db for the async extraction ourselves: `withIndexDb` is
@@ -3,8 +3,8 @@
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
4
  import fs from "node:fs";
5
5
  import path from "node:path";
6
- import * as p from "@clack/prompts";
7
6
  import { defineCommand } from "citty";
7
+ import * as p from "../../cli/clack.js";
8
8
  import { output, runWithJsonErrors } from "../../cli/shared.js";
9
9
  import { UsageError } from "../../core/errors.js";
10
10
  import { appendEvent } from "../../core/events.js";
@@ -55,7 +55,19 @@ function assertInitSandbox(stashDir, dirExplicitlyProvided) {
55
55
  function isUnderTestRunner() {
56
56
  return process.env.BUN_TEST === "1" || process.env.NODE_ENV === "test";
57
57
  }
58
+ // ── Test seam ────────────────────────────────────────────────────────────────
59
+ // Swap-and-restore override. Inert in production; only tests call the setter.
60
+ let akmInitOverride;
61
+ /** TEST-ONLY. Swap the implementation of `akmInit`; pass undefined to restore. */
62
+ export function _setAkmInitForTests(fake) {
63
+ akmInitOverride = fake;
64
+ }
58
65
  export async function akmInit(options) {
66
+ if (akmInitOverride)
67
+ return akmInitOverride(options);
68
+ return akmInitReal(options);
69
+ }
70
+ async function akmInitReal(options) {
59
71
  const dirExplicitlyProvided = options?.dir != null;
60
72
  const setDefault = options?.setDefault === true;
61
73
  const stashDir = options?.dir ? path.resolve(options.dir) : getDefaultStashDir();
@@ -25,8 +25,8 @@
25
25
  * SIGINT/SIGTERM handlers in a try/finally — left byte-for-byte untouched.
26
26
  */
27
27
  import path from "node:path";
28
- import * as p from "@clack/prompts";
29
28
  import { defineCommand } from "citty";
29
+ import * as p from "../../cli/clack.js";
30
30
  import { defineJsonCommand, output, runWithJsonErrors } from "../../cli/shared.js";
31
31
  import { assertFlatAssetName } from "../../core/asset/asset-create.js";
32
32
  import { isHttpUrl } from "../../core/common.js";