akm-cli 0.9.0-beta.52 → 0.9.0-beta.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/hints/cli-hints-full.md +6 -5
- package/dist/cli.js +0 -7
- package/dist/commands/env/env-cli.js +3 -2
- package/dist/commands/env/env.js +14 -67
- package/dist/commands/health/checks.js +28 -15
- package/dist/commands/health.js +68 -1
- package/dist/commands/improve/collapse-detector.js +419 -0
- package/dist/commands/improve/consolidate.js +72 -54
- package/dist/commands/improve/distill.js +79 -13
- package/dist/commands/improve/extract.js +13 -6
- package/dist/commands/improve/homeostatic.js +109 -79
- package/dist/commands/improve/improve-cli.js +67 -1
- package/dist/commands/improve/improve.js +10 -0
- package/dist/commands/improve/loop-stages.js +39 -1
- package/dist/commands/improve/outcome-loop.js +15 -3
- package/dist/commands/improve/preparation.js +17 -8
- package/dist/commands/improve/salience.js +49 -32
- package/dist/commands/read/curate.js +5 -9
- package/dist/commands/read/knowledge.js +4 -0
- package/dist/commands/read/search.js +5 -2
- package/dist/commands/read/show.js +3 -3
- package/dist/core/asset/asset-spec.js +3 -2
- package/dist/core/config/config-schema.js +39 -17
- package/dist/core/eval/rank-metrics.js +113 -0
- package/dist/core/state/migrations.js +56 -0
- package/dist/core/state-db.js +146 -19
- package/dist/indexer/ensure-index.js +33 -90
- package/dist/indexer/index-writer-lock.js +0 -11
- package/dist/indexer/index-written-assets.js +105 -0
- package/dist/indexer/passes/metadata.js +20 -0
- package/dist/indexer/search/db-search.js +29 -1
- package/dist/indexer/search/ranking-contributors.js +33 -1
- package/dist/indexer/search/ranking.js +66 -0
- package/dist/indexer/search/search-fields.js +6 -0
- package/dist/llm/feature-gate.js +6 -2
- package/dist/output/renderers.js +8 -13
- package/dist/output/shapes/helpers.js +0 -3
- package/dist/output/shapes/passthrough.js +1 -0
- package/dist/scripts/migrate-storage.js +152 -33
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +41 -18
- package/dist/storage/repositories/index-db.js +10 -1
- package/package.json +2 -4
|
@@ -33,6 +33,15 @@ export const WARM_START_CAP = 0.3;
|
|
|
33
33
|
* very-negative run can't send the score to −∞.
|
|
34
34
|
*/
|
|
35
35
|
export const OUTCOME_SCORE_MIN = -1.0;
|
|
36
|
+
/**
|
|
37
|
+
* Saturation ceiling: the maximum outcome_score. Biological RPE saturates —
|
|
38
|
+
* a fully predicted reward produces zero response, not an ever-growing one —
|
|
39
|
+
* so a long-lived popular asset must not accrue unbounded outcome mass that
|
|
40
|
+
* would dominate ranking once the outcome weight is enabled (analysis G2).
|
|
41
|
+
* 1.5 comfortably exceeds the max plausible single-cycle raw update while
|
|
42
|
+
* keeping the normalised outcomeSalience spread meaningful.
|
|
43
|
+
*/
|
|
44
|
+
export const OUTCOME_SCORE_MAX = 1.5;
|
|
36
45
|
/**
|
|
37
46
|
* Diversity floor: `outcomeSalience` for any asset is at least this fraction
|
|
38
47
|
* of the maximum observed `outcome_score` in the table, so rare-but-correct
|
|
@@ -107,9 +116,12 @@ export function updateAssetOutcome(db, inputs) {
|
|
|
107
116
|
// so the score tracks the moving signal, not the cumulative sum.
|
|
108
117
|
const rawUpdate = predictionError - penalty + valence;
|
|
109
118
|
const newScore = OUTCOME_EMA_ALPHA * rawUpdate + (1 - OUTCOME_EMA_ALPHA) * existing.outcome_score;
|
|
110
|
-
// Clip to [OUTCOME_SCORE_MIN,
|
|
111
|
-
//
|
|
112
|
-
|
|
119
|
+
// Clip to [OUTCOME_SCORE_MIN, OUTCOME_SCORE_MAX] — the ceiling is the RPE
|
|
120
|
+
// saturation analog (G2): without it, long-lived popular assets accumulate
|
|
121
|
+
// unbounded positive mass (live max was 3.13) and would dominate rank_score
|
|
122
|
+
// the moment the outcome weight is enabled. Stored legacy scores above the
|
|
123
|
+
// ceiling converge back under it on their next differential update.
|
|
124
|
+
outcomeScore = Math.min(OUTCOME_SCORE_MAX, Math.max(OUTCOME_SCORE_MIN, newScore));
|
|
113
125
|
// ── review_pressure (#613) ─────────────────────────────────────────────
|
|
114
126
|
// New negatives this cycle.
|
|
115
127
|
const newNegatives = Math.max(0, inputs.negativeFeedbackCount - existing.negative_feedback_count);
|
|
@@ -1145,23 +1145,32 @@ export async function runImprovePreparationStage(args) {
|
|
|
1145
1145
|
const proactiveAndRetrievalSet = new Set([...highRetrievalRefs, ...proactiveRefs].map((r) => r.ref));
|
|
1146
1146
|
try {
|
|
1147
1147
|
withStateDb((dbForHighSalience) => {
|
|
1148
|
-
|
|
1148
|
+
// Derive the cap from the resolved reflect limit (mirrors improve.ts's
|
|
1149
|
+
// options.limit resolution) so an unbounded whole-stash run does not
|
|
1150
|
+
// collapse the lane to exactly 1 ref via the bare `?? 10` fallback.
|
|
1151
|
+
const effectiveLimit = options.limit ?? improveProfile?.processes?.reflect?.limit ?? improveProfile.limit ?? 10;
|
|
1149
1152
|
const highSalienceCap = Math.max(1, Math.floor(effectiveLimit * 0.1));
|
|
1150
1153
|
// #632/#4 — session-capture telemetry (checkpoints) must never consume
|
|
1151
1154
|
// the scarce high-salience budget. Even with a content-scored row, these
|
|
1152
1155
|
// are pipeline bookkeeping, not assets worth reflecting/rewriting.
|
|
1153
1156
|
const candidates = noFeedbackCandidates.filter((r) => !proactiveAndRetrievalSet.has(r.ref) && !isSessionCaptureMemoryName(parseAssetRef(r.ref).name));
|
|
1157
|
+
// Collect ALL qualifying candidates, then take the top-N BY SCORE — the
|
|
1158
|
+
// previous first-N-in-scan-order break meant a higher-salience candidate
|
|
1159
|
+
// found later in the scan lost its slot to an earlier lower-scoring one.
|
|
1160
|
+
const qualifying = [];
|
|
1154
1161
|
for (const r of candidates) {
|
|
1155
|
-
if (highSalienceRefs.length >= highSalienceCap)
|
|
1156
|
-
break;
|
|
1157
1162
|
const row = getAssetSalience(dbForHighSalience, r.ref);
|
|
1158
1163
|
if (row &&
|
|
1159
1164
|
isContentEncodingRow(row, parseAssetRef(r.ref).type) &&
|
|
1160
1165
|
row.encoding_salience >= salienceThreshold &&
|
|
1161
1166
|
!lastReflectProposalTs.has(r.ref)) {
|
|
1162
|
-
|
|
1167
|
+
qualifying.push({ ref: r, score: row.encoding_salience });
|
|
1163
1168
|
}
|
|
1164
1169
|
}
|
|
1170
|
+
qualifying.sort((a, b) => b.score - a.score);
|
|
1171
|
+
for (const q of qualifying.slice(0, highSalienceCap)) {
|
|
1172
|
+
highSalienceRefs.push(q.ref);
|
|
1173
|
+
}
|
|
1165
1174
|
}, { path: eventsCtx?.dbPath });
|
|
1166
1175
|
}
|
|
1167
1176
|
catch (err) {
|
|
@@ -1392,11 +1401,11 @@ export async function runImprovePreparationStage(args) {
|
|
|
1392
1401
|
// so feedback refs get their genuine retrieval frequency, not a 0-floor fallback.
|
|
1393
1402
|
// outcomeSalienceByRef is populated by WS-2 above (or empty on first run).
|
|
1394
1403
|
//
|
|
1395
|
-
//
|
|
1396
|
-
//
|
|
1397
|
-
// improve.salience.outcomeWeightEnabled:
|
|
1404
|
+
// R1 loop closure: the outcome weight is ON by default (the G2 saturation
|
|
1405
|
+
// cap makes it safe). Operators opt out with
|
|
1406
|
+
// improve.salience.outcomeWeightEnabled: false in the config.
|
|
1398
1407
|
const salienceConfig = (options.config ?? loadConfig()).improve?.salience;
|
|
1399
|
-
const outcomeWeightEnabled = salienceConfig?.outcomeWeightEnabled
|
|
1408
|
+
const outcomeWeightEnabled = salienceConfig?.outcomeWeightEnabled !== false;
|
|
1400
1409
|
const salienceMap = new Map();
|
|
1401
1410
|
const nowForSalience = Date.now();
|
|
1402
1411
|
// #644 — preserve content-derived encoding scores across runs.
|
|
@@ -21,12 +21,12 @@
|
|
|
21
21
|
*
|
|
22
22
|
* `rankScore = (w_e·encoding + w_o·outcome + w_r·retrieval) × sizePenalty`, normalized [0,1].
|
|
23
23
|
*
|
|
24
|
-
* **WS-2 default-
|
|
25
|
-
* `w_o = 0.15` is
|
|
26
|
-
* (
|
|
27
|
-
*
|
|
28
|
-
* `
|
|
29
|
-
*
|
|
24
|
+
* **WS-2 default-ON (R1 loop closure):**
|
|
25
|
+
* `w_o = 0.15` is applied by default now that `outcome_score` saturates at
|
|
26
|
+
* `OUTCOME_SCORE_MAX` (G2). Operators can opt out via
|
|
27
|
+
* `improve.salience.outcomeWeightEnabled: false`, which restores the WS-1
|
|
28
|
+
* parity weights `w_e=0.30, w_r=0.70, w_o=0`. `outcomeSalience` is populated
|
|
29
|
+
* from `asset_outcome.outcome_score` regardless of the flag.
|
|
30
30
|
*
|
|
31
31
|
* ## Plasticity
|
|
32
32
|
*
|
|
@@ -52,16 +52,30 @@ import { WARM_START_CAP } from "./outcome-loop.js";
|
|
|
52
52
|
const DAY_MS = 86_400_000;
|
|
53
53
|
// ── Recency decay half-life (mirrors the proactive-maintenance prototype) ─────
|
|
54
54
|
const RECENCY_HALFLIFE_DAYS = 21;
|
|
55
|
+
// ── Recency-floor half-life (R4 — SHY-style continuous downscaling) ──────────
|
|
56
|
+
//
|
|
57
|
+
// The recency floor itself decays on this (much longer) half-life so an
|
|
58
|
+
// unreviewed-forever asset keeps drifting down instead of parking at the 0.1
|
|
59
|
+
// floor. This replaces the deleted homeostatic demotion pass (which was
|
|
60
|
+
// default-off and self-undoing — every salience recompute clobbered it);
|
|
61
|
+
// folding the decay into the always-applied recency term makes it persist by
|
|
62
|
+
// construction. At 180 days the floor halves; a 1-year-stale asset sits at
|
|
63
|
+
// ~0.025 instead of 0.1.
|
|
64
|
+
const RECENCY_FLOOR_HALFLIFE_DAYS = 180;
|
|
65
|
+
// Absolute epsilon under the decaying floor. Keeps the frequency term ordinal
|
|
66
|
+
// for assets whose last-use timestamp is unknown (utility_scores has no
|
|
67
|
+
// last_used_at) — without it their retrieval salience collapses to exactly 0
|
|
68
|
+
// and frequency ordering is lost for maintenance selection.
|
|
69
|
+
const RECENCY_EPSILON = 0.01;
|
|
55
70
|
// ── Size proxy floor (avoids log10(0)) ────────────────────────────────────────
|
|
56
71
|
const SIZE_FLOOR_BYTES = 200;
|
|
57
72
|
// ── Projection weights ────────────────────────────────────────────────────────
|
|
58
73
|
//
|
|
59
|
-
// These constants
|
|
60
|
-
//
|
|
61
|
-
//
|
|
62
|
-
// the Part-V T0 baseline (scripts/akm-eval + health report).
|
|
74
|
+
// These constants are the DEFAULT ranking weights (R1 loop closure). Operators
|
|
75
|
+
// can opt back out to the WS-1 parity weights (w_e=0.30, w_r=0.70, w_o=0) via
|
|
76
|
+
// `improve.salience.outcomeWeightEnabled: false`.
|
|
63
77
|
//
|
|
64
|
-
// WS-2
|
|
78
|
+
// WS-2 split (w_e=0.25, w_o=0.15, w_r=0.60, sum = 1.0):
|
|
65
79
|
// [exp] Expert recommendation: encoding should be moderate so a type-importance
|
|
66
80
|
// stub does not completely dominate; retrieval should be strong since it directly
|
|
67
81
|
// measures use; outcome provides a quality signal proportional to usefulness.
|
|
@@ -78,10 +92,10 @@ if (Math.abs(W_ENCODING + W_OUTCOME + W_RETRIEVAL - 1.0) > 1e-9) {
|
|
|
78
92
|
}
|
|
79
93
|
// ── WS-1 parity weights ───────────────────────────────────────────────────────
|
|
80
94
|
//
|
|
81
|
-
// These constants reflect the
|
|
82
|
-
//
|
|
95
|
+
// These constants reflect the WS-1 parity weights used when the operator
|
|
96
|
+
// explicitly opts out (`outcomeWeightEnabled: false`). They preserve the
|
|
83
97
|
// WS-1 two-way split (w_e=0.30, w_r=0.70) with w_o=0 so outcome does not
|
|
84
|
-
// affect rankScore
|
|
98
|
+
// affect rankScore in the opt-out mode.
|
|
85
99
|
//
|
|
86
100
|
// Named here (rather than inline literals in the else branch) so a future
|
|
87
101
|
// re-tune has a single source of truth and the sum-to-1 guard below catches
|
|
@@ -155,15 +169,19 @@ export function computeSalience(inputs) {
|
|
|
155
169
|
//
|
|
156
170
|
// Formula: log(1 + freq) × recencyDecay
|
|
157
171
|
// log(1+freq): sub-linear frequency term (same as proactive-maintenance prototype).
|
|
158
|
-
// recencyDecay: 0.1 + 0.5^(useAgeDays/
|
|
159
|
-
//
|
|
172
|
+
// recencyDecay: max(ε, 0.1·0.5^(useAgeDays/180) + 0.5^(useAgeDays/21)) —
|
|
173
|
+
// the fast term halves every 21 days; the 0.1 floor itself halves every
|
|
174
|
+
// 180 days (R4: SHY-style continuous downscaling — an unreviewed-forever
|
|
175
|
+
// asset keeps drifting down instead of parking at the floor). The ε=0.01
|
|
176
|
+
// epsilon keeps the frequency term ordinal for unknown-last-use assets.
|
|
177
|
+
// lastUseMs=0/undefined → useAgeDays=9999 → recencyDecay=ε.
|
|
160
178
|
//
|
|
161
179
|
// The recency term is MANDATORY (plan requirement §WS-1 step 2). Without it
|
|
162
|
-
// retrievalSalience degenerates to a non-decaying frequency count
|
|
163
|
-
//
|
|
180
|
+
// retrievalSalience degenerates to a non-decaying frequency count. This
|
|
181
|
+
// always-applied decay replaces the deleted homeostatic demotion pass.
|
|
164
182
|
const lastUseMs = inputs.lastUseMs ?? 0;
|
|
165
183
|
const useAgeDays = lastUseMs > 0 ? (now - lastUseMs) / DAY_MS : 9999;
|
|
166
|
-
const recencyDecay = 0.1 + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS);
|
|
184
|
+
const recencyDecay = Math.max(RECENCY_EPSILON, 0.1 * 0.5 ** (useAgeDays / RECENCY_FLOOR_HALFLIFE_DAYS) + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS));
|
|
167
185
|
const rawRetrieval = Math.log(1 + inputs.retrievalFreq) * recencyDecay;
|
|
168
186
|
// ── Size penalty ─────────────────────────────────────────────────────────────
|
|
169
187
|
// 1/log10(size): larger assets are slightly deprioritized (same as proactive prototype).
|
|
@@ -184,29 +202,28 @@ export function computeSalience(inputs) {
|
|
|
184
202
|
// which asymptotes to 1 and equals 0.5 at rawRetrieval=1. This is the same
|
|
185
203
|
// formula used for MemRL utility updates.
|
|
186
204
|
const retrieval = rawRetrieval / (rawRetrieval + 1);
|
|
187
|
-
// ── Weight selection (
|
|
188
|
-
//
|
|
189
|
-
// When `outcomeWeightEnabled` is false/absent (default): use WS-1 parity
|
|
190
|
-
// weights (w_e=0.30, w_r=0.70, w_o=0) so ranking is unchanged from the WS-1
|
|
191
|
-
// baseline. The `outcome` sub-score is still computed and stored in the
|
|
192
|
-
// salience vector for observability, but it does not affect rankScore.
|
|
205
|
+
// ── Weight selection (R1 — outcome loop closed by default) ───────────────
|
|
193
206
|
//
|
|
194
|
-
// When `outcomeWeightEnabled` is true (
|
|
195
|
-
// use WS-2 weights (w_e=0.25, w_o=0.15, w_r=0.60)
|
|
207
|
+
// When `outcomeWeightEnabled` is true/absent (DEFAULT ON since the G2
|
|
208
|
+
// saturation cap landed): use WS-2 weights (w_e=0.25, w_o=0.15, w_r=0.60)
|
|
209
|
+
// so the prediction-error outcome signal actually shapes rankScore — this
|
|
210
|
+
// is the R1 loop-closure from docs/design/improve-self-learning-analysis.md.
|
|
196
211
|
//
|
|
197
|
-
//
|
|
198
|
-
//
|
|
212
|
+
// When `outcomeWeightEnabled` is explicitly false (operator opt-out via
|
|
213
|
+
// `improve.salience.outcomeWeightEnabled: false`): fall back to the WS-1
|
|
214
|
+
// parity weights (w_e=0.30, w_r=0.70, w_o=0). The `outcome` sub-score is
|
|
215
|
+
// still computed and stored for observability in that mode.
|
|
199
216
|
let we;
|
|
200
217
|
let wo;
|
|
201
218
|
let wr;
|
|
202
|
-
if (inputs.outcomeWeightEnabled
|
|
203
|
-
// WS-2 active: three-way split
|
|
219
|
+
if (inputs.outcomeWeightEnabled !== false) {
|
|
220
|
+
// WS-2 active (default): three-way split.
|
|
204
221
|
we = W_ENCODING; // 0.25
|
|
205
222
|
wo = W_OUTCOME; // 0.15
|
|
206
223
|
wr = W_RETRIEVAL; // 0.60
|
|
207
224
|
}
|
|
208
225
|
else {
|
|
209
|
-
// WS-1 parity (
|
|
226
|
+
// WS-1 parity (opt-out): w_o=0, redistribute to WS-1 proportions.
|
|
210
227
|
// Original WS-1 split was w_e=0.30, w_r=0.70.
|
|
211
228
|
we = W_ENCODING_PARITY;
|
|
212
229
|
wo = W_OUTCOME_PARITY;
|
|
@@ -19,12 +19,12 @@ import { parseFrontmatter } from "../../core/asset/frontmatter.js";
|
|
|
19
19
|
import { getIndexPassConfig, loadConfig } from "../../core/config/config.js";
|
|
20
20
|
import { rethrowIfTestIsolationError, UsageError } from "../../core/errors.js";
|
|
21
21
|
import { appendEvent } from "../../core/events.js";
|
|
22
|
-
import {
|
|
22
|
+
import { computeBodyHash } from "../../indexer/db/db.js";
|
|
23
23
|
import { enqueueGraphExtraction, hasGraphData } from "../../indexer/db/graph-db.js";
|
|
24
24
|
import { findSourceForPath, resolveSourceEntries } from "../../indexer/search/search-source.js";
|
|
25
25
|
import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
|
|
26
26
|
import { truncateDescription } from "../../output/shapes.js";
|
|
27
|
-
import { withIndexDb } from "../../storage/repositories/index-db.js";
|
|
27
|
+
import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
|
|
28
28
|
import { akmSearch, parseSearchSource } from "./search.js";
|
|
29
29
|
import { akmShowUnified } from "./show.js";
|
|
30
30
|
const CURATE_FALLBACK_FILTER_WORDS = new Set([
|
|
@@ -65,8 +65,7 @@ function logCurateEvent(query, result) {
|
|
|
65
65
|
metadata: { query, itemCount: result.items.length, itemRefs },
|
|
66
66
|
});
|
|
67
67
|
try {
|
|
68
|
-
|
|
69
|
-
try {
|
|
68
|
+
withIndexDb((db) => {
|
|
70
69
|
insertUsageEvent(db, {
|
|
71
70
|
event_type: "curate",
|
|
72
71
|
query,
|
|
@@ -86,10 +85,7 @@ function logCurateEvent(query, result) {
|
|
|
86
85
|
source: "user",
|
|
87
86
|
});
|
|
88
87
|
}
|
|
89
|
-
}
|
|
90
|
-
finally {
|
|
91
|
-
closeDatabase(db);
|
|
92
|
-
}
|
|
88
|
+
}, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
|
|
93
89
|
}
|
|
94
90
|
catch (err) {
|
|
95
91
|
rethrowIfTestIsolationError(err);
|
|
@@ -207,7 +203,7 @@ function maybeEnqueueLazyGraph(assetPath) {
|
|
|
207
203
|
if (!hasGraphData(db, stashRoot, assetPath)) {
|
|
208
204
|
enqueueGraphExtraction(db, stashRoot, assetPath, bodyHash, 0);
|
|
209
205
|
}
|
|
210
|
-
});
|
|
206
|
+
}, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
|
|
211
207
|
}
|
|
212
208
|
catch (err) {
|
|
213
209
|
rethrowIfTestIsolationError(err);
|
|
@@ -16,6 +16,7 @@ import { isHttpUrl, isWithin, tryReadStdinText } from "../../core/common.js";
|
|
|
16
16
|
import { loadConfig } from "../../core/config/config.js";
|
|
17
17
|
import { UsageError } from "../../core/errors.js";
|
|
18
18
|
import { commitWriteTargetBoundary, formatRefForMessage, resolveWriteTarget, writeAssetToSource, } from "../../core/write-source.js";
|
|
19
|
+
import { indexWrittenAssets } from "../../indexer/index-written-assets.js";
|
|
19
20
|
import { fetchWebsiteMarkdownSnapshot, shouldAllowPrivateWebsiteUrlForTests } from "../../sources/website-ingest.js";
|
|
20
21
|
const MAX_CAPTURED_ASSET_SLUG_LENGTH = 64;
|
|
21
22
|
// ── Asset-name normalisation ─────────────────────────────────────────────────
|
|
@@ -144,6 +145,9 @@ export async function writeMarkdownAsset(options) {
|
|
|
144
145
|
// 0.9.0 (issue #507): single batch commit at the write boundary for git
|
|
145
146
|
// targets. No-op for filesystem/primary-stash targets.
|
|
146
147
|
commitWriteTargetBoundary(target, `Update ${formatRefForMessage(ref)}`);
|
|
148
|
+
// Write-path indexing: the asset is searchable immediately. Fail-open; reads
|
|
149
|
+
// no longer trigger reindexes, so keeping the index current is the writer's job.
|
|
150
|
+
await indexWrittenAssets(source.path, [result.path]);
|
|
147
151
|
return {
|
|
148
152
|
ref: result.ref,
|
|
149
153
|
path: result.path,
|
|
@@ -23,7 +23,7 @@ import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.
|
|
|
23
23
|
// indexer or path-resolution code runs.
|
|
24
24
|
import "../../sources/providers/index.js";
|
|
25
25
|
import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
|
|
26
|
-
import { withIndexDb } from "../../storage/repositories/index-db.js";
|
|
26
|
+
import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
|
|
27
27
|
import { searchRegistry } from "./registry-search.js";
|
|
28
28
|
const DEFAULT_LIMIT = 20;
|
|
29
29
|
export async function akmSearch(input) {
|
|
@@ -227,6 +227,9 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
|
|
|
227
227
|
metadata: { query, hitCount: stashHits.length, resultRefs: allResultRefs, mode },
|
|
228
228
|
});
|
|
229
229
|
try {
|
|
230
|
+
// Short busy timeout: telemetry must never stall the search result behind
|
|
231
|
+
// a background reindex holding the index.db write lock (30s default wait).
|
|
232
|
+
// Under contention these usage hints are skipped, not waited for.
|
|
230
233
|
withIndexDb((db) => {
|
|
231
234
|
const resolved = resolveEntryIds(db, stashHits.slice(0, 50));
|
|
232
235
|
for (const { entryId, ref } of resolved) {
|
|
@@ -269,7 +272,7 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
|
|
|
269
272
|
}),
|
|
270
273
|
source: eventSource,
|
|
271
274
|
});
|
|
272
|
-
});
|
|
275
|
+
}, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
|
|
273
276
|
}
|
|
274
277
|
catch (err) {
|
|
275
278
|
rethrowIfTestIsolationError(err);
|
|
@@ -39,7 +39,7 @@ import { resolveAssetPath } from "../../indexer/walk/path-resolver.js";
|
|
|
39
39
|
import { resolveIndexPassLLM } from "../../llm/index-passes.js";
|
|
40
40
|
import { resolveSourcesForOrigin } from "../../registry/origin-resolve.js";
|
|
41
41
|
import { resolveStorageLocations } from "../../storage/locations.js";
|
|
42
|
-
import { withIndexDb } from "../../storage/repositories/index-db.js";
|
|
42
|
+
import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
|
|
43
43
|
// Eagerly import source providers to trigger self-registration.
|
|
44
44
|
import "../../sources/providers/index.js";
|
|
45
45
|
import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.js";
|
|
@@ -301,7 +301,7 @@ function logShowEvent(ref, eventSource = "user") {
|
|
|
301
301
|
entry_id: findEntryIdByRef(db, ref),
|
|
302
302
|
source: eventSource,
|
|
303
303
|
});
|
|
304
|
-
});
|
|
304
|
+
}, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
|
|
305
305
|
}
|
|
306
306
|
catch (err) {
|
|
307
307
|
rethrowIfTestIsolationError(err);
|
|
@@ -431,7 +431,7 @@ async function maybeExtractGraphInline(config, sourceStashDir, assetPath) {
|
|
|
431
431
|
}
|
|
432
432
|
withIndexDb((db) => {
|
|
433
433
|
alreadyGraphed = hasGraphData(db, sourceStashDir, assetPath);
|
|
434
|
-
});
|
|
434
|
+
}, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
|
|
435
435
|
if (alreadyGraphed)
|
|
436
436
|
return;
|
|
437
437
|
// Open the db for the async extraction ourselves: `withIndexDb` is
|
|
@@ -69,8 +69,9 @@ const ASSET_SPECS_INTERNAL = {
|
|
|
69
69
|
script: { stashDir: "scripts", ...scriptSpec },
|
|
70
70
|
memory: { stashDir: "memories", ...markdownSpec },
|
|
71
71
|
// Environment assets — whole `.env` files sourced/injected wholesale. Replaced
|
|
72
|
-
// the deprecated `vault` type (removed in 0.9.0).
|
|
73
|
-
//
|
|
72
|
+
// the deprecated `vault` type (removed in 0.9.0). Only key NAMES are surfaced
|
|
73
|
+
// as metadata; values and comment text are never read for indexing (comments
|
|
74
|
+
// routinely contain commented-out credentials).
|
|
74
75
|
env: {
|
|
75
76
|
stashDir: "env",
|
|
76
77
|
isRelevantFile: (fileName) => fileName === ".env" || fileName.endsWith(".env"),
|
|
@@ -171,6 +171,9 @@ export const ImproveProcessConfigSchema = z
|
|
|
171
171
|
// byte-identically to today (the incrementalSince path is unaffected). Only
|
|
172
172
|
// meaningful on the `consolidate` process.
|
|
173
173
|
judgedCache: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
|
|
174
|
+
// Distill process: LLM-as-judge lesson quality gate. Default ON (R3);
|
|
175
|
+
// fail-open — judge failure/timeout/parse errors pass through. Set
|
|
176
|
+
// `enabled: false` on the distill process to opt out.
|
|
174
177
|
qualityGate: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
|
|
175
178
|
contradictionDetection: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
|
|
176
179
|
// Extract process config (only meaningful for extract process)
|
|
@@ -250,24 +253,13 @@ export const ImproveProcessConfigSchema = z
|
|
|
250
253
|
// once sufficient history accumulates; this value is only used on the very
|
|
251
254
|
// first run. Default 30 s. Only meaningful on the `consolidate` process.
|
|
252
255
|
p90ChunkSecondsDefault: z.number().finite().positive().optional(),
|
|
253
|
-
// WS-3b
|
|
254
|
-
//
|
|
255
|
-
//
|
|
256
|
-
// re-promotable on re-retrieval. Default OFF. Only meaningful on the
|
|
257
|
-
// `consolidate` process.
|
|
258
|
-
homeostaticDemotion: z
|
|
259
|
-
.object({
|
|
260
|
-
enabled: z.boolean().optional(),
|
|
261
|
-
// Minimum days since last retrieval to consider an asset stale (default 30).
|
|
262
|
-
staleDays: z.number().int().min(0).optional(),
|
|
263
|
-
// Demotion factor: multiply retrievalSalience by this when stale (default 0.5).
|
|
264
|
-
demotionFactor: z.number().min(0).max(1).optional(),
|
|
265
|
-
})
|
|
266
|
-
.passthrough()
|
|
267
|
-
.optional(),
|
|
256
|
+
// (WS-3b step 0a `homeostaticDemotion` was removed — R4. The key is
|
|
257
|
+
// tolerated via passthrough if an old config still carries it; continuous
|
|
258
|
+
// decay is now part of the always-applied salience recency term.)
|
|
268
259
|
// WS-3b: Schema-similarity gate (step 0b). At intake, if a new candidate's
|
|
269
260
|
// body embedding is within epsilon of an existing derived-layer lesson/knowledge
|
|
270
|
-
// node, mark it schema-consistent and lower its priority. Default
|
|
261
|
+
// node, mark it schema-consistent and lower its priority. Default ON for
|
|
262
|
+
// the `extract` process since R3 (fail-open; set `enabled: false` to opt out).
|
|
271
263
|
// Only meaningful on the `consolidate` and `extract` processes.
|
|
272
264
|
schemaSimilarity: z
|
|
273
265
|
.object({
|
|
@@ -297,13 +289,19 @@ export const ImproveProcessConfigSchema = z
|
|
|
297
289
|
// - maxGeneration: refuse to merge two assets both above this generation (default 2).
|
|
298
290
|
// - lexicalDiversityCheck: low n-gram diversity ⇒ raise merge threshold.
|
|
299
291
|
// - randomClusterFraction: occasional random (non-similar) cluster in pool (default 0.05).
|
|
300
|
-
//
|
|
292
|
+
// - mergeInformationFloor: measure that merges keep provenance + specificity
|
|
293
|
+
// (R5 §4.2; ADVISORY in v1 — counted, never refused).
|
|
294
|
+
// - minSpecificityRetention: distinct-token retention floor for merges (default 0.6).
|
|
295
|
+
// Default ON since R5 (opt out via enabled: false). Only meaningful on the
|
|
296
|
+
// `consolidate` process.
|
|
301
297
|
antiCollapse: z
|
|
302
298
|
.object({
|
|
303
299
|
enabled: z.boolean().optional(),
|
|
304
300
|
maxGeneration: z.number().int().min(1).optional(),
|
|
305
301
|
lexicalDiversityCheck: z.boolean().optional(),
|
|
306
302
|
randomClusterFraction: z.number().min(0).max(1).optional(),
|
|
303
|
+
mergeInformationFloor: z.boolean().optional(),
|
|
304
|
+
minSpecificityRetention: z.number().min(0).max(1).optional(),
|
|
307
305
|
})
|
|
308
306
|
.passthrough()
|
|
309
307
|
.optional(),
|
|
@@ -633,6 +631,29 @@ const ImproveSalienceSchema = z
|
|
|
633
631
|
replayBudget: z.number().int().min(0).optional(),
|
|
634
632
|
})
|
|
635
633
|
.passthrough();
|
|
634
|
+
/*
 * R5 — configuration for the longitudinal collapse/churn detector.
 *
 * Per the design notes this detector is observe-only in v1, deterministic and
 * fail-open, and runs only on cycles where consolidate/recombine did work.
 * It is ON by default; set `improve.collapseDetector.enabled: false` to opt out.
 * Design doc: docs/design/improve-collapse-churn-detector-design.md.
 *
 * Every field is optional and unknown keys are preserved (`passthrough`).
 */
const ImproveCollapseDetectorSchema = z
    .object({
        enabled: z.boolean().optional(),
        /* Size of the canary set minted on first run (owner-approved 30–50 range; default 40). */
        canaryCount: z.number().int().min(3).max(200).optional(),
        /* Top-K cutoff used for canary recall/nDCG (default 10). */
        k: z.number().int().min(1).max(100).optional(),
        /* Trend window, measured in qualifying cycles (default 5). */
        windowCycles: z.number().int().min(2).max(50).optional(),
        /* Absolute mean-recall drop versus the window median that fires collapse (default 0.15). */
        recallDropThreshold: z.number().min(0).max(1).optional(),
        /* Decline in distinct-content ratio over the window that fires collapse (default 0.05). */
        entropyDropThreshold: z.number().min(0).max(1).optional(),
        /* Accepted-action volume over the window below which churn never fires (default 25). */
        churnMinAcceptedActions: z.number().int().min(1).optional(),
        /* Retention for improve_cycle_metrics rows (default 365 days, owner-approved). */
        retentionDays: z.number().int().min(1).optional(),
    })
    .passthrough();
|
|
636
657
|
export const ImproveConfigSchema = z
|
|
637
658
|
.object({
|
|
638
659
|
utilityDecay: ImproveUtilityDecaySchema.optional(),
|
|
@@ -640,6 +661,7 @@ export const ImproveConfigSchema = z
|
|
|
640
661
|
calibration: ImproveCalibrationSchema.optional(),
|
|
641
662
|
exploration: ImproveExplorationSchema.optional(),
|
|
642
663
|
salience: ImproveSalienceSchema.optional(),
|
|
664
|
+
collapseDetector: ImproveCollapseDetectorSchema.optional(),
|
|
643
665
|
})
|
|
644
666
|
.passthrough();
|
|
645
667
|
// ── Index / per-pass ────────────────────────────────────────────────────────
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
 * Default blend weights for the composite curate score.
 *
 * Each key names one metric component of the weighted sum; the four weights
 * add up to 1.0. The object is frozen because it is shared as a default
 * argument value: a caller that mutated it would silently change the defaults
 * for every later call. Callers wanting different weights should pass their
 * own object (e.g. `{ ...DEFAULT_CURATE_WEIGHTS, ndcg: 0.6 }`).
 */
export const DEFAULT_CURATE_WEIGHTS = Object.freeze({
    ndcg: 0.5,
    recall: 0.2,
    mrr: 0.1,
    noBannedAboveRequired: 0.2,
});
|
|
10
|
+
/**
 * nDCG@k with binary relevance: gain 1 for relevant refs, 0 otherwise.
 *
 * A relevant ref earns gain only at its first (best) position within the
 * top-k. Without that, a result list repeating the same relevant ref would
 * accumulate more DCG than the ideal ranking and yield a score above 1.
 *
 * @param {Array} returned - Ordered result refs, best first.
 * @param {Set} relevant - Refs judged relevant.
 * @param {number} k - Cutoff; only the first k results are scored.
 * @returns {number} DCG divided by ideal DCG; 1 when the ideal DCG is 0
 *   (e.g. the relevant set is empty).
 */
export function ndcgAtK(returned, relevant, k) {
    const top = returned.slice(0, k);
    // Relevant refs that have already been credited at a better rank.
    const credited = new Set();
    let dcg = 0;
    for (let i = 0; i < top.length; i++) {
        const ref = top[i];
        if (relevant.has(ref) && !credited.has(ref)) {
            credited.add(ref);
            dcg += 1 / Math.log2(i + 2);
        }
    }
    // Ideal ranking: every relevant ref (up to k of them) in the leading slots.
    const idealCount = Math.min(k, relevant.size);
    let idcg = 0;
    for (let i = 0; i < idealCount; i++) {
        idcg += 1 / Math.log2(i + 2);
    }
    // Nothing to find: treat the case as vacuously perfect.
    return idcg === 0 ? 1 : dcg / idcg;
}
|
|
24
|
+
/**
 * Recall@k: the share of relevant refs that appear among the first k results.
 *
 * @param {Array} returned - Ordered result refs, best first.
 * @param {Set} relevant - Refs judged relevant.
 * @param {number} k - Cutoff; only the first k results are considered.
 * @returns {number} hits / relevant.size, or 1 when the relevant set is empty.
 */
export function recallAtK(returned, relevant, k) {
    const total = relevant.size;
    if (total === 0) {
        // Nothing to recall: vacuously perfect.
        return 1;
    }
    const window = new Set(returned.slice(0, k));
    const found = [...relevant].filter((ref) => window.has(ref)).length;
    return found / total;
}
|
|
34
|
+
/**
 * Reciprocal rank of the first relevant result.
 *
 * @param {Array} returned - Ordered result refs, best first.
 * @param {Set} relevant - Refs judged relevant.
 * @returns {number} 1 / (1-based position of the first relevant ref), or 0
 *   when no returned ref is relevant.
 */
export function mrr(returned, relevant) {
    const firstHit = returned.findIndex((ref) => relevant.has(ref));
    return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
|
|
41
|
+
/**
 * Leapfrog gate. A banned ref "leapfrogs" when it appears ABOVE at least one
 * present relevant ref, i.e. its first occurrence ranks better than the
 * worst-ranked relevant ref in `returned`.
 *
 * Banned refs are counted once each (by first occurrence), so a ref repeated
 * in `returned` cannot inflate the violation count or skew the fraction.
 *
 * @param {Array} returned - Ordered result refs, best first.
 * @param {Set} relevant - Refs judged relevant.
 * @param {Set} banned - Refs that must not outrank relevant ones.
 * @returns {{score: number, leapfrogCount: number}} `score` is the fraction of
 *   distinct present banned refs that do NOT leapfrog (1 when no relevant ref
 *   or no banned ref is present); `leapfrogCount` is the number of distinct
 *   banned refs that do.
 */
export function noBannedAboveRequired(returned, relevant, banned) {
    const firstRankOf = new Map();
    let worstRelevantRank = -1;
    // Single pass: remember each ref's best rank and the last (worst) relevant rank.
    returned.forEach((ref, i) => {
        if (!firstRankOf.has(ref)) {
            firstRankOf.set(ref, i);
        }
        if (relevant.has(ref)) {
            worstRelevantRank = i;
        }
    });
    if (worstRelevantRank < 0) {
        // No relevant ref present to be leapfrogged — gate is vacuously satisfied.
        return { score: 1, leapfrogCount: 0 };
    }
    let bannedPresent = 0;
    let leapfrog = 0;
    for (const [ref, rank] of firstRankOf) {
        if (!banned.has(ref)) {
            continue;
        }
        bannedPresent += 1;
        if (rank < worstRelevantRank) {
            leapfrog += 1;
        }
    }
    if (bannedPresent === 0) {
        return { score: 1, leapfrogCount: 0 };
    }
    return { score: 1 - leapfrog / bannedPresent, leapfrogCount: leapfrog };
}
|
|
70
|
+
/**
 * Score one curate result against its judgment.
 *
 * @param {Array} returned Ordered refs produced by curate, best first.
 * @param {object} judgment Expected outcome; reads `limit` (rank cutoff),
 *   `relevant` and `banned` (iterables of refs).
 * @param {object} [weights] Per-metric weights with keys `ndcg`, `recall`,
 *   `mrr` and `noBannedAboveRequired`.
 * @returns {object} The individual metrics, the banned leapfrog count and the
 *   weighted `score`.
 */
export function scoreCurateCase(returned, judgment, weights = DEFAULT_CURATE_WEIGHTS) {
    const { limit } = judgment;
    const relevantSet = new Set(judgment.relevant);
    const bannedSet = new Set(judgment.banned);
    const parts = {
        ndcg: ndcgAtK(returned, relevantSet, limit),
        recall: recallAtK(returned, relevantSet, limit),
        mrr: mrr(returned, relevantSet),
    };
    const { score: gateScore, leapfrogCount } = noBannedAboveRequired(returned, relevantSet, bannedSet);
    // Weighted blend, summed in a fixed order so floating-point results are stable.
    const score = parts.ndcg * weights.ndcg +
        parts.recall * weights.recall +
        parts.mrr * weights.mrr +
        gateScore * weights.noBannedAboveRequired;
    return {
        ...parts,
        noBannedAboveRequired: gateScore,
        bannedLeapfrogCount: leapfrogCount,
        score,
    };
}
|
|
89
|
+
/**
 * Roll per-case metrics up into a suite summary.
 *
 * @param {Array<object>} metrics Per-case results carrying `score`, `ndcg`,
 *   `recall`, `mrr`, `noBannedAboveRequired` and `bannedLeapfrogCount`.
 * @returns {object} Case count, the mean of each metric and the total banned
 *   leapfrog count. For an empty suite every mean is 0 except
 *   `meanNoBannedAboveRequired`, which is 1.
 */
export function summarizeCurateMetrics(metrics) {
    const caseCount = metrics.length;
    if (caseCount === 0) {
        return {
            caseCount: 0,
            meanScore: 0,
            meanNdcg: 0,
            meanRecall: 0,
            meanMrr: 0,
            meanNoBannedAboveRequired: 1,
            totalBannedLeapfrog: 0,
        };
    }
    // Accumulate every column in one pass, in input order.
    const totals = {
        score: 0,
        ndcg: 0,
        recall: 0,
        mrr: 0,
        noBannedAboveRequired: 0,
        bannedLeapfrogCount: 0,
    };
    metrics.forEach((entry) => {
        totals.score += entry.score;
        totals.ndcg += entry.ndcg;
        totals.recall += entry.recall;
        totals.mrr += entry.mrr;
        totals.noBannedAboveRequired += entry.noBannedAboveRequired;
        totals.bannedLeapfrogCount += entry.bannedLeapfrogCount;
    });
    return {
        caseCount,
        meanScore: totals.score / caseCount,
        meanNdcg: totals.ndcg / caseCount,
        meanRecall: totals.recall / caseCount,
        meanMrr: totals.mrr / caseCount,
        meanNoBannedAboveRequired: totals.noBannedAboveRequired / caseCount,
        totalBannedLeapfrog: totals.bannedLeapfrogCount,
    };
}
|
|
@@ -700,6 +700,62 @@ const MIGRATIONS = [
|
|
|
700
700
|
ALTER TABLE asset_salience ADD COLUMN encoding_source TEXT DEFAULT NULL;
|
|
701
701
|
`,
|
|
702
702
|
},
|
|
703
|
+
// ── Migration 016 — collapse/churn detector (R5) ─────────────────────────────
|
|
704
|
+
//
|
|
705
|
+
// Longitudinal store-health history for the improve pipeline
|
|
706
|
+
// (docs/design/improve-collapse-churn-detector-design.md).
|
|
707
|
+
//
|
|
708
|
+
// canary_queries — the fixed canary set, minted deterministically from the
|
|
709
|
+
// live stash on first detector run and NEVER auto-refreshed (silent
|
|
710
|
+
// re-baselining is how a slow collapse hides). `canary_set_id` groups one
|
|
711
|
+
// mint; deactivated sets keep their rows (active = 0) so historical cycle
|
|
712
|
+
// rows stay interpretable. Tens of rows; never purged.
|
|
713
|
+
//
|
|
714
|
+
// improve_cycle_metrics — one row per qualifying improve cycle (a run where
|
|
715
|
+
// consolidate processed ≥1 op or recombine evaluated ≥1 cluster). Every
|
|
716
|
+
// column is a scalar or a size-capped JSON blob (< 2 KB/row by
|
|
717
|
+
// construction — the result_json lesson applied). Retention: 365 days via
|
|
718
|
+
// purgeOldCycleMetrics. Trend queries drive the collapse/churn alert
|
|
719
|
+
// evaluation and the health advisory; `canary_set_id` scoping prevents
|
|
720
|
+
// comparing across canary re-mints.
|
|
721
|
+
{
|
|
722
|
+
id: "016-collapse-churn-detector",
|
|
723
|
+
up: `
|
|
724
|
+
CREATE TABLE IF NOT EXISTS canary_queries (
|
|
725
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
726
|
+
canary_set_id TEXT NOT NULL,
|
|
727
|
+
anchor_ref TEXT NOT NULL,
|
|
728
|
+
query TEXT NOT NULL,
|
|
729
|
+
source TEXT NOT NULL DEFAULT 'auto',
|
|
730
|
+
active INTEGER NOT NULL DEFAULT 1,
|
|
731
|
+
created_at TEXT NOT NULL
|
|
732
|
+
);
|
|
733
|
+
CREATE INDEX IF NOT EXISTS idx_canary_queries_active
|
|
734
|
+
ON canary_queries(active, canary_set_id);
|
|
735
|
+
|
|
736
|
+
CREATE TABLE IF NOT EXISTS improve_cycle_metrics (
|
|
737
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
738
|
+
run_id TEXT NOT NULL,
|
|
739
|
+
ts TEXT NOT NULL,
|
|
740
|
+
pass TEXT NOT NULL,
|
|
741
|
+
canary_set_id TEXT NOT NULL,
|
|
742
|
+
mean_recall REAL NOT NULL,
|
|
743
|
+
mean_ndcg REAL NOT NULL,
|
|
744
|
+
mean_mrr REAL NOT NULL,
|
|
745
|
+
canary_ranks_json TEXT NOT NULL,
|
|
746
|
+
store_total INTEGER NOT NULL,
|
|
747
|
+
store_by_type_json TEXT NOT NULL,
|
|
748
|
+
distinct_content_ratio REAL NOT NULL,
|
|
749
|
+
mean_bigram_diversity REAL NOT NULL,
|
|
750
|
+
over_generation_count INTEGER NOT NULL,
|
|
751
|
+
accepted_actions INTEGER NOT NULL,
|
|
752
|
+
merge_floor_violations INTEGER NOT NULL DEFAULT 0,
|
|
753
|
+
alerts_json TEXT NOT NULL DEFAULT '[]'
|
|
754
|
+
);
|
|
755
|
+
CREATE INDEX IF NOT EXISTS idx_improve_cycle_metrics_ts
|
|
756
|
+
ON improve_cycle_metrics(ts);
|
|
757
|
+
`,
|
|
758
|
+
},
|
|
703
759
|
];
|
|
704
760
|
/**
|
|
705
761
|
* Apply every pending migration in a single transaction per migration.
|