akm-cli 0.9.0-beta.53 → 0.9.0-beta.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/clack.js +56 -0
- package/dist/cli/confirm.js +1 -1
- package/dist/commands/health/html-report.js +33 -10
- package/dist/commands/health.js +154 -21
- package/dist/commands/improve/outcome-loop.js +18 -16
- package/dist/commands/improve/preparation.js +19 -3
- package/dist/commands/read/curate.js +4 -4
- package/dist/commands/read/search-cli.js +6 -4
- package/dist/commands/read/search.js +7 -3
- package/dist/commands/read/show.js +3 -5
- package/dist/commands/sources/add-cli.js +1 -1
- package/dist/commands/sources/init.js +12 -0
- package/dist/commands/sources/stash-cli.js +1 -1
- package/dist/commands/tasks/default-tasks.js +12 -0
- package/dist/core/config/config.js +12 -0
- package/dist/core/warn.js +21 -0
- package/dist/indexer/db/db.js +6 -0
- package/dist/indexer/ensure-index.js +3 -2
- package/dist/indexer/index-writer-lock.js +9 -0
- package/dist/indexer/indexer.js +16 -4
- package/dist/indexer/read-preflight.js +23 -0
- package/dist/indexer/walk/walker.js +21 -13
- package/dist/integrations/agent/detect.js +9 -0
- package/dist/integrations/agent/index.js +1 -1
- package/dist/llm/client.js +12 -0
- package/dist/llm/embedder.js +26 -2
- package/dist/llm/embedders/local.js +7 -1
- package/dist/scripts/migrate-storage.js +26 -2
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +5 -1
- package/dist/setup/detect.js +9 -0
- package/dist/setup/registry-stash-loader.js +12 -0
- package/dist/setup/setup.js +1 -1
- package/dist/tasks/backends/index.js +9 -0
- package/dist/tasks/runner.js +9 -0
- package/package.json +2 -2
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
import { cancel as realCancel, confirm as realConfirm, intro as realIntro, isCancel as realIsCancel, log as realLog, multiselect as realMultiselect, note as realNote, outro as realOutro, select as realSelect, spinner as realSpinner, text as realText, } from "@clack/prompts";
|
|
5
|
+
// ── Test seam ────────────────────────────────────────────────────────────────
|
|
6
|
+
// Swap-and-restore override. Inert in production; only tests call the setter
|
|
7
|
+
// (via tests/_helpers/seams.ts `overrideSeam`, never directly).
|
|
8
|
+
let clackFake;
|
|
9
|
+
/** TEST-ONLY. Swap the clack prompt surface; pass undefined to restore. */
|
|
10
|
+
export function _setClackForTests(fake) {
|
|
11
|
+
clackFake = fake;
|
|
12
|
+
}
|
|
13
|
+
const realFns = {
|
|
14
|
+
intro: realIntro,
|
|
15
|
+
outro: realOutro,
|
|
16
|
+
cancel: realCancel,
|
|
17
|
+
confirm: realConfirm,
|
|
18
|
+
select: realSelect,
|
|
19
|
+
multiselect: realMultiselect,
|
|
20
|
+
text: realText,
|
|
21
|
+
spinner: realSpinner,
|
|
22
|
+
note: realNote,
|
|
23
|
+
isCancel: realIsCancel,
|
|
24
|
+
};
|
|
25
|
+
/**
 * Build a forwarding wrapper for the prompt function called `name`.
 *
 * The target is looked up on every invocation rather than once at bind time:
 * the installed test fake's entry wins when a fake is set and defines `name`;
 * otherwise the real `@clack/prompts` export from `realFns` is used.
 */
function bind(name) {
    return (...callArgs) => {
        let target = clackFake?.[name];
        if (target === undefined || target === null) {
            // No fake installed, or the fake does not override this function.
            target = realFns[name];
        }
        return target(...callArgs);
    };
}
|
|
32
|
+
/**
 * Build a forwarding wrapper for the `log.<name>` method.
 *
 * Resolved per call: the test fake's `log[name]` is used when a fake is
 * installed and provides it, otherwise the real `log[name]` from clack.
 */
function bindLog(name) {
    return (...callArgs) => {
        const override = clackFake?.log?.[name];
        // `== null` matches both null and undefined, mirroring a nullish fallback.
        const target = override == null ? realLog[name] : override;
        return target(...callArgs);
    };
}
|
|
38
|
+
export const intro = bind("intro");
|
|
39
|
+
export const outro = bind("outro");
|
|
40
|
+
export const cancel = bind("cancel");
|
|
41
|
+
export const confirm = bind("confirm");
|
|
42
|
+
export const select = bind("select");
|
|
43
|
+
export const multiselect = bind("multiselect");
|
|
44
|
+
export const text = bind("text");
|
|
45
|
+
export const spinner = bind("spinner");
|
|
46
|
+
export const note = bind("note");
|
|
47
|
+
export const isCancel = bind("isCancel");
|
|
48
|
+
export const log = {
|
|
49
|
+
message: bindLog("message"),
|
|
50
|
+
info: bindLog("info"),
|
|
51
|
+
success: bindLog("success"),
|
|
52
|
+
step: bindLog("step"),
|
|
53
|
+
warn: bindLog("warn"),
|
|
54
|
+
warning: bindLog("warning"),
|
|
55
|
+
error: bindLog("error"),
|
|
56
|
+
};
|
package/dist/cli/confirm.js
CHANGED
|
@@ -35,8 +35,8 @@
|
|
|
35
35
|
* `--quiet` NEVER suppresses the confirmation prompt — it is safety-critical
|
|
36
36
|
* output. The auto-migration banner is similarly exempt from `--quiet`.
|
|
37
37
|
*/
|
|
38
|
-
import * as p from "@clack/prompts";
|
|
39
38
|
import { UsageError } from "../core/errors.js";
|
|
39
|
+
import * as p from "./clack.js";
|
|
40
40
|
/**
|
|
41
41
|
* Prompt the user to confirm a destructive action.
|
|
42
42
|
*
|
|
@@ -282,6 +282,7 @@ export function buildHealthHtmlReplacements(result, opts) {
|
|
|
282
282
|
};
|
|
283
283
|
const coverage = improve.coverage;
|
|
284
284
|
const degradation = improve.degradation;
|
|
285
|
+
const minting = improve.enrichmentMinting;
|
|
285
286
|
// #576: real per-stage LLM token/time accounting (replaces the GPU-time
|
|
286
287
|
// proxy). Optional-guarded so reports built from older health JSON without
|
|
287
288
|
// the aggregate still render.
|
|
@@ -557,7 +558,7 @@ export function buildHealthHtmlReplacements(result, opts) {
|
|
|
557
558
|
"Coverage rate",
|
|
558
559
|
pct(coverage.rate, 1),
|
|
559
560
|
"flat",
|
|
560
|
-
"
|
|
561
|
+
"Distinct accepted refs / total stash assets (denominator-fixed). Shows what fraction of the corpus has been touched.",
|
|
561
562
|
], [
|
|
562
563
|
"Eligible fraction",
|
|
563
564
|
pct(coverage.eligibleFraction, 1),
|
|
@@ -566,8 +567,22 @@ export function buildHealthHtmlReplacements(result, opts) {
|
|
|
566
567
|
], [
|
|
567
568
|
"Coverage accepted",
|
|
568
569
|
num(coverage.acceptedProposals),
|
|
569
|
-
"
|
|
570
|
-
"Total accepted proposals
|
|
570
|
+
"flat",
|
|
571
|
+
"Total accepted proposals in the window (raw volume — includes repeated rewrites of the same asset).",
|
|
572
|
+
], [
|
|
573
|
+
"Churn ratio",
|
|
574
|
+
Number.isFinite(coverage.churnRatio) ? num(coverage.churnRatio) : "—",
|
|
575
|
+
Number.isFinite(coverage.churnRatio) && coverage.churnRatio > 1.5 ? "down" : "flat",
|
|
576
|
+
"Accepted proposals / distinct refs touched. >1.5 = the loop is repeatedly rewriting the same assets (churn, not coverage).",
|
|
577
|
+
]);
|
|
578
|
+
}
|
|
579
|
+
// Enrichment-vs-minting policy rollup (reporting-only).
|
|
580
|
+
if (minting && Number.isFinite(minting.share)) {
|
|
581
|
+
summaryRows.push([
|
|
582
|
+
"Enrichment-lane minted share",
|
|
583
|
+
pct(minting.share, 1),
|
|
584
|
+
minting.share > 0.05 ? "down" : "flat",
|
|
585
|
+
`New assets minted by enrichment lanes / their accepted total (${minting.minted} minted vs ${minting.updated} updated). Enrichment lanes are ratified to edit existing assets only; WARN >5%, FAIL >15%.`,
|
|
571
586
|
]);
|
|
572
587
|
}
|
|
573
588
|
// WS-5: perf telemetry rows (only when at least one run reported telemetry).
|
|
@@ -606,18 +621,13 @@ export function buildHealthHtmlReplacements(result, opts) {
|
|
|
606
621
|
summaryRows.push([
|
|
607
622
|
"Corpus diversity (Gini)",
|
|
608
623
|
num(degradation.corpusCentroidDistance),
|
|
609
|
-
degradation.entrenchmentFlagged ? "down" : "flat",
|
|
610
|
-
"Gini coefficient of retrieval_salience for top-100 ranked assets.
|
|
624
|
+
degradation.entrenchmentFlagged || degradation.salienceUniformityFlagged ? "down" : "flat",
|
|
625
|
+
"Gini coefficient of retrieval_salience for top-100 ranked assets. Two-tailed: >0.35 = entrenchment risk; <0.08 = collapsed toward uniform (ranking no longer discriminates).",
|
|
611
626
|
], [
|
|
612
627
|
"Merge fidelity contradiction rate",
|
|
613
628
|
pct(degradation.mergeFidelityContradictionRate, 1),
|
|
614
629
|
"flat",
|
|
615
630
|
"Fraction of consolidated proposals that involved a contradiction, from consolidation result envelopes.",
|
|
616
|
-
], [
|
|
617
|
-
"High-generation fraction",
|
|
618
|
-
pct(degradation.highGenerationFraction, 1),
|
|
619
|
-
"flat",
|
|
620
|
-
"Fraction of assets with consecutive_no_ops >= 2 (proxy for high-generation assets in the salience table).",
|
|
621
631
|
]);
|
|
622
632
|
}
|
|
623
633
|
const summaryRowsHtml = summaryRows
|
|
@@ -726,6 +736,19 @@ export function buildHealthHtmlReplacements(result, opts) {
|
|
|
726
736
|
remedy: "akm health --format json | jq '.improve.degradation'",
|
|
727
737
|
});
|
|
728
738
|
}
|
|
739
|
+
// Low-tail companion: salience distribution collapsed toward uniform.
|
|
740
|
+
if (degradation?.salienceUniformityFlagged) {
|
|
741
|
+
pushItem({
|
|
742
|
+
key: "salience-uniformity-collapse",
|
|
743
|
+
prio: "P2",
|
|
744
|
+
cls: "warn",
|
|
745
|
+
title: "Salience distribution collapsed: retrieval_salience Gini < 0.08",
|
|
746
|
+
descHtml: "The top-100 salience scores are near-uniform (uniform baseline ≈ 0.1) — " +
|
|
747
|
+
"ranking currently carries little to no discrimination between assets. " +
|
|
748
|
+
`Corpus diversity proxy: ${esc(String(degradation.corpusCentroidDistance))}.`,
|
|
749
|
+
remedy: "akm health --format json | jq '.improve.degradation'",
|
|
750
|
+
});
|
|
751
|
+
}
|
|
729
752
|
// WS-5: over-budget consolidation advisory.
|
|
730
753
|
if (perf.overBudgetRuns > 0) {
|
|
731
754
|
pushItem({
|
package/dist/commands/health.js
CHANGED
|
@@ -14,6 +14,17 @@ import { getExecutionLogCandidates } from "../integrations/session-logs/index.js
|
|
|
14
14
|
import { LLM_USAGE_EVENT } from "../llm/usage-persist.js";
|
|
15
15
|
import { HEALTH_CHECKS } from "./health/checks.js";
|
|
16
16
|
import { gateDecisionsToSamples, summarizeCalibration } from "./improve/calibration.js";
|
|
17
|
+
/**
|
|
18
|
+
* Lanes ratified as ENRICHMENT-ONLY: they may propose edits to existing
|
|
19
|
+
* assets (metadata, relations, content refresh) but must not mint new ones.
|
|
20
|
+
* New-asset generation belongs to the signal-gated minting lanes
|
|
21
|
+
* (extract/distill/memory-inference/recombine).
|
|
22
|
+
*/
|
|
23
|
+
export const ENRICHMENT_LANES = ["proactive", "high-salience", "high-retrieval", "signal-delta"];
|
|
24
|
+
/** Minted share of enrichment-lane accepts that triggers a WARN advisory. */
|
|
25
|
+
export const ENRICHMENT_MINTED_WARN_SHARE = 0.05;
|
|
26
|
+
/** Minted share of enrichment-lane accepts that triggers a FAIL advisory. */
|
|
27
|
+
export const ENRICHMENT_MINTED_FAIL_SHARE = 0.15;
|
|
17
28
|
const DEFAULT_SINCE_MS = 24 * 60 * 60 * 1000;
|
|
18
29
|
const IMPROVE_COMPLETED_EVENT = "improve_completed";
|
|
19
30
|
const HEALTH_PROBE_EVENT = "health_probe";
|
|
@@ -173,6 +184,8 @@ function createUnknownImproveMetrics() {
|
|
|
173
184
|
rate: Number.NaN,
|
|
174
185
|
eligibleFraction: Number.NaN,
|
|
175
186
|
acceptedProposals: 0,
|
|
187
|
+
distinctRefs: 0,
|
|
188
|
+
churnRatio: Number.NaN,
|
|
176
189
|
totalAssets: 0,
|
|
177
190
|
},
|
|
178
191
|
};
|
|
@@ -1134,6 +1147,7 @@ function readCalibration(db, since, until) {
|
|
|
1134
1147
|
*/
|
|
1135
1148
|
function computeDenominatorFixedCoverage(db, totalAssets, eligibleAssets, since, until, stashDir) {
|
|
1136
1149
|
let acceptedProposals = 0;
|
|
1150
|
+
let distinctRefs = 0;
|
|
1137
1151
|
try {
|
|
1138
1152
|
const proposals = listStateProposals(db, {
|
|
1139
1153
|
status: "accepted",
|
|
@@ -1147,25 +1161,91 @@ function computeDenominatorFixedCoverage(db, totalAssets, eligibleAssets, since,
|
|
|
1147
1161
|
return true;
|
|
1148
1162
|
});
|
|
1149
1163
|
acceptedProposals = proposals.length;
|
|
1164
|
+
// Coverage counts DISTINCT refs: N accepted rewrites of one asset are
|
|
1165
|
+
// churn, not coverage. The raw proposal count is kept alongside so the
|
|
1166
|
+
// churn ratio (proposals ÷ distinct refs) stays visible.
|
|
1167
|
+
distinctRefs = new Set(proposals.map((p) => p.ref)).size;
|
|
1150
1168
|
}
|
|
1151
1169
|
catch {
|
|
1152
1170
|
// Fail open: table may not exist on older installs.
|
|
1153
1171
|
}
|
|
1172
|
+
const churnRatio = distinctRefs > 0 ? roundRate(acceptedProposals / distinctRefs) : Number.NaN;
|
|
1154
1173
|
if (totalAssets === 0) {
|
|
1155
1174
|
return {
|
|
1156
1175
|
rate: Number.NaN,
|
|
1157
1176
|
eligibleFraction: Number.NaN,
|
|
1158
1177
|
acceptedProposals,
|
|
1178
|
+
distinctRefs,
|
|
1179
|
+
churnRatio,
|
|
1159
1180
|
totalAssets: 0,
|
|
1160
1181
|
};
|
|
1161
1182
|
}
|
|
1162
1183
|
return {
|
|
1163
|
-
rate: roundRate(
|
|
1184
|
+
rate: roundRate(distinctRefs / totalAssets),
|
|
1164
1185
|
eligibleFraction: roundRate(eligibleAssets / totalAssets),
|
|
1165
1186
|
acceptedProposals,
|
|
1187
|
+
distinctRefs,
|
|
1188
|
+
churnRatio,
|
|
1166
1189
|
totalAssets,
|
|
1167
1190
|
};
|
|
1168
1191
|
}
|
|
1192
|
+
/**
 * Compute the enrichment-vs-minting rollup over the window's accepted,
 * lane-attributed proposals (reporting-only; see {@link EnrichmentMintingRollup}).
 *
 * SQL-side `json_extract` keeps the (potentially large) `backupContent` blobs
 * out of process memory. Pre-Phase-6C rows without an `eligibilitySource`
 * cannot be lane-classified and are excluded by the query. A proposal is
 * counted as "minted" when its metadata has no `backupContent`, otherwise as
 * "updated".
 *
 * @param db - Database handle exposing `prepare(sql).all(...params)`.
 * @param since - Inclusive lower bound compared against `updated_at`.
 * @param until - Optional exclusive upper bound; nullish means open-ended.
 * @returns `{ minted, updated, share, byLane }`. `minted`, `updated` and
 *   `share` cover only the lanes in {@link ENRICHMENT_LANES}; `byLane` holds
 *   every lane the query returned. `share` is NaN when no enrichment-lane
 *   rows were seen. Returns `undefined` when the query yields no rows or
 *   throws (fail open: the proposals table may be absent on older installs).
 */
export function computeEnrichmentMintingRollup(db, since, until) {
    try {
        const rows = db
            .prepare(`SELECT
                json_extract(metadata_json, '$.eligibilitySource') AS lane,
                CASE WHEN json_extract(metadata_json, '$.backupContent') IS NULL THEN 1 ELSE 0 END AS is_minted,
                COUNT(*) AS cnt
            FROM proposals
            WHERE status = 'accepted'
              AND updated_at >= ?
              AND (? IS NULL OR updated_at < ?)
              AND json_extract(metadata_json, '$.eligibilitySource') IS NOT NULL
              AND json_extract(metadata_json, '$.eligibilitySource') != ''
            GROUP BY lane, is_minted`)
            .all(since, until ?? null, until ?? null);
        if (rows.length === 0)
            return undefined;
        // Lane names come from stored metadata. Tallying in a Map (instead of
        // indexing a plain object) keeps names such as "__proto__" or
        // "constructor" from resolving to inherited members and being mutated.
        const tallies = new Map();
        for (const row of rows) {
            const laneKey = String(row.lane);
            let entry = tallies.get(laneKey);
            if (entry === undefined) {
                entry = { minted: 0, updated: 0 };
                tallies.set(laneKey, entry);
            }
            if (row.is_minted === 1)
                entry.minted += row.cnt;
            else
                entry.updated += row.cnt;
        }
        // Only the ratified enrichment lanes feed the headline totals.
        let minted = 0;
        let updated = 0;
        for (const lane of ENRICHMENT_LANES) {
            const entry = tallies.get(lane);
            if (!entry)
                continue;
            minted += entry.minted;
            updated += entry.updated;
        }
        const decided = minted + updated;
        return {
            minted,
            updated,
            share: decided > 0 ? roundRate(minted / decided) : Number.NaN,
            // Object.fromEntries defines own data properties only, so the
            // result stays a plain JSON-serialisable object for callers.
            byLane: Object.fromEntries(tallies),
        };
    }
    catch {
        // Fail open: proposals table may not exist on older installs.
        return undefined;
    }
}
|
|
1169
1249
|
/**
|
|
1170
1250
|
* Compute WS-5 per-run degradation metrics (Part V §4).
|
|
1171
1251
|
*
|
|
@@ -1176,7 +1256,7 @@ function computeDenominatorFixedCoverage(db, totalAssets, eligibleAssets, since,
|
|
|
1176
1256
|
* @param since - Window start (ISO-8601).
|
|
1177
1257
|
* @param until - Window end (ISO-8601).
|
|
1178
1258
|
*/
|
|
1179
|
-
function computeDegradationMetrics(db, since, until) {
|
|
1259
|
+
export function computeDegradationMetrics(db, since, until) {
|
|
1180
1260
|
// (a) Corpus diversity — salience rank distribution of the top-100 assets.
|
|
1181
1261
|
// We use the Gini coefficient of retrieval_salience scores as an intra-corpus
|
|
1182
1262
|
// diversity proxy. A Gini close to 1 = highly concentrated (entrenched top
|
|
@@ -1184,6 +1264,7 @@ function computeDegradationMetrics(db, since, until) {
|
|
|
1184
1264
|
// consecutive-run centroid distance requires cross-run history not yet stored.
|
|
1185
1265
|
let corpusCentroidDistance = Number.NaN;
|
|
1186
1266
|
let entrenchmentFlagged;
|
|
1267
|
+
let salienceUniformityFlagged;
|
|
1187
1268
|
try {
|
|
1188
1269
|
const rows = db
|
|
1189
1270
|
.prepare(`SELECT retrieval_salience FROM asset_salience
|
|
@@ -1202,9 +1283,13 @@ function computeDegradationMetrics(db, since, until) {
|
|
|
1202
1283
|
// corpusCentroidDistance approximation: gini is "distance from uniform".
|
|
1203
1284
|
// Note: retrieval_salience values are in [0,1], so the max achievable Gini
|
|
1204
1285
|
// with this formula is ~0.5 (when one asset dominates and others are near 0).
|
|
1205
|
-
//
|
|
1286
|
+
// Two-tailed: >0.35 flags entrenchment (robustly above the ~0.1 uniform
|
|
1287
|
+
// baseline); <0.08 flags uniformity collapse — the distribution no longer
|
|
1288
|
+
// discriminates between assets (live 2026-07 value 0.040 sat unflagged
|
|
1289
|
+
// in this tail under the old one-tailed check).
|
|
1206
1290
|
corpusCentroidDistance = roundRate(gini);
|
|
1207
1291
|
entrenchmentFlagged = gini > 0.35;
|
|
1292
|
+
salienceUniformityFlagged = gini < 0.08;
|
|
1208
1293
|
}
|
|
1209
1294
|
}
|
|
1210
1295
|
catch {
|
|
@@ -1243,23 +1328,11 @@ function computeDegradationMetrics(db, since, until) {
|
|
|
1243
1328
|
catch {
|
|
1244
1329
|
// Fail open.
|
|
1245
1330
|
}
|
|
1246
|
-
// (c)
|
|
1247
|
-
//
|
|
1248
|
-
// (
|
|
1249
|
-
//
|
|
1250
|
-
//
|
|
1251
|
-
let highGenerationFraction = Number.NaN;
|
|
1252
|
-
try {
|
|
1253
|
-
const genRows = db.prepare("SELECT consecutive_no_ops FROM asset_salience").all();
|
|
1254
|
-
if (genRows.length > 0) {
|
|
1255
|
-
// Use consecutive_no_ops >= 2 as a proxy for "has been through merge cycles".
|
|
1256
|
-
const highGen = genRows.filter((r) => r.consecutive_no_ops >= 2).length;
|
|
1257
|
-
highGenerationFraction = roundRate(highGen / genRows.length);
|
|
1258
|
-
}
|
|
1259
|
-
}
|
|
1260
|
-
catch {
|
|
1261
|
-
// Table not present.
|
|
1262
|
-
}
|
|
1331
|
+
// (c) highGenerationFraction was DELETED (meta-review 05 DRIFT-3): it
|
|
1332
|
+
// approximated "LLM-merge generations" from consecutive_no_ops — which counts
|
|
1333
|
+
// the opposite condition (cycles where nothing was changed) — and its own
|
|
1334
|
+
// in-code TODO admitted the proxy. Display-only, never actionable; removed
|
|
1335
|
+
// rather than instrumented.
|
|
1263
1336
|
// (d) Oracle spot-check — up to 5 recently accepted proposals in the window.
|
|
1264
1337
|
const oracleSpotCheck = [];
|
|
1265
1338
|
try {
|
|
@@ -1287,8 +1360,8 @@ function computeDegradationMetrics(db, since, until) {
|
|
|
1287
1360
|
return {
|
|
1288
1361
|
corpusCentroidDistance,
|
|
1289
1362
|
entrenchmentFlagged,
|
|
1363
|
+
salienceUniformityFlagged,
|
|
1290
1364
|
mergeFidelityContradictionRate,
|
|
1291
|
-
highGenerationFraction,
|
|
1292
1365
|
oracleSpotCheck,
|
|
1293
1366
|
};
|
|
1294
1367
|
}
|
|
@@ -1447,6 +1520,7 @@ export function akmHealth(options = {}) {
|
|
|
1447
1520
|
if (degradationMain) {
|
|
1448
1521
|
improveSummary.degradation = degradationMain;
|
|
1449
1522
|
}
|
|
1523
|
+
improveSummary.enrichmentMinting = computeEnrichmentMintingRollup(db, since, until);
|
|
1450
1524
|
// WS-2 proxy-adequacy tripwire: surface any outcome_proxy_inverted events
|
|
1451
1525
|
// in the health window as an advisory so operators know when the 0.10+
|
|
1452
1526
|
// rich in-session signal is no longer deferrable.
|
|
@@ -1466,6 +1540,65 @@ export function akmHealth(options = {}) {
|
|
|
1466
1540
|
"The 0.10+ rich in-session outcome signal is no longer deferrable. See plan §WS-2.",
|
|
1467
1541
|
});
|
|
1468
1542
|
}
|
|
1543
|
+
// Two-tailed companion: a proxy that decays to noise (|corr| < 0.1 at scale)
|
|
1544
|
+
// is as much a failure as an inverted one — it just fails silently.
|
|
1545
|
+
const proxyDeadEvents = readEvents({ since, type: "outcome_proxy_dead" }, { dbPath: stateDbPath, db }).events;
|
|
1546
|
+
if (proxyDeadEvents.length > 0) {
|
|
1547
|
+
const lastEvent = proxyDeadEvents[proxyDeadEvents.length - 1];
|
|
1548
|
+
const correlation = typeof lastEvent.metadata?.correlation === "number" ? lastEvent.metadata.correlation.toFixed(3) : "unknown";
|
|
1549
|
+
advisories.push({
|
|
1550
|
+
name: "outcome-proxy-dead",
|
|
1551
|
+
status: "warn",
|
|
1552
|
+
kind: "deterministic",
|
|
1553
|
+
confidence: "high",
|
|
1554
|
+
message: `WS-2 outcome proxy is DEAD (${proxyDeadEvents.length} event(s) in window). ` +
|
|
1555
|
+
`|corr(outcome_score, accepted_change_rate)| = ${correlation} < 0.1 at n ≥ 500. ` +
|
|
1556
|
+
"outcome_score is statistically unrelated to improvement outcomes — " +
|
|
1557
|
+
"treat outcome-derived rank contributions as noise until a real usage/outcome signal lands.",
|
|
1558
|
+
});
|
|
1559
|
+
}
|
|
1560
|
+
// Salience-distribution collapse: Gini below the uniform baseline means
|
|
1561
|
+
// ranking no longer discriminates between assets.
|
|
1562
|
+
if (improveSummary.degradation?.salienceUniformityFlagged) {
|
|
1563
|
+
advisories.push({
|
|
1564
|
+
name: "salience-uniformity-collapse",
|
|
1565
|
+
status: "warn",
|
|
1566
|
+
kind: "deterministic",
|
|
1567
|
+
confidence: "high",
|
|
1568
|
+
message: `Salience distribution collapsed toward uniform: top-100 retrieval_salience Gini = ` +
|
|
1569
|
+
`${improveSummary.degradation.corpusCentroidDistance} < 0.08 (uniform baseline ≈ 0.1). ` +
|
|
1570
|
+
"Ranking currently carries little to no discrimination between assets.",
|
|
1571
|
+
});
|
|
1572
|
+
}
|
|
1573
|
+
// Enrichment-vs-minting policy: enrichment lanes edit existing assets;
|
|
1574
|
+
// a rising minted share means a lane is generating new content instead.
|
|
1575
|
+
const minting = improveSummary.enrichmentMinting;
|
|
1576
|
+
if (minting && Number.isFinite(minting.share) && minting.share > ENRICHMENT_MINTED_WARN_SHARE) {
|
|
1577
|
+
advisories.push({
|
|
1578
|
+
name: "enrichment-lane-minting",
|
|
1579
|
+
status: minting.share > ENRICHMENT_MINTED_FAIL_SHARE ? "fail" : "warn",
|
|
1580
|
+
kind: "deterministic",
|
|
1581
|
+
confidence: "high",
|
|
1582
|
+
message: `Enrichment lanes minted ${minting.minted} NEW asset(s) vs ${minting.updated} update(s) ` +
|
|
1583
|
+
`(${Math.round(minting.share * 100)}% minted, threshold ${Math.round(ENRICHMENT_MINTED_WARN_SHARE * 100)}%). ` +
|
|
1584
|
+
"Enrichment-classed lanes (proactive/high-salience/high-retrieval/signal-delta) are ratified to edit " +
|
|
1585
|
+
"existing assets only — new-asset generation belongs to the signal-gated minting lanes.",
|
|
1586
|
+
});
|
|
1587
|
+
}
|
|
1588
|
+
// Churn: accepted proposals far exceeding distinct touched refs means the
|
|
1589
|
+
// loop is repeatedly rewriting the same assets, not covering the corpus.
|
|
1590
|
+
if (Number.isFinite(improveSummary.coverage.churnRatio) && improveSummary.coverage.churnRatio > 1.5) {
|
|
1591
|
+
advisories.push({
|
|
1592
|
+
name: "improve-churn-ratio",
|
|
1593
|
+
status: "warn",
|
|
1594
|
+
kind: "deterministic",
|
|
1595
|
+
confidence: "high",
|
|
1596
|
+
message: `Improve churn ratio ${improveSummary.coverage.churnRatio} > 1.5: ` +
|
|
1597
|
+
`${improveSummary.coverage.acceptedProposals} accepted proposals touched only ` +
|
|
1598
|
+
`${improveSummary.coverage.distinctRefs} distinct assets in the window — ` +
|
|
1599
|
+
"repeated rewrites of the same refs count as churn, not coverage.",
|
|
1600
|
+
});
|
|
1601
|
+
}
|
|
1469
1602
|
// R5 collapse/churn detector: surface any collapse_detector_alert events
|
|
1470
1603
|
// in the health window, plus the latest cycle row's headline numbers so
|
|
1471
1604
|
// the operator can act without opening the DB. `unknown` when the detector
|
|
@@ -2,12 +2,6 @@
|
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
4
|
// ── Constants ─────────────────────────────────────────────────────────────────
|
|
5
|
-
/**
|
|
6
|
-
* Weight on the "retrieved-but-never-improved" penalty term. Setting this to
|
|
7
|
-
* 0 degrades to a pure prediction-error score (no quality filter); setting it
|
|
8
|
-
* to 1 heavily penalises assets whose retrievals never led to accepted changes.
|
|
9
|
-
*/
|
|
10
|
-
export const OUTCOME_PENALTY_WEIGHT = 0.3;
|
|
11
5
|
/**
|
|
12
6
|
* EMA decay factor for the expected-retrieval rolling mean (α).
|
|
13
7
|
* New expected = α × new_count + (1−α) × old_expected.
|
|
@@ -95,12 +89,8 @@ export function updateAssetOutcome(db, inputs) {
|
|
|
95
89
|
//
|
|
96
90
|
// retrieval_delta = current − stored (non-negative — we never go backwards)
|
|
97
91
|
const retrievalDelta = Math.max(0, inputs.currentRetrievalCount - existing.retrieval_count);
|
|
98
|
-
// accepted_change_rate = accepted_count / max(1, retrieval_count)
|
|
99
|
-
const acceptedChangeRate = inputs.acceptedChangeCount / Math.max(1, inputs.currentRetrievalCount);
|
|
100
92
|
// Differential prediction-error term:
|
|
101
|
-
// outcome = (retrieval_delta − expected_delta)
|
|
102
|
-
// − PENALTY × retrieval_delta × (1 − accepted_change_rate)
|
|
103
|
-
// + valence
|
|
93
|
+
// outcome = (retrieval_delta − expected_delta) + valence
|
|
104
94
|
//
|
|
105
95
|
// Prediction error is computed against the PRIOR stored EMA (before folding
|
|
106
96
|
// in this cycle's observation), so the current delta cannot leak into its own
|
|
@@ -111,10 +101,9 @@ export function updateAssetOutcome(db, inputs) {
|
|
|
111
101
|
// expected' = α × delta + (1−α) × prior_expected
|
|
112
102
|
expectedRetrievalRate =
|
|
113
103
|
OUTCOME_EMA_ALPHA * retrievalDelta + (1 - OUTCOME_EMA_ALPHA) * existing.expected_retrieval_rate;
|
|
114
|
-
const penalty = OUTCOME_PENALTY_WEIGHT * retrievalDelta * (1 - acceptedChangeRate);
|
|
115
104
|
// Running sum (EMA approach): new score = α × update + (1−α) × old
|
|
116
105
|
// so the score tracks the moving signal, not the cumulative sum.
|
|
117
|
-
const rawUpdate = predictionError
|
|
106
|
+
const rawUpdate = predictionError + valence;
|
|
118
107
|
const newScore = OUTCOME_EMA_ALPHA * rawUpdate + (1 - OUTCOME_EMA_ALPHA) * existing.outcome_score;
|
|
119
108
|
// Clip to [OUTCOME_SCORE_MIN, OUTCOME_SCORE_MAX] — the ceiling is the RPE
|
|
120
109
|
// saturation analog (G2): without it, long-lived popular assets accumulate
|
|
@@ -223,6 +212,17 @@ export function outcomeScoreToSalience(outcomeScore, maxScore) {
|
|
|
223
212
|
// Apply diversity floor.
|
|
224
213
|
return Math.max(DIVERSITY_FLOOR_FRACTION, normalised);
|
|
225
214
|
}
|
|
215
|
+
// ── Proxy-adequacy tripwire ───────────────────────────────────────────────────
|
|
216
|
+
/**
|
|
217
|
+
* Dead-proxy threshold: |corr| below this means outcome_score carries no
|
|
218
|
+
* information about improvement need (pure noise).
|
|
219
|
+
*/
|
|
220
|
+
export const PROXY_DEAD_CORR_THRESHOLD = 0.1;
|
|
221
|
+
/**
|
|
222
|
+
* Minimum sample size before the dead-proxy check fires. Below this, a
|
|
223
|
+
* near-zero correlation is indistinguishable from small-sample noise.
|
|
224
|
+
*/
|
|
225
|
+
export const PROXY_DEAD_MIN_N = 500;
|
|
226
226
|
/**
|
|
227
227
|
* Compute `corr(outcome_score, accepted_change_rate)` across all asset_outcome
|
|
228
228
|
* rows. Returns `{correlation: NaN, n, isInverted: false}` when there is
|
|
@@ -238,7 +238,7 @@ export function outcomeScoreToSalience(outcomeScore, maxScore) {
|
|
|
238
238
|
export function computeProxyAdequacy(rows) {
|
|
239
239
|
const n = rows.length;
|
|
240
240
|
if (n < 3)
|
|
241
|
-
return { correlation: Number.NaN, n, isInverted: false };
|
|
241
|
+
return { correlation: Number.NaN, n, isInverted: false, isDead: false };
|
|
242
242
|
// accepted_change_rate per row.
|
|
243
243
|
const xs = rows.map((r) => r.outcome_score);
|
|
244
244
|
const ys = rows.map((r) => r.accepted_change_count / Math.max(1, r.retrieval_count));
|
|
@@ -259,10 +259,12 @@ export function computeProxyAdequacy(rows) {
|
|
|
259
259
|
varY /= n;
|
|
260
260
|
const denom = Math.sqrt(varX) * Math.sqrt(varY);
|
|
261
261
|
if (denom < 1e-12)
|
|
262
|
-
return { correlation: Number.NaN, n, isInverted: false };
|
|
262
|
+
return { correlation: Number.NaN, n, isInverted: false, isDead: false };
|
|
263
263
|
const correlation = covXY / denom;
|
|
264
264
|
// Inverted proxy: negative correlation between outcome and accepted_change_rate
|
|
265
265
|
// means high-outcome assets are also high-need — the opposite of "useful".
|
|
266
266
|
const isInverted = correlation < -0.3;
|
|
267
|
-
|
|
267
|
+
// Dead proxy: near-zero correlation at scale — the score is noise.
|
|
268
|
+
const isDead = n >= PROXY_DEAD_MIN_N && Math.abs(correlation) < PROXY_DEAD_CORR_THRESHOLD;
|
|
269
|
+
return { correlation, n, isInverted, isDead };
|
|
268
270
|
}
|
|
@@ -27,7 +27,7 @@ import { computeValenceScore, FEEDBACK_WEIGHT, UTILITY_WEIGHT } from "./feedback
|
|
|
27
27
|
import { makeGateConfig, resolveExtractConfidence, runAutoAcceptGate } from "./improve-auto-accept.js";
|
|
28
28
|
import { resolveProcessEnabled } from "./improve-profiles.js";
|
|
29
29
|
import { applyMemoryCleanup } from "./memory/memory-improve.js";
|
|
30
|
-
import { computeProxyAdequacy, getAllAssetOutcomes, getOutcomeScoresByRef, outcomeScoreToSalience, updateAssetOutcome, } from "./outcome-loop.js";
|
|
30
|
+
import { computeProxyAdequacy, getAllAssetOutcomes, getOutcomeScoresByRef, OUTCOME_SCORE_MAX, outcomeScoreToSalience, updateAssetOutcome, } from "./outcome-loop.js";
|
|
31
31
|
import { DEFAULT_DUE_DAYS, DEFAULT_MAX_PER_RUN, selectProactiveMaintenanceRefs } from "./proactive-maintenance.js";
|
|
32
32
|
import { buildRankChangeReport, computeSalience, getAllRankScores, getAssetSalience, getConsecutiveNoOps, getLastUseMsByRef, isContentEncodingRow, SALIENCE_NO_OP_DAMPEN_FACTOR, SALIENCE_NO_OP_DAMPEN_THRESHOLD, upsertAssetSalience, } from "./salience.js";
|
|
33
33
|
// ── improve preparation stage ───────────────────────
|
|
@@ -1358,8 +1358,13 @@ export async function runImprovePreparationStage(args) {
|
|
|
1358
1358
|
if (row.outcome_score > maxOutcomeScore)
|
|
1359
1359
|
maxOutcomeScore = row.outcome_score;
|
|
1360
1360
|
}
|
|
1361
|
-
//
|
|
1362
|
-
//
|
|
1361
|
+
// Read-clip: legacy rows written before the OUTCOME_SCORE_MAX write-clip
|
|
1362
|
+
// existed can sit above the ceiling (live max was 3.13). Without this
|
|
1363
|
+
// clip they inflate the normalisation denominator and floor everyone
|
|
1364
|
+
// else's outcomeSalience (#691 follow-up).
|
|
1365
|
+
maxOutcomeScore = Math.min(maxOutcomeScore, OUTCOME_SCORE_MAX);
|
|
1366
|
+
// Proxy-adequacy tripwire (two-tailed): inverted (corr < −0.3) and
|
|
1367
|
+
// dead (|corr| < 0.1 at n ≥ 500) both emit health events.
|
|
1363
1368
|
const adequacy = computeProxyAdequacy(allOutcomes);
|
|
1364
1369
|
if (adequacy.isInverted) {
|
|
1365
1370
|
appendEvent({
|
|
@@ -1372,6 +1377,17 @@ export async function runImprovePreparationStage(args) {
|
|
|
1372
1377
|
},
|
|
1373
1378
|
}, eventsCtx);
|
|
1374
1379
|
}
|
|
1380
|
+
if (adequacy.isDead) {
|
|
1381
|
+
appendEvent({
|
|
1382
|
+
eventType: "outcome_proxy_dead",
|
|
1383
|
+
ref: undefined,
|
|
1384
|
+
metadata: {
|
|
1385
|
+
correlation: adequacy.correlation,
|
|
1386
|
+
n: adequacy.n,
|
|
1387
|
+
note: "|corr(outcome_score, accepted_change_rate)| < 0.1 at n ≥ 500: outcome_score is statistically unrelated to improvement outcomes — the proxy is noise, not signal. Rank contributions derived from it are not currently informative.",
|
|
1388
|
+
},
|
|
1389
|
+
}, eventsCtx);
|
|
1390
|
+
}
|
|
1375
1391
|
}
|
|
1376
1392
|
catch {
|
|
1377
1393
|
// best-effort: tripwire failure never blocks ranking
|
|
@@ -58,7 +58,7 @@ const CURATE_REFERENCE_QUERY_RE = /\b(?:reference|docs?|guide|how|explain|learn|
|
|
|
58
58
|
* Fire-and-forget: log a curate event to the usage_events table and events.jsonl.
|
|
59
59
|
* Never blocks the caller; errors are silently ignored.
|
|
60
60
|
*/
|
|
61
|
-
function logCurateEvent(query, result) {
|
|
61
|
+
function logCurateEvent(query, result, eventSource = "user") {
|
|
62
62
|
const itemRefs = result.items.map((item) => ("ref" in item ? item.ref : `registry:${item.id}`));
|
|
63
63
|
appendEvent({
|
|
64
64
|
eventType: "curate",
|
|
@@ -73,7 +73,7 @@ function logCurateEvent(query, result) {
|
|
|
73
73
|
itemCount: result.items.length,
|
|
74
74
|
itemRefs,
|
|
75
75
|
}),
|
|
76
|
-
source:
|
|
76
|
+
source: eventSource,
|
|
77
77
|
});
|
|
78
78
|
for (const item of result.items) {
|
|
79
79
|
if (!("ref" in item) || typeof item.ref !== "string")
|
|
@@ -82,7 +82,7 @@ function logCurateEvent(query, result) {
|
|
|
82
82
|
event_type: "curate",
|
|
83
83
|
query,
|
|
84
84
|
entry_ref: item.ref,
|
|
85
|
-
source:
|
|
85
|
+
source: eventSource,
|
|
86
86
|
});
|
|
87
87
|
}
|
|
88
88
|
}, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
|
|
@@ -106,7 +106,7 @@ export async function akmCurate(options) {
|
|
|
106
106
|
source,
|
|
107
107
|
}));
|
|
108
108
|
const result = await curateSearchResults(options.query, searchResponse, limit, options.type);
|
|
109
|
-
logCurateEvent(options.query, result);
|
|
109
|
+
logCurateEvent(options.query, result, options.eventSource);
|
|
110
110
|
return result;
|
|
111
111
|
}
|
|
112
112
|
export async function curateSearchResults(query, result, limit, selectedType) {
|
|
@@ -21,13 +21,15 @@ import { getHyphenatedBoolean, getOutputMode, parseFlagValue } from "../../outpu
|
|
|
21
21
|
import { akmCurate } from "./curate.js";
|
|
22
22
|
import { akmSearch, parseBeliefFilterMode, parseScopeFilterFlags, parseSearchSource } from "./search.js";
|
|
23
23
|
import { akmShowUnified } from "./show.js";
|
|
24
|
-
// AKM_EVENT_SOURCE attributes a query to a `user` invocation
|
|
25
|
-
// `improve` loop so the event log can distinguish
|
|
26
|
-
// treated as unset.
|
|
24
|
+
// AKM_EVENT_SOURCE attributes a query to a `user` invocation, the internal
|
|
25
|
+
// `improve` loop, or the `task` runner so the event log can distinguish
|
|
26
|
+
// genuine demand from machine traffic; any other value is treated as unset.
|
|
27
27
|
function resolveEventSource() {
|
|
28
28
|
const raw = process.env.AKM_EVENT_SOURCE;
|
|
29
29
|
if (raw === "improve")
|
|
30
30
|
return "improve";
|
|
31
|
+
if (raw === "task")
|
|
32
|
+
return "task";
|
|
31
33
|
if (raw === "user")
|
|
32
34
|
return "user";
|
|
33
35
|
return undefined;
|
|
@@ -129,7 +131,7 @@ export const curateCommand = defineJsonCommand({
|
|
|
129
131
|
const limitParsed = parsePositiveIntFlag(args.limit ?? undefined);
|
|
130
132
|
const limit = limitParsed && limitParsed > 0 ? limitParsed : 4;
|
|
131
133
|
const source = parseSearchSource(args.source ?? "stash");
|
|
132
|
-
const curated = await akmCurate({ query: args.query, type, limit, source });
|
|
134
|
+
const curated = await akmCurate({ query: args.query, type, limit, source, eventSource: resolveEventSource() });
|
|
133
135
|
output("curate", curated);
|
|
134
136
|
},
|
|
135
137
|
});
|
|
@@ -16,8 +16,8 @@ import { rethrowIfTestIsolationError, UsageError } from "../../core/errors.js";
|
|
|
16
16
|
import { appendEvent } from "../../core/events.js";
|
|
17
17
|
import { isTransientStashPath } from "../../core/paths.js";
|
|
18
18
|
import { bumpUtilityScoresBatch, getEntryIdByFilePath } from "../../indexer/db/db.js";
|
|
19
|
+
import { resolveReadSources } from "../../indexer/read-preflight.js";
|
|
19
20
|
import { searchLocal } from "../../indexer/search/db-search.js";
|
|
20
|
-
import { resolveSourceEntries } from "../../indexer/search/search-source.js";
|
|
21
21
|
import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.js";
|
|
22
22
|
// Eagerly import source providers to trigger self-registration before the
|
|
23
23
|
// indexer or path-resolution code runs.
|
|
@@ -57,7 +57,7 @@ export async function akmSearch(input) {
|
|
|
57
57
|
else {
|
|
58
58
|
source = parsedSource;
|
|
59
59
|
}
|
|
60
|
-
let allSources =
|
|
60
|
+
let allSources = resolveReadSources(undefined, config).sources;
|
|
61
61
|
// When a named source was requested, narrow the sources list to just that entry.
|
|
62
62
|
// `resolveSourceEntries` sets `registryId` to `entry.name` for each config source.
|
|
63
63
|
if (namedSourceName !== undefined) {
|
|
@@ -243,7 +243,11 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
|
|
|
243
243
|
}
|
|
244
244
|
// Bump utility scores for all resolved entries (MemRL retrieval signal).
|
|
245
245
|
// The indexer overwrites these at next reindex; bumps are temporary hints.
|
|
246
|
-
|
|
246
|
+
// Gated to user-sourced events: pipeline searches (improve probes, task
|
|
247
|
+
// runner) must not feed the utility signal (meta-review 05 DRIFT-6 —
|
|
248
|
+
// the bump previously fired unconditionally, so even correctly-tagged
|
|
249
|
+
// machine traffic inflated utility).
|
|
250
|
+
const resolvedIds = eventSource === "user" ? resolved.map((r) => r.entryId).filter((id) => id !== undefined) : [];
|
|
247
251
|
if (resolvedIds.length > 0) {
|
|
248
252
|
let scopeKey;
|
|
249
253
|
try {
|