akm-cli 0.9.0-beta.52 → 0.9.0-beta.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/hints/cli-hints-full.md +6 -5
- package/dist/cli/clack.js +56 -0
- package/dist/cli/confirm.js +1 -1
- package/dist/cli.js +0 -7
- package/dist/commands/env/env-cli.js +3 -2
- package/dist/commands/env/env.js +14 -67
- package/dist/commands/health/checks.js +28 -15
- package/dist/commands/health/html-report.js +33 -10
- package/dist/commands/health.js +222 -22
- package/dist/commands/improve/collapse-detector.js +419 -0
- package/dist/commands/improve/consolidate.js +72 -54
- package/dist/commands/improve/distill.js +79 -13
- package/dist/commands/improve/extract.js +13 -6
- package/dist/commands/improve/homeostatic.js +109 -79
- package/dist/commands/improve/improve-cli.js +67 -1
- package/dist/commands/improve/improve.js +10 -0
- package/dist/commands/improve/loop-stages.js +39 -1
- package/dist/commands/improve/outcome-loop.js +33 -19
- package/dist/commands/improve/preparation.js +36 -11
- package/dist/commands/improve/salience.js +49 -32
- package/dist/commands/read/curate.js +9 -13
- package/dist/commands/read/knowledge.js +4 -0
- package/dist/commands/read/search-cli.js +6 -4
- package/dist/commands/read/search.js +12 -5
- package/dist/commands/read/show.js +6 -8
- package/dist/commands/sources/add-cli.js +1 -1
- package/dist/commands/sources/init.js +12 -0
- package/dist/commands/sources/stash-cli.js +1 -1
- package/dist/commands/tasks/default-tasks.js +12 -0
- package/dist/core/asset/asset-spec.js +3 -2
- package/dist/core/config/config-schema.js +39 -17
- package/dist/core/config/config.js +12 -0
- package/dist/core/eval/rank-metrics.js +113 -0
- package/dist/core/state/migrations.js +56 -0
- package/dist/core/state-db.js +146 -19
- package/dist/core/warn.js +21 -0
- package/dist/indexer/db/db.js +6 -0
- package/dist/indexer/ensure-index.js +36 -92
- package/dist/indexer/index-writer-lock.js +9 -11
- package/dist/indexer/index-written-assets.js +105 -0
- package/dist/indexer/indexer.js +16 -4
- package/dist/indexer/passes/metadata.js +20 -0
- package/dist/indexer/read-preflight.js +23 -0
- package/dist/indexer/search/db-search.js +29 -1
- package/dist/indexer/search/ranking-contributors.js +33 -1
- package/dist/indexer/search/ranking.js +66 -0
- package/dist/indexer/search/search-fields.js +6 -0
- package/dist/indexer/walk/walker.js +21 -13
- package/dist/integrations/agent/detect.js +9 -0
- package/dist/integrations/agent/index.js +1 -1
- package/dist/llm/client.js +12 -0
- package/dist/llm/embedder.js +26 -2
- package/dist/llm/embedders/local.js +7 -1
- package/dist/llm/feature-gate.js +6 -2
- package/dist/output/renderers.js +8 -13
- package/dist/output/shapes/helpers.js +0 -3
- package/dist/output/shapes/passthrough.js +1 -0
- package/dist/scripts/migrate-storage.js +178 -35
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +46 -19
- package/dist/setup/detect.js +9 -0
- package/dist/setup/registry-stash-loader.js +12 -0
- package/dist/setup/setup.js +1 -1
- package/dist/storage/repositories/index-db.js +10 -1
- package/dist/tasks/backends/index.js +9 -0
- package/dist/tasks/runner.js +9 -0
- package/package.json +2 -4
|
@@ -76,12 +76,19 @@ const DEFAULT_DEPS = {
|
|
|
76
76
|
list: akmTasksList,
|
|
77
77
|
add: akmTasksAdd,
|
|
78
78
|
};
|
|
79
|
+
// Test seam: when set, the exported helpers in this module that consult it
// (isCiEnvironment, detectServerDefault, registerDefaultTasks) delegate to the
// matching function on this object instead of running their real logic.
let defaultTasksOverrides;
/**
 * TEST-ONLY. Install replacement implementations for the CI / server /
 * register functions.
 *
 * @param {object | undefined} fakes - Object whose optional `isCiEnvironment`,
 *   `detectServerDefault` and `registerDefaultTasks` members replace the real
 *   functions; pass `undefined` to restore the real behaviour.
 */
export function _setDefaultTasksForTests(fakes) {
    defaultTasksOverrides = fakes;
}
|
|
79
84
|
/**
|
|
80
85
|
* Decide whether `akm setup` is running in a CI environment, where it must
|
|
81
86
|
* register NO scheduled tasks. Mirrors the common `CI=true` convention used by
|
|
82
87
|
* GitHub Actions, GitLab CI, CircleCI, etc.
|
|
83
88
|
*/
|
|
84
89
|
export function isCiEnvironment(env = process.env) {
|
|
90
|
+
if (defaultTasksOverrides?.isCiEnvironment)
|
|
91
|
+
return defaultTasksOverrides.isCiEnvironment(env);
|
|
85
92
|
const ci = env.CI;
|
|
86
93
|
if (ci === undefined || ci === null)
|
|
87
94
|
return false;
|
|
@@ -95,6 +102,8 @@ export function isCiEnvironment(env = process.env) {
|
|
|
95
102
|
* Used as the default when setup is non-interactive (no TTY / --yes / CI).
|
|
96
103
|
*/
|
|
97
104
|
export function detectServerDefault() {
|
|
105
|
+
if (defaultTasksOverrides?.detectServerDefault)
|
|
106
|
+
return defaultTasksOverrides.detectServerDefault();
|
|
98
107
|
if (os.platform() !== "linux")
|
|
99
108
|
return false;
|
|
100
109
|
// A laptop exposes a battery under /sys/class/power_supply/BAT*. Absence of
|
|
@@ -121,6 +130,9 @@ export function detectServerDefault() {
|
|
|
121
130
|
* never re-disable a user-enabled task).
|
|
122
131
|
*/
|
|
123
132
|
export async function registerDefaultTasks(options = {}) {
|
|
133
|
+
if (defaultTasksOverrides?.registerDefaultTasks) {
|
|
134
|
+
return defaultTasksOverrides.registerDefaultTasks(options);
|
|
135
|
+
}
|
|
124
136
|
if (isCiEnvironment()) {
|
|
125
137
|
return { skipped: true, reason: "ci", created: [], existing: [], toggled: [] };
|
|
126
138
|
}
|
|
@@ -69,8 +69,9 @@ const ASSET_SPECS_INTERNAL = {
|
|
|
69
69
|
script: { stashDir: "scripts", ...scriptSpec },
|
|
70
70
|
memory: { stashDir: "memories", ...markdownSpec },
|
|
71
71
|
// Environment assets — whole `.env` files sourced/injected wholesale. Replaced
|
|
72
|
-
// the deprecated `vault` type (removed in 0.9.0).
|
|
73
|
-
//
|
|
72
|
+
// the deprecated `vault` type (removed in 0.9.0). Only key NAMES are surfaced
|
|
73
|
+
// as metadata; values and comment text are never read for indexing (comments
|
|
74
|
+
// routinely contain commented-out credentials).
|
|
74
75
|
env: {
|
|
75
76
|
stashDir: "env",
|
|
76
77
|
isRelevantFile: (fileName) => fileName === ".env" || fileName.endsWith(".env"),
|
|
@@ -171,6 +171,9 @@ export const ImproveProcessConfigSchema = z
|
|
|
171
171
|
// byte-identically to today (the incrementalSince path is unaffected). Only
|
|
172
172
|
// meaningful on the `consolidate` process.
|
|
173
173
|
judgedCache: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
|
|
174
|
+
// Distill process: LLM-as-judge lesson quality gate. Default ON (R3);
|
|
175
|
+
// fail-open — judge failure/timeout/parse errors pass through. Set
|
|
176
|
+
// `enabled: false` on the distill process to opt out.
|
|
174
177
|
qualityGate: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
|
|
175
178
|
contradictionDetection: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
|
|
176
179
|
// Extract process config (only meaningful for extract process)
|
|
@@ -250,24 +253,13 @@ export const ImproveProcessConfigSchema = z
|
|
|
250
253
|
// once sufficient history accumulates; this value is only used on the very
|
|
251
254
|
// first run. Default 30 s. Only meaningful on the `consolidate` process.
|
|
252
255
|
p90ChunkSecondsDefault: z.number().finite().positive().optional(),
|
|
253
|
-
// WS-3b
|
|
254
|
-
//
|
|
255
|
-
//
|
|
256
|
-
// re-promotable on re-retrieval. Default OFF. Only meaningful on the
|
|
257
|
-
// `consolidate` process.
|
|
258
|
-
homeostaticDemotion: z
|
|
259
|
-
.object({
|
|
260
|
-
enabled: z.boolean().optional(),
|
|
261
|
-
// Minimum days since last retrieval to consider an asset stale (default 30).
|
|
262
|
-
staleDays: z.number().int().min(0).optional(),
|
|
263
|
-
// Demotion factor: multiply retrievalSalience by this when stale (default 0.5).
|
|
264
|
-
demotionFactor: z.number().min(0).max(1).optional(),
|
|
265
|
-
})
|
|
266
|
-
.passthrough()
|
|
267
|
-
.optional(),
|
|
256
|
+
// (WS-3b step 0a `homeostaticDemotion` was removed — R4. The key is
|
|
257
|
+
// tolerated via passthrough if an old config still carries it; continuous
|
|
258
|
+
// decay is now part of the always-applied salience recency term.)
|
|
268
259
|
// WS-3b: Schema-similarity gate (step 0b). At intake, if a new candidate's
|
|
269
260
|
// body embedding is within epsilon of an existing derived-layer lesson/knowledge
|
|
270
|
-
// node, mark it schema-consistent and lower its priority. Default
|
|
261
|
+
// node, mark it schema-consistent and lower its priority. Default ON for
|
|
262
|
+
// the `extract` process since R3 (fail-open; set `enabled: false` to opt out).
|
|
271
263
|
// Only meaningful on the `consolidate` and `extract` processes.
|
|
272
264
|
schemaSimilarity: z
|
|
273
265
|
.object({
|
|
@@ -297,13 +289,19 @@ export const ImproveProcessConfigSchema = z
|
|
|
297
289
|
// - maxGeneration: refuse to merge two assets both above this generation (default 2).
|
|
298
290
|
// - lexicalDiversityCheck: low n-gram diversity ⇒ raise merge threshold.
|
|
299
291
|
// - randomClusterFraction: occasional random (non-similar) cluster in pool (default 0.05).
|
|
300
|
-
//
|
|
292
|
+
// - mergeInformationFloor: measure that merges keep provenance + specificity
|
|
293
|
+
// (R5 §4.2; ADVISORY in v1 — counted, never refused).
|
|
294
|
+
// - minSpecificityRetention: distinct-token retention floor for merges (default 0.6).
|
|
295
|
+
// Default ON since R5 (opt out via enabled: false). Only meaningful on the
|
|
296
|
+
// `consolidate` process.
|
|
301
297
|
antiCollapse: z
|
|
302
298
|
.object({
|
|
303
299
|
enabled: z.boolean().optional(),
|
|
304
300
|
maxGeneration: z.number().int().min(1).optional(),
|
|
305
301
|
lexicalDiversityCheck: z.boolean().optional(),
|
|
306
302
|
randomClusterFraction: z.number().min(0).max(1).optional(),
|
|
303
|
+
mergeInformationFloor: z.boolean().optional(),
|
|
304
|
+
minSpecificityRetention: z.number().min(0).max(1).optional(),
|
|
307
305
|
})
|
|
308
306
|
.passthrough()
|
|
309
307
|
.optional(),
|
|
@@ -633,6 +631,29 @@ const ImproveSalienceSchema = z
|
|
|
633
631
|
replayBudget: z.number().int().min(0).optional(),
|
|
634
632
|
})
|
|
635
633
|
.passthrough();
|
|
634
|
+
// R5 — settings for the longitudinal collapse/churn detector
// (`improve.collapseDetector`). The v1 detector is observe-only, deterministic
// and fail-open, and it runs only on cycles where consolidate/recombine did
// work. It is ON by default; set `improve.collapseDetector.enabled: false` to
// opt out. See docs/design/improve-collapse-churn-detector-design.md.
const ImproveCollapseDetectorSchema = z
    .object({
        enabled: z.boolean().optional(),
        // How many canaries are minted on the first run
        // (owner-approved 30–50 range; default 40).
        canaryCount: z.number().int().min(3).max(200).optional(),
        // Top-K cutoff used for canary recall/nDCG (default 10).
        k: z.number().int().min(1).max(100).optional(),
        // Trend window, counted in qualifying cycles (default 5).
        windowCycles: z.number().int().min(2).max(50).optional(),
        // Collapse fires when mean recall drops by this absolute amount
        // versus the window median (default 0.15).
        recallDropThreshold: z.number().min(0).max(1).optional(),
        // Collapse fires when the distinct-content ratio declines by this
        // much over the window (default 0.05).
        entropyDropThreshold: z.number().min(0).max(1).optional(),
        // Churn never fires while the accepted-action volume over the window
        // is below this value (default 25).
        churnMinAcceptedActions: z.number().int().min(1).optional(),
        // Retention for improve_cycle_metrics rows
        // (default 365 days, owner-approved).
        retentionDays: z.number().int().min(1).optional(),
    })
    .passthrough();
|
|
636
657
|
export const ImproveConfigSchema = z
|
|
637
658
|
.object({
|
|
638
659
|
utilityDecay: ImproveUtilityDecaySchema.optional(),
|
|
@@ -640,6 +661,7 @@ export const ImproveConfigSchema = z
|
|
|
640
661
|
calibration: ImproveCalibrationSchema.optional(),
|
|
641
662
|
exploration: ImproveExplorationSchema.optional(),
|
|
642
663
|
salience: ImproveSalienceSchema.optional(),
|
|
664
|
+
collapseDetector: ImproveCollapseDetectorSchema.optional(),
|
|
643
665
|
})
|
|
644
666
|
.passthrough();
|
|
645
667
|
// ── Index / per-pass ────────────────────────────────────────────────────────
|
|
@@ -278,7 +278,19 @@ export function loadConfig() {
|
|
|
278
278
|
warnIfProjectConfigPresent(process.cwd());
|
|
279
279
|
return loadUserConfig();
|
|
280
280
|
}
|
|
281
|
+
// Test seam: while set, `saveConfig` hands the config to this function instead
// of persisting it.
let saveConfigOverride;
/**
 * TEST-ONLY. Replace the implementation behind `saveConfig`.
 *
 * @param {((config: object) => void) | undefined} fake - Function that will
 *   receive the config passed to `saveConfig`; pass `undefined` to restore
 *   the real implementation.
 */
export function _setSaveConfigForTests(fake) {
    saveConfigOverride = fake;
}
|
|
281
286
|
/**
 * Save `config`. When a test override has been installed the config is handed
 * to that override and nothing else happens; otherwise the real save routine
 * (`saveConfigReal`) runs.
 *
 * @param {object} config - Configuration object to save.
 * @returns {void}
 */
export function saveConfig(config) {
    if (!saveConfigOverride) {
        saveConfigReal(config);
        return;
    }
    saveConfigOverride(config);
}
|
|
293
|
+
function saveConfigReal(config) {
|
|
282
294
|
cachedConfig = undefined;
|
|
283
295
|
const configPath = getConfigPath();
|
|
284
296
|
const dir = path.dirname(configPath);
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
 * Default per-metric weights used by `scoreCurateCase` to blend the four
 * curate metrics into one composite score. The keys match the metric names
 * produced by the scoring helpers in this module.
 */
export const DEFAULT_CURATE_WEIGHTS = {
    ndcg: 0.5,
    recall: 0.2,
    mrr: 0.1,
    noBannedAboveRequired: 0.2,
};
|
|
10
|
+
/**
 * nDCG@k with binary relevance: gain 1 for relevant refs, 0 otherwise.
 *
 * @param {string[]} returned - Result refs in ranked order (best first).
 * @param {Set<string>} relevant - Refs judged relevant.
 * @param {number} [k] - Rank cutoff. A missing or non-finite value means
 *   "no cutoff"; fractional values are truncated and negatives clamp to 0.
 * @returns {number} DCG / ideal DCG, in [0, 1]. Returns 1 when the ideal DCG
 *   is 0 (no relevant refs, or a cutoff of 0) — nothing could have been missed.
 */
export function ndcgAtK(returned, relevant, k) {
    // Normalise the cutoff once so the DCG window and the ideal-DCG window are
    // always the same size. Previously an undefined k made idealCount NaN,
    // which zeroed the ideal DCG and reported a perfect score for any result.
    const cutoff = Number.isFinite(k) ? Math.max(0, Math.trunc(k)) : Number.POSITIVE_INFINITY;
    const top = returned.slice(0, cutoff);
    // Each relevant ref may earn gain only once: a ref repeated in the result
    // list must not push the score above the ideal ordering (nDCG > 1).
    const credited = new Set();
    let dcg = 0;
    for (let i = 0; i < top.length; i++) {
        const ref = top[i];
        if (!relevant.has(ref) || credited.has(ref))
            continue;
        credited.add(ref);
        dcg += 1 / Math.log2(i + 2);
    }
    // Ideal ordering: every relevant ref packed at the top of the window.
    const idealCount = Math.min(cutoff, relevant.size);
    let idcg = 0;
    for (let i = 0; i < idealCount; i++)
        idcg += 1 / Math.log2(i + 2);
    return idcg === 0 ? 1 : dcg / idcg;
}
|
|
24
|
+
/**
 * Recall@k: the share of relevant refs that appear in the first `k` results.
 *
 * @param {string[]} returned - Result refs in ranked order.
 * @param {Set<string>} relevant - Refs judged relevant.
 * @param {number} k - Rank cutoff applied to `returned`.
 * @returns {number} Hits / relevant.size, or 1 when there are no relevant refs.
 */
export function recallAtK(returned, relevant, k) {
    const wanted = relevant.size;
    if (wanted === 0) {
        return 1;
    }
    const window = new Set(returned.slice(0, k));
    const found = [...relevant].filter((ref) => window.has(ref)).length;
    return found / wanted;
}
|
|
34
|
+
/**
 * Reciprocal rank of the first relevant ref in `returned`.
 *
 * @param {string[]} returned - Result refs in ranked order.
 * @param {Set<string>} relevant - Refs judged relevant.
 * @returns {number} 1 / (1-based rank of the first relevant ref), or 0 when
 *   no relevant ref is present.
 */
export function mrr(returned, relevant) {
    const firstHit = returned.findIndex((ref) => relevant.has(ref));
    return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
|
|
41
|
+
/**
 * Leapfrog gate. A banned ref "leapfrogs" when it is ranked ABOVE at least one
 * relevant ref that is present in the results — i.e. above the lowest-ranked
 * present relevant ref.
 *
 * @param {string[]} returned - Result refs in ranked order.
 * @param {Set<string>} relevant - Refs judged relevant.
 * @param {Set<string>} banned - Refs that must not outrank relevant ones.
 * @returns {{ score: number, leapfrogCount: number }} `score` is the fraction
 *   of present banned refs that do NOT leapfrog (1 when no relevant ref or no
 *   banned ref is present); `leapfrogCount` is the raw violation count.
 */
export function noBannedAboveRequired(returned, relevant, banned) {
    // One pass records each ref's first position and the position of the
    // lowest-ranked relevant ref.
    const firstPosition = new Map();
    let lowestRelevantPosition = -1;
    for (let pos = 0; pos < returned.length; pos++) {
        const ref = returned[pos];
        if (!firstPosition.has(ref)) {
            firstPosition.set(ref, pos);
        }
        if (relevant.has(ref)) {
            lowestRelevantPosition = pos;
        }
    }
    if (lowestRelevantPosition < 0) {
        // Nothing relevant is present, so nothing can be leapfrogged.
        return { score: 1, leapfrogCount: 0 };
    }
    let bannedSeen = 0;
    let violations = 0;
    for (const ref of returned) {
        if (!banned.has(ref)) {
            continue;
        }
        bannedSeen += 1;
        // A repeated banned ref is judged by its first (best) position.
        if (firstPosition.get(ref) < lowestRelevantPosition) {
            violations += 1;
        }
    }
    if (bannedSeen === 0) {
        return { score: 1, leapfrogCount: 0 };
    }
    return { score: 1 - violations / bannedSeen, leapfrogCount: violations };
}
|
|
70
|
+
/**
 * Score a single curate result (ordered refs) against its judgment.
 *
 * @param {string[]} returned - Refs returned by curate, in ranked order.
 * @param {{ limit: number, relevant: Iterable<string>, banned: Iterable<string> }} judgment -
 *   Expected outcome: `limit` is the rank cutoff for nDCG/recall, `relevant`
 *   and `banned` list the refs used by the metrics.
 * @param {typeof DEFAULT_CURATE_WEIGHTS} [weights] - Per-metric weights for
 *   the composite score.
 * @returns {{ ndcg: number, recall: number, mrr: number, noBannedAboveRequired: number, bannedLeapfrogCount: number, score: number }}
 *   The individual metrics plus their weighted sum.
 */
export function scoreCurateCase(returned, judgment, weights = DEFAULT_CURATE_WEIGHTS) {
    const relevant = new Set(judgment.relevant);
    const banned = new Set(judgment.banned);
    const cutoff = judgment.limit;
    const metrics = {
        ndcg: ndcgAtK(returned, relevant, cutoff),
        recall: recallAtK(returned, relevant, cutoff),
        mrr: mrr(returned, relevant),
    };
    const { score: gateScore, leapfrogCount } = noBannedAboveRequired(returned, relevant, banned);
    // Weighted sum, accumulated in a fixed order: ndcg, recall, mrr, gate.
    const score = metrics.ndcg * weights.ndcg +
        metrics.recall * weights.recall +
        metrics.mrr * weights.mrr +
        gateScore * weights.noBannedAboveRequired;
    return {
        ...metrics,
        noBannedAboveRequired: gateScore,
        bannedLeapfrogCount: leapfrogCount,
        score,
    };
}
|
|
89
|
+
/**
 * Aggregate per-case metrics into a suite summary.
 *
 * @param {Array<{ score: number, ndcg: number, recall: number, mrr: number, noBannedAboveRequired: number, bannedLeapfrogCount: number }>} metrics -
 *   One entry per scored case.
 * @returns {{ caseCount: number, meanScore: number, meanNdcg: number, meanRecall: number, meanMrr: number, meanNoBannedAboveRequired: number, totalBannedLeapfrog: number }}
 *   Means of each metric and the total leapfrog count. For an empty suite
 *   every mean is 0 except the leapfrog gate, which is 1.
 */
export function summarizeCurateMetrics(metrics) {
    const caseCount = metrics.length;
    if (caseCount === 0) {
        return {
            caseCount: 0,
            meanScore: 0,
            meanNdcg: 0,
            meanRecall: 0,
            meanMrr: 0,
            meanNoBannedAboveRequired: 1,
            totalBannedLeapfrog: 0,
        };
    }
    // Left-to-right accumulation from 0 over one named field.
    const total = (field) => {
        let acc = 0;
        for (const entry of metrics) {
            acc += entry[field];
        }
        return acc;
    };
    const mean = (field) => total(field) / caseCount;
    return {
        caseCount,
        meanScore: mean("score"),
        meanNdcg: mean("ndcg"),
        meanRecall: mean("recall"),
        meanMrr: mean("mrr"),
        meanNoBannedAboveRequired: mean("noBannedAboveRequired"),
        totalBannedLeapfrog: total("bannedLeapfrogCount"),
    };
}
|
|
@@ -700,6 +700,62 @@ const MIGRATIONS = [
|
|
|
700
700
|
ALTER TABLE asset_salience ADD COLUMN encoding_source TEXT DEFAULT NULL;
|
|
701
701
|
`,
|
|
702
702
|
},
|
|
703
|
+
// ── Migration 016 — collapse/churn detector (R5) ─────────────────────────────
|
|
704
|
+
//
|
|
705
|
+
// Longitudinal store-health history for the improve pipeline
|
|
706
|
+
// (docs/design/improve-collapse-churn-detector-design.md).
|
|
707
|
+
//
|
|
708
|
+
// canary_queries — the fixed canary set, minted deterministically from the
|
|
709
|
+
// live stash on first detector run and NEVER auto-refreshed (silent
|
|
710
|
+
// re-baselining is how a slow collapse hides). `canary_set_id` groups one
|
|
711
|
+
// mint; deactivated sets keep their rows (active = 0) so historical cycle
|
|
712
|
+
// rows stay interpretable. Tens of rows; never purged.
|
|
713
|
+
//
|
|
714
|
+
// improve_cycle_metrics — one row per qualifying improve cycle (a run where
|
|
715
|
+
// consolidate processed ≥1 op or recombine evaluated ≥1 cluster). Every
|
|
716
|
+
// column is a scalar or a size-capped JSON blob (< 2 KB/row by
|
|
717
|
+
// construction — the result_json lesson applied). Retention: 365 days via
|
|
718
|
+
// purgeOldCycleMetrics. Trend queries drive the collapse/churn alert
|
|
719
|
+
// evaluation and the health advisory; `canary_set_id` scoping prevents
|
|
720
|
+
// comparing across canary re-mints.
|
|
721
|
+
{
|
|
722
|
+
id: "016-collapse-churn-detector",
|
|
723
|
+
up: `
|
|
724
|
+
CREATE TABLE IF NOT EXISTS canary_queries (
|
|
725
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
726
|
+
canary_set_id TEXT NOT NULL,
|
|
727
|
+
anchor_ref TEXT NOT NULL,
|
|
728
|
+
query TEXT NOT NULL,
|
|
729
|
+
source TEXT NOT NULL DEFAULT 'auto',
|
|
730
|
+
active INTEGER NOT NULL DEFAULT 1,
|
|
731
|
+
created_at TEXT NOT NULL
|
|
732
|
+
);
|
|
733
|
+
CREATE INDEX IF NOT EXISTS idx_canary_queries_active
|
|
734
|
+
ON canary_queries(active, canary_set_id);
|
|
735
|
+
|
|
736
|
+
CREATE TABLE IF NOT EXISTS improve_cycle_metrics (
|
|
737
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
738
|
+
run_id TEXT NOT NULL,
|
|
739
|
+
ts TEXT NOT NULL,
|
|
740
|
+
pass TEXT NOT NULL,
|
|
741
|
+
canary_set_id TEXT NOT NULL,
|
|
742
|
+
mean_recall REAL NOT NULL,
|
|
743
|
+
mean_ndcg REAL NOT NULL,
|
|
744
|
+
mean_mrr REAL NOT NULL,
|
|
745
|
+
canary_ranks_json TEXT NOT NULL,
|
|
746
|
+
store_total INTEGER NOT NULL,
|
|
747
|
+
store_by_type_json TEXT NOT NULL,
|
|
748
|
+
distinct_content_ratio REAL NOT NULL,
|
|
749
|
+
mean_bigram_diversity REAL NOT NULL,
|
|
750
|
+
over_generation_count INTEGER NOT NULL,
|
|
751
|
+
accepted_actions INTEGER NOT NULL,
|
|
752
|
+
merge_floor_violations INTEGER NOT NULL DEFAULT 0,
|
|
753
|
+
alerts_json TEXT NOT NULL DEFAULT '[]'
|
|
754
|
+
);
|
|
755
|
+
CREATE INDEX IF NOT EXISTS idx_improve_cycle_metrics_ts
|
|
756
|
+
ON improve_cycle_metrics(ts);
|
|
757
|
+
`,
|
|
758
|
+
},
|
|
703
759
|
];
|
|
704
760
|
/**
|
|
705
761
|
* Apply every pending migration in a single transaction per migration.
|
package/dist/core/state-db.js
CHANGED
|
@@ -472,19 +472,20 @@ export function insertProposalIfAbsent(db, proposal, stashDir) {
|
|
|
472
472
|
/**
|
|
473
473
|
* Errors `BEGIN IMMEDIATE` can throw under concurrent-writer contention that are
|
|
474
474
|
* transient (the statement did NOT start a usable transaction) and safe to
|
|
475
|
-
* retry
|
|
475
|
+
* retry:
|
|
476
476
|
* - "database is locked" / SQLITE_BUSY — another writer holds the lock.
|
|
477
|
-
* - "cannot start a transaction within a transaction" — bun:sqlite can leave
|
|
478
|
-
* the connection reporting an open transaction after a contended busy-wait
|
|
479
|
-
* on BEGIN IMMEDIATE (observed only under heavy parallel load, e.g. the
|
|
480
|
-
* proposal-queue worker race). A ROLLBACK clears that phantom state.
|
|
481
477
|
* These are start-of-transaction failures only; an error thrown by `fn` is a
|
|
482
478
|
* real failure and is NEVER retried.
|
|
479
|
+
*
|
|
480
|
+
* "cannot start a transaction within a transaction" is deliberately NOT
|
|
481
|
+
* retryable: it means a transaction is already open on this connection (a
|
|
482
|
+
* re-entrant call — handled by the entry guard in withImmediateTransaction),
|
|
483
|
+
* and "retrying" it with a ROLLBACK would destroy the caller's transaction
|
|
484
|
+
* (issue #686).
|
|
483
485
|
*/
|
|
484
486
|
function isRetryableBeginError(err) {
|
|
485
487
|
const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
|
|
486
|
-
return (msg.includes("
|
|
487
|
-
msg.includes("database is locked") ||
|
|
488
|
+
return (msg.includes("database is locked") ||
|
|
488
489
|
msg.includes("database table is locked") ||
|
|
489
490
|
// Phantom BEGIN (see below) — synthesized when BEGIN IMMEDIATE returns
|
|
490
491
|
// without opening a transaction. Safe to retry: fn() has not run.
|
|
@@ -498,6 +499,16 @@ function sleepSyncMs(ms) {
|
|
|
498
499
|
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
|
|
499
500
|
}
|
|
500
501
|
export function withImmediateTransaction(db, fn) {
|
|
502
|
+
// Re-entrancy guard (issue #686): if a transaction is already open on this
|
|
503
|
+
// connection (e.g. a nested withImmediateTransaction call inside an outer
|
|
504
|
+
// frame's fn), join it — run fn directly with no BEGIN/COMMIT/ROLLBACK of
|
|
505
|
+
// our own. Without this, the nested BEGIN throws "cannot start a transaction
|
|
506
|
+
// within a transaction", which the old retry path answered with an
|
|
507
|
+
// unconditional ROLLBACK — destroying the OUTER transaction and leaving its
|
|
508
|
+
// COMMIT to fail with "cannot commit - no transaction is active".
|
|
509
|
+
if (db.inTransaction) {
|
|
510
|
+
return fn();
|
|
511
|
+
}
|
|
501
512
|
let lastBeginErr;
|
|
502
513
|
for (let attempt = 1; attempt <= WITH_IMMEDIATE_TX_MAX_ATTEMPTS; attempt++) {
|
|
503
514
|
try {
|
|
@@ -515,13 +526,15 @@ export function withImmediateTransaction(db, fn) {
|
|
|
515
526
|
catch (err) {
|
|
516
527
|
lastBeginErr = err;
|
|
517
528
|
if (isRetryableBeginError(err) && attempt < WITH_IMMEDIATE_TX_MAX_ATTEMPTS) {
|
|
518
|
-
//
|
|
519
|
-
//
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
529
|
+
// Only roll back a transaction we can see — never blind-ROLLBACK, since
|
|
530
|
+
// that could destroy a transaction this frame does not own.
|
|
531
|
+
if (db.inTransaction) {
|
|
532
|
+
try {
|
|
533
|
+
db.exec("ROLLBACK");
|
|
534
|
+
}
|
|
535
|
+
catch {
|
|
536
|
+
// Transaction already gone — fine.
|
|
537
|
+
}
|
|
525
538
|
}
|
|
526
539
|
sleepSyncMs(2 ** (attempt - 1));
|
|
527
540
|
continue;
|
|
@@ -530,15 +543,25 @@ export function withImmediateTransaction(db, fn) {
|
|
|
530
543
|
}
|
|
531
544
|
try {
|
|
532
545
|
const result = fn();
|
|
546
|
+
if (!db.inTransaction) {
|
|
547
|
+
// The transaction we opened vanished while fn() ran (e.g. an
|
|
548
|
+
// auto-rollback or a stray ROLLBACK inside fn). fn's writes may have
|
|
549
|
+
// escaped serialization, so retrying is unsafe — fail loudly instead of
|
|
550
|
+
// letting COMMIT throw the opaque "cannot commit - no transaction is
|
|
551
|
+
// active" SQLiteError.
|
|
552
|
+
throw new Error("withImmediateTransaction invariant violated: transaction opened by BEGIN IMMEDIATE was no longer active after the transaction body ran; refusing to COMMIT (writes may have escaped serialization)");
|
|
553
|
+
}
|
|
533
554
|
db.exec("COMMIT");
|
|
534
555
|
return result;
|
|
535
556
|
}
|
|
536
557
|
catch (err) {
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
558
|
+
if (db.inTransaction) {
|
|
559
|
+
try {
|
|
560
|
+
db.exec("ROLLBACK");
|
|
561
|
+
}
|
|
562
|
+
catch {
|
|
563
|
+
// Ignore rollback failures so the original error is preserved.
|
|
564
|
+
}
|
|
542
565
|
}
|
|
543
566
|
throw err; // a real error inside the transaction body — never retried.
|
|
544
567
|
}
|
|
@@ -1252,3 +1275,107 @@ export function upsertBodyEmbeddings(db, entries) {
|
|
|
1252
1275
|
}
|
|
1253
1276
|
})();
|
|
1254
1277
|
}
|
|
1278
|
+
/**
 * Insert a freshly minted canary set: every row is stored active and shares
 * one set id and one creation timestamp.
 *
 * @param {object} db - Database handle exposing `prepare` and `transaction`.
 * @param {string} canarySetId - Identifier shared by every row of this mint.
 * @param {Array<{ anchorRef: string, query: string, source?: string }>} canaries -
 *   Canaries to store; `source` falls back to "auto". An empty list is a no-op.
 * @param {string} [now] - Timestamp for `created_at`; defaults to the current
 *   time as an ISO string.
 * @returns {void}
 */
export function insertCanaries(db, canarySetId, canaries, now) {
    if (canaries.length === 0) {
        return;
    }
    const createdAt = now ?? new Date().toISOString();
    const insertRow = db.prepare(`
    INSERT INTO canary_queries (canary_set_id, anchor_ref, query, source, active, created_at)
    VALUES (?, ?, ?, ?, 1, ?)
  `);
    // All rows go in through a single transaction wrapper.
    const insertAll = db.transaction(() => {
        canaries.forEach((canary) => {
            insertRow.run(canarySetId, canary.anchorRef, canary.query, canary.source ?? "auto", createdAt);
        });
    });
    insertAll();
}
|
|
1293
|
+
/**
 * Load the active canary set (an empty array means none has been minted).
 *
 * Only the NEWEST active set is returned: if an interrupted refresh (or a bug)
 * ever leaves two sets active, mixing their rows would silently corrupt the
 * recall/entropy trend baselines. Rows of older still-active sets are simply
 * never returned.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @returns {object[]} Rows of the newest active set, ordered by id.
 */
export function getActiveCanaries(db) {
    const newestActiveSet = db.prepare(`SELECT * FROM canary_queries
         WHERE active = 1 AND canary_set_id = (
           SELECT canary_set_id FROM canary_queries WHERE active = 1
           ORDER BY created_at DESC, id DESC LIMIT 1
         )
         ORDER BY id`);
    return newestActiveSet.all();
}
|
|
1308
|
+
/**
 * Load every row of one canary set by its exact set id, regardless of the
 * rows' active state, in insertion (id) order.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} canarySetId - Set id to look up.
 * @returns {object[]} Matching rows ordered by id.
 */
export function getCanariesBySetId(db, canarySetId) {
    const bySetId = db.prepare(`SELECT * FROM canary_queries WHERE canary_set_id = ? ORDER BY id`);
    return bySetId.all(canarySetId);
}
|
|
1314
|
+
/**
 * List every distinct canary_set_id that still has at least one active row.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @returns {string[]} The distinct set ids, in the order the query yields them.
 */
export function listActiveCanarySetIds(db) {
    const activeSets = db.prepare(`SELECT DISTINCT canary_set_id FROM canary_queries WHERE active = 1`);
    const ids = [];
    for (const row of activeSets.all()) {
        ids.push(row.canary_set_id);
    }
    return ids;
}
|
|
1319
|
+
/**
 * Deactivate every active canary row in a set. Rows are RETAINED (active = 0)
 * so historical improve_cycle_metrics rows keyed on the old canary_set_id stay
 * interpretable; only `akm improve canary --refresh` calls this.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} canarySetId - Set whose active rows are switched off.
 * @returns {number} Number of rows changed (a bigint change count is
 *   converted to a plain number; a missing count is reported as 0).
 */
export function deactivateCanarySet(db, canarySetId) {
    const deactivate = db.prepare(`UPDATE canary_queries SET active = 0 WHERE canary_set_id = ? AND active = 1`);
    const outcome = deactivate.run(canarySetId);
    const affected = outcome.changes ?? 0;
    if (typeof affected === "bigint") {
        return Number(affected);
    }
    return affected;
}
|
|
1331
|
+
/**
 * Persist one qualifying cycle's store-health snapshot as a single
 * improve_cycle_metrics row.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {object} row - Snapshot whose property names match the table's
 *   column names (run_id, ts, pass, canary_set_id, …, alerts_json).
 * @returns {void}
 */
export function insertCycleMetrics(db, row) {
    const insert = db.prepare(`
    INSERT INTO improve_cycle_metrics
      (run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
       canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
       mean_bigram_diversity, over_generation_count, accepted_actions,
       merge_floor_violations, alerts_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    // Values are listed in exactly the column order of the INSERT above.
    const values = [
        row.run_id,
        row.ts,
        row.pass,
        row.canary_set_id,
        row.mean_recall,
        row.mean_ndcg,
        row.mean_mrr,
        row.canary_ranks_json,
        row.store_total,
        row.store_by_type_json,
        row.distinct_content_ratio,
        row.mean_bigram_diversity,
        row.over_generation_count,
        row.accepted_actions,
        row.merge_floor_violations,
        row.alerts_json,
    ];
    insert.run(...values);
}
|
|
1342
|
+
/**
 * Load the most recent cycle rows for one canary set, OLDEST-first (the alert
 * evaluator's window order). Scoped by canary_set_id so trends never compare
 * across canary re-mints.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} canarySetId - Canary set whose history is wanted.
 * @param {number} limit - Maximum number of rows. Fractional values are
 *   truncated; negative, missing or non-finite values are treated as 0.
 * @returns {object[]} Up to `limit` newest rows, ordered oldest → newest.
 */
export function queryRecentCycleMetrics(db, canarySetId, limit) {
    // SQLite requires LIMIT to be an integer, so never bind NaN or a
    // fractional number: normalise to a non-negative integer first.
    const rowCap = Number.isFinite(limit) ? Math.max(0, Math.trunc(limit)) : 0;
    const rows = db
        .prepare(`SELECT run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
              canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
              mean_bigram_diversity, over_generation_count, accepted_actions,
              merge_floor_violations, alerts_json
         FROM improve_cycle_metrics WHERE canary_set_id = ?
         ORDER BY ts DESC, id DESC LIMIT ?`)
        .all(canarySetId, rowCap);
    // The query returns newest-first; callers want the window oldest-first.
    return rows.reverse();
}
|
|
1358
|
+
/**
 * Load the single most recent cycle row across all canary sets (used by the
 * health surface).
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @returns {object | undefined} The newest improve_cycle_metrics row, or
 *   `undefined` when the table has no rows.
 */
export function getLatestCycleMetrics(db) {
    const newestRow = db.prepare(`SELECT run_id, ts, pass, canary_set_id, mean_recall, mean_ndcg, mean_mrr,
              canary_ranks_json, store_total, store_by_type_json, distinct_content_ratio,
              mean_bigram_diversity, over_generation_count, accepted_actions,
              merge_floor_violations, alerts_json
         FROM improve_cycle_metrics ORDER BY ts DESC, id DESC LIMIT 1`);
    // Normalise a null "no row" result to undefined.
    return newestRow.get() ?? undefined;
}
|
|
1369
|
+
/**
 * Delete cycle rows older than `retentionDays` (default 365 — owner-approved;
 * a slow collapse needs a longer trend window than the 90-day events log).
 * canary_queries rows are never purged.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {number} [retentionDays=365] - Age in days beyond which rows are
 *   deleted. Non-finite or non-positive values purge nothing.
 * @returns {number} The purged row count (0 when nothing was purged).
 */
export function purgeOldCycleMetrics(db, retentionDays = 365) {
    if (!Number.isFinite(retentionDays) || retentionDays <= 0)
        return 0;
    const cutoffDate = new Date(Date.now() - retentionDays * 86_400_000);
    // A huge retention value pushes the cutoff outside the range a Date can
    // represent; toISOString() would throw RangeError on it. No row can be
    // older than such a cutoff, so there is nothing to purge.
    if (Number.isNaN(cutoffDate.getTime()))
        return 0;
    const cutoff = cutoffDate.toISOString();
    const result = db.prepare("DELETE FROM improve_cycle_metrics WHERE ts < ?").run(cutoff);
    const changes = result.changes ?? 0;
    return typeof changes === "bigint" ? Number(changes) : changes;
}
|
package/dist/core/warn.js
CHANGED
|
@@ -17,6 +17,11 @@ import path from "node:path";
|
|
|
17
17
|
// Module-level output state.
let quiet = false;
let verbose = false;
let logFilePath;
// Test seam: while set, info/warn/error/warnVerbose pass (level, args) to this
// function and return without producing any real output.
let sinkOverride;
/**
 * TEST-ONLY. Replace the output sink used by the logging helpers.
 *
 * @param {((level: string, args: unknown[]) => void) | undefined} fake -
 *   Receiver for every log call; pass `undefined` to restore real output.
 */
export function _setWarnSinkForTests(fake) {
    sinkOverride = fake;
}
/**
 * Set the quiet flag that the logging helpers check before writing to the
 * console.
 *
 * @param {boolean} value - New value of the flag.
 */
export function setQuiet(value) {
    quiet = value;
}
|
|
@@ -96,6 +101,10 @@ function appendToLogFile(level, args) {
|
|
|
96
101
|
* Use for progress counters and status lines (replaces console.error used for progress).
|
|
97
102
|
*/
|
|
98
103
|
export function info(...args) {
|
|
104
|
+
if (sinkOverride) {
|
|
105
|
+
sinkOverride("info", args);
|
|
106
|
+
return;
|
|
107
|
+
}
|
|
99
108
|
appendToLogFile("INFO", args);
|
|
100
109
|
if (!quiet) {
|
|
101
110
|
console.warn(...args);
|
|
@@ -107,6 +116,10 @@ export function info(...args) {
|
|
|
107
116
|
* Drop-in replacement for console.warn() across the codebase.
|
|
108
117
|
*/
|
|
109
118
|
export function warn(...args) {
|
|
119
|
+
if (sinkOverride) {
|
|
120
|
+
sinkOverride("warn", args);
|
|
121
|
+
return;
|
|
122
|
+
}
|
|
110
123
|
appendToLogFile("WARN", args);
|
|
111
124
|
if (!quiet) {
|
|
112
125
|
console.warn(...args);
|
|
@@ -118,6 +131,10 @@ export function warn(...args) {
|
|
|
118
131
|
* Drop-in replacement for console.error() used for diagnostic failures.
|
|
119
132
|
*/
|
|
120
133
|
export function error(...args) {
|
|
134
|
+
if (sinkOverride) {
|
|
135
|
+
sinkOverride("error", args);
|
|
136
|
+
return;
|
|
137
|
+
}
|
|
121
138
|
appendToLogFile("ERROR", args);
|
|
122
139
|
if (!quiet) {
|
|
123
140
|
console.error(...args);
|
|
@@ -129,6 +146,10 @@ export function error(...args) {
|
|
|
129
146
|
* default verbosity (e.g. registry-content workflow validation errors).
|
|
130
147
|
*/
|
|
131
148
|
export function warnVerbose(...args) {
|
|
149
|
+
if (sinkOverride) {
|
|
150
|
+
sinkOverride("warnVerbose", args);
|
|
151
|
+
return;
|
|
152
|
+
}
|
|
132
153
|
if (isVerbose()) {
|
|
133
154
|
warn(...args);
|
|
134
155
|
}
|
package/dist/indexer/db/db.js
CHANGED
|
@@ -1633,6 +1633,11 @@ function bareRef(ref) {
|
|
|
1633
1633
|
* entry_ref populated (see logCurateEvent), so curation is a real retrieval
|
|
1634
1634
|
* signal here. Legacy summary-only curate rows with a NULL entry_ref simply
|
|
1635
1635
|
* contribute nothing.
|
|
1636
|
+
*
|
|
1637
|
+
* Machine-sourced events (`source` = 'improve' or 'task') are EXCLUDED: this
|
|
1638
|
+
* count feeds salience/ranking, and pipeline probe traffic counting as demand
|
|
1639
|
+
* creates a self-reinforcing loop (meta-review 05 DRIFT-6). NULL sources
|
|
1640
|
+
* (pre-column rows) count as user demand.
|
|
1636
1641
|
*/
|
|
1637
1642
|
export function getRetrievalCounts(db, refs) {
|
|
1638
1643
|
if (refs.length === 0)
|
|
@@ -1671,6 +1676,7 @@ export function getRetrievalCounts(db, refs) {
|
|
|
1671
1676
|
FROM usage_events
|
|
1672
1677
|
WHERE event_type IN ('search','show','curate')
|
|
1673
1678
|
AND entry_ref IS NOT NULL
|
|
1679
|
+
AND (source IS NULL OR source NOT IN ('improve','task'))
|
|
1674
1680
|
AND CASE
|
|
1675
1681
|
WHEN instr(entry_ref, '//') > 0
|
|
1676
1682
|
THEN substr(entry_ref, instr(entry_ref, '//') + 2)
|