akm-cli 0.9.0-beta.6 → 0.9.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +71 -0
- package/dist/cli.js +7 -0
- package/dist/commands/feedback-cli.js +42 -37
- package/dist/commands/graph/graph.js +75 -71
- package/dist/commands/health.js +10 -2
- package/dist/commands/improve/consolidate.js +18 -1
- package/dist/commands/improve/distill.js +26 -5
- package/dist/commands/improve/extract-prompt.js +1 -1
- package/dist/commands/improve/improve-auto-accept.js +6 -0
- package/dist/commands/improve/improve-profiles.js +4 -0
- package/dist/commands/improve/improve.js +720 -468
- package/dist/commands/improve/proactive-maintenance.js +113 -0
- package/dist/commands/improve/reflect.js +6 -0
- package/dist/commands/proposal/proposal.js +5 -0
- package/dist/commands/proposal/validators/proposals.js +67 -54
- package/dist/commands/read/curate.js +17 -0
- package/dist/commands/sources/stash-cli.js +10 -2
- package/dist/core/config/config-schema.js +11 -0
- package/dist/core/paths.js +3 -0
- package/dist/core/state-db.js +46 -1
- package/dist/indexer/db/db.js +97 -11
- package/dist/indexer/ensure-index.js +152 -17
- package/dist/indexer/index-writer-lock.js +99 -0
- package/dist/indexer/indexer.js +114 -111
- package/dist/integrations/harnesses/claude/session-log.js +1 -1
- package/dist/llm/client.js +23 -4
- package/dist/scripts/migrate-storage.js +85 -13
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8 -1
- package/dist/sources/providers/tar-utils.js +16 -8
- package/package.json +1 -1
|
@@ -19,6 +19,7 @@ import { info, warn } from "../../core/warn.js";
|
|
|
19
19
|
import { closeDatabase, getAllEntries, getEntryCount, getRetrievalCounts, getUtilityScoresByIds, getZeroResultSearches, openDatabase, openExistingDatabase, } from "../../indexer/db/db.js";
|
|
20
20
|
import { ensureIndex } from "../../indexer/ensure-index.js";
|
|
21
21
|
import { runGraphExtractionPass } from "../../indexer/graph/graph-extraction.js";
|
|
22
|
+
import { withIndexWriterLease } from "../../indexer/index-writer-lock.js";
|
|
22
23
|
import { akmIndex } from "../../indexer/indexer.js";
|
|
23
24
|
import { collectPendingMemories, runMemoryInferencePass, } from "../../indexer/passes/memory-inference.js";
|
|
24
25
|
import { runStalenessDetectionPass } from "../../indexer/passes/staleness-detect.js";
|
|
@@ -46,7 +47,105 @@ import { makeGateConfig, resolveExtractConfidence, runAutoAcceptGate } from "./i
|
|
|
46
47
|
import { isProfileFilteredForAllPasses, resolveImproveProfile, resolveProcessEnabled, shouldSkipRef, } from "./improve-profiles.js";
|
|
47
48
|
import { detectAndWriteContradictions } from "./memory/memory-contradiction-detect.js";
|
|
48
49
|
import { analyzeMemoryCleanup, applyMemoryCleanup } from "./memory/memory-improve.js";
|
|
50
|
+
import { DEFAULT_DUE_DAYS, DEFAULT_MAX_PER_RUN, selectProactiveMaintenanceRefs } from "./proactive-maintenance.js";
|
|
49
51
|
import { akmReflect } from "./reflect.js";
|
|
52
|
+
// #607 Lock Decomposition: fine-grained per-process locks replace the single
|
|
53
|
+
// `improve.lock`. Three independent locks allow concurrent improve runs when
|
|
54
|
+
// they touch different subsystems (e.g. quick-shredder consolidate can run
|
|
55
|
+
// alongside daily reflect+distill).
|
|
56
|
+
//
|
|
57
|
+
//   consolidate.lock   — protects consolidate + memoryInference + graphExtraction (all write index.db)
|
|
58
|
+
// reflect-distill.lock — protects reflect + distill (both write state.db proposals)
|
|
59
|
+
// triage.lock — protects triage (writes proposal promotions)
|
|
60
|
+
//
|
|
61
|
+
// Stale timeouts are per-lock, tuned to the expected runtime of the protected
|
|
62
|
+
// processes: consolidate is disk-bound (1h), reflect+distill is GPU-bound (2h),
|
|
63
|
+
// triage is fast (30min).
|
|
64
|
+
// Lock table keyed by logical lock name. `fileName` is joined onto the lock
// base directory; `staleAfterMs` is the threshold handed to the lock probe.
const PROCESS_LOCK_DEFS = {
    // 1 hour
    consolidate: {
        fileName: "consolidate.lock",
        staleAfterMs: 3_600_000,
    },
    // 2 hours
    reflectDistill: {
        fileName: "reflect-distill.lock",
        staleAfterMs: 7_200_000,
    },
    // 30 minutes
    triage: {
        fileName: "triage.lock",
        staleAfterMs: 1_800_000,
    },
};
|
|
69
|
+
/** Paths of the per-process lock files this process has recorded as acquired. */
const heldProcessLocks = new Set();

/**
 * Forget every tracked lock path.
 *
 * Only the in-memory registry is emptied; no lock file is touched on disk.
 */
export function resetHeldProcessLocks() {
    heldProcessLocks.clear();
}
|
|
73
|
+
/**
 * Resolve the on-disk path of a named per-process lock.
 *
 * @param {string} lockBaseDir - Directory that holds the lock files.
 * @param {string} lockName - Key of `PROCESS_LOCK_DEFS`.
 * @returns {string} `lockBaseDir` joined with the lock's file name.
 * @throws {TypeError} When `lockName` is not an own key of `PROCESS_LOCK_DEFS`.
 */
function processLockPath(lockBaseDir, lockName) {
    // Own-key check: inherited names such as "constructor" must not resolve,
    // and a typo should name the bad key instead of failing on `.fileName`.
    if (!Object.hasOwn(PROCESS_LOCK_DEFS, lockName)) {
        const known = Object.keys(PROCESS_LOCK_DEFS).join(", ");
        throw new TypeError(`Unknown improve process lock "${String(lockName)}"; expected one of: ${known}`);
    }
    return path.join(lockBaseDir, PROCESS_LOCK_DEFS[lockName].fileName);
}
|
|
76
|
+
/**
 * Try to take one per-process improve lock, recovering it when the probe
 * reports the existing lock file as stale.
 *
 * Every successful acquisition records `lockPath` in `heldProcessLocks`.
 *
 * @param {string} lockPath - Path of the lock file; its directory is created if missing.
 * @param {number} staleAfterMs - Staleness threshold forwarded to `probeLock`.
 * @param {boolean|undefined} skipIfLocked - When truthy, a lock held elsewhere
 *   yields `"skipped"` (with a warning) instead of an error.
 * @param {string} lockLabel - Name used in warnings, errors and the recovery event.
 * @returns {"acquired"|"skipped"}
 * @throws {ConfigError} When the lock is held elsewhere and `skipIfLocked` is falsy.
 */
function tryAcquireProcessLock(lockPath, staleAfterMs, skipIfLocked, lockLabel) {
    fs.mkdirSync(path.dirname(lockPath), { recursive: true });
    const lockPayload = () => JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() });
    // Single place that both takes the lock and tracks it for later release.
    const acquireAndTrack = () => {
        if (!tryAcquireLockSync(lockPath, lockPayload())) {
            return false;
        }
        heldProcessLocks.add(lockPath);
        return true;
    };
    if (acquireAndTrack()) {
        return "acquired";
    }
    const probe = probeLock(lockPath, { staleAfterMs });
    if (probe.state === "absent" && acquireAndTrack()) {
        // The previous holder released between our failed attempt and the
        // probe, so the lock was free after all.
        return "acquired";
    }
    // Payload of the existing lock file; null when absent, empty or not valid JSON.
    const rawContent = probe.state === "absent" ? undefined : probe.rawContent;
    let lock = null;
    if (rawContent) {
        try {
            lock = JSON.parse(rawContent);
        }
        catch {
            lock = null;
        }
    }
    if (probe.state === "stale") {
        try {
            appendEvent({
                eventType: "improve_lock_recovered",
                metadata: {
                    lockName: lockLabel,
                    stalePid: lock?.pid ?? null,
                    lockedAt: lock?.startedAt ?? null,
                    recoveredAt: new Date().toISOString(),
                    lockAgeMs: probe.ageMs ?? null,
                    reason: probe.reason === "pid_dead" ? "pid_not_alive" : probe.reason,
                },
            });
        }
        catch {
            /* event emission is best-effort; never block lock recovery */
        }
        releaseLock(lockPath);
        if (acquireAndTrack()) {
            return "acquired";
        }
        // Lost the race to another run that grabbed the freed stale lock.
        if (skipIfLocked) {
            warn(`[improve] ${lockLabel} lock acquired by another run during stale recovery; skipping (--skip-if-locked)`);
            return "skipped";
        }
        throw new ConfigError(`akm improve ${lockLabel} is already running. Delete ${lockPath} to force.`, "INVALID_CONFIG_FILE");
    }
    // Lock is held elsewhere. Fall back to "unknown" rather than printing
    // "undefined" when the payload could not be read.
    const holderPid = lock?.pid ?? "unknown";
    const holderStartedAt = lock?.startedAt ?? "unknown";
    if (skipIfLocked) {
        warn(`[improve] ${lockLabel} lock held by another run (PID ${holderPid}, started ${holderStartedAt}); skipping (--skip-if-locked)`);
        return "skipped";
    }
    throw new ConfigError(`akm improve ${lockLabel} is already running (PID ${holderPid}, started ${holderStartedAt}). Delete ${lockPath} to force.`, "INVALID_CONFIG_FILE");
}
|
|
129
|
+
/**
 * Release one per-process lock and stop tracking it.
 *
 * The release goes through `releaseLockIfOwned(lockPath, process.pid)`, the
 * same call the process-exit backstop uses, instead of a bare unlink. That
 * way a lock another run took over after stale recovery is not deleted from
 * under it.
 *
 * @param {string} lockPath - Path of the lock file to release.
 */
function releaseProcessLock(lockPath) {
    try {
        releaseLockIfOwned(lockPath, process.pid);
    }
    catch {
        // Best-effort: a failed release must never mask the caller's own
        // result or error. The path is untracked below either way.
    }
    heldProcessLocks.delete(lockPath);
}
|
|
138
|
+
/**
 * Release every lock still tracked in `heldProcessLocks`, then empty the set.
 *
 * Each release goes through `releaseLockIfOwned(path, process.pid)`, matching
 * the process-exit backstop, so a lock file that now belongs to another run
 * is left in place. A failure on one path does not stop the others.
 */
function releaseAllProcessLocks() {
    for (const heldPath of heldProcessLocks) {
        try {
            releaseLockIfOwned(heldPath, process.pid);
        }
        catch {
            // Best-effort: keep releasing the remaining locks.
        }
    }
    heldProcessLocks.clear();
}
|
|
50
149
|
function resolveImproveScope(scope) {
|
|
51
150
|
const trimmed = scope?.trim();
|
|
52
151
|
if (!trimmed)
|
|
@@ -102,6 +201,22 @@ export function renderSyncCommitMessage(template, result, nowMs) {
|
|
|
102
201
|
};
|
|
103
202
|
return template.replace(/\{(\w+)\}/g, (match, key) => (Object.hasOwn(tokens, key) ? tokens[key] : match));
|
|
104
203
|
}
|
|
204
|
+
/**
 * Collapse entries that share the same `ref`, keeping the first occurrence of
 * each and the order in which those first occurrences appeared. Used when the
 * eligibility sources (feedback-signal, P0-A high-retrieval, Layer-2
 * proactive-maintenance) are merged, so no ref enters the loop twice.
 *
 * @param {Iterable<{ref: unknown}>} refs - Entries carrying a `ref` key.
 * @returns {Array} New array of the first-seen entries; the input is not modified.
 */
function dedupeRefs(refs) {
    // A Map iterates in insertion order, so storing only the first entry per
    // key gives first-seen ordering.
    const firstByRef = new Map();
    for (const entry of refs) {
        if (!firstByRef.has(entry.ref)) {
            firstByRef.set(entry.ref, entry);
        }
    }
    return [...firstByRef.values()];
}
|
|
105
220
|
async function collectEligibleRefs(scope, stashDir, improveProfile) {
|
|
106
221
|
if (scope.mode === "ref" && scope.value) {
|
|
107
222
|
const parsed = parseAssetRef(scope.value);
|
|
@@ -491,103 +606,16 @@ export async function akmImprove(options = {}) {
|
|
|
491
606
|
// timeout root cause). Because beforeEach runs synchronously, env is still the
|
|
492
607
|
// calling test's own at this point; we capture it before yielding the loop.
|
|
493
608
|
const resolvedStateDbPath = getStateDbPathInDataDir();
|
|
494
|
-
//
|
|
495
|
-
//
|
|
496
|
-
//
|
|
497
|
-
//
|
|
498
|
-
//
|
|
499
|
-
//
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
const
|
|
504
|
-
const acquireLock = () => {
|
|
505
|
-
fs.mkdirSync(path.dirname(resolvedLockPath), { recursive: true });
|
|
506
|
-
const lockPayload = () => JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() });
|
|
507
|
-
if (tryAcquireLockSync(resolvedLockPath, lockPayload()))
|
|
508
|
-
return "acquired";
|
|
509
|
-
// Lock file already exists — probe to determine whether it's still held
|
|
510
|
-
// or whether the prior run died without cleaning up.
|
|
511
|
-
const probe = probeLock(resolvedLockPath, { staleAfterMs: MAX_LOCK_AGE_MS });
|
|
512
|
-
const rawContent = probe.state === "absent" ? undefined : probe.rawContent;
|
|
513
|
-
const lock = rawContent
|
|
514
|
-
? (() => {
|
|
515
|
-
try {
|
|
516
|
-
return JSON.parse(rawContent);
|
|
517
|
-
}
|
|
518
|
-
catch {
|
|
519
|
-
return null;
|
|
520
|
-
}
|
|
521
|
-
})()
|
|
522
|
-
: null;
|
|
523
|
-
if (probe.state === "stale") {
|
|
524
|
-
// O-7 / #394: Emit improve_lock_recovered event before recovery so the
|
|
525
|
-
// audit trail records the abnormal prior-run exit (Temporal/Airflow pattern).
|
|
526
|
-
try {
|
|
527
|
-
appendEvent({
|
|
528
|
-
eventType: "improve_lock_recovered",
|
|
529
|
-
metadata: {
|
|
530
|
-
stalePid: lock?.pid ?? null,
|
|
531
|
-
lockedAt: lock?.startedAt ?? null,
|
|
532
|
-
recoveredAt: new Date().toISOString(),
|
|
533
|
-
lockAgeMs: probe.ageMs ?? null,
|
|
534
|
-
reason: probe.reason === "pid_dead" ? "pid_not_alive" : probe.reason,
|
|
535
|
-
},
|
|
536
|
-
});
|
|
537
|
-
}
|
|
538
|
-
catch {
|
|
539
|
-
/* event emission is best-effort; never block lock recovery */
|
|
540
|
-
}
|
|
541
|
-
releaseLock(resolvedLockPath);
|
|
542
|
-
if (tryAcquireLockSync(resolvedLockPath, lockPayload()))
|
|
543
|
-
return "acquired";
|
|
544
|
-
// Lost the race to another run that grabbed the freed stale lock.
|
|
545
|
-
if (options.skipIfLocked) {
|
|
546
|
-
warn("[improve] another run acquired the lock during stale recovery; skipping (--skip-if-locked)");
|
|
547
|
-
return "skipped";
|
|
548
|
-
}
|
|
549
|
-
throw new ConfigError(`akm improve is already running. Delete ${resolvedLockPath} to force.`, "INVALID_CONFIG_FILE");
|
|
550
|
-
}
|
|
551
|
-
// Lock is held by a live run within the staleness window.
|
|
552
|
-
if (options.skipIfLocked) {
|
|
553
|
-
warn(`[improve] another improve run holds the lock (PID ${lock?.pid}, started ${lock?.startedAt}); skipping (--skip-if-locked)`);
|
|
554
|
-
return "skipped";
|
|
555
|
-
}
|
|
556
|
-
throw new ConfigError(`akm improve is already running (PID ${lock?.pid}, started ${lock?.startedAt}). Delete ${resolvedLockPath} to force.`, "INVALID_CONFIG_FILE");
|
|
557
|
-
};
|
|
558
|
-
// Phase 4 lock-leak guard (§7 ordering hazard): hoisting `improve.lock` above
|
|
559
|
-
// the pre-index region (so the triage pre-pass runs under it) means the lock is
|
|
560
|
-
// held while ensureIndex / collectEligibleRefs / contradiction-detection /
|
|
561
|
-
// memory-cleanup analysis run — but the main protecting `try { … } finally {
|
|
562
|
-
// unlinkSync(resolvedLockPath) }` does not begin until after them. A throw in
|
|
563
|
-
// any of those steps would leak the lock. We close that window by wrapping the
|
|
564
|
-
// whole region in a try whose catch releases the lock (when held) and
|
|
565
|
-
// re-throws. The values this region computes are declared in the outer scope so
|
|
566
|
-
// they remain visible to the main run below. The dry-run path never sets
|
|
567
|
-
// `lockAcquired`, so its early return releases nothing.
|
|
568
|
-
let lockAcquired = false;
|
|
569
|
-
const releaseLockOnError = () => {
|
|
570
|
-
if (!lockAcquired)
|
|
571
|
-
return;
|
|
572
|
-
try {
|
|
573
|
-
fs.unlinkSync(resolvedLockPath);
|
|
574
|
-
}
|
|
575
|
-
catch {
|
|
576
|
-
// best-effort release on the error path
|
|
577
|
-
}
|
|
578
|
-
lockAcquired = false;
|
|
579
|
-
};
|
|
580
|
-
// Signal-safe lock release. The SIGTERM/SIGINT/SIGHUP handler in improve-cli.ts
|
|
581
|
-
// calls `process.exit()`, which does NOT run the `finally` below that owns lock
|
|
582
|
-
// release — so a cron-timeout SIGTERM leaked `improve.lock` every run.
|
|
583
|
-
// `process.exit()` DOES fire `'exit'` listeners, so we release the lock from
|
|
584
|
-
// one. `releaseLockIfOwned` only unlinks a lock still owned by this PID, so it
|
|
585
|
-
// is safe even if a later run re-acquired it. The listener is removed in the
|
|
586
|
-
// `finally` so the normal path stays single-release and repeated in-process
|
|
587
|
-
// `akmImprove` calls (tests) do not accumulate listeners.
|
|
588
|
-
const releaseLockOnExit = () => {
|
|
589
|
-
releaseLockIfOwned(resolvedLockPath, process.pid);
|
|
590
|
-
};
|
|
609
|
+
// #607 Lock decomposition: three per-process locks replace the single
|
|
610
|
+
// `improve.lock`. Each process acquires only the lock(s) it needs, so
|
|
611
|
+
// quick-shredder consolidate can run alongside daily reflect+distill.
|
|
612
|
+
//
|
|
613
|
+
// consolidate.lock — protects consolidate + memoryInference + graphExtraction (index.db writers)
|
|
614
|
+
// reflect-distill.lock — protects reflect + distill (state.db proposal writers)
|
|
615
|
+
// triage.lock — protects triage pre-pass (state.db proposal promotions)
|
|
616
|
+
//
|
|
617
|
+
// Lock base directory — same `.akm/` under the primary stash dir.
|
|
618
|
+
const lockBaseDir = primaryStashDir ? path.join(primaryStashDir, ".akm") : path.join(options.stashDir ?? ".", ".akm");
|
|
591
619
|
const preEnsureCleanupWarnings = [];
|
|
592
620
|
let plannedRefs;
|
|
593
621
|
let memorySummary;
|
|
@@ -596,65 +624,59 @@ export async function akmImprove(options = {}) {
|
|
|
596
624
|
let guidance;
|
|
597
625
|
let triageDrain;
|
|
598
626
|
try {
|
|
599
|
-
//
|
|
600
|
-
// The dry-run branch
|
|
601
|
-
//
|
|
627
|
+
// #607: Per-process lock acquisition. Each process acquires only the lock(s)
|
|
628
|
+
// it needs. The dry-run branch produces plannedRefs/memorySummary WITHOUT any
|
|
629
|
+
// locks (decision: dry-run never mutates the queue).
|
|
602
630
|
if (!options.dryRun) {
|
|
603
|
-
if (acquireLock() === "skipped") {
|
|
604
|
-
// Another improve holds the lock and the caller asked to skip rather
|
|
605
|
-
// than fail. Return a clean no-op result (exit 0) before any index/DB
|
|
606
|
-
// work — never registered the exit listener, never set lockAcquired,
|
|
607
|
-
// so we release nothing belonging to the run that owns the lock.
|
|
608
|
-
return {
|
|
609
|
-
schemaVersion: 1,
|
|
610
|
-
ok: true,
|
|
611
|
-
scope,
|
|
612
|
-
dryRun: false,
|
|
613
|
-
skipped: { reason: "lock-held" },
|
|
614
|
-
memorySummary: { eligible: 0, derived: 0 },
|
|
615
|
-
plannedRefs: [],
|
|
616
|
-
};
|
|
617
|
-
}
|
|
618
|
-
lockAcquired = true;
|
|
619
631
|
// Backstop release on process.exit() (signal handler / budget watchdog),
|
|
620
632
|
// which skips the finally below. Removed in that finally on the normal path.
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
//
|
|
628
|
-
//
|
|
629
|
-
//
|
|
633
|
+
const releaseAllOnExit = () => {
|
|
634
|
+
for (const p of heldProcessLocks) {
|
|
635
|
+
releaseLockIfOwned(p, process.pid);
|
|
636
|
+
}
|
|
637
|
+
};
|
|
638
|
+
process.on("exit", releaseAllOnExit);
|
|
639
|
+
// #607 triage pre-pass: acquire triage.lock, drain the standing pending
|
|
640
|
+
// backlog BEFORE ensureIndex so improve generates fresh proposals against
|
|
641
|
+
// a cleared queue (no `duplicate_pending` collisions) and ensureIndex
|
|
642
|
+
// absorbs triage's promotions for free. Release immediately after —
|
|
643
|
+
// triage.lock is not needed again until the next improve run.
|
|
630
644
|
if (primaryStashDir && resolveProcessEnabled("triage", improveProfile)) {
|
|
631
645
|
if (scope.mode === "ref") {
|
|
632
646
|
warn("[improve] triage pre-pass skipped (single-ref scope never drains the whole queue)");
|
|
633
647
|
}
|
|
634
648
|
else {
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
const maxAccepts = triageConfig?.maxAcceptsPerRun ?? 25;
|
|
640
|
-
const judgment = triageConfig?.judgment
|
|
641
|
-
? resolveTriageJudgmentRunner(triageConfig.judgment, _earlyConfig)
|
|
642
|
-
: null;
|
|
643
|
-
triageDrain = await drainProposalsFn({
|
|
644
|
-
stashDir: primaryStashDir,
|
|
645
|
-
policy,
|
|
646
|
-
applyMode,
|
|
647
|
-
maxAccepts,
|
|
648
|
-
dryRun: false,
|
|
649
|
-
// No fresh ids exist yet — triage runs before improve generates any.
|
|
650
|
-
excludeIds: new Set(),
|
|
651
|
-
...(triageConfig?.maxDiffLines !== undefined ? { maxDiffLines: triageConfig.maxDiffLines } : {}),
|
|
652
|
-
judgment,
|
|
653
|
-
});
|
|
649
|
+
const triageLPath = processLockPath(lockBaseDir, "triage");
|
|
650
|
+
const triageResult = tryAcquireProcessLock(triageLPath, PROCESS_LOCK_DEFS.triage.staleAfterMs, options.skipIfLocked, "triage");
|
|
651
|
+
if (triageResult === "skipped") {
|
|
652
|
+
triageDrain = undefined;
|
|
654
653
|
}
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
654
|
+
else {
|
|
655
|
+
try {
|
|
656
|
+
const triageConfig = improveProfile.processes?.triage;
|
|
657
|
+
const policy = resolveDrainPolicy(triageConfig?.policy);
|
|
658
|
+
const applyMode = triageConfig?.applyMode ?? "queue";
|
|
659
|
+
const maxAccepts = triageConfig?.maxAcceptsPerRun ?? 25;
|
|
660
|
+
const judgment = triageConfig?.judgment
|
|
661
|
+
? resolveTriageJudgmentRunner(triageConfig.judgment, _earlyConfig)
|
|
662
|
+
: null;
|
|
663
|
+
triageDrain = await drainProposalsFn({
|
|
664
|
+
stashDir: primaryStashDir,
|
|
665
|
+
policy,
|
|
666
|
+
applyMode,
|
|
667
|
+
maxAccepts,
|
|
668
|
+
dryRun: false,
|
|
669
|
+
excludeIds: new Set(),
|
|
670
|
+
...(triageConfig?.maxDiffLines !== undefined ? { maxDiffLines: triageConfig.maxDiffLines } : {}),
|
|
671
|
+
judgment,
|
|
672
|
+
});
|
|
673
|
+
}
|
|
674
|
+
catch (err) {
|
|
675
|
+
warn(`[improve] triage pre-pass failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
676
|
+
}
|
|
677
|
+
finally {
|
|
678
|
+
releaseProcessLock(triageLPath);
|
|
679
|
+
}
|
|
658
680
|
}
|
|
659
681
|
}
|
|
660
682
|
}
|
|
@@ -686,7 +708,7 @@ export async function akmImprove(options = {}) {
|
|
|
686
708
|
// best-effort; leave preEnsureEntryCount undefined
|
|
687
709
|
}
|
|
688
710
|
try {
|
|
689
|
-
await ensureIndexFn(primaryStashDir);
|
|
711
|
+
await ensureIndexFn(primaryStashDir, { mode: "blocking" });
|
|
690
712
|
}
|
|
691
713
|
catch (err) {
|
|
692
714
|
preEnsureCleanupWarnings.push(`ensureIndex failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
@@ -754,17 +776,14 @@ export async function akmImprove(options = {}) {
|
|
|
754
776
|
}
|
|
755
777
|
}
|
|
756
778
|
catch (err) {
|
|
757
|
-
|
|
779
|
+
releaseAllProcessLocks();
|
|
758
780
|
throw err;
|
|
759
781
|
}
|
|
760
|
-
//
|
|
761
|
-
//
|
|
762
|
-
//
|
|
763
|
-
//
|
|
764
|
-
//
|
|
765
|
-
// any of them used to leak the lock (blocking the next improve up to 4h);
|
|
766
|
-
// now the finally releases it exactly once. The dry-run path already returned
|
|
767
|
-
// above without acquiring the lock, so it never reaches this finally; the
|
|
782
|
+
// #607: per-process locks are acquired/released around each stage below.
|
|
783
|
+
// The triage pre-pass already ran under triage.lock (released). The
|
|
784
|
+
// preparation stage runs under consolidate.lock, the loop stage under
|
|
785
|
+
// reflect-distill.lock, and the post-loop stage under consolidate.lock again.
|
|
786
|
+
// Each stage acquires its lock just before starting and releases it as soon as the stage returns; the end-of-run releaseAllProcessLocks() call is the backstop for error paths.
|
|
768
787
|
// best-effort `unlinkSync` is a no-op when no lock file exists.
|
|
769
788
|
const startMs = Date.now();
|
|
770
789
|
const budgetMs = options.timeoutMs ?? 2 * 60 * 60 * 1000; // default 2 hours
|
|
@@ -828,6 +847,10 @@ export async function akmImprove(options = {}) {
|
|
|
828
847
|
},
|
|
829
848
|
}, eventsCtx);
|
|
830
849
|
}
|
|
850
|
+
// #607: acquire consolidate.lock for the preparation stage (consolidate,
|
|
851
|
+
// ensureIndex, extract all write index.db). Released immediately after.
|
|
852
|
+
const consolidateLPath = processLockPath(lockBaseDir, "consolidate");
|
|
853
|
+
const consolidatePrepAcquired = tryAcquireProcessLock(consolidateLPath, PROCESS_LOCK_DEFS.consolidate.staleAfterMs, options.skipIfLocked, "consolidate") === "acquired";
|
|
831
854
|
const preparation = await runImprovePreparationStage({
|
|
832
855
|
scope,
|
|
833
856
|
options,
|
|
@@ -842,6 +865,8 @@ export async function akmImprove(options = {}) {
|
|
|
842
865
|
initialCleanupWarnings: preEnsureCleanupWarnings,
|
|
843
866
|
improveProfile,
|
|
844
867
|
});
|
|
868
|
+
if (consolidatePrepAcquired)
|
|
869
|
+
releaseProcessLock(consolidateLPath);
|
|
845
870
|
// D6: pre-load all proposal_rejected events from the last 30 days once,
|
|
846
871
|
// so the per-asset loop can use a Map lookup instead of N DB round trips.
|
|
847
872
|
const REJECTED_PROPOSAL_WINDOW_MS = daysToMs(30);
|
|
@@ -853,6 +878,10 @@ export async function akmImprove(options = {}) {
|
|
|
853
878
|
rejectedProposalsByRef.set(e.ref, e);
|
|
854
879
|
}
|
|
855
880
|
}
|
|
881
|
+
// #607: acquire reflect-distill.lock for the loop stage (reflect + distill
|
|
882
|
+
// both write proposals to state.db). Released immediately after.
|
|
883
|
+
const reflectDistillLPath = processLockPath(lockBaseDir, "reflectDistill");
|
|
884
|
+
const reflectDistillAcquired = tryAcquireProcessLock(reflectDistillLPath, PROCESS_LOCK_DEFS.reflectDistill.staleAfterMs, options.skipIfLocked, "reflect-distill") === "acquired";
|
|
856
885
|
const { reflectsWithErrorContext, memoryRefsForInference, gateAutoAcceptedCount: loopGateCount, gateAutoAcceptFailedCount: loopGateFailedCount, } = await runImproveLoopStage({
|
|
857
886
|
scope,
|
|
858
887
|
options,
|
|
@@ -872,9 +901,15 @@ export async function akmImprove(options = {}) {
|
|
|
872
901
|
eventsCtx,
|
|
873
902
|
improveProfile,
|
|
874
903
|
});
|
|
904
|
+
if (reflectDistillAcquired)
|
|
905
|
+
releaseProcessLock(reflectDistillLPath);
|
|
875
906
|
// #551: consolidation now runs in the preparation stage (before extract);
|
|
876
907
|
// its result and run-flag are read from `preparation`, not the post-loop.
|
|
877
908
|
const consolidation = preparation.consolidation;
|
|
909
|
+
// #607: acquire consolidate.lock for the post-loop stage (memoryInference +
|
|
910
|
+
// graphExtraction both write index.db). Released immediately after.
|
|
911
|
+
const consolidatePostLPath = processLockPath(lockBaseDir, "consolidate");
|
|
912
|
+
const consolidatePostAcquired = tryAcquireProcessLock(consolidatePostLPath, PROCESS_LOCK_DEFS.consolidate.staleAfterMs, options.skipIfLocked, "consolidate") === "acquired";
|
|
878
913
|
const { allWarnings, deadUrls, memoryInference, graphExtraction, stalenessDetection, maintenanceActions, memoryInferenceDurationMs, graphExtractionDurationMs, orphansPurged, proposalsExpired, gateAutoAcceptedCount: postLoopGateCount, gateAutoAcceptFailedCount: postLoopGateFailedCount, } = await runImprovePostLoopStage({
|
|
879
914
|
scope,
|
|
880
915
|
options,
|
|
@@ -885,11 +920,12 @@ export async function akmImprove(options = {}) {
|
|
|
885
920
|
memoryRefsForInference,
|
|
886
921
|
reindexFn,
|
|
887
922
|
eventsCtx,
|
|
888
|
-
// O-1 (#364): propagate wall-clock budget signal to post-loop maintenance.
|
|
889
923
|
budgetSignal: budgetAbortController.signal,
|
|
890
924
|
improveProfile,
|
|
891
925
|
consolidationRan: preparation.consolidationRan,
|
|
892
926
|
});
|
|
927
|
+
if (consolidatePostAcquired)
|
|
928
|
+
releaseProcessLock(consolidatePostLPath);
|
|
893
929
|
const finalActions = maintenanceActions && maintenanceActions.length > 0
|
|
894
930
|
? [...preparation.actions, ...maintenanceActions]
|
|
895
931
|
: preparation.actions;
|
|
@@ -974,6 +1010,7 @@ export async function akmImprove(options = {}) {
|
|
|
974
1010
|
},
|
|
975
1011
|
}
|
|
976
1012
|
: {}),
|
|
1013
|
+
...(preparation.proactiveMaintenance ? { proactiveMaintenance: preparation.proactiveMaintenance } : {}),
|
|
977
1014
|
...(options.runId !== undefined ? { runId: options.runId } : {}),
|
|
978
1015
|
};
|
|
979
1016
|
if (!result.dryRun)
|
|
@@ -1056,15 +1093,12 @@ export async function akmImprove(options = {}) {
|
|
|
1056
1093
|
// O-1 (#364): Clear the budget abort timer so it does not keep the event
|
|
1057
1094
|
// loop alive after the run completes.
|
|
1058
1095
|
clearBudgetTimer();
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
// The normal path released the lock above; drop the process.exit backstop so
|
|
1066
|
-
// it does not fire later (or accumulate across repeated in-process calls).
|
|
1067
|
-
process.removeListener("exit", releaseLockOnExit);
|
|
1096
|
+
// #607: release any per-process locks still held (backstop for error paths;
|
|
1097
|
+
// the normal path already released each lock after its stage completed).
|
|
1098
|
+
releaseAllProcessLocks();
|
|
1099
|
+
// Drop the process.exit backstop so it does not fire later (or accumulate
|
|
1100
|
+
// across repeated in-process calls).
|
|
1101
|
+
process.removeAllListeners("exit");
|
|
1068
1102
|
// I1: close the long-lived state.db connection opened at the top of the run.
|
|
1069
1103
|
try {
|
|
1070
1104
|
eventsDb?.close();
|
|
@@ -1177,6 +1211,11 @@ function emitImproveCompletedEvent(result, durations, eventsCtx) {
|
|
|
1177
1211
|
memoryInferenceDurationMs: durations.memoryInferenceDurationMs,
|
|
1178
1212
|
graphExtractionExtractedFiles: result.graphExtraction?.quality.extractedFiles ?? 0,
|
|
1179
1213
|
graphExtractionDurationMs: durations.graphExtractionDurationMs,
|
|
1214
|
+
// Layer-2 proactive-maintenance coverage (0 when the process is disabled
|
|
1215
|
+
// or the run was ref-scoped) so a scheduled sweep's reach is trackable.
|
|
1216
|
+
proactiveSelected: result.proactiveMaintenance?.selected ?? 0,
|
|
1217
|
+
proactiveDueTotal: result.proactiveMaintenance?.dueTotal ?? 0,
|
|
1218
|
+
proactiveNeverReflected: result.proactiveMaintenance?.neverReflected ?? 0,
|
|
1180
1219
|
// New metrics for tuning the improve loop.
|
|
1181
1220
|
...(durations.totalDurationMs !== undefined ? { durationMs: durations.totalDurationMs } : {}),
|
|
1182
1221
|
...(durations.warningCount !== undefined ? { warningCount: durations.warningCount } : {}),
|
|
@@ -1422,7 +1461,14 @@ async function runConsolidationPass(args) {
|
|
|
1422
1461
|
appendEvent({
|
|
1423
1462
|
eventType: "consolidate_completed",
|
|
1424
1463
|
ref: "memory:_consolidation",
|
|
1425
|
-
metadata: {
|
|
1464
|
+
metadata: {
|
|
1465
|
+
processed: consolidation.processed,
|
|
1466
|
+
merged: consolidation.merged,
|
|
1467
|
+
deleted: consolidation.deleted,
|
|
1468
|
+
contradicted: consolidation.contradicted,
|
|
1469
|
+
failedChunks: consolidation.failedChunks ?? 0,
|
|
1470
|
+
durationMs: consolidation.durationMs,
|
|
1471
|
+
},
|
|
1426
1472
|
}, eventsCtx);
|
|
1427
1473
|
}
|
|
1428
1474
|
}
|
|
@@ -1793,10 +1839,19 @@ async function runImprovePreparationStage(args) {
|
|
|
1793
1839
|
// refs that fail the distill signal-delta gate).
|
|
1794
1840
|
// distillOnlyRefs — reflect blocked but distill signal-delta passes
|
|
1795
1841
|
// AND ref is a distill candidate.
|
|
1796
|
-
//
|
|
1797
|
-
//
|
|
1842
|
+
// noFeedbackPool — neither signal-delta gate passes *and* the ref has
|
|
1843
|
+
// no recent feedback signal at all. These are NOT
|
|
1844
|
+
// skipped here: they are handed to the high-retrieval
|
|
1845
|
+
// fallback (P0-A) below so frequently-retrieved but
|
|
1846
|
+
// never-rated assets can still be improved. Only refs
|
|
1847
|
+
// that P0-A declines are ultimately fully skipped.
|
|
1848
|
+
// fullySkippedCount — has stale feedback but no signal delta → genuine
|
|
1849
|
+
// skip (counted, aggregated event emitted post-loop),
|
|
1850
|
+
// excluded from sort.
|
|
1798
1851
|
const eligibleRefs = [];
|
|
1799
1852
|
const distillOnlyRefs = [];
|
|
1853
|
+
// Zero-(recent-)feedback refs deferred to the P0-A high-retrieval fallback.
|
|
1854
|
+
const noFeedbackPool = [];
|
|
1800
1855
|
let fullySkippedCount = 0;
|
|
1801
1856
|
// O-2 (#365): explicit --scope <ref> bypasses every gate (user intent wins).
|
|
1802
1857
|
const scopeRefBypass = scope.mode === "ref";
|
|
@@ -1834,22 +1889,59 @@ async function runImprovePreparationStage(args) {
|
|
|
1834
1889
|
// Reflect blocked but distill passes → distill-only bucket.
|
|
1835
1890
|
distillOnlyRefs.push(r);
|
|
1836
1891
|
}
|
|
1892
|
+
else if (!latestFeedbackTs.has(r.ref)) {
|
|
1893
|
+
// Neither signal-delta gate passes AND there is no recent feedback signal
|
|
1894
|
+
// at all. Rather than skip outright, defer to the high-retrieval fallback
|
|
1895
|
+
// (P0-A) below: a never-rated-but-frequently-retrieved asset is exactly
|
|
1896
|
+
// what that path is meant to rescue. Refs P0-A declines are skipped there.
|
|
1897
|
+
noFeedbackPool.push(r);
|
|
1898
|
+
}
|
|
1837
1899
|
else {
|
|
1838
|
-
//
|
|
1900
|
+
// Has feedback on record but no signal delta since the last proposal —
|
|
1901
|
+
// genuinely fully skipped. Counted here; a single aggregated
|
|
1902
|
+
// improve_skipped event is emitted after the loop (mirrors
|
|
1903
|
+
// profile_filtered_all_passes) instead of one event per ref.
|
|
1839
1904
|
fullySkippedCount++;
|
|
1840
1905
|
actions.push({
|
|
1841
1906
|
ref: r.ref,
|
|
1842
1907
|
mode: "distill-skipped",
|
|
1843
1908
|
result: { ok: true, reason: "no new signal since last proposal" },
|
|
1844
1909
|
});
|
|
1845
|
-
appendEvent({ eventType: "improve_skipped", ref: r.ref, metadata: { reason: "no_new_signal" } }, eventsCtx);
|
|
1846
1910
|
}
|
|
1847
1911
|
}
|
|
1912
|
+
// Emit ONE aggregated skip event for the fully-skipped bucket rather than one
|
|
1913
|
+
// improve_skipped event per ref (#592 pattern, mirrors
|
|
1914
|
+
// profile_filtered_all_passes above). The per-ref loop previously produced
|
|
1915
|
+
// ~11K state.db writes per run on a large stash, the dominant contributor to
|
|
1916
|
+
// 900 s timeouts. The in-memory `actions` log keeps the per-ref detail for the
|
|
1917
|
+
// run summary; no downstream consumer needs a per-ref DB audit trail (health's
|
|
1918
|
+
// skip histogram reads the `no_new_signal` counter from the count field).
|
|
1919
|
+
if (fullySkippedCount > 0) {
|
|
1920
|
+
appendEvent({
|
|
1921
|
+
eventType: "improve_skipped",
|
|
1922
|
+
ref: undefined,
|
|
1923
|
+
metadata: {
|
|
1924
|
+
reason: "no_new_signal",
|
|
1925
|
+
count: fullySkippedCount,
|
|
1926
|
+
},
|
|
1927
|
+
}, eventsCtx);
|
|
1928
|
+
}
|
|
1848
1929
|
// ── Phase 4: signal/feedback/utility/sort on the reduced set ──────────────
|
|
1849
|
-
// Everything from here works
|
|
1850
|
-
//
|
|
1851
|
-
//
|
|
1930
|
+
// Everything from here works on (eligibleRefs ∪ distillOnlyRefs) plus the
|
|
1931
|
+
// deferred noFeedbackPool that may be rescued by the high-retrieval fallback
|
|
1932
|
+
// (P0-A). The fully-skipped bucket has already been routed and its aggregated
|
|
1933
|
+
// event emitted; we deliberately avoid spending DB/CPU on refs that the
|
|
1934
|
+
// signal-delta gate rejected with feedback already on record.
|
|
1852
1935
|
const processableRefs = [...eligibleRefs, ...distillOnlyRefs];
|
|
1936
|
+
// Refs eligible for the high-retrieval fallback (P0-A): the signal-delta
|
|
1937
|
+
// partition above could not place these in a reflect/distill bucket, but they
|
|
1938
|
+
// may still qualify if they have been retrieved often enough. Two disjoint
|
|
1939
|
+
// sources feed this set:
|
|
1940
|
+
// 1. noFeedbackPool — refs with no recent feedback that the partition loop
|
|
1941
|
+
// deliberately deferred here (otherwise they would never reach P0-A).
|
|
1942
|
+
// 2. processableRefs entries that turn out to carry no recent feedback
|
|
1943
|
+
// *signal* once feedbackSummary is computed below.
|
|
1944
|
+
// (1) is added here; (2) is folded in after feedbackSummary is built.
|
|
1853
1945
|
// Gap 6: only surface feedback signals from the last 30 days so that
|
|
1854
1946
|
// ancient one-off feedback events don't permanently lock an asset into
|
|
1855
1947
|
// every improve run. Assets with only stale signals fall through to the
|
|
@@ -1859,8 +1951,12 @@ async function runImprovePreparationStage(args) {
|
|
|
1859
1951
|
// Pre-compute feedback summary per ref in a single pass so we don't issue
|
|
1860
1952
|
// two readEvents({type:"feedback", ref}) per asset (one for signal filtering,
|
|
1861
1953
|
// one for ratio computation).
|
|
1954
|
+
// Cover processableRefs *and* the deferred noFeedbackPool so utility/feedback
|
|
1955
|
+
// ratios are available for any noFeedbackPool ref that P0-A rescues below.
|
|
1862
1956
|
const feedbackSummary = new Map();
|
|
1863
|
-
for (const candidate of processableRefs) {
|
|
1957
|
+
for (const candidate of [...processableRefs, ...noFeedbackPool]) {
|
|
1958
|
+
if (feedbackSummary.has(candidate.ref))
|
|
1959
|
+
continue;
|
|
1864
1960
|
const { events } = readEvents({ type: "feedback", ref: candidate.ref });
|
|
1865
1961
|
let hasSignal = false;
|
|
1866
1962
|
let positive = 0;
|
|
@@ -1883,8 +1979,21 @@ async function runImprovePreparationStage(args) {
|
|
|
1883
1979
|
// P0-A: also surface zero-feedback assets that have been retrieved many times.
|
|
1884
1980
|
const RETRIEVAL_COUNT_THRESHOLD = options.minRetrievalCount ?? 5;
|
|
1885
1981
|
const signalBearingSet = new Set(signalFiltered.map((r) => r.ref));
|
|
1886
|
-
|
|
1982
|
+
// Zero-feedback candidates for P0-A: processableRefs without a recent signal,
|
|
1983
|
+
// plus the deferred noFeedbackPool. Dedupe by ref (the two sources are
|
|
1984
|
+
// disjoint by construction, but guard against overlap defensively).
|
|
1985
|
+
const noFeedbackSeen = new Set();
|
|
1986
|
+
const noFeedbackCandidates = [];
|
|
1987
|
+
for (const r of [...processableRefs.filter((r) => !signalBearingSet.has(r.ref)), ...noFeedbackPool]) {
|
|
1988
|
+
if (noFeedbackSeen.has(r.ref))
|
|
1989
|
+
continue;
|
|
1990
|
+
noFeedbackSeen.add(r.ref);
|
|
1991
|
+
noFeedbackCandidates.push(r);
|
|
1992
|
+
}
|
|
1887
1993
|
let highRetrievalRefs = [];
|
|
1994
|
+
// Retrieval counts for the zero-feedback pool, hoisted so the Layer-2
|
|
1995
|
+
// proactive-maintenance selector below can reuse them without a second DB pass.
|
|
1996
|
+
let retrievalCounts = new Map();
|
|
1888
1997
|
let dbForRetrieval;
|
|
1889
1998
|
try {
|
|
1890
1999
|
dbForRetrieval = openExistingDatabase();
|
|
@@ -1892,15 +2001,21 @@ async function runImprovePreparationStage(args) {
|
|
|
1892
2001
|
if (showEventCount === 0) {
|
|
1893
2002
|
warn("Warning: show events not yet in usage_events — zero-feedback fallback will match only search-retrieved assets.");
|
|
1894
2003
|
}
|
|
1895
|
-
|
|
2004
|
+
retrievalCounts = getRetrievalCounts(dbForRetrieval, noFeedbackCandidates.map((r) => r.ref));
|
|
1896
2005
|
// High-retrieval signal-delta (simplified rule, 0.8.0): a no-feedback
|
|
1897
|
-
// ref qualifies exactly once — when
|
|
1898
|
-
//
|
|
1899
|
-
//
|
|
1900
|
-
//
|
|
1901
|
-
//
|
|
1902
|
-
//
|
|
1903
|
-
|
|
2006
|
+
// ref qualifies exactly once — when it has actually been retrieved
|
|
2007
|
+
// (retrievalCount ≥ 1) AND retrievalCount ≥ threshold AND no prior reflect
|
|
2008
|
+
// proposal exists for it. Once a reflect proposal is on record, subsequent
|
|
2009
|
+
// re-eligibility requires explicit feedback (which flows through the normal
|
|
2010
|
+
// signal-delta gate above). The explicit `> 0` guard keeps a threshold of 0
|
|
2011
|
+
// from rescuing genuinely never-retrieved assets — the fallback is for
|
|
2012
|
+
// *retrieved* assets, not silent ones. Tracking growth in retrieval count
|
|
2013
|
+
// would require persisting the count in proposal metadata; deferred to a
|
|
2014
|
+
// follow-up.
|
|
2015
|
+
highRetrievalRefs = noFeedbackCandidates.filter((r) => {
|
|
2016
|
+
const count = retrievalCounts.get(r.ref) ?? 0;
|
|
2017
|
+
return count > 0 && count >= RETRIEVAL_COUNT_THRESHOLD && !lastReflectProposalTs.has(r.ref);
|
|
2018
|
+
});
|
|
1904
2019
|
}
|
|
1905
2020
|
catch (err) {
|
|
1906
2021
|
rethrowIfTestIsolationError(err);
|
|
@@ -1910,6 +2025,91 @@ async function runImprovePreparationStage(args) {
|
|
|
1910
2025
|
if (dbForRetrieval)
|
|
1911
2026
|
closeDatabase(dbForRetrieval);
|
|
1912
2027
|
}
|
|
2028
|
+
// ── Layer 2: PROACTIVE MAINTENANCE SELECTOR (third eligibility source) ─────
|
|
2029
|
+
// The signal-delta gate and P0-A only surface assets with fresh feedback or a
|
|
2030
|
+
// raw-retrieval spike. Neither revisits a stable, high-value asset on a
|
|
2031
|
+
// schedule, so on a quiet stash useful assets drift stale and are never
|
|
2032
|
+
// refreshed. When the `proactiveMaintenance` process is enabled (DEFAULT OFF)
|
|
2033
|
+
// and the run is whole-stash / type scope, this selector ranks the eligible
|
|
2034
|
+
// population by a composite maintenance priority, gates on staleness ("due"),
|
|
2035
|
+
// bounds to top-N, and folds the winners into the SAME candidate set the other
|
|
2036
|
+
// two sources feed — so they flow through the existing #580 empty-diff /
|
|
2037
|
+
// cosmetic suppression and additive-distill gates. It adds no new mutation
|
|
2038
|
+
// logic of its own. The due gate doubles as the rotation cooldown: a freshly
|
|
2039
|
+
// reflected asset is excluded until it ages back past `dueDays`, so successive
|
|
2040
|
+
// runs rotate through the due pool rather than re-selecting the same heads.
|
|
2041
|
+
let proactiveRefs = [];
|
|
2042
|
+
let proactiveMaintenanceSummary;
|
|
2043
|
+
const proactiveEnabled = scope.mode !== "ref" && resolveProcessEnabled("proactiveMaintenance", improveProfile);
|
|
2044
|
+
if (proactiveEnabled) {
|
|
2045
|
+
const pmCfg = improveProfile.processes?.proactiveMaintenance;
|
|
2046
|
+
const dueDays = pmCfg?.dueDays ?? DEFAULT_DUE_DAYS;
|
|
2047
|
+
const maxPerRun = pmCfg?.maxPerRun ?? pmCfg?.limit ?? DEFAULT_MAX_PER_RUN;
|
|
2048
|
+
const importanceWeights = pmCfg?.importanceWeights;
|
|
2049
|
+
// Candidate population: the zero-feedback / non-signal pool — exactly the
|
|
2050
|
+
// assets the other two sources would NOT pick this run. Exclude any P0-A
|
|
2051
|
+
// rescued this run so we never double-select the same ref.
|
|
2052
|
+
const alreadySelected = new Set(highRetrievalRefs.map((r) => r.ref));
|
|
2053
|
+
const pmCandidates = noFeedbackCandidates.filter((r) => !alreadySelected.has(r.ref));
|
|
2054
|
+
const selection = selectProactiveMaintenanceRefs({
|
|
2055
|
+
candidates: pmCandidates,
|
|
2056
|
+
lastReflectTs: lastReflectProposalTs,
|
|
2057
|
+
lastDistillTs: lastDistillProposalTs,
|
|
2058
|
+
retrievalCounts,
|
|
2059
|
+
sizeBytesOf: (r) => {
|
|
2060
|
+
const fp = r.filePath;
|
|
2061
|
+
if (!fp)
|
|
2062
|
+
return undefined;
|
|
2063
|
+
try {
|
|
2064
|
+
return fs.statSync(fp).size;
|
|
2065
|
+
}
|
|
2066
|
+
catch {
|
|
2067
|
+
return undefined;
|
|
2068
|
+
}
|
|
2069
|
+
},
|
|
2070
|
+
dueDays,
|
|
2071
|
+
maxPerRun,
|
|
2072
|
+
importanceWeights,
|
|
2073
|
+
});
|
|
2074
|
+
proactiveRefs = selection.selected;
|
|
2075
|
+
proactiveMaintenanceSummary = {
|
|
2076
|
+
selected: selection.selected.length,
|
|
2077
|
+
dueTotal: selection.dueTotal,
|
|
2078
|
+
neverReflected: selection.neverReflected,
|
|
2079
|
+
};
|
|
2080
|
+
// Aggregated observability event (never per-ref — avoids the event flood the
|
|
2081
|
+
// Layer-1 work eliminated). Mirrors the `no_new_signal` aggregation pattern.
|
|
2082
|
+
appendEvent({
|
|
2083
|
+
eventType: "proactive_selected",
|
|
2084
|
+
ref: undefined,
|
|
2085
|
+
metadata: {
|
|
2086
|
+
count: selection.selected.length,
|
|
2087
|
+
dueTotal: selection.dueTotal,
|
|
2088
|
+
neverReflected: selection.neverReflected,
|
|
2089
|
+
},
|
|
2090
|
+
}, eventsCtx);
|
|
2091
|
+
if (selection.selected.length > 0) {
|
|
2092
|
+
info(`[improve] proactive maintenance selected ${selection.selected.length}/${selection.dueTotal} due refs ` +
|
|
2093
|
+
`(${selection.neverReflected} never reflected, dueDays=${dueDays}, maxPerRun=${maxPerRun})`);
|
|
2094
|
+
}
|
|
2095
|
+
}
|
|
2096
|
+
// Record an in-memory skip action for every zero-feedback ref that the
|
|
2097
|
+
// partition loop deferred to P0-A but P0-A then declined (retrievalCount below
|
|
2098
|
+
// threshold, or a prior reflect proposal already on record). These never make
|
|
2099
|
+
// it into mergedRefs, so without this they would silently vanish from the run
|
|
2100
|
+
// summary. No DB event is written here — these refs carry no signal at all, so
|
|
2101
|
+
// there is nothing for the skip histogram to aggregate; the action log alone
|
|
2102
|
+
// preserves the per-ref audit trail (mirrors the fully-skipped action above).
|
|
2103
|
+
const rescuedSet = new Set([...highRetrievalRefs, ...proactiveRefs].map((r) => r.ref));
|
|
2104
|
+
for (const r of noFeedbackPool) {
|
|
2105
|
+
if (rescuedSet.has(r.ref))
|
|
2106
|
+
continue;
|
|
2107
|
+
actions.push({
|
|
2108
|
+
ref: r.ref,
|
|
2109
|
+
mode: "distill-skipped",
|
|
2110
|
+
result: { ok: true, reason: "no new signal since last proposal" },
|
|
2111
|
+
});
|
|
2112
|
+
}
|
|
1913
2113
|
// If the user explicitly scoped to a single ref, always act on it —
|
|
1914
2114
|
// skip the signal/retrieval filter entirely. The filter exists to avoid
|
|
1915
2115
|
// noisy "improve everything" runs; it should not gate an intentional
|
|
@@ -1919,8 +2119,48 @@ async function runImprovePreparationStage(args) {
|
|
|
1919
2119
|
// or sufficient retrievals). A stash with no signals has 0 eligible refs —
|
|
1920
2120
|
// usage is the gate. Run `akm feedback <ref> --positive` or retrieve assets
|
|
1921
2121
|
// to bring them into the eligible pool.
|
|
1922
|
-
|
|
2122
|
+
// Layer-2 proactive refs join the eligible set alongside feedback-signal and
|
|
2123
|
+
// high-retrieval (P0-A) refs. The three sources are disjoint by construction
|
|
2124
|
+
// (proactive draws from noFeedbackCandidates with the P0-A picks removed), but
|
|
2125
|
+
// dedupe defensively so a ref can never enter the loop twice. `requireFeedbackSignal`
|
|
2126
|
+
// still suppresses both fallback sources for callers that want feedback-only runs.
|
|
2127
|
+
const signalAndRetrievalRefs = dedupeRefs([...signalFiltered, ...highRetrievalRefs, ...proactiveRefs]);
|
|
1923
2128
|
const mergedRefs = scope.mode === "ref" ? processableRefs : options.requireFeedbackSignal ? signalFiltered : signalAndRetrievalRefs;
|
|
2129
|
+
// ── Attribution tagging: stamp each ref with the eligibility lane that
|
|
2130
|
+
// selected it ──────────────────────────────────────────────────────────────
|
|
2131
|
+
// Every reflect/distill proposal must record WHICH lane chose its source asset
|
|
2132
|
+
// so downstream accept/reject/revert/retrieval outcomes can be sliced by lane
|
|
2133
|
+
// (does the PROACTIVE lane produce value vs the reactive lanes?). We build the
|
|
2134
|
+
// lane map here — the one place all four lanes are known — and stamp it onto
|
|
2135
|
+
// each ImproveEligibleRef object. Because the ref objects are shared by
|
|
2136
|
+
// reference across buckets, the stamp travels with the ref through the sort,
|
|
2137
|
+
// disk-check, and loop stages down to the reflect/distill event emit sites and
|
|
2138
|
+
// createProposal calls. See EligibilitySource for the lane vocabulary.
|
|
2139
|
+
//
|
|
2140
|
+
// Precedence (prefer the most specific reactive signal):
|
|
2141
|
+
// scope > signal-delta > high-retrieval > proactive
|
|
2142
|
+
// A ref with real feedback is attributed to feedback even if it was also due
|
|
2143
|
+
// for proactive maintenance. We apply lanes weakest-first so the strongest
|
|
2144
|
+
// overwrites; the explicit --scope <ref> bypass wins outright (user intent).
|
|
2145
|
+
const eligibilitySourceByRef = new Map();
|
|
2146
|
+
for (const r of proactiveRefs)
|
|
2147
|
+
eligibilitySourceByRef.set(r.ref, "proactive");
|
|
2148
|
+
for (const r of highRetrievalRefs)
|
|
2149
|
+
eligibilitySourceByRef.set(r.ref, "high-retrieval");
|
|
2150
|
+
for (const r of signalFiltered)
|
|
2151
|
+
eligibilitySourceByRef.set(r.ref, "signal-delta");
|
|
2152
|
+
if (scope.mode === "ref") {
|
|
2153
|
+
// O-2 (#365): explicit --scope <ref> bypass — every ref in processableRefs
|
|
2154
|
+
// arrived via the scopeRefBypass branch, so attribute the whole set to scope.
|
|
2155
|
+
for (const r of processableRefs)
|
|
2156
|
+
eligibilitySourceByRef.set(r.ref, "scope");
|
|
2157
|
+
}
|
|
2158
|
+
for (const r of mergedRefs) {
|
|
2159
|
+
// "unknown" is a genuine fallback, never a silent alias for signal-delta:
|
|
2160
|
+
// only refs we truly cannot attribute land here (none in practice, since
|
|
2161
|
+
// mergedRefs is always a subset of the four lanes above).
|
|
2162
|
+
r.eligibilitySource = eligibilitySourceByRef.get(r.ref) ?? "unknown";
|
|
2163
|
+
}
|
|
1924
2164
|
const utilityMap = buildUtilityMap(mergedRefs);
|
|
1925
2165
|
// Load feedback ratio per ref from the pre-computed summary (no extra DB pass).
|
|
1926
2166
|
const feedbackRatios = new Map();
|
|
@@ -2061,6 +2301,7 @@ async function runImprovePreparationStage(args) {
|
|
|
2061
2301
|
gateAutoAcceptFailedCount,
|
|
2062
2302
|
consolidation: consolidationPass.consolidation,
|
|
2063
2303
|
consolidationRan: consolidationPass.consolidationRan,
|
|
2304
|
+
...(proactiveMaintenanceSummary ? { proactiveMaintenance: proactiveMaintenanceSummary } : {}),
|
|
2064
2305
|
};
|
|
2065
2306
|
}
|
|
2066
2307
|
async function runImproveLoopStage(args) {
|
|
@@ -2237,6 +2478,9 @@ async function runImproveLoopStage(args) {
|
|
|
2237
2478
|
eventSource: "improve",
|
|
2238
2479
|
...(reflectBudgetMs > 0 ? { timeoutMs: reflectBudgetMs } : {}),
|
|
2239
2480
|
...(reflectProfileRunner ? { runner: reflectProfileRunner } : {}),
|
|
2481
|
+
// Attribution: carry the eligibility lane so reflect stamps it on
|
|
2482
|
+
// the reflect_invoked event and the persisted proposal.
|
|
2483
|
+
...(planned.eligibilitySource ? { eligibilitySource: planned.eligibilitySource } : {}),
|
|
2240
2484
|
};
|
|
2241
2485
|
// R-2 / #389: Self-consistency multi-sample voting for high-utility refs.
|
|
2242
2486
|
// Self-Consistency arXiv:2203.11171 — N=3 samples beat single-shot quality.
|
|
@@ -2261,6 +2505,9 @@ async function runImproveLoopStage(args) {
|
|
|
2261
2505
|
source: "reflect",
|
|
2262
2506
|
sourceRun: `reflect-sc-${Date.now()}`,
|
|
2263
2507
|
payload: winner.proposal.payload,
|
|
2508
|
+
// Attribution: the self-consistency path persists the winner here
|
|
2509
|
+
// (draftMode skips reflect's own createProposal), so stamp the lane.
|
|
2510
|
+
...(planned.eligibilitySource ? { eligibilitySource: planned.eligibilitySource } : {}),
|
|
2264
2511
|
});
|
|
2265
2512
|
reflectResult = isProposalSkipped(persistResult)
|
|
2266
2513
|
? {
|
|
@@ -2459,6 +2706,9 @@ async function runImproveLoopStage(args) {
|
|
|
2459
2706
|
ref: planned.ref,
|
|
2460
2707
|
...(parsedPlannedRef.type === "memory" ? { proposalKind: "auto" } : {}),
|
|
2461
2708
|
...(options.stashDir ? { stashDir: options.stashDir } : {}),
|
|
2709
|
+
// Attribution: carry the eligibility lane so distill stamps it on the
|
|
2710
|
+
// distill_invoked event and the persisted proposal.
|
|
2711
|
+
...(planned.eligibilitySource ? { eligibilitySource: planned.eligibilitySource } : {}),
|
|
2462
2712
|
}));
|
|
2463
2713
|
actions.push({ ref: planned.ref, mode: "distill", result: distillResult });
|
|
2464
2714
|
if (distillResult.outcome === "queued" && distillResult.proposal) {
|
|
@@ -2640,323 +2890,325 @@ export async function runImproveMaintenancePasses(args) {
|
|
|
2640
2890
|
db = openIndexDb();
|
|
2641
2891
|
}
|
|
2642
2892
|
};
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
|
|
2646
|
-
|
|
2647
|
-
|
|
2648
|
-
|
|
2649
|
-
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
2655
|
-
|
|
2656
|
-
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
2893
|
+
await withIndexWriterLease({ purpose: "improve-maintenance", signal: budgetSignal }, async () => {
|
|
2894
|
+
try {
|
|
2895
|
+
db = openIndexDb();
|
|
2896
|
+
// Memory inference candidate-discovery (post-Item 9 fix from
|
|
2897
|
+
// memory:akm-improve-critical-review-2026-05-20). Previously this pass
|
|
2898
|
+
// was gated on memoryRefsForInference.size > 0 AND passed those refs as a
|
|
2899
|
+
// candidateRefs filter. But memoryRefsForInference is populated from refs
|
|
2900
|
+
// distilled THIS RUN — by the time that happens, those parents are
|
|
2901
|
+
// already split (`inferenceProcessed: true`) and `isPendingMemory` excludes
|
|
2902
|
+
// them. The genuinely-pending parents in the stash never entered the
|
|
2903
|
+
// filter. Result: 0/0/0 for 25 consecutive runs.
|
|
2904
|
+
//
|
|
2905
|
+
// Fix: always run the pass when the feature is enabled; let the pass's
|
|
2906
|
+
// own `collectPendingMemories` + `isPendingMemory` predicate find
|
|
2907
|
+
// candidates from the filesystem-of-truth. The this-run set is still
|
|
2908
|
+
// logged as a hint but no longer used as a filter.
|
|
2909
|
+
const memoryInferenceDisabledByProfile = improveProfile?.processes?.memoryInference?.enabled === false;
|
|
2910
|
+
const minPendingCount = improveProfile?.processes?.memoryInference?.minPendingCount;
|
|
2911
|
+
const pendingBelowMinCount = (() => {
|
|
2912
|
+
if (!primaryStashDir || minPendingCount === undefined || minPendingCount <= 0)
|
|
2913
|
+
return false;
|
|
2914
|
+
const pending = collectPendingMemories(primaryStashDir).length;
|
|
2915
|
+
if (pending < minPendingCount) {
|
|
2916
|
+
info(`[improve] memory inference skipped (${pending} pending < minPendingCount ${minPendingCount})`);
|
|
2917
|
+
return true;
|
|
2918
|
+
}
|
|
2662
2919
|
return false;
|
|
2663
|
-
|
|
2664
|
-
if (
|
|
2665
|
-
info(
|
|
2666
|
-
return true;
|
|
2920
|
+
})();
|
|
2921
|
+
if (memoryInferenceDisabledByProfile) {
|
|
2922
|
+
info("[improve] memory inference skipped (disabled by improve profile)");
|
|
2667
2923
|
}
|
|
2668
|
-
|
|
2669
|
-
|
|
2670
|
-
if (memoryInferenceDisabledByProfile) {
|
|
2671
|
-
info("[improve] memory inference skipped (disabled by improve profile)");
|
|
2672
|
-
}
|
|
2673
|
-
else if (pendingBelowMinCount) {
|
|
2674
|
-
// skipped — message already emitted above
|
|
2675
|
-
}
|
|
2676
|
-
else {
|
|
2677
|
-
const hintRefs = memoryRefsForInference.size;
|
|
2678
|
-
info(hintRefs > 0
|
|
2679
|
-
? `[improve] memory inference starting (${hintRefs} hint refs touched this run; pass discovers all pending)`
|
|
2680
|
-
: "[improve] memory inference starting (discovering pending parents)");
|
|
2681
|
-
const inferenceStart = Date.now();
|
|
2682
|
-
try {
|
|
2683
|
-
// O-1 (#364): pass budget signal so a hung inference call is cancelled.
|
|
2684
|
-
memoryInference = await withLlmStage("memory-inference", () => memoryInferenceFn({
|
|
2685
|
-
config,
|
|
2686
|
-
sources,
|
|
2687
|
-
signal: budgetSignal,
|
|
2688
|
-
db,
|
|
2689
|
-
reEnrich: false,
|
|
2690
|
-
onProgress: (event) => {
|
|
2691
|
-
const current = event.currentRef ? ` ${event.currentRef}` : "";
|
|
2692
|
-
info(`[improve] memory inference ${event.processed}/${event.total}${current} (written ${event.writtenFacts}, skipped ${event.skippedNoFacts})`);
|
|
2693
|
-
},
|
|
2694
|
-
}));
|
|
2695
|
-
memoryInferenceDurationMs = Date.now() - inferenceStart;
|
|
2696
|
-
actions.push({ ref: "memory:_inference", mode: "memory-inference", result: memoryInference });
|
|
2697
|
-
info(`[improve] memory inference complete (${memoryInference.writtenFacts} facts written from ${memoryInference.splitParents} parents)`);
|
|
2924
|
+
else if (pendingBelowMinCount) {
|
|
2925
|
+
// skipped — message already emitted above
|
|
2698
2926
|
}
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2927
|
+
else {
|
|
2928
|
+
const hintRefs = memoryRefsForInference.size;
|
|
2929
|
+
info(hintRefs > 0
|
|
2930
|
+
? `[improve] memory inference starting (${hintRefs} hint refs touched this run; pass discovers all pending)`
|
|
2931
|
+
: "[improve] memory inference starting (discovering pending parents)");
|
|
2932
|
+
const inferenceStart = Date.now();
|
|
2933
|
+
try {
|
|
2934
|
+
// O-1 (#364): pass budget signal so a hung inference call is cancelled.
|
|
2935
|
+
memoryInference = await withLlmStage("memory-inference", () => memoryInferenceFn({
|
|
2936
|
+
config,
|
|
2937
|
+
sources,
|
|
2938
|
+
signal: budgetSignal,
|
|
2939
|
+
db,
|
|
2940
|
+
reEnrich: false,
|
|
2941
|
+
onProgress: (event) => {
|
|
2942
|
+
const current = event.currentRef ? ` ${event.currentRef}` : "";
|
|
2943
|
+
info(`[improve] memory inference ${event.processed}/${event.total}${current} (written ${event.writtenFacts}, skipped ${event.skippedNoFacts})`);
|
|
2944
|
+
},
|
|
2945
|
+
}));
|
|
2946
|
+
memoryInferenceDurationMs = Date.now() - inferenceStart;
|
|
2947
|
+
actions.push({ ref: "memory:_inference", mode: "memory-inference", result: memoryInference });
|
|
2948
|
+
info(`[improve] memory inference complete (${memoryInference.writtenFacts} facts written from ${memoryInference.splitParents} parents)`);
|
|
2949
|
+
}
|
|
2950
|
+
catch (err) {
|
|
2951
|
+
memoryInferenceDurationMs = Date.now() - inferenceStart;
|
|
2952
|
+
allWarnings.push(`memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2953
|
+
}
|
|
2702
2954
|
}
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
2955
|
+
if (memoryInference && (memoryInference.splitParents > 0 || memoryInference.writtenFacts > 0)) {
|
|
2956
|
+
info("[improve] reindexing after memory inference writes");
|
|
2957
|
+
try {
|
|
2958
|
+
await reindexWithIndexDbReleased(primaryStashDir);
|
|
2959
|
+
reindexedAfterInference = true;
|
|
2960
|
+
info("[improve] reindex after memory inference complete");
|
|
2961
|
+
}
|
|
2962
|
+
catch (err) {
|
|
2963
|
+
allWarnings.push(`reindex after memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2964
|
+
}
|
|
2710
2965
|
}
|
|
2711
|
-
|
|
2712
|
-
|
|
2966
|
+
const graphEnabled = isProcessEnabled("index", "graph_extraction", config);
|
|
2967
|
+
const graphExtractionDisabledByProfile = improveProfile?.processes?.graphExtraction?.enabled === false;
|
|
2968
|
+
const graphExtractionFullScan = improveProfile?.processes?.graphExtraction?.fullScan === true;
|
|
2969
|
+
// Build the set of refs actually touched this run.
|
|
2970
|
+
const touchedRefs = new Set();
|
|
2971
|
+
for (const r of args.actionableRefs)
|
|
2972
|
+
touchedRefs.add(r.ref);
|
|
2973
|
+
for (const r of memoryRefsForInference)
|
|
2974
|
+
touchedRefs.add(r);
|
|
2975
|
+
// INVARIANT: graph extraction normally runs only on files touched by
|
|
2976
|
+
// actionable refs (candidatePaths). Full-corpus scans are opt-in via
|
|
2977
|
+
// profile.processes.graphExtraction.fullScan = true (used by the
|
|
2978
|
+
// `graph-refresh` built-in profile and its weekly scheduled task).
|
|
2979
|
+
// The empty-Set fallback is intentional when no refs were touched —
|
|
2980
|
+
// the extractor's filter rejects every file and returns empty, keeping
|
|
2981
|
+
// the pass invoked so the action is recorded and tests stay exercised.
|
|
2982
|
+
if (graphExtractionDisabledByProfile) {
|
|
2983
|
+
info("[improve] graph extraction skipped (disabled by improve profile)");
|
|
2713
2984
|
}
|
|
2714
|
-
|
|
2715
|
-
|
|
2716
|
-
|
|
2717
|
-
|
|
2718
|
-
|
|
2719
|
-
|
|
2720
|
-
|
|
2721
|
-
|
|
2722
|
-
|
|
2723
|
-
|
|
2724
|
-
|
|
2725
|
-
|
|
2726
|
-
|
|
2727
|
-
|
|
2728
|
-
|
|
2729
|
-
|
|
2730
|
-
// the pass invoked so the action is recorded and tests stay exercised.
|
|
2731
|
-
if (graphExtractionDisabledByProfile) {
|
|
2732
|
-
info("[improve] graph extraction skipped (disabled by improve profile)");
|
|
2733
|
-
}
|
|
2734
|
-
else if (sources.length > 0 && graphEnabled) {
|
|
2735
|
-
info(`[improve] graph extraction starting${graphExtractionFullScan ? " (full-corpus scan)" : ""}`);
|
|
2736
|
-
const extractionStart = Date.now();
|
|
2737
|
-
try {
|
|
2738
|
-
// D9: if consolidation ran but memory inference did not reindex, force a reindex
|
|
2739
|
-
// so graph extraction sees current DB state after consolidation writes.
|
|
2740
|
-
if (consolidationRan && !reindexedAfterInference) {
|
|
2741
|
-
info("[improve] reindexing after consolidation (graph extraction needs current state)");
|
|
2742
|
-
try {
|
|
2743
|
-
await reindexWithIndexDbReleased(primaryStashDir);
|
|
2744
|
-
reindexedAfterInference = true;
|
|
2745
|
-
info("[improve] reindex after consolidation complete");
|
|
2746
|
-
}
|
|
2747
|
-
catch (err) {
|
|
2748
|
-
allWarnings.push(`reindex after consolidation failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2985
|
+
else if (sources.length > 0 && graphEnabled) {
|
|
2986
|
+
info(`[improve] graph extraction starting${graphExtractionFullScan ? " (full-corpus scan)" : ""}`);
|
|
2987
|
+
const extractionStart = Date.now();
|
|
2988
|
+
try {
|
|
2989
|
+
// D9: if consolidation ran but memory inference did not reindex, force a reindex
|
|
2990
|
+
// so graph extraction sees current DB state after consolidation writes.
|
|
2991
|
+
if (consolidationRan && !reindexedAfterInference) {
|
|
2992
|
+
info("[improve] reindexing after consolidation (graph extraction needs current state)");
|
|
2993
|
+
try {
|
|
2994
|
+
await reindexWithIndexDbReleased(primaryStashDir);
|
|
2995
|
+
reindexedAfterInference = true;
|
|
2996
|
+
info("[improve] reindex after consolidation complete");
|
|
2997
|
+
}
|
|
2998
|
+
catch (err) {
|
|
2999
|
+
allWarnings.push(`reindex after consolidation failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
3000
|
+
}
|
|
2749
3001
|
}
|
|
2750
|
-
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
2754
|
-
|
|
2755
|
-
|
|
2756
|
-
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
3002
|
+
// #584: no close/reopen needed here — reindexWithIndexDbReleased
|
|
3003
|
+
// already swapped in a fresh post-reindex handle.
|
|
3004
|
+
// Resolve touched refs to absolute file paths. Skipped for fullScan
|
|
3005
|
+
// (candidatePaths stays undefined → extractor processes all files).
|
|
3006
|
+
let candidatePaths;
|
|
3007
|
+
if (!graphExtractionFullScan) {
|
|
3008
|
+
candidatePaths = new Set();
|
|
3009
|
+
if (primaryStashDir && touchedRefs.size > 0) {
|
|
3010
|
+
const writableDirSet = new Set(getWritableStashDirs(primaryStashDir).map((d) => path.resolve(d)));
|
|
3011
|
+
const resolved = await Promise.all([...touchedRefs].map((ref) => findAssetFilePath(ref, primaryStashDir, writableDirSet).catch(() => null)));
|
|
3012
|
+
for (const p of resolved) {
|
|
3013
|
+
if (typeof p === "string" && p.length > 0)
|
|
3014
|
+
candidatePaths.add(p);
|
|
3015
|
+
}
|
|
2764
3016
|
}
|
|
2765
3017
|
}
|
|
3018
|
+
const progressHandler = (event) => {
|
|
3019
|
+
const current = event.currentPath ? ` ${path.basename(event.currentPath)}` : "";
|
|
3020
|
+
info(`[improve] graph extraction ${event.processed}/${event.total}${current} (extracted ${event.extracted}, entities ${event.totalEntities}, relations ${event.totalRelations})`);
|
|
3021
|
+
};
|
|
3022
|
+
// O-1 (#364): pass budget signal so a hung graph extraction call is cancelled.
|
|
3023
|
+
graphExtraction = await withLlmStage("graph-extraction", () => graphExtractionFn({
|
|
3024
|
+
config,
|
|
3025
|
+
sources,
|
|
3026
|
+
signal: budgetSignal,
|
|
3027
|
+
db,
|
|
3028
|
+
reEnrich: false,
|
|
3029
|
+
onProgress: progressHandler,
|
|
3030
|
+
options: { candidatePaths },
|
|
3031
|
+
}));
|
|
3032
|
+
graphExtractionDurationMs = Date.now() - extractionStart;
|
|
3033
|
+
actions.push({ ref: "graph:_artifact", mode: "graph-extraction", result: graphExtraction });
|
|
3034
|
+
info(`[improve] graph extraction complete (${graphExtraction.quality.extractedFiles} files, ${graphExtraction.quality.entityCount} entities, ${graphExtraction.quality.relationCount} relations)`);
|
|
2766
3035
|
}
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
};
|
|
2771
|
-
// O-1 (#364): pass budget signal so a hung graph extraction call is cancelled.
|
|
2772
|
-
graphExtraction = await withLlmStage("graph-extraction", () => graphExtractionFn({
|
|
2773
|
-
config,
|
|
2774
|
-
sources,
|
|
2775
|
-
signal: budgetSignal,
|
|
2776
|
-
db,
|
|
2777
|
-
reEnrich: false,
|
|
2778
|
-
onProgress: progressHandler,
|
|
2779
|
-
options: { candidatePaths },
|
|
2780
|
-
}));
|
|
2781
|
-
graphExtractionDurationMs = Date.now() - extractionStart;
|
|
2782
|
-
actions.push({ ref: "graph:_artifact", mode: "graph-extraction", result: graphExtraction });
|
|
2783
|
-
info(`[improve] graph extraction complete (${graphExtraction.quality.extractedFiles} files, ${graphExtraction.quality.entityCount} entities, ${graphExtraction.quality.relationCount} relations)`);
|
|
2784
|
-
}
|
|
2785
|
-
catch (err) {
|
|
2786
|
-
graphExtractionDurationMs = Date.now() - extractionStart;
|
|
2787
|
-
allWarnings.push(`graph extraction failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2788
|
-
}
|
|
2789
|
-
}
|
|
2790
|
-
else if (sources.length > 0 && !graphEnabled) {
|
|
2791
|
-
info("[improve] graph extraction skipped (features.index.graph_extraction is disabled)");
|
|
2792
|
-
}
|
|
2793
|
-
// Orphan proposal purge — reject pending reflect proposals whose target
|
|
2794
|
-
// asset no longer exists on disk. Runs after graph extraction so newly
|
|
2795
|
-
// promoted assets from accept flows during this run are already present.
|
|
2796
|
-
if (primaryStashDir) {
|
|
2797
|
-
try {
|
|
2798
|
-
const purgeResult = purgeOrphanProposals(primaryStashDir, sources.map((s) => s.path));
|
|
2799
|
-
orphansPurged = purgeResult.rejected;
|
|
2800
|
-
if (purgeResult.rejected > 0) {
|
|
2801
|
-
info(`[improve] orphan purge: ${purgeResult.rejected}/${purgeResult.checked} orphaned proposals rejected (${purgeResult.durationMs}ms)`);
|
|
2802
|
-
}
|
|
2803
|
-
appendEvent({
|
|
2804
|
-
eventType: "proposal_orphan_purge",
|
|
2805
|
-
ref: "proposals:_orphan-purge",
|
|
2806
|
-
metadata: {
|
|
2807
|
-
checked: purgeResult.checked,
|
|
2808
|
-
rejected: purgeResult.rejected,
|
|
2809
|
-
durationMs: purgeResult.durationMs,
|
|
2810
|
-
byType: purgeResult.byType,
|
|
2811
|
-
orphans: purgeResult.orphans.map((o) => o.ref),
|
|
2812
|
-
},
|
|
2813
|
-
}, eventsCtx);
|
|
2814
|
-
}
|
|
2815
|
-
catch (err) {
|
|
2816
|
-
allWarnings.push(`orphan purge failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2817
|
-
}
|
|
2818
|
-
// Phase 6B (Advantage D6b): expire pending proposals that have aged past
|
|
2819
|
-
// the retention window. Runs AFTER orphan purge so we never double-archive
|
|
2820
|
-
// a proposal that orphan-purge already moved. `expireStaleProposals` emits
|
|
2821
|
-
// its own per-proposal `proposal_expired` events; we additionally emit a
|
|
2822
|
-
// single roll-up event here for parity with the orphan-purge surface.
|
|
2823
|
-
try {
|
|
2824
|
-
const expireResult = expireStaleProposals(primaryStashDir, config);
|
|
2825
|
-
proposalsExpired = expireResult.expired;
|
|
2826
|
-
if (expireResult.expired > 0) {
|
|
2827
|
-
info(`[improve] expiration: ${expireResult.expired}/${expireResult.checked} pending proposals expired ` +
|
|
2828
|
-
`(retention=${expireResult.retentionDays}d, ${expireResult.durationMs}ms)`);
|
|
3036
|
+
catch (err) {
|
|
3037
|
+
graphExtractionDurationMs = Date.now() - extractionStart;
|
|
3038
|
+
allWarnings.push(`graph extraction failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2829
3039
|
}
|
|
2830
|
-
appendEvent({
|
|
2831
|
-
eventType: "proposal_expiration_pass",
|
|
2832
|
-
ref: "proposals:_expiration",
|
|
2833
|
-
metadata: {
|
|
2834
|
-
checked: expireResult.checked,
|
|
2835
|
-
expired: expireResult.expired,
|
|
2836
|
-
durationMs: expireResult.durationMs,
|
|
2837
|
-
retentionDays: expireResult.retentionDays,
|
|
2838
|
-
expiredProposals: expireResult.expiredProposals,
|
|
2839
|
-
},
|
|
2840
|
-
}, eventsCtx);
|
|
2841
3040
|
}
|
|
2842
|
-
|
|
2843
|
-
|
|
3041
|
+
else if (sources.length > 0 && !graphEnabled) {
|
|
3042
|
+
info("[improve] graph extraction skipped (features.index.graph_extraction is disabled)");
|
|
2844
3043
|
}
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
|
|
2849
|
-
// without this trim, state.db is a permanent append-only log. Config key
|
|
2850
|
-
// `improve.eventRetentionDays` (default 90, set 0 to disable) controls the
|
|
2851
|
-
// window. The purge runs against state.db (a different SQLite file from
|
|
2852
|
-
// the index `db` above).
|
|
2853
|
-
{
|
|
2854
|
-
const retentionDays = typeof config.improve?.eventRetentionDays === "number" ? config.improve.eventRetentionDays : 90;
|
|
2855
|
-
if (retentionDays > 0) {
|
|
2856
|
-
// #585: reuse the long-lived eventsCtx.db connection when akmImprove
|
|
2857
|
-
// opened one — opening a second state.db write connection while
|
|
2858
|
-
// eventsDb is still live made two simultaneous writers contend on the
|
|
2859
|
-
// same WAL file ("database is locked"). Only the eventsCtx.dbPath
|
|
2860
|
-
// fallback path (state.db failed to open up-front) opens — and then
|
|
2861
|
-
// owns and closes — its own handle. C2 still holds: the fallback uses
|
|
2862
|
-
// the boundary-pinned path, never a live `process.env` re-read.
|
|
2863
|
-
const ownsStateDb = !eventsCtx?.db;
|
|
2864
|
-
let stateDb;
|
|
3044
|
+
// Orphan proposal purge — reject pending reflect proposals whose target
|
|
3045
|
+
// asset no longer exists on disk. Runs after graph extraction so newly
|
|
3046
|
+
// promoted assets from accept flows during this run are already present.
|
|
3047
|
+
if (primaryStashDir) {
|
|
2865
3048
|
try {
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
if (
|
|
2869
|
-
info(`[improve]
|
|
2870
|
-
}
|
|
2871
|
-
appendEvent({
|
|
2872
|
-
eventType: "events_purged",
|
|
2873
|
-
ref: "events:_purge",
|
|
2874
|
-
metadata: { purgedCount, retentionDays },
|
|
2875
|
-
}, eventsCtx);
|
|
2876
|
-
// improve_runs uses the same retention window as events — both are
|
|
2877
|
-
// observability/audit data, both grow append-only, both have a
|
|
2878
|
-
// dedicated purge helper. Mirroring the events purge here means a
|
|
2879
|
-
// single retention knob (improve.eventRetentionDays) governs both.
|
|
2880
|
-
const improveRunsPurged = purgeOldImproveRuns(stateDb, retentionDays);
|
|
2881
|
-
if (improveRunsPurged > 0) {
|
|
2882
|
-
info(`[improve] improve_runs purge: ${improveRunsPurged} run(s) older than ${retentionDays}d removed from state.db`);
|
|
3049
|
+
const purgeResult = purgeOrphanProposals(primaryStashDir, sources.map((s) => s.path));
|
|
3050
|
+
orphansPurged = purgeResult.rejected;
|
|
3051
|
+
if (purgeResult.rejected > 0) {
|
|
3052
|
+
info(`[improve] orphan purge: ${purgeResult.rejected}/${purgeResult.checked} orphaned proposals rejected (${purgeResult.durationMs}ms)`);
|
|
2883
3053
|
}
|
|
2884
3054
|
appendEvent({
|
|
2885
|
-
eventType: "
|
|
2886
|
-
ref: "
|
|
2887
|
-
metadata: {
|
|
3055
|
+
eventType: "proposal_orphan_purge",
|
|
3056
|
+
ref: "proposals:_orphan-purge",
|
|
3057
|
+
metadata: {
|
|
3058
|
+
checked: purgeResult.checked,
|
|
3059
|
+
rejected: purgeResult.rejected,
|
|
3060
|
+
durationMs: purgeResult.durationMs,
|
|
3061
|
+
byType: purgeResult.byType,
|
|
3062
|
+
orphans: purgeResult.orphans.map((o) => o.ref),
|
|
3063
|
+
},
|
|
2888
3064
|
}, eventsCtx);
|
|
2889
3065
|
}
|
|
2890
3066
|
catch (err) {
|
|
2891
|
-
allWarnings.push(`
|
|
2892
|
-
}
|
|
2893
|
-
finally {
|
|
2894
|
-
if (ownsStateDb && stateDb) {
|
|
2895
|
-
try {
|
|
2896
|
-
stateDb.close();
|
|
2897
|
-
}
|
|
2898
|
-
catch {
|
|
2899
|
-
// best-effort
|
|
2900
|
-
}
|
|
2901
|
-
}
|
|
3067
|
+
allWarnings.push(`orphan purge failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2902
3068
|
}
|
|
2903
|
-
//
|
|
2904
|
-
//
|
|
2905
|
-
//
|
|
2906
|
-
//
|
|
2907
|
-
//
|
|
2908
|
-
let logsDb;
|
|
3069
|
+
// Phase 6B (Advantage D6b): expire pending proposals that have aged past
|
|
3070
|
+
// the retention window. Runs AFTER orphan purge so we never double-archive
|
|
3071
|
+
// a proposal that orphan-purge already moved. `expireStaleProposals` emits
|
|
3072
|
+
// its own per-proposal `proposal_expired` events; we additionally emit a
|
|
3073
|
+
// single roll-up event here for parity with the orphan-purge surface.
|
|
2909
3074
|
try {
|
|
2910
|
-
|
|
2911
|
-
|
|
2912
|
-
if (
|
|
2913
|
-
info(`[improve]
|
|
3075
|
+
const expireResult = expireStaleProposals(primaryStashDir, config);
|
|
3076
|
+
proposalsExpired = expireResult.expired;
|
|
3077
|
+
if (expireResult.expired > 0) {
|
|
3078
|
+
info(`[improve] expiration: ${expireResult.expired}/${expireResult.checked} pending proposals expired ` +
|
|
3079
|
+
`(retention=${expireResult.retentionDays}d, ${expireResult.durationMs}ms)`);
|
|
2914
3080
|
}
|
|
2915
3081
|
appendEvent({
|
|
2916
|
-
eventType: "
|
|
2917
|
-
ref: "
|
|
2918
|
-
metadata: {
|
|
3082
|
+
eventType: "proposal_expiration_pass",
|
|
3083
|
+
ref: "proposals:_expiration",
|
|
3084
|
+
metadata: {
|
|
3085
|
+
checked: expireResult.checked,
|
|
3086
|
+
expired: expireResult.expired,
|
|
3087
|
+
durationMs: expireResult.durationMs,
|
|
3088
|
+
retentionDays: expireResult.retentionDays,
|
|
3089
|
+
expiredProposals: expireResult.expiredProposals,
|
|
3090
|
+
},
|
|
2919
3091
|
}, eventsCtx);
|
|
2920
3092
|
}
|
|
2921
3093
|
catch (err) {
|
|
2922
|
-
allWarnings.push(`
|
|
3094
|
+
allWarnings.push(`proposal expiration failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2923
3095
|
}
|
|
2924
|
-
|
|
2925
|
-
|
|
2926
|
-
|
|
2927
|
-
|
|
3096
|
+
}
|
|
3097
|
+
// Fix #2 (observability 0.8.0): trim the events table in state.db so it
|
|
3098
|
+
// doesn't grow unbounded. `akm health` writes a `health_probe` row on every
|
|
3099
|
+
// invocation, and every command surface emits at least one event besides —
|
|
3100
|
+
// without this trim, state.db is a permanent append-only log. Config key
|
|
3101
|
+
// `improve.eventRetentionDays` (default 90, set 0 to disable) controls the
|
|
3102
|
+
// window. The purge runs against state.db (a different SQLite file from
|
|
3103
|
+
// the index `db` above).
|
|
3104
|
+
{
|
|
3105
|
+
const retentionDays = typeof config.improve?.eventRetentionDays === "number" ? config.improve.eventRetentionDays : 90;
|
|
3106
|
+
if (retentionDays > 0) {
|
|
3107
|
+
// #585: reuse the long-lived eventsCtx.db connection when akmImprove
|
|
3108
|
+
// opened one — opening a second state.db write connection while
|
|
3109
|
+
// eventsDb is still live made two simultaneous writers contend on the
|
|
3110
|
+
// same WAL file ("database is locked"). Only the eventsCtx.dbPath
|
|
3111
|
+
// fallback path (state.db failed to open up-front) opens — and then
|
|
3112
|
+
// owns and closes — its own handle. C2 still holds: the fallback uses
|
|
3113
|
+
// the boundary-pinned path, never a live `process.env` re-read.
|
|
3114
|
+
const ownsStateDb = !eventsCtx?.db;
|
|
3115
|
+
let stateDb;
|
|
3116
|
+
try {
|
|
3117
|
+
stateDb = eventsCtx?.db ?? openStateDatabase(eventsCtx?.dbPath);
|
|
3118
|
+
const purgedCount = purgeOldEvents(stateDb, retentionDays);
|
|
3119
|
+
if (purgedCount > 0) {
|
|
3120
|
+
info(`[improve] events purge: ${purgedCount} event(s) older than ${retentionDays}d removed from state.db`);
|
|
2928
3121
|
}
|
|
2929
|
-
|
|
2930
|
-
|
|
3122
|
+
appendEvent({
|
|
3123
|
+
eventType: "events_purged",
|
|
3124
|
+
ref: "events:_purge",
|
|
3125
|
+
metadata: { purgedCount, retentionDays },
|
|
3126
|
+
}, eventsCtx);
|
|
3127
|
+
// improve_runs uses the same retention window as events — both are
|
|
3128
|
+
// observability/audit data, both grow append-only, both have a
|
|
3129
|
+
// dedicated purge helper. Mirroring the events purge here means a
|
|
3130
|
+
// single retention knob (improve.eventRetentionDays) governs both.
|
|
3131
|
+
const improveRunsPurged = purgeOldImproveRuns(stateDb, retentionDays);
|
|
3132
|
+
if (improveRunsPurged > 0) {
|
|
3133
|
+
info(`[improve] improve_runs purge: ${improveRunsPurged} run(s) older than ${retentionDays}d removed from state.db`);
|
|
3134
|
+
}
|
|
3135
|
+
appendEvent({
|
|
3136
|
+
eventType: "improve_runs_purged",
|
|
3137
|
+
ref: "improve_runs:_purge",
|
|
3138
|
+
metadata: { purgedCount: improveRunsPurged, retentionDays },
|
|
3139
|
+
}, eventsCtx);
|
|
3140
|
+
}
|
|
3141
|
+
catch (err) {
|
|
3142
|
+
allWarnings.push(`events purge failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
3143
|
+
}
|
|
3144
|
+
finally {
|
|
3145
|
+
if (ownsStateDb && stateDb) {
|
|
3146
|
+
try {
|
|
3147
|
+
stateDb.close();
|
|
3148
|
+
}
|
|
3149
|
+
catch {
|
|
3150
|
+
// best-effort
|
|
3151
|
+
}
|
|
3152
|
+
}
|
|
3153
|
+
}
|
|
3154
|
+
// task_logs in logs.db (#579) shares the same retention window as
|
|
3155
|
+
// events/improve_runs — all three are observability data governed by
|
|
3156
|
+
// the single improve.eventRetentionDays knob. Separate try/finally
|
|
3157
|
+
// because logs.db is a different file: a locked/missing logs.db must
|
|
3158
|
+
// not block the state.db purges above.
|
|
3159
|
+
let logsDb;
|
|
3160
|
+
try {
|
|
3161
|
+
logsDb = openLogsDatabase();
|
|
3162
|
+
const taskLogsPurged = purgeOldTaskLogs(logsDb, retentionDays);
|
|
3163
|
+
if (taskLogsPurged > 0) {
|
|
3164
|
+
info(`[improve] task_logs purge: ${taskLogsPurged} log line(s) older than ${retentionDays}d removed from logs.db`);
|
|
3165
|
+
}
|
|
3166
|
+
appendEvent({
|
|
3167
|
+
eventType: "task_logs_purged",
|
|
3168
|
+
ref: "task_logs:_purge",
|
|
3169
|
+
metadata: { purgedCount: taskLogsPurged, retentionDays },
|
|
3170
|
+
}, eventsCtx);
|
|
3171
|
+
}
|
|
3172
|
+
catch (err) {
|
|
3173
|
+
allWarnings.push(`task_logs purge failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
3174
|
+
}
|
|
3175
|
+
finally {
|
|
3176
|
+
if (logsDb) {
|
|
3177
|
+
try {
|
|
3178
|
+
logsDb.close();
|
|
3179
|
+
}
|
|
3180
|
+
catch {
|
|
3181
|
+
// best-effort
|
|
3182
|
+
}
|
|
2931
3183
|
}
|
|
2932
3184
|
}
|
|
2933
3185
|
}
|
|
2934
3186
|
}
|
|
2935
|
-
|
|
2936
|
-
|
|
2937
|
-
|
|
2938
|
-
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
2945
|
-
|
|
2946
|
-
|
|
3187
|
+
// Phase 4A (staleness detection). Activates the `deprecated` belief-state
|
|
3188
|
+
// machinery shipped in Phase 1A. Default OFF — gated by
|
|
3189
|
+
// `features.index.staleness_detection.enabled`. Runs after orphan purge
|
|
3190
|
+
// and before the URL check (which lives in the outer caller).
|
|
3191
|
+
if (sources.length > 0) {
|
|
3192
|
+
try {
|
|
3193
|
+
stalenessDetection = await withLlmStage("staleness-detection", () => stalenessDetectionFn({ config, sources, signal: budgetSignal, db }));
|
|
3194
|
+
if (stalenessDetection.considered > 0) {
|
|
3195
|
+
info(`[improve] staleness detection complete (considered ${stalenessDetection.considered}, ` +
|
|
3196
|
+
`deprecated ${stalenessDetection.deprecated}, confirmed ${stalenessDetection.confirmed}, ` +
|
|
3197
|
+
`skipped ${stalenessDetection.skipped}, ${stalenessDetection.durationMs}ms)`);
|
|
3198
|
+
}
|
|
3199
|
+
for (const w of stalenessDetection.warnings)
|
|
3200
|
+
allWarnings.push(`[improve] staleness detection: ${w}`);
|
|
3201
|
+
}
|
|
3202
|
+
catch (err) {
|
|
3203
|
+
allWarnings.push(`staleness detection failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2947
3204
|
}
|
|
2948
|
-
for (const w of stalenessDetection.warnings)
|
|
2949
|
-
allWarnings.push(`[improve] staleness detection: ${w}`);
|
|
2950
|
-
}
|
|
2951
|
-
catch (err) {
|
|
2952
|
-
allWarnings.push(`staleness detection failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2953
3205
|
}
|
|
2954
3206
|
}
|
|
2955
|
-
|
|
2956
|
-
|
|
2957
|
-
|
|
2958
|
-
|
|
2959
|
-
}
|
|
3207
|
+
finally {
|
|
3208
|
+
if (db)
|
|
3209
|
+
closeDatabase(db);
|
|
3210
|
+
}
|
|
3211
|
+
});
|
|
2960
3212
|
return {
|
|
2961
3213
|
...(memoryInference ? { memoryInference } : {}),
|
|
2962
3214
|
...(graphExtraction ? { graphExtraction } : {}),
|