akm-cli 0.9.0-beta.5 → 0.9.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +119 -0
- package/dist/cli.js +7 -0
- package/dist/commands/feedback-cli.js +42 -37
- package/dist/commands/graph/graph.js +75 -71
- package/dist/commands/health.js +10 -2
- package/dist/commands/improve/consolidate.js +24 -4
- package/dist/commands/improve/distill.js +26 -5
- package/dist/commands/improve/extract-prompt.js +1 -1
- package/dist/commands/improve/improve-auto-accept.js +6 -0
- package/dist/commands/improve/improve-profiles.js +4 -0
- package/dist/commands/improve/improve.js +753 -465
- package/dist/commands/improve/proactive-maintenance.js +113 -0
- package/dist/commands/improve/reflect.js +6 -0
- package/dist/commands/proposal/proposal.js +5 -0
- package/dist/commands/proposal/validators/proposals.js +67 -54
- package/dist/commands/read/curate.js +17 -0
- package/dist/commands/sources/stash-cli.js +10 -2
- package/dist/core/config/config-schema.js +25 -0
- package/dist/core/paths.js +3 -0
- package/dist/core/state-db.js +46 -1
- package/dist/indexer/db/db.js +97 -11
- package/dist/indexer/ensure-index.js +152 -17
- package/dist/indexer/index-writer-lock.js +99 -0
- package/dist/indexer/indexer.js +114 -111
- package/dist/integrations/harnesses/claude/session-log.js +1 -1
- package/dist/llm/client.js +23 -4
- package/dist/scripts/migrate-storage.js +90 -13
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8 -1
- package/dist/sources/providers/tar-utils.js +16 -8
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,125 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.9.0-beta.9] - 2026-06-14
|
|
10
|
+
|
|
11
|
+
Restore and instrument `akm improve` steady-state output. The reflect/distill
|
|
12
|
+
self-improvement lanes had been near-zero in steady state because the
|
|
13
|
+
signal-delta eligibility gate was the only lane (cache "no-access = no-work"
|
|
14
|
+
pathology) and the high-retrieval fallback was structurally dead. This release
|
|
15
|
+
revives proactive improvement, adds attribution + a measurement/kill-criterion
|
|
16
|
+
system so the lane must prove its value, and right-sizes reflect budgets to
|
|
17
|
+
their task timeouts.
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
|
|
21
|
+
- **Proactive maintenance selector** (`proactiveMaintenance` improve process):
|
|
22
|
+
due-gated, composite-priority (`importance × log(1+retrievalFreq) ×
|
|
23
|
+
recencyDecay / log(size)`), bounded rotating top-N reflect/distill over
|
|
24
|
+
stale/never-reflected assets. **Disabled by default**; enable per profile.
|
|
25
|
+
- **Eligibility attribution**: every reflect/distill proposal is stamped
|
|
26
|
+
`eligibilitySource ∈ {signal-delta, high-retrieval, proactive, scope,
|
|
27
|
+
unknown}` on `reflect_invoked`/`distill_invoked`/`promoted` events and the
|
|
28
|
+
proposal record, so outcomes are sliceable by lane.
|
|
29
|
+
- **Measurement system** under `scripts/akm-eval/`: a real-query retrieval suite
|
|
30
|
+
generated from `usage_events`, and `akm-eval-proactive-verdict` — a read-only
|
|
31
|
+
kill-criterion runner comparing the proactive lane (treatment) vs due-but-
|
|
32
|
+
untouched assets (control). Emits PASS/FAIL/INCONCLUSIVE and recommends
|
|
33
|
+
disabling the lane on FAIL. New `proactive_selected` event +
|
|
34
|
+
`proactiveSelected`/`proactiveDueTotal`/`proactiveNeverReflected` fields on
|
|
35
|
+
`improve_completed`.
|
|
36
|
+
|
|
37
|
+
### Fixed
|
|
38
|
+
|
|
39
|
+
- Revived the P0-A high-retrieval fallback: genuinely zero-feedback assets were
|
|
40
|
+
routed to the fully-skipped branch one phase before the fallback could see
|
|
41
|
+
them, so frequently-retrieved-but-never-rated assets were never improved.
|
|
42
|
+
- `getRetrievalCounts` now normalizes bare vs `origin//`-prefixed refs (it was
|
|
43
|
+
dropping ~half the retrieval signal) and counts `curate` events
|
|
44
|
+
(`akm curate` now records per-item `entry_ref`).
|
|
45
|
+
- The fully-skipped `no_new_signal` branch emitted one `improve_skipped` event
|
|
46
|
+
per ref (~11K writes/run, ~400K rows/day) — a contributor to 900s improve
|
|
47
|
+
timeouts and state.db bloat. Collapsed into one aggregated counted event.
|
|
48
|
+
|
|
49
|
+
## [0.9.0-beta.8] - 2026-06-13
|
|
50
|
+
|
|
51
|
+
Fix multi-process SQLite contention in `index.db` and harden concurrent proposal
|
|
52
|
+
queue mutations.
|
|
53
|
+
|
|
54
|
+
### Changed
|
|
55
|
+
|
|
56
|
+
- Added a global `index.db` writer lease used by foreground indexing,
|
|
57
|
+
background auto-index, improve maintenance index writers, graph updates, and
|
|
58
|
+
feedback writes.
|
|
59
|
+
- Replaced the racy background index PID-file dedup flow with lease-based
|
|
60
|
+
coordination and explicit handoff to the spawned worker.
|
|
61
|
+
- `akm feedback` now uses blocking index preparation and writes under the same
|
|
62
|
+
`index.db` lease, avoiding self-inflicted `database is locked` failures.
|
|
63
|
+
- Proposal queue create/archive/gate-decision mutations now run under
|
|
64
|
+
`BEGIN IMMEDIATE` state.db transactions so concurrent processes serialize on
|
|
65
|
+
live queue state.
|
|
66
|
+
|
|
67
|
+
## [0.9.0-beta.7] - 2026-06-13
|
|
68
|
+
|
|
69
|
+
Fix the `akm improve` regression introduced by background `ensureIndex`.
|
|
70
|
+
|
|
71
|
+
### Changed
|
|
72
|
+
|
|
73
|
+
- Added an explicit `ensureIndex` mode so callers choose `background` or
|
|
74
|
+
`blocking` behavior directly instead of relying on hidden environment state.
|
|
75
|
+
- `akm improve` now uses blocking index preparation before collecting eligible
|
|
76
|
+
refs, restoring the post-upgrade empty-index recovery path.
|
|
77
|
+
- Removed the `AKM_INDEX_INLINE` test-only override so tests exercise the same
|
|
78
|
+
index behavior model as production.
|
|
79
|
+
|
|
80
|
+
## [0.9.0-beta.6] - 2026-06-12
|
|
81
|
+
|
|
82
|
+
Pipeline optimization: new per-process config fields wire up the consolidation
|
|
83
|
+
and improve pipeline knobs exposed by the optimization report — incremental
|
|
84
|
+
consolidation, pool caps, distill gating, and memory inference throttling.
|
|
85
|
+
|
|
86
|
+
### Added
|
|
87
|
+
|
|
88
|
+
- **`consolidate.incrementalSince`** — profile config field that narrows the
|
|
89
|
+
consolidation candidate pool to memories modified within the given window
|
|
90
|
+
(e.g. `"1h"`, `"4h"`) plus their graph neighbours. Enables frequent
|
|
91
|
+
consolidation passes (e.g. `quick-shredder` every 15 min) without full-pool
|
|
92
|
+
sweeps. Absent = full-pool sweep (correct for nightly runs).
|
|
93
|
+
- **`consolidate.limit`** — hard cap on memories processed per consolidation
|
|
94
|
+
pass, applied after incremental narrowing. Prevents runaway full-pool sweeps
|
|
95
|
+
in the nightly default profile.
|
|
96
|
+
- **`consolidate.neighborsPerChanged`** — configurable graph-neighbour count
|
|
97
|
+
per changed memory during incremental consolidation (was hardcoded to 5).
|
|
98
|
+
`quick-shredder` sets this to 3 for a 40% candidate reduction per burst.
|
|
99
|
+
- **`distill.requirePlannedRefs`** — when `true`, the distill process is
|
|
100
|
+
skipped entirely for distill-only refs when the reflect phase produced zero
|
|
101
|
+
planned refs. Eliminates hundreds of `distill-skipped` events on quiet passes
|
|
102
|
+
where all refs are on reflect cooldown.
|
|
103
|
+
- **`memoryInference.minPendingCount`** — minimum pending split-parent memory
|
|
104
|
+
count below which the inference pass is skipped entirely (zero LLM calls).
|
|
105
|
+
Prevents lock acquisition on passes where there is nothing to infer.
|
|
106
|
+
- **`reflect.limit`** — per-process ref limit for the reflect/distill loop,
|
|
107
|
+
applied as the improve run limit when no CLI `--limit` is given.
|
|
108
|
+
- **New `reflect-distill` improve profile** — dedicated reflect + distill +
|
|
109
|
+
memoryInference + triage profile for the every-4h `akm-improve-frequent`
|
|
110
|
+
task. `reflect.limit: 25` bounds LLM cost per pass.
|
|
111
|
+
|
|
112
|
+
### Changed
|
|
113
|
+
|
|
114
|
+
- **`quick-shredder` profile tuned**: `incrementalSince` `4h` → `1h`,
|
|
115
|
+
`maxChunkSize` 25 → 35, added `minPoolSize: 10`, `neighborsPerChanged: 3`,
|
|
116
|
+
`memoryInference.minPendingCount: 5`. All `profile: "qwen-9b-shredder"`
|
|
117
|
+
process references removed — falls back to default LLM.
|
|
118
|
+
- **`default` improve profile** (nightly): extract disabled (dedicated
|
|
119
|
+
`akm-extract` task runs at 01:48), consolidate gets `limit: 500`,
|
|
120
|
+
reflect gets `limit: 100` and `allowedTypes`, distill gets
|
|
121
|
+
`requirePlannedRefs: true`, triage enabled at 50 accepts/run,
|
|
122
|
+
graphExtraction explicitly enabled.
|
|
123
|
+
- **Cron schedule optimised**: extract reverted to `8,28,48 * * * *` (3×/hr),
|
|
124
|
+
quick-shredder shifted to `4,19,34,49` (4-min extract gap), health-report
|
|
125
|
+
shifted to `:03` (avoids `:00` collision), `akm-improve-frequent` re-enabled
|
|
126
|
+
at `45 */4` with `reflect-distill` profile.
|
|
127
|
+
|
|
9
128
|
## [0.9.0-beta.3] - 2026-06-12
|
|
10
129
|
|
|
11
130
|
Stabilization batch closing the remaining 0.9.0 milestone: DB-locking and
|
package/dist/cli.js
CHANGED
|
@@ -536,6 +536,13 @@ const EXIT_HEALTH_WARN = EXIT_CODES.HEALTH_WARN;
|
|
|
536
536
|
// The wrapper sets `AKM_NODE_ENTRY=1` to opt into the startup block. The test
|
|
537
537
|
// harness never sets it, so importing cli.ts under Bun stays inert as before.
|
|
538
538
|
if (import.meta.main || process.env.AKM_NODE_ENTRY === "1") {
|
|
539
|
+
// Mark that this process is the real akm CLI: its `process.argv[1]` is the
|
|
540
|
+
// akm entrypoint, so the background auto-reindex may safely re-invoke it as a
|
|
541
|
+
// detached child. Hosts that merely import this module (the in-process test
|
|
542
|
+
// harness, library embeddings) never reach this block, so they fall back to
|
|
543
|
+
// an inline reindex instead of spawning the wrong program. See
|
|
544
|
+
// `ensureIndex` in src/indexer/ensure-index.ts.
|
|
545
|
+
process.env.AKM_CLI_ENTRY = "1";
|
|
539
546
|
// citty reads process.argv directly and does not accept a custom argv array,
|
|
540
547
|
// so we must replace process.argv with the normalized version before runMain.
|
|
541
548
|
process.argv = normalizeShowArgv(process.argv);
|
|
@@ -14,6 +14,7 @@ import { appendEvent } from "../core/events.js";
|
|
|
14
14
|
import { warn } from "../core/warn.js";
|
|
15
15
|
import { applyFeedbackToUtilityScore, closeDatabase, findEntryIdByRef, getEntryFilePathById, openExistingDatabase, } from "../indexer/db/db.js";
|
|
16
16
|
import { ensureIndex } from "../indexer/ensure-index.js";
|
|
17
|
+
import { withIndexWriterLease } from "../indexer/index-writer-lock.js";
|
|
17
18
|
import { resolveSourceEntries } from "../indexer/search/search-source.js";
|
|
18
19
|
import { countFeedbackSignals, insertUsageEvent } from "../indexer/usage/usage-events.js";
|
|
19
20
|
// ── Tag validation ────────────────────────────────────────────────────────────
|
|
@@ -203,47 +204,51 @@ export const feedbackCommand = defineCommand({
|
|
|
203
204
|
...(validatedTags.length > 0 ? { tags: validatedTags } : {}),
|
|
204
205
|
};
|
|
205
206
|
const metadataStr = Object.keys(metadataObj).length > 1 ? JSON.stringify(metadataObj) : undefined;
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
const db = openExistingDatabase();
|
|
213
|
-
try {
|
|
214
|
-
const entryId = findEntryIdByRef(db, ref);
|
|
215
|
-
if (entryId === undefined) {
|
|
216
|
-
throw new UsageError(`Ref "${ref}" is not in the index. ` +
|
|
217
|
-
"Run 'akm search' to verify the asset exists, then 'akm index' if it was recently added.");
|
|
207
|
+
const utilityResult = await withIndexWriterLease({ purpose: "feedback-write" }, async () => {
|
|
208
|
+
// Feedback is itself an index.db writer, so it must not spawn a detached
|
|
209
|
+
// reindex and then compete with it for the same database file.
|
|
210
|
+
const sources = resolveSourceEntries();
|
|
211
|
+
if (sources.length > 0) {
|
|
212
|
+
await ensureIndex(sources[0].path, { mode: "blocking" });
|
|
218
213
|
}
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
// For negative signals, the score is adjusted the next time `akm index`
|
|
222
|
-
// runs — the signal is durable in the DB but does NOT suppress ranking
|
|
223
|
-
// in search results until after reindexing.
|
|
224
|
-
insertUsageEvent(db, {
|
|
225
|
-
event_type: "feedback",
|
|
226
|
-
entry_ref: ref,
|
|
227
|
-
entry_id: entryId,
|
|
228
|
-
signal,
|
|
229
|
-
metadata: metadataStr,
|
|
230
|
-
});
|
|
231
|
-
// Apply feedback-derived utility score adjustment immediately so that
|
|
232
|
-
// positive/negative signals influence search ranking without requiring
|
|
233
|
-
// a full reindex. We query the total accumulated feedback counts from
|
|
234
|
-
// usage_events so the delta reflects the entire signal history.
|
|
235
|
-
// Uses MemRL bounded-step EMA (F-5 / #386, arXiv:2601.03192).
|
|
214
|
+
let scopedUtilityResult;
|
|
215
|
+
const db = openExistingDatabase();
|
|
236
216
|
try {
|
|
237
|
-
const
|
|
238
|
-
|
|
217
|
+
const entryId = findEntryIdByRef(db, ref);
|
|
218
|
+
if (entryId === undefined) {
|
|
219
|
+
throw new UsageError(`Ref "${ref}" is not in the index. ` +
|
|
220
|
+
"Run 'akm search' to verify the asset exists, then 'akm index' if it was recently added.");
|
|
221
|
+
}
|
|
222
|
+
// Persist the feedback signal into usage_events. For positive signals,
|
|
223
|
+
// the EMA utility score is updated immediately on the next read path.
|
|
224
|
+
// For negative signals, the score is adjusted the next time `akm index`
|
|
225
|
+
// runs — the signal is durable in the DB but does NOT suppress ranking
|
|
226
|
+
// in search results until after reindexing.
|
|
227
|
+
insertUsageEvent(db, {
|
|
228
|
+
event_type: "feedback",
|
|
229
|
+
entry_ref: ref,
|
|
230
|
+
entry_id: entryId,
|
|
231
|
+
signal,
|
|
232
|
+
metadata: metadataStr,
|
|
233
|
+
});
|
|
234
|
+
// Apply feedback-derived utility score adjustment immediately so that
|
|
235
|
+
// positive/negative signals influence search ranking without requiring
|
|
236
|
+
// a full reindex. We query the total accumulated feedback counts from
|
|
237
|
+
// usage_events so the delta reflects the entire signal history.
|
|
238
|
+
// Uses MemRL bounded-step EMA (F-5 / #386, arXiv:2601.03192).
|
|
239
|
+
try {
|
|
240
|
+
const { pos, neg } = countFeedbackSignals(db, entryId);
|
|
241
|
+
scopedUtilityResult = applyFeedbackToUtilityScore(db, entryId, pos, neg);
|
|
242
|
+
}
|
|
243
|
+
catch {
|
|
244
|
+
// best-effort — feedback recording succeeds even if utility update fails
|
|
245
|
+
}
|
|
239
246
|
}
|
|
240
|
-
|
|
241
|
-
|
|
247
|
+
finally {
|
|
248
|
+
closeDatabase(db);
|
|
242
249
|
}
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
closeDatabase(db);
|
|
246
|
-
}
|
|
250
|
+
return scopedUtilityResult;
|
|
251
|
+
});
|
|
247
252
|
appendEvent({
|
|
248
253
|
eventType: "feedback",
|
|
249
254
|
ref,
|
|
@@ -12,6 +12,7 @@ import { closeDatabase, findEntryIdByRef, getEntryById, getEntryRefRowsForStashR
|
|
|
12
12
|
import { loadStoredGraphSnapshot } from "../../indexer/db/graph-db.js";
|
|
13
13
|
import { listRelatedPathsForFile } from "../../indexer/graph/graph-boost.js";
|
|
14
14
|
import { runGraphExtractionPass } from "../../indexer/graph/graph-extraction.js";
|
|
15
|
+
import { withIndexWriterLease } from "../../indexer/index-writer-lock.js";
|
|
15
16
|
import { lookup } from "../../indexer/indexer.js";
|
|
16
17
|
import { findSourceForPath, resolveSourceEntries } from "../../indexer/search/search-source.js";
|
|
17
18
|
import { resolveAssetPath } from "../../indexer/walk/path-resolver.js";
|
|
@@ -375,85 +376,88 @@ export async function akmGraphUpdate(options) {
|
|
|
375
376
|
}
|
|
376
377
|
}
|
|
377
378
|
const scoped = Array.isArray(options.refs) && options.refs.length > 0;
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
379
|
+
return withIndexWriterLease({ purpose: "graph-update" }, async () => {
|
|
380
|
+
let candidatePaths;
|
|
381
|
+
if (scoped && options.refs) {
|
|
382
|
+
// Resolve each ref to an absolute file path while the writer lease is held
|
|
383
|
+
// so the scoped graph write sees the same index snapshot it resolved from.
|
|
384
|
+
const dbPath = getDbPath();
|
|
385
|
+
let db;
|
|
386
|
+
const resolvedPaths = new Set();
|
|
387
|
+
try {
|
|
388
|
+
db = openDatabase(dbPath);
|
|
389
|
+
for (const ref of options.refs) {
|
|
390
|
+
const trimmed = ref.trim();
|
|
391
|
+
if (!trimmed)
|
|
392
|
+
continue;
|
|
393
|
+
const entryId = findEntryIdByRef(db, trimmed);
|
|
394
|
+
if (entryId === undefined) {
|
|
395
|
+
warn(`[graph] ref not found in index, skipping: ${trimmed}`);
|
|
396
|
+
continue;
|
|
397
|
+
}
|
|
398
|
+
const row = getEntryById(db, entryId);
|
|
399
|
+
if (!row?.filePath) {
|
|
400
|
+
warn(`[graph] could not resolve path for ref, skipping: ${trimmed}`);
|
|
401
|
+
continue;
|
|
402
|
+
}
|
|
403
|
+
resolvedPaths.add(row.filePath);
|
|
394
404
|
}
|
|
395
|
-
const row = getEntryById(db, entryId);
|
|
396
|
-
if (!row?.filePath) {
|
|
397
|
-
warn(`[graph] could not resolve path for ref, skipping: ${trimmed}`);
|
|
398
|
-
continue;
|
|
399
|
-
}
|
|
400
|
-
resolvedPaths.add(row.filePath);
|
|
401
405
|
}
|
|
406
|
+
finally {
|
|
407
|
+
if (db)
|
|
408
|
+
closeDatabase(db);
|
|
409
|
+
}
|
|
410
|
+
if (resolvedPaths.size === 0) {
|
|
411
|
+
warn("[graph] none of the provided refs resolved to indexed paths — no extraction performed.");
|
|
412
|
+
return {
|
|
413
|
+
shape: "graph-update",
|
|
414
|
+
ok: true,
|
|
415
|
+
filesExtracted: 0,
|
|
416
|
+
entitiesUpserted: 0,
|
|
417
|
+
relationsUpserted: 0,
|
|
418
|
+
durationMs: 0,
|
|
419
|
+
scoped: true,
|
|
420
|
+
};
|
|
421
|
+
}
|
|
422
|
+
candidatePaths = resolvedPaths;
|
|
402
423
|
}
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
424
|
+
const extractionFn = options.graphExtractionFn ?? runGraphExtractionPass;
|
|
425
|
+
const passOptions = candidatePaths ? { candidatePaths } : {};
|
|
426
|
+
let db;
|
|
427
|
+
const startMs = Date.now();
|
|
428
|
+
try {
|
|
429
|
+
db = openDatabase(getDbPath());
|
|
430
|
+
const onProgress = (event) => {
|
|
431
|
+
if (!event.currentPath)
|
|
432
|
+
return;
|
|
433
|
+
const file = path.basename(event.currentPath);
|
|
434
|
+
warn(`[graph] extracting ${event.processed}/${event.total} ${file}`);
|
|
435
|
+
};
|
|
436
|
+
const result = await extractionFn({
|
|
437
|
+
config,
|
|
438
|
+
sources,
|
|
439
|
+
signal: undefined,
|
|
440
|
+
db,
|
|
441
|
+
reEnrich: false,
|
|
442
|
+
onProgress,
|
|
443
|
+
options: passOptions,
|
|
444
|
+
});
|
|
445
|
+
const durationMs = Date.now() - startMs;
|
|
409
446
|
return {
|
|
410
447
|
shape: "graph-update",
|
|
411
448
|
ok: true,
|
|
412
|
-
filesExtracted:
|
|
413
|
-
entitiesUpserted:
|
|
414
|
-
relationsUpserted:
|
|
415
|
-
durationMs
|
|
416
|
-
scoped
|
|
449
|
+
filesExtracted: result.quality.extractedFiles,
|
|
450
|
+
entitiesUpserted: result.quality.entityCount,
|
|
451
|
+
relationsUpserted: result.quality.relationCount,
|
|
452
|
+
durationMs,
|
|
453
|
+
scoped,
|
|
417
454
|
};
|
|
418
455
|
}
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
const startMs = Date.now();
|
|
425
|
-
try {
|
|
426
|
-
db = openDatabase(getDbPath());
|
|
427
|
-
const onProgress = (event) => {
|
|
428
|
-
if (!event.currentPath)
|
|
429
|
-
return;
|
|
430
|
-
const file = path.basename(event.currentPath);
|
|
431
|
-
warn(`[graph] extracting ${event.processed}/${event.total} ${file}`);
|
|
432
|
-
};
|
|
433
|
-
const result = await extractionFn({
|
|
434
|
-
config,
|
|
435
|
-
sources,
|
|
436
|
-
signal: undefined,
|
|
437
|
-
db,
|
|
438
|
-
reEnrich: false,
|
|
439
|
-
onProgress,
|
|
440
|
-
options: passOptions,
|
|
441
|
-
});
|
|
442
|
-
const durationMs = Date.now() - startMs;
|
|
443
|
-
return {
|
|
444
|
-
shape: "graph-update",
|
|
445
|
-
ok: true,
|
|
446
|
-
filesExtracted: result.quality.extractedFiles,
|
|
447
|
-
entitiesUpserted: result.quality.entityCount,
|
|
448
|
-
relationsUpserted: result.quality.relationCount,
|
|
449
|
-
durationMs,
|
|
450
|
-
scoped,
|
|
451
|
-
};
|
|
452
|
-
}
|
|
453
|
-
finally {
|
|
454
|
-
if (db)
|
|
455
|
-
closeDatabase(db);
|
|
456
|
-
}
|
|
456
|
+
finally {
|
|
457
|
+
if (db)
|
|
458
|
+
closeDatabase(db);
|
|
459
|
+
}
|
|
460
|
+
});
|
|
457
461
|
}
|
|
458
462
|
async function resolveGraphTarget(ref, source) {
|
|
459
463
|
const parsedRef = parseAssetRef(ref);
|
package/dist/commands/health.js
CHANGED
|
@@ -760,11 +760,19 @@ function computeWallTimeStats(durationsMs, byPhase) {
|
|
|
760
760
|
}
|
|
761
761
|
function buildImproveSkipSummary(events) {
|
|
762
762
|
const skipReasons = {};
|
|
763
|
+
let skipped = 0;
|
|
763
764
|
for (const event of events) {
|
|
764
765
|
const reason = typeof event.metadata?.reason === "string" && event.metadata.reason.trim() ? event.metadata.reason : "unknown";
|
|
765
|
-
|
|
766
|
+
// Aggregated skip events (e.g. `no_new_signal`, `profile_filtered_all_passes`)
|
|
767
|
+
// carry a `count` of the refs they represent in a single row instead of one
|
|
768
|
+
// event per ref. Honor that count so the skip histogram reflects the true
|
|
769
|
+
// number of skipped refs; per-ref events without a count contribute 1.
|
|
770
|
+
const rawCount = event.metadata?.count;
|
|
771
|
+
const count = typeof rawCount === "number" && Number.isFinite(rawCount) && rawCount > 0 ? rawCount : 1;
|
|
772
|
+
skipReasons[reason] = (skipReasons[reason] ?? 0) + count;
|
|
773
|
+
skipped += count;
|
|
766
774
|
}
|
|
767
|
-
return { skipped
|
|
775
|
+
return { skipped, skipReasons };
|
|
768
776
|
}
|
|
769
777
|
function probeStateDbRoundTrip(stateDbPath) {
|
|
770
778
|
const before = readEvents({}, { dbPath: stateDbPath }).nextOffset;
|
|
@@ -809,7 +809,7 @@ export async function akmConsolidate(opts = {}) {
|
|
|
809
809
|
};
|
|
810
810
|
}
|
|
811
811
|
if (opts.incrementalSince) {
|
|
812
|
-
memories = narrowToIncrementalCandidates(memories, opts.incrementalSince, warnings);
|
|
812
|
+
memories = narrowToIncrementalCandidates(memories, opts.incrementalSince, warnings, opts.neighborsPerChanged);
|
|
813
813
|
if (memories.length === 0) {
|
|
814
814
|
return {
|
|
815
815
|
schemaVersion: 1,
|
|
@@ -828,6 +828,27 @@ export async function akmConsolidate(opts = {}) {
|
|
|
828
828
|
};
|
|
829
829
|
}
|
|
830
830
|
}
|
|
831
|
+
if (opts.limit !== undefined && memories.length > opts.limit) {
|
|
832
|
+
// Order oldest-modified-first before capping so the limit selects the
|
|
833
|
+
// stalest memories rather than a fixed head of the (rowid-ordered) DB
|
|
834
|
+
// query. Consolidation rewrites surviving files, bumping their mtime, so
|
|
835
|
+
// processed memories drift to the back of the queue and the cap rotates
|
|
836
|
+
// across the whole corpus over successive runs instead of revisiting the
|
|
837
|
+
// same slice every time. Fail-open to 0 (front of queue) when a file can
|
|
838
|
+
// no longer be stat'd.
|
|
839
|
+
const mtimeOf = (m) => {
|
|
840
|
+
try {
|
|
841
|
+
return fs.statSync(m.filePath).mtimeMs;
|
|
842
|
+
}
|
|
843
|
+
catch {
|
|
844
|
+
return 0;
|
|
845
|
+
}
|
|
846
|
+
};
|
|
847
|
+
const mtimeCache = new Map(memories.map((m) => [m.filePath, mtimeOf(m)]));
|
|
848
|
+
memories = [...memories].sort((a, b) => (mtimeCache.get(a.filePath) ?? 0) - (mtimeCache.get(b.filePath) ?? 0));
|
|
849
|
+
warnings.push(`Consolidation: pool capped at ${opts.limit} of ${memories.length} memories (limit option, oldest-modified first).`);
|
|
850
|
+
memories = memories.slice(0, opts.limit);
|
|
851
|
+
}
|
|
831
852
|
// Consolidation always uses the HTTP LLM client directly — never the agent
|
|
832
853
|
// CLI. The agent CLI is for interactive agent sessions (reflect, propose);
|
|
833
854
|
// structured JSON generation works better and faster via HTTP.
|
|
@@ -2004,7 +2025,7 @@ function parseSinceToIso(since) {
|
|
|
2004
2025
|
const multiplier = { m: 60_000, h: 3_600_000, d: 86_400_000 }[m[2]];
|
|
2005
2026
|
return new Date(Date.now() - parseInt(m[1], 10) * multiplier).toISOString();
|
|
2006
2027
|
}
|
|
2007
|
-
export function narrowToIncrementalCandidates(memories, since, warnings) {
|
|
2028
|
+
export function narrowToIncrementalCandidates(memories, since, warnings, neighborsPerChanged = 5) {
|
|
2008
2029
|
const sinceIso = parseSinceToIso(since);
|
|
2009
2030
|
const isChanged = (m) => {
|
|
2010
2031
|
try {
|
|
@@ -2019,7 +2040,6 @@ export function narrowToIncrementalCandidates(memories, since, warnings) {
|
|
|
2019
2040
|
return [];
|
|
2020
2041
|
if (changed.length === memories.length)
|
|
2021
2042
|
return memories;
|
|
2022
|
-
const NEIGHBORS_PER_CHANGED = 5;
|
|
2023
2043
|
const byName = new Map(memories.map((m) => [m.name, m]));
|
|
2024
2044
|
const keep = new Set(changed.map((m) => m.name));
|
|
2025
2045
|
let db;
|
|
@@ -2029,7 +2049,7 @@ export function narrowToIncrementalCandidates(memories, since, warnings) {
|
|
|
2029
2049
|
const id = findEntryIdByRef(db, `memory:${m.name}`);
|
|
2030
2050
|
if (id === undefined)
|
|
2031
2051
|
continue;
|
|
2032
|
-
for (const hit of getNeighborsByEntryId(db, id,
|
|
2052
|
+
for (const hit of getNeighborsByEntryId(db, id, neighborsPerChanged + 1)) {
|
|
2033
2053
|
if (hit.id === id)
|
|
2034
2054
|
continue;
|
|
2035
2055
|
const entry = getEntryById(db, hit.id);
|
|
@@ -586,7 +586,7 @@ similarLessons) {
|
|
|
586
586
|
* @param reason - Human-readable rejection reason.
|
|
587
587
|
* @param extraMeta - Optional additional metadata for the event.
|
|
588
588
|
*/
|
|
589
|
-
function writeQualityRejection(stash, inputRef, lessonRef, content, score, reason, extraMeta = {}) {
|
|
589
|
+
function writeQualityRejection(stash, inputRef, lessonRef, content, score, reason, extraMeta = {}, eligibilitySource) {
|
|
590
590
|
// D-5 / #388: reviewNeeded flag selects "review_needed" vs "quality_rejected" outcome.
|
|
591
591
|
const outcome = extraMeta.reviewNeeded ? "review_needed" : "quality_rejected";
|
|
592
592
|
const rejectDir = path.join(stash, ".akm", "distill-rejected");
|
|
@@ -602,6 +602,9 @@ function writeQualityRejection(stash, inputRef, lessonRef, content, score, reaso
|
|
|
602
602
|
score,
|
|
603
603
|
reason,
|
|
604
604
|
...extraMeta,
|
|
605
|
+
// Attribution tagging: stamp the eligibility lane so distill_invoked can be
|
|
606
|
+
// sliced by lane downstream. See EligibilitySource.
|
|
607
|
+
...(eligibilitySource ? { eligibilitySource } : {}),
|
|
605
608
|
},
|
|
606
609
|
});
|
|
607
610
|
return {
|
|
@@ -629,6 +632,12 @@ export async function akmDistill(options) {
|
|
|
629
632
|
// Validate the ref shape up front so a typo never reaches the LLM.
|
|
630
633
|
const parsedInputRef = parseAssetRef(inputRef);
|
|
631
634
|
const targetKind = options.proposalKind ?? "lesson";
|
|
635
|
+
// Attribution tagging: spread into every distill_invoked event's metadata so
|
|
636
|
+
// the lane that selected this asset is recorded uniformly across all outcome
|
|
637
|
+
// branches. Empty object when no lane was supplied (direct `akm distill`).
|
|
638
|
+
const eligMeta = options.eligibilitySource
|
|
639
|
+
? { eligibilitySource: options.eligibilitySource }
|
|
640
|
+
: {};
|
|
632
641
|
// Recursive-distillation guard. Distill produces *lessons* from non-lesson
|
|
633
642
|
// sources (memory, skill, knowledge, etc.). Calling distill on an existing
|
|
634
643
|
// lesson would derive `lesson:lesson-<name>-lesson-lesson` (double `-lesson`
|
|
@@ -650,6 +659,7 @@ export async function akmDistill(options) {
|
|
|
650
659
|
lessonRef: skippedRef,
|
|
651
660
|
message: "distill refuses lesson inputs — lessons are the distilled form, not a source",
|
|
652
661
|
skipReason: "recursive_lesson_input",
|
|
662
|
+
...eligMeta,
|
|
653
663
|
},
|
|
654
664
|
});
|
|
655
665
|
return {
|
|
@@ -766,6 +776,7 @@ export async function akmDistill(options) {
|
|
|
766
776
|
outcome: "skipped",
|
|
767
777
|
lessonRef: promotion.knowledgeRef,
|
|
768
778
|
message: "D-1: LLM resolved destination conflict as NOOP — existing content kept",
|
|
779
|
+
...eligMeta,
|
|
769
780
|
},
|
|
770
781
|
});
|
|
771
782
|
return {
|
|
@@ -814,9 +825,9 @@ export async function akmDistill(options) {
|
|
|
814
825
|
if (!judgeResult.pass) {
|
|
815
826
|
if (judgeResult.reviewNeeded) {
|
|
816
827
|
// Uncertainty band (2.5–3.5): queue as review_needed instead of rejecting.
|
|
817
|
-
return writeQualityRejection(stash, inputRef, promotion.knowledgeRef, resolvedPromotionContent, judgeResult.score, judgeResult.reason, { reviewNeeded: true });
|
|
828
|
+
return writeQualityRejection(stash, inputRef, promotion.knowledgeRef, resolvedPromotionContent, judgeResult.score, judgeResult.reason, { reviewNeeded: true }, options.eligibilitySource);
|
|
818
829
|
}
|
|
819
|
-
return writeQualityRejection(stash, inputRef, promotion.knowledgeRef, resolvedPromotionContent, judgeResult.score, judgeResult.reason);
|
|
830
|
+
return writeQualityRejection(stash, inputRef, promotion.knowledgeRef, resolvedPromotionContent, judgeResult.score, judgeResult.reason, {}, options.eligibilitySource);
|
|
820
831
|
}
|
|
821
832
|
// Normalize 1-5 judge score to [0, 1]. Score of -1 means pass-through
|
|
822
833
|
// (no LLM / timeout / parse failure) — leave confidence undefined so
|
|
@@ -834,6 +845,8 @@ export async function akmDistill(options) {
|
|
|
834
845
|
...(Object.keys(knowledgeParsed.data).length > 0 ? { frontmatter: knowledgeParsed.data } : {}),
|
|
835
846
|
},
|
|
836
847
|
...(knowledgeJudgeConfidence !== undefined ? { confidence: knowledgeJudgeConfidence } : {}),
|
|
848
|
+
// Attribution tagging: persist the eligibility lane on the proposal.
|
|
849
|
+
...(options.eligibilitySource ? { eligibilitySource: options.eligibilitySource } : {}),
|
|
837
850
|
}, options.ctx);
|
|
838
851
|
if (isProposalSkipped(proposalResult)) {
|
|
839
852
|
appendEvent({
|
|
@@ -844,6 +857,7 @@ export async function akmDistill(options) {
|
|
|
844
857
|
lessonRef: promotion.knowledgeRef,
|
|
845
858
|
message: proposalResult.message,
|
|
846
859
|
skipReason: proposalResult.reason,
|
|
860
|
+
...eligMeta,
|
|
847
861
|
},
|
|
848
862
|
});
|
|
849
863
|
return {
|
|
@@ -867,6 +881,7 @@ export async function akmDistill(options) {
|
|
|
867
881
|
proposalId: proposal.id,
|
|
868
882
|
...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
|
|
869
883
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
|
|
884
|
+
...eligMeta,
|
|
870
885
|
},
|
|
871
886
|
});
|
|
872
887
|
return {
|
|
@@ -979,6 +994,7 @@ export async function akmDistill(options) {
|
|
|
979
994
|
lessonRef: effectiveLessonRef,
|
|
980
995
|
proposalKind: effectiveProposalKind,
|
|
981
996
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
|
|
997
|
+
...eligMeta,
|
|
982
998
|
},
|
|
983
999
|
});
|
|
984
1000
|
return {
|
|
@@ -1203,6 +1219,7 @@ export async function akmDistill(options) {
|
|
|
1203
1219
|
proposalKind: effectiveProposalKind,
|
|
1204
1220
|
findingKinds: findings.map((f) => f.kind),
|
|
1205
1221
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
|
|
1222
|
+
...eligMeta,
|
|
1206
1223
|
},
|
|
1207
1224
|
});
|
|
1208
1225
|
const message = findings.map((f) => f.message).join("\n");
|
|
@@ -1224,9 +1241,9 @@ export async function akmDistill(options) {
|
|
|
1224
1241
|
return writeQualityRejection(stash, inputRef, effectiveLessonRef, content, judgeResult.score, judgeResult.reason, {
|
|
1225
1242
|
reviewNeeded: true,
|
|
1226
1243
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {}),
|
|
1227
|
-
});
|
|
1244
|
+
}, options.eligibilitySource);
|
|
1228
1245
|
}
|
|
1229
|
-
return writeQualityRejection(stash, inputRef, effectiveLessonRef, content, judgeResult.score, judgeResult.reason, exclusionSet.size > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {});
|
|
1246
|
+
return writeQualityRejection(stash, inputRef, effectiveLessonRef, content, judgeResult.score, judgeResult.reason, exclusionSet.size > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {}, options.eligibilitySource);
|
|
1230
1247
|
}
|
|
1231
1248
|
// Normalize 1-5 judge score to [0, 1]. Score of -1 means pass-through
|
|
1232
1249
|
// (no LLM / timeout / parse failure) — leave confidence undefined so
|
|
@@ -1256,6 +1273,8 @@ export async function akmDistill(options) {
|
|
|
1256
1273
|
frontmatter: frontmatterWithSources,
|
|
1257
1274
|
},
|
|
1258
1275
|
...(lessonJudgeConfidence !== undefined ? { confidence: lessonJudgeConfidence } : {}),
|
|
1276
|
+
// Attribution tagging: persist the eligibility lane on the proposal.
|
|
1277
|
+
...(options.eligibilitySource ? { eligibilitySource: options.eligibilitySource } : {}),
|
|
1259
1278
|
}, options.ctx);
|
|
1260
1279
|
if (isProposalSkipped(proposalResult2)) {
|
|
1261
1280
|
appendEvent({
|
|
@@ -1266,6 +1285,7 @@ export async function akmDistill(options) {
|
|
|
1266
1285
|
lessonRef: effectiveLessonRef,
|
|
1267
1286
|
message: proposalResult2.message,
|
|
1268
1287
|
skipReason: proposalResult2.reason,
|
|
1288
|
+
...eligMeta,
|
|
1269
1289
|
},
|
|
1270
1290
|
});
|
|
1271
1291
|
return {
|
|
@@ -1290,6 +1310,7 @@ export async function akmDistill(options) {
|
|
|
1290
1310
|
...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
|
|
1291
1311
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
|
|
1292
1312
|
...(descriptionSwapped > 0 ? { descriptionSwapped } : {}),
|
|
1313
|
+
...eligMeta,
|
|
1293
1314
|
},
|
|
1294
1315
|
});
|
|
1295
1316
|
return {
|
|
@@ -55,7 +55,7 @@ export const EXTRACT_JSON_SCHEMA = {
|
|
|
55
55
|
type: "string",
|
|
56
56
|
minLength: 20,
|
|
57
57
|
maxLength: 400,
|
|
58
|
-
description: "One-sentence summary of the candidate. Must be a complete sentence
|
|
58
|
+
description: "One-sentence summary of the candidate. Must be a complete sentence in active voice. Do NOT start with 'When', 'If', 'How', 'Use', or 'Avoid'. Do NOT end with ':', ';', or ','. Do NOT use heading-fragment text ('Summary', 'Overview', 'Key finding:'). Minimum 20 characters, maximum 400 characters.",
|
|
59
59
|
},
|
|
60
60
|
when_to_use: {
|
|
61
61
|
type: "string",
|
|
@@ -72,6 +72,12 @@ export async function runAutoAcceptGate(candidates, cfg, promoteFn = promoteProp
|
|
|
72
72
|
confidence,
|
|
73
73
|
threshold: effectiveThreshold,
|
|
74
74
|
phase: cfg.phase,
|
|
75
|
+
// Attribution tagging: carry the eligibility lane from the proposal
|
|
76
|
+
// record onto the auto-accept promoted event so the lane survives to
|
|
77
|
+
// accept time even when promotion happens in a later run.
|
|
78
|
+
...(promotion.proposal.eligibilitySource !== undefined
|
|
79
|
+
? { eligibilitySource: promotion.proposal.eligibilitySource }
|
|
80
|
+
: {}),
|
|
75
81
|
},
|
|
76
82
|
}, cfg.eventsCtx ?? {});
|
|
77
83
|
info(`[improve] auto-accepted ${promotion.ref} (${cfg.phase}; confidence=${confidence.toFixed(2)} >= threshold=${effectiveThreshold.toFixed(2)})`);
|