akm-cli 0.9.0-beta.2 → 0.9.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +248 -0
- package/dist/assets/templates/html/default.html +78 -0
- package/dist/assets/templates/html/health.html +560 -0
- package/dist/assets/templates/html/vendor/echarts.min.js +45 -0
- package/dist/cli/shared.js +21 -5
- package/dist/cli.js +36 -5
- package/dist/commands/health/html-report.js +448 -0
- package/dist/commands/health.js +97 -6
- package/dist/commands/improve/consolidate.js +15 -2
- package/dist/commands/improve/extract.js +38 -2
- package/dist/commands/improve/improve-auto-accept.js +27 -1
- package/dist/commands/improve/improve.js +167 -53
- package/dist/commands/improve/reflect-noise.js +0 -0
- package/dist/commands/improve/reflect.js +25 -0
- package/dist/commands/proposal/drain.js +73 -6
- package/dist/commands/proposal/proposal-cli.js +22 -10
- package/dist/commands/proposal/proposal.js +12 -1
- package/dist/commands/proposal/validators/proposals.js +361 -338
- package/dist/commands/remember.js +6 -2
- package/dist/core/config/config-schema.js +5 -0
- package/dist/core/logs-db.js +304 -0
- package/dist/core/state-db.js +107 -14
- package/dist/indexer/db/db.js +2 -2
- package/dist/indexer/passes/memory-inference.js +61 -22
- package/dist/integrations/harnesses/claude/session-log.js +16 -4
- package/dist/llm/client.js +15 -0
- package/dist/llm/usage-persist.js +77 -0
- package/dist/llm/usage-telemetry.js +103 -0
- package/dist/output/context.js +3 -2
- package/dist/output/html-render.js +73 -0
- package/dist/output/shapes/helpers.js +17 -1
- package/dist/output/text/helpers.js +69 -1
- package/dist/scripts/migrate-storage.js +65 -14
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +14 -2
- package/dist/tasks/runner.js +99 -16
- package/dist/workflows/db.js +4 -0
- package/package.json +2 -1
|
@@ -46,7 +46,7 @@ import { runAgent } from "../../integrations/agent/index.js";
|
|
|
46
46
|
import { runOpencodeSdk } from "../../integrations/harnesses/opencode-sdk/index.js";
|
|
47
47
|
import { chatCompletion, stripJsonFences } from "../../llm/client.js";
|
|
48
48
|
import { akmProposalAccept, akmProposalReject } from "./proposal.js";
|
|
49
|
-
import { listProposals } from "./validators/proposals.js";
|
|
49
|
+
import { listProposals, recordGateDecision } from "./validators/proposals.js";
|
|
50
50
|
// ---------------------------------------------------------------------------
|
|
51
51
|
// Content helpers
|
|
52
52
|
// ---------------------------------------------------------------------------
|
|
@@ -78,7 +78,7 @@ export function classifyProposal(proposal, policy, maxDiffLines) {
|
|
|
78
78
|
const content = proposal.payload.content ?? "";
|
|
79
79
|
// Empty / near-empty diffs reject first (the reject-empty floor).
|
|
80
80
|
if (policy.rejectEmpty && isEmptyDiff(proposal)) {
|
|
81
|
-
return { verdict: "reject", reason: "empty diff" };
|
|
81
|
+
return { verdict: "reject", reason: "empty diff", gate: { reason: "empty-diff" } };
|
|
82
82
|
}
|
|
83
83
|
const rule = policy.accept.find((r) => r.generator === proposal.source);
|
|
84
84
|
if (rule) {
|
|
@@ -87,16 +87,25 @@ export function classifyProposal(proposal, policy, maxDiffLines) {
|
|
|
87
87
|
// Per-rule and global diff bounds defer large accepts (no silent rewrites).
|
|
88
88
|
const effectiveMax = Math.min(rule.maxDiffLines ?? Number.POSITIVE_INFINITY, maxDiffLines ?? Number.POSITIVE_INFINITY);
|
|
89
89
|
if (lines > effectiveMax) {
|
|
90
|
-
return {
|
|
90
|
+
return {
|
|
91
|
+
verdict: "defer",
|
|
92
|
+
reason: "mid-band",
|
|
93
|
+
gate: { reason: "max-diff-lines", measured: lines, thresholds: { maxDiffLines: effectiveMax } },
|
|
94
|
+
};
|
|
91
95
|
}
|
|
92
96
|
if (rule.minContentLines !== undefined && body < rule.minContentLines) {
|
|
93
97
|
// Too little content to confidently auto-accept — leave for judgment.
|
|
94
|
-
return {
|
|
98
|
+
return {
|
|
99
|
+
verdict: "defer",
|
|
100
|
+
reason: "mid-band",
|
|
101
|
+
gate: { reason: "min-content-lines", measured: body, thresholds: { minContentLines: rule.minContentLines } },
|
|
102
|
+
};
|
|
95
103
|
}
|
|
96
|
-
return { verdict: "accept" };
|
|
104
|
+
return { verdict: "accept", gate: { reason: "policy-accept" } };
|
|
97
105
|
}
|
|
98
106
|
if (policy.defer.includes(proposal.source)) {
|
|
99
|
-
|
|
107
|
+
const reason = deferReasonForSource(proposal.source);
|
|
108
|
+
return { verdict: "defer", reason, gate: { reason } };
|
|
100
109
|
}
|
|
101
110
|
// No matching rule — leave pending, untouched.
|
|
102
111
|
return null;
|
|
@@ -347,10 +356,31 @@ export async function drainProposals(opts, promoteFn = akmProposalAccept, reject
|
|
|
347
356
|
// First, classify every proposal deterministically.
|
|
348
357
|
const acceptIds = [];
|
|
349
358
|
const rejectTargets = [];
|
|
359
|
+
const gateLabel = `triage:${opts.policy.name}`;
|
|
360
|
+
// Items deferred purely because they need a judge (no threshold-based reason)
|
|
361
|
+
// — these are re-stamped `no-judge-configured` when no runner resolves them.
|
|
362
|
+
const needsJudge = new Set();
|
|
350
363
|
for (const proposal of pending) {
|
|
351
364
|
const decision = classifyProposal(proposal, opts.policy, opts.maxDiffLines);
|
|
352
365
|
if (decision === null)
|
|
353
366
|
continue;
|
|
367
|
+
// #577: stamp the gate's verdict onto the proposal so `akm proposal show`
|
|
368
|
+
// can explain WHY it landed here. A dry-run performs zero writes, so it
|
|
369
|
+
// records nothing.
|
|
370
|
+
const outcome = decision.verdict === "accept" ? "auto-accepted" : decision.verdict === "reject" ? "auto-rejected" : "deferred";
|
|
371
|
+
stampGateDecision(opts, proposal.id, {
|
|
372
|
+
outcome,
|
|
373
|
+
reason: decision.gate.reason,
|
|
374
|
+
...(decision.gate.measured !== undefined ? { measured: decision.gate.measured } : {}),
|
|
375
|
+
...(decision.gate.thresholds ? { thresholds: decision.gate.thresholds } : {}),
|
|
376
|
+
gate: gateLabel,
|
|
377
|
+
});
|
|
378
|
+
// A defer with no threshold (mid-band / possible-dup from the defer list) is
|
|
379
|
+
// pending only because it needs adjudication — re-stampable to
|
|
380
|
+
// `no-judge-configured`. A band-based defer keeps its specific reason.
|
|
381
|
+
if (decision.verdict === "defer" && !decision.gate.thresholds) {
|
|
382
|
+
needsJudge.add(proposal.id);
|
|
383
|
+
}
|
|
354
384
|
if (decision.verdict === "accept") {
|
|
355
385
|
acceptIds.push(proposal.id);
|
|
356
386
|
}
|
|
@@ -434,14 +464,51 @@ export async function drainProposals(opts, promoteFn = akmProposalAccept, reject
|
|
|
434
464
|
if (tier.skippedByCap.length > 0) {
|
|
435
465
|
info(`[triage] accept ceiling reached in judgment tier: ${tier.skippedByCap.length} judged-accept items skipped by cap (maxAccepts=${opts.maxAccepts})`);
|
|
436
466
|
}
|
|
467
|
+
// #577: re-stamp the gate decision for items the judgment tier resolved so
|
|
468
|
+
// `akm proposal show` reflects the judge's verdict, not the earlier
|
|
469
|
+
// deterministic defer.
|
|
470
|
+
for (const id of tier.promoted) {
|
|
471
|
+
stampGateDecision(opts, id, { outcome: "auto-accepted", reason: "judgment-accept", gate: gateLabel });
|
|
472
|
+
}
|
|
473
|
+
for (const id of tier.rejected) {
|
|
474
|
+
stampGateDecision(opts, id, { outcome: "auto-rejected", reason: "judgment-reject", gate: gateLabel });
|
|
475
|
+
}
|
|
437
476
|
// Replace the deferred list with only the items the judgment tier could NOT
|
|
438
477
|
// resolve (verdict "defer", parse failure, or runner error). Staged
|
|
439
478
|
// queue-mode accepts are RESOLVED and tracked in result.staged instead.
|
|
440
479
|
result.deferred = tier.stillDeferred;
|
|
441
480
|
}
|
|
481
|
+
else if (result.deferred.length > 0) {
|
|
482
|
+
// #577: no judgment runner configured — items deferred *because they need a
|
|
483
|
+
// judge* (mid-band / possible-dup, no threshold reason) stay pending solely
|
|
484
|
+
// for lack of one. Re-stamp those as `no-judge-configured` so the operator
|
|
485
|
+
// sees a per-proposal reason instead of inferring it from the run-level
|
|
486
|
+
// triage_deferred aggregate. Band-deferred items keep their specific reason
|
|
487
|
+
// (e.g. `max-diff-lines`), which is more actionable than "no judge".
|
|
488
|
+
for (const item of result.deferred) {
|
|
489
|
+
if (needsJudge.has(item.id)) {
|
|
490
|
+
stampGateDecision(opts, item.id, { outcome: "deferred", reason: "no-judge-configured", gate: gateLabel });
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
}
|
|
442
494
|
emitDrainEvents(opts, result);
|
|
443
495
|
return result;
|
|
444
496
|
}
|
|
497
|
+
/**
|
|
498
|
+
* Persist a gate decision onto a proposal, honouring the dry-run contract
|
|
499
|
+
* (a dry run performs zero writes, so it records nothing) and never letting a
|
|
500
|
+
* persistence failure abort the drain (#577). Best-effort by design.
|
|
501
|
+
*/
|
|
502
|
+
function stampGateDecision(opts, id, decision) {
|
|
503
|
+
if (opts.dryRun)
|
|
504
|
+
return;
|
|
505
|
+
try {
|
|
506
|
+
recordGateDecision(opts.stashDir, id, decision);
|
|
507
|
+
}
|
|
508
|
+
catch (err) {
|
|
509
|
+
warn(`[triage] failed to record gate decision for ${id}: ${err instanceof Error ? err.message : String(err)}`);
|
|
510
|
+
}
|
|
511
|
+
}
|
|
445
512
|
// ---------------------------------------------------------------------------
|
|
446
513
|
// Events
|
|
447
514
|
// ---------------------------------------------------------------------------
|
|
@@ -16,6 +16,8 @@ import { resolveStashDir } from "../../core/common.js";
|
|
|
16
16
|
import { loadConfig } from "../../core/config/config.js";
|
|
17
17
|
import { UsageError } from "../../core/errors.js";
|
|
18
18
|
import { resolveTriageJudgmentRunner } from "../../integrations/agent/runner.js";
|
|
19
|
+
import { installLlmUsagePersistenceIfAbsent } from "../../llm/usage-persist.js";
|
|
20
|
+
import { withLlmStage } from "../../llm/usage-telemetry.js";
|
|
19
21
|
import { resolveImproveProfile } from "../improve/improve-profiles.js";
|
|
20
22
|
import { drainProposals } from "./drain.js";
|
|
21
23
|
import { resolveDrainPolicy } from "./drain-policies.js";
|
|
@@ -407,16 +409,26 @@ const proposalDrainCommand = defineJsonCommand({
|
|
|
407
409
|
// nothing is configured → the engine leaves deferred items unresolved and
|
|
408
410
|
// emits triage_deferred.
|
|
409
411
|
const judgment = args.judgment === true ? resolveTriageJudgmentRunner(triageConfig?.judgment, cfg) : null;
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
412
|
+
// #576: persist + attribute per-call LLM usage for the standalone drain
|
|
413
|
+
// path. `IfAbsent` keeps an enclosing `akm improve` sink in charge when
|
|
414
|
+
// drain runs as a sub-step; the disposer clears only a sink we installed.
|
|
415
|
+
const disposeDrainUsageSink = installLlmUsagePersistenceIfAbsent();
|
|
416
|
+
let result;
|
|
417
|
+
try {
|
|
418
|
+
result = await withLlmStage("drain", () => drainProposals({
|
|
419
|
+
stashDir,
|
|
420
|
+
policy,
|
|
421
|
+
applyMode,
|
|
422
|
+
maxAccepts,
|
|
423
|
+
dryRun,
|
|
424
|
+
...(maxDiffLines !== undefined ? { maxDiffLines } : {}),
|
|
425
|
+
...(excludeIds ? { excludeIds } : {}),
|
|
426
|
+
judgment,
|
|
427
|
+
}));
|
|
428
|
+
}
|
|
429
|
+
finally {
|
|
430
|
+
disposeDrainUsageSink();
|
|
431
|
+
}
|
|
420
432
|
output("proposal-drain", {
|
|
421
433
|
schemaVersion: 1,
|
|
422
434
|
ok: true,
|
|
@@ -22,6 +22,17 @@ function resolveStash(stashDir) {
|
|
|
22
22
|
return stashDir;
|
|
23
23
|
return resolveStashDir();
|
|
24
24
|
}
|
|
25
|
+
/**
|
|
26
|
+
* Thin in-process read of the pending proposal queue, used by the health HTML
|
|
27
|
+
* report builder (#582) so it never shells out to `akm proposal list`.
|
|
28
|
+
*
|
|
29
|
+
* Deliberately narrow (one optional arg, returns the storage-layer rows) so
|
|
30
|
+
* the parallel proposal-storage-to-SQLite consolidation only has to swap this
|
|
31
|
+
* one function's body.
|
|
32
|
+
*/
|
|
33
|
+
export function listPendingProposals(stashDir) {
|
|
34
|
+
return listProposals(resolveStash(stashDir), { status: "pending" });
|
|
35
|
+
}
|
|
25
36
|
export function akmProposalList(options = {}) {
|
|
26
37
|
const stash = resolveStash(options.stashDir);
|
|
27
38
|
// `--status accepted|rejected|reverted` implies archive-inclusion since the
|
|
@@ -141,7 +152,7 @@ export function akmProposalCreate(options) {
|
|
|
141
152
|
* (raised by `resolveProposalId` / `getProposal`).
|
|
142
153
|
* - Proposal is not `status === "accepted"` → `UsageError("INVALID_FLAG_VALUE")`
|
|
143
154
|
* with message `"only accepted proposals can be reverted ..."`.
|
|
144
|
-
* - No
|
|
155
|
+
* - No backup content on the record (new-asset proposals capture none) →
|
|
145
156
|
* `UsageError` with message `"no backup available for this proposal ..."`.
|
|
146
157
|
*
|
|
147
158
|
* On success, emits a `proposal_reverted` event for observability, mirroring
|