npm - akm-cli - Versions diffs - 0.9.0-beta.2 → 0.9.0-beta.3 - Mend

akm-cli 0.9.0-beta.2 → 0.9.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/CHANGELOG.md +87 -0
package/dist/assets/templates/html/default.html +78 -0
package/dist/assets/templates/html/health.html +560 -0
package/dist/assets/templates/html/vendor/echarts.min.js +45 -0
package/dist/cli/shared.js +21 -5
package/dist/cli.js +36 -5
package/dist/commands/health/html-report.js +448 -0
package/dist/commands/health.js +97 -6
package/dist/commands/improve/extract.js +38 -2
package/dist/commands/improve/improve-auto-accept.js +27 -1
package/dist/commands/improve/improve.js +167 -53
package/dist/commands/improve/reflect-noise.js +0 -0
package/dist/commands/improve/reflect.js +25 -0
package/dist/commands/proposal/drain.js +73 -6
package/dist/commands/proposal/proposal-cli.js +22 -10
package/dist/commands/proposal/proposal.js +12 -1
package/dist/commands/proposal/validators/proposals.js +361 -338
package/dist/commands/remember.js +6 -2
package/dist/core/config/config-schema.js +5 -0
package/dist/core/logs-db.js +304 -0
package/dist/core/state-db.js +107 -14
package/dist/indexer/db/db.js +2 -2
package/dist/indexer/passes/memory-inference.js +61 -22
package/dist/integrations/harnesses/claude/session-log.js +16 -4
package/dist/llm/client.js +15 -0
package/dist/llm/usage-persist.js +77 -0
package/dist/llm/usage-telemetry.js +103 -0
package/dist/output/context.js +3 -2
package/dist/output/html-render.js +73 -0
package/dist/output/shapes/helpers.js +17 -1
package/dist/output/text/helpers.js +69 -1
package/dist/scripts/migrate-storage.js +65 -14
package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +14 -2
package/dist/tasks/runner.js +99 -16
package/dist/workflows/db.js +4 -0
package/package.json +1 -1

package/dist/commands/health.js CHANGED Viewed

@@ -4,11 +4,13 @@
 import fs from "node:fs";
 import { ConfigError, UsageError } from "../core/errors.js";
 import { appendEvent, readEvents } from "../core/events.js";
+import { buildTaskRunId, getLoggedRunIds, openLogsDatabase } from "../core/logs-db.js";
 import { getStateDbPathInDataDir } from "../core/paths.js";
 import { listExistingTableNames, openStateDatabase, queryCompletedTaskIntervals, queryImproveRuns, queryTaskHistory, } from "../core/state-db.js";
 import { parseSinceToIso } from "../core/time.js";
 import { readSemanticStatus } from "../indexer/search/semantic-status.js";
 import { getExecutionLogCandidates } from "../integrations/session-logs/index.js";
+import { LLM_USAGE_EVENT } from "../llm/usage-persist.js";
 import { HEALTH_CHECKS } from "./health/checks.js";
 const DEFAULT_SINCE_MS = 24 * 60 * 60 * 1000;
 const IMPROVE_COMPLETED_EVENT = "improve_completed";
@@ -882,14 +884,84 @@ function computeDeltas(first, last) {
     }
     return out;
 }
-function buildWindowMetrics(db, stateDbPath, since, until, now = () => Date.now()) {
+/**
+ * Partition task_history rows into "should have a log" (non-null log_path) and
+ * "log is actually backed". A run counts as backed when logs.db holds rows for
+ * its run_id (#579 — the DB is the primary record); rows written before logs.db
+ * existed fall back to the transitional on-disk file check. `logsDb` may be
+ * undefined when logs.db could not be opened — then only the file check runs.
+ */
+function partitionLogBackedRows(taskRows, logsDb) {
+    const withLogs = taskRows.filter((row) => row.log_path !== null);
+    const loggedRunIds = logsDb
+        ? getLoggedRunIds(logsDb, withLogs.map((row) => buildTaskRunId(row.task_id, row.started_at)))
+        : new Set();
+    const backed = withLogs.filter((row) => loggedRunIds.has(buildTaskRunId(row.task_id, row.started_at)) ||
+        (row.log_path !== null && fs.existsSync(row.log_path)));
+    return { withLogs, backed };
+}
+/** Stage key used for `llm_usage` events recorded outside any stage scope. */
+const UNATTRIBUTED_STAGE = "unattributed";
+function emptyLlmUsageStageAggregate() { // Builds a new all-zero counter record on every call, so records never share state.
+    return {
+        calls: 0, // number of llm_usage events folded into this record
+        totalDurationMs: 0, // running sum of each event's metadata.durationMs
+        promptTokens: 0, // running sum of metadata.promptTokens
+        completionTokens: 0, // running sum of metadata.completionTokens
+        totalTokens: 0, // running sum of metadata.totalTokens
+        reasoningTokens: 0, // running sum of metadata.reasoningTokens
+    };
+}
+function emptyLlmUsageAggregate() {
+    return { ...emptyLlmUsageStageAggregate(), byStage: {} };
+}
+/**
+ * Aggregate `llm_usage` events (#576) into a window total plus a per-stage
+ * breakdown of call count, wall-time, and token usage. Token fields absent from
+ * a best-effort record contribute 0. Calls with no `stage` land under
+ * {@link UNATTRIBUTED_STAGE}.
+ */
+function summarizeLlmUsage(events) {
+    const aggregate = emptyLlmUsageAggregate();
+    for (const event of events) {
+        const meta = event.metadata ?? {};
+        const stageKey = typeof meta.stage === "string" && meta.stage ? meta.stage : UNATTRIBUTED_STAGE;
+        let stage = aggregate.byStage[stageKey];
+        if (!stage) {
+            stage = emptyLlmUsageStageAggregate();
+            aggregate.byStage[stageKey] = stage;
+        }
+        const durationMs = toFiniteNumber(meta.durationMs);
+        const promptTokens = toFiniteNumber(meta.promptTokens);
+        const completionTokens = toFiniteNumber(meta.completionTokens);
+        const totalTokens = toFiniteNumber(meta.totalTokens);
+        const reasoningTokens = toFiniteNumber(meta.reasoningTokens);
+        for (const target of [aggregate, stage]) {
+            target.calls += 1;
+            target.totalDurationMs += durationMs;
+            target.promptTokens += promptTokens;
+            target.completionTokens += completionTokens;
+            target.totalTokens += totalTokens;
+            target.reasoningTokens += reasoningTokens;
+        }
+    }
+    return aggregate;
+}
+function readLlmUsageAggregate(stateDbPath, since, until) {
+    const events = readEvents({ since, type: LLM_USAGE_EVENT }, { dbPath: stateDbPath }).events.filter((event) => {
+        if (until === undefined)
+            return true;
+        return new Date(event.ts ?? since).getTime() < new Date(until).getTime();
+    });
+    return summarizeLlmUsage(events);
+}
+function buildWindowMetrics(db, stateDbPath, since, until, now = () => Date.now(), logsDb) {
     const taskRows = queryTaskHistory(db, { since }).filter((row) => {
         const startMs = new Date(row.started_at).getTime();
         const untilMs = new Date(until).getTime();
         return !Number.isFinite(untilMs) || startMs < untilMs;
     });
-    const taskRowsWithLogs = taskRows.filter((row) => row.log_path !== null);
-    const existingLogRows = taskRowsWithLogs.filter((row) => row.log_path && fs.existsSync(row.log_path));
+    const { withLogs: taskRowsWithLogs, backed: existingLogRows } = partitionLogBackedRows(taskRows, logsDb);
     const failedTaskRows = taskRows.filter((row) => row.status === "failed");
     const activeRows = taskRows.filter((row) => row.status === "active");
     const stuckActiveRuns = activeRows.filter((row) => now() - new Date(row.started_at).getTime() > ACTIVE_RUN_WARN_MS).length;
@@ -923,6 +995,7 @@ function buildWindowMetrics(db, stateDbPath, since, until, now = () => Date.now(
         stuckActiveRuns,
         logBackingRate: roundRate(logBackingRate),
         probeRoundTripMs: null,
+        llmUsage: readLlmUsageAggregate(stateDbPath, since, until),
     };
     return { improve: improveSummary, metrics, runs: runCount };
 }
@@ -961,6 +1034,16 @@ export function akmHealth(options = {}) {
     catch (error) {
         throw new ConfigError(`Unable to open state.db: ${error instanceof Error ? error.message : String(error)}`, "INVALID_CONFIG_FILE");
     }
+    // logs.db backs the log-backing metric (#579). Best-effort: when it cannot
+    // be opened, partitionLogBackedRows falls back to the on-disk file check, so
+    // health never hard-fails on a missing/locked logs database.
+    let logsDb;
+    try {
+        logsDb = openLogsDatabase(options.logsDbPath);
+    }
+    catch {
+        logsDb = undefined;
+    }
     try {
         const tables = listExistingTableNames(db, ["events", "task_history", "proposals", "schema_migrations"]);
         const tableNames = tables.map((row) => row.name).sort();
@@ -968,8 +1051,7 @@ export function akmHealth(options = {}) {
         const missingTables = requiredTables.filter((name) => !tableNames.includes(name));
         const probe = probeStateDbRoundTrip(stateDbPath);
         const taskRows = queryTaskHistory(db, { since });
-        const taskRowsWithLogs = taskRows.filter((row) => row.log_path !== null);
-        const existingLogRows = taskRowsWithLogs.filter((row) => row.log_path && fs.existsSync(row.log_path));
+        const { withLogs: taskRowsWithLogs, backed: existingLogRows } = partitionLogBackedRows(taskRows, logsDb);
         const failedTaskRows = taskRows.filter((row) => row.status === "failed");
         const activeRows = taskRows.filter((row) => row.status === "active");
         const stuckActiveRuns = activeRows.filter((row) => now() - new Date(row.started_at).getTime() > ACTIVE_RUN_WARN_MS).length;
@@ -1041,6 +1123,7 @@ export function akmHealth(options = {}) {
             stuckActiveRuns,
             logBackingRate: roundRate(logBackingRate),
             probeRoundTripMs: probe.durationMs,
+            llmUsage: readLlmUsageAggregate(stateDbPath, since),
         };
         const hardFailure = hardChecks.some((check) => check.status === "fail");
         const deterministicWarnings = [...hardChecks, ...advisories].some((check) => check.status === "warn" && check.kind === "deterministic");
@@ -1062,7 +1145,7 @@ export function akmHealth(options = {}) {
             windowResults = windowSpecs.map((spec) => {
                 const winSince = parseHealthSince(spec.since);
                 const winUntil = spec.until ? parseHealthSince(spec.until) : new Date(now()).toISOString();
-                const bundle = buildWindowMetrics(db, stateDbPath, winSince, winUntil, now);
+                const bundle = buildWindowMetrics(db, stateDbPath, winSince, winUntil, now, logsDb);
                 return {
                     name: spec.name,
                     since: winSince,
@@ -1112,6 +1195,14 @@ export function akmHealth(options = {}) {
     }
     finally {
         db.close();
+        if (logsDb) {
+            try {
+                logsDb.close();
+            }
+            catch {
+                // best-effort
+            }
+        }
     }
 }
 // ── Markdown renderers ───────────────────────────────────────────────────────

package/dist/commands/improve/extract.js CHANGED Viewed

@@ -42,6 +42,14 @@ import { buildExtractPrompt, EXTRACT_JSON_SCHEMA, parseExtractPayload } from "./
 import { buildSessionSummaryPrompt, parseSessionSummary, SESSION_SUMMARY_JSON_SCHEMA, sessionMeetsDurationGate, writeSessionAsset, } from "./session-asset.js";
 /** Default minimum session duration (minutes) for session indexing (#561). */
 const DEFAULT_MIN_SESSION_DURATION_MINUTES = 5;
+/**
+ * Default minimum raw session size (chars) below which the extract LLM call is
+ * skipped (#595/#596). Deliberately tiny: analysis of 218 candidate-producing
+ * sessions showed sessions of 22–368 raw chars regularly yield 1–5 candidates,
+ * so size is not a reliable proxy for value — only truly empty sessions
+ * (0 chars, journal files) are safe to skip.
+ *
+ * NOTE(review): the gate compares raw chars with `<`, so with the value 10 a
+ * session of 1–9 raw chars is skipped as well, not only 0-char ones — confirm
+ * that is intended. A configured `processes.extract.minContentChars` of 0
+ * disables the gate.
+ */
+const DEFAULT_MIN_CONTENT_CHARS = 10;
 // ── Helpers ──────────────────────────────────────────────────────────────────
 /**
  * Parse a since-string into an absolute ms-epoch cutoff. Accepts:
@@ -115,7 +123,7 @@ function buildCandidateProposal(candidate, sourceRef) {
  * proposal validation failure) the session result records a warning and
  * keeps going — one session's bad luck never aborts a multi-session run.
  */
-async function processSession(harness, sessionRef, stashDir, config, llmConfig, chat, ctx, sourceRun, dryRun, timeoutMs, maxTotalChars, sessionIndexing) {
+async function processSession(harness, sessionRef, stashDir, config, llmConfig, chat, ctx, sourceRun, dryRun, timeoutMs, maxTotalChars, minContentChars, sessionIndexing) {
     const warnings = [];
     let data;
     try {
@@ -136,6 +144,31 @@ async function processSession(harness, sessionRef, stashDir, config, llmConfig,
     const filtered = preFilterSession(data, {
         ...(typeof maxTotalChars === "number" ? { maxTotalChars } : {}),
     });
+    // #595/#596 — minContentChars gate: skip the LLM call for sessions whose RAW
+    // size is below threshold. Measured on the raw event text BEFORE the noise
+    // pre-filter, NOT on post-filter output — the pre-filter strips boilerplate
+    // so aggressively that even signal-bearing sessions can have tiny output
+    // (#596: gating post-filter filtered out 100% of sessions). Note: the 0.8.x
+    // fix gated on `filtered.stats.inputCount`, which is an EVENT count, not a
+    // char count — this port measures actual raw chars so the threshold matches
+    // the config key's documented unit.
+    const rawContentChars = data.events.reduce((sum, event) => sum + event.text.length, 0);
+    if (minContentChars > 0 && rawContentChars < minContentChars) {
+        return {
+            sessionId: sessionRef.sessionId,
+            harness: harness.name,
+            candidateCount: 0,
+            proposalIds: [],
+            preFilter: {
+                inputCount: filtered.stats.inputCount,
+                outputCount: filtered.stats.outputCount,
+                truncatedCount: filtered.stats.truncatedCount,
+            },
+            warnings: [],
+            skipped: true,
+            skipReason: "too_short",
+        };
+    }
     const prompt = buildExtractPrompt({ data, events: filtered.events, inlineRefs: data.inlineRefs });
     // #561 — ADDITIVE session indexing. Generate + write the session asset
     // (`sessions/<harness>/<id>.md`). FAIL-OPEN: any failure only records a
@@ -339,6 +372,9 @@ export async function akmExtract(options) {
         60_000;
     // Pre-filter budget — process config can raise it for large-context models.
     const maxTotalChars = typeof extractProcess?.maxTotalChars === "number" ? extractProcess.maxTotalChars : undefined;
+    // #595/#596 — minimum raw session size; sessions below it skip the LLM call
+    // entirely. Set `processes.extract.minContentChars: 0` to disable the gate.
+    const minContentChars = typeof extractProcess?.minContentChars === "number" ? extractProcess.minContentChars : DEFAULT_MIN_CONTENT_CHARS;
     // Default discovery window — process config can override the built-in 24h.
     const effectiveSince = options.since ?? extractProcess?.defaultSince;
     // #561 — resolve session-indexing config. Default ON: we only reach this code
@@ -483,7 +519,7 @@ export async function akmExtract(options) {
             continue;
         }
         try {
-            const result = await processSession(harness, summary, stashDir, config, llmConfig, chat, options.ctx, sourceRun, dryRun, timeoutMs, maxTotalChars, sessionIndexing);
+            const result = await processSession(harness, summary, stashDir, config, llmConfig, chat, options.ctx, sourceRun, dryRun, timeoutMs, maxTotalChars, minContentChars, sessionIndexing);
             sessions.push(result);
             if (result.skipped)
                 skippedCount += 1;

package/dist/commands/improve/improve-auto-accept.js CHANGED Viewed

@@ -4,7 +4,7 @@
 import { loadConfig } from "../../core/config/config.js";
 import { appendEvent } from "../../core/events.js";
 import { info, warn } from "../../core/warn.js";
-import { promoteProposal } from "../proposal/validators/proposals.js";
+import { promoteProposal, recordGateDecision } from "../proposal/validators/proposals.js";
 // ---------------------------------------------------------------------------
 // Gate implementation
 // ---------------------------------------------------------------------------
@@ -26,14 +26,40 @@ export async function runAutoAcceptGate(candidates, cfg, promoteFn = promoteProp
     }
     const effectiveThreshold = Math.max(cfg.globalThreshold, cfg.minimumThreshold ?? 0) / 100;
     const resolvedConfig = typeof cfg.config === "function" ? cfg.config() : cfg.config;
+    const gateLabel = `improve:${cfg.phase}`;
+    // #577: stamp the gate's verdict onto each proposal so `akm proposal show`
+    // can explain why a proposal is pending (e.g. "deferred: below-threshold,
+    // 0.72 < 0.90"). Best-effort — a recording failure must never abort the gate.
+    const stamp = (proposalId, decision) => {
+        try {
+            recordGateDecision(cfg.stashDir, proposalId, decision);
+        }
+        catch (err) {
+            warn(`[improve] ${cfg.phase} failed to record gate decision for ${proposalId}: ${err instanceof Error ? err.message : String(err)}`);
+        }
+    };
     for (const candidate of candidates) {
         const { proposalId, confidence } = candidate;
         if (confidence === undefined || confidence < effectiveThreshold) {
+            stamp(proposalId, {
+                outcome: "deferred",
+                reason: confidence === undefined ? "no-confidence" : "below-threshold",
+                ...(confidence !== undefined ? { confidence } : {}),
+                thresholds: { autoAccept: effectiveThreshold },
+                gate: gateLabel,
+            });
             result.skipped.push(proposalId);
             continue;
         }
         try {
             const promotion = await promoteFn(cfg.stashDir, resolvedConfig, proposalId, {}, undefined);
+            stamp(promotion.proposal.id, {
+                outcome: "auto-accepted",
+                reason: "above-threshold",
+                confidence,
+                thresholds: { autoAccept: effectiveThreshold },
+                gate: gateLabel,
+            });
             appendEvent({
                 eventType: "promoted",
                 ref: promotion.ref,