npm - @gethmy/agent - Versions diffs - 1.7.0 → 1.7.1 - Mend

@gethmy/agent 1.7.0 → 1.7.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (29)

package/README.md +8 -1
package/dist/budget.d.ts +20 -28
package/dist/budget.js +24 -112
package/dist/cli.d.ts +0 -2
package/dist/cli.js +0 -64
package/dist/completion.d.ts +5 -1
package/dist/completion.js +20 -2
package/dist/episode-writer.d.ts +32 -0
package/dist/episode-writer.js +120 -3
package/dist/git-diff-stat.d.ts +24 -0
package/dist/git-diff-stat.js +56 -0
package/dist/http-server.d.ts +1 -14
package/dist/http-server.js +1 -19
package/dist/index.js +1 -9
package/dist/pool.d.ts +4 -3
package/dist/pool.js +19 -18
package/dist/progress-tracker.d.ts +3 -0
package/dist/progress-tracker.js +15 -0
package/dist/prompt.d.ts +5 -0
package/dist/prompt.js +44 -1
package/dist/review-completion.d.ts +0 -5
package/dist/review-completion.js +63 -62
package/dist/state-store.d.ts +8 -7
package/dist/state-store.js +14 -23
package/dist/types.d.ts +33 -6
package/dist/types.js +0 -3
package/dist/worker.d.ts +1 -0
package/dist/worker.js +47 -4
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -16,13 +16,20 @@ Built for **failsafe auto mode**: crashed daemons recover on restart, misconfigu
 ```bash
 # Run directly (works with any package manager)
-npx @gethmy/agent
+npx @gethmy/agent@latest
 # Or install globally
 npm install -g @gethmy/agent
 harmony-agent
 ```
+> **Always pin `@latest`.** A bare `npx @gethmy/agent` reuses any previously
+> cached version that satisfies the spec — so an old install in `~/.npm/_npx`
+> can shadow the current release and you'll get stale startup logs and CLI
+> behavior. `npx @gethmy/agent@latest` re-resolves to the newest published
+> version. If you ever suspect a stale run, clear the cache with
+> `rm -rf ~/.npm/_npx` (or install globally to skip npx caching entirely).
 ## Configuration
 1. Set up the MCP server first:

package/dist/budget.d.ts CHANGED Viewed

@@ -1,6 +1,4 @@
-import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
-import type { Card } from "@harmony/shared";
-import type { StateStore } from "./state-store.js";
+import type { FailureSummaryRecord, StateStore } from "./state-store.js";
 import type { AgentConfig } from "./types.js";
 export type GuardDecision = {
     allow: true;
@@ -9,39 +7,33 @@ export type GuardDecision = {
     reason: GuardReason;
     detail: string;
 };
-export type GuardReason = "dlq" | "max_attempts" | "card_cost_cap" | "daily_budget";
+export type GuardReason = "max_attempts" | "daily_budget";
 /**
- * BudgetGuard is consulted on every pickup and on every run start.
- * It protects the daemon from three failure modes:
- *   1. Cards that can never succeed (DLQ after N failed attempts).
- *   2. Cards that burn unbounded tokens on a single attempt.
- *   3. Runaway daily spend across the entire daemon.
+ * BudgetGuard is consulted on every implement pickup. It protects the
+ * daemon from two failure modes:
+ *   1. Cards that can never succeed — after N failed attempts the daemon
+ *      gives up quietly (`max_attempts`) and pings once via a comment.
+ *   2. Runaway daily spend across the entire daemon (`daily_budget`).
  *
- * The guard is advisory for the hot path (returns a decision); the
- * caller is responsible for marking DLQ and skipping the enqueue.
+ * Both are recoverable: `max_attempts` resets when the card is reassigned
+ * (see `StateStore.resetAttempts`), and `daily_budget` resets at UTC
+ * midnight. There is no per-card cost cap and no permanent dead-letter
+ * quarantine — the guard never blocks a card forever.
  */
 export declare class BudgetGuard {
     private config;
     private store;
     constructor(config: AgentConfig["budget"], store: StateStore);
     /**
-     * Inspect a card before we commit to picking it up. If any threshold
-     * is already exceeded, return a skip decision — the caller should
-     * apply the DLQ label (for `dlq`/`max_attempts`/`card_cost_cap`) or
-     * simply hold until the daily budget resets (`daily_budget`).
+     * Inspect a card before we commit to picking it up. `max_attempts` means
+     * the daemon has given up (the worker has already posted a comment);
+     * `daily_budget` is a soft pause until the UTC day rolls over.
      */
     check(cardId: string): GuardDecision;
-    /**
-     * Does the guard's decision warrant a permanent DLQ marker? The daily
-     * budget is *not* permanent — it resets at UTC midnight — so we only
-     * DLQ for terminal states.
-     */
-    isTerminal(reason: GuardReason): boolean;
-    /**
-     * Apply the DLQ label to a card, persist the reason, and append a
-     * post-mortem block to the card description listing the last 3 failure
-     * summaries. Safe to call repeatedly — labels are idempotent and the
-     * description block is delimited so reruns replace rather than stack.
-     */
-    markDlq(client: HarmonyApiClient, card: Card, reason: GuardReason, detail: string): Promise<void>;
 }
+/**
+ * Build the one-shot "agent gave up" comment posted when a card exhausts
+ * its attempt budget. Lists the recent failure summaries so a human has a
+ * post-mortem trail and a recovery branch to check out.
+ */
+export declare function buildGaveUpComment(maxAttempts: number, failures: FailureSummaryRecord[]): string;

package/dist/budget.js CHANGED Viewed

@@ -1,15 +1,14 @@
-import { log } from "./log.js";
-import { runTransition } from "./transitions.js";
-const TAG = "budget";
 /**
- * BudgetGuard is consulted on every pickup and on every run start.
- * It protects the daemon from three failure modes:
- *   1. Cards that can never succeed (DLQ after N failed attempts).
- *   2. Cards that burn unbounded tokens on a single attempt.
- *   3. Runaway daily spend across the entire daemon.
+ * BudgetGuard is consulted on every implement pickup. It protects the
+ * daemon from two failure modes:
+ *   1. Cards that can never succeed — after N failed attempts the daemon
+ *      gives up quietly (`max_attempts`) and pings once via a comment.
+ *   2. Runaway daily spend across the entire daemon (`daily_budget`).
  *
- * The guard is advisory for the hot path (returns a decision); the
- * caller is responsible for marking DLQ and skipping the enqueue.
+ * Both are recoverable: `max_attempts` resets when the card is reassigned
+ * (see `StateStore.resetAttempts`), and `daily_budget` resets at UTC
+ * midnight. There is no per-card cost cap and no permanent dead-letter
+ * quarantine — the guard never blocks a card forever.
  */
 export class BudgetGuard {
     config;
@@ -19,37 +18,19 @@ export class BudgetGuard {
         this.store = store;
     }
     /**
-     * Inspect a card before we commit to picking it up. If any threshold
-     * is already exceeded, return a skip decision — the caller should
-     * apply the DLQ label (for `dlq`/`max_attempts`/`card_cost_cap`) or
-     * simply hold until the daily budget resets (`daily_budget`).
+     * Inspect a card before we commit to picking it up. `max_attempts` means
+     * the daemon has given up (the worker has already posted a comment);
+     * `daily_budget` is a soft pause until the UTC day rolls over.
      */
     check(cardId) {
-        if (this.store.isDlq(cardId)) {
-            const rec = this.store.getCard(cardId);
+        const card = this.store.getCard(cardId);
+        if (card && card.attempts >= this.config.maxAttemptsPerCard) {
             return {
                 allow: false,
-                reason: "dlq",
-                detail: rec?.dlqReason ?? "previously marked DLQ",
+                reason: "max_attempts",
+                detail: `${card.attempts} of ${this.config.maxAttemptsPerCard} attempts exhausted`,
             };
         }
-        const card = this.store.getCard(cardId);
-        if (card) {
-            if (card.attempts >= this.config.maxAttemptsPerCard) {
-                return {
-                    allow: false,
-                    reason: "max_attempts",
-                    detail: `${card.attempts} of ${this.config.maxAttemptsPerCard} attempts exhausted`,
-                };
-            }
-            if (card.totalCostCents >= this.config.maxCentsPerCard) {
-                return {
-                    allow: false,
-                    reason: "card_cost_cap",
-                    detail: `spent ${formatCents(card.totalCostCents)} of ${formatCents(this.config.maxCentsPerCard)} per-card cap`,
-                };
-            }
-        }
         const dailySpent = this.store.getDailyCostCents();
         if (dailySpent >= this.config.dailyBudgetCents) {
             return {
@@ -60,60 +41,16 @@ export class BudgetGuard {
         }
         return { allow: true };
     }
-    /**
-     * Does the guard's decision warrant a permanent DLQ marker? The daily
-     * budget is *not* permanent — it resets at UTC midnight — so we only
-     * DLQ for terminal states.
-     */
-    isTerminal(reason) {
-        return (reason === "dlq" ||
-            reason === "max_attempts" ||
-            reason === "card_cost_cap");
-    }
-    /**
-     * Apply the DLQ label to a card, persist the reason, and append a
-     * post-mortem block to the card description listing the last 3 failure
-     * summaries. Safe to call repeatedly — labels are idempotent and the
-     * description block is delimited so reruns replace rather than stack.
-     */
-    async markDlq(client, card, reason, detail) {
-        await this.store.markDlq(card.id, `${reason}: ${detail}`);
-        try {
-            await runTransition(client, card, {
-                addLabels: [
-                    { name: this.config.dlqLabel, color: this.config.dlqLabelColor },
-                ],
-            });
-        }
-        catch (err) {
-            log.warn(TAG, `failed to add dlq label to #${card.short_id}: ${err instanceof Error ? err.message : err}`);
-        }
-        try {
-            const recent = this.store.getRecentFailures(card.id, 3);
-            const block = buildDlqDescriptionBlock(reason, detail, recent);
-            const existing = card.description ?? "";
-            const stripped = stripDlqBlock(existing);
-            await client.updateCard(card.id, {
-                description: `${stripped}${stripped ? "\n\n" : ""}${block}`,
-            });
-        }
-        catch (err) {
-            log.warn(TAG, `failed to post DLQ summary to #${card.short_id}: ${err instanceof Error ? err.message : err}`);
-        }
-        log.warn(TAG, `#${card.short_id} DLQ'd — ${reason}: ${detail}`);
-    }
 }
-const DLQ_FENCE_START = "<!-- agent-dlq:start -->";
-const DLQ_FENCE_END = "<!-- agent-dlq:end -->";
-// Legacy marker — pre-fence DLQ blocks written before 2026-05-23. Strip path
-// only; new blocks always emit the fenced form.
-const LEGACY_DLQ_MARKER = "---\n**Agent DLQ**";
-function buildDlqDescriptionBlock(reason, detail, failures) {
+/**
+ * Build the one-shot "agent gave up" comment posted when a card exhausts
+ * its attempt budget. Lists the recent failure summaries so a human has a
+ * post-mortem trail and a recovery branch to check out.
+ */
+export function buildGaveUpComment(maxAttempts, failures) {
     const lines = [
-        DLQ_FENCE_START,
-        "---",
-        "**Agent DLQ**",
-        `Cap hit: ${reason} — ${detail}`,
+        "**Agent gave up — needs a human.**",
+        `Stopped after ${maxAttempts} failed attempt${maxAttempts === 1 ? "" : "s"}. Reassign the card to try again.`,
     ];
     if (failures.length > 0) {
         lines.push("", "Recent failures:");
@@ -129,33 +66,8 @@ function buildDlqDescriptionBlock(reason, detail, failures) {
     else {
         lines.push("", "_No prior failure summaries recorded._");
     }
-    lines.push(DLQ_FENCE_END);
     return lines.join("\n");
 }
-function stripDlqBlock(description) {
-    const start = description.indexOf(DLQ_FENCE_START);
-    if (start >= 0) {
-        const end = description.indexOf(DLQ_FENCE_END, start);
-        if (end < 0) {
-            // Malformed: opening fence with no closer. Treat the rest of the
-            // description as the block — safer than preserving an orphan fence.
-            return description.slice(0, start).trimEnd();
-        }
-        const prefix = description.slice(0, start).trimEnd();
-        const suffix = description
-            .slice(end + DLQ_FENCE_END.length)
-            .replace(/^\s+/, "");
-        if (prefix && suffix)
-            return `${prefix}\n\n${suffix}`;
-        return prefix || suffix;
-    }
-    // Legacy unfenced block — match the original behavior (no suffix to
-    // preserve, since the legacy emitter always wrote to end-of-description).
-    const legacy = description.indexOf(LEGACY_DLQ_MARKER);
-    if (legacy >= 0)
-        return description.slice(0, legacy).trimEnd();
-    return description.trimEnd();
-}
 function formatCents(cents) {
     return `$${(cents / 100).toFixed(2)}`;
 }

package/dist/cli.d.ts CHANGED Viewed

@@ -9,8 +9,6 @@
  *   health          — GET /health, exit 0 if healthy, 1 otherwise
  *   doctor          — run preflight checks without starting the daemon
  *   gc              — one-shot worktree garbage collection
- *   dlq list        — print DLQ entries
- *   dlq clear <id>  — clear a card's DLQ mark
  *   help            — show usage
  */
 export {};

package/dist/cli.js CHANGED Viewed

@@ -9,8 +9,6 @@
  *   health          — GET /health, exit 0 if healthy, 1 otherwise
  *   doctor          — run preflight checks without starting the daemon
  *   gc              — one-shot worktree garbage collection
- *   dlq list        — print DLQ entries
- *   dlq clear <id>  — clear a card's DLQ mark
  *   help            — show usage
  */
 import { log } from "./log.js";
@@ -23,8 +21,6 @@ Usage:
   harmony-agent health             Exit 0 if daemon is healthy, 1 otherwise
   harmony-agent doctor             Run preflight checks (don't start)
   harmony-agent gc                 One-shot worktree garbage collection
-  harmony-agent dlq list           List dead-lettered cards
-  harmony-agent dlq clear <cardId> Clear a card's DLQ marker
   harmony-agent help               Show this help
 Flags:
@@ -45,12 +41,6 @@ async function httpCall(path, init) {
     const url = `http://${cfg.agent.http.bindAddr}:${cfg.agent.http.port}${path}`;
     return fetch(url, init);
 }
-/** True if the error looks like "daemon is not running" (ECONNREFUSED). */
-function isDaemonDown(err) {
-    const code = err?.cause?.code;
-    const msg = err instanceof Error ? err.message : String(err);
-    return (code === "ECONNREFUSED" || /ECONNREFUSED|fetch failed|connect/i.test(msg));
-}
 function printStatus(body) {
     const out = process.stdout;
     const uptime = formatDuration(body.uptimeMs);
@@ -70,10 +60,6 @@ function printStatus(body) {
     for (const q of body.reviewQueue) {
         out.write(`  #${q.shortId} priority=${q.priority}\n`);
     }
-    out.write(`dlq          (${body.dlq.length})\n`);
-    for (const d of body.dlq) {
-        out.write(`  ${d.cardId} attempts=${d.attempts} cost=$${(d.totalCostCents / 100).toFixed(2)} reason=${d.reason}\n`);
-    }
 }
 function formatDuration(ms) {
     const s = Math.floor(ms / 1000);
@@ -155,54 +141,6 @@ async function gcCommand() {
     }
     return 0;
 }
-async function dlqCommand(args) {
-    const sub = args[0];
-    const { StateStore } = await import("./state-store.js");
-    const store = StateStore.open();
-    if (!sub || sub === "list") {
-        const entries = store.listDlq();
-        if (entries.length === 0) {
-            process.stdout.write("DLQ is empty\n");
-            return 0;
-        }
-        for (const c of entries) {
-            process.stdout.write(`${c.cardId}  attempts=${c.attempts} cost=$${(c.totalCostCents / 100).toFixed(2)}  reason=${c.dlqReason ?? "(unknown)"}\n`);
-        }
-        return 0;
-    }
-    if (sub === "clear") {
-        const cardId = args[1];
-        if (!cardId) {
-            process.stderr.write("usage: harmony-agent dlq clear <cardId>\n");
-            return 2;
-        }
-        // Prefer the running daemon if present — direct file writes race
-        // the daemon's own in-memory state-store and silently lose data.
-        try {
-            const res = await httpCall(`/dlq/clear/${encodeURIComponent(cardId)}`, {
-                method: "POST",
-            });
-            if (res.ok) {
-                process.stdout.write(`cleared DLQ for ${cardId} (via daemon)\n`);
-                return 0;
-            }
-            process.stderr.write(`daemon returned ${res.status} ${res.statusText}\n`);
-            return 1;
-        }
-        catch (err) {
-            if (!isDaemonDown(err)) {
-                process.stderr.write(`daemon HTTP error: ${err instanceof Error ? err.message : err}\n`);
-                return 1;
-            }
-            // Daemon offline → safe to write directly.
-            await store.clearDlq(cardId);
-            process.stdout.write(`cleared DLQ for ${cardId} (daemon offline, wrote directly)\n`);
-            return 0;
-        }
-    }
-    process.stderr.write(`unknown dlq subcommand: ${sub}\n`);
-    return 2;
-}
 async function dispatch(argv) {
     // Strip node, script, and any global flags we own.
     const args = argv.filter((a) => a !== "--pretty" && a !== "--json");
@@ -221,8 +159,6 @@ async function dispatch(argv) {
             return doctorCommand();
         case "gc":
             return gcCommand();
-        case "dlq":
-            return dlqCommand(args.slice(1));
         case "help":
         case "--help":
         case "-h":

package/dist/completion.d.ts CHANGED Viewed

@@ -5,11 +5,15 @@ import type { CostUpdate } from "./stream-parser.js";
 import { type AgentConfig } from "./types.js";
 export interface SessionStats {
     filesEdited: number;
+    /** Edited file paths tracked by the ProgressTracker (#272). */
+    filesEditedPaths?: string[];
     filesRead: number;
     toolCalls: number;
     cost: CostUpdate | null;
     /** Trimmed last assistant text — feeds the episode write hook (Phase 1.5). */
     lastAssistantText?: string;
+    /** All non-trivial assistant text blocks — richer summary source (#272). */
+    assistantTextBlocks?: string[];
 }
 export declare function buildTokenPayload(stats?: SessionStats | null): {
     costCents?: undefined;
@@ -29,4 +33,4 @@ export declare function buildTokenPayload(stats?: SessionStats | null): {
 /**
  * Post-work pipeline: push branch, create PR, move card, post summary.
  */
-export declare function runCompletion(client: HarmonyApiClient, card: Card, branchName: string, worktreePath: string, config: AgentConfig, workerId: number, sessionStats: SessionStats | undefined, workspaceId: string | undefined, agentSessionId: string | null | undefined, stateStore: StateStore): Promise<void>;
+export declare function runCompletion(client: HarmonyApiClient, card: Card, branchName: string, worktreePath: string, config: AgentConfig, workerId: number, sessionStats: SessionStats | undefined, workspaceId: string | undefined, agentSessionId: string | null | undefined, stateStore: StateStore): Promise<boolean>;

package/dist/completion.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { execFileSync } from "node:child_process";
 import { moveCardToColumn } from "./board-helpers.js";
 import { writeEpisode } from "./episode-writer.js";
+import { captureDiffStat } from "./git-diff-stat.js";
 import { createPullRequest, detectGitProvider, getBranchWebUrl, pushBranch, } from "./git-pr.js";
 import { log } from "./log.js";
 import { AGENT_NAME, agentIdentifier } from "./types.js";
@@ -47,7 +48,7 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
             ...buildTokenPayload(sessionStats),
         });
         cleanupWorktree(worktreePath, branchName);
-        return;
+        return true; // nothing to verify — not a failed attempt
     }
     // 1. Push branch FIRST so commits are durable on origin regardless of
     // verification outcome. A failed verify (below) then preserves the work
@@ -160,7 +161,7 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
             // Local-only cleanup. The remote ref under `agent-attempts/*` stays
             // up; the GC sweep (worktree-gc.ts) prunes it after retention.
             cleanupWorktree(worktreePath, branchName);
-            return;
+            return false; // verification failed — counts as a failed attempt
         }
         log.info(TAG, `Verification passed for #${card.short_id}`);
     }
@@ -194,21 +195,38 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
     // a separate write hook into the pre-return path, which D8 intentionally
     // omits ("daemon crashes ≠ task outcome").
     if (workspaceId) {
+        // Capture changed files + churn from the diff (#272). Best-effort + guarded:
+        // a null result just falls back to the ProgressTracker-tracked edit paths.
+        // The diff's file list is authoritative (it reflects what actually landed,
+        // including renames/deletes the tracker can't see) so prefer it.
+        const diffStat = captureDiffStat(worktreePath, config.worktree.baseBranch);
+        const changedFiles = diffStat && diffStat.files.length > 0
+            ? diffStat.files
+            : (sessionStats?.filesEditedPaths ?? []);
         await writeEpisode(client, {
             kind: "implement",
             card,
             workspaceId,
             outcome: "success",
             approachSummary: sessionStats?.lastAssistantText ?? "",
+            approachBlocks: sessionStats?.assistantTextBlocks,
             result: verificationResult,
             cost: sessionStats?.cost ?? null,
             filesEdited: sessionStats?.filesEdited ?? 0,
+            changedFiles,
+            churn: diffStat
+                ? {
+                    insertions: diffStat.insertions,
+                    deletions: diffStat.deletions,
+                }
+                : undefined,
             agentSessionId: agentSessionId ?? null,
         });
     }
     // 7. Cleanup worktree
     cleanupWorktree(worktreePath, branchName);
     log.info(TAG, `Completion done for #${card.short_id}${prUrl ? ` — PR: ${prUrl}` : ""}`);
+    return true;
 }
 function buildVerificationFailureSummary(result, autoFixAttempts) {
     const counts = [];

package/dist/episode-writer.d.ts CHANGED Viewed

@@ -9,9 +9,22 @@ interface ImplementEpisodeInput {
     workspaceId: string;
     outcome: EpisodeOutcome;
     approachSummary: string;
+    /**
+     * All non-trivial assistant text blocks from the run (#272). When present,
+     * used to assemble a richer approach summary + extract a key insight. Falls
+     * back to `approachSummary` (last turn) when empty.
+     */
+    approachBlocks?: string[];
     result: VerificationResult;
     cost: CostUpdate | null;
     filesEdited: number;
+    /** Changed file paths (#272): diff list when available, else tracked paths. */
+    changedFiles?: string[];
+    /** Line churn (#272), best-effort from `git diff --numstat`. */
+    churn?: {
+        insertions: number;
+        deletions: number;
+    };
     errorMessage?: string;
     agentSessionId?: string | null;
 }
@@ -43,6 +56,25 @@ export declare function computeQualityScore(result: VerificationResult, opts: {
  * as a recallable hit (rather than an empty bullet) in future prompts.
  */
 export declare function trimApproachSummary(text: string): string;
+/**
+ * Cap the review rationale ("why approved / rejected") richly (#272 task 5)
+ * rather than re-trimming to the 400-char implement bound. Empty input
+ * collapses to a marker so the episode still surfaces as a recallable hit.
+ */
+export declare function trimReviewRationale(text: string): string;
+/**
+ * Assemble a richer approach summary from the collected assistant text blocks
+ * (#272). Joins the trailing blocks (most relevant context lives near the end
+ * of a run) up to a longer bounded cap than the single-turn trim. Falls back to
+ * the last-turn `fallback` text when no blocks were collected. No LLM call.
+ */
+export declare function buildRichApproachSummary(blocks: string[] | undefined, fallback: string): string;
+/**
+ * Extract a cheap deterministic "key insight" line from the run's assistant
+ * text (#272 task 3). Scans for a sentence/line matching a root-cause / gotcha
+ * pattern. Returns undefined when nothing matches — never fabricated, no LLM.
+ */
+export declare function extractKeyInsight(blocks: string[] | undefined, fallback: string): string | undefined;
 /**
  * Build the entity payload for one episode. Pure — returned object can be
  * snapshotted in tests without hitting the network.