@gethmy/agent 1.0.9 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/README.md +67 -16
  2. package/dist/__tests__/budget.test.d.ts +1 -0
  3. package/dist/__tests__/budget.test.js +94 -0
  4. package/dist/__tests__/config-validation.test.d.ts +1 -0
  5. package/dist/__tests__/config-validation.test.js +65 -0
  6. package/dist/__tests__/dev-server-readiness.test.d.ts +1 -0
  7. package/dist/__tests__/dev-server-readiness.test.js +26 -0
  8. package/dist/__tests__/http-server.test.d.ts +1 -0
  9. package/dist/__tests__/http-server.test.js +115 -0
  10. package/dist/__tests__/log.test.d.ts +1 -0
  11. package/dist/__tests__/log.test.js +115 -0
  12. package/dist/__tests__/process-group.test.d.ts +1 -0
  13. package/dist/__tests__/process-group.test.js +68 -0
  14. package/dist/__tests__/reconcile-heartbeat.test.d.ts +1 -0
  15. package/dist/__tests__/reconcile-heartbeat.test.js +116 -0
  16. package/dist/__tests__/recovery.test.d.ts +1 -0
  17. package/dist/__tests__/recovery.test.js +126 -0
  18. package/dist/__tests__/review-parser.test.d.ts +1 -0
  19. package/dist/__tests__/review-parser.test.js +65 -0
  20. package/dist/__tests__/state-store.test.d.ts +1 -0
  21. package/dist/__tests__/state-store.test.js +132 -0
  22. package/dist/__tests__/transitions.test.d.ts +1 -0
  23. package/dist/__tests__/transitions.test.js +130 -0
  24. package/dist/__tests__/worktree-gc.test.d.ts +1 -0
  25. package/dist/__tests__/worktree-gc.test.js +137 -0
  26. package/dist/budget.d.ts +45 -0
  27. package/dist/budget.js +94 -0
  28. package/dist/cli.d.ts +15 -1
  29. package/dist/cli.js +239 -1
  30. package/dist/completion.d.ts +9 -0
  31. package/dist/completion.js +28 -2
  32. package/dist/config-validation.d.ts +18 -0
  33. package/dist/config-validation.js +66 -0
  34. package/dist/config.js +12 -0
  35. package/dist/http-server.d.ts +79 -0
  36. package/dist/http-server.js +115 -0
  37. package/dist/index.d.ts +4 -1
  38. package/dist/index.js +125 -10
  39. package/dist/log.d.ts +29 -5
  40. package/dist/log.js +80 -15
  41. package/dist/pool.d.ts +27 -2
  42. package/dist/pool.js +69 -4
  43. package/dist/process-group.d.ts +26 -0
  44. package/dist/process-group.js +72 -0
  45. package/dist/progress-tracker.js +2 -0
  46. package/dist/queue.d.ts +2 -0
  47. package/dist/queue.js +4 -0
  48. package/dist/reconcile.d.ts +15 -1
  49. package/dist/reconcile.js +63 -2
  50. package/dist/recovery.d.ts +30 -0
  51. package/dist/recovery.js +136 -0
  52. package/dist/review-completion.d.ts +12 -4
  53. package/dist/review-completion.js +158 -49
  54. package/dist/review-worker.d.ts +9 -2
  55. package/dist/review-worker.js +182 -78
  56. package/dist/run-log.d.ts +6 -0
  57. package/dist/run-log.js +19 -0
  58. package/dist/state-store.d.ts +72 -0
  59. package/dist/state-store.js +216 -0
  60. package/dist/transitions.d.ts +57 -0
  61. package/dist/transitions.js +131 -0
  62. package/dist/types.d.ts +23 -0
  63. package/dist/types.js +19 -1
  64. package/dist/verification.d.ts +17 -0
  65. package/dist/verification.js +71 -10
  66. package/dist/watcher.d.ts +2 -0
  67. package/dist/watcher.js +11 -0
  68. package/dist/worker.d.ts +9 -2
  69. package/dist/worker.js +168 -47
  70. package/dist/worktree-gc.d.ts +39 -0
  71. package/dist/worktree-gc.js +139 -0
  72. package/package.json +2 -2
package/dist/pool.d.ts CHANGED
@@ -1,16 +1,24 @@
1
1
  import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
2
2
  import type { Card, Column, Label, Subtask } from "@harmony/shared";
3
+ import { PriorityQueue } from "./queue.js";
4
+ import type { StateStore } from "./state-store.js";
3
5
  import type { AgentConfig, WorkMode } from "./types.js";
4
6
  export declare class Pool {
7
+ private client;
5
8
  private implWorkers;
6
9
  private reviewWorkers;
7
10
  private implQueue;
8
11
  private reviewQueue;
9
- constructor(config: AgentConfig, client: HarmonyApiClient, userEmail: string, workspaceId: string, projectId: string);
12
+ private budget;
13
+ constructor(config: AgentConfig, client: HarmonyApiClient, userEmail: string, workspaceId: string, projectId: string, stateStore: StateStore);
10
14
  /**
11
15
  * Enqueue a card for processing with the given mode.
16
+ *
17
+ * Returns async so callers can await the DLQ side-effects on skip.
18
+ * Budget/DLQ checks happen here so the reconciler, realtime watcher,
19
+ * and manual API calls all go through the same gate.
12
20
  */
13
- enqueue(card: Card, column: Column, labels: Label[], subtasks: Subtask[], mode?: WorkMode): void;
21
+ enqueue(card: Card, column: Column, labels: Label[], subtasks: Subtask[], mode?: WorkMode): Promise<void>;
14
22
  /**
15
23
  * Remove a card from any queue or cancel an active worker.
16
24
  */
@@ -31,6 +39,23 @@ export declare class Pool {
31
39
  * Handle an agent command (pause/resume/stop) for a specific card.
32
40
  */
33
41
  handleAgentCommand(cardId: string, command: "pause" | "resume" | "stop"): Promise<void>;
42
+ /**
43
+ * Point-in-time snapshot for the HTTP /status endpoint. Safe to call
44
+ * from anywhere — reads in-memory state only.
45
+ */
46
+ snapshotWorkers(): Array<{
47
+ id: number;
48
+ pipeline: "implement" | "review";
49
+ state: string;
50
+ cardId: string | null;
51
+ cardShortId: number | null;
52
+ startedAt: number | null;
53
+ branchName: string | null;
54
+ }>;
55
+ snapshotQueues(): {
56
+ impl: ReturnType<PriorityQueue["snapshot"]>;
57
+ review: ReturnType<PriorityQueue["snapshot"]>;
58
+ };
34
59
  /**
35
60
  * Gracefully shutdown all workers.
36
61
  */
package/dist/pool.js CHANGED
@@ -1,36 +1,45 @@
1
+ import { BudgetGuard } from "./budget.js";
1
2
  import { log } from "./log.js";
2
3
  import { PriorityQueue } from "./queue.js";
3
4
  import { ReviewWorker } from "./review-worker.js";
4
5
  import { Worker } from "./worker.js";
5
6
  const TAG = "pool";
6
7
  export class Pool {
8
+ client;
7
9
  implWorkers = [];
8
10
  reviewWorkers = [];
9
11
  implQueue;
10
12
  reviewQueue;
11
- constructor(config, client, userEmail, workspaceId, projectId) {
13
+ budget;
14
+ constructor(config, client, userEmail, workspaceId, projectId, stateStore) {
15
+ this.client = client;
12
16
  this.implQueue = new PriorityQueue(config);
13
17
  this.reviewQueue = new PriorityQueue(config);
18
+ this.budget = new BudgetGuard(config.budget, stateStore);
14
19
  // Create implementation workers
15
20
  for (let i = 0; i < config.poolSize; i++) {
16
21
  this.implWorkers.push(new Worker(i, config, client, userEmail, () => {
17
22
  this.tryDispatchFor(this.implWorkers, this.implQueue, "impl");
18
- }, workspaceId, projectId));
23
+ }, workspaceId, projectId, stateStore));
19
24
  }
20
25
  // Create review worker(s) — 1 review worker per pool
21
26
  if (config.review.enabled) {
22
27
  const reviewWorkerId = config.poolSize; // offset to avoid ID collision
23
28
  this.reviewWorkers.push(new ReviewWorker(reviewWorkerId, config, client, userEmail, () => {
24
29
  this.tryDispatchFor(this.reviewWorkers, this.reviewQueue, "review");
25
- }));
30
+ }, stateStore));
26
31
  }
27
32
  const reviewCount = this.reviewWorkers.length;
28
33
  log.info(TAG, `Pool initialized: ${config.poolSize} impl worker(s), ${reviewCount} review worker(s)`);
29
34
  }
30
35
  /**
31
36
  * Enqueue a card for processing with the given mode.
37
+ *
38
+ * Returns async so callers can await the DLQ side-effects on skip.
39
+ * Budget/DLQ checks happen here so the reconciler, realtime watcher,
40
+ * and manual API calls all go through the same gate.
32
41
  */
33
- enqueue(card, column, labels, subtasks, mode = "implement") {
42
+ async enqueue(card, column, labels, subtasks, mode = "implement") {
34
43
  // Don't enqueue if already in any queue or actively being worked on
35
44
  if (this.implQueue.has(card.id) ||
36
45
  this.reviewQueue.has(card.id) ||
@@ -38,6 +47,26 @@ export class Pool {
38
47
  log.debug(TAG, `Card ${card.id} already queued or active, skipping`);
39
48
  return;
40
49
  }
50
+ // Review pickups bypass per-card attempt limits (reviews are cheap
51
+ // and orthogonal to implement attempts). Daily budget still applies.
52
+ if (mode === "implement") {
53
+ const decision = this.budget.check(card.id);
54
+ if (!decision.allow) {
55
+ // Already-DLQ cards are expected noise on every reconcile tick;
56
+ // only the terminal decision itself deserves a warn.
57
+ const wasAlreadyDlq = decision.reason === "dlq";
58
+ if (!wasAlreadyDlq) {
59
+ log.warn(TAG, `#${card.short_id} skipped (${decision.reason}): ${decision.detail}`);
60
+ if (this.budget.isTerminal(decision.reason)) {
61
+ await this.budget.markDlq(this.client, card, decision.reason, decision.detail);
62
+ }
63
+ }
64
+ else {
65
+ log.debug(TAG, `#${card.short_id} in DLQ: ${decision.detail}`);
66
+ }
67
+ return;
68
+ }
69
+ }
41
70
  const queue = mode === "review" ? this.reviewQueue : this.implQueue;
42
71
  queue.enqueue(card, column, labels, mode);
43
72
  // Store card data for when it gets dispatched
@@ -122,6 +151,42 @@ export class Pool {
122
151
  break;
123
152
  }
124
153
  }
154
+ /**
155
+ * Point-in-time snapshot for the HTTP /status endpoint. Safe to call
156
+ * from anywhere — reads in-memory state only.
157
+ */
158
+ snapshotWorkers() {
159
+ const out = [];
160
+ for (const w of this.implWorkers) {
161
+ out.push({
162
+ id: w.id,
163
+ pipeline: "implement",
164
+ state: w.state,
165
+ cardId: w.cardId,
166
+ cardShortId: null,
167
+ startedAt: w.startedAt,
168
+ branchName: w.branchName,
169
+ });
170
+ }
171
+ for (const w of this.reviewWorkers) {
172
+ out.push({
173
+ id: w.id,
174
+ pipeline: "review",
175
+ state: w.state,
176
+ cardId: w.cardId,
177
+ cardShortId: null,
178
+ startedAt: w.startedAt,
179
+ branchName: w.branchName,
180
+ });
181
+ }
182
+ return out;
183
+ }
184
+ snapshotQueues() {
185
+ return {
186
+ impl: this.implQueue.snapshot(),
187
+ review: this.reviewQueue.snapshot(),
188
+ };
189
+ }
125
190
  /**
126
191
  * Gracefully shutdown all workers.
127
192
  */
@@ -0,0 +1,26 @@
1
+ import { type ChildProcess, type SpawnOptions } from "node:child_process";
2
+ /**
3
+ * Spawn a child in its own process group so we can reliably kill the
4
+ * whole subtree later. The Claude CLI shells out to git, build tools,
5
+ * dev servers, etc. — signalling only the direct child leaves orphans.
6
+ *
7
+ * - POSIX: `detached: true` puts the child in a new process group whose
8
+ * pgid equals its pid. Killing the negative pid signals every member.
9
+ * - Windows: `detached: true` creates a new process group that can be
10
+ * signalled via the child's pid (no negation).
11
+ */
12
+ export declare function spawnInGroup(command: string, args: readonly string[], options?: SpawnOptions): ChildProcess;
13
+ /**
14
+ * Send a signal to every process in the group whose leader is `proc`.
15
+ * On POSIX, this is `process.kill(-pid, signal)`. If the group has
16
+ * already exited, returns silently.
17
+ */
18
+ export declare function signalGroup(proc: ChildProcess, signal: NodeJS.Signals): void;
19
+ /**
20
+ * Escalating termination: SIGINT → wait → SIGTERM → wait → SIGKILL.
21
+ * Returns when the process has exited or all signals have been sent.
22
+ */
23
+ export declare function terminateGroup(proc: ChildProcess, opts: {
24
+ sigintTimeoutMs: number;
25
+ sigtermTimeoutMs: number;
26
+ }): Promise<void>;
@@ -0,0 +1,72 @@
1
+ import { spawn, } from "node:child_process";
2
+ import { log } from "./log.js";
3
+ const TAG = "pgroup";
4
+ /**
5
+ * Spawn a child in its own process group so we can reliably kill the
6
+ * whole subtree later. The Claude CLI shells out to git, build tools,
7
+ * dev servers, etc. — signalling only the direct child leaves orphans.
8
+ *
9
+ * - POSIX: `detached: true` puts the child in a new process group whose
10
+ * pgid equals its pid. Killing the negative pid signals every member.
11
+ * - Windows: `detached: true` creates a new process group that can be
12
+ * signalled via the child's pid (no negation).
13
+ */
14
+ export function spawnInGroup(command, args, options = {}) {
15
+ return spawn(command, args, {
16
+ ...options,
17
+ detached: true,
18
+ // Keep stdio wired up so streaming still works.
19
+ stdio: options.stdio ?? ["ignore", "pipe", "pipe"],
20
+ });
21
+ }
22
+ /**
23
+ * Send a signal to every process in the group whose leader is `proc`.
24
+ * On POSIX, this is `process.kill(-pid, signal)`. If the group has
25
+ * already exited, returns silently.
26
+ */
27
+ export function signalGroup(proc, signal) {
28
+ if (!proc.pid || proc.killed)
29
+ return;
30
+ try {
31
+ if (process.platform === "win32") {
32
+ // No process groups on Windows; best effort tree kill via the child.
33
+ proc.kill(signal);
34
+ return;
35
+ }
36
+ process.kill(-proc.pid, signal);
37
+ }
38
+ catch (err) {
39
+ // ESRCH means the group is already gone — that is the goal, not an error.
40
+ const code = err.code;
41
+ if (code !== "ESRCH") {
42
+ log.warn(TAG, `signal ${signal} to pgid ${proc.pid} failed: ${err instanceof Error ? err.message : err}`);
43
+ }
44
+ }
45
+ }
46
+ /**
47
+ * Escalating termination: SIGINT → wait → SIGTERM → wait → SIGKILL.
48
+ * Returns when the process has exited or all signals have been sent.
49
+ */
50
+ export async function terminateGroup(proc, opts) {
51
+ if (!proc.pid || proc.killed)
52
+ return;
53
+ // Unpause first in case the process was suspended — otherwise it
54
+ // can't react to signals.
55
+ signalGroup(proc, "SIGCONT");
56
+ const waitForExit = (timeout) => new Promise((resolve) => {
57
+ if (proc.killed || proc.exitCode !== null)
58
+ return resolve(true);
59
+ const timer = setTimeout(() => resolve(false), timeout);
60
+ proc.once("exit", () => {
61
+ clearTimeout(timer);
62
+ resolve(true);
63
+ });
64
+ });
65
+ signalGroup(proc, "SIGINT");
66
+ if (await waitForExit(opts.sigintTimeoutMs))
67
+ return;
68
+ signalGroup(proc, "SIGTERM");
69
+ if (await waitForExit(opts.sigtermTimeoutMs))
70
+ return;
71
+ signalGroup(proc, "SIGKILL");
72
+ }
@@ -347,6 +347,8 @@ export class ProgressTracker {
347
347
  phase: this.phase,
348
348
  filesChanged: this.filesEdited.size,
349
349
  costCents: Math.round((this.lastCost?.totalCostUsd ?? 0) * 100),
350
+ inputTokens: this.lastCost?.totalInputTokens ?? 0,
351
+ outputTokens: this.lastCost?.totalOutputTokens ?? 0,
350
352
  })
351
353
  .catch((err) => {
352
354
  log.warn(TAG, `Failed to send progress update: ${err}`);
package/dist/queue.d.ts CHANGED
@@ -34,4 +34,6 @@ export declare class PriorityQueue {
34
34
  cardIds(): string[];
35
35
  get length(): number;
36
36
  peek(): QueueItem | null;
37
+ /** Copy of the queue in priority order (for introspection). */
38
+ snapshot(): QueueItem[];
37
39
  }
package/dist/queue.js CHANGED
@@ -93,4 +93,8 @@ export class PriorityQueue {
93
93
  peek() {
94
94
  return this.items[0] ?? null;
95
95
  }
96
+ /** Copy of the queue in priority order (for introspection). */
97
+ snapshot() {
98
+ return this.items.slice();
99
+ }
96
100
  }
@@ -1,5 +1,7 @@
1
1
  import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
2
2
  import type { Pool } from "./pool.js";
3
+ import type { StateStore } from "./state-store.js";
4
+ import { type AgentConfig } from "./types.js";
3
5
  /**
4
6
  * Reconciliation heartbeat: polls the board every `intervalMs` to catch
5
7
  * missed realtime events and sync state.
@@ -13,9 +15,21 @@ export declare class Reconciler {
13
15
  private reviewColumns;
14
16
  private approvedLabel;
15
17
  private intervalMs;
18
+ private stateStore?;
19
+ private agentConfig?;
16
20
  private timer;
17
- constructor(client: HarmonyApiClient, pool: Pool, projectId: string, agentUserId: string, pickupColumns: string[], reviewColumns: string[], approvedLabel: string, intervalMs?: number);
21
+ private lastTickAt;
22
+ get lastTick(): number | null;
23
+ get isRunning(): boolean;
24
+ constructor(client: HarmonyApiClient, pool: Pool, projectId: string, agentUserId: string, pickupColumns: string[], reviewColumns: string[], approvedLabel: string, intervalMs?: number, stateStore?: StateStore | undefined, agentConfig?: AgentConfig | undefined);
18
25
  start(): void;
19
26
  stop(): void;
27
+ /**
28
+ * Walk the state store for runs marked active whose owning daemon is
29
+ * dead, or that this daemon owns while no worker holds the card and the heartbeat is stale. Each such run gets the same
30
+ * recovery treatment as startup orphans: session ended, card returned
31
+ * to pickup column with agent-recovered label, worktree cleaned up.
32
+ */
33
+ private recoverStaleRuns;
20
34
  private tick;
21
35
  }
package/dist/reconcile.js CHANGED
@@ -1,5 +1,7 @@
1
1
  import { buildLabelMap, hasLabel, resolveCardLabels } from "./board-helpers.js";
2
2
  import { log } from "./log.js";
3
+ import { isProcessAlive, recoverRun } from "./recovery.js";
4
+ import { extractBranchFromDescription } from "./review-worktree.js";
3
5
  import { NEED_REVIEW_LABEL } from "./types.js";
4
6
  const TAG = "reconcile";
5
7
  /**
@@ -15,8 +17,17 @@ export class Reconciler {
15
17
  reviewColumns;
16
18
  approvedLabel;
17
19
  intervalMs;
20
+ stateStore;
21
+ agentConfig;
18
22
  timer = null;
19
- constructor(client, pool, projectId, agentUserId, pickupColumns, reviewColumns, approvedLabel, intervalMs = 60_000) {
23
+ lastTickAt = null;
24
+ get lastTick() {
25
+ return this.lastTickAt;
26
+ }
27
+ get isRunning() {
28
+ return this.timer !== null;
29
+ }
30
+ constructor(client, pool, projectId, agentUserId, pickupColumns, reviewColumns, approvedLabel, intervalMs = 60_000, stateStore, agentConfig) {
20
31
  this.client = client;
21
32
  this.pool = pool;
22
33
  this.projectId = projectId;
@@ -25,6 +36,8 @@ export class Reconciler {
25
36
  this.reviewColumns = reviewColumns;
26
37
  this.approvedLabel = approvedLabel;
27
38
  this.intervalMs = intervalMs;
39
+ this.stateStore = stateStore;
40
+ this.agentConfig = agentConfig;
28
41
  }
29
42
  start() {
30
43
  log.info(TAG, `Heartbeat every ${this.intervalMs / 1000}s`);
@@ -39,7 +52,42 @@ export class Reconciler {
39
52
  }
40
53
  log.info(TAG, "Heartbeat stopped");
41
54
  }
55
+ /**
56
+ * Walk the state store for runs marked active whose owning daemon is
57
+ * dead OR whose heartbeat is stale. Each such run gets the same
58
+ * recovery treatment as startup orphans: session ended, card returned
59
+ * to pickup column with agent-recovered label, worktree cleaned up.
60
+ */
61
+ async recoverStaleRuns() {
62
+ if (!this.stateStore || !this.agentConfig)
63
+ return;
64
+ const now = Date.now();
65
+ const stale = this.agentConfig.timing.staleHeartbeatMs;
66
+ const active = this.stateStore.getActiveRuns();
67
+ const pool = this.pool;
68
+ for (const run of active) {
69
+ const foreignDaemon = run.daemonPid !== process.pid;
70
+ const daemonDead = foreignDaemon && !isProcessAlive(run.daemonPid, process.pid);
71
+ const heartbeatStale = now - run.lastHeartbeatAt > stale;
72
+ const ourZombie = !foreignDaemon && !pool.isCardActive(run.cardId);
73
+ if (!daemonDead && !(heartbeatStale && ourZombie))
74
+ continue;
75
+ const reason = daemonDead
76
+ ? `foreign daemon ${run.daemonPid} is dead`
77
+ : `our worker lost card ${run.cardId} with ${Math.round((now - run.lastHeartbeatAt) / 1000)}s stale heartbeat`;
78
+ log.warn(TAG, `zombie run ${run.runId} (#${run.cardShortId}): ${reason} — recovering`);
79
+ await recoverRun(run, this.stateStore, this.client, this.agentConfig, {
80
+ runId: run.runId,
81
+ cardId: run.cardId,
82
+ cardShortId: run.cardShortId,
83
+ pipeline: run.pipeline,
84
+ actions: [],
85
+ errors: [],
86
+ });
87
+ }
88
+ }
42
89
  async tick() {
90
+ this.lastTickAt = Date.now();
43
91
  try {
44
92
  const board = await this.client.getBoard(this.projectId);
45
93
  const cards = (board.cards ?? []);
@@ -94,10 +142,23 @@ export class Reconciler {
94
142
  log.debug(TAG, `Skipping #${card.short_id} — has "${NEED_REVIEW_LABEL}" label (needs human)`);
95
143
  continue;
96
144
  }
145
+ // Skip review for cards without a branch reference — not qualified for auto-review
146
+ if (mode === "review" &&
147
+ !extractBranchFromDescription(card.description)) {
148
+ log.debug(TAG, `Skipping #${card.short_id} — no branch reference (not qualified for auto-review)`);
149
+ continue;
150
+ }
97
151
  log.info(TAG, `Missed assignment: #${card.short_id} "${card.title}" (${mode}) — enqueueing`);
98
- this.pool.enqueue(card, column, cardLabels, subtasks, mode);
152
+ await this.pool.enqueue(card, column, cardLabels, subtasks, mode);
99
153
  }
100
154
  }
155
+ // Detect zombie runs: state-store says active, but either:
156
+ // (a) another daemon's PID is dead, or
157
+ // (b) our daemon holds the run but no worker is on the card, and
158
+ // its heartbeat is older than staleHeartbeatMs.
159
+ if (this.stateStore && this.agentConfig) {
160
+ await this.recoverStaleRuns();
161
+ }
101
162
  // Cards in queue/active but no longer assigned to agent → cancel/remove
102
163
  for (const knownId of knownCardIds) {
103
164
  if (!allAgentCardIds.has(knownId)) {
@@ -0,0 +1,30 @@
1
+ import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
2
+ import type { RunRecord, StateStore } from "./state-store.js";
3
+ import type { AgentConfig } from "./types.js";
4
+ export interface RecoveryOutcome {
5
+ runId: string;
6
+ cardId: string;
7
+ cardShortId: number;
8
+ pipeline: "implement" | "review";
9
+ actions: string[];
10
+ errors: string[];
11
+ }
12
+ /**
13
+ * Check if a process is still alive. A crashed daemon's PID is unlikely
14
+ * to be reused within a reboot window; if it is, we still treat it as
15
+ * orphaned because our current process is the new daemon.
16
+ */
17
+ export declare function isProcessAlive(pid: number, currentPid: number): boolean;
18
+ /**
19
+ * Reconcile orphaned runs from a previous daemon life.
20
+ *
21
+ * For each active run in the state store:
22
+ * - If the daemon PID is alive (should not happen for a fresh process),
23
+ * skip it — it's another instance.
24
+ * - Otherwise: end the Harmony session, return the card to its pickup
25
+ * column with the `agent-recovered` label, and cleanup the worktree.
26
+ *
27
+ * This runs once at daemon startup, before the pool accepts work.
28
+ */
29
+ export declare function recoverOrphans(store: StateStore, client: HarmonyApiClient, config: AgentConfig): Promise<RecoveryOutcome[]>;
30
+ export declare function recoverRun(run: RunRecord, store: StateStore, client: HarmonyApiClient, config: AgentConfig, outcome: RecoveryOutcome): Promise<void>;
@@ -0,0 +1,136 @@
1
+ import { addLabelByName, moveCardToColumn } from "./board-helpers.js";
2
+ import { log } from "./log.js";
3
+ import { cleanupWorktree } from "./worktree.js";
4
+ const TAG = "recovery";
5
+ const RECOVERED_LABEL = "agent-recovered";
6
+ const RECOVERED_LABEL_COLOR = "#f59e0b";
7
+ /**
8
+ * Check if a process is still alive. A crashed daemon's PID is unlikely
9
+ * to be reused within a reboot window; if it is, we still treat it as
10
+ * orphaned because our current process is the new daemon.
11
+ */
12
+ export function isProcessAlive(pid, currentPid) {
13
+ if (pid === currentPid)
14
+ return true;
15
+ try {
16
+ process.kill(pid, 0);
17
+ return true;
18
+ }
19
+ catch {
20
+ return false;
21
+ }
22
+ }
23
+ async function fetchCardSafely(client, cardId) {
24
+ try {
25
+ const { card } = (await client.getCard(cardId));
26
+ return card;
27
+ }
28
+ catch (err) {
29
+ log.warn(TAG, `cannot fetch card ${cardId}: ${err instanceof Error ? err.message : err}`);
30
+ return null;
31
+ }
32
+ }
/**
 * Recover runs left marked active by a previous daemon life.
 *
 * Every active run in the state store gets an outcome record. Runs whose
 * recorded daemon pid is reported alive by `isProcessAlive` are skipped
 * (noted in the outcome's actions); all others are passed to `recoverRun`.
 *
 * Intended to run once at daemon startup, before the pool accepts work.
 *
 * @param store State store providing `getActiveRuns()`.
 * @param client API client forwarded to `recoverRun`.
 * @param config Agent configuration forwarded to `recoverRun`.
 * @returns One outcome per active run, in store order; empty when there
 *   was nothing to recover.
 */
export async function recoverOrphans(store, client, config) {
    const orphans = store.getActiveRuns();
    if (orphans.length === 0) {
        log.info(TAG, "no orphan runs to recover");
        return [];
    }
    log.info(TAG, `recovering ${orphans.length} orphan run(s) from prior daemon`);
    const results = [];
    for (const run of orphans) {
        const outcome = {
            runId: run.runId,
            cardId: run.cardId,
            cardShortId: run.cardShortId,
            pipeline: run.pipeline,
            actions: [],
            errors: [],
        };
        results.push(outcome);
        const ownerAlive = isProcessAlive(run.daemonPid, process.pid);
        if (!ownerAlive) {
            log.info(TAG, `recovering ${run.pipeline} run ${run.runId} for card #${run.cardShortId}`);
            await recoverRun(run, store, client, config, outcome);
            continue;
        }
        // Another live instance appears to own this run — leave it alone.
        log.warn(TAG, `run ${run.runId} claims live daemon pid ${run.daemonPid} — skipping`);
        outcome.actions.push("skipped: daemon pid still alive");
    }
    return results;
}
/**
 * Recover a single abandoned run. Each step is best-effort: a failure is
 * appended to `outcome.errors` and the remaining steps still execute.
 *
 * Steps, in order:
 *  1. End the agent session as "paused".
 *  2. If the card can be fetched: move it to the first pickup column
 *     (implement pipeline only) and add the `agent-recovered` label.
 *  3. Clean up the run's worktree, if one was recorded.
 *  4. Mark the run as "orphaned" in the state store.
 *
 * @param run Run record being recovered.
 * @param store State store used to close the run.
 * @param client API client for session, card and label operations.
 * @param config Agent configuration (`pickupColumns` is read).
 * @param outcome Mutable record that collects performed actions and errors.
 */
export async function recoverRun(run, store, client, config, outcome) {
    const messageOf = (err) => (err instanceof Error ? err.message : String(err));
    // 1. End the agent session so the card stops showing the progress ring.
    try {
        const progressPercent = run.phase === "completing" ? 95 : undefined;
        await client.endAgentSession(run.cardId, { status: "paused", progressPercent });
        outcome.actions.push("ended agent session (paused)");
    }
    catch (err) {
        const msg = messageOf(err);
        outcome.errors.push(`endAgentSession: ${msg}`);
        log.warn(TAG, `endAgentSession failed for ${run.cardId}: ${msg}`);
    }
    // 2. Put the card somewhere safe and flag it as recovered.
    //    - implement pipeline → first pickup column
    //    - review pipeline    → stays where it is
    const card = await fetchCardSafely(client, run.cardId);
    if (!card) {
        outcome.actions.push("card not reachable — local cleanup only");
    }
    else {
        const target = run.pipeline === "implement" ? config.pickupColumns[0] : undefined;
        if (target) {
            try {
                await moveCardToColumn(client, card, target);
                outcome.actions.push(`moved to "${target}"`);
            }
            catch (err) {
                outcome.errors.push(`moveCardToColumn: ${messageOf(err)}`);
            }
        }
        try {
            await addLabelByName(client, card, RECOVERED_LABEL, RECOVERED_LABEL_COLOR);
            outcome.actions.push(`labeled "${RECOVERED_LABEL}"`);
        }
        catch (err) {
            outcome.errors.push(`addLabel: ${messageOf(err)}`);
        }
    }
    // 3. Remove the local worktree so it cannot collide with future runs.
    if (run.worktreePath) {
        try {
            cleanupWorktree(run.worktreePath, run.branchName ?? undefined);
            outcome.actions.push("cleaned up worktree");
        }
        catch (err) {
            outcome.errors.push(`cleanupWorktree: ${messageOf(err)}`);
        }
    }
    // 4. Close the run in the store with the "orphaned" status.
    try {
        await store.endRun(run.runId, "orphaned", "recovered after daemon restart");
    }
    catch (err) {
        outcome.errors.push(`endRun: ${messageOf(err)}`);
    }
    const errorSuffix = outcome.errors.length ? ` | errors: ${outcome.errors.join("; ")}` : "";
    log.info(TAG, `recovered run ${run.runId} (card #${run.cardShortId}): ${outcome.actions.join(", ")}${errorSuffix}`);
}
@@ -1,6 +1,7 @@
1
1
  import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
2
2
  import type { Card } from "@harmony/shared";
3
- import type { AgentConfig } from "./types.js";
3
+ import { type SessionStats } from "./completion.js";
4
+ import { type AgentConfig } from "./types.js";
4
5
  export interface ReviewFinding {
5
6
  severity: "critical" | "major" | "minor";
6
7
  title: string;
@@ -13,14 +14,21 @@ export interface ScopeCheck {
13
14
  notes?: string;
14
15
  }
15
16
  export interface ReviewResult {
16
- verdict: "approved" | "rejected";
17
+ verdict: "approved" | "rejected" | "error";
17
18
  summary: string;
18
19
  scopeCheck?: ScopeCheck;
19
20
  findings: ReviewFinding[];
20
21
  }
21
22
  /**
22
23
  * Parse Claude's review output into a structured ReviewResult.
23
- * Looks for a JSON block in the output.
24
+ *
25
+ * Tries multiple extraction strategies in order:
26
+ * 1. ```json ... ``` fenced block (what the prompt asks for)
27
+ * 2. Any top-level JSON object containing a "verdict" key (last-wins)
28
+ * 3. Regex for a bare `"verdict": "approved|rejected"` anywhere — lossy
29
+ * but keeps the pipeline moving
30
+ * 4. Falls back to verdict: "error" — keeps card in Review instead of
31
+ * bouncing it to To Do for a parse failure that isn't a code quality signal.
24
32
  */
25
33
  export declare function parseReviewOutput(stdout: string): ReviewResult;
26
34
  /**
@@ -28,4 +36,4 @@ export declare function parseReviewOutput(stdout: string): ReviewResult;
28
36
  * Handles approved/rejected verdicts, creates subtasks for findings,
29
37
  * and moves the card to the appropriate column.
30
38
  */
31
- export declare function runReviewCompletion(client: HarmonyApiClient, card: Card, result: ReviewResult, config: AgentConfig, worktreePath: string, branchName: string | null): Promise<void>;
39
+ export declare function runReviewCompletion(client: HarmonyApiClient, card: Card, result: ReviewResult, config: AgentConfig, worktreePath: string, branchName: string | null, sessionStats?: SessionStats | null): Promise<void>;