@gethmy/agent 1.4.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/budget.d.ts +4 -2
- package/dist/budget.js +41 -2
- package/dist/completion.d.ts +4 -1
- package/dist/completion.js +102 -8
- package/dist/episode-writer.d.ts +84 -0
- package/dist/episode-writer.js +232 -0
- package/dist/git-pr.d.ts +13 -0
- package/dist/git-pr.js +73 -0
- package/dist/index.js +6 -1
- package/dist/pool.d.ts +6 -1
- package/dist/pool.js +39 -5
- package/dist/progress-tracker.d.ts +2 -0
- package/dist/progress-tracker.js +7 -0
- package/dist/prompt.d.ts +6 -0
- package/dist/prompt.js +47 -2
- package/dist/recovery.js +8 -2
- package/dist/review-completion.d.ts +2 -1
- package/dist/review-completion.js +84 -5
- package/dist/review-worker.d.ts +3 -1
- package/dist/review-worker.js +30 -3
- package/dist/state-store.d.ts +16 -0
- package/dist/state-store.js +23 -0
- package/dist/types.d.ts +38 -0
- package/dist/types.js +3 -0
- package/dist/verification.d.ts +7 -1
- package/dist/verification.js +6 -1
- package/dist/worker.js +2 -2
- package/dist/worktree-gc.d.ts +29 -1
- package/dist/worktree-gc.js +108 -1
- package/dist/worktree.d.ts +6 -1
- package/dist/worktree.js +7 -2
- package/package.json +1 -1
package/dist/budget.d.ts
CHANGED
|
@@ -38,8 +38,10 @@ export declare class BudgetGuard {
|
|
|
38
38
|
*/
|
|
39
39
|
isTerminal(reason: GuardReason): boolean;
|
|
40
40
|
/**
|
|
41
|
-
* Apply the DLQ label to a card
|
|
42
|
-
*
|
|
41
|
+
* Apply the DLQ label to a card, persist the reason, and append a
|
|
42
|
+
* post-mortem block to the card description listing the last 3 failure
|
|
43
|
+
* summaries. Safe to call repeatedly — labels are idempotent and the
|
|
44
|
+
* description block is delimited so reruns replace rather than stack.
|
|
43
45
|
*/
|
|
44
46
|
markDlq(client: HarmonyApiClient, card: Card, reason: GuardReason, detail: string): Promise<void>;
|
|
45
47
|
}
|
package/dist/budget.js
CHANGED
|
@@ -71,8 +71,10 @@ export class BudgetGuard {
|
|
|
71
71
|
reason === "card_cost_cap");
|
|
72
72
|
}
|
|
73
73
|
/**
|
|
74
|
-
* Apply the DLQ label to a card
|
|
75
|
-
*
|
|
74
|
+
* Apply the DLQ label to a card, persist the reason, and append a
|
|
75
|
+
* post-mortem block to the card description listing the last 3 failure
|
|
76
|
+
* summaries. Safe to call repeatedly — labels are idempotent and the
|
|
77
|
+
* description block is delimited so reruns replace rather than stack.
|
|
76
78
|
*/
|
|
77
79
|
async markDlq(client, card, reason, detail) {
|
|
78
80
|
await this.store.markDlq(card.id, `${reason}: ${detail}`);
|
|
@@ -86,9 +88,46 @@ export class BudgetGuard {
|
|
|
86
88
|
catch (err) {
|
|
87
89
|
log.warn(TAG, `failed to add dlq label to #${card.short_id}: ${err instanceof Error ? err.message : err}`);
|
|
88
90
|
}
|
|
91
|
+
try {
|
|
92
|
+
const recent = this.store.getRecentFailures(card.id, 3);
|
|
93
|
+
const block = buildDlqDescriptionBlock(reason, detail, recent);
|
|
94
|
+
const existing = card.description ?? "";
|
|
95
|
+
const stripped = stripDlqBlock(existing);
|
|
96
|
+
await client.updateCard(card.id, {
|
|
97
|
+
description: `${stripped}${stripped ? "\n\n" : ""}${block}`,
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
catch (err) {
|
|
101
|
+
log.warn(TAG, `failed to post DLQ summary to #${card.short_id}: ${err instanceof Error ? err.message : err}`);
|
|
102
|
+
}
|
|
89
103
|
log.warn(TAG, `#${card.short_id} DLQ'd — ${reason}: ${detail}`);
|
|
90
104
|
}
|
|
91
105
|
}
|
|
106
|
+
/** First lines of the agent-written DLQ block; also used to locate that block in a description. */
const DLQ_MARKER = "---\n**Agent DLQ**";
/**
 * Render the DLQ post-mortem block that is appended to a card description.
 *
 * @param reason   Guard reason that triggered the DLQ.
 * @param detail   Free-form detail about the cap that was hit.
 * @param failures Recent failure records. Each is read for `ts`, `summary`,
 *                 and the optional `reason` / `recoveryBranch` fields.
 * @returns The block as a single newline-joined string starting with `DLQ_MARKER`.
 */
function buildDlqDescriptionBlock(reason, detail, failures) {
    const lines = [DLQ_MARKER, `Cap hit: ${reason} — ${detail}`];
    if (failures.length > 0) {
        lines.push("", "Recent failures:");
        for (const f of failures) {
            // `toISOString()` throws RangeError on an invalid date. Guard it so a
            // single malformed timestamp does not discard the whole post-mortem.
            const stamp = new Date(f.ts);
            const when = Number.isNaN(stamp.getTime())
                ? "unknown time"
                : `${stamp.toISOString().replace("T", " ").slice(0, 16)} UTC`;
            const tag = f.reason ? ` [${f.reason}]` : "";
            const branch = f.recoveryBranch
                ? `\n recover: \`git fetch && git checkout ${f.recoveryBranch}\``
                : "";
            lines.push(`- ${when}${tag} — ${f.summary}${branch}`);
        }
    }
    else {
        lines.push("", "_No prior failure summaries recorded._");
    }
    return lines.join("\n");
}
|
|
125
|
+
/**
 * Remove a previously appended DLQ block from a card description.
 *
 * Everything from the first occurrence of `DLQ_MARKER` onward is dropped and
 * trailing whitespace is trimmed from what remains. A description without the
 * marker is returned with only its trailing whitespace trimmed.
 */
function stripDlqBlock(description) {
    const markerAt = description.indexOf(DLQ_MARKER);
    const kept = markerAt < 0 ? description : description.slice(0, markerAt);
    return kept.trimEnd();
}
|
|
92
131
|
/** Format a cent amount as a dollar string with two decimals, e.g. 1234 -> "$12.34". */
function formatCents(cents) {
    const dollars = cents / 100;
    return "$" + dollars.toFixed(2);
}
|
package/dist/completion.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
|
|
2
2
|
import type { Card } from "@harmony/shared";
|
|
3
|
+
import type { StateStore } from "./state-store.js";
|
|
3
4
|
import type { CostUpdate } from "./stream-parser.js";
|
|
4
5
|
import { type AgentConfig } from "./types.js";
|
|
5
6
|
export interface SessionStats {
|
|
@@ -7,6 +8,8 @@ export interface SessionStats {
|
|
|
7
8
|
filesRead: number;
|
|
8
9
|
toolCalls: number;
|
|
9
10
|
cost: CostUpdate | null;
|
|
11
|
+
/** Trimmed last assistant text — feeds the episode write hook (Phase 1.5). */
|
|
12
|
+
lastAssistantText?: string;
|
|
10
13
|
}
|
|
11
14
|
export declare function buildTokenPayload(stats?: SessionStats | null): {
|
|
12
15
|
costCents?: undefined;
|
|
@@ -26,4 +29,4 @@ export declare function buildTokenPayload(stats?: SessionStats | null): {
|
|
|
26
29
|
/**
|
|
27
30
|
* Post-work pipeline: push branch, create PR, move card, post summary.
|
|
28
31
|
*/
|
|
29
|
-
export declare function runCompletion(client: HarmonyApiClient, card: Card, branchName: string, worktreePath: string, config: AgentConfig, workerId?: number, sessionStats?: SessionStats): Promise<void>;
|
|
32
|
+
export declare function runCompletion(client: HarmonyApiClient, card: Card, branchName: string, worktreePath: string, config: AgentConfig, workerId?: number, sessionStats?: SessionStats, workspaceId?: string, agentSessionId?: string | null, stateStore?: StateStore): Promise<void>;
|
package/dist/completion.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { execFileSync } from "node:child_process";
|
|
2
2
|
import { moveCardToColumn } from "./board-helpers.js";
|
|
3
|
-
import {
|
|
3
|
+
import { writeEpisode } from "./episode-writer.js";
|
|
4
|
+
import { createPullRequest, detectGitProvider, getBranchWebUrl, pushBranch, } from "./git-pr.js";
|
|
4
5
|
import { log } from "./log.js";
|
|
5
6
|
import { AGENT_NAME, agentIdentifier } from "./types.js";
|
|
6
7
|
import { attemptAutoFix, reportFindings, runVerification, } from "./verification.js";
|
|
@@ -28,7 +29,14 @@ export function buildTokenPayload(stats) {
|
|
|
28
29
|
/**
|
|
29
30
|
* Post-work pipeline: push branch, create PR, move card, post summary.
|
|
30
31
|
*/
|
|
31
|
-
export async function runCompletion(client, card, branchName, worktreePath, config, workerId = 0, sessionStats) {
|
|
32
|
+
export async function runCompletion(client, card, branchName, worktreePath, config, workerId = 0, sessionStats, workspaceId, agentSessionId, stateStore) {
|
|
33
|
+
// Hoisted so the episode write hook can read final verification state.
|
|
34
|
+
let verificationResult = {
|
|
35
|
+
passed: true,
|
|
36
|
+
buildErrors: [],
|
|
37
|
+
lintWarnings: [],
|
|
38
|
+
reviewFindings: [],
|
|
39
|
+
};
|
|
32
40
|
// Check if there are any commits on the branch
|
|
33
41
|
const hasCommits = checkHasCommits(worktreePath, config.worktree.baseBranch);
|
|
34
42
|
if (!hasCommits) {
|
|
@@ -41,7 +49,22 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
|
|
|
41
49
|
cleanupWorktree(worktreePath, branchName);
|
|
42
50
|
return;
|
|
43
51
|
}
|
|
44
|
-
// 1.
|
|
52
|
+
// 1. Push branch FIRST so commits are durable on origin regardless of
|
|
53
|
+
// verification outcome. A failed verify (below) then preserves the work
|
|
54
|
+
// under `agent-attempts/*` for `failedAttemptRetentionDays`. Without this
|
|
55
|
+
// ordering, verify failures used to orphan commits in a deleted worktree —
|
|
56
|
+
// recoverable only via `git reflog`.
|
|
57
|
+
log.info(TAG, `Pushing branch ${branchName} (pre-verify)...`);
|
|
58
|
+
try {
|
|
59
|
+
pushBranch(branchName, worktreePath);
|
|
60
|
+
}
|
|
61
|
+
catch (err) {
|
|
62
|
+
// Push failure shouldn't prevent verification from running, but the
|
|
63
|
+
// safety guarantee is gone — surface it loudly so the operator notices.
|
|
64
|
+
log.error(TAG, `pre-verify push failed for ${branchName}: ${err instanceof Error ? err.message : err}`);
|
|
65
|
+
}
|
|
66
|
+
const recoveryUrl = getBranchWebUrl(branchName, worktreePath);
|
|
67
|
+
// 2. Verification gate
|
|
45
68
|
if (config.verification.enabled) {
|
|
46
69
|
await client.updateAgentProgress(card.id, {
|
|
47
70
|
agentIdentifier: agentIdentifier(workerId),
|
|
@@ -51,6 +74,7 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
|
|
|
51
74
|
progressPercent: 80,
|
|
52
75
|
});
|
|
53
76
|
let result = await runVerification(worktreePath, config, workerId);
|
|
77
|
+
let autoFixAttempts = 0;
|
|
54
78
|
if (!result.passed && config.verification.autoFix) {
|
|
55
79
|
for (let attempt = 0; attempt < config.verification.maxFixAttempts; attempt++) {
|
|
56
80
|
log.info(TAG, `Auto-fix attempt ${attempt + 1}/${config.verification.maxFixAttempts}`);
|
|
@@ -64,28 +88,61 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
|
|
|
64
88
|
const allErrors = [...result.buildErrors, ...result.lintWarnings];
|
|
65
89
|
await attemptAutoFix(worktreePath, config, allErrors);
|
|
66
90
|
result = await runVerification(worktreePath, config, workerId);
|
|
91
|
+
autoFixAttempts = attempt + 1;
|
|
67
92
|
if (result.passed) {
|
|
68
93
|
log.info(TAG, `Auto-fix succeeded on attempt ${attempt + 1}`);
|
|
94
|
+
// Push again so the auto-fix commit is also durable on origin.
|
|
95
|
+
try {
|
|
96
|
+
pushBranch(branchName, worktreePath);
|
|
97
|
+
}
|
|
98
|
+
catch (err) {
|
|
99
|
+
log.warn(TAG, `post-fix push failed for ${branchName}: ${err instanceof Error ? err.message : err}`);
|
|
100
|
+
}
|
|
69
101
|
break;
|
|
70
102
|
}
|
|
71
103
|
}
|
|
72
104
|
}
|
|
105
|
+
verificationResult = result;
|
|
73
106
|
if (!result.passed) {
|
|
74
107
|
log.warn(TAG, `Verification failed for #${card.short_id} — reporting findings`);
|
|
75
|
-
|
|
108
|
+
// Push the latest tip (including any auto-fix attempts) so the
|
|
109
|
+
// failed branch on origin reflects what verify saw.
|
|
110
|
+
try {
|
|
111
|
+
pushBranch(branchName, worktreePath);
|
|
112
|
+
}
|
|
113
|
+
catch (err) {
|
|
114
|
+
log.warn(TAG, `post-fail push failed for ${branchName}: ${err instanceof Error ? err.message : err}`);
|
|
115
|
+
}
|
|
116
|
+
const failureSummary = buildVerificationFailureSummary(result, autoFixAttempts);
|
|
117
|
+
try {
|
|
118
|
+
await stateStore?.recordFailureSummary(card.id, {
|
|
119
|
+
summary: failureSummary,
|
|
120
|
+
reason: "verification",
|
|
121
|
+
recoveryBranch: branchName,
|
|
122
|
+
});
|
|
123
|
+
}
|
|
124
|
+
catch (err) {
|
|
125
|
+
log.debug(TAG, `recordFailureSummary failed: ${err instanceof Error ? err.message : err}`);
|
|
126
|
+
}
|
|
127
|
+
await reportFindings(client, card.id, result, {
|
|
128
|
+
branchName,
|
|
129
|
+
branchUrl: recoveryUrl,
|
|
130
|
+
});
|
|
76
131
|
await moveCardToColumn(client, card, config.verification.failColumn);
|
|
77
132
|
await client.endAgentSession(card.id, {
|
|
78
|
-
status: "
|
|
133
|
+
status: "failed",
|
|
134
|
+
failureReason: "verification",
|
|
135
|
+
failureSummary,
|
|
136
|
+
recoveryBranch: branchName,
|
|
79
137
|
...buildTokenPayload(sessionStats),
|
|
80
138
|
});
|
|
139
|
+
// Local-only cleanup. The remote ref under `agent-attempts/*` stays
|
|
140
|
+
// up; the GC sweep (worktree-gc.ts) prunes it after retention.
|
|
81
141
|
cleanupWorktree(worktreePath, branchName);
|
|
82
142
|
return;
|
|
83
143
|
}
|
|
84
144
|
log.info(TAG, `Verification passed for #${card.short_id}`);
|
|
85
145
|
}
|
|
86
|
-
// 2. Push branch (force-push on rework if remote branch already exists)
|
|
87
|
-
log.info(TAG, `Pushing branch ${branchName}...`);
|
|
88
|
-
pushBranch(branchName, worktreePath);
|
|
89
146
|
// 3. Create PR
|
|
90
147
|
let prUrl = null;
|
|
91
148
|
if (config.completion.createPR) {
|
|
@@ -106,10 +163,47 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
|
|
|
106
163
|
progressPercent: 100,
|
|
107
164
|
...buildTokenPayload(sessionStats),
|
|
108
165
|
});
|
|
166
|
+
// 6a. Episode write hook (Phase 1.5): completed implement runs accumulate
|
|
167
|
+
// into project-scoped episodic memory. Best-effort — failures never block
|
|
168
|
+
// the completion path (plan §"Write hook" + D8).
|
|
169
|
+
//
|
|
170
|
+
// Outcome is constant "success" here: verification failures return early
|
|
171
|
+
// above with status=failed, and D8 mandates paused/orphaned runs skip the
|
|
172
|
+
// episode write entirely. A failure-outcome episode would require routing
|
|
173
|
+
// a separate write hook into the pre-return path, which D8 intentionally
|
|
174
|
+
// omits ("daemon crashes ≠ task outcome").
|
|
175
|
+
if (workspaceId) {
|
|
176
|
+
await writeEpisode(client, {
|
|
177
|
+
kind: "implement",
|
|
178
|
+
card,
|
|
179
|
+
workspaceId,
|
|
180
|
+
outcome: "success",
|
|
181
|
+
approachSummary: sessionStats?.lastAssistantText ?? "",
|
|
182
|
+
result: verificationResult,
|
|
183
|
+
cost: sessionStats?.cost ?? null,
|
|
184
|
+
filesEdited: sessionStats?.filesEdited ?? 0,
|
|
185
|
+
agentSessionId: agentSessionId ?? null,
|
|
186
|
+
});
|
|
187
|
+
}
|
|
109
188
|
// 7. Cleanup worktree
|
|
110
189
|
cleanupWorktree(worktreePath, branchName);
|
|
111
190
|
log.info(TAG, `Completion done for #${card.short_id}${prUrl ? ` — PR: ${prUrl}` : ""}`);
|
|
112
191
|
}
|
|
192
|
+
/**
 * Condense a verification result into a one-line failure summary.
 *
 * Lists the non-empty finding categories (build errors, lint issues, review
 * findings) with their counts, falling back to "verification failed" when all
 * are empty, and appends the number of auto-fix attempts when at least one ran.
 */
function buildVerificationFailureSummary(result, autoFixAttempts) {
    const categories = [
        { items: result.buildErrors, label: "build error(s)" },
        { items: result.lintWarnings, label: "lint issue(s)" },
        { items: result.reviewFindings, label: "review finding(s)" },
    ];
    const parts = categories
        .filter((category) => category.items.length > 0)
        .map((category) => `${category.items.length} ${category.label}`);
    const prefix = parts.length === 0 ? "verification failed" : parts.join(", ");
    const suffix = autoFixAttempts > 0 ? ` after ${autoFixAttempts} auto-fix attempt(s)` : "";
    return prefix + suffix;
}
|
|
113
207
|
function checkHasCommits(worktreePath, baseBranch) {
|
|
114
208
|
try {
|
|
115
209
|
const count = execFileSync("git", ["rev-list", "--count", `origin/${baseBranch}..HEAD`], { cwd: worktreePath, encoding: "utf-8" }).trim();
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
|
|
2
|
+
import type { Card } from "@harmony/shared";
|
|
3
|
+
import type { CostUpdate } from "./stream-parser.js";
|
|
4
|
+
import type { EpisodeMeta, EpisodeOutcome } from "./types.js";
|
|
5
|
+
import type { VerificationResult } from "./verification.js";
|
|
6
|
+
/** Fields shared by every episode input, whichever kind it is. */
interface EpisodeInputBase {
    card: Card;
    workspaceId: string;
    cost: CostUpdate | null;
    agentSessionId?: string | null;
}
/** Input describing an implement run. */
interface ImplementEpisodeInput extends EpisodeInputBase {
    kind: "implement";
    outcome: EpisodeOutcome;
    approachSummary: string;
    result: VerificationResult;
    filesEdited: number;
    errorMessage?: string;
}
/** Input describing a review run and its verdict. */
interface ReviewEpisodeInput extends EpisodeInputBase {
    kind: "review";
    verdict: "approved" | "rejected";
    summary: string;
    reviewSessionId?: string | null;
    originalEpisodeId?: string | null;
}
/** Discriminated on `kind`. */
export type EpisodeInput = ImplementEpisodeInput | ReviewEpisodeInput;
|
|
30
|
+
/**
|
|
31
|
+
* Rule-derived quality score (0..1) for an implement run. Failures default to 0.
|
|
32
|
+
* Plan §"Quality score": +0.4 if build passed, +0.2 if lint passed, +0.2 if no
|
|
33
|
+
* error thrown, +0.2 if run completed cleanly.
|
|
34
|
+
*/
|
|
35
|
+
export declare function computeQualityScore(result: VerificationResult, opts: {
|
|
36
|
+
errorThrown: boolean;
|
|
37
|
+
runCompletedCleanly: boolean;
|
|
38
|
+
}): number;
|
|
39
|
+
/**
|
|
40
|
+
* Trim a free-form summary down to the documented 400-char cap. v1 uses a
|
|
41
|
+
* last-turn trim rather than an LLM rewrite (plan §"Write hook"). Empty or
|
|
42
|
+
* whitespace-only input collapses to a marker so the episode still surfaces
|
|
43
|
+
* as a recallable hit (rather than an empty bullet) in future prompts.
|
|
44
|
+
*/
|
|
45
|
+
export declare function trimApproachSummary(text: string): string;
|
|
46
|
+
/**
|
|
47
|
+
* Build the entity payload for one episode. Pure — returned object can be
|
|
48
|
+
* snapshotted in tests without hitting the network.
|
|
49
|
+
*/
|
|
50
|
+
export declare function buildEpisodePayload(input: EpisodeInput, projectId: string): {
|
|
51
|
+
workspace_id: string;
|
|
52
|
+
project_id?: string;
|
|
53
|
+
type: string;
|
|
54
|
+
memory_tier: string;
|
|
55
|
+
scope: string;
|
|
56
|
+
title: string;
|
|
57
|
+
content: string;
|
|
58
|
+
metadata: EpisodeMeta;
|
|
59
|
+
importance: number;
|
|
60
|
+
confidence: number;
|
|
61
|
+
tags: string[];
|
|
62
|
+
agent_identifier: string;
|
|
63
|
+
};
|
|
64
|
+
/**
|
|
65
|
+
* Write one episode entity. Best-effort: any failure is logged and swallowed
|
|
66
|
+
* so the calling pipeline can complete (plan D8: episode writes never block
|
|
67
|
+
* run completion).
|
|
68
|
+
*
|
|
69
|
+
* Returns the entity id on success, or null on swallowed failure.
|
|
70
|
+
*/
|
|
71
|
+
export declare function writeEpisode(client: HarmonyApiClient, input: EpisodeInput): Promise<string | null>;
|
|
72
|
+
/**
|
|
73
|
+
* Find the most recent implement episode for a given card so the review
|
|
74
|
+
* pipeline can back-fill its verdict. Returns null when none exists or the
|
|
75
|
+
* lookup throws — back-fill is best-effort.
|
|
76
|
+
*/
|
|
77
|
+
export declare function findLatestImplementEpisode(client: HarmonyApiClient, workspaceId: string, projectId: string, cardShortId: number): Promise<string | null>;
|
|
78
|
+
/**
|
|
79
|
+
* Apply the review verdict to an earlier implement episode (plan §"Read hook"
|
|
80
|
+
* back-fill block). Approved nudges the original episode's confidence up;
|
|
81
|
+
* rejected tombstones it via superseded_by.
|
|
82
|
+
*/
|
|
83
|
+
export declare function backfillReviewVerdict(client: HarmonyApiClient, originalEpisodeId: string, verdict: "approved" | "rejected", reviewEpisodeId: string | null): Promise<void>;
|
|
84
|
+
export {};
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import { log } from "./log.js";
|
|
2
|
+
const TAG = "episode-writer";
|
|
3
|
+
const MAX_APPROACH_SUMMARY_CHARS = 400;
|
|
4
|
+
/**
 * Score an implement run on a 0..1 scale from its verification result.
 *
 * A result that did not pass scores 0. Otherwise the score is the sum of:
 * 0.4 for no build errors, 0.2 for no lint warnings, 0.2 when no error was
 * thrown, and 0.2 when the run completed cleanly, capped at 1.
 */
export function computeQualityScore(result, opts) {
    if (!result.passed) {
        return 0;
    }
    // Summed in this fixed order so the floating-point total is reproducible.
    const awards = [
        { earned: result.buildErrors.length === 0, weight: 0.4 },
        { earned: result.lintWarnings.length === 0, weight: 0.2 },
        { earned: !opts.errorThrown, weight: 0.2 },
        { earned: Boolean(opts.runCompletedCleanly), weight: 0.2 },
    ];
    let total = 0;
    for (const award of awards) {
        if (award.earned) {
            total += award.weight;
        }
    }
    return Math.min(1, total);
}
|
|
23
|
+
/**
 * Restrict a quality score to the [0.4, 1.0] confidence band, so even a
 * zero-scored run keeps a minimum confidence of 0.4.
 */
function clampConfidence(qualityScore) {
    const FLOOR = 0.4;
    const CEILING = 1.0;
    const capped = Math.min(CEILING, qualityScore);
    return Math.max(FLOOR, capped);
}
|
|
30
|
+
/**
 * Trim a free-form summary to at most `maxChars` UTF-16 code units (default:
 * the 400-char cap). Over-long text is cut and terminated with an ellipsis.
 *
 * Empty, whitespace-only, null or undefined input collapses to a fixed marker
 * string so the episode never carries an empty summary.
 *
 * @param text     Raw summary text.
 * @param maxChars Maximum length of the returned string, ellipsis included.
 * @returns The trimmed summary, or the marker when there is nothing to keep.
 */
export function trimApproachSummary(text, maxChars = MAX_APPROACH_SUMMARY_CHARS) {
    const trimmed = (text ?? "").trim();
    if (trimmed.length === 0)
        return "(no approach summary captured)";
    if (trimmed.length <= maxChars)
        return trimmed;
    // Reserve one code unit for the ellipsis.
    let end = Math.max(0, maxChars - 1);
    // If the cut lands between the halves of a surrogate pair, drop the
    // dangling high surrogate rather than emit a malformed string.
    if (end > 0) {
        const lastUnit = trimmed.charCodeAt(end - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
            end -= 1;
    }
    return `${trimmed.slice(0, end).trimEnd()}…`;
}
|
|
44
|
+
/** Token and cost counters for episode metadata; absent cost data reads as zeros. */
function tokenCostOf(cost) {
    return {
        input: cost?.totalInputTokens ?? 0,
        output: cost?.totalOutputTokens ?? 0,
        usd: cost?.totalCostUsd ?? 0,
    };
}
/** Payload for an implement-run episode (`type` is "solution" on success, otherwise "error"). */
function buildImplementPayload(input, projectId) {
    const { card, result, outcome, cost } = input;
    const succeeded = outcome === "success";
    const qualityScore = computeQualityScore(result, {
        errorThrown: input.errorMessage !== undefined,
        runCompletedCleanly: result.passed,
    });
    const approachSummary = trimApproachSummary(input.approachSummary);
    let outcomeRationale;
    if (succeeded) {
        const buildState = result.buildErrors.length === 0 ? "passed" : "failed";
        const lintState = result.lintWarnings.length === 0 ? "clean" : "issues";
        outcomeRationale = `Build ${buildState}, lint ${lintState}.`;
    }
    else {
        outcomeRationale = `Verification failed: ${input.errorMessage ?? "see findings"}.`;
    }
    const metadata = {
        episode_kind: "implement",
        card_short_id: card.short_id,
        card_title: card.title,
        approach_summary: approachSummary,
        outcome,
        quality_score: qualityScore,
        duration_ms: cost?.durationMs ?? 0,
        token_cost: tokenCostOf(cost),
        files_touched: input.filesEdited,
        num_turns: cost?.numTurns ?? 0,
    };
    // Optional fields are attached only when they carry a truthy value.
    if (input.errorMessage) {
        metadata.error = input.errorMessage;
    }
    if (input.agentSessionId) {
        metadata.agent_session_id = input.agentSessionId;
    }
    return {
        workspace_id: input.workspaceId,
        project_id: projectId,
        type: succeeded ? "solution" : "error",
        memory_tier: "episode",
        scope: "project",
        title: `Agent run implement — #${card.short_id}: ${card.title}`,
        content: `${approachSummary}\n\nOutcome: ${outcomeRationale}`,
        metadata,
        importance: succeeded ? 7 : 5,
        confidence: clampConfidence(qualityScore),
        tags: ["implement", outcome, `card:${card.short_id}`],
        agent_identifier: "harmony-agent",
    };
}
/** Payload for a review episode (`type` is always "decision"). */
function buildReviewPayload(input, projectId) {
    const { card, verdict, cost } = input;
    const approved = verdict === "approved";
    const qualityScore = approved ? 1 : 0.4;
    const summary = trimApproachSummary(input.summary || "(no summary captured)");
    const metadata = {
        episode_kind: "review",
        card_short_id: card.short_id,
        card_title: card.title,
        approach_summary: summary,
        outcome: approved ? "success" : "failure",
        quality_score: qualityScore,
        duration_ms: cost?.durationMs ?? 0,
        token_cost: tokenCostOf(cost),
        files_touched: 0,
        num_turns: cost?.numTurns ?? 0,
    };
    if (input.agentSessionId) {
        metadata.agent_session_id = input.agentSessionId;
    }
    if (input.reviewSessionId) {
        metadata.review_session_id = input.reviewSessionId;
    }
    if (input.originalEpisodeId) {
        metadata.original_episode_id = input.originalEpisodeId;
    }
    return {
        workspace_id: input.workspaceId,
        project_id: projectId,
        type: "decision",
        memory_tier: "episode",
        scope: "project",
        title: `Agent run review — #${card.short_id}: ${card.title}`,
        content: `Review verdict: ${verdict}.\n\n${summary}`,
        metadata,
        importance: 8,
        confidence: clampConfidence(qualityScore),
        tags: ["review", verdict, `card:${card.short_id}`],
        agent_identifier: "harmony-agent",
    };
}
/**
 * Assemble the memory-entity payload for a single episode.
 *
 * Dispatches on `input.kind`: "implement" inputs produce an implement payload,
 * anything else is treated as a review. Performs no I/O itself, so the result
 * can be inspected directly in tests.
 */
export function buildEpisodePayload(input, projectId) {
    return input.kind === "implement"
        ? buildImplementPayload(input, projectId)
        : buildReviewPayload(input, projectId);
}
|
|
135
|
+
/**
 * Write one episode entity. Best-effort: any failure, whether in building the
 * payload or in the API call, is logged and swallowed so the calling pipeline
 * can still complete.
 *
 * @param client API client exposing `createMemoryEntity`.
 * @param input  Episode input; `input.card.project_id` supplies the project id.
 * @returns The created entity's id, or null when the response carries no id
 *          or the write failed.
 */
export async function writeEpisode(client, input) {
    try {
        // Built inside the try: payload construction dereferences caller-supplied
        // fields and may throw, which must not escape a best-effort hook.
        const payload = buildEpisodePayload(input, input.card.project_id);
        const { entity } = await client.createMemoryEntity(payload);
        const id = entity && typeof entity === "object" && "id" in entity
            ? (entity.id ?? null)
            : null;
        log.info(TAG, `episode written for #${input.card.short_id}`, {
            cardId: input.card.id,
            event: "episode_write",
            kind: input.kind,
        });
        return id;
    }
    catch (err) {
        // Optional chaining keeps the failure path itself from throwing when the
        // original error was caused by a missing `input` or `input.card`.
        log.warn(TAG, `episode write failed for #${input?.card?.short_id}`, {
            cardId: input?.card?.id,
            event: "episode_write_failed",
            kind: input?.kind,
            error: err instanceof Error ? err.message : String(err),
        });
        return null;
    }
}
|
|
169
|
+
/**
 * Look up an implement episode for a card so the review pipeline can
 * back-fill its verdict.
 *
 * Issues a single recall for project-scoped "solution"/"error" episodes tagged
 * `card:<cardShortId>` with `topK: 1`, and returns the first hit's id when it
 * is a string. Returns null when there is no usable hit or the lookup throws
 * (the error is logged, not propagated).
 */
export async function findLatestImplementEpisode(client, workspaceId, projectId, cardShortId) {
    try {
        const response = await client.harmonyRecall({
            workspaceId,
            projectId,
            type: ["solution", "error"],
            memory_tier: "episode",
            scope: "project",
            tags: [`card:${cardShortId}`],
            topK: 1,
        });
        const candidate = response.entities[0];
        const hasStringId = candidate &&
            typeof candidate === "object" &&
            "id" in candidate &&
            typeof candidate.id === "string";
        return hasStringId ? candidate.id : null;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log.warn(TAG, "implement-episode lookup failed", {
            event: "episode_lookup_failed",
            cardShortId,
            error: reason,
        });
        return null;
    }
}
|
|
203
|
+
/**
 * Apply a review verdict to an earlier implement episode.
 *
 * - "approved": reads the episode, adds 0.05 to its confidence (treating a
 *   missing value as 0.4), caps the result at 1 and writes it back.
 * - any other verdict: sets `superseded_by` to `reviewEpisodeId`.
 *
 * Failures are logged and swallowed; nothing is thrown to the caller.
 */
export async function backfillReviewVerdict(client, originalEpisodeId, verdict, reviewEpisodeId) {
    try {
        let patch;
        if (verdict === "approved") {
            const fetched = await client.getMemoryEntity(originalEpisodeId);
            const baseline = fetched.entity?.confidence ?? 0.4;
            patch = { confidence: Math.min(1, baseline + 0.05) };
        }
        else {
            // NOTE(review): `reviewEpisodeId` may be null here, in which case
            // `superseded_by: null` is sent as-is — confirm the API's handling.
            patch = { superseded_by: reviewEpisodeId };
        }
        await client.updateMemoryEntity(originalEpisodeId, patch);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log.warn(TAG, "review back-fill failed", {
            event: "episode_backfill_failed",
            originalEpisodeId,
            verdict,
            error: reason,
        });
    }
}
|
package/dist/git-pr.d.ts
CHANGED
|
@@ -19,6 +19,19 @@ export declare function checkPrMergeStatus(prUrl: string, cwd: string, provider:
|
|
|
19
19
|
export declare function extractPrUrl(description: string | null): string | null;
|
|
20
20
|
export declare function remoteBranchExists(branchName: string, cwd: string): boolean;
|
|
21
21
|
export declare function pushBranch(branchName: string, cwd: string): void;
|
|
22
|
+
/**
|
|
23
|
+
* Push the current branch's tip to `newRef` on origin and delete `oldRef`.
|
|
24
|
+
* Used when an approved attempt graduates from `agent-attempts/*` to
|
|
25
|
+
* `agent/*` — keeps the commits durable across the rename and avoids any
|
|
26
|
+
* window where the work is unreachable on origin.
|
|
27
|
+
*/
|
|
28
|
+
export declare function renameRemoteBranch(oldRef: string, newRef: string, cwd: string): void;
|
|
29
|
+
/**
|
|
30
|
+
* Best-effort public branch URL for the recovery button on a failed session.
|
|
31
|
+
* Returns null when we can't infer a tree URL — the daemon falls back to a
|
|
32
|
+
* plain `git fetch && git checkout <ref>` instruction in that case.
|
|
33
|
+
*/
|
|
34
|
+
export declare function getBranchWebUrl(branchName: string, cwd: string): string | null;
|
|
22
35
|
export declare function buildPrBody(card: Card, commitLog: string): string;
|
|
23
36
|
export declare function createPullRequest(card: Card, branchName: string, worktreePath: string, config: AgentConfig, provider: GitProvider): string | null;
|
|
24
37
|
export declare function findExistingPr(branchName: string, worktreePath: string, provider: GitProvider): string | null;
|