@gethmy/agent 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/completion.d.ts +3 -1
- package/dist/completion.js +35 -1
- package/dist/episode-writer.d.ts +84 -0
- package/dist/episode-writer.js +232 -0
- package/dist/pool.js +1 -1
- package/dist/progress-tracker.d.ts +2 -0
- package/dist/progress-tracker.js +7 -0
- package/dist/prompt.d.ts +6 -0
- package/dist/prompt.js +47 -2
- package/dist/review-completion.d.ts +1 -1
- package/dist/review-completion.js +41 -1
- package/dist/review-worker.d.ts +3 -1
- package/dist/review-worker.js +12 -3
- package/dist/types.d.ts +32 -0
- package/dist/worker.js +1 -1
- package/package.json +1 -1
package/dist/completion.d.ts
CHANGED
|
@@ -7,6 +7,8 @@ export interface SessionStats {
|
|
|
7
7
|
filesRead: number;
|
|
8
8
|
toolCalls: number;
|
|
9
9
|
cost: CostUpdate | null;
|
|
10
|
+
/** Trimmed last assistant text — feeds the episode write hook (Phase 1.5). */
|
|
11
|
+
lastAssistantText?: string;
|
|
10
12
|
}
|
|
11
13
|
export declare function buildTokenPayload(stats?: SessionStats | null): {
|
|
12
14
|
costCents?: undefined;
|
|
@@ -26,4 +28,4 @@ export declare function buildTokenPayload(stats?: SessionStats | null): {
|
|
|
26
28
|
/**
|
|
27
29
|
* Post-work pipeline: push branch, create PR, move card, post summary.
|
|
28
30
|
*/
|
|
29
|
-
export declare function runCompletion(client: HarmonyApiClient, card: Card, branchName: string, worktreePath: string, config: AgentConfig, workerId?: number, sessionStats?: SessionStats): Promise<void>;
|
|
31
|
+
export declare function runCompletion(client: HarmonyApiClient, card: Card, branchName: string, worktreePath: string, config: AgentConfig, workerId?: number, sessionStats?: SessionStats, workspaceId?: string, agentSessionId?: string | null): Promise<void>;
|
package/dist/completion.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { execFileSync } from "node:child_process";
|
|
2
2
|
import { moveCardToColumn } from "./board-helpers.js";
|
|
3
|
+
import { writeEpisode } from "./episode-writer.js";
|
|
3
4
|
import { createPullRequest, detectGitProvider, pushBranch } from "./git-pr.js";
|
|
4
5
|
import { log } from "./log.js";
|
|
5
6
|
import { AGENT_NAME, agentIdentifier } from "./types.js";
|
|
@@ -28,7 +29,14 @@ export function buildTokenPayload(stats) {
|
|
|
28
29
|
/**
|
|
29
30
|
* Post-work pipeline: push branch, create PR, move card, post summary.
|
|
30
31
|
*/
|
|
31
|
-
export async function runCompletion(client, card, branchName, worktreePath, config, workerId = 0, sessionStats) {
|
|
32
|
+
export async function runCompletion(client, card, branchName, worktreePath, config, workerId = 0, sessionStats, workspaceId, agentSessionId) {
|
|
33
|
+
// Hoisted so the episode write hook can read final verification state.
|
|
34
|
+
let verificationResult = {
|
|
35
|
+
passed: true,
|
|
36
|
+
buildErrors: [],
|
|
37
|
+
lintWarnings: [],
|
|
38
|
+
reviewFindings: [],
|
|
39
|
+
};
|
|
32
40
|
// Check if there are any commits on the branch
|
|
33
41
|
const hasCommits = checkHasCommits(worktreePath, config.worktree.baseBranch);
|
|
34
42
|
if (!hasCommits) {
|
|
@@ -70,6 +78,7 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
|
|
|
70
78
|
}
|
|
71
79
|
}
|
|
72
80
|
}
|
|
81
|
+
verificationResult = result;
|
|
73
82
|
if (!result.passed) {
|
|
74
83
|
log.warn(TAG, `Verification failed for #${card.short_id} — reporting findings`);
|
|
75
84
|
await reportFindings(client, card.id, result);
|
|
@@ -78,6 +87,9 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
|
|
|
78
87
|
status: "paused",
|
|
79
88
|
...buildTokenPayload(sessionStats),
|
|
80
89
|
});
|
|
90
|
+
// Episode write: paused/orphaned runs skip silently (plan D8). Failure
|
|
91
|
+
// here would only fire on a status===completed path, which we don't
|
|
92
|
+
// hit when verification fails.
|
|
81
93
|
cleanupWorktree(worktreePath, branchName);
|
|
82
94
|
return;
|
|
83
95
|
}
|
|
@@ -106,6 +118,28 @@ export async function runCompletion(client, card, branchName, worktreePath, conf
|
|
|
106
118
|
progressPercent: 100,
|
|
107
119
|
...buildTokenPayload(sessionStats),
|
|
108
120
|
});
|
|
121
|
+
// 6a. Episode write hook (Phase 1.5): completed implement runs accumulate
|
|
122
|
+
// into project-scoped episodic memory. Best-effort — failures never block
|
|
123
|
+
// the completion path (plan §"Write hook" + D8).
|
|
124
|
+
//
|
|
125
|
+
// Outcome is constant "success" here: verification failures return early
|
|
126
|
+
// above with status=paused, and D8 mandates paused/orphaned runs skip the
|
|
127
|
+
// episode write entirely. A failure-outcome episode would require routing
|
|
128
|
+
// a separate write hook into the pre-return path, which D8 intentionally
|
|
129
|
+
// omits ("daemon crashes ≠ task outcome").
|
|
130
|
+
if (workspaceId) {
|
|
131
|
+
await writeEpisode(client, {
|
|
132
|
+
kind: "implement",
|
|
133
|
+
card,
|
|
134
|
+
workspaceId,
|
|
135
|
+
outcome: "success",
|
|
136
|
+
approachSummary: sessionStats?.lastAssistantText ?? "",
|
|
137
|
+
result: verificationResult,
|
|
138
|
+
cost: sessionStats?.cost ?? null,
|
|
139
|
+
filesEdited: sessionStats?.filesEdited ?? 0,
|
|
140
|
+
agentSessionId: agentSessionId ?? null,
|
|
141
|
+
});
|
|
142
|
+
}
|
|
109
143
|
// 7. Cleanup worktree
|
|
110
144
|
cleanupWorktree(worktreePath, branchName);
|
|
111
145
|
log.info(TAG, `Completion done for #${card.short_id}${prUrl ? ` — PR: ${prUrl}` : ""}`);
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
|
|
2
|
+
import type { Card } from "@harmony/shared";
|
|
3
|
+
import type { CostUpdate } from "./stream-parser.js";
|
|
4
|
+
import type { EpisodeMeta, EpisodeOutcome } from "./types.js";
|
|
5
|
+
import type { VerificationResult } from "./verification.js";
|
|
6
|
+
interface ImplementEpisodeInput {
|
|
7
|
+
kind: "implement";
|
|
8
|
+
card: Card;
|
|
9
|
+
workspaceId: string;
|
|
10
|
+
outcome: EpisodeOutcome;
|
|
11
|
+
approachSummary: string;
|
|
12
|
+
result: VerificationResult;
|
|
13
|
+
cost: CostUpdate | null;
|
|
14
|
+
filesEdited: number;
|
|
15
|
+
errorMessage?: string;
|
|
16
|
+
agentSessionId?: string | null;
|
|
17
|
+
}
|
|
18
|
+
interface ReviewEpisodeInput {
|
|
19
|
+
kind: "review";
|
|
20
|
+
card: Card;
|
|
21
|
+
workspaceId: string;
|
|
22
|
+
verdict: "approved" | "rejected";
|
|
23
|
+
summary: string;
|
|
24
|
+
cost: CostUpdate | null;
|
|
25
|
+
agentSessionId?: string | null;
|
|
26
|
+
reviewSessionId?: string | null;
|
|
27
|
+
originalEpisodeId?: string | null;
|
|
28
|
+
}
|
|
29
|
+
export type EpisodeInput = ImplementEpisodeInput | ReviewEpisodeInput;
|
|
30
|
+
/**
|
|
31
|
+
* Rule-derived quality score (0..1) for an implement run. Failures default to 0.
|
|
32
|
+
* Plan §"Quality score": +0.4 if build passed, +0.2 if lint passed, +0.2 if no
|
|
33
|
+
* error thrown, +0.2 if run completed cleanly.
|
|
34
|
+
*/
|
|
35
|
+
export declare function computeQualityScore(result: VerificationResult, opts: {
|
|
36
|
+
errorThrown: boolean;
|
|
37
|
+
runCompletedCleanly: boolean;
|
|
38
|
+
}): number;
|
|
39
|
+
/**
|
|
40
|
+
* Trim a free-form summary down to the documented 400-char cap. v1 uses a
|
|
41
|
+
* last-turn trim rather than an LLM rewrite (plan §"Write hook"). Empty or
|
|
42
|
+
* whitespace-only input collapses to a marker so the episode still surfaces
|
|
43
|
+
* as a recallable hit (rather than an empty bullet) in future prompts.
|
|
44
|
+
*/
|
|
45
|
+
export declare function trimApproachSummary(text: string): string;
|
|
46
|
+
/**
|
|
47
|
+
* Build the entity payload for one episode. Pure — returned object can be
|
|
48
|
+
* snapshotted in tests without hitting the network.
|
|
49
|
+
*/
|
|
50
|
+
export declare function buildEpisodePayload(input: EpisodeInput, projectId: string): {
|
|
51
|
+
workspace_id: string;
|
|
52
|
+
project_id?: string;
|
|
53
|
+
type: string;
|
|
54
|
+
memory_tier: string;
|
|
55
|
+
scope: string;
|
|
56
|
+
title: string;
|
|
57
|
+
content: string;
|
|
58
|
+
metadata: EpisodeMeta;
|
|
59
|
+
importance: number;
|
|
60
|
+
confidence: number;
|
|
61
|
+
tags: string[];
|
|
62
|
+
agent_identifier: string;
|
|
63
|
+
};
|
|
64
|
+
/**
|
|
65
|
+
* Write one episode entity. Best-effort: any failure is logged and swallowed
|
|
66
|
+
* so the calling pipeline can complete (plan D8: episode writes never block
|
|
67
|
+
* run completion).
|
|
68
|
+
*
|
|
69
|
+
* Returns the entity id on success, or null on swallowed failure.
|
|
70
|
+
*/
|
|
71
|
+
export declare function writeEpisode(client: HarmonyApiClient, input: EpisodeInput): Promise<string | null>;
|
|
72
|
+
/**
|
|
73
|
+
* Find the most recent implement episode for a given card so the review
|
|
74
|
+
* pipeline can back-fill its verdict. Returns null when none exists or the
|
|
75
|
+
* lookup throws — back-fill is best-effort.
|
|
76
|
+
*/
|
|
77
|
+
export declare function findLatestImplementEpisode(client: HarmonyApiClient, workspaceId: string, projectId: string, cardShortId: number): Promise<string | null>;
|
|
78
|
+
/**
|
|
79
|
+
* Apply the review verdict to an earlier implement episode (plan §"Read hook"
|
|
80
|
+
* back-fill block). Approved nudges the original episode's confidence up;
|
|
81
|
+
* rejected tombstones it via superseded_by.
|
|
82
|
+
*/
|
|
83
|
+
export declare function backfillReviewVerdict(client: HarmonyApiClient, originalEpisodeId: string, verdict: "approved" | "rejected", reviewEpisodeId: string | null): Promise<void>;
|
|
84
|
+
export {};
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import { log } from "./log.js";
|
|
2
|
+
const TAG = "episode-writer";
|
|
3
|
+
const MAX_APPROACH_SUMMARY_CHARS = 400;
|
|
4
|
+
/**
|
|
5
|
+
* Rule-derived quality score (0..1) for an implement run. Failures default to 0.
|
|
6
|
+
* Plan §"Quality score": +0.4 if build passed, +0.2 if lint passed, +0.2 if no
|
|
7
|
+
* error thrown, +0.2 if run completed cleanly.
|
|
8
|
+
*/
|
|
9
|
+
export function computeQualityScore(result, opts) {
|
|
10
|
+
if (!result.passed)
|
|
11
|
+
return 0;
|
|
12
|
+
let score = 0;
|
|
13
|
+
if (result.buildErrors.length === 0)
|
|
14
|
+
score += 0.4;
|
|
15
|
+
if (result.lintWarnings.length === 0)
|
|
16
|
+
score += 0.2;
|
|
17
|
+
if (!opts.errorThrown)
|
|
18
|
+
score += 0.2;
|
|
19
|
+
if (opts.runCompletedCleanly)
|
|
20
|
+
score += 0.2;
|
|
21
|
+
return Math.min(1, score);
|
|
22
|
+
}
|
|
23
|
+
/**
 * Pin a quality score inside the documented confidence band [0.4, 1.0]
 * (plan §"Episode record shape"), so that even a zero-score failure keeps a
 * minimum confidence floor.
 */
function clampConfidence(qualityScore) {
    const floor = 0.4;
    const ceiling = 1.0;
    const raisedToFloor = Math.max(floor, qualityScore);
    return Math.min(ceiling, raisedToFloor);
}
|
|
30
|
+
/**
 * Trim a free-form summary down to the documented 400-char cap. v1 uses a
 * last-turn trim rather than an LLM rewrite (plan §"Write hook"). Empty or
 * whitespace-only input collapses to a marker so the episode still surfaces
 * as a recallable hit (rather than an empty bullet) in future prompts.
 *
 * Over-long text is cut to `maxChars - 1` UTF-16 units plus a trailing "…".
 * If the cut lands between the two halves of a surrogate pair, the dangling
 * high surrogate is dropped so the result never contains a lone surrogate.
 *
 * @param text Raw summary text.
 * @param maxChars Maximum length of the result in UTF-16 units; defaults to
 *   the module-level cap.
 * @returns The trimmed summary, the truncated summary ending in "…", or the
 *   "(no approach summary captured)" marker.
 */
export function trimApproachSummary(text, maxChars = MAX_APPROACH_SUMMARY_CHARS) {
    const trimmed = text.trim();
    if (trimmed.length === 0)
        return "(no approach summary captured)";
    if (trimmed.length <= maxChars)
        return trimmed;
    // Reserve one unit for the ellipsis.
    let head = trimmed.slice(0, Math.max(0, maxChars - 1));
    // A high surrogate (0xD800–0xDBFF) at the very end means the cut split an
    // astral character in two — drop the orphaned half.
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
        head = head.slice(0, -1);
    return `${head.trimEnd()}…`;
}
|
|
44
|
+
/**
 * Assemble the memory-entity payload for a single episode without touching
 * the network, so the result can be snapshotted in tests.
 *
 * Any `input.kind` other than "implement" is treated as a review episode.
 *
 * - Implement episodes: type "solution" (importance 7) on success, otherwise
 *   "error" (importance 5); the quality score comes from the verification
 *   result.
 * - Review episodes: type "decision" (importance 8); the quality score is 1
 *   for an approved verdict and 0.4 otherwise.
 *
 * In both cases `confidence` is the quality score clamped into the
 * confidence band, and optional ids / error text are added to the metadata
 * only when they are truthy.
 *
 * @param input Implement or review episode description.
 * @param projectId Project the episode is scoped to.
 * @returns The entity payload.
 */
export function buildEpisodePayload(input, projectId) {
    const { card, cost } = input;
    const cardTag = `card:${card.short_id}`;
    // Metadata fields common to both episode kinds, in their persisted key order.
    const makeMetadata = (episodeKind, summaryText, outcome, score, filesTouched) => ({
        episode_kind: episodeKind,
        card_short_id: card.short_id,
        card_title: card.title,
        approach_summary: summaryText,
        outcome,
        quality_score: score,
        duration_ms: cost?.durationMs ?? 0,
        token_cost: {
            input: cost?.totalInputTokens ?? 0,
            output: cost?.totalOutputTokens ?? 0,
            usd: cost?.totalCostUsd ?? 0,
        },
        files_touched: filesTouched,
        num_turns: cost?.numTurns ?? 0,
    });
    if (input.kind !== "implement") {
        // Review episode
        const approved = input.verdict === "approved";
        const reviewScore = approved ? 1 : 0.4;
        const reviewSummary = trimApproachSummary(input.summary || "(no summary captured)");
        const reviewMeta = makeMetadata("review", reviewSummary, approved ? "success" : "failure", reviewScore, 0);
        if (input.agentSessionId)
            reviewMeta.agent_session_id = input.agentSessionId;
        if (input.reviewSessionId)
            reviewMeta.review_session_id = input.reviewSessionId;
        if (input.originalEpisodeId)
            reviewMeta.original_episode_id = input.originalEpisodeId;
        return {
            workspace_id: input.workspaceId,
            project_id: projectId,
            type: "decision",
            memory_tier: "episode",
            scope: "project",
            title: `Agent run review — #${card.short_id}: ${card.title}`,
            content: `Review verdict: ${input.verdict}.\n\n${reviewSummary}`,
            metadata: reviewMeta,
            importance: 8,
            confidence: clampConfidence(reviewScore),
            tags: ["review", input.verdict, cardTag],
            agent_identifier: "harmony-agent",
        };
    }
    // Implement episode
    const verification = input.result;
    const score = computeQualityScore(verification, {
        errorThrown: input.errorMessage !== undefined,
        runCompletedCleanly: verification.passed,
    });
    const succeeded = input.outcome === "success";
    const approach = trimApproachSummary(input.approachSummary);
    let rationale;
    if (succeeded) {
        const buildWord = verification.buildErrors.length === 0 ? "passed" : "failed";
        const lintWord = verification.lintWarnings.length === 0 ? "clean" : "issues";
        rationale = `Build ${buildWord}, lint ${lintWord}.`;
    }
    else {
        rationale = `Verification failed: ${input.errorMessage ?? "see findings"}.`;
    }
    const implementMeta = makeMetadata("implement", approach, input.outcome, score, input.filesEdited);
    if (input.errorMessage)
        implementMeta.error = input.errorMessage;
    if (input.agentSessionId)
        implementMeta.agent_session_id = input.agentSessionId;
    return {
        workspace_id: input.workspaceId,
        project_id: projectId,
        type: succeeded ? "solution" : "error",
        memory_tier: "episode",
        scope: "project",
        title: `Agent run implement — #${card.short_id}: ${card.title}`,
        content: `${approach}\n\nOutcome: ${rationale}`,
        metadata: implementMeta,
        importance: succeeded ? 7 : 5,
        confidence: clampConfidence(score),
        tags: ["implement", input.outcome, cardTag],
        agent_identifier: "harmony-agent",
    };
}
|
|
135
|
+
/**
|
|
136
|
+
* Write one episode entity. Best-effort: any failure is logged and swallowed
|
|
137
|
+
* so the calling pipeline can complete (plan D8: episode writes never block
|
|
138
|
+
* run completion).
|
|
139
|
+
*
|
|
140
|
+
* Returns the entity id on success, or null on swallowed failure.
|
|
141
|
+
*/
|
|
142
|
+
export async function writeEpisode(client, input) {
|
|
143
|
+
const payload = buildEpisodePayload(input, input.card.project_id);
|
|
144
|
+
try {
|
|
145
|
+
const { entity } = await client.createMemoryEntity({
|
|
146
|
+
...payload,
|
|
147
|
+
metadata: payload.metadata,
|
|
148
|
+
});
|
|
149
|
+
const id = entity && typeof entity === "object" && "id" in entity
|
|
150
|
+
? (entity.id ?? null)
|
|
151
|
+
: null;
|
|
152
|
+
log.info(TAG, `episode written for #${input.card.short_id}`, {
|
|
153
|
+
cardId: input.card.id,
|
|
154
|
+
event: "episode_write",
|
|
155
|
+
kind: input.kind,
|
|
156
|
+
});
|
|
157
|
+
return id;
|
|
158
|
+
}
|
|
159
|
+
catch (err) {
|
|
160
|
+
log.warn(TAG, `episode write failed for #${input.card.short_id}`, {
|
|
161
|
+
cardId: input.card.id,
|
|
162
|
+
event: "episode_write_failed",
|
|
163
|
+
kind: input.kind,
|
|
164
|
+
error: err instanceof Error ? err.message : String(err),
|
|
165
|
+
});
|
|
166
|
+
return null;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
/**
|
|
170
|
+
* Find the most recent implement episode for a given card so the review
|
|
171
|
+
* pipeline can back-fill its verdict. Returns null when none exists or the
|
|
172
|
+
* lookup throws — back-fill is best-effort.
|
|
173
|
+
*/
|
|
174
|
+
export async function findLatestImplementEpisode(client, workspaceId, projectId, cardShortId) {
|
|
175
|
+
try {
|
|
176
|
+
const { entities } = await client.harmonyRecall({
|
|
177
|
+
workspaceId,
|
|
178
|
+
projectId,
|
|
179
|
+
type: ["solution", "error"],
|
|
180
|
+
memory_tier: "episode",
|
|
181
|
+
scope: "project",
|
|
182
|
+
tags: [`card:${cardShortId}`],
|
|
183
|
+
topK: 1,
|
|
184
|
+
});
|
|
185
|
+
const first = entities[0];
|
|
186
|
+
if (first &&
|
|
187
|
+
typeof first === "object" &&
|
|
188
|
+
"id" in first &&
|
|
189
|
+
typeof first.id === "string") {
|
|
190
|
+
return first.id;
|
|
191
|
+
}
|
|
192
|
+
return null;
|
|
193
|
+
}
|
|
194
|
+
catch (err) {
|
|
195
|
+
log.warn(TAG, "implement-episode lookup failed", {
|
|
196
|
+
event: "episode_lookup_failed",
|
|
197
|
+
cardShortId,
|
|
198
|
+
error: err instanceof Error ? err.message : String(err),
|
|
199
|
+
});
|
|
200
|
+
return null;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
/**
|
|
204
|
+
* Apply the review verdict to an earlier implement episode (plan §"Read hook"
|
|
205
|
+
* back-fill block). Approved nudges the original episode's confidence up;
|
|
206
|
+
* rejected tombstones it via superseded_by.
|
|
207
|
+
*/
|
|
208
|
+
export async function backfillReviewVerdict(client, originalEpisodeId, verdict, reviewEpisodeId) {
|
|
209
|
+
try {
|
|
210
|
+
if (verdict === "approved") {
|
|
211
|
+
const { entity } = await client.getMemoryEntity(originalEpisodeId);
|
|
212
|
+
const current = entity?.confidence ?? 0.4;
|
|
213
|
+
const bumped = Math.min(1, current + 0.05);
|
|
214
|
+
await client.updateMemoryEntity(originalEpisodeId, {
|
|
215
|
+
confidence: bumped,
|
|
216
|
+
});
|
|
217
|
+
}
|
|
218
|
+
else {
|
|
219
|
+
await client.updateMemoryEntity(originalEpisodeId, {
|
|
220
|
+
superseded_by: reviewEpisodeId,
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
catch (err) {
|
|
225
|
+
log.warn(TAG, "review back-fill failed", {
|
|
226
|
+
event: "episode_backfill_failed",
|
|
227
|
+
originalEpisodeId,
|
|
228
|
+
verdict,
|
|
229
|
+
error: err instanceof Error ? err.message : String(err),
|
|
230
|
+
});
|
|
231
|
+
}
|
|
232
|
+
}
|
package/dist/pool.js
CHANGED
|
@@ -27,7 +27,7 @@ export class Pool {
|
|
|
27
27
|
const reviewWorkerId = config.poolSize; // offset to avoid ID collision
|
|
28
28
|
this.reviewWorkers.push(new ReviewWorker(reviewWorkerId, config, client, userEmail, () => {
|
|
29
29
|
this.tryDispatchFor(this.reviewWorkers, this.reviewQueue, "review");
|
|
30
|
-
}, stateStore));
|
|
30
|
+
}, stateStore, workspaceId, projectId));
|
|
31
31
|
}
|
|
32
32
|
}
|
|
33
33
|
/**
|
|
@@ -30,6 +30,7 @@ export declare class ProgressTracker {
|
|
|
30
30
|
private lastCost;
|
|
31
31
|
private logBuffer;
|
|
32
32
|
private sessionId;
|
|
33
|
+
private lastAssistantText;
|
|
33
34
|
constructor(client: HarmonyApiClient, cardId: string, workerId: number, subtasks: {
|
|
34
35
|
completed: boolean;
|
|
35
36
|
}[]);
|
|
@@ -48,6 +49,7 @@ export declare class ProgressTracker {
|
|
|
48
49
|
filesRead: number;
|
|
49
50
|
toolCalls: number;
|
|
50
51
|
cost: CostUpdate | null;
|
|
52
|
+
lastAssistantText: string;
|
|
51
53
|
};
|
|
52
54
|
private onToolStart;
|
|
53
55
|
private onToolEnd;
|
package/dist/progress-tracker.js
CHANGED
|
@@ -62,6 +62,9 @@ export class ProgressTracker {
|
|
|
62
62
|
lastCost = null;
|
|
63
63
|
logBuffer = [];
|
|
64
64
|
sessionId = null;
|
|
65
|
+
// Last assistant text block — used by the episode write hook to
|
|
66
|
+
// capture an approach summary without re-running an LLM (plan §"Write hook").
|
|
67
|
+
lastAssistantText = "";
|
|
65
68
|
constructor(client, cardId, workerId, subtasks) {
|
|
66
69
|
this.client = client;
|
|
67
70
|
this.cardId = cardId;
|
|
@@ -129,6 +132,7 @@ export class ProgressTracker {
|
|
|
129
132
|
filesRead: this.filesRead.size,
|
|
130
133
|
toolCalls: this.toolCallCount,
|
|
131
134
|
cost: this.lastCost,
|
|
135
|
+
lastAssistantText: this.lastAssistantText,
|
|
132
136
|
};
|
|
133
137
|
}
|
|
134
138
|
onToolStart(name, input) {
|
|
@@ -205,6 +209,9 @@ export class ProgressTracker {
|
|
|
205
209
|
const trimmed = content.trim();
|
|
206
210
|
if (trimmed.length < 10)
|
|
207
211
|
return;
|
|
212
|
+
// Always remember the latest non-trivial assistant turn for the episode
|
|
213
|
+
// write hook — last-turn trim, no LLM rewrite (plan §"Write hook").
|
|
214
|
+
this.lastAssistantText = trimmed;
|
|
208
215
|
// Extract first sentence or line as a brief description
|
|
209
216
|
const end = trimmed.search(SENTENCE_SPLIT);
|
|
210
217
|
const firstLine = (end === -1 ? trimmed : trimmed.slice(0, end)).trim();
|
package/dist/prompt.d.ts
CHANGED
|
@@ -10,3 +10,9 @@ import type { EnrichedCard } from "./types.js";
|
|
|
10
10
|
* Falls back to a minimal local prompt if the API call fails.
|
|
11
11
|
*/
|
|
12
12
|
export declare function buildPrompt(enriched: EnrichedCard, branchName: string, worktreePath: string, client: HarmonyApiClient, workspaceId: string, projectId?: string): Promise<string>;
|
|
13
|
+
/**
|
|
14
|
+
* Recall similar past episodes (implement solution/error type) and render them
|
|
15
|
+
* as a "Similar past tasks" section. Returns the empty string on no hits or
|
|
16
|
+
* recall failure — never throws.
|
|
17
|
+
*/
|
|
18
|
+
export declare function renderPastEpisodesSection(client: HarmonyApiClient, title: string, description: string, workspaceId: string, projectId?: string): Promise<string>;
|
package/dist/prompt.js
CHANGED
|
@@ -11,6 +11,10 @@ const TAG = "prompt";
|
|
|
11
11
|
*/
|
|
12
12
|
export async function buildPrompt(enriched, branchName, worktreePath, client, workspaceId, projectId) {
|
|
13
13
|
const { card } = enriched;
|
|
14
|
+
// Phase 1.5 read hook: surface similar past episodes for this card. Block
|
|
15
|
+
// on recall — v2 §6.3 budget already caps latency. Errors degrade silently
|
|
16
|
+
// so prompt build always succeeds (plan §"Read hook").
|
|
17
|
+
const pastEpisodesSection = await renderPastEpisodesSection(client, card.title, card.description ?? "", workspaceId, projectId);
|
|
14
18
|
try {
|
|
15
19
|
const result = await client.generateCardPrompt({
|
|
16
20
|
cardId: card.id,
|
|
@@ -22,12 +26,53 @@ Do NOT push to main. All your work stays on \`${branchName}\`.
|
|
|
22
26
|
When finished, call harmony_end_agent_session with status="completed".`,
|
|
23
27
|
});
|
|
24
28
|
log.info(TAG, `Generated prompt for #${card.short_id} — ${result.contextSummary.memoryCount} memories, ${result.tokenEstimate} tokens`);
|
|
25
|
-
return result.prompt;
|
|
29
|
+
return result.prompt + pastEpisodesSection;
|
|
26
30
|
}
|
|
27
31
|
catch (err) {
|
|
28
32
|
const msg = err instanceof Error ? err.message : String(err);
|
|
29
33
|
log.warn(TAG, `Failed to generate prompt via API, using fallback: ${msg}`);
|
|
30
|
-
return buildFallbackPrompt(enriched, branchName, worktreePath)
|
|
34
|
+
return (buildFallbackPrompt(enriched, branchName, worktreePath) +
|
|
35
|
+
pastEpisodesSection);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Recall similar past episodes (implement solution/error type) and render them
|
|
40
|
+
* as a "Similar past tasks" section. Returns the empty string on no hits or
|
|
41
|
+
* recall failure — never throws.
|
|
42
|
+
*/
|
|
43
|
+
export async function renderPastEpisodesSection(client, title, description, workspaceId, projectId) {
|
|
44
|
+
if (!projectId)
|
|
45
|
+
return "";
|
|
46
|
+
try {
|
|
47
|
+
const query = `${title}\n${description}`.trim();
|
|
48
|
+
const { entities } = await client.harmonyRecall({
|
|
49
|
+
workspaceId,
|
|
50
|
+
projectId,
|
|
51
|
+
query,
|
|
52
|
+
type: ["solution", "error"],
|
|
53
|
+
memory_tier: "episode",
|
|
54
|
+
scope: "project",
|
|
55
|
+
topK: 3,
|
|
56
|
+
});
|
|
57
|
+
if (entities.length === 0)
|
|
58
|
+
return "";
|
|
59
|
+
const bullets = entities
|
|
60
|
+
.map((entity) => {
|
|
61
|
+
const e = entity;
|
|
62
|
+
const meta = e.metadata ?? {};
|
|
63
|
+
const outcomeTag = meta.outcome ? `[${meta.outcome}]` : "[?]";
|
|
64
|
+
const approach = meta.approach_summary ?? "";
|
|
65
|
+
return `- ${outcomeTag} ${e.title ?? "(untitled episode)"}\n Approach: ${approach}`;
|
|
66
|
+
})
|
|
67
|
+
.join("\n");
|
|
68
|
+
return `\n\n## Similar past tasks\n${bullets}`;
|
|
69
|
+
}
|
|
70
|
+
catch (err) {
|
|
71
|
+
log.warn(TAG, "past-episodes recall failed", {
|
|
72
|
+
event: "episode_recall_failed",
|
|
73
|
+
error: err instanceof Error ? err.message : String(err),
|
|
74
|
+
});
|
|
75
|
+
return "";
|
|
31
76
|
}
|
|
32
77
|
}
|
|
33
78
|
/**
|
|
@@ -36,4 +36,4 @@ export declare function parseReviewOutput(stdout: string): ReviewResult;
|
|
|
36
36
|
* Handles approved/rejected verdicts, creates subtasks for findings,
|
|
37
37
|
* and moves the card to the appropriate column.
|
|
38
38
|
*/
|
|
39
|
-
export declare function runReviewCompletion(client: HarmonyApiClient, card: Card, result: ReviewResult, config: AgentConfig, worktreePath: string, branchName: string | null, sessionStats?: SessionStats | null, runLogPath?: string | null): Promise<void>;
|
|
39
|
+
export declare function runReviewCompletion(client: HarmonyApiClient, card: Card, result: ReviewResult, config: AgentConfig, worktreePath: string, branchName: string | null, sessionStats?: SessionStats | null, runLogPath?: string | null, workspaceId?: string, agentSessionId?: string | null): Promise<void>;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readFileSync, statSync } from "node:fs";
|
|
2
2
|
import { addLabelByName, moveCardToColumn } from "./board-helpers.js";
|
|
3
3
|
import { buildTokenPayload } from "./completion.js";
|
|
4
|
+
import { backfillReviewVerdict, findLatestImplementEpisode, writeEpisode, } from "./episode-writer.js";
|
|
4
5
|
import { createPullRequest, detectGitProvider, pushBranch } from "./git-pr.js";
|
|
5
6
|
import { log } from "./log.js";
|
|
6
7
|
import { NEED_REVIEW_LABEL, NEED_REVIEW_LABEL_COLOR, } from "./types.js";
|
|
@@ -182,7 +183,7 @@ function stripReviewSummary(description) {
|
|
|
182
183
|
* Handles approved/rejected verdicts, creates subtasks for findings,
|
|
183
184
|
* and moves the card to the appropriate column.
|
|
184
185
|
*/
|
|
185
|
-
export async function runReviewCompletion(client, card, result, config, worktreePath, branchName, sessionStats, runLogPath) {
|
|
186
|
+
export async function runReviewCompletion(client, card, result, config, worktreePath, branchName, sessionStats, runLogPath, workspaceId, agentSessionId) {
|
|
186
187
|
// Re-fetch card for fresh description (avoids stale data from enqueue time)
|
|
187
188
|
let freshDesc;
|
|
188
189
|
try {
|
|
@@ -321,6 +322,24 @@ export async function runReviewCompletion(client, card, result, config, worktree
|
|
|
321
322
|
status: "completed",
|
|
322
323
|
...buildTokenPayload(sessionStats),
|
|
323
324
|
});
|
|
325
|
+
// Max-cycles rejection: the verdict still teaches "this approach kept
|
|
326
|
+
// failing review" — write the episode + back-fill before exiting.
|
|
327
|
+
if (workspaceId) {
|
|
328
|
+
const origId = await findLatestImplementEpisode(client, workspaceId, card.project_id, card.short_id);
|
|
329
|
+
const reviewId = await writeEpisode(client, {
|
|
330
|
+
kind: "review",
|
|
331
|
+
card,
|
|
332
|
+
workspaceId,
|
|
333
|
+
verdict: "rejected",
|
|
334
|
+
summary: `Reached max review cycles (${maxCycles}). ${result.summary}`,
|
|
335
|
+
cost: sessionStats?.cost ?? null,
|
|
336
|
+
agentSessionId: agentSessionId ?? null,
|
|
337
|
+
originalEpisodeId: origId,
|
|
338
|
+
});
|
|
339
|
+
if (origId) {
|
|
340
|
+
await backfillReviewVerdict(client, origId, "rejected", reviewId);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
324
343
|
if (branchName) {
|
|
325
344
|
cleanupWorktree(worktreePath, branchName);
|
|
326
345
|
}
|
|
@@ -388,6 +407,27 @@ export async function runReviewCompletion(client, card, result, config, worktree
|
|
|
388
407
|
});
|
|
389
408
|
log.info(TAG, `#${card.short_id} rejected (cycle ${currentCycle}/${maxCycles}) — moved to "${config.review.failColumn}"`);
|
|
390
409
|
}
|
|
410
|
+
// Episode write + verdict back-fill (Phase 1.5). Runs for approved or
|
|
411
|
+
// rejected verdicts only — "error" verdicts return early above. Best-effort:
|
|
412
|
+
// failures are logged by writeEpisode/backfillReviewVerdict and never block
|
|
413
|
+
// worktree cleanup.
|
|
414
|
+
if (workspaceId &&
|
|
415
|
+
(result.verdict === "approved" || result.verdict === "rejected")) {
|
|
416
|
+
const originalEpisodeId = await findLatestImplementEpisode(client, workspaceId, card.project_id, card.short_id);
|
|
417
|
+
const reviewEpisodeId = await writeEpisode(client, {
|
|
418
|
+
kind: "review",
|
|
419
|
+
card,
|
|
420
|
+
workspaceId,
|
|
421
|
+
verdict: result.verdict,
|
|
422
|
+
summary: result.summary,
|
|
423
|
+
cost: sessionStats?.cost ?? null,
|
|
424
|
+
agentSessionId: agentSessionId ?? null,
|
|
425
|
+
originalEpisodeId,
|
|
426
|
+
});
|
|
427
|
+
if (originalEpisodeId) {
|
|
428
|
+
await backfillReviewVerdict(client, originalEpisodeId, result.verdict, reviewEpisodeId);
|
|
429
|
+
}
|
|
430
|
+
}
|
|
391
431
|
// Cleanup worktree (skip in local mode — no worktree to clean)
|
|
392
432
|
if (branchName) {
|
|
393
433
|
cleanupWorktree(worktreePath, branchName);
|
package/dist/review-worker.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ export declare class ReviewWorker {
|
|
|
7
7
|
private client;
|
|
8
8
|
private onDone;
|
|
9
9
|
private stateStore;
|
|
10
|
+
private workspaceId?;
|
|
10
11
|
id: number;
|
|
11
12
|
state: WorkerState;
|
|
12
13
|
cardId: string | null;
|
|
@@ -22,7 +23,8 @@ export declare class ReviewWorker {
|
|
|
22
23
|
private aborted;
|
|
23
24
|
private runId;
|
|
24
25
|
private lastRunLogPath;
|
|
25
|
-
|
|
26
|
+
private sessionId;
|
|
27
|
+
constructor(id: number, config: AgentConfig, client: HarmonyApiClient, _userEmail: string, onDone: (worker: ReviewWorker) => void, stateStore: StateStore, workspaceId?: string | undefined, _projectId?: string);
|
|
26
28
|
private startHeartbeat;
|
|
27
29
|
private stopHeartbeat;
|
|
28
30
|
private recordPhase;
|
package/dist/review-worker.js
CHANGED
|
@@ -23,6 +23,7 @@ export class ReviewWorker {
|
|
|
23
23
|
client;
|
|
24
24
|
onDone;
|
|
25
25
|
stateStore;
|
|
26
|
+
workspaceId;
|
|
26
27
|
id;
|
|
27
28
|
state = "idle";
|
|
28
29
|
cardId = null;
|
|
@@ -38,11 +39,13 @@ export class ReviewWorker {
|
|
|
38
39
|
aborted = false;
|
|
39
40
|
runId = null;
|
|
40
41
|
lastRunLogPath = null;
|
|
41
|
-
|
|
42
|
+
sessionId = null;
|
|
43
|
+
constructor(id, config, client, _userEmail, onDone, stateStore, workspaceId, _projectId) {
|
|
42
44
|
this.config = config;
|
|
43
45
|
this.client = client;
|
|
44
46
|
this.onDone = onDone;
|
|
45
47
|
this.stateStore = stateStore;
|
|
48
|
+
this.workspaceId = workspaceId;
|
|
46
49
|
this.id = id;
|
|
47
50
|
}
|
|
48
51
|
startHeartbeat() {
|
|
@@ -152,7 +155,7 @@ export class ReviewWorker {
|
|
|
152
155
|
log.info(this.tag, `Review branch: ${this.branchName}`);
|
|
153
156
|
}
|
|
154
157
|
// Start agent session and make it visible on the board
|
|
155
|
-
await this.client.startAgentSession(card.id, {
|
|
158
|
+
const { session: reviewSession } = await this.client.startAgentSession(card.id, {
|
|
156
159
|
agentIdentifier: agentIdentifier(this.id),
|
|
157
160
|
agentName: `${AGENT_NAME} (Review)`,
|
|
158
161
|
status: "working",
|
|
@@ -161,6 +164,12 @@ export class ReviewWorker {
|
|
|
161
164
|
: "Setting up review worktree",
|
|
162
165
|
progressPercent: 5,
|
|
163
166
|
});
|
|
167
|
+
this.sessionId =
|
|
168
|
+
reviewSession &&
|
|
169
|
+
typeof reviewSession === "object" &&
|
|
170
|
+
"id" in reviewSession
|
|
171
|
+
? (reviewSession.id ?? null)
|
|
172
|
+
: null;
|
|
164
173
|
// Fire label addition concurrently with sync worktree checkout
|
|
165
174
|
const labelPromise = addLabelByName(this.client, card, "agent", "#8b5cf6");
|
|
166
175
|
if (!localMode) {
|
|
@@ -281,7 +290,7 @@ export class ReviewWorker {
|
|
|
281
290
|
progressPercent: 80,
|
|
282
291
|
});
|
|
283
292
|
// Run review completion pipeline
|
|
284
|
-
await runReviewCompletion(this.client, card, result, this.config, cwd, this.branchName, sessionStats, this.lastRunLogPath);
|
|
293
|
+
await runReviewCompletion(this.client, card, result, this.config, cwd, this.branchName, sessionStats, this.lastRunLogPath, this.workspaceId, this.sessionId);
|
|
285
294
|
}
|
|
286
295
|
catch (err) {
|
|
287
296
|
this.state = "error";
|
package/dist/types.d.ts
CHANGED
|
@@ -98,3 +98,35 @@ export interface RealtimeCredentials {
|
|
|
98
98
|
supabaseUrl: string;
|
|
99
99
|
supabaseAnonKey: string;
|
|
100
100
|
}
|
|
101
|
+
/** Pipeline that produced an episode. */
|
|
102
|
+
export type EpisodeKind = "implement" | "review";
|
|
103
|
+
/** Outcome of an implement run; review verdict maps to its own type. */
|
|
104
|
+
export type EpisodeOutcome = "success" | "failure";
|
|
105
|
+
/**
|
|
106
|
+
* Structured metadata persisted alongside every episode entity in
|
|
107
|
+
* `knowledge_entities.metadata`. Read by the recall path to render the
|
|
108
|
+
* "Similar past tasks" section in subsequent agent prompts.
|
|
109
|
+
*/
|
|
110
|
+
export interface EpisodeMeta {
|
|
111
|
+
episode_kind: EpisodeKind;
|
|
112
|
+
card_short_id: number;
|
|
113
|
+
card_title: string;
|
|
114
|
+
approach_summary: string;
|
|
115
|
+
outcome: EpisodeOutcome;
|
|
116
|
+
quality_score: number;
|
|
117
|
+
duration_ms: number;
|
|
118
|
+
token_cost: {
|
|
119
|
+
input: number;
|
|
120
|
+
output: number;
|
|
121
|
+
usd: number;
|
|
122
|
+
};
|
|
123
|
+
files_touched: number;
|
|
124
|
+
num_turns: number;
|
|
125
|
+
error?: string;
|
|
126
|
+
/** Provenance only — never used as memory scope. */
|
|
127
|
+
agent_session_id?: string;
|
|
128
|
+
/** Set on back-fill from review pipeline. */
|
|
129
|
+
review_session_id?: string;
|
|
130
|
+
/** Set on review-decision entities so back-fill can find the original. */
|
|
131
|
+
original_episode_id?: string;
|
|
132
|
+
}
|
package/dist/worker.js
CHANGED
|
@@ -195,7 +195,7 @@ export class Worker {
|
|
|
195
195
|
});
|
|
196
196
|
this.state = "completing";
|
|
197
197
|
await this.recordPhase("completing");
|
|
198
|
-
await runCompletion(this.client, card, this.branchName, this.worktreePath, this.config, this.id, this.lastSessionStats);
|
|
198
|
+
await runCompletion(this.client, card, this.branchName, this.worktreePath, this.config, this.id, this.lastSessionStats, this.workspaceId, this.sessionId);
|
|
199
199
|
}
|
|
200
200
|
catch (err) {
|
|
201
201
|
this.state = "error";
|