@tritard/waterbrother 0.12.8 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent.js +1 -0
- package/src/cli.js +8 -2
- package/src/scorecard.js +321 -0
- package/src/workflow.js +30 -1
package/package.json
CHANGED
package/src/agent.js
CHANGED
|
@@ -136,6 +136,7 @@ function buildSystemPrompt(profile, experienceMode = "standard", autonomyMode =
|
|
|
136
136
|
ctxLines.push(`Benchmark site-type rules:\n- ${frontend.benchmarkSiteTypeRules.join("\n- ")}`);
|
|
137
137
|
}
|
|
138
138
|
}
|
|
139
|
+
if (executionContext.calibration) ctxLines.push(`Scope calibration (from scored build history):\n${executionContext.calibration}`);
|
|
139
140
|
if (executionContext.reminders) ctxLines.push(`Scope reminders:\n${executionContext.reminders}`);
|
|
140
141
|
if (ctxLines.length > 0) base += `\n\nExecution context:\n${ctxLines.join("\n")}`;
|
|
141
142
|
}
|
package/src/cli.js
CHANGED
|
@@ -33,6 +33,7 @@ import { runBuildWorkflow, startFeatureTask, runChallengeWorkflow } from "./work
|
|
|
33
33
|
import { createPanelRenderer, buildPanelState } from "./panel.js";
|
|
34
34
|
import { deriveTaskNameFromPrompt, nextActionsForState, routeNaturalInput } from "./router.js";
|
|
35
35
|
import { compressEpisode, compressSessionEpisode, saveEpisode, loadRecentEpisodes, findRelevantEpisodes, buildEpisodicMemoryBlock, buildReminderBlock } from "./episodic.js";
|
|
36
|
+
import { formatScorecardSummary } from "./scorecard.js";
|
|
36
37
|
import { createProduct, loadProduct, saveProduct, hasProduct, generateBlueprint, buildProductContext, detectProductRequest, parseProductIntent, addSurface, createCampaign, getActiveCampaign, matchTemplate, applyTemplate, startPreview, killPreview } from "./product.js";
|
|
37
38
|
import { runQualityChecks, formatQualityFindings, buildQualityFixPrompt } from "./quality.js";
|
|
38
39
|
import { scanForInitiatives, formatInitiatives, buildInitiativeFixPrompt } from "./initiative.js";
|
|
@@ -5244,7 +5245,7 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
5244
5245
|
}
|
|
5245
5246
|
|
|
5246
5247
|
// Refine: natural language changes to a built product
|
|
5247
|
-
if (/^(fix these|fix quality|fix initiatives|fix product)$/.test(lower) && !context.runtime.activeTask) {
|
|
5248
|
+
if (/^(fix these|fix quality|fix initiatives|fix product|fix issues|fix the issues|fix it|fix everything|fix all)$/.test(lower) && !context.runtime.activeTask) {
|
|
5248
5249
|
// Only handle product fixes when no task is active — otherwise let router handle "fix these" for task reviews
|
|
5249
5250
|
if (context.runtime.pendingInitiatives?.length > 0) {
|
|
5250
5251
|
const spinner = createProgressSpinner("fixing product gaps...");
|
|
@@ -5270,7 +5271,7 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
5270
5271
|
}
|
|
5271
5272
|
}
|
|
5272
5273
|
|
|
5273
|
-
if (/^(fix these|fix quality)$/.test(lower) && !context.runtime.activeTask) {
|
|
5274
|
+
if (/^(fix these|fix quality|fix issues|fix the issues|fix it|fix everything|fix all)$/.test(lower) && !context.runtime.activeTask) {
|
|
5274
5275
|
const spinner = createProgressSpinner("fixing quality issues...");
|
|
5275
5276
|
try {
|
|
5276
5277
|
const findings = await runQualityChecks({ cwd: context.cwd });
|
|
@@ -6446,6 +6447,11 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6446
6447
|
lines.push(`${dim("impact:")} ${parts.join(", ")}`);
|
|
6447
6448
|
}
|
|
6448
6449
|
|
|
6450
|
+
// Scorecard
|
|
6451
|
+
if (buildResult.scorecard) {
|
|
6452
|
+
lines.push(`${dim("score:")} ${formatScorecardSummary(buildResult.scorecard)}`);
|
|
6453
|
+
}
|
|
6454
|
+
|
|
6449
6455
|
// Sentinel verdict
|
|
6450
6456
|
if (buildResult.review) {
|
|
6451
6457
|
const v = buildResult.review.verdict;
|
package/src/scorecard.js
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import crypto from "node:crypto";
|
|
4
|
+
|
|
5
|
+
// Upper bound on the number of entries kept in index.json; saveScorecard
// truncates the index to this length after prepending the newest entry.
const MAX_INDEX_ENTRIES = 200;
|
|
6
|
+
// Character budget that buildCalibrationBlock checks before appending lines
// to the calibration text.
const MAX_CALIBRATION_CHARS = 1500;
|
|
7
|
+
|
|
8
|
+
/**
 * Resolve the directory that holds scorecard files for a project.
 *
 * @param {string} cwd - Project root.
 * @returns {string} `<cwd>/.waterbrother/memory/scorecards`.
 */
function scorecardsDir(cwd) {
  const segments = [".waterbrother", "memory", "scorecards"];
  return path.join(cwd, ...segments);
}
|
|
11
|
+
|
|
12
|
+
/**
 * Resolve the path of the scorecard index file for a project.
 *
 * @param {string} cwd - Project root.
 * @returns {string} Path to `index.json` inside the scorecards directory.
 */
function indexPath(cwd) {
  const dir = scorecardsDir(cwd);
  return path.join(dir, "index.json");
}
|
|
15
|
+
|
|
16
|
+
/**
 * Resolve the path of a single scorecard file.
 *
 * @param {string} cwd - Project root.
 * @param {string} id - Scorecard id; used verbatim as the file's base name.
 * @returns {string} Path to `<id>.json` inside the scorecards directory.
 */
function scorecardPath(cwd, id) {
  const fileName = `${id}.json`;
  const dir = scorecardsDir(cwd);
  return path.join(dir, fileName);
}
|
|
19
|
+
|
|
20
|
+
/**
 * Build a scorecard id of the form `sc_<slug>-<rand>` (or `sc_<rand>` when
 * the name yields no slug).
 *
 * The slug is the lower-cased name with every run of non-alphanumeric
 * characters collapsed to a single hyphen, limited to 40 characters.
 * `<rand>` is 3 random bytes rendered as 6 hex characters.
 *
 * @param {*} name - Human-readable task name; falsy values produce no slug.
 * @returns {string} The generated id.
 */
function makeId(name) {
  const slug = String(name || "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 40)
    // Truncation can cut right after a separator; trim again so the id never
    // contains a doubled hyphen before the random suffix.
    .replace(/-$/, "");
  const rand = crypto.randomBytes(3).toString("hex");
  return slug ? `sc_${slug}-${rand}` : `sc_${rand}`;
}
|
|
29
|
+
|
|
30
|
+
// --- Score computation ---
|
|
31
|
+
|
|
32
|
+
/**
 * Fraction of verification checks that succeeded.
 *
 * A check counts as successful when either its `pass` or its `ok` field is
 * truthy.
 *
 * @param {Array<object>} verification - Verification results.
 * @returns {number|null} Value in [0, 1], or null when the input is not a
 *   non-empty array.
 */
function computeVerificationScore(verification) {
  const isList = Array.isArray(verification);
  if (!isList || verification.length === 0) {
    return null;
  }

  let passCount = 0;
  for (const check of verification) {
    if (check.pass || check.ok) {
      passCount += 1;
    }
  }
  return passCount / verification.length;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Map a sentinel review verdict onto a numeric score.
 *
 * @param {string} verdict - "ship", "caution" or "block".
 * @returns {number|null} 1.0, 0.5 or 0.0 respectively; null for anything else.
 */
function computeSentinelScore(verdict) {
  switch (verdict) {
    case "ship":
      return 1.0;
    case "caution":
      return 0.5;
    case "block":
      return 0.0;
    default:
      return null;
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Turn a warning count into a quality score.
 *
 * Each warning costs one tenth of the score, and the result never drops
 * below 0.
 *
 * @param {number|null|undefined} warningCount - Number of quality warnings.
 * @returns {number|null} The score, or null when the count is null/undefined.
 */
function computeQualityScore(warningCount) {
  // `== null` deliberately matches both null and undefined.
  if (warningCount == null) {
    return null;
  }
  const warningCeiling = 10;
  const penalty = warningCount / warningCeiling;
  return Math.max(0, 1 - penalty);
}
|
|
50
|
+
|
|
51
|
+
/**
 * Map the user's follow-up action onto a numeric score.
 *
 * @param {string} action - The action the user took after the build.
 * @returns {number|null} 1.0 for "accepted"/"ship it", 0.25 for "challenge",
 *   0.0 for "fix-these"/"fix these", -0.5 for "redo"; null for anything else.
 */
function computeUserScore(action) {
  switch (action) {
    case "accepted":
    case "ship it":
      return 1.0;
    case "fix-these":
    case "fix these":
      return 0.0;
    case "redo":
      return -0.5;
    case "challenge":
      return 0.25;
    default:
      return null;
  }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Combine the individual scores into one composite value.
 *
 * Every component carries the same weight (0.25). Components that are not
 * available are left out and the remaining weights are renormalised, so the
 * composite is the weighted mean of whatever was measured.
 *
 * @param {object} scores
 * @param {number|null} [scores.verificationScore]
 * @param {number|null} [scores.sentinelScore]
 * @param {number|null} [scores.qualityScore]
 * @param {number|null} [scores.userScore]
 * @returns {number|null} Composite rounded to two decimals, or null when no
 *   component is available.
 */
function computeComposite({ verificationScore, sentinelScore, qualityScore, userScore }) {
  const weights = { verification: 0.25, sentinel: 0.25, quality: 0.25, user: 0.25 };
  const components = [
    [verificationScore, weights.verification],
    [sentinelScore, weights.sentinel],
    [qualityScore, weights.quality],
    [userScore, weights.user]
  ];

  let total = 0;
  let weightSum = 0;
  for (const [score, weight] of components) {
    // Treat undefined/NaN like null: a missing component must not turn the
    // whole composite into NaN.
    if (typeof score !== "number" || Number.isNaN(score)) continue;
    total += score * weight;
    weightSum += weight;
  }

  return weightSum > 0 ? Math.round((total / weightSum) * 100) / 100 : null;
}
|
|
71
|
+
|
|
72
|
+
// --- Brier score (Layer 3) ---
|
|
73
|
+
|
|
74
|
+
/**
 * Compute per-prediction Brier scores (squared error between a predicted
 * probability and the 0/1 outcome), rounded to three decimals.
 *
 * Scored predictions:
 * - `testPass` against "every verification check passed" (`pass` or `ok`);
 * - `sentinelShip` against "sentinel verdict is ship";
 * - `userAcceptFirstTry` against "user action is accepted / ship it".
 *
 * A prediction is only scored when it is a finite number and the matching
 * outcome was actually observed.
 *
 * @param {object|null} predictions - Predicted probabilities.
 * @param {object|null} outcomes - Observed outcomes (`verification`,
 *   `sentinel`, `userAction`).
 * @returns {object|null} Map of prediction name to Brier score, or null when
 *   nothing could be scored.
 */
export function computeBrierScores(predictions, outcomes) {
  if (!predictions || !outcomes) return null;

  const isProbability = (value) => typeof value === "number" && Number.isFinite(value);
  const brier = (predicted, actual) => Math.round(Math.pow(predicted - actual, 2) * 1000) / 1000;
  const scores = {};

  // An empty verification list means no checks ran. `every()` would report
  // that as a pass, so require at least one result before scoring.
  if (
    isProbability(predictions.testPass) &&
    Array.isArray(outcomes.verification) &&
    outcomes.verification.length > 0
  ) {
    const actual = outcomes.verification.every((v) => v?.pass || v?.ok) ? 1 : 0;
    scores.testPass = brier(predictions.testPass, actual);
  }

  if (isProbability(predictions.sentinelShip) && outcomes.sentinel?.verdict) {
    const actual = outcomes.sentinel.verdict === "ship" ? 1 : 0;
    scores.sentinelShip = brier(predictions.sentinelShip, actual);
  }

  if (isProbability(predictions.userAcceptFirstTry) && outcomes.userAction) {
    const accepted = outcomes.userAction === "accepted" || outcomes.userAction === "ship it";
    scores.userAcceptFirstTry = brier(predictions.userAcceptFirstTry, accepted ? 1 : 0);
  }

  return Object.keys(scores).length > 0 ? scores : null;
}
|
|
95
|
+
|
|
96
|
+
// --- Scorecard creation (Layer 1: passive scoring) ---
|
|
97
|
+
|
|
98
|
+
/**
 * Build a scorecard record for a finished build (Layer 1: passive scoring).
 *
 * Outcomes are extracted from the receipt, scored per dimension, and combined
 * into a composite. `predictions`, `brierScores` and
 * `outcomes.experimentDelta` are always emitted as null by this function.
 *
 * @param {object} args
 * @param {object} [args.task] - Task; `name`, `id` and `chosenOption` are read.
 * @param {object} [args.receipt] - Build receipt; `verification`, `review`,
 *   `challenge`, `designReview` and `changedFiles` are read.
 * @param {Array<object>|null} [args.qualityFindings] - Quality findings; only
 *   entries with `severity === "warning"` are counted. A non-array value means
 *   quality was not measured.
 * @param {string|null} [args.userAction] - Action the user took, if known.
 * @returns {object} The scorecard.
 */
export function computeScorecard({ task, receipt, qualityFindings, userAction }) {
  const id = makeId(task?.name || task?.id || "build");

  // Accept either `ok` or `pass` as the success flag, the same rule
  // computeVerificationScore and computeBrierScores apply.
  const rawVerification = Array.isArray(receipt?.verification) ? receipt.verification : [];
  const verification = rawVerification.map((v) => ({
    command: v?.command || v?.label || "check",
    pass: Boolean(v?.ok || v?.pass)
  }));

  const sentinel = receipt?.review
    ? { verdict: receipt.review.verdict, concerns: receipt.review.concerns || [] }
    : null;

  const challenge = receipt?.challenge
    ? { concerns: receipt.challenge.concerns || [] }
    : null;

  // Filter warnings once; null keeps "not measured" distinct from "zero warnings".
  const warningMessages = Array.isArray(qualityFindings)
    ? qualityFindings.filter((f) => f?.severity === "warning").map((f) => f.message)
    : null;
  const warningCount = warningMessages ? warningMessages.length : null;

  const quality = {
    warnings: warningCount ?? 0,
    findings: warningMessages ? warningMessages.slice(0, 5) : []
  };

  const verificationScore = computeVerificationScore(verification);
  const sentinelScore = computeSentinelScore(sentinel?.verdict);
  const qualityScore = computeQualityScore(warningCount);
  const userScoreVal = computeUserScore(userAction);
  const composite = computeComposite({ verificationScore, sentinelScore, qualityScore, userScore: userScoreVal });

  // Scope: the unique parent directories of the changed files, as `dir/**` globs.
  const dirs = new Set();
  const changedFiles = Array.isArray(receipt?.changedFiles) ? receipt.changedFiles : [];
  for (const file of changedFiles) {
    // Skip malformed entries instead of throwing and losing the whole scorecard.
    if (typeof file !== "string") continue;
    const dir = file.replace(/\\/g, "/").split("/").slice(0, -1).join("/");
    if (dir) dirs.add(`${dir}/**`);
  }
  const scope = [...dirs];

  return {
    id,
    taskId: task?.id || null,
    taskName: task?.name || null,
    scope,
    approach: task?.chosenOption || null,
    timestamp: new Date().toISOString(),
    predictions: null,
    outcomes: {
      verification,
      sentinel,
      challenge,
      quality,
      designReview: receipt?.designReview ? { verdict: receipt.designReview.verdict } : null,
      userAction: userAction || null,
      experimentDelta: null
    },
    scores: {
      verificationScore,
      sentinelScore,
      qualityScore,
      userScore: userScoreVal,
      composite
    },
    brierScores: null
  };
}
|
|
171
|
+
|
|
172
|
+
// --- Storage ---
|
|
173
|
+
|
|
174
|
+
/**
 * Load the scorecard index for a project.
 *
 * Reading is best-effort: a missing, unreadable or malformed index yields an
 * empty list, so scoring never blocks a build.
 *
 * @param {string} cwd - Project root.
 * @returns {Promise<Array<object>>} Index entries; empty when unavailable.
 */
async function readIndex(cwd) {
  try {
    const raw = await fs.readFile(indexPath(cwd), "utf8");
    const parsed = JSON.parse(raw);
    // Valid JSON is not necessarily a valid index: callers use unshift() and
    // .length, and read fields off each entry.
    if (!Array.isArray(parsed)) return [];
    return parsed.filter((entry) => entry !== null && typeof entry === "object");
  } catch {
    return [];
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * Persist the scorecard index, creating the scorecards directory if needed.
 *
 * @param {string} cwd - Project root.
 * @param {Array<object>} index - Index entries to write.
 * @returns {Promise<void>}
 */
async function writeIndex(cwd, index) {
  const dir = scorecardsDir(cwd);
  await fs.mkdir(dir, { recursive: true });

  // Pretty-printed JSON with a trailing newline.
  const serialized = `${JSON.stringify(index, null, 2)}\n`;
  await fs.writeFile(indexPath(cwd), serialized, "utf8");
}
|
|
187
|
+
|
|
188
|
+
/**
 * Write a scorecard to disk and register it at the front of the index.
 *
 * The full scorecard goes to `<id>.json`; the index receives a summary entry
 * (id, taskName, scope, approach, composite, timestamp) and is cut back to
 * MAX_INDEX_ENTRIES entries.
 *
 * @param {object} args
 * @param {string} args.cwd - Project root.
 * @param {object} args.scorecard - Scorecard to persist.
 * @returns {Promise<void>}
 */
export async function saveScorecard({ cwd, scorecard }) {
  const dir = scorecardsDir(cwd);
  await fs.mkdir(dir, { recursive: true });

  const cardFile = scorecardPath(cwd, scorecard.id);
  const cardJson = `${JSON.stringify(scorecard, null, 2)}\n`;
  await fs.writeFile(cardFile, cardJson, "utf8");

  const index = await readIndex(cwd);
  const { id, taskName, scope, approach, timestamp } = scorecard;
  const summary = {
    id,
    taskName,
    scope,
    approach,
    composite: scorecard.scores.composite,
    timestamp
  };
  // Newest entry first; drop whatever falls past the cap.
  index.unshift(summary);
  if (index.length > MAX_INDEX_ENTRIES) {
    index.splice(MAX_INDEX_ENTRIES);
  }
  await writeIndex(cwd, index);
}
|
|
204
|
+
|
|
205
|
+
/**
 * Find stored scorecards whose scope overlaps the given file patterns.
 *
 * Patterns and scope entries are reduced to directories (backslashes
 * normalised, trailing `**` removed, lower-cased). A scope entry is relevant
 * when it and a query directory are the same directory or one contains the
 * other; each relevant scope entry adds 3 to the card's relevance. Results
 * are ordered by relevance, then newest first, and the top `limit` cards are
 * loaded from disk. Cards that cannot be read or parsed are skipped.
 *
 * @param {object} args
 * @param {string} args.cwd - Project root.
 * @param {string[]} [args.filePatterns=[]] - Paths/globs describing the scope.
 * @param {number} [args.limit=10] - Maximum number of cards to return.
 * @returns {Promise<Array<object>>} Matching scorecards.
 */
export async function findRelevantScorecards({ cwd, filePatterns = [], limit = 10 }) {
  const index = await readIndex(cwd);
  if (!Array.isArray(index) || index.length === 0) return [];

  // Normalise separators first so a Windows-style `dir\**` loses its glob
  // suffix too; then drop trailing slashes (but keep a lone "/").
  const toDir = (pattern) =>
    String(pattern)
      .replace(/\\/g, "/")
      .replace(/\/?\*\*$/, "")
      .replace(/(.)\/+$/, "$1")
      .toLowerCase();

  // True when `inner` is `outer` or lives below it. Compared on whole path
  // segments so "src/app" does not match "src/application". An empty `outer`
  // (pattern "**") covers everything.
  const contains = (outer, inner) => {
    if (outer === "" || inner === outer) return true;
    const prefix = outer.endsWith("/") ? outer : `${outer}/`;
    return inner.startsWith(prefix);
  };

  const queryDirs = (Array.isArray(filePatterns) ? filePatterns : []).map(toDir);

  const scored = [];
  if (queryDirs.length > 0) {
    for (const entry of index) {
      if (!Array.isArray(entry?.scope)) continue;
      let relevance = 0;
      for (const s of entry.scope) {
        if (typeof s !== "string") continue;
        const sDir = toDir(s);
        if (queryDirs.some((qd) => contains(qd, sDir) || contains(sDir, qd))) relevance += 3;
      }
      if (relevance > 0) scored.push({ entry, relevance });
    }
  }

  // Unparseable timestamps sort as oldest instead of poisoning the comparator with NaN.
  const timeOf = (entry) => new Date(entry.timestamp).getTime() || 0;
  scored.sort((a, b) => b.relevance - a.relevance || timeOf(b.entry) - timeOf(a.entry));
  const top = scored.slice(0, limit);

  const cards = [];
  for (const { entry } of top) {
    try {
      const raw = await fs.readFile(scorecardPath(cwd, entry.id), "utf8");
      cards.push(JSON.parse(raw));
    } catch {
      // Best-effort: the index may reference a card that was removed or is corrupt.
    }
  }
  return cards;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Load the scorecards referenced by the first `limit` index entries.
 *
 * Cards are read one after another in index order; a card whose file cannot
 * be read or parsed is skipped.
 *
 * @param {object} args
 * @param {string} args.cwd - Project root.
 * @param {number} [args.limit=10] - Maximum number of index entries to load.
 * @returns {Promise<Array<object>>} The scorecards that loaded successfully.
 */
export async function loadRecentScorecards({ cwd, limit = 10 }) {
  const index = await readIndex(cwd);
  const recent = index.slice(0, limit);

  const loaded = [];
  for (let i = 0; i < recent.length; i += 1) {
    try {
      const file = scorecardPath(cwd, recent[i].id);
      const contents = await fs.readFile(file, "utf8");
      loaded.push(JSON.parse(contents));
    } catch {}
  }
  return loaded;
}
|
|
249
|
+
|
|
250
|
+
// --- Layer 2: Context injection ---
|
|
251
|
+
|
|
252
|
+
/**
 * Render a text summary of past scorecards (Layer 2: context injection).
 *
 * The block contains, in order:
 * 1. a header line;
 * 2. one line per approach with the average composite score, the sentinel
 *    verdicts and the user actions seen for that approach;
 * 3. the three most frequent quality findings ("blind spots");
 * 4. a calibration warning when at least three cards carry Brier scores and
 *    their average exceeds 0.3.
 *
 * Lines are only added while the text, newlines included, stays within
 * MAX_CALIBRATION_CHARS.
 *
 * @param {Array<object>|null} scorecards - Scorecards to summarise.
 * @returns {string} The calibration text, or "" when there are no scorecards.
 */
export function buildCalibrationBlock(scorecards) {
  if (!scorecards || scorecards.length === 0) return "";

  const lines = ["Build history for this scope:"];
  let chars = lines[0].length;

  // Append a line only if it fits the budget; the +1 pays for the newline.
  const pushWithinBudget = (line) => {
    const cost = line.length + 1;
    if (chars + cost > MAX_CALIBRATION_CHARS) return false;
    lines.push(line);
    chars += cost;
    return true;
  };
  const isNumber = (value) => typeof value === "number" && Number.isFinite(value);
  const mean = (nums) => nums.reduce((sum, n) => sum + n, 0) / nums.length;

  // Group by approach. A Map (not a plain object) keeps keys such as
  // "constructor" or "__proto__" from colliding with Object.prototype.
  const byApproach = new Map();
  for (const sc of scorecards) {
    if (!sc) continue;
    const key = String(sc.approach || "unknown");
    if (!byApproach.has(key)) byApproach.set(key, []);
    byApproach.get(key).push(sc);
  }

  for (const [approach, cards] of byApproach) {
    // Average only the cards that were actually scored; a null composite
    // means "nothing measured", not "score 0".
    const composites = cards.map((c) => c.scores?.composite).filter(isNumber);
    const avgLabel = composites.length > 0 ? mean(composites).toFixed(2) : "n/a";
    const verdicts = cards.map((c) => c.outcomes?.sentinel?.verdict).filter(Boolean);
    const actions = cards.map((c) => c.outcomes?.userAction).filter(Boolean);
    const line = `- ${approach}: avg score ${avgLabel} (${verdicts.join(", ")}) → user: ${actions.join(", ")}`;
    if (!pushWithinBudget(line)) break;
  }

  // Aggregate blind spots: how often each quality finding recurs.
  const findingCounts = new Map();
  for (const sc of scorecards) {
    for (const f of (sc?.outcomes?.quality?.findings || [])) {
      findingCounts.set(f, (findingCounts.get(f) || 0) + 1);
    }
  }
  const blindSpots = [...findingCounts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 3)
    .map(([finding, count]) => `${finding} (${count}x)`);

  if (blindSpots.length > 0) {
    pushWithinBudget(`Quality blind spots: ${blindSpots.join(", ")}`);
  }

  // Brier calibration note (Layer 3): mean of the per-card mean Brier score.
  const brierMeans = [];
  for (const sc of scorecards) {
    const vals = Object.values(sc?.brierScores || {}).filter(isNumber);
    if (vals.length > 0) brierMeans.push(mean(vals));
  }
  if (brierMeans.length >= 3) {
    const avgBrier = mean(brierMeans);
    if (avgBrier > 0.3) {
      pushWithinBudget(`Calibration warning: avg Brier ${avgBrier.toFixed(2)} — lower your confidence estimates.`);
    }
  }

  return lines.join("\n");
}
|
|
309
|
+
|
|
310
|
+
// --- Summary for display ---
|
|
311
|
+
|
|
312
|
+
/**
 * Format a scorecard's scores as a single display line, for example
 * `verify:100% sentinel:50% composite:75%`.
 *
 * Scores that are missing (null, undefined or not a finite number) are left
 * out rather than rendered as `NaN%`.
 *
 * @param {object} scorecard - Scorecard with a `scores` object.
 * @returns {string} Space-separated `label:percent%` pairs; "" when nothing
 *   was scored.
 */
export function formatScorecardSummary(scorecard) {
  const s = scorecard?.scores || {};
  const fields = [
    ["verify", s.verificationScore],
    ["sentinel", s.sentinelScore],
    ["quality", s.qualityScore],
    ["user", s.userScore],
    ["composite", s.composite]
  ];

  return fields
    .filter(([, value]) => typeof value === "number" && Number.isFinite(value))
    .map(([label, value]) => `${label}:${(value * 100).toFixed(0)}%`)
    .join(" ");
}
|
package/src/workflow.js
CHANGED
|
@@ -16,6 +16,7 @@ import {
|
|
|
16
16
|
} from "./frontend.js";
|
|
17
17
|
import { runPlannerPass, formatPlanForExecutor, formatPlanForDisplay } from "./planner.js";
|
|
18
18
|
import { runVerificationPass, formatVerifierResults, hasFailures } from "./verifier.js";
|
|
19
|
+
import { computeScorecard, saveScorecard, findRelevantScorecards, buildCalibrationBlock } from "./scorecard.js";
|
|
19
20
|
|
|
20
21
|
export async function runBuildWorkflow({
|
|
21
22
|
agent,
|
|
@@ -27,6 +28,18 @@ export async function runBuildWorkflow({
|
|
|
27
28
|
if (!task) throw new Error("no active task");
|
|
28
29
|
if (!promptText) throw new Error("build requires a prompt");
|
|
29
30
|
|
|
31
|
+
// Layer 2: Inject calibration from scored memory before planning
|
|
32
|
+
let calibrationBlock = "";
|
|
33
|
+
try {
|
|
34
|
+
const contractPaths = task.activeContract?.paths || [];
|
|
35
|
+
if (contractPaths.length > 0) {
|
|
36
|
+
const relevantCards = await findRelevantScorecards({ cwd: context.cwd, filePatterns: contractPaths, limit: 5 });
|
|
37
|
+
if (relevantCards.length > 0) {
|
|
38
|
+
calibrationBlock = buildCalibrationBlock(relevantCards);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
} catch {}
|
|
42
|
+
|
|
30
43
|
// Planner/Executor split: if plannerModel is configured, run planner first
|
|
31
44
|
const plannerModel = context.runtime?.plannerModel;
|
|
32
45
|
let planBlock = "";
|
|
@@ -68,6 +81,7 @@ export async function runBuildWorkflow({
|
|
|
68
81
|
Object.assign(executionCtx, frontendCtx);
|
|
69
82
|
}
|
|
70
83
|
if (planBlock) executionCtx.plan = planBlock;
|
|
84
|
+
if (calibrationBlock) executionCtx.calibration = calibrationBlock;
|
|
71
85
|
agent.setExecutionContext(executionCtx);
|
|
72
86
|
|
|
73
87
|
// Pre-seed contract if task has one
|
|
@@ -311,6 +325,20 @@ export async function runBuildWorkflow({
|
|
|
311
325
|
context.runtime.lastImpact = impact || null;
|
|
312
326
|
}
|
|
313
327
|
|
|
328
|
+
// Layer 1: Compute and save scorecard (passive scoring)
|
|
329
|
+
let scorecard = null;
|
|
330
|
+
if (finalReceipt?.mutated) {
|
|
331
|
+
try {
|
|
332
|
+
scorecard = computeScorecard({
|
|
333
|
+
task,
|
|
334
|
+
receipt: finalReceipt,
|
|
335
|
+
qualityFindings: null, // quality findings come from the caller if available
|
|
336
|
+
userAction: null // populated later when user acts
|
|
337
|
+
});
|
|
338
|
+
await saveScorecard({ cwd: context.cwd, scorecard });
|
|
339
|
+
} catch {}
|
|
340
|
+
}
|
|
341
|
+
|
|
314
342
|
return {
|
|
315
343
|
response,
|
|
316
344
|
receipt: finalReceipt,
|
|
@@ -320,7 +348,8 @@ export async function runBuildWorkflow({
|
|
|
320
348
|
screenshotReview,
|
|
321
349
|
impactSummary: impact ? summarizeImpactMap(impact) : null,
|
|
322
350
|
verifierResults,
|
|
323
|
-
verifierSummary: verifierResults ? formatVerifierResults(verifierResults) : null
|
|
351
|
+
verifierSummary: verifierResults ? formatVerifierResults(verifierResults) : null,
|
|
352
|
+
scorecard
|
|
324
353
|
};
|
|
325
354
|
}
|
|
326
355
|
|