@tritard/waterbrother 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/scorecard.js +206 -32
- package/src/workflow.js +9 -6
package/package.json
CHANGED
package/src/scorecard.js
CHANGED
|
@@ -3,7 +3,7 @@ import path from "node:path";
|
|
|
3
3
|
import crypto from "node:crypto";
|
|
4
4
|
|
|
5
5
|
const MAX_INDEX_ENTRIES = 200;
|
|
6
|
-
const MAX_CALIBRATION_CHARS =
|
|
6
|
+
const MAX_CALIBRATION_CHARS = 2000;
|
|
7
7
|
|
|
8
8
|
function scorecardsDir(cwd) {
|
|
9
9
|
return path.join(cwd, ".waterbrother", "memory", "scorecards");
|
|
@@ -56,8 +56,64 @@ function computeUserScore(action) {
|
|
|
56
56
|
return null;
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
// --- Phase 4: Attribution decomposition ---
|
|
60
|
+
// Chain: prompt → plan → execution → verification → sentinel → user
|
|
61
|
+
// Each stage gets a score. Composite is precision-weighted.
|
|
62
|
+
|
|
63
|
+
/**
 * Builds the per-stage attribution record for a scorecard.
 *
 * Every stage score is rounded to two decimal places. A stage without a
 * usable score (null, undefined, or a non-finite number) is reported as
 * `null`, so consumers that average attribution values never see NaN.
 *
 * @param {object} stages
 * @param {number|null} [stages.planQuality] Score for the planning stage.
 * @param {number|null} [stages.executionQuality] Score for the execution stage.
 * @param {number|null} [stages.verificationScore] Score for the verification stage.
 * @param {number|null} [stages.sentinelScore] Score for the sentinel stage.
 * @param {number|null} [stages.userScore] Score derived from the user's action.
 * @returns {{plan: number|null, execution: number|null, verification: number|null, sentinel: number|null, user: number|null}}
 */
function computeAttribution({ planQuality, executionQuality, verificationScore, sentinelScore, userScore }) {
  // Round to two decimals; anything that is not a finite number becomes null.
  const roundOrNull = (value) =>
    typeof value === "number" && Number.isFinite(value) ? Math.round(value * 100) / 100 : null;

  return {
    plan: roundOrNull(planQuality),
    execution: roundOrNull(executionQuality),
    verification: roundOrNull(verificationScore),
    sentinel: roundOrNull(sentinelScore),
    user: roundOrNull(userScore)
  };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Scores the plan stage in the range [0, 1].
 *
 * Starts from a 0.7 baseline, adds 0.2 for a sentinel "ship" verdict,
 * subtracts 0.3 for a "block" verdict, and subtracts 0.15 for every sentinel
 * or challenge concern whose text mentions a scope leak.
 *
 * @param {object} inputs
 * @param {object} [inputs.receipt] Accepted for signature parity; not read here.
 * @param {{verdict?: string, concerns?: string[]}|null} [inputs.sentinel]
 * @param {{concerns?: string[]}|null} [inputs.challenge]
 * @returns {number} Plan quality clamped to [0, 1].
 */
function computePlanQuality({ receipt, sentinel, challenge }) {
  const SCOPE_LEAK_PATTERN = /scope|outside|unrelated|unnecessary/i;
  // Only real arrays are treated as concern lists; spreading any other truthy
  // value (e.g. a number or plain object) would throw.
  const asList = (value) => (Array.isArray(value) ? value : []);

  let score = 0.7; // baseline
  if (sentinel?.verdict === "ship") score += 0.2;
  if (sentinel?.verdict === "block") score -= 0.3;

  // Scope leak = plan was too broad or too narrow
  const scopeLeakCount = [...asList(sentinel?.concerns), ...asList(challenge?.concerns)]
    .filter((concern) => SCOPE_LEAK_PATTERN.test(concern))
    .length;
  score -= scopeLeakCount * 0.15;

  return Math.max(0, Math.min(1, score));
}
|
|
84
|
+
|
|
85
|
+
/**
 * Scores the execution stage in the range [0, 1].
 *
 * With no verification results the score is a neutral 0.5; otherwise it is
 * the fraction of results flagged `pass` or `ok`. A 0.1 bonus is added when
 * the receipt lists between 1 and 10 changed files.
 *
 * @param {object} inputs
 * @param {{changedFiles?: string[]}|null} [inputs.receipt]
 * @param {Array<{pass?: boolean, ok?: boolean}>} [inputs.verification]
 * @returns {number} Execution quality clamped to [0, 1].
 */
function computeExecutionQuality({ receipt, verification }) {
  let score = 0.5; // baseline when nothing was verified

  if (Array.isArray(verification) && verification.length > 0) {
    // Null/undefined entries count as failures instead of throwing.
    const passedCount = verification.filter((result) => result?.pass || result?.ok).length;
    score = passedCount / verification.length;
  }

  // Bonus for a modestly sized change set (1-10 files).
  const fileCount = receipt?.changedFiles?.length || 0;
  if (fileCount > 0 && fileCount <= 10) score += 0.1;

  return Math.max(0, Math.min(1, score));
}
|
|
96
|
+
|
|
97
|
+
// --- Precision weighting ---
|
|
98
|
+
// Larger changes = more evidence = higher precision
|
|
99
|
+
|
|
100
|
+
/**
 * Maps the number of changed files in a receipt to a precision weight.
 *
 * Tiers: 0 files -> 0.1, 1-2 files -> 0.5, 3-5 files -> 0.75, 6 or more -> 1.0.
 *
 * @param {{changedFiles?: string[]}|null|undefined} receipt
 * @returns {number} Precision weight in (0, 1].
 */
function computePrecision(receipt) {
  const fileCount = receipt?.changedFiles?.length || 0;
  if (fileCount === 0) return 0.1;
  if (fileCount <= 2) return 0.5;
  if (fileCount <= 5) return 0.75;
  return 1.0; // capped: every larger change set gets full weight
}
|
|
108
|
+
|
|
109
|
+
function computeComposite({ verificationScore, sentinelScore, qualityScore, userScore, precision }) {
|
|
110
|
+
// Precision-weighted blend
|
|
111
|
+
const weights = {
|
|
112
|
+
verification: 0.30 * (precision || 0.5),
|
|
113
|
+
sentinel: 0.25 * (precision || 0.5),
|
|
114
|
+
quality: 0.20,
|
|
115
|
+
user: 0.25
|
|
116
|
+
};
|
|
61
117
|
let total = 0;
|
|
62
118
|
let weightSum = 0;
|
|
63
119
|
|
|
@@ -69,7 +125,7 @@ function computeComposite({ verificationScore, sentinelScore, qualityScore, user
|
|
|
69
125
|
return weightSum > 0 ? Math.round((total / weightSum) * 100) / 100 : null;
|
|
70
126
|
}
|
|
71
127
|
|
|
72
|
-
// ---
|
|
128
|
+
// --- Phase 3: Brier scores ---
|
|
73
129
|
|
|
74
130
|
export function computeBrierScores(predictions, outcomes) {
|
|
75
131
|
if (!predictions || !outcomes) return null;
|
|
@@ -90,15 +146,48 @@ export function computeBrierScores(predictions, outcomes) {
|
|
|
90
146
|
scores.userAcceptFirstTry = Math.round(Math.pow(predictions.userAcceptFirstTry - actual, 2) * 1000) / 1000;
|
|
91
147
|
}
|
|
92
148
|
|
|
149
|
+
// Contrarian reward: predicted failure but it shipped clean
|
|
150
|
+
if (predictions.testPass !== undefined && predictions.testPass < 0.5) {
|
|
151
|
+
const actual = outcomes.verification?.every((v) => v.pass || v.ok) ? 1 : 0;
|
|
152
|
+
if (actual === 1) scores.contrarianReward = true;
|
|
153
|
+
}
|
|
154
|
+
|
|
93
155
|
return Object.keys(scores).length > 0 ? scores : null;
|
|
94
156
|
}
|
|
95
157
|
|
|
96
|
-
//
|
|
158
|
+
// Generate predictions from historical data for a scope
|
|
159
|
+
/**
 * Derives predictions for the next build from historical scorecards.
 *
 * Each prediction is the mean of the corresponding historical score, rounded
 * to two decimals. Cards that lack a `scores` object, or whose score is
 * null, undefined, or non-finite, are ignored for that metric so a single
 * malformed card cannot throw or turn a prediction into NaN.
 *
 * @param {Array<{scores?: {verificationScore?: number|null, sentinelScore?: number|null, userScore?: number|null}}>} historicalCards
 * @returns {{testPass?: number, sentinelShip?: number, userAcceptFirstTry?: number}|null}
 *   The predictions, or null when fewer than two cards are supplied or no
 *   metric has any usable history.
 */
export function generatePredictions(historicalCards) {
  if (!Array.isArray(historicalCards) || historicalCards.length < 2) return null;

  // Usable (finite numeric) values of one score across all cards.
  const collect = (key) =>
    historicalCards
      .map((card) => card?.scores?.[key])
      .filter((value) => typeof value === "number" && Number.isFinite(value));
  const mean = (values) =>
    values.length > 0 ? values.reduce((sum, value) => sum + value, 0) / values.length : null;
  const round2 = (value) => Math.round(value * 100) / 100;

  const testPass = mean(collect("verificationScore"));
  const sentinelShip = mean(collect("sentinelScore"));
  const userAccept = mean(collect("userScore"));

  const predictions = {};
  if (testPass !== null) predictions.testPass = round2(testPass);
  if (sentinelShip !== null) predictions.sentinelShip = round2(sentinelShip);
  // A negative mean user score is floored at 0 before rounding.
  if (userAccept !== null) predictions.userAcceptFirstTry = round2(Math.max(0, userAccept));

  return Object.keys(predictions).length > 0 ? predictions : null;
}
|
|
185
|
+
|
|
186
|
+
// --- Scorecard creation ---
|
|
97
187
|
|
|
98
|
-
export function computeScorecard({ task, receipt, qualityFindings, userAction }) {
|
|
188
|
+
export function computeScorecard({ task, receipt, qualityFindings, userAction, predictions }) {
|
|
99
189
|
const id = makeId(task?.name || task?.id || "build");
|
|
100
190
|
|
|
101
|
-
// Extract outcomes from receipt
|
|
102
191
|
const verification = (receipt?.verification || []).map((v) => ({
|
|
103
192
|
command: v.command || v.label || "check",
|
|
104
193
|
pass: Boolean(v.ok)
|
|
@@ -123,14 +212,30 @@ export function computeScorecard({ task, receipt, qualityFindings, userAction })
|
|
|
123
212
|
: []
|
|
124
213
|
};
|
|
125
214
|
|
|
126
|
-
//
|
|
215
|
+
// Scores
|
|
127
216
|
const verificationScore = computeVerificationScore(verification);
|
|
128
217
|
const sentinelScore = computeSentinelScore(sentinel?.verdict);
|
|
129
218
|
const qualityScore = computeQualityScore(warningCount);
|
|
130
219
|
const userScoreVal = computeUserScore(userAction);
|
|
131
|
-
const
|
|
220
|
+
const precision = computePrecision(receipt);
|
|
221
|
+
const composite = computeComposite({ verificationScore, sentinelScore, qualityScore, userScore: userScoreVal, precision });
|
|
222
|
+
|
|
223
|
+
// Attribution (Phase 4)
|
|
224
|
+
const planQuality = computePlanQuality({ receipt, sentinel, challenge });
|
|
225
|
+
const executionQuality = computeExecutionQuality({ receipt, verification });
|
|
226
|
+
const attribution = computeAttribution({
|
|
227
|
+
planQuality,
|
|
228
|
+
executionQuality,
|
|
229
|
+
verificationScore,
|
|
230
|
+
sentinelScore,
|
|
231
|
+
userScore: userScoreVal
|
|
232
|
+
});
|
|
132
233
|
|
|
133
|
-
//
|
|
234
|
+
// Brier (Phase 3)
|
|
235
|
+
const outcomes = { verification, sentinel, challenge, quality, userAction, designReview: receipt?.designReview ? { verdict: receipt.designReview.verdict } : null, experimentDelta: null };
|
|
236
|
+
const brierScores = predictions ? computeBrierScores(predictions, outcomes) : null;
|
|
237
|
+
|
|
238
|
+
// Scope
|
|
134
239
|
const scope = [];
|
|
135
240
|
if (receipt?.changedFiles?.length) {
|
|
136
241
|
const dirs = new Set();
|
|
@@ -148,16 +253,9 @@ export function computeScorecard({ task, receipt, qualityFindings, userAction })
|
|
|
148
253
|
scope,
|
|
149
254
|
approach: task?.chosenOption || null,
|
|
150
255
|
timestamp: new Date().toISOString(),
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
sentinel,
|
|
155
|
-
challenge,
|
|
156
|
-
quality,
|
|
157
|
-
designReview: receipt?.designReview ? { verdict: receipt.designReview.verdict } : null,
|
|
158
|
-
userAction: userAction || null,
|
|
159
|
-
experimentDelta: null
|
|
160
|
-
},
|
|
256
|
+
precision,
|
|
257
|
+
predictions: predictions || null,
|
|
258
|
+
outcomes,
|
|
161
259
|
scores: {
|
|
162
260
|
verificationScore,
|
|
163
261
|
sentinelScore,
|
|
@@ -165,7 +263,8 @@ export function computeScorecard({ task, receipt, qualityFindings, userAction })
|
|
|
165
263
|
userScore: userScoreVal,
|
|
166
264
|
composite
|
|
167
265
|
},
|
|
168
|
-
|
|
266
|
+
attribution,
|
|
267
|
+
brierScores
|
|
169
268
|
};
|
|
170
269
|
}
|
|
171
270
|
|
|
@@ -196,6 +295,7 @@ export async function saveScorecard({ cwd, scorecard }) {
|
|
|
196
295
|
scope: scorecard.scope,
|
|
197
296
|
approach: scorecard.approach,
|
|
198
297
|
composite: scorecard.scores.composite,
|
|
298
|
+
precision: scorecard.precision,
|
|
199
299
|
timestamp: scorecard.timestamp
|
|
200
300
|
});
|
|
201
301
|
if (index.length > MAX_INDEX_ENTRIES) index.length = MAX_INDEX_ENTRIES;
|
|
@@ -247,6 +347,28 @@ export async function loadRecentScorecards({ cwd, limit = 10 }) {
|
|
|
247
347
|
return cards;
|
|
248
348
|
}
|
|
249
349
|
|
|
350
|
+
// --- Scope variance (for autonomy adjustment) ---
|
|
351
|
+
|
|
352
|
+
/**
 * Computes the population variance of composite scores across scorecards,
 * rounded to three decimals.
 *
 * Cards without a finite numeric `scores.composite` are skipped, so a card
 * missing `scores` neither throws nor turns the result into NaN.
 *
 * @param {Array<{scores?: {composite?: number|null}}>} scorecards
 * @returns {number|null} The variance, or null when fewer than two usable
 *   composite scores are available.
 */
export function computeScopeVariance(scorecards) {
  if (!Array.isArray(scorecards) || scorecards.length < 2) return null;

  const composites = scorecards
    .map((card) => card?.scores?.composite)
    .filter((value) => typeof value === "number" && Number.isFinite(value));
  if (composites.length < 2) return null;

  const mean = composites.reduce((sum, value) => sum + value, 0) / composites.length;
  // Divides by N (population variance), not N - 1.
  const variance =
    composites.reduce((sum, value) => sum + Math.pow(value - mean, 2), 0) / composites.length;
  return Math.round(variance * 1000) / 1000;
}
|
|
360
|
+
|
|
361
|
+
/**
 * Suggests an autonomy level for a scope from its scorecard history.
 *
 * - "auto":   average composite >= 0.8 and variance unknown or below 0.05
 * - "ask":    average composite < 0.4, or variance above 0.15
 * - "scoped": everything in between
 *
 * The average is taken over ALL supplied cards; a card with a missing or
 * null composite contributes 0, which deliberately pulls the suggestion
 * toward the more cautious levels.
 *
 * @param {Array<{scores?: {composite?: number|null}}>} scorecards
 * @returns {"auto"|"ask"|"scoped"|null} The suggestion, or null when fewer
 *   than three scorecards are supplied.
 */
export function suggestAutonomyForScope(scorecards) {
  if (!Array.isArray(scorecards) || scorecards.length < 3) return null;

  // Optional chaining keeps a card without `scores` from throwing.
  const total = scorecards.reduce((sum, card) => sum + (card?.scores?.composite || 0), 0);
  const avg = total / scorecards.length;
  const variance = computeScopeVariance(scorecards);

  // High score + low variance = trust this scope
  const isStable = variance === null || variance < 0.05;
  if (avg >= 0.8 && isStable) return "auto";

  // Low score or high variance = be careful
  const isVolatile = variance !== null && variance > 0.15;
  if (avg < 0.4 || isVolatile) return "ask";

  return "scoped";
}
|
|
371
|
+
|
|
250
372
|
// --- Layer 2: Context injection ---
|
|
251
373
|
|
|
252
374
|
export function buildCalibrationBlock(scorecards) {
|
|
@@ -264,19 +386,50 @@ export function buildCalibrationBlock(scorecards) {
|
|
|
264
386
|
}
|
|
265
387
|
|
|
266
388
|
for (const [approach, cards] of Object.entries(byApproach)) {
|
|
267
|
-
|
|
268
|
-
const
|
|
269
|
-
const
|
|
270
|
-
|
|
389
|
+
// Precision-weighted average
|
|
390
|
+
const totalWeight = cards.reduce((sum, c) => sum + (c.precision || 0.5), 0);
|
|
391
|
+
const weightedAvg = totalWeight > 0
|
|
392
|
+
? cards.reduce((sum, c) => sum + (c.scores.composite || 0) * (c.precision || 0.5), 0) / totalWeight
|
|
393
|
+
: 0;
|
|
394
|
+
const verdicts = cards.map((c) => c.outcomes?.sentinel?.verdict).filter(Boolean);
|
|
395
|
+
const actions = cards.map((c) => c.outcomes?.userAction).filter(Boolean);
|
|
396
|
+
const line = `- ${approach}: weighted avg ${weightedAvg.toFixed(2)} (${verdicts.join(", ")}) → user: ${actions.join(", ")}`;
|
|
271
397
|
if (chars + line.length > MAX_CALIBRATION_CHARS) break;
|
|
272
398
|
lines.push(line);
|
|
273
399
|
chars += line.length;
|
|
274
400
|
}
|
|
275
401
|
|
|
276
|
-
//
|
|
402
|
+
// Attribution insights — where does the system fail?
|
|
403
|
+
const attrCounts = { plan: 0, execution: 0, verification: 0, sentinel: 0, user: 0 };
|
|
404
|
+
const attrSums = { plan: 0, execution: 0, verification: 0, sentinel: 0, user: 0 };
|
|
405
|
+
for (const sc of scorecards) {
|
|
406
|
+
if (!sc.attribution) continue;
|
|
407
|
+
for (const key of Object.keys(attrCounts)) {
|
|
408
|
+
if (sc.attribution[key] !== null && sc.attribution[key] !== undefined) {
|
|
409
|
+
attrCounts[key]++;
|
|
410
|
+
attrSums[key] += sc.attribution[key];
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
const attrAvgs = {};
|
|
415
|
+
for (const key of Object.keys(attrCounts)) {
|
|
416
|
+
if (attrCounts[key] > 0) attrAvgs[key] = attrSums[key] / attrCounts[key];
|
|
417
|
+
}
|
|
418
|
+
// Find weakest stage
|
|
419
|
+
const stages = Object.entries(attrAvgs).sort((a, b) => a[1] - b[1]);
|
|
420
|
+
if (stages.length > 0 && stages[0][1] < 0.6) {
|
|
421
|
+
const weakest = stages[0];
|
|
422
|
+
const attrLine = `Weakest stage: ${weakest[0]} (avg ${weakest[1].toFixed(2)}). Strengthen ${weakest[0] === "plan" ? "planning — add missing error handling, edge cases" : weakest[0] === "execution" ? "execution — check for compile errors, runtime crashes" : weakest[0] === "verification" ? "verification — ensure all tests pass before submitting" : "this stage"}.`;
|
|
423
|
+
if (chars + attrLine.length <= MAX_CALIBRATION_CHARS) {
|
|
424
|
+
lines.push(attrLine);
|
|
425
|
+
chars += attrLine.length;
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
// Quality blind spots
|
|
277
430
|
const allFindings = {};
|
|
278
431
|
for (const sc of scorecards) {
|
|
279
|
-
for (const f of (sc.outcomes
|
|
432
|
+
for (const f of (sc.outcomes?.quality?.findings || [])) {
|
|
280
433
|
allFindings[f] = (allFindings[f] || 0) + 1;
|
|
281
434
|
}
|
|
282
435
|
}
|
|
@@ -292,18 +445,32 @@ export function buildCalibrationBlock(scorecards) {
|
|
|
292
445
|
}
|
|
293
446
|
}
|
|
294
447
|
|
|
295
|
-
// Brier calibration
|
|
448
|
+
// Brier calibration (Phase 3)
|
|
296
449
|
const brierCards = scorecards.filter((sc) => sc.brierScores);
|
|
297
450
|
if (brierCards.length >= 3) {
|
|
298
451
|
const avgBrier = brierCards.reduce((sum, sc) => {
|
|
299
|
-
const vals = Object.values(sc.brierScores);
|
|
300
|
-
return sum + (vals.reduce((a, b) => a + b, 0) / vals.length);
|
|
452
|
+
const vals = Object.values(sc.brierScores).filter((v) => typeof v === "number");
|
|
453
|
+
return sum + (vals.length > 0 ? vals.reduce((a, b) => a + b, 0) / vals.length : 0);
|
|
301
454
|
}, 0) / brierCards.length;
|
|
302
|
-
|
|
303
|
-
|
|
455
|
+
const brierLine = avgBrier > 0.3
|
|
456
|
+
? `Calibration warning: avg Brier ${avgBrier.toFixed(2)} — you are overconfident. Lower predictions.`
|
|
457
|
+
: `Calibration: avg Brier ${avgBrier.toFixed(2)} — well calibrated.`;
|
|
458
|
+
lines.push(brierLine);
|
|
459
|
+
|
|
460
|
+
// Contrarian signals
|
|
461
|
+
const contrarianCount = brierCards.filter((sc) => sc.brierScores?.contrarianReward).length;
|
|
462
|
+
if (contrarianCount > 0) {
|
|
463
|
+
lines.push(`Contrarian note: ${contrarianCount} builds succeeded despite low confidence — you may be too conservative on this scope.`);
|
|
304
464
|
}
|
|
305
465
|
}
|
|
306
466
|
|
|
467
|
+
// Scope variance → autonomy suggestion
|
|
468
|
+
const variance = computeScopeVariance(scorecards);
|
|
469
|
+
const suggestedAutonomy = suggestAutonomyForScope(scorecards);
|
|
470
|
+
if (suggestedAutonomy && variance !== null) {
|
|
471
|
+
lines.push(`Scope variance: ${variance.toFixed(3)}. Suggested autonomy: ${suggestedAutonomy}.`);
|
|
472
|
+
}
|
|
473
|
+
|
|
307
474
|
return lines.join("\n");
|
|
308
475
|
}
|
|
309
476
|
|
|
@@ -317,5 +484,12 @@ export function formatScorecardSummary(scorecard) {
|
|
|
317
484
|
if (s.qualityScore !== null) parts.push(`quality:${(s.qualityScore * 100).toFixed(0)}%`);
|
|
318
485
|
if (s.userScore !== null) parts.push(`user:${(s.userScore * 100).toFixed(0)}%`);
|
|
319
486
|
if (s.composite !== null) parts.push(`composite:${(s.composite * 100).toFixed(0)}%`);
|
|
487
|
+
if (scorecard.precision) parts.push(`precision:${scorecard.precision.toFixed(1)}`);
|
|
488
|
+
if (scorecard.attribution) {
|
|
489
|
+
const weakest = Object.entries(scorecard.attribution)
|
|
490
|
+
.filter(([, v]) => v !== null)
|
|
491
|
+
.sort((a, b) => a[1] - b[1])[0];
|
|
492
|
+
if (weakest && weakest[1] < 0.6) parts.push(`weak:${weakest[0]}`);
|
|
493
|
+
}
|
|
320
494
|
return parts.join(" ");
|
|
321
495
|
}
|
package/src/workflow.js
CHANGED
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
} from "./frontend.js";
|
|
17
17
|
import { runPlannerPass, formatPlanForExecutor, formatPlanForDisplay } from "./planner.js";
|
|
18
18
|
import { runVerificationPass, formatVerifierResults, hasFailures } from "./verifier.js";
|
|
19
|
-
import { computeScorecard, saveScorecard, findRelevantScorecards, buildCalibrationBlock } from "./scorecard.js";
|
|
19
|
+
import { computeScorecard, saveScorecard, findRelevantScorecards, buildCalibrationBlock, generatePredictions } from "./scorecard.js";
|
|
20
20
|
|
|
21
21
|
export async function runBuildWorkflow({
|
|
22
22
|
agent,
|
|
@@ -28,14 +28,16 @@ export async function runBuildWorkflow({
|
|
|
28
28
|
if (!task) throw new Error("no active task");
|
|
29
29
|
if (!promptText) throw new Error("build requires a prompt");
|
|
30
30
|
|
|
31
|
-
// Layer 2: Inject calibration from scored memory
|
|
31
|
+
// Layer 2+3: Inject calibration + generate predictions from scored memory
|
|
32
32
|
let calibrationBlock = "";
|
|
33
|
+
let predictions = null;
|
|
33
34
|
try {
|
|
34
35
|
const contractPaths = task.activeContract?.paths || [];
|
|
35
36
|
if (contractPaths.length > 0) {
|
|
36
|
-
const relevantCards = await findRelevantScorecards({ cwd: context.cwd, filePatterns: contractPaths, limit:
|
|
37
|
+
const relevantCards = await findRelevantScorecards({ cwd: context.cwd, filePatterns: contractPaths, limit: 10 });
|
|
37
38
|
if (relevantCards.length > 0) {
|
|
38
39
|
calibrationBlock = buildCalibrationBlock(relevantCards);
|
|
40
|
+
predictions = generatePredictions(relevantCards);
|
|
39
41
|
}
|
|
40
42
|
}
|
|
41
43
|
} catch {}
|
|
@@ -325,15 +327,16 @@ export async function runBuildWorkflow({
|
|
|
325
327
|
context.runtime.lastImpact = impact || null;
|
|
326
328
|
}
|
|
327
329
|
|
|
328
|
-
//
|
|
330
|
+
// Compute and save scorecard with predictions + attribution
|
|
329
331
|
let scorecard = null;
|
|
330
332
|
if (finalReceipt?.mutated) {
|
|
331
333
|
try {
|
|
332
334
|
scorecard = computeScorecard({
|
|
333
335
|
task,
|
|
334
336
|
receipt: finalReceipt,
|
|
335
|
-
qualityFindings: null,
|
|
336
|
-
userAction: null
|
|
337
|
+
qualityFindings: null,
|
|
338
|
+
userAction: null,
|
|
339
|
+
predictions
|
|
337
340
|
});
|
|
338
341
|
await saveScorecard({ cwd: context.cwd, scorecard });
|
|
339
342
|
} catch {}
|