cclaw-cli 0.48.30 → 0.48.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/artifact-linter.js +609 -10
- package/dist/config.d.ts +1 -1
- package/dist/config.js +82 -4
- package/dist/content/examples.js +23 -6
- package/dist/content/ideate-command.d.ts +6 -2
- package/dist/content/ideate-command.js +43 -16
- package/dist/content/ideate-frames.d.ts +31 -0
- package/dist/content/ideate-frames.js +140 -0
- package/dist/content/ideate-ranking.d.ts +25 -0
- package/dist/content/ideate-ranking.js +65 -0
- package/dist/content/review-loop.d.ts +192 -0
- package/dist/content/review-loop.js +689 -0
- package/dist/content/seed-shelf.d.ts +36 -0
- package/dist/content/seed-shelf.js +236 -0
- package/dist/content/skills.js +84 -67
- package/dist/content/stage-schema.d.ts +1 -1
- package/dist/content/stage-schema.js +14 -2
- package/dist/content/stages/brainstorm.js +15 -4
- package/dist/content/stages/design.js +31 -8
- package/dist/content/stages/schema-types.d.ts +10 -0
- package/dist/content/stages/scope.js +17 -6
- package/dist/content/start-command.js +24 -18
- package/dist/content/templates.js +108 -4
- package/dist/internal/advance-stage.js +143 -1
- package/dist/trace-matrix.d.ts +14 -0
- package/dist/trace-matrix.js +55 -1
- package/dist/types.d.ts +27 -0
- package/package.json +1 -1
|
@@ -0,0 +1,689 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import os from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
/** Stage names for which a review-loop checklist is defined in this module. */
export const REVIEW_LOOP_STAGES = ["scope", "design"];

/** Iteration cap used when a budget does not carry an integer `maxIterations`. */
export const REVIEW_LOOP_DEFAULT_MAX_ITERATIONS = 3;

/** Target quality score (0..1) used when a budget does not carry a numeric `targetScore`. */
export const REVIEW_LOOP_DEFAULT_TARGET_SCORE = 0.8;
|
|
7
|
+
/**
 * Example JSON shape the reviewer is asked to return; it is appended to the
 * prompt and handed to dispatch adapters as `responseSchema`.
 * NOTE(review): leading indentation inside the original literal is not visible
 * in this extract — confirm the whitespace against the built file.
 */
const REVIEW_LOOP_RESPONSE_SCHEMA = [
  '{',
  '"findings": [',
  '{',
  '"id": "F-1",',
  '"dimensionId": "<one checklist id>",',
  '"severity": "critical|important|suggestion",',
  '"summary": "what is wrong",',
  '"evidence": "artifact quote/path",',
  '"recommendation": "concrete fix"',
  '}',
  '],',
  '"dimensionScores": [',
  '{',
  '"dimensionId": "<one checklist id>",',
  '"score": 0.0',
  '}',
  ']',
  '}',
].join("\n");
|
|
25
|
+
/**
 * Review checklists keyed by stage. Each dimension has an `id`, a display
 * `label`, a `weight` (1 for every entry here) and the `guidance` question
 * shown to the reviewer.
 */
export const REVIEW_LOOP_CHECKLISTS = (() => {
  // Rows are [id, label, guidance]; every dimension carries weight 1.
  const toDimensions = (rows) =>
    rows.map(([id, label, guidance]) => ({ id, label, weight: 1, guidance }));

  return {
    scope: toDimensions([
      [
        "premise_fit",
        "Premise fit",
        "Does the scope contract solve the actual user/problem framing without drifting into adjacent asks?",
      ],
      [
        "alternatives_coverage",
        "Alternatives coverage",
        "Are meaningful alternatives compared with explicit trade-offs and one clear recommendation?",
      ],
      [
        "error_rescue_registry",
        "Error and rescue coverage",
        "Does each scoped capability define failure mode, detection signal, and fallback/rescue behavior?",
      ],
      [
        "scope_creep_risk",
        "Scope-creep risk",
        "Are in/out boundaries explicit and protected against silent expansion/reduction language?",
      ],
      [
        "completion_status_fidelity",
        "Completion status fidelity",
        "Does the completion dashboard honestly report unresolved risks, decision count, and stop reason?",
      ],
    ]),
    design: toDimensions([
      [
        "architecture_fit",
        "Architecture fit",
        "Do architecture boundaries and diagrams align with scope and real blast-radius code?",
      ],
      [
        "failure_mode_coverage",
        "Failure-mode coverage",
        "Does the failure-mode table capture method/exception/rescue/user-visible impact for critical paths?",
      ],
      [
        "test_coverage_realism",
        "Test coverage realism",
        "Is the proposed test split realistic (unit/integration/e2e) with explicit gap handling?",
      ],
      [
        "performance_budget",
        "Performance budget",
        "Are critical metrics, thresholds, and measurement methods concrete and enforceable?",
      ],
      [
        "observability_adequacy",
        "Observability adequacy",
        "Can on-call trace a failure from user symptom to root cause via logs/metrics/traces/alerts?",
      ],
    ]),
  };
})();
|
|
91
|
+
/**
 * Bounds a score to the closed interval [0, 1].
 *
 * @param {number} value - Candidate score.
 * @returns {number} 0 for NaN or negative input, 1 for input above 1, otherwise `value`.
 */
function clampScore(value) {
  if (Number.isNaN(value) || value < 0) {
    return 0;
  }
  return value > 1 ? 1 : value;
}
|
|
100
|
+
/**
 * Resolves a caller-supplied budget into concrete loop limits.
 *
 * @param {{ maxIterations?: number, targetScore?: number } | null | undefined} budget
 * @returns {{ maxIterations: number, targetScore: number }}
 *   `maxIterations` is an integer clamped to 1..10 (default when missing or not
 *   an integer); `targetScore` is clamped to 0..1 (default when missing, not a
 *   number, or NaN).
 */
function normalizeBudget(budget) {
  const requestedMax = budget?.maxIterations;
  // Number.isInteger is false for every non-number, so no separate typeof check is needed.
  const maxIterations = Number.isInteger(requestedMax)
    ? Math.min(Math.max(requestedMax, 1), 10)
    : REVIEW_LOOP_DEFAULT_MAX_ITERATIONS;

  const requestedTarget = budget?.targetScore;
  // NaN is typeof "number"; clamping it would give a target of 0, which every
  // score satisfies. Treat it like a missing value instead.
  const targetScore = typeof requestedTarget === "number" && !Number.isNaN(requestedTarget)
    ? clampScore(requestedTarget)
    : REVIEW_LOOP_DEFAULT_TARGET_SCORE;

  return { maxIterations, targetScore };
}
|
|
109
|
+
/**
 * Renders checklist dimensions as a numbered list for the reviewer prompt.
 *
 * @param {Array<{ id: string, label: string, weight: number, guidance: string }>} checklist
 * @returns {string} One two-line entry per dimension, joined by newlines.
 */
function formatChecklistForPrompt(checklist) {
  const entries = checklist.map(({ id, label, weight, guidance }, position) => {
    const ordinal = position + 1;
    return `${ordinal}. [${id}] ${label} (weight=${weight})\n - ${guidance}`;
  });
  return entries.join("\n");
}
|
|
116
|
+
/**
 * Summarises earlier iterations for the reviewer prompt.
 *
 * @param {Array<{ iteration: number, qualityScore: number, findingsCount: number }>} priorIterations
 * @returns {string} "- none" for an empty history, otherwise one bullet per iteration.
 */
function formatPriorIterationsForPrompt(priorIterations) {
  const hasHistory = priorIterations.length !== 0;
  if (!hasHistory) {
    return "- none";
  }
  const bullets = priorIterations.map(({ iteration, qualityScore, findingsCount }) => {
    const score = qualityScore.toFixed(3);
    return `- iteration ${iteration}: score=${score}, findings=${findingsCount}`;
  });
  return bullets.join("\n");
}
|
|
126
|
+
/**
 * Builds the plain-text prompt sent to the outside-voice reviewer.
 *
 * @param {object} request - Dispatch request; reads `stage`, `iteration`,
 *   `budget.maxIterations`, `budget.targetScore`, `checklist` and `priorIterations`.
 * @returns {string} Prompt sections separated by blank lines, ending with the response schema.
 */
export function buildOutsideVoiceReviewPrompt(request) {
  const { stage, iteration, budget } = request;

  const preamble = [
    "You are the Outside Voice adversarial reviewer.",
    "Review ONLY the provided artifact markdown and return strict JSON (no prose).",
  ];
  const context = [
    `Stage: ${stage}`,
    `Iteration: ${iteration}/${budget.maxIterations}`,
    `Target quality score: ${budget.targetScore}`,
  ];
  const checklistBlock = ["Checklist dimensions:", formatChecklistForPrompt(request.checklist)];
  const historyBlock = ["Prior iterations:", formatPriorIterationsForPrompt(request.priorIterations)];
  const schemaBlock = ["Return JSON schema:", REVIEW_LOOP_RESPONSE_SCHEMA];

  // Lines within a section are newline-joined; sections are separated by one blank line.
  return [preamble, context, checklistBlock, historyBlock, schemaBlock]
    .map((section) => section.join("\n"))
    .join("\n\n");
}
|
|
145
|
+
/**
 * Wraps an adapter so it can be used as a review-loop dispatcher.
 *
 * @param {(call: { request: object, prompt: string, responseSchema: string }) => unknown} adapter
 * @returns {(request: object) => Promise<unknown>} Async dispatcher that passes
 *   the request, its rendered prompt and the response schema to `adapter`.
 */
export function createOutsideVoiceDispatcher(adapter) {
  return async (request) => {
    const prompt = buildOutsideVoiceReviewPrompt(request);
    const call = { request, prompt, responseSchema: REVIEW_LOOP_RESPONSE_SCHEMA };
    return adapter(call);
  };
}
|
|
154
|
+
/**
 * Maps an arbitrary severity value onto the three supported levels.
 *
 * @param {unknown} value - Raw severity from the reviewer.
 * @returns {"critical" | "important" | "suggestion"} "important" for anything
 *   that is not a (case/whitespace-insensitive) "critical" or "suggestion".
 */
function normalizeSeverity(value) {
  const fallback = "important";
  if (typeof value !== "string") {
    return fallback;
  }
  switch (value.trim().toLowerCase()) {
    case "critical":
      return "critical";
    case "suggestion":
      return "suggestion";
    default:
      return fallback;
  }
}
|
|
164
|
+
/**
 * Narrows a value to a non-array object.
 *
 * @param {unknown} value
 * @returns {object | null} `value` itself when it is a truthy, non-array object; otherwise null.
 */
function asRecord(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  return isRecord ? value : null;
}
|
|
169
|
+
/**
 * Normalises the reviewer's `findings` array.
 *
 * Rows that are not objects, or that have no non-empty `summary` (or legacy
 * `finding`) text, are dropped. Dimension ids not in the checklist fall back
 * to the first checklist id ("general" when the checklist is empty).
 *
 * @param {unknown} value - Raw findings; anything but an array yields [].
 * @param {Array<{ id: string }>} checklist
 * @returns {Array<object>} Findings with id, dimensionId, severity, summary, evidence, recommendation.
 */
function parseFindings(value, checklist) {
  if (!Array.isArray(value)) {
    return [];
  }
  const fallbackDimension = checklist[0]?.id ?? "general";
  const knownDimensions = new Set(checklist.map((item) => item.id));
  // First candidate that is a string, or undefined when none is.
  const firstString = (...candidates) => candidates.find((candidate) => typeof candidate === "string");
  const optionalString = (candidate) => (typeof candidate === "string" ? candidate : undefined);

  const findings = [];
  for (let index = 0; index < value.length; index += 1) {
    const row = asRecord(value[index]);
    if (!row) {
      continue;
    }
    const summary = (firstString(row.summary, row.finding) ?? "").trim();
    if (summary.length === 0) {
      continue;
    }
    const requestedDimension = firstString(row.dimensionId, row.dimension) ?? fallbackDimension;
    const explicitId = typeof row.id === "string" ? row.id.trim() : "";
    findings.push({
      // Generated ids use the row's position in the raw array, so skipped rows leave gaps.
      id: explicitId.length > 0 ? explicitId : `F-${index + 1}`,
      dimensionId: knownDimensions.has(requestedDimension) ? requestedDimension : fallbackDimension,
      severity: normalizeSeverity(row.severity),
      summary,
      evidence: optionalString(row.evidence),
      recommendation: optionalString(row.recommendation),
    });
  }
  return findings;
}
|
|
207
|
+
/**
 * Derives per-dimension scores from findings when the reviewer gave none.
 *
 * Every dimension starts at 1 and loses 0.4 per critical finding, 0.2 per
 * important finding and 0.1 for any other severity, clamped to [0, 1].
 *
 * @param {Array<{ id: string, weight: number }>} checklist
 * @param {Array<{ dimensionId: string, severity: string }>} findings
 * @returns {Array<{ dimensionId: string, score: number, weight: number }>} One row per checklist dimension.
 */
function inferDimensionScoresFromFindings(checklist, findings) {
  const penaltyBySeverity = new Map([
    ["critical", 0.4],
    ["important", 0.2],
  ]);
  const scoreByDimension = new Map();
  checklist.forEach((dimension) => {
    scoreByDimension.set(dimension.id, 1);
  });

  findings.forEach(({ dimensionId, severity }) => {
    const before = scoreByDimension.get(dimensionId) ?? 1;
    const penalty = penaltyBySeverity.get(severity) ?? 0.1;
    scoreByDimension.set(dimensionId, clampScore(before - penalty));
  });

  return checklist.map(({ id, weight }) => ({
    dimensionId: id,
    score: scoreByDimension.get(id) ?? 0,
    weight,
  }));
}
|
|
224
|
+
/**
 * Normalises the reviewer's `dimensionScores` array.
 *
 * Rows are kept only when they are objects, name a checklist dimension (via
 * `dimensionId` or legacy `dimension`) and carry a non-NaN numeric `score`.
 * When the input is not an array or no row survives, scores are inferred from
 * the findings instead.
 *
 * @param {unknown} value - Raw dimension scores.
 * @param {Array<{ id: string, weight: number }>} checklist
 * @param {Array<object>} findings - Parsed findings used for the fallback.
 * @returns {Array<{ dimensionId: string, score: number, weight?: number, rationale?: string }>}
 */
function parseDimensionScores(value, checklist, findings) {
  if (!Array.isArray(value)) {
    return inferDimensionScoresFromFindings(checklist, findings);
  }
  const knownDimensions = new Set(checklist.map((item) => item.id));
  const accepted = [];

  for (const candidate of value) {
    const row = asRecord(candidate);
    if (!row) {
      continue;
    }
    let dimensionId = "";
    if (typeof row.dimensionId === "string") {
      dimensionId = row.dimensionId;
    } else if (typeof row.dimension === "string") {
      dimensionId = row.dimension;
    }
    const hasUsableScore = typeof row.score === "number" && !Number.isNaN(row.score);
    if (!knownDimensions.has(dimensionId) || !hasUsableScore) {
      continue;
    }
    accepted.push({
      dimensionId,
      score: clampScore(row.score),
      weight: typeof row.weight === "number" ? row.weight : undefined,
      rationale: typeof row.rationale === "string" ? row.rationale : undefined,
    });
  }

  return accepted.length > 0
    ? accepted
    : inferDimensionScoresFromFindings(checklist, findings);
}
|
|
255
|
+
/**
 * Extracts the review payload from the shapes a dispatcher may return.
 *
 * Handled in order: a JSON string; an object whose `payload` holds a
 * `findings` or `dimensionScores` array; an object with a string `output`;
 * an object with a string `text`. Text that is not valid JSON is wrapped as a
 * single "important" finding. Anything else is returned unchanged.
 *
 * @param {unknown} raw - Dispatcher return value.
 * @returns {unknown} The unwrapped payload, or `raw` itself.
 */
function unwrapDispatcherPayload(raw) {
  // Parse JSON text; on failure keep the text as one finding rather than dropping it.
  const parseOrWrap = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return { findings: [{ summary: text, severity: "important" }] };
    }
  };

  if (typeof raw === "string") {
    return parseOrWrap(raw);
  }
  const record = asRecord(raw);
  if (!record) {
    return raw;
  }
  const nested = asRecord(record.payload);
  const nestedLooksLikeReview = Boolean(nested)
    && (Array.isArray(nested.findings) || Array.isArray(nested.dimensionScores));
  if (nestedLooksLikeReview) {
    return nested;
  }
  for (const field of ["output", "text"]) {
    if (typeof record[field] === "string") {
      return parseOrWrap(record[field]);
    }
  }
  return raw;
}
|
|
292
|
+
/**
 * Converts a raw dispatcher result into normalised findings and dimension scores.
 *
 * @param {unknown} raw - Dispatcher return value in any supported shape.
 * @param {Array<{ id: string, weight: number }>} checklist
 * @returns {{ findings: Array<object>, dimensionScores: Array<object> }}
 */
export function parseReviewLoopDispatcherResult(raw, checklist) {
  const record = asRecord(unwrapDispatcherPayload(raw));
  const findings = parseFindings(record?.findings, checklist);
  return {
    findings,
    // Scores fall back to values inferred from `findings` when absent or unusable.
    dimensionScores: parseDimensionScores(record?.dimensionScores, checklist, findings),
  };
}
|
|
299
|
+
/**
 * Resolves a second-opinion policy into concrete settings.
 *
 * @param {{ enabled?: boolean, scoreDeltaThreshold?: number, modelLabel?: string } | null | undefined} policy
 * @returns {{ enabled: boolean, scoreDeltaThreshold: number, modelLabel: string | undefined }}
 *   `enabled` is true only for a literal `true`; the threshold is clamped to
 *   0..1 and defaults to 0.2 when missing, not a number, or NaN; `modelLabel`
 *   is trimmed, or undefined when blank.
 */
function normalizeSecondOpinionPolicy(policy) {
  const enabled = policy?.enabled === true;

  const requestedThreshold = policy?.scoreDeltaThreshold;
  // NaN is typeof "number"; clamping it would give a threshold of 0, which every
  // score delta reaches. Treat it like a missing value instead.
  const scoreDeltaThreshold = typeof requestedThreshold === "number" && !Number.isNaN(requestedThreshold)
    ? clampScore(requestedThreshold)
    : 0.2;

  const trimmedLabel = typeof policy?.modelLabel === "string" ? policy.modelLabel.trim() : "";
  const modelLabel = trimmedLabel.length > 0 ? trimmedLabel : undefined;

  return { enabled, scoreDeltaThreshold, modelLabel };
}
|
|
309
|
+
/**
 * Removes duplicate findings, keeping the first occurrence of each.
 *
 * Two findings are duplicates when dimension, severity and the trimmed,
 * lower-cased summary all match.
 *
 * @param {Array<{ dimensionId: string, severity: string, summary: string }>} findings
 * @returns {Array<object>} New array in original order.
 */
function dedupeFindings(findings) {
  const seenKeys = new Set();
  return findings.filter((finding) => {
    const summaryKey = finding.summary.trim().toLowerCase();
    const key = `${finding.dimensionId}:${finding.severity}:${summaryKey}`;
    const isNew = !seenKeys.has(key);
    if (isNew) {
      seenKeys.add(key);
    }
    return isNew;
  });
}
|
|
321
|
+
/**
 * Combines a primary review with a second-opinion review.
 *
 * Per-dimension scores are the mean of all clamped scores reported for that
 * dimension by either reviewer (0 when none). Findings are concatenated and
 * de-duplicated. When the aggregate scores differ by at least the policy
 * threshold, an extra "important" disagreement finding is appended.
 *
 * @param {unknown} primaryRaw - Raw primary dispatcher result.
 * @param {unknown} secondOpinionRaw - Raw second-opinion dispatcher result.
 * @param {Array<{ id: string, weight: number }>} checklist
 * @param {object} [policy] - Second-opinion policy (threshold, model label).
 * @returns {{ findings: Array<object>, dimensionScores: Array<object>, secondOpinion: object }}
 */
export function mergeSecondOpinionResults(primaryRaw, secondOpinionRaw, checklist, policy) {
  const { scoreDeltaThreshold: threshold, modelLabel } = normalizeSecondOpinionPolicy(policy);
  const primary = parseReviewLoopDispatcherResult(primaryRaw, checklist);
  const secondOpinion = parseReviewLoopDispatcherResult(secondOpinionRaw, checklist);
  const primaryScore = aggregateQualityScore(primary.dimensionScores, checklist);
  const secondOpinionScore = aggregateQualityScore(secondOpinion.dimensionScores, checklist);
  const scoreDelta = Math.abs(primaryScore - secondOpinionScore);

  const reportedRows = primary.dimensionScores.concat(secondOpinion.dimensionScores);
  const dimensionScores = checklist.map((dimension) => {
    const samples = reportedRows
      .filter((row) => row.dimensionId === dimension.id)
      .map((row) => clampScore(row.score));
    const total = samples.reduce((sum, score) => sum + score, 0);
    const mean = samples.length > 0 ? total / samples.length : 0;
    return {
      dimensionId: dimension.id,
      score: clampScore(mean),
      weight: dimension.weight,
    };
  });

  const findings = dedupeFindings(primary.findings.concat(secondOpinion.findings));
  const reviewersDisagree = scoreDelta >= threshold;
  if (reviewersDisagree) {
    const evidence = [
      `primary=${primaryScore.toFixed(3)}`,
      `secondOpinion=${secondOpinionScore.toFixed(3)}`,
      `threshold=${threshold.toFixed(3)}`,
    ].join(" ");
    findings.push({
      id: "F-cross-model-disagreement",
      dimensionId: checklist[0]?.id ?? "general",
      severity: "important",
      summary: "Cross-model second opinion found a meaningful quality-score disagreement that needs explicit disposition.",
      evidence,
      recommendation: "Record why the team accepts one view or synthesize both findings before closing the review loop.",
    });
  }

  const secondOpinionSummary = {
    enabled: true,
    modelLabel,
    primaryScore,
    secondOpinionScore,
    scoreDelta,
    threshold,
  };
  return { findings, dimensionScores, secondOpinion: secondOpinionSummary };
}
|
|
371
|
+
/**
 * Builds a dispatcher that optionally consults a second reviewer.
 *
 * The primary dispatcher always runs. Its raw result is returned untouched
 * unless the policy is enabled and a `secondOpinion` dispatcher is supplied;
 * in that case both results are merged.
 *
 * @param {{ primary: Function, secondOpinion?: Function, policy?: object }} args
 * @returns {(request: object) => Promise<unknown>}
 */
export function createSecondOpinionDispatcher(args) {
  // The policy is resolved once, when the dispatcher is created.
  const policy = normalizeSecondOpinionPolicy(args.policy);

  return async (request) => {
    const primaryRaw = await args.primary(request);
    const useSecondOpinion = policy.enabled && Boolean(args.secondOpinion);
    if (!useSecondOpinion) {
      return primaryRaw;
    }
    const secondOpinionRaw = await args.secondOpinion(request);
    return mergeSecondOpinionResults(primaryRaw, secondOpinionRaw, request.checklist, policy);
  };
}
|
|
382
|
+
/**
 * Computes the weighted mean quality score across checklist dimensions.
 *
 * A dimension with no score row counts as 0. A row's own positive numeric
 * `weight` overrides the checklist weight. When several rows share a
 * dimension id, the last one is used.
 *
 * @param {Array<{ dimensionId: string, score: number, weight?: number }>} scores
 * @param {Array<{ id: string, weight: number }>} checklist
 * @returns {number} Score in [0, 1]; 0 for an empty checklist or non-positive total weight.
 */
export function aggregateQualityScore(scores, checklist) {
  if (checklist.length === 0) {
    return 0;
  }
  const rowById = new Map();
  for (const row of scores) {
    rowById.set(row.dimensionId, row);
  }

  const totals = checklist.reduce((running, dimension) => {
    const row = rowById.get(dimension.id);
    const rowHasWeight = typeof row?.weight === "number" && row.weight > 0;
    const weight = rowHasWeight ? row.weight : dimension.weight;
    const score = clampScore(row?.score ?? 0);
    return {
      weight: running.weight + weight,
      weighted: running.weighted + score * weight,
    };
  }, { weight: 0, weighted: 0 });

  if (totals.weight <= 0) {
    return 0;
  }
  return clampScore(totals.weighted / totals.weight);
}
|
|
401
|
+
/**
 * Copies the artifact into a fresh temporary directory for one dispatch.
 *
 * @param {string} artifactPath - Path of the markdown artifact to read (UTF-8).
 * @param {string} stage - Stage name, used in the temp directory prefix.
 * @param {number|string} iteration - Iteration label, used in the temp file name.
 * @returns {Promise<{ tempDir: string, tempArtifactPath: string }>} The caller
 *   is responsible for removing `tempDir`.
 * @throws Rejects with the underlying fs error if reading, creating the
 *   directory, or writing fails.
 */
async function materializeArtifactForDispatch(artifactPath, stage, iteration) {
  const markdown = await fs.readFile(artifactPath, "utf8");
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), `cclaw-review-loop-${stage}-`));
  const tempArtifactPath = path.join(tempDir, `artifact-iteration-${iteration}.md`);
  try {
    await fs.writeFile(tempArtifactPath, markdown, "utf8");
  } catch (error) {
    // The caller never receives tempDir on failure, so remove it here.
    // Cleanup is best-effort so the original write error is the one surfaced.
    await fs.rm(tempDir, { recursive: true, force: true }).catch(() => undefined);
    throw error;
  }
  return { tempDir, tempArtifactPath };
}
|
|
408
|
+
/**
 * Runs one review iteration: snapshots the artifact, dispatches the review,
 * parses the result and scores it. The snapshot directory is always removed.
 *
 * @param {object} input - Reads `stage`, `artifactPath`, `iteration`, and
 *   optional `checklist`, `budget`, `priorIterations`.
 * @param {(request: object) => Promise<unknown>} dispatcher
 * @returns {Promise<{ qualityScore: number, findings: Array<object>, iteration: number, shouldContinue: boolean, dimensionScores: Array<object> }>}
 */
export async function runReviewLoopIteration(input, dispatcher) {
  const { stage, iteration } = input;
  const checklist = input.checklist ?? REVIEW_LOOP_CHECKLISTS[stage];
  const budget = normalizeBudget(input.budget);
  const priorIterations = input.priorIterations ?? [];
  const snapshot = await materializeArtifactForDispatch(input.artifactPath, stage, iteration);

  try {
    // The dispatcher is given the temporary copy's path, not the original artifact path.
    const request = {
      stage,
      artifactPath: snapshot.tempArtifactPath,
      checklist,
      priorIterations,
      iteration,
      budget,
    };
    const parsed = parseReviewLoopDispatcherResult(await dispatcher(request), checklist);
    const qualityScore = aggregateQualityScore(parsed.dimensionScores, checklist);
    const belowTarget = qualityScore < budget.targetScore;
    const hasIterationsLeft = iteration < budget.maxIterations;
    return {
      qualityScore,
      findings: parsed.findings,
      iteration,
      shouldContinue: belowTarget && hasIterationsLeft,
      dimensionScores: parsed.dimensionScores,
    };
  } finally {
    await fs.rm(snapshot.tempDir, { recursive: true, force: true });
  }
}
|
|
436
|
+
/**
 * Assembles a version "1" review-loop envelope.
 *
 * @param {{ stage: string, artifactPath: string, targetScore: number, maxIterations: number, stopReason: string, iterations: Iterable<object> }} args
 * @returns {object} Envelope holding a shallow copy of `args.iterations`.
 */
export function buildReviewLoopEnvelope(args) {
  const { stage, artifactPath, targetScore, maxIterations, stopReason } = args;
  const iterations = [...args.iterations];
  return {
    type: "review-loop",
    version: "1",
    stage,
    artifactPath,
    targetScore,
    maxIterations,
    stopReason,
    iterations,
  };
}
|
|
448
|
+
/**
 * Formats a score for display.
 *
 * @param {number} value
 * @returns {string} The score clamped to [0, 1], with three decimal places.
 */
function formatScore(value) {
  const bounded = clampScore(value);
  return bounded.toFixed(3);
}
|
|
451
|
+
/**
 * Reads the quality score of the envelope's last iteration.
 *
 * @param {{ iterations: Array<{ qualityScore: number }> }} envelope
 * @returns {number} The last row's `qualityScore`, or 0 when there are no iterations.
 */
function finalEnvelopeScore(envelope) {
  const { iterations } = envelope;
  const lastIndex = iterations.length - 1;
  return lastIndex < 0 ? 0 : iterations[lastIndex].qualityScore;
}
|
|
456
|
+
/**
 * Renders the one-line blockquote header that summarises a review loop.
 *
 * @param {{ iterations: Array<object>, stopReason: string, maxIterations: number }} envelope
 * @returns {string} "> Review Loop Quality: <score> | stop: <reason> | iterations: <n>/<max>".
 */
export function renderReviewLoopHeader(envelope) {
  const score = formatScore(finalEnvelopeScore(envelope));
  const progress = `${envelope.iterations.length}/${envelope.maxIterations}`;
  const segments = [
    `> Review Loop Quality: ${score}`,
    `stop: ${envelope.stopReason}`,
    `iterations: ${progress}`,
  ];
  return segments.join(" | ");
}
|
|
459
|
+
/**
 * Inserts or replaces the review-loop header line in an artifact.
 *
 * An existing "> Review Loop Quality:" line is replaced in place. Otherwise
 * the header goes directly below the first level-1 heading, or at the top of
 * the document when there is no such heading.
 *
 * @param {string} markdown - Artifact markdown.
 * @param {object} envelope - Review-loop envelope to render.
 * @returns {string} Updated markdown.
 */
export function upsertReviewLoopHeader(markdown, envelope) {
  const header = renderReviewLoopHeader(envelope);
  const existingHeader = /^>\s+Review Loop Quality:.*$/m;
  if (existingHeader.test(markdown)) {
    // A replacer function inserts the header verbatim; a replacement string
    // would expand `$&`, `$1`, `$$` etc. if they appeared in the stop reason.
    return markdown.replace(existingHeader, () => header);
  }
  const firstHeading = /^# .+$/m.exec(markdown);
  if (!firstHeading) {
    const prefix = markdown.length > 0 ? `${header}\n\n` : `${header}\n`;
    return `${prefix}${markdown}`;
  }
  const headingEnd = firstHeading.index + firstHeading[0].length;
  return `${markdown.slice(0, headingEnd)}\n\n${header}${markdown.slice(headingEnd)}`;
}
|
|
473
|
+
/**
 * Renders the "## Spec Review Loop" markdown section for an envelope.
 *
 * @param {{ iterations: Array<{ iteration: number, qualityScore: number, findingsCount: number }>, stopReason: string, targetScore: number, maxIterations: number }} envelope
 * @returns {string} Heading, iteration table (a single zero row when there are
 *   no iterations) and a three-item settings list, without a trailing newline.
 */
export function renderReviewLoopSummarySection(envelope) {
  const tableRows = envelope.iterations.map((row) => {
    return `| ${row.iteration} | ${formatScore(row.qualityScore)} | ${row.findingsCount} |`;
  });
  const rows = tableRows.length > 0 ? tableRows.join("\n") : "| 0 | 0.000 | 0 |";

  const lines = [
    "## Spec Review Loop",
    "| Iteration | Quality Score | Findings |",
    "|---|---|---|",
    rows,
    "",
    `- Stop reason: ${envelope.stopReason}`,
    `- Target score: ${formatScore(envelope.targetScore)}`,
    `- Max iterations: ${envelope.maxIterations}`,
  ];
  return lines.join("\n");
}
|
|
490
|
+
/**
 * Writes the review-loop header and the "## Spec Review Loop" section into an artifact.
 *
 * The header is upserted first. The section is appended when absent;
 * otherwise the existing section, up to the next "## " heading or the end of
 * the document, is replaced.
 *
 * @param {string} markdown - Artifact markdown.
 * @param {object} envelope - Review-loop envelope to render.
 * @returns {string} Updated markdown.
 */
export function upsertReviewLoopSummary(markdown, envelope) {
  const document = upsertReviewLoopHeader(markdown, envelope);
  const section = renderReviewLoopSummarySection(envelope);
  const existing = /^##\s+Spec Review Loop\s*$/m.exec(document);

  if (existing === null) {
    const separator = document.endsWith("\n") ? "" : "\n";
    return `${document}${separator}\n${section}\n`;
  }

  const sectionStart = existing.index;
  const bodyStart = sectionStart + existing[0].length;
  const following = /\n##\s+/m.exec(document.slice(bodyStart));
  // +1 keeps the newline that precedes the next heading out of the tail.
  const sectionEnd = following === null ? document.length : bodyStart + following.index + 1;

  const before = document.slice(0, sectionStart);
  const after = document.slice(sectionEnd);
  const rebuilt = `${before}${section}\n${after}`;
  // Runs of three or more newlines anywhere in the result collapse to one blank line.
  return rebuilt.replace(/\n{3,}/g, "\n\n");
}
|
|
505
|
+
/**
 * Returns the body of a level-2 markdown section.
 *
 * @param {string} markdown - Document to search.
 * @param {string} heading - Heading text, matched literally and case-insensitively.
 * @returns {string | null} Trimmed text between the heading and the next "## "
 *   heading (or end of document); null when the heading is not found.
 */
function extractH2Section(markdown, heading) {
  // Escape regex metacharacters so the heading is matched as literal text.
  const literalHeading = heading.replace(/[.*+?^${}()|[\]\\]/gu, "\\$&");
  const opener = new RegExp(`^##\\s+${literalHeading}\\s*$`, "mi").exec(markdown);
  if (opener === null) {
    return null;
  }
  const bodyStart = opener.index + opener[0].length;
  const following = /\n##\s+/m.exec(markdown.slice(bodyStart));
  const bodyEnd = following === null ? markdown.length : bodyStart + following.index + 1;
  return markdown.slice(bodyStart, bodyEnd).trim();
}
|
|
518
|
+
/**
 * Validates a stop-reason string.
 *
 * @param {unknown} value
 * @returns {"quality_threshold_met" | "max_iterations_reached" | "user_opt_out" | null}
 *   The trimmed value when it is one of the known reasons (case-sensitive); otherwise null.
 */
function normalizeStopReason(value) {
  if (typeof value !== "string") {
    return null;
  }
  const knownReasons = ["quality_threshold_met", "max_iterations_reached", "user_opt_out"];
  const candidate = value.trim();
  return knownReasons.includes(candidate) ? candidate : null;
}
|
|
530
|
+
/**
 * Parses the iteration table of a "Spec Review Loop" section.
 *
 * Only lines starting with "|" that have at least three cells are considered.
 * The header row, the divider row, and rows whose values are not a positive
 * integer iteration, a finite score and a non-negative integer findings count
 * are skipped.
 *
 * @param {string} sectionBody - Section text without its heading.
 * @returns {Array<{ iteration: number, qualityScore: number, findingsCount: number }>}
 *   Rows sorted by iteration, with scores clamped to [0, 1].
 */
function parseIterationsTable(sectionBody) {
  const parsedRows = [];

  for (const rawLine of sectionBody.split(/\r?\n/gu)) {
    const text = rawLine.trim();
    if (!text.startsWith("|")) {
      continue;
    }
    // Drop the empty fragments outside the first and last pipe.
    const cells = text.split("|").slice(1, -1).map((cell) => cell.trim());
    if (cells.length < 3) {
      continue;
    }
    const leadCell = cells[0] ?? "";
    const isHeaderRow = /iteration/iu.test(leadCell);
    const isDividerRow = /^-+$/u.test(leadCell.replace(/:/gu, ""));
    if (isHeaderRow || isDividerRow) {
      continue;
    }

    const iteration = Number(cells[0]);
    const qualityScore = Number(cells[1]);
    const findingsCount = Number(cells[2]);
    const validIteration = Number.isInteger(iteration) && iteration >= 1;
    const validFindings = Number.isInteger(findingsCount) && findingsCount >= 0;
    if (!validIteration || !Number.isFinite(qualityScore) || !validFindings) {
      continue;
    }
    parsedRows.push({ iteration, qualityScore: clampScore(qualityScore), findingsCount });
  }

  parsedRows.sort((left, right) => left.iteration - right.iteration);
  return parsedRows;
}
|
|
565
|
+
/**
 * Reads the values encoded in the "> Review Loop Quality: ..." header line.
 *
 * @param {string} markdown
 * @returns {{ score?: number, stopReason?: string, iterations?: number, maxIterations?: number }}
 *   An empty object when no header line matches; fields that fail validation are undefined.
 */
function parseHeaderMeta(markdown) {
  const headerPattern = /^>\s*Review Loop Quality:\s*([0-9]*\.?[0-9]+)\s*\|\s*stop:\s*([a-z_]+)\s*\|\s*iterations:\s*(\d+)\s*\/\s*(\d+)\s*$/mi;
  const match = headerPattern.exec(markdown);
  if (match === null) {
    return {};
  }
  const [, rawScore, rawStopReason, rawIterations, rawMaxIterations] = match;
  const score = Number(rawScore);
  const iterations = Number(rawIterations);
  const maxIterations = Number(rawMaxIterations);
  const integerOrUndefined = (candidate) => (Number.isInteger(candidate) ? candidate : undefined);

  return {
    score: Number.isFinite(score) ? clampScore(score) : undefined,
    stopReason: normalizeStopReason(rawStopReason ?? "") ?? undefined,
    iterations: integerOrUndefined(iterations),
    maxIterations: integerOrUndefined(maxIterations),
  };
}
|
|
580
|
+
/**
 * Rebuilds a review-loop envelope from an artifact's markdown.
 *
 * Values come from the "## Spec Review Loop" section first, then from the
 * header line, then from defaults. When no stop reason is recorded it is
 * inferred from the last iteration's score and the iteration count.
 *
 * @param {string} markdown - Artifact markdown.
 * @param {string} stage - Stage name copied into the envelope.
 * @param {string} artifactPath - Artifact path copied into the envelope.
 * @returns {object | null} Version "1" envelope, or null when the section is
 *   missing or empty, or its table has no valid iteration rows.
 */
export function extractReviewLoopEnvelopeFromArtifact(markdown, stage, artifactPath) {
  const sectionBody = extractH2Section(markdown, "Spec Review Loop");
  if (!sectionBody) {
    return null;
  }
  const iterations = parseIterationsTable(sectionBody);
  if (iterations.length === 0) {
    return null;
  }

  const stopReasonFromSection = normalizeStopReason(/-\s*Stop reason:\s*([a-z_]+)/iu.exec(sectionBody)?.[1]);
  const targetMatch = /-\s*Target score:\s*([0-9]*\.?[0-9]+)/iu.exec(sectionBody);
  // Number("") is 0, not NaN, so a missing line must be detected from the match
  // itself; otherwise the target would silently become 0 instead of the default.
  const targetFromSection = targetMatch ? Number(targetMatch[1]) : Number.NaN;
  const maxMatch = /-\s*Max iterations:\s*(\d+)/iu.exec(sectionBody);
  const maxFromSection = maxMatch ? Number(maxMatch[1]) : Number.NaN;
  const header = parseHeaderMeta(markdown);

  const targetScore = Number.isFinite(targetFromSection)
    ? clampScore(targetFromSection)
    : REVIEW_LOOP_DEFAULT_TARGET_SCORE;

  let maxIterationsCandidate = REVIEW_LOOP_DEFAULT_MAX_ITERATIONS;
  if (Number.isInteger(maxFromSection) && maxFromSection > 0) {
    maxIterationsCandidate = maxFromSection;
  } else if (Number.isInteger(header.maxIterations) && header.maxIterations > 0) {
    maxIterationsCandidate = header.maxIterations;
  }
  // The recorded table may hold more rows than the stated maximum; never report fewer.
  const maxIterations = Math.max(maxIterationsCandidate, iterations.length);

  let inferredStopReason = "user_opt_out";
  if (iterations[iterations.length - 1].qualityScore >= targetScore) {
    inferredStopReason = "quality_threshold_met";
  } else if (iterations.length >= maxIterations) {
    inferredStopReason = "max_iterations_reached";
  }
  const stopReason = stopReasonFromSection ?? header.stopReason ?? inferredStopReason;

  return {
    type: "review-loop",
    version: "1",
    stage,
    artifactPath,
    targetScore,
    maxIterations,
    stopReason,
    iterations,
  };
}
|
|
618
|
+
/**
 * Wraps a review-loop envelope as a version "1" "stage-output" skill envelope.
 *
 * @param {{ stage: string }} envelope - Review-loop envelope used as the payload.
 * @param {string} [emittedAt] - Timestamp; defaults to the current time in ISO-8601 form.
 * @param {unknown} [agent] - Added as `agent` only when truthy.
 * @returns {object}
 */
export function toSkillEnvelope(envelope, emittedAt = new Date().toISOString(), agent) {
  const skillEnvelope = {
    version: "1",
    kind: "stage-output",
    stage: envelope.stage,
    payload: envelope,
    emittedAt,
  };
  if (agent) {
    skillEnvelope.agent = agent;
  }
  return skillEnvelope;
}
|
|
628
|
+
/**
 * Runs the review loop until the target score is met, the iteration budget is
 * used up, or the caller opts out.
 *
 * @param {object} input - Iteration input (`stage`, `artifactPath`, optional
 *   `checklist`, `budget`, `priorIterations`).
 * @param {object} options
 * @param {(request: object) => Promise<unknown>} options.dispatcher - Performs the review.
 * @param {(result: object) => unknown} options.applyFindings - Called after each
 *   iteration; awaited.
 * @param {() => unknown} [options.shouldOptOut] - Checked before each iteration;
 *   may be sync or async.
 * @param {(envelope: object) => unknown} [options.emitEnvelope] - Receives the
 *   final envelope; may be sync or async.
 * @returns {Promise<{ iterations: Array<object>, qualityScore: number, stopReason: string, envelope: object }>}
 *   `iterations` holds only the iterations run by this call; the envelope also
 *   includes the prior ones.
 */
export async function runReviewLoop(input, options) {
  const budget = normalizeBudget(input.budget);
  const prior = [...(input.priorIterations ?? [])];
  const iterations = [];
  // Compact row shape used in prompts and in the envelope.
  const toSummaryRow = (row) => ({
    iteration: row.iteration,
    qualityScore: row.qualityScore,
    findingsCount: row.findings.length,
  });

  // Used when the loop below ends without an explicit break.
  let stopReason = "max_iterations_reached";
  // NOTE(review): the budget caps iterations run by this call, while iteration
  // numbers continue after `prior` — confirm that is the intended accounting.
  while (iterations.length < budget.maxIterations) {
    // Awaited so an async opt-out check is honoured; an unawaited promise is always truthy.
    if (await options.shouldOptOut?.()) {
      stopReason = "user_opt_out";
      break;
    }
    const iteration = prior.length + iterations.length + 1;
    const result = await runReviewLoopIteration({
      ...input,
      iteration,
      priorIterations: [...prior, ...iterations.map(toSummaryRow)],
    }, options.dispatcher);
    iterations.push(result);
    await options.applyFindings(result);
    if (result.qualityScore >= budget.targetScore) {
      stopReason = "quality_threshold_met";
      break;
    }
  }

  const summaryRows = [...prior, ...iterations.map(toSummaryRow)];
  const finalQualityScore = summaryRows.length > 0
    ? summaryRows[summaryRows.length - 1].qualityScore
    : 0;
  const envelope = buildReviewLoopEnvelope({
    stage: input.stage,
    artifactPath: input.artifactPath,
    targetScore: budget.targetScore,
    maxIterations: budget.maxIterations,
    stopReason,
    iterations: summaryRows,
  });
  // Awaited so a failure in an async emitter rejects this call instead of being unhandled.
  await options.emitEnvelope?.(envelope);

  return {
    iterations,
    qualityScore: finalQualityScore,
    stopReason,
    envelope,
  };
}
|
|
687
|
+
/**
 * Tells whether a stage name is listed in REVIEW_LOOP_STAGES.
 *
 * @param {unknown} stage
 * @returns {boolean}
 */
export function isReviewLoopStage(stage) {
  return REVIEW_LOOP_STAGES.some((knownStage) => knownStage === stage);
}
|