principles-disciple 1.8.2 → 1.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +4 -4
- package/package.json +1 -1
- package/templates/langs/en/skills/ai-sprint-orchestration/EXAMPLES.md +63 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/REFERENCE.md +136 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/SKILL.md +67 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +214 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +107 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +107 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +105 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +108 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/references/workflow-v1-acceptance-checklist.md +58 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/references/workflow-v1.4-work-unit-handoff.md +190 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/archive.mjs +310 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/contract-enforcement.mjs +683 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/decision.mjs +604 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/state-store.mjs +32 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/task-specs.mjs +707 -0
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +3419 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/EXAMPLES.md +63 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/REFERENCE.md +136 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/SKILL.md +67 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +214 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +107 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +107 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +105 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +108 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/workflow-v1-acceptance-checklist.md +58 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/workflow-v1.4-work-unit-handoff.md +190 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/archive.mjs +310 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/contract-enforcement.mjs +683 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/decision.mjs +604 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/state-store.mjs +32 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/task-specs.mjs +707 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +3419 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/archive.test.mjs +230 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/contract-enforcement.test.mjs +672 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/decision.test.mjs +1321 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +1419 -0
|
@@ -0,0 +1,1321 @@
|
|
|
1
|
+
import test from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import {
|
|
4
|
+
decideStage,
|
|
5
|
+
normalizeVerdict,
|
|
6
|
+
extractBullets,
|
|
7
|
+
buildStageMetrics,
|
|
8
|
+
hasExplicitVerdict,
|
|
9
|
+
parseDimensions,
|
|
10
|
+
checkDimensionThresholds,
|
|
11
|
+
extractContractItems,
|
|
12
|
+
checkContractCompletion,
|
|
13
|
+
buildHandoff,
|
|
14
|
+
extractCodeEvidence,
|
|
15
|
+
hasCodeEvidence,
|
|
16
|
+
extractMacroAnswers,
|
|
17
|
+
} from '../scripts/lib/decision.mjs';
|
|
18
|
+
import { OUTPUT_QUALITY } from '../scripts/lib/contract-enforcement.mjs';
|
|
19
|
+
|
|
20
|
+
// Checks that the verdict keyword after "VERDICT:" comes back upper-cased,
// whether the report wrote it in lower or upper case.
test('normalizeVerdict extracts explicit verdict', () => {
  const lowerCased = normalizeVerdict('VERDICT: approve');
  assert.equal(lowerCased, 'APPROVE');

  const upperCased = normalizeVerdict('VERDICT: BLOCK');
  assert.equal(upperCased, 'BLOCK');
});
|
|
24
|
+
|
|
25
|
+
// A plain APPROVE verdict is recognised; PARTIAL_APPROVE is not.
test('hasExplicitVerdict rejects non-standard verdicts', () => {
  const standard = hasExplicitVerdict('VERDICT: APPROVE');
  assert.equal(standard, true);

  const nonStandard = hasExplicitVerdict('VERDICT: PARTIAL_APPROVE');
  assert.equal(nonStandard, false);
});
|
|
29
|
+
|
|
30
|
+
// Only the bullets listed under BLOCKERS are expected back, without the
// leading "- " marker; the FINDINGS bullet must not leak into the result.
test('extractBullets reads bullet lines from section', () => {
  const reportLines = [
    'VERDICT: REVISE',
    'BLOCKERS:',
    '- blocker one',
    '- blocker two',
    'FINDINGS:',
    '- finding',
  ];
  const report = reportLines.join('\n');

  const blockerBullets = extractBullets(report, 'BLOCKERS');

  assert.deepEqual(blockerBullets, ['blocker one', 'blocker two']);
});
|
|
41
|
+
|
|
42
|
+
// Two APPROVE verdicts with "None." blockers on round 1 of 3 (contract
// validation skipped) are expected to yield the 'advance' outcome.
test('decideStage advances only when both reviewers approve', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY', 'EVIDENCE'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
  };
  const cleanApproval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const { outcome } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone\nEVIDENCE:\nDone',
    reviewerA: cleanApproval,
    reviewerB: cleanApproval,
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(outcome, 'advance');
});
|
|
58
|
+
|
|
59
|
+
// On the final allowed round (3 of 3) with REVISE and BLOCK verdicts, the
// stage is expected to halt and report both reviewers' blockers in order.
test('decideStage halts when max rounds reached without approval', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
  };

  const { outcome, blockers } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: REVISE\nBLOCKERS:\n- one',
    reviewerB: 'VERDICT: BLOCK\nBLOCKERS:\n- two',
    currentRound: 3,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(outcome, 'halt');
  assert.deepEqual(blockers, ['one', 'two']);
});
|
|
76
|
+
|
|
77
|
+
// Verifies the approval count and the per-section presence flags for the
// producer and reviewer reports, including an inline "CHECKS: ..." line.
test('buildStageMetrics tracks section and approval counts', () => {
  const approvingReview = 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nCHECKS: criteria=met';

  const { approvalCount, producerSectionChecks, reviewerSectionChecks } = buildStageMetrics({
    stageCriteria: {
      requiredProducerSections: ['SUMMARY', 'CHECKS'],
      requiredReviewerSections: ['VERDICT', 'CHECKS'],
    },
    producer: 'SUMMARY:\nDone\nCHECKS: evidence=ok',
    reviewerA: approvingReview,
    reviewerB: approvingReview,
  });

  assert.equal(approvalCount, 2);
  assert.equal(producerSectionChecks.SUMMARY, true);
  assert.equal(producerSectionChecks.CHECKS, true);
  assert.equal(reviewerSectionChecks.VERDICT, true);
  assert.equal(reviewerSectionChecks.CHECKS, true);
});
|
|
94
|
+
|
|
95
|
+
// Reviewer A uses PARTIAL_APPROVE, which is not a strict verdict; the stage
// is expected to be sent back for revision with a blocker naming Reviewer A.
test('decideStage does not advance with invalid reviewer verdict syntax', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
  };

  const { outcome, blockers } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: PARTIAL_APPROVE\nBLOCKERS:\n- None.',
    reviewerB: 'VERDICT: APPROVE\nBLOCKERS:\n- None.',
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  assert.equal(outcome, 'revise');
  const [firstBlocker] = blockers;
  assert.equal(firstBlocker, 'Reviewer A did not emit a strict VERDICT: APPROVE|REVISE|BLOCK line.');
});
|
|
112
|
+
|
|
113
|
+
// An APPROVE verdict that still lists a concrete blocker must not let the
// stage advance; the single real blocker is counted in the metrics.
test('decideStage does not advance when reviewers list real blockers despite APPROVE', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
  };

  const { outcome, metrics } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: APPROVE\nBLOCKERS:\n- Missing test for edge case',
    reviewerB: 'VERDICT: APPROVE\nBLOCKERS:\n- None.',
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  assert.equal(outcome, 'revise');
  assert.equal(metrics.blockerCount, 1);
});
|
|
130
|
+
|
|
131
|
+
// --- Multi-dimensional scoring tests ---
|
|
132
|
+
|
|
133
|
+
// The semicolon-separated key=value pairs on the DIMENSIONS line are expected
// back as an object with numeric values.
test('parseDimensions extracts key=value pairs from DIMENSIONS line', () => {
  const report = 'VERDICT: APPROVE\nDIMENSIONS: evidence_quality=4;hypothesis_coverage=3;root_cause_confidence=5\nBLOCKERS:\n- None.';
  const expectedScores = {
    evidence_quality: 4,
    hypothesis_coverage: 3,
    root_cause_confidence: 5,
  };

  assert.deepEqual(parseDimensions(report), expectedScores);
});
|
|
142
|
+
|
|
143
|
+
// Reports without a DIMENSIONS line, including the empty string, give {}.
test('parseDimensions returns empty object when no DIMENSIONS line', () => {
  const withoutDimensions = parseDimensions('VERDICT: APPROVE');
  assert.deepEqual(withoutDimensions, {});

  const fromEmptyText = parseDimensions('');
  assert.deepEqual(fromEmptyText, {});
});
|
|
147
|
+
|
|
148
|
+
// Entries with no '=' or a non-numeric value are dropped; valid ones are kept.
test('parseDimensions ignores malformed entries', () => {
  const report = 'DIMENSIONS: valid=3;no_equals_sign;also_valid=4;bad=not_a_number';
  const expectedScores = { valid: 3, also_valid: 4 };

  assert.deepEqual(parseDimensions(report), expectedScores);
});
|
|
153
|
+
|
|
154
|
+
// With every required dimension at or above the threshold of 3, no failures
// are reported and each per-dimension check is true.
test('checkDimensionThresholds passes when all dimensions meet threshold', () => {
  const requiredDimensions = ['evidence_quality', 'scope_control'];
  const { failures, checks } = checkDimensionThresholds(
    { evidence_quality: 4, scope_control: 5 },
    requiredDimensions,
    3,
  );

  assert.equal(failures.length, 0);
  assert.equal(checks.evidence_quality, true);
  assert.equal(checks.scope_control, true);
});
|
|
161
|
+
|
|
162
|
+
// A score of 2 against a threshold of 3 yields exactly one failure and a
// false check for that dimension; the passing dimension stays true.
test('checkDimensionThresholds reports failures for below-threshold scores', () => {
  const requiredDimensions = ['evidence_quality', 'scope_control'];
  const { failures, checks } = checkDimensionThresholds(
    { evidence_quality: 2, scope_control: 4 },
    requiredDimensions,
    3,
  );

  assert.equal(failures.length, 1);
  assert.equal(checks.evidence_quality, false);
  assert.equal(checks.scope_control, true);
});
|
|
169
|
+
|
|
170
|
+
// A required dimension that has no score counts as one failure, and its
// check entry is null rather than true or false.
test('checkDimensionThresholds reports failure when dimension not scored', () => {
  const requiredDimensions = ['evidence_quality', 'missing_dim'];
  const { failures, checks } = checkDimensionThresholds(
    { evidence_quality: 4 },
    requiredDimensions,
    3,
  );

  assert.equal(failures.length, 1);
  assert.equal(checks.evidence_quality, true);
  assert.equal(checks.missing_dim, null);
});
|
|
177
|
+
|
|
178
|
+
// Reviewer A scores evidence_quality=2 against a threshold of 3 while both
// reviewers APPROVE. The low score must be recorded and must downgrade the
// output quality, but it must not stop the stage from advancing.
test('decideStage advances when dimension scores below threshold but reviewers approve', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    scoringDimensions: ['evidence_quality', 'scope_control'],
    dimensionThreshold: 3,
  };
  const producerReport = 'SUMMARY:\nDone\nCHANGES:\nNone\nEVIDENCE:\nFound\nCODE_EVIDENCE:\nfiles_checked: a.ts\nKEY_EVENTS:\nEvent1\nHYPOTHESIS_MATRIX:\nH1: SUPPORTED\nCHECKS: all=ok\nOPEN_RISKS:\nNone';
  const lowScoringReview = 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nFINDINGS:\nGood\nCODE_EVIDENCE:\nfiles_verified: a.ts\nHYPOTHESIS_MATRIX:\nH1: SUPPORTED\nNEXT_FOCUS:\nNone\nCHECKS: all=ok\nDIMENSIONS: evidence_quality=2;scope_control=4';
  const highScoringReview = 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nFINDINGS:\nGood\nCODE_EVIDENCE:\nfiles_verified: a.ts\nHYPOTHESIS_MATRIX:\nH1: SUPPORTED\nNEXT_FOCUS:\nNone\nCHECKS: all=ok\nDIMENSIONS: evidence_quality=4;scope_control=5';

  const { outcome, metrics, outputQuality } = decideStage({
    stageCriteria,
    producer: producerReport,
    reviewerA: lowScoringReview,
    reviewerB: highScoringReview,
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  // Dimension scores are subjective judgments, so they only affect
  // outputQuality; two APPROVE verdicts are enough to advance.
  assert.equal(outcome, 'advance');
  assert.ok(metrics.dimensionFailures.length > 0, 'dimension failures should still be recorded');
  assert.equal(outputQuality, 'needs_work', 'low dimensions downgrade quality');
});
|
|
200
|
+
|
|
201
|
+
// Both reviewers APPROVE and score every dimension at 4 or 5 (threshold 3);
// with contract validation skipped the stage is expected to advance.
test('decideStage advances when all dimension scores meet threshold', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    scoringDimensions: ['evidence_quality', 'scope_control'],
    dimensionThreshold: 3,
  };

  const { outcome } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: APPROVE\nDIMENSIONS: evidence_quality=4;scope_control=5\nBLOCKERS:\n- None.',
    reviewerB: 'VERDICT: APPROVE\nDIMENSIONS: evidence_quality=5;scope_control=4\nBLOCKERS:\n- None.',
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(outcome, 'advance');
});
|
|
220
|
+
|
|
221
|
+
// Each reviewer's parsed DIMENSIONS scores are exposed on the metrics, and
// no failures are recorded when both scores clear the threshold.
test('buildStageMetrics includes dimension scores and checks', () => {
  const stageCriteria = {
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT'],
    scoringDimensions: ['correctness'],
    dimensionThreshold: 3,
  };

  const { reviewerADimensions, reviewerBDimensions, dimensionFailures } = buildStageMetrics({
    stageCriteria,
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: APPROVE\nDIMENSIONS: correctness=4',
    reviewerB: 'VERDICT: APPROVE\nDIMENSIONS: correctness=5',
  });

  assert.deepEqual(reviewerADimensions, { correctness: 4 });
  assert.deepEqual(reviewerBDimensions, { correctness: 5 });
  assert.equal(dimensionFailures.length, 0);
});
|
|
238
|
+
|
|
239
|
+
// --- Sprint contract tests ---
|
|
240
|
+
|
|
241
|
+
// Three CONTRACT bullets with DONE / PARTIAL / TODO statuses are expected to
// come back as three items carrying those statuses in order.
test('extractContractItems parses CONTRACT section', () => {
  const reportLines = [
    'SUMMARY:\nDone',
    'CONTRACT:',
    '- Root cause identified with evidence status: DONE evidence: "see EVIDENCE"',
    '- Reproduction steps documented status: PARTIAL evidence: "partial"',
    '- Fix proposed status: TODO',
  ];

  const contractItems = extractContractItems(reportLines.join('\n'));

  assert.equal(contractItems.length, 3);
  const [first, second, third] = contractItems;
  assert.equal(first.status, 'DONE');
  assert.equal(second.status, 'PARTIAL');
  assert.equal(third.status, 'TODO');
});
|
|
256
|
+
|
|
257
|
+
// A report with no CONTRACT section produces an empty item list.
test('extractContractItems returns empty array when no CONTRACT section', () => {
  const contractItems = extractContractItems('SUMMARY:\nDone');
  assert.deepEqual(contractItems, []);
});
|
|
260
|
+
|
|
261
|
+
// When every deliverable is DONE the summary flags completion, counts both
// items as done and lists nothing as incomplete.
test('checkContractCompletion reports all done', () => {
  const { allDone, doneItems, incompleteItems } = checkContractCompletion([
    { deliverable: 'Root cause', status: 'DONE' },
    { deliverable: 'Fix plan', status: 'DONE' },
  ]);

  assert.equal(allDone, true);
  assert.equal(doneItems, 2);
  assert.equal(incompleteItems.length, 0);
});
|
|
271
|
+
|
|
272
|
+
// One PARTIAL deliverable is enough to clear allDone and to appear in the
// incomplete list.
test('checkContractCompletion reports incomplete items', () => {
  const { allDone, incompleteItems } = checkContractCompletion([
    { deliverable: 'Root cause', status: 'DONE' },
    { deliverable: 'Fix plan', status: 'PARTIAL' },
  ]);

  assert.equal(allDone, false);
  assert.equal(incompleteItems.length, 1);
});
|
|
281
|
+
|
|
282
|
+
// Both reviewers APPROVE, but the producer's CONTRACT still has a PARTIAL
// item; the stage must be revised with a "Contract not fulfilled" blocker.
test('decideStage does not advance when contract items are incomplete', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    requiredDeliverables: ['root_cause', 'fix_plan'],
  };
  const cleanApproval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const { outcome, blockers } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone\nCONTRACT:\n- Root cause identified status: DONE\n- Fix plan written status: PARTIAL',
    reviewerA: cleanApproval,
    reviewerB: cleanApproval,
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  assert.equal(outcome, 'revise');
  const mentionsContract = blockers.some((blocker) => blocker.includes('Contract not fulfilled'));
  assert.ok(mentionsContract);
});
|
|
300
|
+
|
|
301
|
+
// A single DONE contract item plus two clean approvals is expected to
// advance; this case runs with skipContractValidation enabled.
test('decideStage advances when all contract items are DONE', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    requiredDeliverables: ['root_cause'],
  };
  const cleanApproval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const { outcome } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone\nCONTRACT:\n- Root cause identified status: DONE',
    reviewerA: cleanApproval,
    reviewerB: cleanApproval,
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(outcome, 'advance');
});
|
|
319
|
+
|
|
320
|
+
// --- Structured handoff tests ---
|
|
321
|
+
|
|
322
|
+
// Builds a handoff from two REVISE reviews and a producer report, then checks
// that blockers are concatenated in reviewer order, the two NEXT_FOCUS lines
// are joined with "; ", the producer CHECKS value and contract items are
// carried over, and the stage name, round and a generation timestamp are set.
test('buildHandoff extracts structured data from reviewer reports', () => {
  const handoff = buildHandoff({
    reviewerA: 'VERDICT: REVISE\nBLOCKERS:\n- Missing edge case\nNEXT_FOCUS: Add test for null input\nDIMENSIONS: correctness=2;scope=4',
    reviewerB: 'VERDICT: REVISE\nBLOCKERS:\n- No error handling\nNEXT_FOCUS: Handle errors gracefully\nDIMENSIONS: correctness=3;scope=5',
    producer: 'SUMMARY:\nDone\nCHECKS: evidence=ok\nCONTRACT:\n- Root cause status: DONE\n- Fix status: PARTIAL',
    metrics: {
      reviewerADimensions: { correctness: 2, scope: 4 },
      reviewerBDimensions: { correctness: 3, scope: 5 },
    },
    stageName: 'implement',
    round: 1,
  });

  assert.deepEqual(handoff.blockers, ['Missing edge case', 'No error handling']);
  assert.equal(handoff.focusForNextRound, 'Add test for null input; Handle errors gracefully');
  assert.equal(handoff.producerChecks, 'evidence=ok');
  // The length is a primitive, so a scalar comparison states the intent;
  // deepEqual is reserved for arrays and objects.
  assert.equal(handoff.contractItems.length, 2);
  assert.equal(handoff.stageName, 'implement');
  assert.equal(handoff.round, 1);
  assert.ok(handoff.generatedAt);
});
|
|
343
|
+
|
|
344
|
+
// With no NEXT_FOCUS line in either review and only "None." blockers, the
// handoff carries a null focus and an empty blocker list.
test('buildHandoff handles missing NEXT_FOCUS gracefully', () => {
  const cleanApproval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const { focusForNextRound, blockers } = buildHandoff({
    reviewerA: cleanApproval,
    reviewerB: cleanApproval,
    producer: 'SUMMARY:\nDone',
    metrics: {},
    stageName: 'verify',
    round: 2,
  });

  assert.equal(focusForNextRound, null);
  assert.deepEqual(blockers, []);
});
|
|
357
|
+
|
|
358
|
+
// --- Combined: dimensions + contract + blockers ---
|
|
359
|
+
|
|
360
|
+
// Happy path combining all three gates: dimension scores above threshold,
// a DONE contract item, and no reviewer blockers.
test('decideStage with dimensions, contract, and blockers all passing advances', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    scoringDimensions: ['correctness', 'scope_control'],
    dimensionThreshold: 3,
    requiredDeliverables: ['root_cause'],
  };

  const { outcome, metrics } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone\nCONTRACT:\n- Root cause found status: DONE',
    reviewerA: 'VERDICT: APPROVE\nDIMENSIONS: correctness=4;scope_control=5\nBLOCKERS:\n- None.',
    reviewerB: 'VERDICT: APPROVE\nDIMENSIONS: correctness=5;scope_control=4\nBLOCKERS:\n- None.',
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(outcome, 'advance');
  assert.equal(metrics.dimensionFailures.length, 0);
  assert.equal(metrics.contractCheck.allDone, true);
});
|
|
382
|
+
|
|
383
|
+
// Both a low dimension score and an unfinished contract item are present.
// Only the contract is expected to block; the dimension failure is recorded
// in the metrics without contributing to the outcome.
test('decideStage with dimension failure AND contract failure — contract blocks but dimensions do not', () => {
  const stageCriteria = {
    requiredApprovals: 2,
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    scoringDimensions: ['correctness'],
    dimensionThreshold: 3,
    requiredDeliverables: ['fix'],
  };

  const { outcome, blockers, metrics } = decideStage({
    stageCriteria,
    producer: 'SUMMARY:\nDone\nCONTRACT:\n- Fix implemented status: TODO',
    reviewerA: 'VERDICT: APPROVE\nDIMENSIONS: correctness=2\nBLOCKERS:\n- None.',
    reviewerB: 'VERDICT: APPROVE\nDIMENSIONS: correctness=5\nBLOCKERS:\n- None.',
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  // Contract failures still force a revision; dimension failures do not.
  assert.equal(outcome, 'revise');
  const contractBlockerPresent = blockers.some((blocker) => blocker.includes('Contract not fulfilled'));
  assert.ok(contractBlockerPresent, 'should have contract blocker');
  assert.ok(metrics.dimensionFailures.length > 0, 'dimension failures should still be recorded');
});
|
|
406
|
+
|
|
407
|
+
// --- CODE_EVIDENCE tests ---
|
|
408
|
+
|
|
409
|
+
// A producer-style CODE_EVIDENCE section (files_checked, evidence_source,
// sha, branch/worktree) is parsed into the corresponding camelCase fields.
test('extractCodeEvidence parses producer CODE_EVIDENCE section', () => {
  const reportLines = [
    'SUMMARY:\nDone',
    'CODE_EVIDENCE:',
    '- files_checked: [src/observer.js, src/persistence.ts]',
    '- evidence_source: local',
    '- sha: abc123def',
    '- branch/worktree: sprint/abc123/investigate',
    'FINDINGS:\nNone.',
  ];

  const { filesChecked, evidenceSource, sha, branchWorktree } = extractCodeEvidence(reportLines.join('\n'));

  assert.deepEqual(filesChecked, ['src/observer.js', 'src/persistence.ts']);
  assert.equal(evidenceSource, 'local');
  assert.equal(sha, 'abc123def');
  assert.equal(branchWorktree, 'sprint/abc123/investigate');
});
|
|
425
|
+
|
|
426
|
+
// Reviewer reports use files_verified instead of files_checked; the parsed
// result still exposes the list as filesChecked.
test('extractCodeEvidence parses reviewer CODE_EVIDENCE section', () => {
  const reportLines = [
    'VERDICT: APPROVE',
    'CODE_EVIDENCE:',
    '- files_verified: [src/fix.ts, src/test.ts]',
    '- evidence_source: both',
    '- sha: fed123',
  ];

  const { filesChecked, evidenceSource, sha } = extractCodeEvidence(reportLines.join('\n'));

  assert.deepEqual(filesChecked, ['src/fix.ts', 'src/test.ts']);
  assert.equal(evidenceSource, 'both');
  assert.equal(sha, 'fed123');
});
|
|
439
|
+
|
|
440
|
+
// Text lacking a CODE_EVIDENCE section, including the empty string, gives null.
test('extractCodeEvidence returns null when no CODE_EVIDENCE section', () => {
  const fromSummaryOnly = extractCodeEvidence('SUMMARY:\nDone');
  assert.equal(fromSummaryOnly, null);

  const fromEmptyText = extractCodeEvidence('');
  assert.equal(fromEmptyText, null);
});
|
|
444
|
+
|
|
445
|
+
// The optional evidence_scope bullet is surfaced as evidenceScope.
test('extractCodeEvidence parses evidence_scope annotation', () => {
  const reportLines = [
    'CODE_EVIDENCE:',
    '- files_checked: [src/runtime.ts]',
    '- evidence_source: both',
    '- sha: fed123',
    '- evidence_scope: openclaw',
  ];

  const { evidenceScope } = extractCodeEvidence(reportLines.join('\n'));

  assert.equal(evidenceScope, 'openclaw');
});
|
|
456
|
+
|
|
457
|
+
// Fields absent from the section (evidence_source, evidence_scope) come back
// as null while the fields that are present are still parsed.
test('extractCodeEvidence handles missing fields gracefully', () => {
  const report = 'CODE_EVIDENCE:\n- files_checked: [src/a.ts]\n- sha: abc';

  const { filesChecked, evidenceSource, evidenceScope } = extractCodeEvidence(report);

  assert.deepEqual(filesChecked, ['src/a.ts']);
  assert.equal(evidenceSource, null);
  assert.equal(evidenceScope, null);
});
|
|
464
|
+
|
|
465
|
+
// True when a CODE_EVIDENCE section exists in the text, false otherwise.
test('hasCodeEvidence returns true when CODE_EVIDENCE present', () => {
  const withSection = hasCodeEvidence('CODE_EVIDENCE:\n- files_checked: [a.ts]');
  assert.equal(withSection, true);

  const withoutSection = hasCodeEvidence('SUMMARY:\nDone');
  assert.equal(withoutSection, false);
});
|
|
469
|
+
|
|
470
|
+
// When all three roles supply a CODE_EVIDENCE section, the metrics expose a
// presence flag and the parsed evidence (checked here via sha) per role.
test('buildStageMetrics includes CODE_EVIDENCE fields', () => {
  const stageCriteria = {
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT'],
  };

  const stageMetrics = buildStageMetrics({
    stageCriteria,
    producer: 'SUMMARY:\nDone\nCODE_EVIDENCE:\n- files_checked: [a.ts]\n- sha: abc',
    reviewerA: 'VERDICT: APPROVE\nCODE_EVIDENCE:\n- files_verified: [b.ts]\n- sha: def',
    reviewerB: 'VERDICT: APPROVE\nCODE_EVIDENCE:\n- files_verified: [c.ts]\n- sha: def',
  });

  // Presence flags.
  assert.equal(stageMetrics.producerHasCodeEvidence, true);
  assert.equal(stageMetrics.reviewerAHasCodeEvidence, true);
  assert.equal(stageMetrics.reviewerBHasCodeEvidence, true);
  // Parsed evidence.
  assert.equal(stageMetrics.producerCodeEvidence.sha, 'abc');
  assert.equal(stageMetrics.reviewerACodeEvidence.sha, 'def');
  assert.equal(stageMetrics.reviewerBCodeEvidence.sha, 'def');
});
|
|
488
|
+
|
|
489
|
+
// With no CODE_EVIDENCE section in any report, every presence flag is false
// and every parsed-evidence field is null.
test('buildStageMetrics handles missing CODE_EVIDENCE gracefully', () => {
  const stageCriteria = {
    requiredProducerSections: ['SUMMARY'],
    requiredReviewerSections: ['VERDICT'],
  };

  const stageMetrics = buildStageMetrics({
    stageCriteria,
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: APPROVE',
    reviewerB: 'VERDICT: APPROVE',
  });

  // Presence flags.
  assert.equal(stageMetrics.producerHasCodeEvidence, false);
  assert.equal(stageMetrics.reviewerAHasCodeEvidence, false);
  assert.equal(stageMetrics.reviewerBHasCodeEvidence, false);
  // Parsed evidence.
  assert.equal(stageMetrics.producerCodeEvidence, null);
  assert.equal(stageMetrics.reviewerACodeEvidence, null);
  assert.equal(stageMetrics.reviewerBCodeEvidence, null);
});
|
|
507
|
+
|
|
508
|
+
// The handoff carries the parsed CODE_EVIDENCE of the producer and of each
// reviewer; the file lists are used to tell the three apart.
test('buildHandoff includes CODE_EVIDENCE from all roles', () => {
  const { producerCodeEvidence, reviewerACodeEvidence, reviewerBCodeEvidence } = buildHandoff({
    reviewerA: 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nCODE_EVIDENCE:\n- files_verified: [a.ts]\n- sha: abc',
    reviewerB: 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nCODE_EVIDENCE:\n- files_verified: [b.ts]\n- sha: def',
    producer: 'SUMMARY:\nDone\nCODE_EVIDENCE:\n- files_checked: [main.ts]\n- sha: ghi',
    metrics: {},
    stageName: 'implement-pass-1',
    round: 1,
  });

  assert.deepEqual(producerCodeEvidence.filesChecked, ['main.ts']);
  assert.deepEqual(reviewerACodeEvidence.filesChecked, ['a.ts']);
  assert.deepEqual(reviewerBCodeEvidence.filesChecked, ['b.ts']);
});
|
|
521
|
+
|
|
522
|
+
// --- Global reviewer tests ---
|
|
523
|
+
|
|
524
|
+
// All five required questions have a substantive answer, so each one is both
// found and satisfied, and the overall flag is true.
test('extractMacroAnswers parses MACRO_ANSWERS section', () => {
  const reportLines = [
    'VERDICT: APPROVE',
    'MACRO_ANSWERS:',
    'Q1: OpenClaw hook timing verified via source reading — hooks/subagent.ts line 42',
    'Q2: Business flow closed — empathy results persisted to subagent_workflows table',
    'Q3: Architecture converging — unified RuntimeDirectDriver used by both empathy and deep-reflect',
    'Q4: Data flow closed — sessionKey = child session identity, runId = agent run identity',
    'Q5: Sprint moves closer to unified PD subagent workflow — next step is Nocturnal migration',
    'NEXT_FOCUS: Monitor shadow-run parity',
  ];

  const { found, satisfied, allSatisfied } = extractMacroAnswers(
    reportLines.join('\n'),
    ['Q1', 'Q2', 'Q3', 'Q4', 'Q5'],
  );

  assert.deepEqual(found, ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']);
  assert.deepEqual(satisfied, ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']);
  assert.equal(allSatisfied, true);
});
|
|
540
|
+
|
|
541
|
+
// Q2 is answered "n/a" and Q4 is not answered at all: Q2 counts as found but
// not satisfied, Q4 as neither, and the overall flag is false.
test('extractMacroAnswers marks incomplete answers as not satisfied', () => {
  const reportLines = [
    'VERDICT: REVISE',
    'MACRO_ANSWERS:',
    'Q1: OpenClaw hook timing verified — hooks/subagent.ts line 42',
    'Q2: n/a — pending cross-repo verification',
    'Q3: Architecture converging — unified RuntimeDirectDriver',
    'BLOCKERS:\n- Need more evidence for Q2',
  ];

  const { found, satisfied, allSatisfied } = extractMacroAnswers(
    reportLines.join('\n'),
    ['Q1', 'Q2', 'Q3', 'Q4'],
  );

  assert.deepEqual(found, ['Q1', 'Q2', 'Q3']);
  assert.deepEqual(satisfied, ['Q1', 'Q3']);
  assert.equal(allSatisfied, false);
});
|
|
555
|
+
|
|
556
|
+
// Without a MACRO_ANSWERS section nothing is found or satisfied, and the
// required questions therefore cannot all be satisfied.
test('extractMacroAnswers returns empty when no MACRO_ANSWERS section', () => {
  const report = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const { found, satisfied, allSatisfied } = extractMacroAnswers(report, ['Q1', 'Q2']);

  assert.deepEqual(found, []);
  assert.deepEqual(satisfied, []);
  assert.equal(allSatisfied, false);
});
|
|
562
|
+
|
|
563
|
+
// Supplies a global reviewer report that APPROVEs and answers both required
// macro questions, then checks the global-reviewer metrics: verdict, explicit
// verdict flag, required flag, per-section checks, and macro-answer tracking.
test('buildStageMetrics includes global_reviewer fields', () => {
  const metrics = buildStageMetrics({
    stageCriteria: {
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT'],
      requiredGlobalReviewerSections: ['VERDICT', 'MACRO_ANSWERS'],
      globalReviewerRequired: true,
      globalReviewerMustAnswer: ['Q1', 'Q2'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: APPROVE',
    reviewerB: 'VERDICT: APPROVE',
    globalReviewer: [
      'VERDICT: APPROVE',
      'MACRO_ANSWERS:',
      'Q1: OpenClaw compatible — hooks/subagent.ts verified',
      'Q2: Business flow closed — results persisted',
    ].join('\n'),
  });

  assert.equal(metrics.globalReviewerVerdict, 'APPROVE');
  assert.equal(metrics.globalReviewerHasExplicitVerdict, true);
  assert.equal(metrics.globalReviewerRequired, true);
  // Section checks are booleans, so they use the scalar assertion like the
  // other flags; deepEqual is kept for the array-valued fields below.
  assert.equal(metrics.globalReviewerChecks.VERDICT, true);
  assert.equal(metrics.globalReviewerChecks.MACRO_ANSWERS, true);
  assert.deepEqual(metrics.macroAnswersFound, ['Q1', 'Q2']);
  assert.deepEqual(metrics.macroAnswersSatisfied, ['Q1', 'Q2']);
  assert.equal(metrics.macroAnswersAllSatisfied, true);
  assert.deepEqual(metrics.requiredMacroAnswers, ['Q1', 'Q2']);
});
|
|
593
|
+
|
|
594
|
+
test('decideStage with globalReviewerRequired — advances when all three APPROVE', () => {
  // Reviewer A, reviewer B and the global reviewer all approve; three approvals are required.
  const localApproval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';
  const globalApproval = [
    'VERDICT: APPROVE',
    'MACRO_ANSWERS:',
    'Q1: OpenClaw compatible',
    'Q2: Business flow closed',
  ].join('\n');

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 3,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      requiredGlobalReviewerSections: ['VERDICT', 'MACRO_ANSWERS'],
      globalReviewerRequired: true,
      globalReviewerMustAnswer: ['Q1', 'Q2'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: localApproval,
    reviewerB: localApproval,
    globalReviewer: globalApproval,
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(decision.outcome, 'advance');
  assert.equal(decision.metrics.approvalCount, 3);
});
|
|
621
|
+
|
|
622
|
+
// ============================================================================
|
|
623
|
+
// Output Quality Tests (Task 3: Shadow-Complete vs Production-Ready)
|
|
624
|
+
// ============================================================================
|
|
625
|
+
|
|
626
|
+
test('decideStage returns outputQuality: needs_work when revise required', () => {
  // Reviewer A returns REVISE on round 1 of 3.
  const { outcome, outputQuality } = decideStage({
    stageCriteria: {
      requiredApprovals: 1,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: REVISE\nBLOCKERS:\n- Need more tests',
    reviewerB: 'VERDICT: APPROVE\nBLOCKERS:\n- None.',
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  assert.equal(outcome, 'revise');
  assert.equal(outputQuality, OUTPUT_QUALITY.NEEDS_WORK);
});
|
|
642
|
+
|
|
643
|
+
test('decideStage returns outputQuality: needs_work when halt', () => {
  // Neither reviewer approves and currentRound equals maxRoundsPerStage.
  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 1,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: 'VERDICT: REVISE\nBLOCKERS:\n- Issue',
    reviewerB: 'VERDICT: BLOCK\nBLOCKERS:\n- Cannot proceed',
    currentRound: 3,
    maxRoundsPerStage: 3,
  });

  assert.equal(decision.outcome, 'halt');
  assert.equal(decision.outputQuality, OUTPUT_QUALITY.NEEDS_WORK);

  // At least one quality reason must mention the exhausted round budget.
  const mentionsMaxRounds = decision.qualityReasons.some((reason) => reason.includes('Max rounds'));
  assert.ok(mentionsMaxRounds);
});
|
|
660
|
+
|
|
661
|
+
test('decideStage returns outputQuality: shadow_complete on basic advance', () => {
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const { outcome, outputQuality } = decideStage({
    stageCriteria: {
      requiredApprovals: 1,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approval,
    reviewerB: approval,
    currentRound: 1,
    maxRoundsPerStage: 3,
    // Legacy-style reports: contract validation is skipped for this test.
    skipContractValidation: true,
  });

  assert.equal(outcome, 'advance');
  // No cross-repo evidence scope is supplied, so the expected quality is shadow_complete.
  assert.equal(outputQuality, OUTPUT_QUALITY.SHADOW_COMPLETE);
});
|
|
679
|
+
|
|
680
|
+
test('decideStage returns outputQuality: production_ready when all criteria met', () => {
  // Producer declares evidence_scope: both; both reviewers score every dimension at 4 or 5.
  const producerReport = 'SUMMARY:\nDone\nCODE_EVIDENCE:\n- files_checked: [src/a.ts]\n- evidence_scope: both\n- sha: abc123';
  const reviewerAReport = 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nDIMENSIONS: correctness=4;scope_control=5\nCODE_EVIDENCE:\n- files_verified: [src/a.ts]\n- sha: abc123';
  const reviewerBReport = 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nDIMENSIONS: correctness=5;scope_control=4\nCODE_EVIDENCE:\n- files_verified: [src/a.ts]\n- sha: abc123';

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 1,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      scoringDimensions: ['correctness', 'scope_control'],
      dimensionThreshold: 3,
    },
    producer: producerReport,
    reviewerA: reviewerAReport,
    reviewerB: reviewerBReport,
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(decision.outcome, 'advance');
  // With evidence_scope: both and all dimensions >= 4, the result should be production_ready.
  assert.equal(decision.outputQuality, OUTPUT_QUALITY.PRODUCTION_READY);
});
|
|
700
|
+
|
|
701
|
+
test('decideStage returns shadow_complete when dimensions below production threshold', () => {
  // Both reviewers submit the same report with correctness=3.
  const reviewerReport = 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nDIMENSIONS: correctness=3\nCODE_EVIDENCE:\n- files_verified: [src/a.ts]\n- sha: abc123';

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 1,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      scoringDimensions: ['correctness'],
      dimensionThreshold: 3,
    },
    producer: 'SUMMARY:\nDone\nCODE_EVIDENCE:\n- files_checked: [src/a.ts]\n- evidence_scope: both\n- sha: abc123',
    reviewerA: reviewerReport,
    reviewerB: reviewerReport,
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(decision.outcome, 'advance');
  // Scores meet the stage threshold (3) but sit below the production threshold (4).
  assert.equal(decision.outputQuality, OUTPUT_QUALITY.SHADOW_COMPLETE);
  const explainsDowngrade = decision.qualityReasons.some((reason) => reason.includes('below production threshold'));
  assert.ok(explainsDowngrade);
});
|
|
722
|
+
|
|
723
|
+
test('decideStage includes validation field', () => {
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const { validation } = decideStage({
    stageCriteria: {
      requiredApprovals: 1,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approval,
    reviewerB: approval,
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  // Only the presence and shape of the field are checked, not its value.
  assert.ok(validation);
  assert.equal(typeof validation.valid, 'boolean');
});
|
|
739
|
+
|
|
740
|
+
// Regression test: invalid contract should block advance even if reviewers approve
|
|
741
|
+
test('decideStage does NOT advance when contract validation fails', () => {
  // Reports carry only the minimal sections; skipContractValidation is deliberately NOT set.
  const minimalApproval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.'; // Missing FINDINGS, CODE_EVIDENCE, etc.

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 1,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
    },
    producer: 'SUMMARY:\nDone', // Missing CHANGES, EVIDENCE, etc. required by PRODUCER_SCHEMA
    reviewerA: minimalApproval,
    reviewerB: minimalApproval,
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  // Approvals alone must not be enough once validation.valid is false.
  assert.equal(decision.validation.valid, false);
  assert.notEqual(decision.outcome, 'advance');
  assert.ok(decision.blockers.length > 0, 'Should have blockers from contract validation');

  const mentionsContractProblem = decision.blockers.some(
    (blocker) => blocker.includes('contract violation') || blocker.includes('missing required section'),
  );
  assert.ok(
    mentionsContractProblem,
    `Blockers should mention contract violation, got: ${decision.blockers.join('; ')}`,
  );
});
|
|
764
|
+
|
|
765
|
+
test('decideStage outputQuality is shadow_complete for PR2-like case', () => {
  // PR2 scenario: every report is complete, but all evidence is local-only
  // (evidence_scope: principles), i.e. there is no cross-repo verification.
  const producerReport = [
    'SUMMARY:\nImplementation complete.',
    'CHANGES:\nModified src/helper.ts',
    'EVIDENCE:\nTests pass locally.',
    'CODE_EVIDENCE:',
    '- files_checked: [src/helper.ts]',
    '- evidence_source: local',
    '- sha: abc123',
    '- evidence_scope: principles',
    'KEY_EVENTS:\n- Code changes made',
    'HYPOTHESIS_MATRIX:\n- H1: SUPPORTED',
    'CHECKS: evidence=ok;tests=passed;scope=pd-only',
    'OPEN_RISKS:\n- None',
  ].join('\n');

  const reviewerAReport = [
    'VERDICT: APPROVE',
    'BLOCKERS:\n- None',
    'FINDINGS:\n- Implementation looks correct',
    'CODE_EVIDENCE:',
    '- files_verified: [src/helper.ts]',
    '- evidence_source: local',
    '- evidence_scope: principles',
    '- sha: abc123',
    'HYPOTHESIS_MATRIX:\n- H1: SUPPORTED',
    'NEXT_FOCUS: Consider production deployment',
    'CHECKS: criteria=met',
  ].join('\n');

  const reviewerBReport = [
    'VERDICT: APPROVE',
    'BLOCKERS:\n- None',
    'FINDINGS:\n- Scope is controlled',
    'CODE_EVIDENCE:',
    '- files_verified: [src/helper.ts]',
    '- evidence_source: local',
    '- evidence_scope: principles',
    '- sha: abc123',
    'HYPOTHESIS_MATRIX:\n- H1: SUPPORTED',
    'NEXT_FOCUS: Ready for production',
    'CHECKS: criteria=met',
  ].join('\n');

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 1,
      requiredProducerSections: ['SUMMARY', 'CHANGES', 'EVIDENCE', 'CODE_EVIDENCE', 'KEY_EVENTS', 'HYPOTHESIS_MATRIX', 'CHECKS', 'OPEN_RISKS'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS', 'FINDINGS', 'CODE_EVIDENCE', 'HYPOTHESIS_MATRIX', 'NEXT_FOCUS', 'CHECKS'],
    },
    producer: producerReport,
    reviewerA: reviewerAReport,
    reviewerB: reviewerBReport,
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(decision.outcome, 'advance');
  // evidence_scope is "principles" rather than "both" -> shadow_complete.
  assert.equal(decision.outputQuality, OUTPUT_QUALITY.SHADOW_COMPLETE);
  const reasonMentionsBoth = decision.qualityReasons.some((reason) => reason.includes('both'));
  assert.ok(reasonMentionsBoth);
});
|
|
823
|
+
|
|
824
|
+
test('decideStage with globalReviewerRequired — cannot advance when global reviewer missing', () => {
  // The global reviewer is required by the criteria but its report is null.
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 3,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      requiredGlobalReviewerSections: ['VERDICT', 'MACRO_ANSWERS'],
      globalReviewerRequired: true,
      globalReviewerMustAnswer: ['Q1', 'Q2'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approval,
    reviewerB: approval,
    globalReviewer: null,
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  assert.equal(decision.outcome, 'revise');
  const hasGlobalReviewerBlocker = decision.blockers.some((blocker) => blocker.includes('Global reviewer'));
  assert.ok(hasGlobalReviewerBlocker);
});
|
|
845
|
+
|
|
846
|
+
test('decideStage with globalReviewerRequired — cannot advance when Q-answers missing', () => {
  // Q1-Q3 are required, but the global reviewer only answers Q1 and Q2.
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';
  const globalReportWithoutQ3 = [
    'VERDICT: APPROVE',
    'MACRO_ANSWERS:',
    'Q1: OpenClaw compatible',
    'Q2: Business flow closed',
    // Q3 missing
  ].join('\n');

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 3,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      requiredGlobalReviewerSections: ['VERDICT', 'MACRO_ANSWERS'],
      globalReviewerRequired: true,
      globalReviewerMustAnswer: ['Q1', 'Q2', 'Q3'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approval,
    reviewerB: approval,
    globalReviewer: globalReportWithoutQ3,
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  assert.equal(decision.outcome, 'revise');
  // The unanswered question must be named in a blocker.
  const namesMissingQuestion = decision.blockers.some((blocker) => blocker.includes('Q3'));
  assert.ok(namesMissingQuestion);
});
|
|
873
|
+
|
|
874
|
+
// The title previously said the stage "halts", but the assertion below pins the
// outcome to 'revise'; 'halt' is a separate outcome value asserted elsewhere in
// this file. The title now describes the behavior that is actually checked.
test('decideStage with globalReviewerRequired — BLOCK from global reviewer forces revise even if A and B approve', () => {
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';
  // The global reviewer answers Q1 but returns BLOCK with an explicit blocker.
  const globalBlockReport = [
    'VERDICT: BLOCK',
    'BLOCKERS:\n- Architecture diverges — new implicit protocol introduced',
    'MACRO_ANSWERS:',
    'Q1: OpenClaw compatible — hooks verified',
  ].join('\n');

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 3,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      requiredGlobalReviewerSections: ['VERDICT', 'MACRO_ANSWERS'],
      globalReviewerRequired: true,
      globalReviewerMustAnswer: ['Q1'],
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approval,
    reviewerB: approval,
    globalReviewer: globalBlockReport,
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  assert.equal(decision.outcome, 'revise');
  // The global reviewer's blocker must be surfaced with a [GLOBAL] tag.
  const hasTaggedGlobalBlocker = decision.blockers.some(
    (blocker) => blocker.includes('[GLOBAL]') && blocker.includes('Architecture diverges'),
  );
  assert.ok(hasTaggedGlobalBlocker);
});
|
|
900
|
+
|
|
901
|
+
test('decideStage with globalReviewerRequired — BLOCK without specific blockers still blocks', () => {
  // The global reviewer report is a bare BLOCK verdict with no BLOCKERS section.
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 3,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      requiredGlobalReviewerSections: ['VERDICT'],
      globalReviewerRequired: true,
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approval,
    reviewerB: approval,
    globalReviewer: 'VERDICT: BLOCK',
    currentRound: 1,
    maxRoundsPerStage: 3,
  });

  assert.equal(decision.outcome, 'revise');
  const hasSyntheticBlocker = decision.blockers.some(
    (blocker) => blocker.includes('Global reviewer BLOCKED with no specific blockers'),
  );
  assert.ok(hasSyntheticBlocker);
});
|
|
921
|
+
|
|
922
|
+
test('decideStage without globalReviewerRequired — ignores global_reviewer even if provided', () => {
  // globalReviewerRequired is false, yet a BLOCK report from the global reviewer is supplied.
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 2,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      globalReviewerRequired: false,
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approval,
    reviewerB: approval,
    globalReviewer: 'VERDICT: BLOCK\nBLOCKERS:\n- Should be ignored',
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(decision.outcome, 'advance');
  // Only reviewers A and B are counted.
  assert.equal(decision.metrics.approvalCount, 2);
});
|
|
942
|
+
|
|
943
|
+
test('buildHandoff includes global_reviewer blockers and focus', () => {
  // Builds a REVISE report with one blocker and one NEXT_FOCUS entry.
  const reviseReport = (source, focus) => `VERDICT: REVISE\nBLOCKERS:\n- ${source} blocker\nNEXT_FOCUS: Fix ${focus}`;

  const handoff = buildHandoff({
    reviewerA: reviseReport('ReviewerA', 'X'),
    reviewerB: reviseReport('ReviewerB', 'Y'),
    globalReviewer: reviseReport('Global', 'Z'),
    producer: 'SUMMARY:\nDone',
    metrics: { globalReviewerVerdict: 'REVISE' },
    stageName: 'architecture-cut',
    round: 2,
  });

  // Blockers are expected in A, B, global order.
  assert.deepEqual(handoff.blockers, ['ReviewerA blocker', 'ReviewerB blocker', 'Global blocker']);
  // The focus items from all three reviewers must appear in the combined focus.
  for (const expectedFocus of ['Fix X', 'Fix Y', 'Fix Z']) {
    assert.ok(handoff.focusForNextRound.includes(expectedFocus));
  }
  assert.equal(handoff.dimensionScores.globalReviewer, 'REVISE');
});
|
|
960
|
+
|
|
961
|
+
test('buildHandoff handles missing global_reviewer gracefully', () => {
  // No global reviewer report and empty metrics; both local reviewers approve with "None." blockers.
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const handoff = buildHandoff({
    reviewerA: approval,
    reviewerB: approval,
    globalReviewer: null,
    producer: 'SUMMARY:\nDone',
    metrics: {},
    stageName: 'investigate',
    round: 1,
  });

  assert.deepEqual(handoff.blockers, []);
  assert.equal(handoff.focusForNextRound, null);
  // Global-reviewer-derived fields are expected to be null.
  assert.equal(handoff.globalReviewerCodeEvidence, null);
  assert.equal(handoff.dimensionScores.globalReviewer, null);
});
|
|
977
|
+
|
|
978
|
+
// --- Markdown heading compatibility tests ---
|
|
979
|
+
|
|
980
|
+
test('extractContractItems parses ## CONTRACT (markdown heading)', () => {
  // The contract section uses a "## CONTRACT" heading rather than "CONTRACT:".
  const report = [
    'SUMMARY:\nDone',
    '## CONTRACT',
    '- Root cause identified with evidence status: DONE evidence: "see EVIDENCE"',
    '- Fix proposed status: TODO',
  ].join('\n');

  const contractItems = extractContractItems(report);

  assert.equal(contractItems.length, 2);
  assert.equal(contractItems[0].status, 'DONE');
  assert.equal(contractItems[1].status, 'TODO');
});
|
|
993
|
+
|
|
994
|
+
test('extractContractItems parses CONTRACT: (colon format)', () => {
  // Plain "CONTRACT:" header, one DONE item and one PARTIAL item.
  const report = [
    'SUMMARY:\nDone',
    'CONTRACT:',
    '- Root cause identified status: DONE',
    '- Fix proposed status: PARTIAL',
  ].join('\n');

  const contractItems = extractContractItems(report);

  assert.equal(contractItems.length, 2);
  assert.equal(contractItems[0].status, 'DONE');
  assert.equal(contractItems[1].status, 'PARTIAL');
});
|
|
1007
|
+
|
|
1008
|
+
test('extractCodeEvidence parses ## CODE_EVIDENCE (markdown heading)', () => {
  // The evidence section uses a "## CODE_EVIDENCE" heading and an unbracketed file list.
  const report = [
    '## CODE_EVIDENCE',
    '- files_checked: src/observer.js, src/persistence.ts',
    '- evidence_source: local',
    '- sha: abc123def',
    '- branch/worktree: main',
  ].join('\n');

  const codeEvidence = extractCodeEvidence(report);

  assert.ok(codeEvidence, 'should parse ## CODE_EVIDENCE');
  assert.deepEqual(codeEvidence.filesChecked, ['src/observer.js', 'src/persistence.ts']);
  assert.equal(codeEvidence.evidenceSource, 'local');
  assert.equal(codeEvidence.sha, 'abc123def');
});
|
|
1022
|
+
|
|
1023
|
+
test('extractCodeEvidence parses comma-separated files_checked (no brackets)', () => {
  // files_checked is a flat comma-separated list without [ ].
  const report = [
    'CODE_EVIDENCE:',
    '- files_checked: empathy-observer-manager.ts, hooks/subagent.ts, index.ts',
    '- evidence_source: both',
    '- sha: b1964a55',
  ].join('\n');

  const codeEvidence = extractCodeEvidence(report);

  assert.ok(codeEvidence, 'should parse flat comma list');
  assert.deepEqual(codeEvidence.filesChecked, ['empathy-observer-manager.ts', 'hooks/subagent.ts', 'index.ts']);
});
|
|
1034
|
+
|
|
1035
|
+
test('extractCodeEvidence parses comma-separated files_verified (no brackets)', () => {
  // Reviewer-style key (files_verified) with a flat comma-separated list.
  const report = [
    '## CODE_EVIDENCE',
    '- files_verified: src/fix.ts, src/test.ts, src/helper.ts',
    '- evidence_source: both',
    '- sha: fed123',
  ].join('\n');

  const codeEvidence = extractCodeEvidence(report);

  assert.ok(codeEvidence, 'should parse flat comma list for files_verified');
  // The files_verified list is asserted through the filesChecked property.
  assert.deepEqual(codeEvidence.filesChecked, ['src/fix.ts', 'src/test.ts', 'src/helper.ts']);
});
|
|
1046
|
+
|
|
1047
|
+
test('hasCodeEvidence returns true for ## CODE_EVIDENCE', () => {
  // A markdown-heading section with a single entry must be detected.
  const report = '## CODE_EVIDENCE\n- files_checked: a.ts';
  const detected = hasCodeEvidence(report);
  assert.equal(detected, true);
});
|
|
1050
|
+
|
|
1051
|
+
test('decideStage advances with ## CONTRACT and ## CODE_EVIDENCE', () => {
  // The producer report uses markdown headings for both CONTRACT and CODE_EVIDENCE.
  const producerReport = [
    'SUMMARY:\nDone',
    '## CONTRACT',
    '- Root cause identified status: DONE',
    '## CODE_EVIDENCE',
    '- files_checked: a.ts, b.ts',
    '- sha: abc123',
  ].join('\n');
  const approval = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 2,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      requiredDeliverables: ['root_cause'],
    },
    producer: producerReport,
    reviewerA: approval,
    reviewerB: approval,
    currentRound: 1,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(decision.outcome, 'advance');

  const { contractCheck, producerCodeEvidence } = decision.metrics;
  assert.equal(contractCheck.allDone, true);
  assert.ok(producerCodeEvidence, 'should have CODE_EVIDENCE');
  assert.deepEqual(producerCodeEvidence.filesChecked, ['a.ts', 'b.ts']);
});
|
|
1079
|
+
|
|
1080
|
+
test('extractContractItems parses ## CONTRACT correctly when followed by ## CODE_EVIDENCE', () => {
  // The ## CONTRACT section must end where ## CODE_EVIDENCE begins.
  const report = [
    'SUMMARY:\nDone',
    '## CONTRACT',
    '- Root cause identified status: DONE',
    '## CODE_EVIDENCE',
    '- files_checked: a.ts, b.ts',
    '- sha: abc123',
  ].join('\n');

  const contractItems = extractContractItems(report);

  // Exactly one contract item; the CODE_EVIDENCE bullets must not be picked up.
  assert.equal(contractItems.length, 1);
  const onlyItem = contractItems[0];
  assert.equal(onlyItem.status, 'DONE');
  assert.ok(onlyItem.deliverable.includes('Root cause identified'));
});
|
|
1096
|
+
|
|
1097
|
+
test('extractContractItems ignores markdown horizontal rules (---, ***, ___)', () => {
  // This case exercises only the `---` rule, followed by a bold prose line.
  const report = [
    '## CONTRACT',
    '- transport_audit status: DONE',
    '- lifecycle_hook_map status: DONE',
    '---',
    '**Round 3 Producer Report**',
  ].join('\n');

  const contractItems = extractContractItems(report);

  assert.equal(contractItems.length, 2);
  assert.equal(contractItems[0].status, 'DONE');
  assert.equal(contractItems[1].status, 'DONE');

  // No deliverable may contain leftovers of the rule.
  const hasRuleArtifact = contractItems.some((item) => item.deliverable.includes('--'));
  assert.ok(!hasRuleArtifact, 'no horizontal rule artifacts');
});
|
|
1111
|
+
|
|
1112
|
+
test('extractContractItems ignores *** and ___ separators', () => {
  // A `***` line sits between the two items and a `___` line follows the last one.
  const report = [
    'CONTRACT:',
    '- item_a status: DONE',
    '***',
    '- item_b status: PARTIAL',
    '___',
  ].join('\n');

  const contractItems = extractContractItems(report);

  assert.equal(contractItems.length, 2);
  assert.equal(contractItems[0].deliverable, 'item_a');
  assert.equal(contractItems[1].deliverable, 'item_b');
});
|
|
1125
|
+
|
|
1126
|
+
test('extractContractItems strips markdown code fences before parsing', () => {
  // The contract body is wrapped in a ``` fence under a ## CONTRACT heading,
  // then followed by a horizontal rule and an unrelated appendix heading.
  const report = [
    '## CONTRACT',
    '',
    '```',
    'CONTRACT:',
    '- transport_audit status: DONE',
    '- lifecycle_hook_map status: DONE',
    '- openclaw_assumptions_documented status: DONE',
    '- failure_mode_inventory status: DONE',
    '```',
    '',
    '---',
    '',
    '## APPENDIX: Round 2 Blocker Resolution',
  ].join('\n');

  const contractItems = extractContractItems(report);

  assert.equal(contractItems.length, 4);

  // Spot-check the first and last items.
  const firstItem = contractItems[0];
  assert.equal(firstItem.deliverable, 'transport_audit');
  assert.equal(firstItem.status, 'DONE');

  const lastItem = contractItems[3];
  assert.equal(lastItem.deliverable, 'failure_mode_inventory');
  assert.equal(lastItem.status, 'DONE');
});
|
|
1149
|
+
|
|
1150
|
+
// --- Parser robustness: DIMENSIONS markdown bold ---
|
|
1151
|
+
|
|
1152
|
+
test('parseDimensions extracts from **DIMENSIONS**: markdown bold', () => {
  // The DIMENSIONS label is wrapped in markdown bold markers.
  const report = 'VERDICT: APPROVE\n**DIMENSIONS**: decision_quality=4; openclaw_verification_completeness=4; interface_soundness=5; extensibility=4\nBLOCKERS:\n- None.';
  const expectedScores = {
    decision_quality: 4,
    openclaw_verification_completeness: 4,
    interface_soundness: 5,
    extensibility: 4,
  };

  const parsedScores = parseDimensions(report);

  assert.deepEqual(parsedScores, expectedScores);
});
|
|
1162
|
+
|
|
1163
|
+
test('parseDimensions still extracts from plain DIMENSIONS:', () => {
  // Guard for the original, unformatted label.
  const parsedScores = parseDimensions('DIMENSIONS: correctness=5; scope=3');
  assert.deepEqual(parsedScores, { correctness: 5, scope: 3 });
});
|
|
1168
|
+
|
|
1169
|
+
// --- Parser robustness: MACRO_ANSWERS with markdown headings and prose colons ---
|
|
1170
|
+
|
|
1171
|
+
test('extractMacroAnswers finds Q5 in markdown ### Q5 format', () => {
  // Every answer sits under a "### Qn: ..." heading. The Q4 answer ends with a
  // colon and is followed by a bullet line, before the ### Q5 heading.
  const report = [
    '## MACRO_ANSWERS',
    '',
    '### Q1: Is migration architecturally sound?',
    '**Yes.** Single transport model.',
    '',
    '### Q2: Are assumptions verified?',
    '**Yes.** Cross-repo verification done.',
    '',
    '### Q3: Are sidecar boundaries explicit?',
    '**Yes.** Four boundaries enforced.',
    '',
    '### Q4: Is business flow closed?',
    '**Yes.** State transitions are complete:',
    '- pending -> active -> wait_result -> completed',
    '',
    '### Q5: Does this serve the end goal?',
    '**Yes.** End goal achieved.',
    '',
    '---',
    '',
    '## BLOCKERS',
    '',
    '**None.**',
  ].join('\n');

  const { found, satisfied, allSatisfied } = extractMacroAnswers(report, ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']);

  assert.deepEqual(found, ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']);
  assert.deepEqual(satisfied, ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']);
  assert.equal(allSatisfied, true);
});
|
|
1202
|
+
|
|
1203
|
+
// --- Dimensions fallback from state JSON ---
|
|
1204
|
+
|
|
1205
|
+
test('buildStageMetrics uses reviewerADimensionsFallback when report has no DIMENSIONS', () => {
  // Neither reviewer report has a DIMENSIONS line; scores arrive only via the fallback objects.
  const approvalWithoutDimensions = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const stageMetrics = buildStageMetrics({
    stageCriteria: {
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT'],
      scoringDimensions: ['correctness', 'scope_control'],
      dimensionThreshold: 3,
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approvalWithoutDimensions,
    reviewerB: approvalWithoutDimensions,
    reviewerADimensionsFallback: { correctness: 4, scope_control: 5 },
    reviewerBDimensionsFallback: { correctness: 5, scope_control: 4 },
  });

  // The fallback values are reported as each reviewer's dimensions.
  assert.deepEqual(stageMetrics.reviewerADimensions, { correctness: 4, scope_control: 5 });
  assert.deepEqual(stageMetrics.reviewerBDimensions, { correctness: 5, scope_control: 4 });
  // Every fallback score is above the threshold of 3.
  assert.equal(stageMetrics.dimensionFailures.length, 0);
});
|
|
1223
|
+
|
|
1224
|
+
test('decideStage advances with dimensions from fallback', () => {
  // Reports omit DIMENSIONS; the scores are passed through the fallback parameters.
  const approvalWithoutDimensions = 'VERDICT: APPROVE\nBLOCKERS:\n- None.';

  const { outcome } = decideStage({
    stageCriteria: {
      requiredApprovals: 2,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      scoringDimensions: ['correctness'],
      dimensionThreshold: 3,
    },
    producer: 'SUMMARY:\nDone',
    reviewerA: approvalWithoutDimensions,
    reviewerB: approvalWithoutDimensions,
    currentRound: 1,
    maxRoundsPerStage: 3,
    reviewerADimensionsFallback: { correctness: 4 },
    reviewerBDimensionsFallback: { correctness: 5 },
    skipContractValidation: true,
  });

  assert.equal(outcome, 'advance');
});
|
|
1244
|
+
|
|
1245
|
+
test('decideStage prefers report DIMENSIONS over fallback for quality downgrade', () => {
  // Builds a full reviewer report that ends with the given correctness score.
  const reviewerReport = (correctness) => `VERDICT: APPROVE\nBLOCKERS:\n- None.\nFINDINGS:\nGood\nCODE_EVIDENCE:\nfiles_verified: a.ts\nHYPOTHESIS_MATRIX:\nH1: SUPPORTED\nNEXT_FOCUS:\nNone\nCHECKS: all=ok\nDIMENSIONS: correctness=${correctness}`;

  const decision = decideStage({
    stageCriteria: {
      requiredApprovals: 2,
      requiredProducerSections: ['SUMMARY'],
      requiredReviewerSections: ['VERDICT', 'BLOCKERS'],
      scoringDimensions: ['correctness'],
      dimensionThreshold: 3,
    },
    producer: 'SUMMARY:\nDone\nCHANGES:\nNone\nEVIDENCE:\nFound\nCODE_EVIDENCE:\nfiles_checked: a.ts\nKEY_EVENTS:\nEvent1\nHYPOTHESIS_MATRIX:\nH1: SUPPORTED\nCHECKS: all=ok\nOPEN_RISKS:\nNone',
    reviewerA: reviewerReport(2),
    reviewerB: reviewerReport(5),
    currentRound: 1,
    maxRoundsPerStage: 3,
    reviewerADimensionsFallback: { correctness: 5 },
    reviewerBDimensionsFallback: { correctness: 5 },
  });

  // Reviewer A's report says correctness=2 (below threshold) while the fallback
  // says 5: the report value is expected to win.
  // Dimension failures no longer block advance, but they are still recorded.
  assert.equal(decision.outcome, 'advance');

  const { dimensionFailures, reviewerADimensions } = decision.metrics;
  assert.ok(dimensionFailures.length > 0, 'dimension failures from report should be recorded');
  assert.equal(reviewerADimensions.correctness, 2, 'report value should win over fallback');
});
|
|
1268
|
+
|
|
1269
|
+
// --- Round 3 replay: 3 APPROVE + 6/6 contract + Q1-Q5 should advance ---
|
|
1270
|
+
|
|
1271
|
+
test('decideStage advances when all 3 reviewers approve, contract done, Q1-Q5 satisfied, dimensions from **DIMENSIONS**', () => {
  // Criteria: three approvals, a mandatory global reviewer answering Q1-Q5,
  // four scored dimensions (threshold 3) and six required deliverables.
  const stageCriteria = {
    requiredApprovals: 3,
    requiredProducerSections: ['SUMMARY', 'ARCHITECTURE_DECISION', 'INTERFACE_DESIGN', 'CHECKS'],
    requiredReviewerSections: ['VERDICT', 'BLOCKERS', 'FINDINGS', 'CHECKS'],
    requiredGlobalReviewerSections: ['VERDICT', 'MACRO_ANSWERS'],
    globalReviewerRequired: true,
    globalReviewerMustAnswer: ['Q1', 'Q2', 'Q3', 'Q4', 'Q5'],
    scoringDimensions: ['decision_quality', 'openclaw_verification_completeness', 'interface_soundness', 'extensibility'],
    dimensionThreshold: 3,
    requiredDeliverables: ['architecture_decision', 'openclaw_cross_repo_verification', 'helper_interface_draft', 'shadow_run_plan', 'runtime_direct_subagent_ended_verified', 'surface_degrade_policy'],
  };

  // Producer report lists every required deliverable as DONE under CONTRACT.
  const producer = 'SUMMARY:\nDone\nARCHITECTURE_DECISION:\nDecided\nINTERFACE_DESIGN:\nDesigned\nCHECKS: ok\nCONTRACT:\n- architecture_decision status: DONE\n- openclaw_cross_repo_verification status: DONE\n- helper_interface_draft status: DONE\n- shadow_run_plan status: DONE\n- runtime_direct_subagent_ended_verified status: DONE\n- surface_degrade_policy status: DONE';

  // Both peer reviews write their scores under a markdown-bold **DIMENSIONS** label.
  const reviewerA = 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nFINDINGS:\n- Good\nCHECKS: ok\n**DIMENSIONS**: decision_quality=4; openclaw_verification_completeness=4; interface_soundness=5; extensibility=4';
  const reviewerB = 'VERDICT: APPROVE\nBLOCKERS:\n- None.\nFINDINGS:\n- Good\nCHECKS: ok\n**DIMENSIONS**: decision_quality=4; openclaw_verification_completeness=4; interface_soundness=5; extensibility=4';

  // Global reviewer report in markdown: Q1-Q5 as "###" headings under "## MACRO_ANSWERS".
  const globalReviewerLines = [
    'VERDICT: APPROVE',
    '## MACRO_ANSWERS',
    '',
    '### Q1: Is migration architecturally sound?',
    '**Yes.** Single transport model verified.',
    '',
    '### Q2: Are assumptions verified?',
    '**Yes.** Cross-repo verification done.',
    '',
    '### Q3: Are sidecar boundaries explicit?',
    '**Yes.** Four boundaries enforced.',
    '',
    '### Q4: Is business flow closed?',
    '**Yes.** State transitions are complete:',
    '- pending -> active -> completed',
    '',
    '### Q5: Does this serve the end goal?',
    '**Yes.** End goal achieved.',
    '',
    '---',
    '',
    '## BLOCKERS',
    '',
    '**None.**',
  ];
  const globalReviewer = globalReviewerLines.join('\n');

  const decision = decideStage({
    stageCriteria,
    producer,
    reviewerA,
    reviewerB,
    globalReviewer,
    // Last permitted round: currentRound equals maxRoundsPerStage.
    currentRound: 3,
    maxRoundsPerStage: 3,
    skipContractValidation: true,
  });

  assert.equal(decision.outcome, 'advance');

  const { metrics } = decision;
  assert.equal(metrics.approvalCount, 3);
  assert.equal(metrics.dimensionFailures.length, 0);
  assert.equal(metrics.contractCheck.allDone, true);
});
|