maestro-flow 0.4.20 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/maestro-ralph-execute/SKILL.md +2 -1
- package/.agents/skills/maestro-swarm-workflow/SKILL.md +27 -19
- package/.agents/skills/maestro-universal-workflow/SKILL.md +563 -0
- package/.agents/skills/team-adversarial-swarm/SKILL.md +235 -0
- package/.agents/skills/team-adversarial-swarm/scripts/aco.py +473 -0
- package/.agents/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
- package/.agents/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
- package/.agents/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
- package/.agents/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
- package/.agents/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
- package/.agents/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
- package/.agents/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
- package/.agents/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
- package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
- package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
- package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
- package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
- package/.agy/skills/maestro-ralph-execute/SKILL.md +2 -1
- package/.agy/skills/maestro-swarm-workflow/SKILL.md +27 -19
- package/.agy/skills/maestro-universal-workflow/SKILL.md +560 -0
- package/.agy/skills/team-adversarial-swarm/SKILL.md +244 -0
- package/.agy/skills/team-adversarial-swarm/scripts/aco.py +473 -0
- package/.agy/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
- package/.agy/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
- package/.agy/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
- package/.agy/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
- package/.agy/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
- package/.agy/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
- package/.agy/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
- package/.agy/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
- package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
- package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
- package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
- package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
- package/.claude/commands/maestro-ralph-execute.md +2 -1
- package/.claude/commands/maestro-swarm-workflow.md +27 -19
- package/.claude/commands/maestro-universal-workflow.md +561 -0
- package/.claude/skills/team-adversarial-swarm/SKILL.md +233 -0
- package/.claude/skills/team-adversarial-swarm/scripts/aco.py +473 -0
- package/.claude/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
- package/.claude/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
- package/.claude/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
- package/.claude/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
- package/.claude/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
- package/.claude/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
- package/.claude/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
- package/.claude/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
- package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
- package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
- package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
- package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
- package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/search.js +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/search.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.d.ts +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js +5 -5
- package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +3 -3
- package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
- package/dashboard/dist-server/src/graph/types.d.ts +111 -0
- package/dashboard/dist-server/src/graph/types.js +2 -0
- package/dashboard/dist-server/src/graph/types.js.map +1 -0
- package/dist/src/commands/install-backend.d.ts +0 -7
- package/dist/src/commands/install-backend.d.ts.map +1 -1
- package/dist/src/commands/install-backend.js +0 -14
- package/dist/src/commands/install-backend.js.map +1 -1
- package/dist/src/commands/install.d.ts.map +1 -1
- package/dist/src/commands/install.js +0 -18
- package/dist/src/commands/install.js.map +1 -1
- package/dist/src/commands/kg.d.ts +2 -2
- package/dist/src/commands/kg.d.ts.map +1 -1
- package/dist/src/commands/kg.js +150 -179
- package/dist/src/commands/kg.js.map +1 -1
- package/dist/src/graph/analyzers/fs-analyzer.d.ts +10 -0
- package/dist/src/graph/analyzers/fs-analyzer.d.ts.map +1 -0
- package/dist/src/graph/analyzers/fs-analyzer.js +959 -0
- package/dist/src/graph/analyzers/fs-analyzer.js.map +1 -0
- package/dist/src/graph/index.d.ts +6 -0
- package/dist/src/graph/index.d.ts.map +1 -0
- package/dist/src/graph/index.js +6 -0
- package/dist/src/graph/index.js.map +1 -0
- package/dist/src/graph/loader.d.ts +3 -0
- package/dist/src/graph/loader.d.ts.map +1 -0
- package/dist/src/graph/loader.js +12 -0
- package/dist/src/graph/loader.js.map +1 -0
- package/dist/src/graph/merger.d.ts +56 -0
- package/dist/src/graph/merger.d.ts.map +1 -0
- package/dist/src/graph/merger.js +896 -0
- package/dist/src/graph/merger.js.map +1 -0
- package/dist/src/graph/query.d.ts +7 -0
- package/dist/src/graph/query.d.ts.map +1 -0
- package/dist/src/graph/query.js +126 -0
- package/dist/src/graph/query.js.map +1 -0
- package/dist/src/graph/types.d.ts +112 -0
- package/dist/src/graph/types.d.ts.map +1 -0
- package/dist/src/graph/types.js +2 -0
- package/dist/src/graph/types.js.map +1 -0
- package/dist/src/i18n/locales/en.d.ts.map +1 -1
- package/dist/src/i18n/locales/en.js +0 -10
- package/dist/src/i18n/locales/en.js.map +1 -1
- package/dist/src/i18n/locales/zh.d.ts.map +1 -1
- package/dist/src/i18n/locales/zh.js +0 -10
- package/dist/src/i18n/locales/zh.js.map +1 -1
- package/dist/src/i18n/types.d.ts +0 -9
- package/dist/src/i18n/types.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallConfirm.d.ts +0 -1
- package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
- package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
- package/dist/src/tui/install-ui/InstallExecution.d.ts +0 -1
- package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallExecution.js +0 -22
- package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
- package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
- package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallFlow.js +5 -23
- package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
- package/dist/src/tui/install-ui/InstallHub.d.ts +0 -2
- package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallHub.js +0 -6
- package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
- package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallResult.js +1 -1
- package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
- package/dist/src/utils/update-notices.js +12 -0
- package/dist/src/utils/update-notices.js.map +1 -1
- package/package.json +1 -1
- package/workflows/swarm/wf-analyze.js +195 -34
- package/workflows/swarm/wf-brainstorm.js +225 -53
- package/workflows/swarm/wf-execute.js +199 -23
- package/workflows/swarm/wf-grill.js +181 -20
- package/workflows/swarm/wf-milestone-audit.js +178 -29
- package/workflows/swarm/wf-plan.js +288 -53
- package/workflows/swarm/wf-review.js +195 -80
- package/workflows/swarm/wf-verify.js +125 -28
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
export const meta = {
|
|
2
2
|
name: 'wf-review',
|
|
3
|
-
description: 'Multi-dimension
|
|
4
|
-
whenToUse: 'Accelerate quality-review with parallel
|
|
3
|
+
description: 'Multi-dimension code review with 3-vote adversarial verification and multi-perspective verdict',
|
|
4
|
+
whenToUse: 'Accelerate quality-review with parallel scanning + 3-vote finding verification + 3-perspective verdict arbitration',
|
|
5
5
|
phases: [
|
|
6
6
|
{ title: 'Scan', detail: 'Parallel dimension scanning via workflow-reviewer' },
|
|
7
|
-
{ title: 'Verify', detail: '
|
|
8
|
-
{ title: 'Report', detail: '
|
|
7
|
+
{ title: 'Verify', detail: '3-vote adversarial verification per critical finding (majority wins)' },
|
|
8
|
+
{ title: 'Report', detail: '3-perspective reporters (strict/lenient/objective) + arbitrated verdict' },
|
|
9
9
|
],
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
-
// Aligned with workflow-reviewer.md dimension definitions
|
|
13
12
|
const REVIEW_DIMENSIONS = [
|
|
14
13
|
{ key: 'correctness', prefix: 'COR', prompt: 'Dimension: correctness. Focus: Logic errors, off-by-one, null handling, missing error propagation, type mismatches, unhandled edge cases, broken invariants, incorrect conditions.' },
|
|
15
14
|
{ key: 'security', prefix: 'SEC', prompt: 'Dimension: security. Focus: Injection vectors (SQL/command/XSS), auth bypass, hardcoded secrets, missing input validation, data exposure in logs/errors, SSRF, IDOR, insecure crypto.' },
|
|
@@ -57,6 +56,19 @@ const VERDICT_SCHEMA = {
|
|
|
57
56
|
required: ['finding_id', 'is_real', 'confidence', 'reasoning'],
|
|
58
57
|
}
|
|
59
58
|
|
|
59
|
+
const PERSPECTIVE_REPORT_SCHEMA = {
|
|
60
|
+
type: 'object',
|
|
61
|
+
properties: {
|
|
62
|
+
perspective: { type: 'string' },
|
|
63
|
+
verdict: { type: 'string', enum: ['APPROVE', 'REQUEST_CHANGES', 'BLOCK'] },
|
|
64
|
+
overall_quality: { type: 'number', minimum: 1, maximum: 5 },
|
|
65
|
+
rationale: { type: 'string' },
|
|
66
|
+
blocking_issues: { type: 'array', items: { type: 'object', properties: { id: { type: 'string' }, title: { type: 'string' }, file: { type: 'string' }, severity: { type: 'string' } }, required: ['id', 'title'] } },
|
|
67
|
+
confidence: { type: 'number', minimum: 0, maximum: 100 },
|
|
68
|
+
},
|
|
69
|
+
required: ['perspective', 'verdict', 'overall_quality', 'rationale', 'confidence'],
|
|
70
|
+
}
|
|
71
|
+
|
|
60
72
|
const REPORT_SCHEMA = {
|
|
61
73
|
type: 'object',
|
|
62
74
|
properties: {
|
|
@@ -76,9 +88,10 @@ const REPORT_SCHEMA = {
|
|
|
76
88
|
},
|
|
77
89
|
},
|
|
78
90
|
blocking_issues: { type: 'array', items: { type: 'object', properties: { id: { type: 'string' }, title: { type: 'string' }, file: { type: 'string' }, severity: { type: 'string' }, suggestion: { type: 'string' } }, required: ['id', 'title', 'file', 'severity'] } },
|
|
91
|
+
adversarial_verdict: { type: 'object', properties: { strict: { type: 'string' }, lenient: { type: 'string' }, objective: { type: 'string' }, decisive_factor: { type: 'string' } }, required: ['strict', 'lenient', 'objective', 'decisive_factor'] },
|
|
79
92
|
summary: { type: 'string' },
|
|
80
93
|
},
|
|
81
|
-
required: ['verdict', 'overall_quality', 'dimension_summary', 'blocking_issues', 'summary'],
|
|
94
|
+
required: ['verdict', 'overall_quality', 'dimension_summary', 'blocking_issues', 'adversarial_verdict', 'summary'],
|
|
82
95
|
}
|
|
83
96
|
|
|
84
97
|
const target = args?.target || 'changed files on current branch'
|
|
@@ -89,7 +102,7 @@ const dimensions = args?.dimensions
|
|
|
89
102
|
? REVIEW_DIMENSIONS.filter(d => args.dimensions.includes(d.key))
|
|
90
103
|
: (tier === 'quick' ? REVIEW_DIMENSIONS.slice(0, 3) : REVIEW_DIMENSIONS)
|
|
91
104
|
|
|
92
|
-
// Phase 1: Parallel dimension scanning
|
|
105
|
+
// Phase 1: Parallel dimension scanning
|
|
93
106
|
phase('Scan')
|
|
94
107
|
log(`Scanning ${dimensions.length} dimensions in parallel via workflow-reviewer...`)
|
|
95
108
|
|
|
@@ -121,106 +134,208 @@ const criticalHigh = allFindings.filter(f => f.severity === 'critical' || f.seve
|
|
|
121
134
|
|
|
122
135
|
log(`Found ${allFindings.length} total (${criticalHigh.length} critical/high across ${validScans.length} dimensions)`)
|
|
123
136
|
|
|
124
|
-
// Phase 2:
|
|
137
|
+
// Phase 2: 3-vote adversarial verification per critical/high finding
|
|
125
138
|
phase('Verify')
|
|
126
139
|
|
|
140
|
+
const confirmedFindings = []
|
|
141
|
+
const falsePositives = []
|
|
142
|
+
|
|
127
143
|
if (criticalHigh.length > 0) {
|
|
128
|
-
log(`
|
|
144
|
+
log(`3-vote adversarial verification of ${criticalHigh.length} critical/high findings...`)
|
|
129
145
|
|
|
130
146
|
const verified = await pipeline(
|
|
131
147
|
criticalHigh,
|
|
132
|
-
(finding) =>
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
- Less severe than claimed (downgrade severity)
|
|
136
|
-
- Not applicable in this context
|
|
148
|
+
(finding) => parallel([
|
|
149
|
+
() => agent(
|
|
150
|
+
`VOTE 1 — PROSECUTOR: Argue this finding IS REAL and the severity is justified.
|
|
137
151
|
|
|
138
152
|
Finding: [${finding.severity}] ${finding.id}: ${finding.title}
|
|
139
153
|
File: ${finding.file}${finding.line ? ':' + finding.line : ''}
|
|
140
154
|
Description: ${finding.description}
|
|
141
155
|
Evidence: ${finding.evidence || 'none provided'}
|
|
142
156
|
|
|
143
|
-
Read the actual source code
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
157
|
+
Read the actual source code. Build the case that this is a genuine issue:
|
|
158
|
+
- Show the exact code path that triggers the bug/vulnerability
|
|
159
|
+
- Demonstrate the impact with a concrete scenario
|
|
160
|
+
- Argue why the severity rating is correct or should be higher
|
|
147
161
|
|
|
148
|
-
Default to is_real=
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
162
|
+
Default to is_real=true. Only say false if the code clearly doesn't have this issue.`,
|
|
163
|
+
{ label: `vote1:${finding.id}`, phase: 'Verify', schema: VERDICT_SCHEMA }
|
|
164
|
+
),
|
|
165
|
+
() => agent(
|
|
166
|
+
`VOTE 2 — DEFENSE: Argue this finding is a FALSE POSITIVE or overstated.
|
|
153
167
|
|
|
154
|
-
|
|
155
|
-
|
|
168
|
+
Finding: [${finding.severity}] ${finding.id}: ${finding.title}
|
|
169
|
+
File: ${finding.file}${finding.line ? ':' + finding.line : ''}
|
|
170
|
+
Description: ${finding.description}
|
|
171
|
+
Evidence: ${finding.evidence || 'none provided'}
|
|
172
|
+
|
|
173
|
+
Read the actual source code. Build the case AGAINST this finding:
|
|
174
|
+
- Show handling elsewhere that mitigates the issue
|
|
175
|
+
- Demonstrate why the severity is overstated
|
|
176
|
+
- Find framework guarantees or type safety that prevents the claimed scenario
|
|
177
|
+
|
|
178
|
+
Default to is_real=false. Only confirm if you genuinely cannot find any defense.`,
|
|
179
|
+
{ label: `vote2:${finding.id}`, phase: 'Verify', schema: VERDICT_SCHEMA }
|
|
180
|
+
),
|
|
181
|
+
() => agent(
|
|
182
|
+
`VOTE 3 — INDEPENDENT JUDGE: Evaluate this finding objectively, without bias.
|
|
183
|
+
|
|
184
|
+
Finding: [${finding.severity}] ${finding.id}: ${finding.title}
|
|
185
|
+
File: ${finding.file}${finding.line ? ':' + finding.line : ''}
|
|
186
|
+
Description: ${finding.description}
|
|
187
|
+
Evidence: ${finding.evidence || 'none provided'}
|
|
188
|
+
|
|
189
|
+
Read the actual source code. Make an independent, evidence-based assessment:
|
|
190
|
+
- Verify the claimed behavior exists in the code
|
|
191
|
+
- Check if there are mitigations the reporter missed
|
|
192
|
+
- Assess the actual severity based on real-world impact
|
|
193
|
+
|
|
194
|
+
No default bias. Judge purely on evidence. Confidence should reflect evidence strength.`,
|
|
195
|
+
{ label: `vote3:${finding.id}`, phase: 'Verify', schema: VERDICT_SCHEMA }
|
|
196
|
+
),
|
|
197
|
+
])
|
|
198
|
+
)
|
|
156
199
|
|
|
157
|
-
verified.filter(Boolean).forEach((
|
|
200
|
+
verified.filter(Boolean).forEach((votes, i) => {
|
|
158
201
|
const finding = criticalHigh[i]
|
|
159
|
-
|
|
160
|
-
|
|
202
|
+
const validVotes = votes.filter(Boolean)
|
|
203
|
+
const realVotes = validVotes.filter(v => v.is_real)
|
|
204
|
+
const isConfirmed = realVotes.length >= 2
|
|
205
|
+
|
|
206
|
+
if (isConfirmed) {
|
|
207
|
+
const avgConfidence = Math.round(realVotes.reduce((s, v) => s + v.confidence, 0) / realVotes.length)
|
|
208
|
+
const maxSeverity = validVotes.reduce((max, v) => {
|
|
209
|
+
const order = ['false-positive', 'low', 'medium', 'high', 'critical']
|
|
210
|
+
return order.indexOf(v.adjusted_severity || finding.severity) > order.indexOf(max) ? (v.adjusted_severity || finding.severity) : max
|
|
211
|
+
}, 'low')
|
|
212
|
+
confirmedFindings.push({
|
|
213
|
+
...finding,
|
|
214
|
+
vote_count: `${realVotes.length}/${validVotes.length}`,
|
|
215
|
+
avg_confidence: avgConfidence,
|
|
216
|
+
adjusted_severity: maxSeverity,
|
|
217
|
+
verdicts: validVotes,
|
|
218
|
+
})
|
|
161
219
|
} else {
|
|
162
|
-
falsePositives.push({
|
|
220
|
+
falsePositives.push({
|
|
221
|
+
...finding,
|
|
222
|
+
vote_count: `${realVotes.length}/${validVotes.length}`,
|
|
223
|
+
verdicts: validVotes,
|
|
224
|
+
})
|
|
163
225
|
}
|
|
164
226
|
})
|
|
165
227
|
|
|
166
|
-
|
|
228
|
+
log(`Verified: ${confirmedFindings.length} confirmed, ${falsePositives.length} false-positives (3-vote majority)`)
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
const lowMedFindings = allFindings.filter(f => f.severity === 'medium' || f.severity === 'low')
|
|
232
|
+
|
|
233
|
+
// Phase 3: 3-perspective report generation + arbitrated verdict
|
|
234
|
+
phase('Report')
|
|
235
|
+
|
|
236
|
+
const findingsDigest = `Confirmed findings (${confirmedFindings.length}, adversarially verified by 3-vote majority):
|
|
237
|
+
${confirmedFindings.map(f => `- [${f.adjusted_severity}] ${f.id}: ${f.title} @ ${f.file}:${f.line || '?'} (votes: ${f.vote_count}, confidence: ${f.avg_confidence}%)`).join('\n') || 'None'}
|
|
238
|
+
|
|
239
|
+
False positives filtered (${falsePositives.length}):
|
|
240
|
+
${falsePositives.map(f => `- ${f.id}: ${f.title} (votes: ${f.vote_count})`).join('\n') || 'None'}
|
|
167
241
|
|
|
168
|
-
|
|
169
|
-
|
|
242
|
+
Low/medium findings (${lowMedFindings.length}, not individually verified):
|
|
243
|
+
${lowMedFindings.map(f => `- [${f.severity}] ${f.id}: ${f.title} @ ${f.file}`).join('\n') || 'None'}`
|
|
170
244
|
|
|
171
|
-
|
|
172
|
-
`Generate a consolidated code review report.
|
|
245
|
+
log('Launching 3-perspective reporters (strict / lenient / objective)...')
|
|
173
246
|
|
|
174
|
-
|
|
175
|
-
|
|
247
|
+
const perspectives = await parallel([
|
|
248
|
+
() => agent(
|
|
249
|
+
`You are the STRICT REVIEWER. Apply the highest quality bar.
|
|
176
250
|
|
|
177
|
-
|
|
178
|
-
${falsePositives.map(f => `- ${f.id}: ${f.title} — ${f.verdict.reasoning}`).join('\n') || ''}
|
|
251
|
+
${findingsDigest}
|
|
179
252
|
|
|
180
|
-
|
|
181
|
-
|
|
253
|
+
Your philosophy: ANY confirmed critical/high finding warrants BLOCK. Any confirmed finding warrants REQUEST_CHANGES. Only APPROVE if zero findings exist.
|
|
254
|
+
- Rate quality conservatively
|
|
255
|
+
- List ALL confirmed findings as blocking
|
|
256
|
+
- Consider unverified medium findings as potential risks
|
|
182
257
|
|
|
183
|
-
|
|
258
|
+
Be strict but fair. Provide your verdict and rationale.`,
|
|
259
|
+
{ label: 'report:strict', phase: 'Report', schema: PERSPECTIVE_REPORT_SCHEMA }
|
|
260
|
+
),
|
|
261
|
+
() => agent(
|
|
262
|
+
`You are the LENIENT REVIEWER. Apply a practical, ship-focused bar.
|
|
263
|
+
|
|
264
|
+
${findingsDigest}
|
|
265
|
+
|
|
266
|
+
Your philosophy: Only BLOCK for confirmed critical findings with >80% confidence. REQUEST_CHANGES for confirmed high findings. APPROVE for everything else — medium/low findings can be addressed in follow-ups.
|
|
267
|
+
- Rate quality generously (good code is the norm)
|
|
268
|
+
- Only list truly blocking issues
|
|
269
|
+
- Unverified medium/low findings are informational
|
|
270
|
+
|
|
271
|
+
Be practical but honest. Provide your verdict and rationale.`,
|
|
272
|
+
{ label: 'report:lenient', phase: 'Report', schema: PERSPECTIVE_REPORT_SCHEMA }
|
|
273
|
+
),
|
|
274
|
+
() => agent(
|
|
275
|
+
`You are the OBJECTIVE REVIEWER. Apply evidence-based judgment.
|
|
276
|
+
|
|
277
|
+
${findingsDigest}
|
|
278
|
+
|
|
279
|
+
Your philosophy: Follow the evidence. No default bias.
|
|
280
|
+
- BLOCK: confirmed critical findings exist
|
|
281
|
+
- REQUEST_CHANGES: confirmed high findings but no critical
|
|
184
282
|
- APPROVE: no confirmed critical/high findings
|
|
185
|
-
-
|
|
186
|
-
-
|
|
283
|
+
- Quality rating based on finding density and severity distribution
|
|
284
|
+
- Weight findings by vote confidence
|
|
187
285
|
|
|
188
|
-
|
|
189
|
-
{ label: 'report', phase: 'Report', schema:
|
|
190
|
-
)
|
|
286
|
+
Be analytical and evidence-driven. Provide your verdict and rationale.`,
|
|
287
|
+
{ label: 'report:objective', phase: 'Report', schema: PERSPECTIVE_REPORT_SCHEMA }
|
|
288
|
+
),
|
|
289
|
+
])
|
|
191
290
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
291
|
+
const validPerspectives = perspectives.filter(Boolean)
|
|
292
|
+
const verdictCounts = { APPROVE: 0, REQUEST_CHANGES: 0, BLOCK: 0 }
|
|
293
|
+
validPerspectives.forEach(p => { verdictCounts[p.verdict] = (verdictCounts[p.verdict] || 0) + 1 })
|
|
294
|
+
|
|
295
|
+
const perspectiveDigest = validPerspectives.map(p =>
|
|
296
|
+
`${p.perspective}: ${p.verdict} (quality: ${p.overall_quality}/5, confidence: ${p.confidence}%)\n ${p.rationale}`
|
|
297
|
+
).join('\n\n')
|
|
298
|
+
|
|
299
|
+
log(`Perspective votes: APPROVE=${verdictCounts.APPROVE} REQUEST_CHANGES=${verdictCounts.REQUEST_CHANGES} BLOCK=${verdictCounts.BLOCK}`)
|
|
300
|
+
log('Arbitrating final verdict...')
|
|
301
|
+
|
|
302
|
+
const report = await agent(
|
|
303
|
+
`Generate the final review report by arbitrating 3 reviewer perspectives.
|
|
304
|
+
|
|
305
|
+
=== 3 REVIEWER PERSPECTIVES ===
|
|
306
|
+
${perspectiveDigest}
|
|
307
|
+
|
|
308
|
+
Vote tally: APPROVE=${verdictCounts.APPROVE}, REQUEST_CHANGES=${verdictCounts.REQUEST_CHANGES}, BLOCK=${verdictCounts.BLOCK}
|
|
309
|
+
|
|
310
|
+
=== FINDING DATA ===
|
|
311
|
+
${findingsDigest}
|
|
312
|
+
|
|
313
|
+
ARBITRATE:
|
|
314
|
+
1. The final verdict follows MAJORITY VOTE among the 3 perspectives
|
|
315
|
+
2. Tie-break rule: if split 3 ways (1-1-1), go with the OBJECTIVE reviewer
|
|
316
|
+
3. If strict and objective agree → use their verdict regardless of lenient
|
|
317
|
+
4. Calculate overall_quality as weighted average (strict .25, lenient .25, objective .50)
|
|
318
|
+
5. Record adversarial_verdict with each perspective's vote and the decisive_factor
|
|
319
|
+
6. Compile dimension_summary from scan phase data
|
|
320
|
+
7. List blocking_issues = confirmed findings with adjusted_severity critical or high
|
|
321
|
+
8. Write summary including the adversarial deliberation outcome`,
|
|
322
|
+
{ label: 'arbitrate', phase: 'Report', schema: REPORT_SCHEMA }
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
return {
|
|
326
|
+
report: report,
|
|
327
|
+
confirmed: confirmedFindings,
|
|
328
|
+
false_positives: falsePositives,
|
|
329
|
+
low_findings: lowMedFindings,
|
|
330
|
+
perspectives: validPerspectives,
|
|
331
|
+
metadata: {
|
|
332
|
+
target: target,
|
|
333
|
+
dimensions_scanned: dimensions.length,
|
|
334
|
+
total_findings: allFindings.length,
|
|
335
|
+
verified_count: criticalHigh.length,
|
|
336
|
+
confirmed_count: confirmedFindings.length,
|
|
337
|
+
false_positive_count: falsePositives.length,
|
|
338
|
+
verdict_votes: verdictCounts,
|
|
339
|
+
verdict: report ? report.verdict : 'UNKNOWN',
|
|
340
|
+
},
|
|
226
341
|
}
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
export const meta = {
|
|
2
2
|
name: 'wf-verify',
|
|
3
|
-
description: 'Three-layer
|
|
4
|
-
whenToUse: 'Accelerate maestro-verify with parallel
|
|
3
|
+
description: 'Three-layer verification with prosecutor/defender/judge adversarial aggregation',
|
|
4
|
+
whenToUse: 'Accelerate maestro-verify with parallel layer checks + adversarial pass/fail determination',
|
|
5
5
|
phases: [
|
|
6
6
|
{ title: 'Check', detail: 'Parallel 3-layer verification + anti-pattern scan via workflow-verifier' },
|
|
7
|
-
{ title: '
|
|
7
|
+
{ title: 'Argue', detail: 'Prosecutor argues FAIL, Defender argues PASS — adversarial positions' },
|
|
8
|
+
{ title: 'Judge', detail: 'Judge resolves adversarial debate into final verdict' },
|
|
8
9
|
],
|
|
9
10
|
}
|
|
10
11
|
|
|
11
|
-
// Aligned with workflow-verifier.md: Layer 1 Existence, Layer 2 Substance, Layer 3 Connection
|
|
12
12
|
const LAYER_SCHEMA = {
|
|
13
13
|
type: 'object',
|
|
14
14
|
properties: {
|
|
@@ -78,11 +78,47 @@ const ANTIPATTERN_SCHEMA = {
|
|
|
78
78
|
required: ['clean', 'findings'],
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
+
const ARGUMENT_SCHEMA = {
|
|
82
|
+
type: 'object',
|
|
83
|
+
properties: {
|
|
84
|
+
role: { type: 'string', enum: ['prosecutor', 'defender'] },
|
|
85
|
+
stance: { type: 'string', enum: ['pass', 'fail'] },
|
|
86
|
+
argument: { type: 'string' },
|
|
87
|
+
key_points: {
|
|
88
|
+
type: 'array',
|
|
89
|
+
items: {
|
|
90
|
+
type: 'object',
|
|
91
|
+
properties: {
|
|
92
|
+
point: { type: 'string' },
|
|
93
|
+
evidence: { type: 'string' },
|
|
94
|
+
layer: { type: 'string' },
|
|
95
|
+
strength: { type: 'string', enum: ['strong', 'moderate', 'weak'] },
|
|
96
|
+
},
|
|
97
|
+
required: ['point', 'evidence', 'strength'],
|
|
98
|
+
},
|
|
99
|
+
},
|
|
100
|
+
concessions: { type: 'array', items: { type: 'string' } },
|
|
101
|
+
confidence: { type: 'number', minimum: 0, maximum: 100 },
|
|
102
|
+
},
|
|
103
|
+
required: ['role', 'stance', 'argument', 'key_points', 'confidence'],
|
|
104
|
+
}
|
|
105
|
+
|
|
81
106
|
const AGGREGATE_SCHEMA = {
|
|
82
107
|
type: 'object',
|
|
83
108
|
properties: {
|
|
84
109
|
status: { type: 'string', enum: ['pass', 'fail'] },
|
|
85
110
|
confidence: { type: 'number', minimum: 0, maximum: 100 },
|
|
111
|
+
adversarial_outcome: {
|
|
112
|
+
type: 'object',
|
|
113
|
+
properties: {
|
|
114
|
+
prosecutor_confidence: { type: 'number' },
|
|
115
|
+
defender_confidence: { type: 'number' },
|
|
116
|
+
decisive_factor: { type: 'string' },
|
|
117
|
+
prosecutor_concessions: { type: 'array', items: { type: 'string' } },
|
|
118
|
+
defender_concessions: { type: 'array', items: { type: 'string' } },
|
|
119
|
+
},
|
|
120
|
+
required: ['prosecutor_confidence', 'defender_confidence', 'decisive_factor'],
|
|
121
|
+
},
|
|
86
122
|
layers: {
|
|
87
123
|
type: 'array',
|
|
88
124
|
items: {
|
|
@@ -123,7 +159,7 @@ const AGGREGATE_SCHEMA = {
|
|
|
123
159
|
antipattern_blockers: { type: 'number' },
|
|
124
160
|
executive_summary: { type: 'string' },
|
|
125
161
|
},
|
|
126
|
-
required: ['status', 'confidence', 'layers', 'gaps', 'executive_summary'],
|
|
162
|
+
required: ['status', 'confidence', 'adversarial_outcome', 'layers', 'gaps', 'executive_summary'],
|
|
127
163
|
}
|
|
128
164
|
|
|
129
165
|
const goals = args?.goals || ''
|
|
@@ -138,7 +174,6 @@ const mustHaves = args?.must_haves || ''
|
|
|
138
174
|
phase('Check')
|
|
139
175
|
|
|
140
176
|
const checks = [
|
|
141
|
-
// Layer 1: Existence — verify all expected artifacts exist
|
|
142
177
|
() => agent(
|
|
143
178
|
`Layer 1 — EXISTENCE verification.
|
|
144
179
|
Goals: ${goals}
|
|
@@ -156,8 +191,6 @@ Verify all expected artifacts EXIST:
|
|
|
156
191
|
Set layer="existence" in output.`,
|
|
157
192
|
{ label: 'layer:existence', phase: 'Check', schema: LAYER_SCHEMA, agentType: 'workflow-verifier' }
|
|
158
193
|
),
|
|
159
|
-
|
|
160
|
-
// Layer 2: Substance — verify artifacts are non-trivial
|
|
161
194
|
() => agent(
|
|
162
195
|
`Layer 2 — SUBSTANCE verification.
|
|
163
196
|
Goals: ${goals}
|
|
@@ -174,8 +207,6 @@ Verify artifacts contain REAL SUBSTANCE (not stubs):
|
|
|
174
207
|
Set layer="substance" in output.`,
|
|
175
208
|
{ label: 'layer:substance', phase: 'Check', schema: LAYER_SCHEMA, agentType: 'workflow-verifier' }
|
|
176
209
|
),
|
|
177
|
-
|
|
178
|
-
// Layer 3: Connection — verify wiring
|
|
179
210
|
() => agent(
|
|
180
211
|
`Layer 3 — CONNECTION verification.
|
|
181
212
|
Goals: ${goals}
|
|
@@ -196,7 +227,6 @@ Set layer="connection" in output.`,
|
|
|
196
227
|
),
|
|
197
228
|
]
|
|
198
229
|
|
|
199
|
-
// Anti-pattern scan (unless skipped)
|
|
200
230
|
if (!skipAntipattern) {
|
|
201
231
|
checks.push(() => agent(
|
|
202
232
|
`Anti-pattern scan for modified files.
|
|
@@ -218,7 +248,6 @@ Severity: "blocker" for stubs/not-implemented/hardcoded-secrets, "warning" for T
|
|
|
218
248
|
))
|
|
219
249
|
}
|
|
220
250
|
|
|
221
|
-
// Per-task convergence validation (if task files provided)
|
|
222
251
|
if (taskFiles.length > 0) {
|
|
223
252
|
checks.push(...taskFiles.map((taskFile, idx) => () => agent(
|
|
224
253
|
`Per-task convergence validation for: ${taskFile}
|
|
@@ -242,9 +271,6 @@ const layers = validResults.filter(r => r.layer)
|
|
|
242
271
|
const antipatterns = validResults.find(r => r.clean !== undefined) || { clean: true, findings: [] }
|
|
243
272
|
const convergenceResults = validResults.filter(r => r.task_id)
|
|
244
273
|
|
|
245
|
-
// Phase 2: Aggregate
|
|
246
|
-
phase('Aggregate')
|
|
247
|
-
|
|
248
274
|
const layerDigest = layers.map(l => {
|
|
249
275
|
const passCount = l.checks.filter(c => c.status === 'pass').length
|
|
250
276
|
const failCount = l.checks.filter(c => c.status === 'fail').length
|
|
@@ -259,29 +285,98 @@ const antipatternDigest = antipatterns.clean
|
|
|
259
285
|
? 'Anti-pattern scan: CLEAN'
|
|
260
286
|
: `Anti-pattern scan: ${antipatterns.findings.length} issues (${antipatterns.findings.filter(f => f.severity === 'blocker').length} blockers)\n${antipatterns.findings.map(f => ` [${f.severity}] ${f.type} @ ${f.file}:${f.line || '?'}: ${f.content}`).join('\n')}`
|
|
261
287
|
|
|
262
|
-
const
|
|
263
|
-
`Aggregate all verification results into a final assessment.
|
|
288
|
+
const evidencePackage = `${layerDigest}\n\n${convergenceDigest}\n\n${antipatternDigest}`
|
|
264
289
|
|
|
265
|
-
|
|
290
|
+
// Phase 2: Adversarial Arguments — Prosecutor vs Defender
|
|
291
|
+
phase('Argue')
|
|
292
|
+
log('Launching adversarial debate: Prosecutor (FAIL) vs Defender (PASS)...')
|
|
266
293
|
|
|
267
|
-
|
|
294
|
+
const arguments_ = await parallel([
|
|
295
|
+
() => agent(
|
|
296
|
+
`You are the PROSECUTOR. Argue that this verification should FAIL.
|
|
297
|
+
|
|
298
|
+
=== VERIFICATION EVIDENCE ===
|
|
299
|
+
${evidencePackage}
|
|
300
|
+
|
|
301
|
+
Build the STRONGEST case for FAILURE:
|
|
302
|
+
1. Magnify every failed check — explain the downstream consequences
|
|
303
|
+
2. Connect antipattern findings to substance/connection failures
|
|
304
|
+
3. Challenge "pass" checks — are they truly passing or just not checking hard enough?
|
|
305
|
+
4. Highlight convergence gaps as unfinished work
|
|
306
|
+
5. Argue that partial passes are effectively failures
|
|
307
|
+
|
|
308
|
+
Your job is to convince the Judge that quality is insufficient.
|
|
309
|
+
Concede points where the evidence genuinely supports a pass — admitted concessions strengthen your credibility.
|
|
310
|
+
Confidence reflects how strong your FAIL case actually is.`,
|
|
311
|
+
{ label: 'prosecutor', phase: 'Argue', schema: ARGUMENT_SCHEMA }
|
|
312
|
+
),
|
|
313
|
+
() => agent(
|
|
314
|
+
`You are the DEFENDER. Argue that this verification should PASS.
|
|
315
|
+
|
|
316
|
+
=== VERIFICATION EVIDENCE ===
|
|
317
|
+
${evidencePackage}
|
|
318
|
+
|
|
319
|
+
Build the STRONGEST case for PASSING:
|
|
320
|
+
1. Emphasize passed checks and their coverage
|
|
321
|
+
2. Contextualize failures — are they truly blocking or just minor gaps?
|
|
322
|
+
3. Argue that antipattern warnings don't indicate real quality issues
|
|
323
|
+
4. Show that the core goals are met even if some checks are partial
|
|
324
|
+
5. Demonstrate that failed checks have low real-world impact
|
|
325
|
+
|
|
326
|
+
Your job is to convince the Judge that quality is sufficient.
|
|
327
|
+
Concede points where the evidence genuinely supports a fail — admitted concessions strengthen your credibility.
|
|
328
|
+
Confidence reflects how strong your PASS case actually is.`,
|
|
329
|
+
{ label: 'defender', phase: 'Argue', schema: ARGUMENT_SCHEMA }
|
|
330
|
+
),
|
|
331
|
+
])
|
|
332
|
+
|
|
333
|
+
const validArguments = arguments_.filter(Boolean)
|
|
334
|
+
const prosecutorArg = validArguments.find(a => a.role === 'prosecutor')
|
|
335
|
+
const defenderArg = validArguments.find(a => a.role === 'defender')
|
|
268
336
|
|
|
269
|
-
|
|
337
|
+
const debateDigest = validArguments.map(a =>
|
|
338
|
+
`### ${a.role.toUpperCase()} (stance: ${a.stance}, confidence: ${a.confidence}%)\n${a.argument}\n\nKey points:\n${a.key_points.map(p => `- [${p.strength}] ${p.point} (evidence: ${p.evidence})`).join('\n')}\n\nConcessions:\n${a.concessions.map(c => `- ${c}`).join('\n') || ' none'}`
|
|
339
|
+
).join('\n\n---\n\n')
|
|
270
340
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
341
|
+
log(`Prosecutor: ${prosecutorArg ? prosecutorArg.confidence : '?'}% confident FAIL | Defender: ${defenderArg ? defenderArg.confidence : '?'}% confident PASS`)
|
|
342
|
+
|
|
343
|
+
// Phase 3: Judge resolves the adversarial debate
|
|
344
|
+
phase('Judge')
|
|
345
|
+
log('Judge resolving adversarial verification debate...')
|
|
346
|
+
|
|
347
|
+
const aggregate = await agent(
|
|
348
|
+
`You are the JUDGE. Two advocates have argued for and against passing this verification.
|
|
349
|
+
|
|
350
|
+
=== ADVERSARIAL DEBATE ===
|
|
351
|
+
${debateDigest}
|
|
352
|
+
|
|
353
|
+
=== RAW EVIDENCE ===
|
|
354
|
+
${evidencePackage}
|
|
355
|
+
|
|
356
|
+
JUDGE the debate:
|
|
357
|
+
1. Evaluate each advocate's key points against the raw evidence
|
|
358
|
+
2. Weigh point strength: strong > moderate > weak
|
|
359
|
+
3. Points conceded by the opposing side have extra weight
|
|
360
|
+
4. Check for arguments NOT backed by evidence (rhetoric without substance)
|
|
361
|
+
|
|
362
|
+
Decision rules:
|
|
363
|
+
- If ALL layers truly pass AND antipattern clean AND convergence met → PASS
|
|
364
|
+
- If any layer has >50% failed checks → FAIL regardless of defense
|
|
365
|
+
- If antipattern has blockers → FAIL unless defender proves they're false positives
|
|
366
|
+
- If prosecutor confidence > 80% AND defender concedes major points → FAIL
|
|
367
|
+
- If defender confidence > 80% AND prosecutor only has weak points → PASS
|
|
368
|
+
- Otherwise → weigh evidence strength on both sides
|
|
369
|
+
|
|
370
|
+
Record adversarial_outcome with both confidences, concessions, and the decisive_factor.
|
|
371
|
+
Compile layers, convergence_summary, gaps, and executive_summary.`,
|
|
372
|
+
{ label: 'judge', phase: 'Judge', schema: AGGREGATE_SCHEMA }
|
|
279
373
|
)
|
|
280
374
|
|
|
281
375
|
return {
|
|
282
376
|
layers: layers,
|
|
283
377
|
convergence: convergenceResults,
|
|
284
378
|
antipatterns: antipatterns,
|
|
379
|
+
debate: { prosecutor: prosecutorArg, defender: defenderArg },
|
|
285
380
|
aggregate: aggregate,
|
|
286
381
|
metadata: {
|
|
287
382
|
layer_count: layers.length,
|
|
@@ -292,6 +387,8 @@ return {
|
|
|
292
387
|
converged_tasks: convergenceResults.filter(c => c.overall_converged).length,
|
|
293
388
|
antipattern_count: antipatterns.findings.length,
|
|
294
389
|
blocker_count: antipatterns.findings.filter(f => f.severity === 'blocker').length,
|
|
390
|
+
prosecutor_confidence: prosecutorArg ? prosecutorArg.confidence : null,
|
|
391
|
+
defender_confidence: defenderArg ? defenderArg.confidence : null,
|
|
295
392
|
overall_status: aggregate ? aggregate.status : 'unknown',
|
|
296
393
|
confidence: aggregate ? aggregate.confidence : 0,
|
|
297
394
|
},
|