maestro-flow 0.4.20 → 0.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/.agents/skills/maestro-ralph-execute/SKILL.md +2 -1
  2. package/.agents/skills/maestro-swarm-workflow/SKILL.md +27 -19
  3. package/.agents/skills/maestro-universal-workflow/SKILL.md +563 -0
  4. package/.agents/skills/team-adversarial-swarm/SKILL.md +235 -0
  5. package/.agents/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  6. package/.agents/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  7. package/.agents/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  8. package/.agents/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  9. package/.agents/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  10. package/.agents/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  11. package/.agents/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  12. package/.agents/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  13. package/.agents/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  14. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  15. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  16. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  17. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  18. package/.agy/skills/maestro-ralph-execute/SKILL.md +2 -1
  19. package/.agy/skills/maestro-swarm-workflow/SKILL.md +27 -19
  20. package/.agy/skills/maestro-universal-workflow/SKILL.md +560 -0
  21. package/.agy/skills/team-adversarial-swarm/SKILL.md +244 -0
  22. package/.agy/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  23. package/.agy/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  24. package/.agy/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  25. package/.agy/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  26. package/.agy/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  27. package/.agy/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  28. package/.agy/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  29. package/.agy/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  30. package/.agy/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  31. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  32. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  33. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  34. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  35. package/.claude/commands/maestro-ralph-execute.md +2 -1
  36. package/.claude/commands/maestro-swarm-workflow.md +27 -19
  37. package/.claude/commands/maestro-universal-workflow.md +561 -0
  38. package/.claude/skills/team-adversarial-swarm/SKILL.md +233 -0
  39. package/.claude/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  40. package/.claude/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  41. package/.claude/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  42. package/.claude/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  43. package/.claude/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  44. package/.claude/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  45. package/.claude/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  46. package/.claude/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  47. package/.claude/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  48. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  49. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  50. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  51. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  52. package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js +1 -1
  53. package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js.map +1 -1
  54. package/dashboard/dist-server/dashboard/src/server/wiki/search.js +1 -1
  55. package/dashboard/dist-server/dashboard/src/server/wiki/search.js.map +1 -1
  56. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.d.ts +1 -1
  57. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js +5 -5
  58. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js.map +1 -1
  59. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +3 -3
  60. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
  61. package/dashboard/dist-server/src/graph/types.d.ts +111 -0
  62. package/dashboard/dist-server/src/graph/types.js +2 -0
  63. package/dashboard/dist-server/src/graph/types.js.map +1 -0
  64. package/dist/src/commands/install-backend.d.ts +0 -7
  65. package/dist/src/commands/install-backend.d.ts.map +1 -1
  66. package/dist/src/commands/install-backend.js +0 -14
  67. package/dist/src/commands/install-backend.js.map +1 -1
  68. package/dist/src/commands/install.d.ts.map +1 -1
  69. package/dist/src/commands/install.js +0 -18
  70. package/dist/src/commands/install.js.map +1 -1
  71. package/dist/src/commands/kg.d.ts +2 -2
  72. package/dist/src/commands/kg.d.ts.map +1 -1
  73. package/dist/src/commands/kg.js +150 -179
  74. package/dist/src/commands/kg.js.map +1 -1
  75. package/dist/src/graph/analyzers/fs-analyzer.d.ts +10 -0
  76. package/dist/src/graph/analyzers/fs-analyzer.d.ts.map +1 -0
  77. package/dist/src/graph/analyzers/fs-analyzer.js +959 -0
  78. package/dist/src/graph/analyzers/fs-analyzer.js.map +1 -0
  79. package/dist/src/graph/index.d.ts +6 -0
  80. package/dist/src/graph/index.d.ts.map +1 -0
  81. package/dist/src/graph/index.js +6 -0
  82. package/dist/src/graph/index.js.map +1 -0
  83. package/dist/src/graph/loader.d.ts +3 -0
  84. package/dist/src/graph/loader.d.ts.map +1 -0
  85. package/dist/src/graph/loader.js +12 -0
  86. package/dist/src/graph/loader.js.map +1 -0
  87. package/dist/src/graph/merger.d.ts +56 -0
  88. package/dist/src/graph/merger.d.ts.map +1 -0
  89. package/dist/src/graph/merger.js +896 -0
  90. package/dist/src/graph/merger.js.map +1 -0
  91. package/dist/src/graph/query.d.ts +7 -0
  92. package/dist/src/graph/query.d.ts.map +1 -0
  93. package/dist/src/graph/query.js +126 -0
  94. package/dist/src/graph/query.js.map +1 -0
  95. package/dist/src/graph/types.d.ts +112 -0
  96. package/dist/src/graph/types.d.ts.map +1 -0
  97. package/dist/src/graph/types.js +2 -0
  98. package/dist/src/graph/types.js.map +1 -0
  99. package/dist/src/i18n/locales/en.d.ts.map +1 -1
  100. package/dist/src/i18n/locales/en.js +0 -10
  101. package/dist/src/i18n/locales/en.js.map +1 -1
  102. package/dist/src/i18n/locales/zh.d.ts.map +1 -1
  103. package/dist/src/i18n/locales/zh.js +0 -10
  104. package/dist/src/i18n/locales/zh.js.map +1 -1
  105. package/dist/src/i18n/types.d.ts +0 -9
  106. package/dist/src/i18n/types.d.ts.map +1 -1
  107. package/dist/src/tui/install-ui/InstallConfirm.d.ts +0 -1
  108. package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
  109. package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
  110. package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
  111. package/dist/src/tui/install-ui/InstallExecution.d.ts +0 -1
  112. package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
  113. package/dist/src/tui/install-ui/InstallExecution.js +0 -22
  114. package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
  115. package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
  116. package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
  117. package/dist/src/tui/install-ui/InstallFlow.js +5 -23
  118. package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
  119. package/dist/src/tui/install-ui/InstallHub.d.ts +0 -2
  120. package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
  121. package/dist/src/tui/install-ui/InstallHub.js +0 -6
  122. package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
  123. package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
  124. package/dist/src/tui/install-ui/InstallResult.js +1 -1
  125. package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
  126. package/dist/src/utils/update-notices.js +12 -0
  127. package/dist/src/utils/update-notices.js.map +1 -1
  128. package/package.json +1 -1
  129. package/workflows/swarm/wf-analyze.js +195 -34
  130. package/workflows/swarm/wf-brainstorm.js +225 -53
  131. package/workflows/swarm/wf-execute.js +199 -23
  132. package/workflows/swarm/wf-grill.js +181 -20
  133. package/workflows/swarm/wf-milestone-audit.js +178 -29
  134. package/workflows/swarm/wf-plan.js +288 -53
  135. package/workflows/swarm/wf-review.js +195 -80
  136. package/workflows/swarm/wf-verify.js +125 -28
@@ -1,15 +1,14 @@
1
1
  export const meta = {
2
2
  name: 'wf-review',
3
- description: 'Multi-dimension parallel code review via workflow-reviewer with adversarial verification',
4
- whenToUse: 'Accelerate quality-review with parallel dimension-specific scanning and finding verification',
3
+ description: 'Multi-dimension code review with 3-vote adversarial verification and multi-perspective verdict',
4
+ whenToUse: 'Accelerate quality-review with parallel scanning + 3-vote finding verification + 3-perspective verdict arbitration',
5
5
  phases: [
6
6
  { title: 'Scan', detail: 'Parallel dimension scanning via workflow-reviewer' },
7
- { title: 'Verify', detail: 'Adversarial verification of critical findings' },
8
- { title: 'Report', detail: 'Consolidated review report with verdict' },
7
+ { title: 'Verify', detail: '3-vote adversarial verification per critical finding (majority wins)' },
8
+ { title: 'Report', detail: '3-perspective reporters (strict/lenient/objective) + arbitrated verdict' },
9
9
  ],
10
10
  }
11
11
 
12
- // Aligned with workflow-reviewer.md dimension definitions
13
12
  const REVIEW_DIMENSIONS = [
14
13
  { key: 'correctness', prefix: 'COR', prompt: 'Dimension: correctness. Focus: Logic errors, off-by-one, null handling, missing error propagation, type mismatches, unhandled edge cases, broken invariants, incorrect conditions.' },
15
14
  { key: 'security', prefix: 'SEC', prompt: 'Dimension: security. Focus: Injection vectors (SQL/command/XSS), auth bypass, hardcoded secrets, missing input validation, data exposure in logs/errors, SSRF, IDOR, insecure crypto.' },
@@ -57,6 +56,19 @@ const VERDICT_SCHEMA = {
57
56
  required: ['finding_id', 'is_real', 'confidence', 'reasoning'],
58
57
  }
59
58
 
59
+ const PERSPECTIVE_REPORT_SCHEMA = {
60
+ type: 'object',
61
+ properties: {
62
+ perspective: { type: 'string' },
63
+ verdict: { type: 'string', enum: ['APPROVE', 'REQUEST_CHANGES', 'BLOCK'] },
64
+ overall_quality: { type: 'number', minimum: 1, maximum: 5 },
65
+ rationale: { type: 'string' },
66
+ blocking_issues: { type: 'array', items: { type: 'object', properties: { id: { type: 'string' }, title: { type: 'string' }, file: { type: 'string' }, severity: { type: 'string' } }, required: ['id', 'title'] } },
67
+ confidence: { type: 'number', minimum: 0, maximum: 100 },
68
+ },
69
+ required: ['perspective', 'verdict', 'overall_quality', 'rationale', 'confidence'],
70
+ }
71
+
60
72
  const REPORT_SCHEMA = {
61
73
  type: 'object',
62
74
  properties: {
@@ -76,9 +88,10 @@ const REPORT_SCHEMA = {
76
88
  },
77
89
  },
78
90
  blocking_issues: { type: 'array', items: { type: 'object', properties: { id: { type: 'string' }, title: { type: 'string' }, file: { type: 'string' }, severity: { type: 'string' }, suggestion: { type: 'string' } }, required: ['id', 'title', 'file', 'severity'] } },
91
+ adversarial_verdict: { type: 'object', properties: { strict: { type: 'string' }, lenient: { type: 'string' }, objective: { type: 'string' }, decisive_factor: { type: 'string' } }, required: ['strict', 'lenient', 'objective', 'decisive_factor'] },
79
92
  summary: { type: 'string' },
80
93
  },
81
- required: ['verdict', 'overall_quality', 'dimension_summary', 'blocking_issues', 'summary'],
94
+ required: ['verdict', 'overall_quality', 'dimension_summary', 'blocking_issues', 'adversarial_verdict', 'summary'],
82
95
  }
83
96
 
84
97
  const target = args?.target || 'changed files on current branch'
@@ -89,7 +102,7 @@ const dimensions = args?.dimensions
89
102
  ? REVIEW_DIMENSIONS.filter(d => args.dimensions.includes(d.key))
90
103
  : (tier === 'quick' ? REVIEW_DIMENSIONS.slice(0, 3) : REVIEW_DIMENSIONS)
91
104
 
92
- // Phase 1: Parallel dimension scanning via workflow-reviewer
105
+ // Phase 1: Parallel dimension scanning
93
106
  phase('Scan')
94
107
  log(`Scanning ${dimensions.length} dimensions in parallel via workflow-reviewer...`)
95
108
 
@@ -121,106 +134,208 @@ const criticalHigh = allFindings.filter(f => f.severity === 'critical' || f.seve
121
134
 
122
135
  log(`Found ${allFindings.length} total (${criticalHigh.length} critical/high across ${validScans.length} dimensions)`)
123
136
 
124
- // Phase 2: Adversarial verification of critical/high findings
137
+ // Phase 2: 3-vote adversarial verification per critical/high finding
125
138
  phase('Verify')
126
139
 
140
+ const confirmedFindings = []
141
+ const falsePositives = []
142
+
127
143
  if (criticalHigh.length > 0) {
128
- log(`Adversarially verifying ${criticalHigh.length} critical/high findings...`)
144
+ log(`3-vote adversarial verification of ${criticalHigh.length} critical/high findings...`)
129
145
 
130
146
  const verified = await pipeline(
131
147
  criticalHigh,
132
- (finding) => agent(
133
- `Adversarially verify this code review finding. Your job is to REFUTE it — find reasons it might be:
134
- - A false positive (the code is actually correct)
135
- - Less severe than claimed (downgrade severity)
136
- - Not applicable in this context
148
+ (finding) => parallel([
149
+ () => agent(
150
+ `VOTE 1 — PROSECUTOR: Argue this finding IS REAL and the severity is justified.
137
151
 
138
152
  Finding: [${finding.severity}] ${finding.id}: ${finding.title}
139
153
  File: ${finding.file}${finding.line ? ':' + finding.line : ''}
140
154
  Description: ${finding.description}
141
155
  Evidence: ${finding.evidence || 'none provided'}
142
156
 
143
- Read the actual source code at the specified location. Check:
144
- 1. Is the code actually doing what the finding claims?
145
- 2. Is there handling elsewhere that mitigates this?
146
- 3. Is the severity justified?
157
+ Read the actual source code. Build the case that this is a genuine issue:
158
+ - Show the exact code path that triggers the bug/vulnerability
159
+ - Demonstrate the impact with a concrete scenario
160
+ - Argue why the severity rating is correct or should be higher
147
161
 
148
- Default to is_real=false and adjusted_severity=false-positive if uncertain.
149
- Only confirm findings you can verify in the actual code with high confidence.`,
150
- { label: `verify:${finding.id}`, phase: 'Verify', schema: VERDICT_SCHEMA }
151
- )
152
- )
162
+ Default to is_real=true. Only say false if the code clearly doesn't have this issue.`,
163
+ { label: `vote1:${finding.id}`, phase: 'Verify', schema: VERDICT_SCHEMA }
164
+ ),
165
+ () => agent(
166
+ `VOTE 2 — DEFENSE: Argue this finding is a FALSE POSITIVE or overstated.
153
167
 
154
- const confirmedFindings = []
155
- const falsePositives = []
168
+ Finding: [${finding.severity}] ${finding.id}: ${finding.title}
169
+ File: ${finding.file}${finding.line ? ':' + finding.line : ''}
170
+ Description: ${finding.description}
171
+ Evidence: ${finding.evidence || 'none provided'}
172
+
173
+ Read the actual source code. Build the case AGAINST this finding:
174
+ - Show handling elsewhere that mitigates the issue
175
+ - Demonstrate why the severity is overstated
176
+ - Find framework guarantees or type safety that prevents the claimed scenario
177
+
178
+ Default to is_real=false. Only confirm if you genuinely cannot find any defense.`,
179
+ { label: `vote2:${finding.id}`, phase: 'Verify', schema: VERDICT_SCHEMA }
180
+ ),
181
+ () => agent(
182
+ `VOTE 3 — INDEPENDENT JUDGE: Evaluate this finding objectively, without bias.
183
+
184
+ Finding: [${finding.severity}] ${finding.id}: ${finding.title}
185
+ File: ${finding.file}${finding.line ? ':' + finding.line : ''}
186
+ Description: ${finding.description}
187
+ Evidence: ${finding.evidence || 'none provided'}
188
+
189
+ Read the actual source code. Make an independent, evidence-based assessment:
190
+ - Verify the claimed behavior exists in the code
191
+ - Check if there are mitigations the reporter missed
192
+ - Assess the actual severity based on real-world impact
193
+
194
+ No default bias. Judge purely on evidence. Confidence should reflect evidence strength.`,
195
+ { label: `vote3:${finding.id}`, phase: 'Verify', schema: VERDICT_SCHEMA }
196
+ ),
197
+ ])
198
+ )
156
199
 
157
- verified.filter(Boolean).forEach((verdict, i) => {
200
+ verified.filter(Boolean).forEach((votes, i) => {
158
201
  const finding = criticalHigh[i]
159
- if (verdict.is_real && verdict.confidence >= 60) {
160
- confirmedFindings.push({ ...finding, verdict: verdict, adjusted_severity: verdict.adjusted_severity || finding.severity })
202
+ const validVotes = votes.filter(Boolean)
203
+ const realVotes = validVotes.filter(v => v.is_real)
204
+ const isConfirmed = realVotes.length >= 2
205
+
206
+ if (isConfirmed) {
207
+ const avgConfidence = Math.round(realVotes.reduce((s, v) => s + v.confidence, 0) / realVotes.length)
208
+ const maxSeverity = validVotes.reduce((max, v) => {
209
+ const order = ['false-positive', 'low', 'medium', 'high', 'critical']
210
+ return order.indexOf(v.adjusted_severity || finding.severity) > order.indexOf(max) ? (v.adjusted_severity || finding.severity) : max
211
+ }, 'low')
212
+ confirmedFindings.push({
213
+ ...finding,
214
+ vote_count: `${realVotes.length}/${validVotes.length}`,
215
+ avg_confidence: avgConfidence,
216
+ adjusted_severity: maxSeverity,
217
+ verdicts: validVotes,
218
+ })
161
219
  } else {
162
- falsePositives.push({ ...finding, verdict: verdict })
220
+ falsePositives.push({
221
+ ...finding,
222
+ vote_count: `${realVotes.length}/${validVotes.length}`,
223
+ verdicts: validVotes,
224
+ })
163
225
  }
164
226
  })
165
227
 
166
- const lowMedFindings = allFindings.filter(f => f.severity === 'medium' || f.severity === 'low')
228
+ log(`Verified: ${confirmedFindings.length} confirmed, ${falsePositives.length} false-positives (3-vote majority)`)
229
+ }
230
+
231
+ const lowMedFindings = allFindings.filter(f => f.severity === 'medium' || f.severity === 'low')
232
+
233
+ // Phase 3: 3-perspective report generation + arbitrated verdict
234
+ phase('Report')
235
+
236
+ const findingsDigest = `Confirmed findings (${confirmedFindings.length}, adversarially verified by 3-vote majority):
237
+ ${confirmedFindings.map(f => `- [${f.adjusted_severity}] ${f.id}: ${f.title} @ ${f.file}:${f.line || '?'} (votes: ${f.vote_count}, confidence: ${f.avg_confidence}%)`).join('\n') || 'None'}
238
+
239
+ False positives filtered (${falsePositives.length}):
240
+ ${falsePositives.map(f => `- ${f.id}: ${f.title} (votes: ${f.vote_count})`).join('\n') || 'None'}
167
241
 
168
- // Phase 3: Consolidated report
169
- phase('Report')
242
+ Low/medium findings (${lowMedFindings.length}, not individually verified):
243
+ ${lowMedFindings.map(f => `- [${f.severity}] ${f.id}: ${f.title} @ ${f.file}`).join('\n') || 'None'}`
170
244
 
171
- const report = await agent(
172
- `Generate a consolidated code review report.
245
+ log('Launching 3-perspective reporters (strict / lenient / objective)...')
173
246
 
174
- Confirmed findings (adversarially verified, ${confirmedFindings.length}):
175
- ${confirmedFindings.map(f => `- [${f.adjusted_severity}] ${f.id}: ${f.title} @ ${f.file}:${f.line || '?'} (confidence: ${f.verdict.confidence}%)\n ${f.description}`).join('\n') || 'None'}
247
+ const perspectives = await parallel([
248
+ () => agent(
249
+ `You are the STRICT REVIEWER. Apply the highest quality bar.
176
250
 
177
- False positives filtered: ${falsePositives.length}
178
- ${falsePositives.map(f => `- ${f.id}: ${f.title} — ${f.verdict.reasoning}`).join('\n') || ''}
251
+ ${findingsDigest}
179
252
 
180
- Low/medium findings (not individually verified, ${lowMedFindings.length}):
181
- ${lowMedFindings.map(f => `- [${f.severity}] ${f.id}: ${f.title} @ ${f.file}`).join('\n') || 'None'}
253
+ Your philosophy: ANY confirmed critical/high finding warrants BLOCK. Any confirmed finding warrants REQUEST_CHANGES. Only APPROVE if zero findings exist.
254
+ - Rate quality conservatively
255
+ - List ALL confirmed findings as blocking
256
+ - Consider unverified medium findings as potential risks
182
257
 
183
- Determine verdict:
258
+ Be strict but fair. Provide your verdict and rationale.`,
259
+ { label: 'report:strict', phase: 'Report', schema: PERSPECTIVE_REPORT_SCHEMA }
260
+ ),
261
+ () => agent(
262
+ `You are the LENIENT REVIEWER. Apply a practical, ship-focused bar.
263
+
264
+ ${findingsDigest}
265
+
266
+ Your philosophy: Only BLOCK for confirmed critical findings with >80% confidence. REQUEST_CHANGES for confirmed high findings. APPROVE for everything else — medium/low findings can be addressed in follow-ups.
267
+ - Rate quality generously (good code is the norm)
268
+ - Only list truly blocking issues
269
+ - Unverified medium/low findings are informational
270
+
271
+ Be practical but honest. Provide your verdict and rationale.`,
272
+ { label: 'report:lenient', phase: 'Report', schema: PERSPECTIVE_REPORT_SCHEMA }
273
+ ),
274
+ () => agent(
275
+ `You are the OBJECTIVE REVIEWER. Apply evidence-based judgment.
276
+
277
+ ${findingsDigest}
278
+
279
+ Your philosophy: Follow the evidence. No default bias.
280
+ - BLOCK: confirmed critical findings exist
281
+ - REQUEST_CHANGES: confirmed high findings but no critical
184
282
  - APPROVE: no confirmed critical/high findings
185
- - REQUEST_CHANGES: has confirmed high findings but no critical
186
- - BLOCK: has confirmed critical findings
283
+ - Quality rating based on finding density and severity distribution
284
+ - Weight findings by vote confidence
187
285
 
188
- Rate overall quality (1-5) and summarize per dimension.`,
189
- { label: 'report', phase: 'Report', schema: REPORT_SCHEMA }
190
- )
286
+ Be analytical and evidence-driven. Provide your verdict and rationale.`,
287
+ { label: 'report:objective', phase: 'Report', schema: PERSPECTIVE_REPORT_SCHEMA }
288
+ ),
289
+ ])
191
290
 
192
- return {
193
- report: report,
194
- confirmed: confirmedFindings,
195
- false_positives: falsePositives,
196
- low_findings: lowMedFindings,
197
- metadata: {
198
- target: target,
199
- dimensions_scanned: dimensions.length,
200
- total_findings: allFindings.length,
201
- verified_count: criticalHigh.length,
202
- confirmed_count: confirmedFindings.length,
203
- false_positive_count: falsePositives.length,
204
- verdict: report ? report.verdict : 'UNKNOWN',
205
- },
206
- }
207
- } else {
208
- phase('Report')
209
- log('No critical/high findings — generating clean report')
210
-
211
- return {
212
- report: { verdict: 'APPROVE', overall_quality: 4, dimension_summary: validScans.map(s => ({ dimension: s.dimension, finding_count: s.findings.length, max_severity: s.findings[0]?.severity || 'none', assessment: 'Clean' })), blocking_issues: [], summary: 'No critical or high severity issues found. Code passes review.' },
213
- confirmed: [],
214
- false_positives: [],
215
- low_findings: allFindings,
216
- metadata: {
217
- target: target,
218
- dimensions_scanned: dimensions.length,
219
- total_findings: allFindings.length,
220
- verified_count: 0,
221
- confirmed_count: 0,
222
- false_positive_count: 0,
223
- verdict: 'APPROVE',
224
- },
225
- }
291
+ const validPerspectives = perspectives.filter(Boolean)
292
+ const verdictCounts = { APPROVE: 0, REQUEST_CHANGES: 0, BLOCK: 0 }
293
+ validPerspectives.forEach(p => { verdictCounts[p.verdict] = (verdictCounts[p.verdict] || 0) + 1 })
294
+
295
+ const perspectiveDigest = validPerspectives.map(p =>
296
+ `${p.perspective}: ${p.verdict} (quality: ${p.overall_quality}/5, confidence: ${p.confidence}%)\n ${p.rationale}`
297
+ ).join('\n\n')
298
+
299
+ log(`Perspective votes: APPROVE=${verdictCounts.APPROVE} REQUEST_CHANGES=${verdictCounts.REQUEST_CHANGES} BLOCK=${verdictCounts.BLOCK}`)
300
+ log('Arbitrating final verdict...')
301
+
302
+ const report = await agent(
303
+ `Generate the final review report by arbitrating 3 reviewer perspectives.
304
+
305
+ === 3 REVIEWER PERSPECTIVES ===
306
+ ${perspectiveDigest}
307
+
308
+ Vote tally: APPROVE=${verdictCounts.APPROVE}, REQUEST_CHANGES=${verdictCounts.REQUEST_CHANGES}, BLOCK=${verdictCounts.BLOCK}
309
+
310
+ === FINDING DATA ===
311
+ ${findingsDigest}
312
+
313
+ ARBITRATE:
314
+ 1. The final verdict follows MAJORITY VOTE among the 3 perspectives
315
+ 2. Tie-break rule: if split 3 ways (1-1-1), go with the OBJECTIVE reviewer
316
+ 3. If strict and objective agree → use their verdict regardless of lenient
317
+ 4. Calculate overall_quality as weighted average (strict .25, lenient .25, objective .50)
318
+ 5. Record adversarial_verdict with each perspective's vote and the decisive_factor
319
+ 6. Compile dimension_summary from scan phase data
320
+ 7. List blocking_issues = confirmed findings with adjusted_severity critical or high
321
+ 8. Write summary including the adversarial deliberation outcome`,
322
+ { label: 'arbitrate', phase: 'Report', schema: REPORT_SCHEMA }
323
+ )
324
+
325
+ return {
326
+ report: report,
327
+ confirmed: confirmedFindings,
328
+ false_positives: falsePositives,
329
+ low_findings: lowMedFindings,
330
+ perspectives: validPerspectives,
331
+ metadata: {
332
+ target: target,
333
+ dimensions_scanned: dimensions.length,
334
+ total_findings: allFindings.length,
335
+ verified_count: criticalHigh.length,
336
+ confirmed_count: confirmedFindings.length,
337
+ false_positive_count: falsePositives.length,
338
+ verdict_votes: verdictCounts,
339
+ verdict: report ? report.verdict : 'UNKNOWN',
340
+ },
226
341
  }
@@ -1,14 +1,14 @@
1
1
  export const meta = {
2
2
  name: 'wf-verify',
3
- description: 'Three-layer goal-backward verification via workflow-verifier + anti-pattern scan',
4
- whenToUse: 'Accelerate maestro-verify with parallel existence/substance/connection checks and convergence validation',
3
+ description: 'Three-layer verification with prosecutor/defender/judge adversarial aggregation',
4
+ whenToUse: 'Accelerate maestro-verify with parallel layer checks + adversarial pass/fail determination',
5
5
  phases: [
6
6
  { title: 'Check', detail: 'Parallel 3-layer verification + anti-pattern scan via workflow-verifier' },
7
- { title: 'Aggregate', detail: 'Cross-layer aggregation and gap analysis' },
7
+ { title: 'Argue', detail: 'Prosecutor argues FAIL, Defender argues PASS — adversarial positions' },
8
+ { title: 'Judge', detail: 'Judge resolves adversarial debate into final verdict' },
8
9
  ],
9
10
  }
10
11
 
11
- // Aligned with workflow-verifier.md: Layer 1 Existence, Layer 2 Substance, Layer 3 Connection
12
12
  const LAYER_SCHEMA = {
13
13
  type: 'object',
14
14
  properties: {
@@ -78,11 +78,47 @@ const ANTIPATTERN_SCHEMA = {
78
78
  required: ['clean', 'findings'],
79
79
  }
80
80
 
81
+ const ARGUMENT_SCHEMA = {
82
+ type: 'object',
83
+ properties: {
84
+ role: { type: 'string', enum: ['prosecutor', 'defender'] },
85
+ stance: { type: 'string', enum: ['pass', 'fail'] },
86
+ argument: { type: 'string' },
87
+ key_points: {
88
+ type: 'array',
89
+ items: {
90
+ type: 'object',
91
+ properties: {
92
+ point: { type: 'string' },
93
+ evidence: { type: 'string' },
94
+ layer: { type: 'string' },
95
+ strength: { type: 'string', enum: ['strong', 'moderate', 'weak'] },
96
+ },
97
+ required: ['point', 'evidence', 'strength'],
98
+ },
99
+ },
100
+ concessions: { type: 'array', items: { type: 'string' } },
101
+ confidence: { type: 'number', minimum: 0, maximum: 100 },
102
+ },
103
+ required: ['role', 'stance', 'argument', 'key_points', 'confidence'],
104
+ }
105
+
81
106
  const AGGREGATE_SCHEMA = {
82
107
  type: 'object',
83
108
  properties: {
84
109
  status: { type: 'string', enum: ['pass', 'fail'] },
85
110
  confidence: { type: 'number', minimum: 0, maximum: 100 },
111
+ adversarial_outcome: {
112
+ type: 'object',
113
+ properties: {
114
+ prosecutor_confidence: { type: 'number' },
115
+ defender_confidence: { type: 'number' },
116
+ decisive_factor: { type: 'string' },
117
+ prosecutor_concessions: { type: 'array', items: { type: 'string' } },
118
+ defender_concessions: { type: 'array', items: { type: 'string' } },
119
+ },
120
+ required: ['prosecutor_confidence', 'defender_confidence', 'decisive_factor'],
121
+ },
86
122
  layers: {
87
123
  type: 'array',
88
124
  items: {
@@ -123,7 +159,7 @@ const AGGREGATE_SCHEMA = {
123
159
  antipattern_blockers: { type: 'number' },
124
160
  executive_summary: { type: 'string' },
125
161
  },
126
- required: ['status', 'confidence', 'layers', 'gaps', 'executive_summary'],
162
+ required: ['status', 'confidence', 'adversarial_outcome', 'layers', 'gaps', 'executive_summary'],
127
163
  }
128
164
 
129
165
  const goals = args?.goals || ''
@@ -138,7 +174,6 @@ const mustHaves = args?.must_haves || ''
138
174
  phase('Check')
139
175
 
140
176
  const checks = [
141
- // Layer 1: Existence — verify all expected artifacts exist
142
177
  () => agent(
143
178
  `Layer 1 — EXISTENCE verification.
144
179
  Goals: ${goals}
@@ -156,8 +191,6 @@ Verify all expected artifacts EXIST:
156
191
  Set layer="existence" in output.`,
157
192
  { label: 'layer:existence', phase: 'Check', schema: LAYER_SCHEMA, agentType: 'workflow-verifier' }
158
193
  ),
159
-
160
- // Layer 2: Substance — verify artifacts are non-trivial
161
194
  () => agent(
162
195
  `Layer 2 — SUBSTANCE verification.
163
196
  Goals: ${goals}
@@ -174,8 +207,6 @@ Verify artifacts contain REAL SUBSTANCE (not stubs):
174
207
  Set layer="substance" in output.`,
175
208
  { label: 'layer:substance', phase: 'Check', schema: LAYER_SCHEMA, agentType: 'workflow-verifier' }
176
209
  ),
177
-
178
- // Layer 3: Connection — verify wiring
179
210
  () => agent(
180
211
  `Layer 3 — CONNECTION verification.
181
212
  Goals: ${goals}
@@ -196,7 +227,6 @@ Set layer="connection" in output.`,
196
227
  ),
197
228
  ]
198
229
 
199
- // Anti-pattern scan (unless skipped)
200
230
  if (!skipAntipattern) {
201
231
  checks.push(() => agent(
202
232
  `Anti-pattern scan for modified files.
@@ -218,7 +248,6 @@ Severity: "blocker" for stubs/not-implemented/hardcoded-secrets, "warning" for T
218
248
  ))
219
249
  }
220
250
 
221
- // Per-task convergence validation (if task files provided)
222
251
  if (taskFiles.length > 0) {
223
252
  checks.push(...taskFiles.map((taskFile, idx) => () => agent(
224
253
  `Per-task convergence validation for: ${taskFile}
@@ -242,9 +271,6 @@ const layers = validResults.filter(r => r.layer)
242
271
  const antipatterns = validResults.find(r => r.clean !== undefined) || { clean: true, findings: [] }
243
272
  const convergenceResults = validResults.filter(r => r.task_id)
244
273
 
245
- // Phase 2: Aggregate
246
- phase('Aggregate')
247
-
248
274
  const layerDigest = layers.map(l => {
249
275
  const passCount = l.checks.filter(c => c.status === 'pass').length
250
276
  const failCount = l.checks.filter(c => c.status === 'fail').length
@@ -259,29 +285,98 @@ const antipatternDigest = antipatterns.clean
259
285
  ? 'Anti-pattern scan: CLEAN'
260
286
  : `Anti-pattern scan: ${antipatterns.findings.length} issues (${antipatterns.findings.filter(f => f.severity === 'blocker').length} blockers)\n${antipatterns.findings.map(f => ` [${f.severity}] ${f.type} @ ${f.file}:${f.line || '?'}: ${f.content}`).join('\n')}`
261
287
 
262
- const aggregate = await agent(
263
- `Aggregate all verification results into a final assessment.
288
+ const evidencePackage = `${layerDigest}\n\n${convergenceDigest}\n\n${antipatternDigest}`
264
289
 
265
- ${layerDigest}
290
+ // Phase 2: Adversarial Arguments — Prosecutor vs Defender
291
+ phase('Argue')
292
+ log('Launching adversarial debate: Prosecutor (FAIL) vs Defender (PASS)...')
266
293
 
267
- ${convergenceDigest}
294
+ const arguments_ = await parallel([
295
+ () => agent(
296
+ `You are the PROSECUTOR. Argue that this verification should FAIL.
297
+
298
+ === VERIFICATION EVIDENCE ===
299
+ ${evidencePackage}
300
+
301
+ Build the STRONGEST case for FAILURE:
302
+ 1. Magnify every failed check — explain the downstream consequences
303
+ 2. Connect antipattern findings to substance/connection failures
304
+ 3. Challenge "pass" checks — are they truly passing or just not checking hard enough?
305
+ 4. Highlight convergence gaps as unfinished work
306
+ 5. Argue that partial passes are effectively failures
307
+
308
+ Your job is to convince the Judge that quality is insufficient.
309
+ Concede points where the evidence genuinely supports a pass — admitted concessions strengthen your credibility.
310
+ Confidence reflects how strong your FAIL case actually is.`,
311
+ { label: 'prosecutor', phase: 'Argue', schema: ARGUMENT_SCHEMA }
312
+ ),
313
+ () => agent(
314
+ `You are the DEFENDER. Argue that this verification should PASS.
315
+
316
+ === VERIFICATION EVIDENCE ===
317
+ ${evidencePackage}
318
+
319
+ Build the STRONGEST case for PASSING:
320
+ 1. Emphasize passed checks and their coverage
321
+ 2. Contextualize failures — are they truly blocking or just minor gaps?
322
+ 3. Argue that antipattern warnings don't indicate real quality issues
323
+ 4. Show that the core goals are met even if some checks are partial
324
+ 5. Demonstrate that failed checks have low real-world impact
325
+
326
+ Your job is to convince the Judge that quality is sufficient.
327
+ Concede points where the evidence genuinely supports a fail — admitted concessions strengthen your credibility.
328
+ Confidence reflects how strong your PASS case actually is.`,
329
+ { label: 'defender', phase: 'Argue', schema: ARGUMENT_SCHEMA }
330
+ ),
331
+ ])
332
+
333
+ const validArguments = arguments_.filter(Boolean)
334
+ const prosecutorArg = validArguments.find(a => a.role === 'prosecutor')
335
+ const defenderArg = validArguments.find(a => a.role === 'defender')
268
336
 
269
- ${antipatternDigest}
337
+ const debateDigest = validArguments.map(a =>
338
+ `### ${a.role.toUpperCase()} (stance: ${a.stance}, confidence: ${a.confidence}%)\n${a.argument}\n\nKey points:\n${a.key_points.map(p => `- [${p.strength}] ${p.point} (evidence: ${p.evidence})`).join('\n')}\n\nConcessions:\n${a.concessions.map(c => `- ${c}`).join('\n') || ' none'}`
339
+ ).join('\n\n---\n\n')
270
340
 
271
- Determine:
272
- 1. Overall status: "pass" requires ALL layers pass AND no antipattern blockers AND all convergence met
273
- 2. Confidence score (0-100) based on evidence strength and coverage
274
- 3. Per-layer summary with check counts
275
- 4. Convergence summary (if tasks checked)
276
- 5. Consolidated gap list: extract every failed check and antipattern blocker, assign severity, suggest remediation
277
- 6. Executive summary (what works, what's broken, what to do next)`,
278
- { label: 'aggregate', phase: 'Aggregate', schema: AGGREGATE_SCHEMA }
341
+ log(`Prosecutor: ${prosecutorArg ? prosecutorArg.confidence : '?'}% confident FAIL | Defender: ${defenderArg ? defenderArg.confidence : '?'}% confident PASS`)
342
+
343
+ // Phase 3: Judge resolves the adversarial debate
344
+ phase('Judge')
345
+ log('Judge resolving adversarial verification debate...')
346
+
347
+ const aggregate = await agent(
348
+ `You are the JUDGE. Two advocates have argued for and against passing this verification.
349
+
350
+ === ADVERSARIAL DEBATE ===
351
+ ${debateDigest}
352
+
353
+ === RAW EVIDENCE ===
354
+ ${evidencePackage}
355
+
356
+ JUDGE the debate:
357
+ 1. Evaluate each advocate's key points against the raw evidence
358
+ 2. Weigh point strength: strong > moderate > weak
359
+ 3. Points conceded by the opposing side have extra weight
360
+ 4. Check for arguments NOT backed by evidence (rhetoric without substance)
361
+
362
+ Decision rules:
363
+ - If ALL layers truly pass AND antipattern clean AND convergence met → PASS
364
+ - If any layer has >50% failed checks → FAIL regardless of defense
365
+ - If antipattern has blockers → FAIL unless defender proves they're false positives
366
+ - If prosecutor confidence > 80% AND defender concedes major points → FAIL
367
+ - If defender confidence > 80% AND prosecutor only has weak points → PASS
368
+ - Otherwise → weigh evidence strength on both sides
369
+
370
+ Record adversarial_outcome with both confidences, concessions, and the decisive_factor.
371
+ Compile layers, convergence_summary, gaps, and executive_summary.`,
372
+ { label: 'judge', phase: 'Judge', schema: AGGREGATE_SCHEMA }
279
373
  )
280
374
 
281
375
  return {
282
376
  layers: layers,
283
377
  convergence: convergenceResults,
284
378
  antipatterns: antipatterns,
379
+ debate: { prosecutor: prosecutorArg, defender: defenderArg },
285
380
  aggregate: aggregate,
286
381
  metadata: {
287
382
  layer_count: layers.length,
@@ -292,6 +387,8 @@ return {
292
387
  converged_tasks: convergenceResults.filter(c => c.overall_converged).length,
293
388
  antipattern_count: antipatterns.findings.length,
294
389
  blocker_count: antipatterns.findings.filter(f => f.severity === 'blocker').length,
390
+ prosecutor_confidence: prosecutorArg ? prosecutorArg.confidence : null,
391
+ defender_confidence: defenderArg ? defenderArg.confidence : null,
295
392
  overall_status: aggregate ? aggregate.status : 'unknown',
296
393
  confidence: aggregate ? aggregate.confidence : 0,
297
394
  },