maestro-flow 0.4.20 → 0.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/.agents/skills/maestro-ralph-execute/SKILL.md +2 -1
  2. package/.agents/skills/maestro-swarm-workflow/SKILL.md +27 -19
  3. package/.agents/skills/maestro-universal-workflow/SKILL.md +563 -0
  4. package/.agents/skills/team-adversarial-swarm/SKILL.md +235 -0
  5. package/.agents/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  6. package/.agents/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  7. package/.agents/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  8. package/.agents/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  9. package/.agents/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  10. package/.agents/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  11. package/.agents/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  12. package/.agents/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  13. package/.agents/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  14. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  15. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  16. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  17. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  18. package/.agy/skills/maestro-ralph-execute/SKILL.md +2 -1
  19. package/.agy/skills/maestro-swarm-workflow/SKILL.md +27 -19
  20. package/.agy/skills/maestro-universal-workflow/SKILL.md +560 -0
  21. package/.agy/skills/team-adversarial-swarm/SKILL.md +244 -0
  22. package/.agy/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  23. package/.agy/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  24. package/.agy/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  25. package/.agy/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  26. package/.agy/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  27. package/.agy/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  28. package/.agy/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  29. package/.agy/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  30. package/.agy/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  31. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  32. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  33. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  34. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  35. package/.claude/commands/maestro-ralph-execute.md +2 -1
  36. package/.claude/commands/maestro-swarm-workflow.md +27 -19
  37. package/.claude/commands/maestro-universal-workflow.md +561 -0
  38. package/.claude/skills/team-adversarial-swarm/SKILL.md +233 -0
  39. package/.claude/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  40. package/.claude/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  41. package/.claude/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  42. package/.claude/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  43. package/.claude/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  44. package/.claude/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  45. package/.claude/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  46. package/.claude/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  47. package/.claude/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  48. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  49. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  50. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  51. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  52. package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js +1 -1
  53. package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js.map +1 -1
  54. package/dashboard/dist-server/dashboard/src/server/wiki/search.js +1 -1
  55. package/dashboard/dist-server/dashboard/src/server/wiki/search.js.map +1 -1
  56. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.d.ts +1 -1
  57. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js +5 -5
  58. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js.map +1 -1
  59. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +3 -3
  60. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
  61. package/dashboard/dist-server/src/graph/types.d.ts +111 -0
  62. package/dashboard/dist-server/src/graph/types.js +2 -0
  63. package/dashboard/dist-server/src/graph/types.js.map +1 -0
  64. package/dist/src/commands/install-backend.d.ts +0 -7
  65. package/dist/src/commands/install-backend.d.ts.map +1 -1
  66. package/dist/src/commands/install-backend.js +0 -14
  67. package/dist/src/commands/install-backend.js.map +1 -1
  68. package/dist/src/commands/install.d.ts.map +1 -1
  69. package/dist/src/commands/install.js +0 -18
  70. package/dist/src/commands/install.js.map +1 -1
  71. package/dist/src/commands/kg.d.ts +2 -2
  72. package/dist/src/commands/kg.d.ts.map +1 -1
  73. package/dist/src/commands/kg.js +150 -179
  74. package/dist/src/commands/kg.js.map +1 -1
  75. package/dist/src/graph/analyzers/fs-analyzer.d.ts +10 -0
  76. package/dist/src/graph/analyzers/fs-analyzer.d.ts.map +1 -0
  77. package/dist/src/graph/analyzers/fs-analyzer.js +959 -0
  78. package/dist/src/graph/analyzers/fs-analyzer.js.map +1 -0
  79. package/dist/src/graph/index.d.ts +6 -0
  80. package/dist/src/graph/index.d.ts.map +1 -0
  81. package/dist/src/graph/index.js +6 -0
  82. package/dist/src/graph/index.js.map +1 -0
  83. package/dist/src/graph/loader.d.ts +3 -0
  84. package/dist/src/graph/loader.d.ts.map +1 -0
  85. package/dist/src/graph/loader.js +12 -0
  86. package/dist/src/graph/loader.js.map +1 -0
  87. package/dist/src/graph/merger.d.ts +56 -0
  88. package/dist/src/graph/merger.d.ts.map +1 -0
  89. package/dist/src/graph/merger.js +896 -0
  90. package/dist/src/graph/merger.js.map +1 -0
  91. package/dist/src/graph/query.d.ts +7 -0
  92. package/dist/src/graph/query.d.ts.map +1 -0
  93. package/dist/src/graph/query.js +126 -0
  94. package/dist/src/graph/query.js.map +1 -0
  95. package/dist/src/graph/types.d.ts +112 -0
  96. package/dist/src/graph/types.d.ts.map +1 -0
  97. package/dist/src/graph/types.js +2 -0
  98. package/dist/src/graph/types.js.map +1 -0
  99. package/dist/src/i18n/locales/en.d.ts.map +1 -1
  100. package/dist/src/i18n/locales/en.js +0 -10
  101. package/dist/src/i18n/locales/en.js.map +1 -1
  102. package/dist/src/i18n/locales/zh.d.ts.map +1 -1
  103. package/dist/src/i18n/locales/zh.js +0 -10
  104. package/dist/src/i18n/locales/zh.js.map +1 -1
  105. package/dist/src/i18n/types.d.ts +0 -9
  106. package/dist/src/i18n/types.d.ts.map +1 -1
  107. package/dist/src/tui/install-ui/InstallConfirm.d.ts +0 -1
  108. package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
  109. package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
  110. package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
  111. package/dist/src/tui/install-ui/InstallExecution.d.ts +0 -1
  112. package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
  113. package/dist/src/tui/install-ui/InstallExecution.js +0 -22
  114. package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
  115. package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
  116. package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
  117. package/dist/src/tui/install-ui/InstallFlow.js +5 -23
  118. package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
  119. package/dist/src/tui/install-ui/InstallHub.d.ts +0 -2
  120. package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
  121. package/dist/src/tui/install-ui/InstallHub.js +0 -6
  122. package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
  123. package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
  124. package/dist/src/tui/install-ui/InstallResult.js +1 -1
  125. package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
  126. package/dist/src/utils/update-notices.js +12 -0
  127. package/dist/src/utils/update-notices.js.map +1 -1
  128. package/package.json +1 -1
  129. package/workflows/swarm/wf-analyze.js +195 -34
  130. package/workflows/swarm/wf-brainstorm.js +225 -53
  131. package/workflows/swarm/wf-execute.js +199 -23
  132. package/workflows/swarm/wf-grill.js +181 -20
  133. package/workflows/swarm/wf-milestone-audit.js +178 -29
  134. package/workflows/swarm/wf-plan.js +288 -53
  135. package/workflows/swarm/wf-review.js +195 -80
  136. package/workflows/swarm/wf-verify.js +125 -28
package/workflows/swarm/wf-grill.js
@@ -1,11 +1,12 @@
1
1
  export const meta = {
2
2
  name: 'wf-grill',
3
- description: 'Parallel adversarial stress-testing across decision branches',
4
- whenToUse: 'Accelerate maestro-grill auto mode with parallel branch exploration and contradiction detection',
3
+ description: 'Adversarial stress-testing with meta-skeptic challenge and 3-vote verdict',
4
+ whenToUse: 'Accelerate maestro-grill with parallel branch probing + meta-adversarial synthesis + 3-vote verdict',
5
5
  phases: [
6
6
  { title: 'Explore', detail: 'Codebase evidence gathering via cli-explore-agent' },
7
7
  { title: 'Stress', detail: 'Parallel adversarial branch probing' },
8
- { title: 'Synthesize', detail: 'Contradiction detection and terminology crystallization' },
8
+ { title: 'MetaChallenge', detail: 'Meta-skeptic challenges the stress-test findings themselves' },
9
+ { title: 'Synthesize', detail: '3-vote adversarial verdict (optimist/pessimist/realist)' },
9
10
  ],
10
11
  }
11
12
 
@@ -68,17 +69,65 @@ const BRANCH_SCHEMA = {
68
69
  required: ['branch', 'challenges', 'terminology_conflicts', 'assumptions_challenged', 'verdict'],
69
70
  }
70
71
 
72
+ const META_CHALLENGE_SCHEMA = {
73
+ type: 'object',
74
+ properties: {
75
+ overblown_findings: {
76
+ type: 'array',
77
+ items: {
78
+ type: 'object',
79
+ properties: {
80
+ branch: { type: 'string' },
81
+ finding: { type: 'string' },
82
+ why_overblown: { type: 'string' },
83
+ actual_severity: { type: 'string', enum: ['blocking', 'significant', 'minor', 'non-issue'] },
84
+ },
85
+ required: ['branch', 'finding', 'why_overblown', 'actual_severity'],
86
+ },
87
+ },
88
+ missed_issues: {
89
+ type: 'array',
90
+ items: {
91
+ type: 'object',
92
+ properties: {
93
+ area: { type: 'string' },
94
+ missed_issue: { type: 'string' },
95
+ severity: { type: 'string', enum: ['blocking', 'significant', 'minor'] },
96
+ evidence: { type: 'string' },
97
+ },
98
+ required: ['area', 'missed_issue', 'severity'],
99
+ },
100
+ },
101
+ stress_test_quality: { type: 'number', minimum: 1, maximum: 5 },
102
+ meta_assessment: { type: 'string' },
103
+ },
104
+ required: ['overblown_findings', 'missed_issues', 'stress_test_quality', 'meta_assessment'],
105
+ }
106
+
107
+ const VERDICT_VOTE_SCHEMA = {
108
+ type: 'object',
109
+ properties: {
110
+ perspective: { type: 'string' },
111
+ verdict: { type: 'string', enum: ['ready-for-brainstorm', 'needs-refinement', 'back-to-drawing-board'] },
112
+ argument: { type: 'string' },
113
+ confidence: { type: 'number', minimum: 0, maximum: 100 },
114
+ key_reasons: { type: 'array', items: { type: 'string' } },
115
+ },
116
+ required: ['perspective', 'verdict', 'argument', 'confidence'],
117
+ }
118
+
71
119
  const SYNTHESIS_SCHEMA = {
72
120
  type: 'object',
73
121
  properties: {
74
122
  overall_verdict: { type: 'string', enum: ['ready-for-brainstorm', 'needs-refinement', 'back-to-drawing-board'] },
123
+ adversarial_outcome: { type: 'object', properties: { optimist: { type: 'string' }, pessimist: { type: 'string' }, realist: { type: 'string' }, decisive_factor: { type: 'string' } }, required: ['optimist', 'pessimist', 'realist', 'decisive_factor'] },
75
124
  blocking_issues: { type: 'array', items: { type: 'object', properties: { branch: { type: 'string' }, issue: { type: 'string' }, must_resolve_before: { type: 'string' } }, required: ['branch', 'issue'] } },
76
125
  terminology: { type: 'array', items: { type: 'object', properties: { term: { type: 'string' }, definition: { type: 'string' }, code_alignment: { type: 'string' } }, required: ['term', 'definition'] } },
77
126
  contradictions: { type: 'array', items: { type: 'object', properties: { between_branches: { type: 'array', items: { type: 'string' } }, description: { type: 'string' }, resolution: { type: 'string' } }, required: ['between_branches', 'description'] } },
78
127
  constraints_discovered: { type: 'array', items: { type: 'object', properties: { constraint: { type: 'string' }, source: { type: 'string' }, impact: { type: 'string' }, status: { type: 'string', enum: ['locked', 'free', 'deferred'] } }, required: ['constraint', 'source', 'status'] } },
79
128
  executive_summary: { type: 'string' },
80
129
  },
81
- required: ['overall_verdict', 'blocking_issues', 'terminology', 'contradictions', 'constraints_discovered', 'executive_summary'],
130
+ required: ['overall_verdict', 'adversarial_outcome', 'blocking_issues', 'terminology', 'contradictions', 'constraints_discovered', 'executive_summary'],
82
131
  }
83
132
 
84
133
  const topic = args?.topic || ''
@@ -149,50 +198,162 @@ Be adversarial but fair — only raise real issues backed by evidence.`,
149
198
  const validBranches = branchResults.filter(Boolean)
150
199
  log(`${validBranches.length}/${selectedBranches.length} branches probed`)
151
200
 
152
- // Phase 3: Cross-branch synthesis
153
- phase('Synthesize')
154
-
155
201
  const branchDigest = validBranches.map(b => {
156
202
  const blocking = b.challenges.filter(c => c.severity === 'blocking')
157
203
  return `## ${b.branch} [${b.verdict}]
158
204
  Challenges: ${b.challenges.length} (${blocking.length} blocking)
159
- ${blocking.map(c => ` ${c.question}${c.contradiction ? ' — ' + c.contradiction : ''}`).join('\n')}
205
+ ${blocking.map(c => ` ! ${c.question}${c.contradiction ? ' — ' + c.contradiction : ''}`).join('\n')}
160
206
  Terminology conflicts: ${b.terminology_conflicts.map(t => t.proposed_term + ' vs ' + t.codebase_term).join(', ') || 'none'}
161
207
  Assumptions challenged: ${b.assumptions_challenged.join('; ') || 'none'}`
162
208
  }).join('\n\n')
163
209
 
210
+ // Phase 3: Meta-skeptic challenges the stress-test findings
211
+ phase('MetaChallenge')
212
+ log('Meta-skeptic challenging the stress-test findings themselves...')
213
+
214
+ const metaChallenge = await agent(
215
+ `You are the META-SKEPTIC — the devil's advocate OF the devil's advocates.
216
+
217
+ The stress-testers above tried to break this proposal:
218
+ Topic: ${topic}
219
+
220
+ Their findings:
221
+ ${branchDigest}
222
+
223
+ ${evidenceContext}
224
+
225
+ Your job is to challenge THE STRESS-TESTERS:
226
+ 1. OVERBLOWN FINDINGS: Which challenges are exaggerated, based on unlikely scenarios, or missing context?
227
+ - Check if the "blocking" issues are actually blocking
228
+ - See if the code evidence actually supports the claimed contradiction
229
+ - Identify where stress-testers assumed worst-case without justification
230
+ 2. MISSED ISSUES: What did the stress-testers NOT catch that they should have?
231
+ - Blind spots across all branches
232
+ - Interactions between branches that no single branch tested
233
+ - Real risks that were obscured by focus on minor issues
234
+ 3. Rate the overall stress_test_quality (1-5): how thorough and fair were the findings?
235
+
236
+ Be ruthlessly honest. Some stress-test findings ARE real; confirm those. But call out any that are theatrical rather than substantive.`,
237
+ { label: 'meta-skeptic', phase: 'MetaChallenge', schema: META_CHALLENGE_SCHEMA }
238
+ )
239
+
240
+ const metaDigest = metaChallenge
241
+ ? `Meta-skeptic assessment (quality: ${metaChallenge.stress_test_quality}/5):
242
+ Overblown: ${metaChallenge.overblown_findings.length} findings downgraded
243
+ ${metaChallenge.overblown_findings.map(f => ` ${f.branch}: "${f.finding}" → ${f.actual_severity} — ${f.why_overblown}`).join('\n')}
244
+ Missed: ${metaChallenge.missed_issues.length} new issues surfaced
245
+ ${metaChallenge.missed_issues.map(m => ` [${m.severity}] ${m.area}: ${m.missed_issue}`).join('\n')}
246
+ Assessment: ${metaChallenge.meta_assessment}`
247
+ : 'Meta-challenge not available.'
248
+
249
+ // Phase 4: 3-vote adversarial verdict
250
+ phase('Synthesize')
251
+ log('Launching 3-vote adversarial verdict (optimist / pessimist / realist)...')
252
+
253
+ const votes = await parallel([
254
+ () => agent(
255
+ `You are the OPTIMIST. Vote on the proposal's readiness.
256
+
257
+ Proposal: ${topic}
258
+ Stress-test findings:\n${branchDigest}
259
+ Meta-skeptic review:\n${metaDigest}
260
+
261
+ Your lens: Focus on what IS working. Discount overblown findings. Trust proposed resolutions.
262
+ - "ready-for-brainstorm": blocking issues are addressable, proceed with awareness
263
+ - "needs-refinement": some issues need attention but proposal has merit
264
+ - "back-to-drawing-board": only if genuinely unfixable (you should almost never vote this)
265
+
266
+ Vote with your confidence level.`,
267
+ { label: 'vote:optimist', phase: 'Synthesize', schema: VERDICT_VOTE_SCHEMA }
268
+ ),
269
+ () => agent(
270
+ `You are the PESSIMIST. Vote on the proposal's readiness.
271
+
272
+ Proposal: ${topic}
273
+ Stress-test findings:\n${branchDigest}
274
+ Meta-skeptic review:\n${metaDigest}
275
+
276
+ Your lens: Focus on what is BROKEN. Amplify blocking issues. Question proposed resolutions.
277
+ - "back-to-drawing-board": if there are fundamental flaws or too many blocking issues
278
+ - "needs-refinement": if issues are real but fixable
279
+ - "ready-for-brainstorm": only if stress-testing found almost nothing (you should almost never vote this)
280
+
281
+ Vote with your confidence level.`,
282
+ { label: 'vote:pessimist', phase: 'Synthesize', schema: VERDICT_VOTE_SCHEMA }
283
+ ),
284
+ () => agent(
285
+ `You are the REALIST. Vote on the proposal's readiness.
286
+
287
+ Proposal: ${topic}
288
+ Stress-test findings:\n${branchDigest}
289
+ Meta-skeptic review:\n${metaDigest}
290
+
291
+ Your lens: Evidence-based, no bias. Weigh the meta-skeptic's corrections. Discount both theatrical threats and wishful thinking.
292
+ - "ready-for-brainstorm": if blocking issues are few, well-understood, and have clear resolutions
293
+ - "needs-refinement": if real issues exist but are tractable
294
+ - "back-to-drawing-board": if fundamental assumptions are wrong
295
+
296
+ Vote with your confidence level.`,
297
+ { label: 'vote:realist', phase: 'Synthesize', schema: VERDICT_VOTE_SCHEMA }
298
+ ),
299
+ ])
300
+
301
+ const validVotes = votes.filter(Boolean)
302
+ const voteDigest = validVotes.map(v =>
303
+ `${v.perspective}: ${v.verdict} (confidence: ${v.confidence}%)\n ${v.argument}`
304
+ ).join('\n\n')
305
+
306
+ const verdictCounts = {}
307
+ validVotes.forEach(v => { verdictCounts[v.verdict] = (verdictCounts[v.verdict] || 0) + 1 })
308
+ log(`Votes: ${Object.entries(verdictCounts).map(([k, v]) => k + '=' + v).join(', ')}`)
309
+
310
+ log('Synthesizing final verdict from adversarial votes...')
311
+
164
312
  const synthesis = await agent(
165
- `Synthesize stress-test results across all branches.
313
+ `Synthesize the final stress-test verdict from 3 adversarial voters.
166
314
 
167
315
  Proposal: ${topic}
168
316
 
169
- Branch Results:
317
+ === VOTES ===
318
+ ${voteDigest}
319
+
320
+ Vote tally: ${Object.entries(verdictCounts).map(([k, v]) => k + '=' + v).join(', ')}
321
+
322
+ === META-SKEPTIC REVIEW ===
323
+ ${metaDigest}
324
+
325
+ === BRANCH FINDINGS ===
170
326
  ${branchDigest}
171
327
 
172
- Tasks:
173
- 1. Identify cross-branch CONTRADICTIONS where one branch's finding conflicts with another's
174
- 2. Compile unified TERMINOLOGY list (proposed term aligned codebase term → definition)
175
- 3. Extract CONSTRAINTS discovered (things that MUST be true based on evidence) — classify as locked/free/deferred
176
- 4. List all BLOCKING issues that must resolve before brainstorm/planning
177
- 5. Determine overall verdict:
178
- - "ready-for-brainstorm": no blocking issues, well-scoped
179
- - "needs-refinement": some blocking issues but fixable
180
- - "back-to-drawing-board": fundamental flaws detected
181
- 6. Write executive summary`,
328
+ RESOLVE:
329
+ 1. Majority vote wins. Tie-break: go with the REALIST.
330
+ 2. Record adversarial_outcome with each voter's verdict and the decisive factor
331
+ 3. Compile blocking_issues from branches BUT exclude any the meta-skeptic downgraded to non-issue
332
+ 4. Add any missed_issues from meta-skeptic as additional blocking if severity is blocking
333
+ 5. Build unified terminology list
334
+ 6. Detect cross-branch contradictions
335
+ 7. Extract discovered constraints (locked/free/deferred)
336
+ 8. Write executive summary including the adversarial debate and meta-challenge outcomes`,
182
337
  { label: 'synthesize', phase: 'Synthesize', schema: SYNTHESIS_SCHEMA }
183
338
  )
184
339
 
185
340
  return {
186
341
  exploration: exploration,
187
342
  branches: validBranches,
343
+ metaChallenge: metaChallenge,
344
+ votes: validVotes,
188
345
  synthesis: synthesis,
189
346
  metadata: {
190
347
  topic: topic,
191
348
  depth: depth,
192
349
  branch_count: selectedBranches.length,
193
350
  completed_count: validBranches.length,
351
+ meta_overblown: metaChallenge ? metaChallenge.overblown_findings.length : 0,
352
+ meta_missed: metaChallenge ? metaChallenge.missed_issues.length : 0,
353
+ stress_test_quality: metaChallenge ? metaChallenge.stress_test_quality : null,
194
354
  blocking_count: synthesis ? synthesis.blocking_issues.length : 0,
195
355
  contradiction_count: synthesis ? synthesis.contradictions.length : 0,
356
+ verdict_votes: verdictCounts,
196
357
  overall_verdict: synthesis ? synthesis.overall_verdict : 'unknown',
197
358
  },
198
359
  }
package/workflows/swarm/wf-milestone-audit.js
@@ -1,10 +1,11 @@
1
1
  export const meta = {
2
2
  name: 'wf-milestone-audit',
3
- description: 'Parallel cross-phase integration audit via workflow-integration-checker',
4
- whenToUse: 'Accelerate maestro-milestone-audit with parallel phase coverage, execution completeness, and integration checks',
3
+ description: 'Parallel milestone audit with adversarial challenge and 3-vote verdict',
4
+ whenToUse: 'Accelerate maestro-milestone-audit with parallel dimension checks + adversarial challenge + 3-vote verdict',
5
5
  phases: [
6
6
  { title: 'Audit', detail: 'Parallel 4-dimension milestone audit' },
7
- { title: 'Report', detail: 'Consolidated audit verdict' },
7
+ { title: 'Challenge', detail: 'Adversarial challenge of each audit dimension' },
8
+ { title: 'Report', detail: '3-vote adversarial verdict (strict/lenient/objective)' },
8
9
  ],
9
10
  }
10
11
 
@@ -86,38 +87,74 @@ const INTEGRATION_SCHEMA = {
86
87
  required: ['check_type', 'passed', 'interfaces', 'summary'],
87
88
  }
88
89
 
90
+ const CHALLENGE_SCHEMA = {
91
+ type: 'object',
92
+ properties: {
93
+ dimension: { type: 'string' },
94
+ original_passed: { type: 'boolean' },
95
+ challenge_result: { type: 'string', enum: ['confirmed', 'overturned-to-fail', 'overturned-to-pass'] },
96
+ counter_evidence: { type: 'array', items: { type: 'object', properties: { point: { type: 'string' }, evidence: { type: 'string' } }, required: ['point', 'evidence'] } },
97
+ reasoning: { type: 'string' },
98
+ confidence: { type: 'number', minimum: 0, maximum: 100 },
99
+ },
100
+ required: ['dimension', 'original_passed', 'challenge_result', 'reasoning', 'confidence'],
101
+ }
102
+
103
+ const VERDICT_VOTE_SCHEMA = {
104
+ type: 'object',
105
+ properties: {
106
+ perspective: { type: 'string' },
107
+ verdict: { type: 'string', enum: ['PASS', 'FAIL'] },
108
+ rationale: { type: 'string' },
109
+ confidence: { type: 'number', minimum: 0, maximum: 100 },
110
+ next_step: { type: 'string', enum: ['milestone-complete', 'plan-gaps', 'execute', 'verify'] },
111
+ },
112
+ required: ['perspective', 'verdict', 'rationale', 'confidence', 'next_step'],
113
+ }
114
+
89
115
  const REPORT_SCHEMA = {
90
116
  type: 'object',
91
117
  properties: {
92
118
  verdict: { type: 'string', enum: ['PASS', 'FAIL'] },
93
119
  confidence: { type: 'number', minimum: 0, maximum: 100 },
120
+ adversarial_outcome: {
121
+ type: 'object',
122
+ properties: {
123
+ strict: { type: 'string' },
124
+ lenient: { type: 'string' },
125
+ objective: { type: 'string' },
126
+ challenges_overturned: { type: 'number' },
127
+ decisive_factor: { type: 'string' },
128
+ },
129
+ required: ['strict', 'lenient', 'objective', 'decisive_factor'],
130
+ },
94
131
  dimension_results: {
95
132
  type: 'array',
96
133
  items: {
97
134
  type: 'object',
98
135
  properties: {
99
136
  dimension: { type: 'string' },
100
- passed: { type: 'boolean' },
137
+ original_passed: { type: 'boolean' },
138
+ post_challenge_passed: { type: 'boolean' },
101
139
  issue_count: { type: 'number' },
102
140
  },
103
- required: ['dimension', 'passed'],
141
+ required: ['dimension', 'original_passed', 'post_challenge_passed'],
104
142
  },
105
143
  },
106
144
  blocking_issues: { type: 'array', items: { type: 'object', properties: { dimension: { type: 'string' }, description: { type: 'string' }, remediation: { type: 'string' } }, required: ['dimension', 'description', 'remediation'] } },
107
145
  next_step: { type: 'string', enum: ['milestone-complete', 'plan-gaps', 'execute', 'verify'] },
108
146
  summary: { type: 'string' },
109
147
  },
110
- required: ['verdict', 'confidence', 'dimension_results', 'blocking_issues', 'next_step', 'summary'],
148
+ required: ['verdict', 'confidence', 'adversarial_outcome', 'dimension_results', 'blocking_issues', 'next_step', 'summary'],
111
149
  }
112
150
 
113
151
  const milestone = args?.milestone || ''
114
152
  const isAdhoc = args?.is_adhoc || false
115
153
 
116
- // Phase 1: Parallel 4-dimension audit
154
+ // Phase 1: Parallel audit dimensions
117
155
  phase('Audit')
118
156
 
119
157
  const checks = [
120
- // Dimension 1: Phase coverage (skip for adhoc milestones)
121
158
  () => agent(
122
159
  `Phase Coverage Audit${isAdhoc ? ' (ADHOC — skip roadmap phase checks, only verify artifact chain PLN→EXC exists)' : ''}.
123
160
  ${milestone ? 'Milestone: ' + milestone : 'Use current_milestone from .workflow/state.json'}
@@ -137,8 +174,6 @@ ${isAdhoc ? `Adhoc milestone: skip roadmap.md parsing. Only check:
137
174
  Set check_type="phase-coverage" in output.`,
138
175
  { label: 'audit:coverage', phase: 'Audit', schema: COVERAGE_SCHEMA }
139
176
  ),
140
-
141
- // Dimension 2: Execution completeness
142
177
  () => agent(
143
178
  `Execution Completeness Audit.
144
179
  ${milestone ? 'Milestone: ' + milestone : 'Use current_milestone from .workflow/state.json'}
@@ -154,8 +189,6 @@ ${milestone ? 'Milestone: ' + milestone : 'Use current_milestone from .workflow/
154
189
  Set check_type="execution-completeness" in output.`,
155
190
  { label: 'audit:execution', phase: 'Audit', schema: EXECUTION_SCHEMA }
156
191
  ),
157
-
158
- // Dimension 3: Cross-phase integration
159
192
  () => agent(
160
193
  `Cross-Phase Integration Audit.
161
194
  ${milestone ? 'Milestone: ' + milestone : 'Use current_milestone from .workflow/state.json'}
@@ -184,9 +217,6 @@ log(`Running ${checks.length} audit dimensions in parallel...`)
184
217
  const results = await parallel(checks)
185
218
  const validResults = results.filter(Boolean)
186
219
 
187
- // Phase 2: Consolidated report
188
- phase('Report')
189
-
190
220
  const coverage = validResults.find(r => r.check_type === 'phase-coverage')
191
221
  const execution = validResults.find(r => r.check_type === 'execution-completeness')
192
222
  const integration = validResults.find(r => r.check_type === 'integration')
@@ -194,27 +224,142 @@ const integration = validResults.find(r => r.check_type === 'integration')
194
224
  const auditDigest = `Phase Coverage: ${coverage ? (coverage.passed ? 'PASS' : 'FAIL') + ' — ' + coverage.summary : 'NOT RUN'}
195
225
 
196
226
  Execution Completeness: ${execution ? (execution.passed ? 'PASS' : 'FAIL') + ' — ' + execution.summary : 'NOT RUN'}
197
- ${execution && !execution.passed ? 'Incomplete plans: ' + execution.plans.filter(p => p.pending_tasks > 0 || p.failed_tasks > 0).map(p => p.plan_id + ' (' + p.pending_tasks + ' pending, ' + p.failed_tasks + ' failed)').join('; ') : ''}
227
+ ${execution && !execution.passed ? 'Incomplete: ' + execution.plans.filter(p => p.pending_tasks > 0 || p.failed_tasks > 0).map(p => p.plan_id + ' (' + p.pending_tasks + ' pending, ' + p.failed_tasks + ' failed)').join('; ') : ''}
198
228
 
199
229
  Integration: ${integration ? (integration.passed ? 'PASS' : 'FAIL') + ' — ' + integration.summary : 'NOT RUN'}
200
- ${integration && !integration.passed ? 'Failed interfaces: ' + integration.interfaces.filter(i => i.status === 'fail').map(i => i.interface_name + ': ' + i.issue).join('; ') : ''}
201
- ${integration && integration.data_contract_issues.length > 0 ? 'Data contract issues: ' + integration.data_contract_issues.map(d => d.contract + ' — ' + d.mismatch).join('; ') : ''}`
230
+ ${integration && !integration.passed ? 'Failed: ' + integration.interfaces.filter(i => i.status === 'fail').map(i => i.interface_name + ': ' + i.issue).join('; ') : ''}`
231
+
232
+ // Phase 2: Adversarial challenge of each audit dimension
233
+ phase('Challenge')
234
+ log('Adversarial challenge of audit dimension results...')
235
+
236
+ const dimensionData = [
237
+ { name: 'coverage', result: coverage },
238
+ { name: 'execution', result: execution },
239
+ { name: 'integration', result: integration },
240
+ ].filter(d => d.result)
241
+
242
+ const challengeResults = await parallel(
243
+ dimensionData.map(dim => () =>
244
+ agent(
245
+ `ADVERSARIAL CHALLENGE of the "${dim.name}" audit dimension.
246
+
247
+ Original result: ${dim.result.passed ? 'PASS' : 'FAIL'}
248
+ Summary: ${dim.result.summary}
249
+
250
+ ${dim.name === 'coverage' && dim.result.phases ? 'Phase details:\n' + dim.result.phases.map(p => ` ${p.phase}: ${p.status} (plan:${p.has_plan} execute:${p.has_execute})`).join('\n') : ''}
251
+ ${dim.name === 'execution' && dim.result.plans ? 'Plan details:\n' + dim.result.plans.map(p => ` ${p.plan_id}: ${p.completed_tasks}/${p.total_tasks} complete`).join('\n') : ''}
252
+ ${dim.name === 'integration' && dim.result.interfaces ? 'Interface details:\n' + dim.result.interfaces.map(i => ` ${i.interface_name}: ${i.status}`).join('\n') : ''}
253
+
254
+ Your job: Try to OVERTURN the result.
255
+ - If it PASSED: find evidence it should have FAILED (missed checks, false passes, overlooked issues)
256
+ - If it FAILED: find evidence it should have PASSED (issues are minor, not blocking, or already resolved)
257
+
258
+ Challenge the audit's thoroughness:
259
+ 1. Did it check everything it should?
260
+ 2. Were the checks actually verifying what they claim?
261
+ 3. Is the evidence genuine or superficial?
262
+
263
+ challenge_result:
264
+ - "confirmed": the original result stands after challenge
265
+ - "overturned-to-fail": was PASS, should be FAIL (found missed issues)
266
+ - "overturned-to-pass": was FAIL, should be PASS (issues are not blocking)
267
+
268
+ Default to "confirmed" only if you genuinely cannot find counter-evidence.`,
269
+ { label: `challenge:${dim.name}`, phase: 'Challenge', schema: CHALLENGE_SCHEMA }
270
+ )
271
+ )
272
+ )
273
+
274
+ const validChallenges = challengeResults.filter(Boolean)
275
+ const overturnedCount = validChallenges.filter(c => c.challenge_result !== 'confirmed').length
276
+
277
+ const challengeDigest = validChallenges.map(c =>
278
+ `${c.dimension}: ${c.original_passed ? 'PASS' : 'FAIL'} → ${c.challenge_result} (confidence: ${c.confidence}%)\n ${c.reasoning}`
279
+ ).join('\n\n')
280
+
281
+ log(`Challenges: ${overturnedCount}/${validChallenges.length} dimensions overturned`)
282
+
283
+ // Phase 3: 3-vote adversarial verdict
284
+ phase('Report')
285
+ log('3-vote adversarial verdict (strict / lenient / objective)...')
286
+
287
+ const fullContext = `=== ORIGINAL AUDIT ===\n${auditDigest}\n\n=== ADVERSARIAL CHALLENGES ===\n${challengeDigest}`
288
+
289
+ const verdictVotes = await parallel([
290
+ () => agent(
291
+ `STRICT VOTER: Apply the highest quality bar for milestone completion.
292
+
293
+ ${fullContext}
294
+
295
+ Your philosophy: A milestone is complete ONLY when everything is truly done.
296
+ - If any challenge overturned a PASS to FAIL → FAIL
297
+ - If any dimension was originally FAIL and not overturned → FAIL
298
+ - PASS only if all dimensions pass AND all challenges confirm
299
+
300
+ Vote with next_step recommendation.`,
301
+ { label: 'vote:strict', phase: 'Report', schema: VERDICT_VOTE_SCHEMA }
302
+ ),
303
+ () => agent(
304
+ `LENIENT VOTER: Apply a practical bar for milestone completion.
305
+
306
+ ${fullContext}
307
+
308
+ Your philosophy: Milestones should move forward when substantially complete.
309
+ - If challenges overturned FAILs to PASs → good, count them
310
+ - Minor coverage/execution gaps are acceptable if integration is solid
311
+ - PASS if the core functionality works even with minor gaps
312
+
313
+ Vote with next_step recommendation.`,
314
+ { label: 'vote:lenient', phase: 'Report', schema: VERDICT_VOTE_SCHEMA }
315
+ ),
316
+ () => agent(
317
+ `OBJECTIVE VOTER: Apply evidence-based judgment for milestone completion.
318
+
319
+ ${fullContext}
320
+
321
+ Your philosophy: Follow the evidence, weigh challenge confidence.
322
+ - High-confidence challenges (>80%) override original results
323
+ - Low-confidence challenges (<50%) are noise
324
+ - If the post-challenge picture shows all dimensions pass → PASS
325
+ - If any dimension genuinely fails after challenge → FAIL
326
+
327
+ Vote with next_step recommendation.`,
328
+ { label: 'vote:objective', phase: 'Report', schema: VERDICT_VOTE_SCHEMA }
329
+ ),
330
+ ])
331
+
332
+ const validVotes = verdictVotes.filter(Boolean)
333
+ const voteCounts = { PASS: 0, FAIL: 0 }
334
+ validVotes.forEach(v => { voteCounts[v.verdict] = (voteCounts[v.verdict] || 0) + 1 })
335
+
336
+ const voteDigest = validVotes.map(v =>
337
+ `${v.perspective}: ${v.verdict} → ${v.next_step} (confidence: ${v.confidence}%)\n ${v.rationale}`
338
+ ).join('\n\n')
339
+
340
+ log(`Verdict votes: PASS=${voteCounts.PASS} FAIL=${voteCounts.FAIL}`)
202
341
 
203
342
  const report = await agent(
204
- `Generate consolidated milestone audit report.
343
+ `Generate final milestone audit report from adversarial deliberation.
344
+
345
+ === VOTES ===
346
+ ${voteDigest}
347
+
348
+ Vote tally: PASS=${voteCounts.PASS}, FAIL=${voteCounts.FAIL}
349
+
350
+ === CHALLENGE RESULTS ===
351
+ ${challengeDigest}
205
352
 
353
+ === ORIGINAL AUDIT ===
206
354
  ${auditDigest}
207
355
 
208
- Determine:
209
- 1. Overall verdict: PASS only if ALL dimensions pass
210
- 2. Confidence score (0-100)
211
- 3. List blocking issues with specific remediation
212
- 4. Determine next step:
213
- - "milestone-complete": all pass ready to close milestone
214
- - "plan-gaps": integration issues need new plan
215
- - "execute": incomplete execution
216
- - "verify": missing verification artifacts
217
- 5. Write summary`,
356
+ RESOLVE:
357
+ 1. Majority vote wins. Tie: go with OBJECTIVE voter.
358
+ 2. Record adversarial_outcome with each voter's position and challenges_overturned count
359
+ 3. Build dimension_results with original AND post-challenge status
360
+ 4. Compile blocking_issues from dimensions that FAIL after challenges
361
+ 5. Determine next_step by majority vote (tie: go with objective)
362
+ 6. Write summary including challenge and deliberation outcomes`,
218
363
  { label: 'report', phase: 'Report', schema: REPORT_SCHEMA }
219
364
  )
220
365
 
@@ -222,14 +367,18 @@ return {
222
367
  coverage: coverage,
223
368
  execution: execution,
224
369
  integration: integration,
370
+ challenges: validChallenges,
371
+ votes: validVotes,
225
372
  report: report,
226
373
  metadata: {
227
374
  milestone: milestone,
228
375
  is_adhoc: isAdhoc,
229
376
  dimensions_checked: validResults.length,
377
+ dimensions_overturned: overturnedCount,
230
378
  coverage_passed: coverage ? coverage.passed : null,
231
379
  execution_passed: execution ? execution.passed : null,
232
380
  integration_passed: integration ? integration.passed : null,
381
+ vote_counts: voteCounts,
233
382
  verdict: report ? report.verdict : 'UNKNOWN',
234
383
  next_step: report ? report.next_step : null,
235
384
  },