maestro-flow 0.4.20 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/maestro-ralph-execute/SKILL.md +2 -1
- package/.agents/skills/maestro-swarm-workflow/SKILL.md +27 -19
- package/.agents/skills/maestro-universal-workflow/SKILL.md +563 -0
- package/.agents/skills/team-adversarial-swarm/SKILL.md +235 -0
- package/.agents/skills/team-adversarial-swarm/scripts/aco.py +473 -0
- package/.agents/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
- package/.agents/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
- package/.agents/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
- package/.agents/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
- package/.agents/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
- package/.agents/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
- package/.agents/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
- package/.agents/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
- package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
- package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
- package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
- package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
- package/.agy/skills/maestro-ralph-execute/SKILL.md +2 -1
- package/.agy/skills/maestro-swarm-workflow/SKILL.md +27 -19
- package/.agy/skills/maestro-universal-workflow/SKILL.md +560 -0
- package/.agy/skills/team-adversarial-swarm/SKILL.md +244 -0
- package/.agy/skills/team-adversarial-swarm/scripts/aco.py +473 -0
- package/.agy/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
- package/.agy/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
- package/.agy/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
- package/.agy/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
- package/.agy/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
- package/.agy/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
- package/.agy/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
- package/.agy/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
- package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
- package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
- package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
- package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
- package/.claude/commands/maestro-ralph-execute.md +2 -1
- package/.claude/commands/maestro-swarm-workflow.md +27 -19
- package/.claude/commands/maestro-universal-workflow.md +561 -0
- package/.claude/skills/team-adversarial-swarm/SKILL.md +233 -0
- package/.claude/skills/team-adversarial-swarm/scripts/aco.py +473 -0
- package/.claude/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
- package/.claude/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
- package/.claude/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
- package/.claude/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
- package/.claude/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
- package/.claude/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
- package/.claude/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
- package/.claude/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
- package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
- package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
- package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
- package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
- package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/search.js +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/search.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.d.ts +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js +5 -5
- package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +3 -3
- package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
- package/dashboard/dist-server/src/graph/types.d.ts +111 -0
- package/dashboard/dist-server/src/graph/types.js +2 -0
- package/dashboard/dist-server/src/graph/types.js.map +1 -0
- package/dist/src/commands/install-backend.d.ts +0 -7
- package/dist/src/commands/install-backend.d.ts.map +1 -1
- package/dist/src/commands/install-backend.js +0 -14
- package/dist/src/commands/install-backend.js.map +1 -1
- package/dist/src/commands/install.d.ts.map +1 -1
- package/dist/src/commands/install.js +0 -18
- package/dist/src/commands/install.js.map +1 -1
- package/dist/src/commands/kg.d.ts +2 -2
- package/dist/src/commands/kg.d.ts.map +1 -1
- package/dist/src/commands/kg.js +150 -179
- package/dist/src/commands/kg.js.map +1 -1
- package/dist/src/graph/analyzers/fs-analyzer.d.ts +10 -0
- package/dist/src/graph/analyzers/fs-analyzer.d.ts.map +1 -0
- package/dist/src/graph/analyzers/fs-analyzer.js +959 -0
- package/dist/src/graph/analyzers/fs-analyzer.js.map +1 -0
- package/dist/src/graph/index.d.ts +6 -0
- package/dist/src/graph/index.d.ts.map +1 -0
- package/dist/src/graph/index.js +6 -0
- package/dist/src/graph/index.js.map +1 -0
- package/dist/src/graph/loader.d.ts +3 -0
- package/dist/src/graph/loader.d.ts.map +1 -0
- package/dist/src/graph/loader.js +12 -0
- package/dist/src/graph/loader.js.map +1 -0
- package/dist/src/graph/merger.d.ts +56 -0
- package/dist/src/graph/merger.d.ts.map +1 -0
- package/dist/src/graph/merger.js +896 -0
- package/dist/src/graph/merger.js.map +1 -0
- package/dist/src/graph/query.d.ts +7 -0
- package/dist/src/graph/query.d.ts.map +1 -0
- package/dist/src/graph/query.js +126 -0
- package/dist/src/graph/query.js.map +1 -0
- package/dist/src/graph/types.d.ts +112 -0
- package/dist/src/graph/types.d.ts.map +1 -0
- package/dist/src/graph/types.js +2 -0
- package/dist/src/graph/types.js.map +1 -0
- package/dist/src/i18n/locales/en.d.ts.map +1 -1
- package/dist/src/i18n/locales/en.js +0 -10
- package/dist/src/i18n/locales/en.js.map +1 -1
- package/dist/src/i18n/locales/zh.d.ts.map +1 -1
- package/dist/src/i18n/locales/zh.js +0 -10
- package/dist/src/i18n/locales/zh.js.map +1 -1
- package/dist/src/i18n/types.d.ts +0 -9
- package/dist/src/i18n/types.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallConfirm.d.ts +0 -1
- package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
- package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
- package/dist/src/tui/install-ui/InstallExecution.d.ts +0 -1
- package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallExecution.js +0 -22
- package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
- package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
- package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallFlow.js +5 -23
- package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
- package/dist/src/tui/install-ui/InstallHub.d.ts +0 -2
- package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallHub.js +0 -6
- package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
- package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
- package/dist/src/tui/install-ui/InstallResult.js +1 -1
- package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
- package/dist/src/utils/update-notices.js +12 -0
- package/dist/src/utils/update-notices.js.map +1 -1
- package/package.json +1 -1
- package/workflows/swarm/wf-analyze.js +195 -34
- package/workflows/swarm/wf-brainstorm.js +225 -53
- package/workflows/swarm/wf-execute.js +199 -23
- package/workflows/swarm/wf-grill.js +181 -20
- package/workflows/swarm/wf-milestone-audit.js +178 -29
- package/workflows/swarm/wf-plan.js +288 -53
- package/workflows/swarm/wf-review.js +195 -80
- package/workflows/swarm/wf-verify.js +125 -28
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
export const meta = {
|
|
2
2
|
name: 'wf-grill',
|
|
3
|
-
description: '
|
|
4
|
-
whenToUse: 'Accelerate maestro-grill
|
|
3
|
+
description: 'Adversarial stress-testing with meta-skeptic challenge and 3-vote verdict',
|
|
4
|
+
whenToUse: 'Accelerate maestro-grill with parallel branch probing + meta-adversarial synthesis + 3-vote verdict',
|
|
5
5
|
phases: [
|
|
6
6
|
{ title: 'Explore', detail: 'Codebase evidence gathering via cli-explore-agent' },
|
|
7
7
|
{ title: 'Stress', detail: 'Parallel adversarial branch probing' },
|
|
8
|
-
{ title: '
|
|
8
|
+
{ title: 'MetaChallenge', detail: 'Meta-skeptic challenges the stress-test findings themselves' },
|
|
9
|
+
{ title: 'Synthesize', detail: '3-vote adversarial verdict (optimist/pessimist/realist)' },
|
|
9
10
|
],
|
|
10
11
|
}
|
|
11
12
|
|
|
@@ -68,17 +69,65 @@ const BRANCH_SCHEMA = {
|
|
|
68
69
|
required: ['branch', 'challenges', 'terminology_conflicts', 'assumptions_challenged', 'verdict'],
|
|
69
70
|
}
|
|
70
71
|
|
|
72
|
+
const META_CHALLENGE_SCHEMA = {
|
|
73
|
+
type: 'object',
|
|
74
|
+
properties: {
|
|
75
|
+
overblown_findings: {
|
|
76
|
+
type: 'array',
|
|
77
|
+
items: {
|
|
78
|
+
type: 'object',
|
|
79
|
+
properties: {
|
|
80
|
+
branch: { type: 'string' },
|
|
81
|
+
finding: { type: 'string' },
|
|
82
|
+
why_overblown: { type: 'string' },
|
|
83
|
+
actual_severity: { type: 'string', enum: ['blocking', 'significant', 'minor', 'non-issue'] },
|
|
84
|
+
},
|
|
85
|
+
required: ['branch', 'finding', 'why_overblown', 'actual_severity'],
|
|
86
|
+
},
|
|
87
|
+
},
|
|
88
|
+
missed_issues: {
|
|
89
|
+
type: 'array',
|
|
90
|
+
items: {
|
|
91
|
+
type: 'object',
|
|
92
|
+
properties: {
|
|
93
|
+
area: { type: 'string' },
|
|
94
|
+
missed_issue: { type: 'string' },
|
|
95
|
+
severity: { type: 'string', enum: ['blocking', 'significant', 'minor'] },
|
|
96
|
+
evidence: { type: 'string' },
|
|
97
|
+
},
|
|
98
|
+
required: ['area', 'missed_issue', 'severity'],
|
|
99
|
+
},
|
|
100
|
+
},
|
|
101
|
+
stress_test_quality: { type: 'number', minimum: 1, maximum: 5 },
|
|
102
|
+
meta_assessment: { type: 'string' },
|
|
103
|
+
},
|
|
104
|
+
required: ['overblown_findings', 'missed_issues', 'stress_test_quality', 'meta_assessment'],
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
const VERDICT_VOTE_SCHEMA = {
|
|
108
|
+
type: 'object',
|
|
109
|
+
properties: {
|
|
110
|
+
perspective: { type: 'string' },
|
|
111
|
+
verdict: { type: 'string', enum: ['ready-for-brainstorm', 'needs-refinement', 'back-to-drawing-board'] },
|
|
112
|
+
argument: { type: 'string' },
|
|
113
|
+
confidence: { type: 'number', minimum: 0, maximum: 100 },
|
|
114
|
+
key_reasons: { type: 'array', items: { type: 'string' } },
|
|
115
|
+
},
|
|
116
|
+
required: ['perspective', 'verdict', 'argument', 'confidence'],
|
|
117
|
+
}
|
|
118
|
+
|
|
71
119
|
const SYNTHESIS_SCHEMA = {
|
|
72
120
|
type: 'object',
|
|
73
121
|
properties: {
|
|
74
122
|
overall_verdict: { type: 'string', enum: ['ready-for-brainstorm', 'needs-refinement', 'back-to-drawing-board'] },
|
|
123
|
+
adversarial_outcome: { type: 'object', properties: { optimist: { type: 'string' }, pessimist: { type: 'string' }, realist: { type: 'string' }, decisive_factor: { type: 'string' } }, required: ['optimist', 'pessimist', 'realist', 'decisive_factor'] },
|
|
75
124
|
blocking_issues: { type: 'array', items: { type: 'object', properties: { branch: { type: 'string' }, issue: { type: 'string' }, must_resolve_before: { type: 'string' } }, required: ['branch', 'issue'] } },
|
|
76
125
|
terminology: { type: 'array', items: { type: 'object', properties: { term: { type: 'string' }, definition: { type: 'string' }, code_alignment: { type: 'string' } }, required: ['term', 'definition'] } },
|
|
77
126
|
contradictions: { type: 'array', items: { type: 'object', properties: { between_branches: { type: 'array', items: { type: 'string' } }, description: { type: 'string' }, resolution: { type: 'string' } }, required: ['between_branches', 'description'] } },
|
|
78
127
|
constraints_discovered: { type: 'array', items: { type: 'object', properties: { constraint: { type: 'string' }, source: { type: 'string' }, impact: { type: 'string' }, status: { type: 'string', enum: ['locked', 'free', 'deferred'] } }, required: ['constraint', 'source', 'status'] } },
|
|
79
128
|
executive_summary: { type: 'string' },
|
|
80
129
|
},
|
|
81
|
-
required: ['overall_verdict', 'blocking_issues', 'terminology', 'contradictions', 'constraints_discovered', 'executive_summary'],
|
|
130
|
+
required: ['overall_verdict', 'adversarial_outcome', 'blocking_issues', 'terminology', 'contradictions', 'constraints_discovered', 'executive_summary'],
|
|
82
131
|
}
|
|
83
132
|
|
|
84
133
|
const topic = args?.topic || ''
|
|
@@ -149,50 +198,162 @@ Be adversarial but fair — only raise real issues backed by evidence.`,
|
|
|
149
198
|
const validBranches = branchResults.filter(Boolean)
|
|
150
199
|
log(`${validBranches.length}/${selectedBranches.length} branches probed`)
|
|
151
200
|
|
|
152
|
-
// Phase 3: Cross-branch synthesis
|
|
153
|
-
phase('Synthesize')
|
|
154
|
-
|
|
155
201
|
const branchDigest = validBranches.map(b => {
|
|
156
202
|
const blocking = b.challenges.filter(c => c.severity === 'blocking')
|
|
157
203
|
return `## ${b.branch} [${b.verdict}]
|
|
158
204
|
Challenges: ${b.challenges.length} (${blocking.length} blocking)
|
|
159
|
-
${blocking.map(c => `
|
|
205
|
+
${blocking.map(c => ` ! ${c.question}${c.contradiction ? ' — ' + c.contradiction : ''}`).join('\n')}
|
|
160
206
|
Terminology conflicts: ${b.terminology_conflicts.map(t => t.proposed_term + ' vs ' + t.codebase_term).join(', ') || 'none'}
|
|
161
207
|
Assumptions challenged: ${b.assumptions_challenged.join('; ') || 'none'}`
|
|
162
208
|
}).join('\n\n')
|
|
163
209
|
|
|
210
|
+
// Phase 3: Meta-skeptic challenges the stress-test findings
|
|
211
|
+
phase('MetaChallenge')
|
|
212
|
+
log('Meta-skeptic challenging the stress-test findings themselves...')
|
|
213
|
+
|
|
214
|
+
const metaChallenge = await agent(
|
|
215
|
+
`You are the META-SKEPTIC — the devil's advocate OF the devil's advocates.
|
|
216
|
+
|
|
217
|
+
The stress-testers above tried to break this proposal:
|
|
218
|
+
Topic: ${topic}
|
|
219
|
+
|
|
220
|
+
Their findings:
|
|
221
|
+
${branchDigest}
|
|
222
|
+
|
|
223
|
+
${evidenceContext}
|
|
224
|
+
|
|
225
|
+
Your job is to challenge THE STRESS-TESTERS:
|
|
226
|
+
1. OVERBLOWN FINDINGS: Which challenges are exaggerated, based on unlikely scenarios, or missing context?
|
|
227
|
+
- Check if the "blocking" issues are actually blocking
|
|
228
|
+
- See if the code evidence actually supports the claimed contradiction
|
|
229
|
+
- Identify where stress-testers assumed worst-case without justification
|
|
230
|
+
2. MISSED ISSUES: What did the stress-testers NOT catch that they should have?
|
|
231
|
+
- Blind spots across all branches
|
|
232
|
+
- Interactions between branches that no single branch tested
|
|
233
|
+
- Real risks that were obscured by focus on minor issues
|
|
234
|
+
3. Rate the overall stress_test_quality (1-5): how thorough and fair were the findings?
|
|
235
|
+
|
|
236
|
+
Be ruthlessly honest. Some stress-test findings ARE real; confirm those. But call out any that are theatrical rather than substantive.`,
|
|
237
|
+
{ label: 'meta-skeptic', phase: 'MetaChallenge', schema: META_CHALLENGE_SCHEMA }
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
const metaDigest = metaChallenge
|
|
241
|
+
? `Meta-skeptic assessment (quality: ${metaChallenge.stress_test_quality}/5):
|
|
242
|
+
Overblown: ${metaChallenge.overblown_findings.length} findings downgraded
|
|
243
|
+
${metaChallenge.overblown_findings.map(f => ` ${f.branch}: "${f.finding}" → ${f.actual_severity} — ${f.why_overblown}`).join('\n')}
|
|
244
|
+
Missed: ${metaChallenge.missed_issues.length} new issues surfaced
|
|
245
|
+
${metaChallenge.missed_issues.map(m => ` [${m.severity}] ${m.area}: ${m.missed_issue}`).join('\n')}
|
|
246
|
+
Assessment: ${metaChallenge.meta_assessment}`
|
|
247
|
+
: 'Meta-challenge not available.'
|
|
248
|
+
|
|
249
|
+
// Phase 4: 3-vote adversarial verdict
|
|
250
|
+
phase('Synthesize')
|
|
251
|
+
log('Launching 3-vote adversarial verdict (optimist / pessimist / realist)...')
|
|
252
|
+
|
|
253
|
+
const votes = await parallel([
|
|
254
|
+
() => agent(
|
|
255
|
+
`You are the OPTIMIST. Vote on the proposal's readiness.
|
|
256
|
+
|
|
257
|
+
Proposal: ${topic}
|
|
258
|
+
Stress-test findings:\n${branchDigest}
|
|
259
|
+
Meta-skeptic review:\n${metaDigest}
|
|
260
|
+
|
|
261
|
+
Your lens: Focus on what IS working. Discount overblown findings. Trust proposed resolutions.
|
|
262
|
+
- "ready-for-brainstorm": blocking issues are addressable, proceed with awareness
|
|
263
|
+
- "needs-refinement": some issues need attention but proposal has merit
|
|
264
|
+
- "back-to-drawing-board": only if genuinely unfixable (you should almost never vote this)
|
|
265
|
+
|
|
266
|
+
Vote with your confidence level.`,
|
|
267
|
+
{ label: 'vote:optimist', phase: 'Synthesize', schema: VERDICT_VOTE_SCHEMA }
|
|
268
|
+
),
|
|
269
|
+
() => agent(
|
|
270
|
+
`You are the PESSIMIST. Vote on the proposal's readiness.
|
|
271
|
+
|
|
272
|
+
Proposal: ${topic}
|
|
273
|
+
Stress-test findings:\n${branchDigest}
|
|
274
|
+
Meta-skeptic review:\n${metaDigest}
|
|
275
|
+
|
|
276
|
+
Your lens: Focus on what is BROKEN. Amplify blocking issues. Question proposed resolutions.
|
|
277
|
+
- "back-to-drawing-board": if there are fundamental flaws or too many blocking issues
|
|
278
|
+
- "needs-refinement": if issues are real but fixable
|
|
279
|
+
- "ready-for-brainstorm": only if stress-testing found almost nothing (you should almost never vote this)
|
|
280
|
+
|
|
281
|
+
Vote with your confidence level.`,
|
|
282
|
+
{ label: 'vote:pessimist', phase: 'Synthesize', schema: VERDICT_VOTE_SCHEMA }
|
|
283
|
+
),
|
|
284
|
+
() => agent(
|
|
285
|
+
`You are the REALIST. Vote on the proposal's readiness.
|
|
286
|
+
|
|
287
|
+
Proposal: ${topic}
|
|
288
|
+
Stress-test findings:\n${branchDigest}
|
|
289
|
+
Meta-skeptic review:\n${metaDigest}
|
|
290
|
+
|
|
291
|
+
Your lens: Evidence-based, no bias. Weigh the meta-skeptic's corrections. Discount both theatrical threats and wishful thinking.
|
|
292
|
+
- "ready-for-brainstorm": if blocking issues are few, well-understood, and have clear resolutions
|
|
293
|
+
- "needs-refinement": if real issues exist but are tractable
|
|
294
|
+
- "back-to-drawing-board": if fundamental assumptions are wrong
|
|
295
|
+
|
|
296
|
+
Vote with your confidence level.`,
|
|
297
|
+
{ label: 'vote:realist', phase: 'Synthesize', schema: VERDICT_VOTE_SCHEMA }
|
|
298
|
+
),
|
|
299
|
+
])
|
|
300
|
+
|
|
301
|
+
const validVotes = votes.filter(Boolean)
|
|
302
|
+
const voteDigest = validVotes.map(v =>
|
|
303
|
+
`${v.perspective}: ${v.verdict} (confidence: ${v.confidence}%)\n ${v.argument}`
|
|
304
|
+
).join('\n\n')
|
|
305
|
+
|
|
306
|
+
const verdictCounts = {}
|
|
307
|
+
validVotes.forEach(v => { verdictCounts[v.verdict] = (verdictCounts[v.verdict] || 0) + 1 })
|
|
308
|
+
log(`Votes: ${Object.entries(verdictCounts).map(([k, v]) => k + '=' + v).join(', ')}`)
|
|
309
|
+
|
|
310
|
+
log('Synthesizing final verdict from adversarial votes...')
|
|
311
|
+
|
|
164
312
|
const synthesis = await agent(
|
|
165
|
-
`Synthesize stress-test
|
|
313
|
+
`Synthesize the final stress-test verdict from 3 adversarial voters.
|
|
166
314
|
|
|
167
315
|
Proposal: ${topic}
|
|
168
316
|
|
|
169
|
-
|
|
317
|
+
=== VOTES ===
|
|
318
|
+
${voteDigest}
|
|
319
|
+
|
|
320
|
+
Vote tally: ${Object.entries(verdictCounts).map(([k, v]) => k + '=' + v).join(', ')}
|
|
321
|
+
|
|
322
|
+
=== META-SKEPTIC REVIEW ===
|
|
323
|
+
${metaDigest}
|
|
324
|
+
|
|
325
|
+
=== BRANCH FINDINGS ===
|
|
170
326
|
${branchDigest}
|
|
171
327
|
|
|
172
|
-
|
|
173
|
-
1.
|
|
174
|
-
2.
|
|
175
|
-
3.
|
|
176
|
-
4.
|
|
177
|
-
5.
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
6. Write executive summary`,
|
|
328
|
+
RESOLVE:
|
|
329
|
+
1. Majority vote wins. Tie-break: go with the REALIST.
|
|
330
|
+
2. Record adversarial_outcome with each voter's verdict and the decisive factor
|
|
331
|
+
3. Compile blocking_issues from branches BUT exclude any the meta-skeptic downgraded to non-issue
|
|
332
|
+
4. Add any missed_issues from meta-skeptic as additional blocking if severity is blocking
|
|
333
|
+
5. Build unified terminology list
|
|
334
|
+
6. Detect cross-branch contradictions
|
|
335
|
+
7. Extract discovered constraints (locked/free/deferred)
|
|
336
|
+
8. Write executive summary including the adversarial debate and meta-challenge outcomes`,
|
|
182
337
|
{ label: 'synthesize', phase: 'Synthesize', schema: SYNTHESIS_SCHEMA }
|
|
183
338
|
)
|
|
184
339
|
|
|
185
340
|
return {
|
|
186
341
|
exploration: exploration,
|
|
187
342
|
branches: validBranches,
|
|
343
|
+
metaChallenge: metaChallenge,
|
|
344
|
+
votes: validVotes,
|
|
188
345
|
synthesis: synthesis,
|
|
189
346
|
metadata: {
|
|
190
347
|
topic: topic,
|
|
191
348
|
depth: depth,
|
|
192
349
|
branch_count: selectedBranches.length,
|
|
193
350
|
completed_count: validBranches.length,
|
|
351
|
+
meta_overblown: metaChallenge ? metaChallenge.overblown_findings.length : 0,
|
|
352
|
+
meta_missed: metaChallenge ? metaChallenge.missed_issues.length : 0,
|
|
353
|
+
stress_test_quality: metaChallenge ? metaChallenge.stress_test_quality : null,
|
|
194
354
|
blocking_count: synthesis ? synthesis.blocking_issues.length : 0,
|
|
195
355
|
contradiction_count: synthesis ? synthesis.contradictions.length : 0,
|
|
356
|
+
verdict_votes: verdictCounts,
|
|
196
357
|
overall_verdict: synthesis ? synthesis.overall_verdict : 'unknown',
|
|
197
358
|
},
|
|
198
359
|
}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
export const meta = {
|
|
2
2
|
name: 'wf-milestone-audit',
|
|
3
|
-
description: 'Parallel
|
|
4
|
-
whenToUse: 'Accelerate maestro-milestone-audit with parallel
|
|
3
|
+
description: 'Parallel milestone audit with adversarial challenge and 3-vote verdict',
|
|
4
|
+
whenToUse: 'Accelerate maestro-milestone-audit with parallel dimension checks + adversarial challenge + 3-vote verdict',
|
|
5
5
|
phases: [
|
|
6
6
|
{ title: 'Audit', detail: 'Parallel 4-dimension milestone audit' },
|
|
7
|
-
{ title: '
|
|
7
|
+
{ title: 'Challenge', detail: 'Adversarial challenge of each audit dimension' },
|
|
8
|
+
{ title: 'Report', detail: '3-vote adversarial verdict (strict/lenient/objective)' },
|
|
8
9
|
],
|
|
9
10
|
}
|
|
10
11
|
|
|
@@ -86,38 +87,74 @@ const INTEGRATION_SCHEMA = {
|
|
|
86
87
|
required: ['check_type', 'passed', 'interfaces', 'summary'],
|
|
87
88
|
}
|
|
88
89
|
|
|
90
|
+
const CHALLENGE_SCHEMA = {
|
|
91
|
+
type: 'object',
|
|
92
|
+
properties: {
|
|
93
|
+
dimension: { type: 'string' },
|
|
94
|
+
original_passed: { type: 'boolean' },
|
|
95
|
+
challenge_result: { type: 'string', enum: ['confirmed', 'overturned-to-fail', 'overturned-to-pass'] },
|
|
96
|
+
counter_evidence: { type: 'array', items: { type: 'object', properties: { point: { type: 'string' }, evidence: { type: 'string' } }, required: ['point', 'evidence'] } },
|
|
97
|
+
reasoning: { type: 'string' },
|
|
98
|
+
confidence: { type: 'number', minimum: 0, maximum: 100 },
|
|
99
|
+
},
|
|
100
|
+
required: ['dimension', 'original_passed', 'challenge_result', 'reasoning', 'confidence'],
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
const VERDICT_VOTE_SCHEMA = {
|
|
104
|
+
type: 'object',
|
|
105
|
+
properties: {
|
|
106
|
+
perspective: { type: 'string' },
|
|
107
|
+
verdict: { type: 'string', enum: ['PASS', 'FAIL'] },
|
|
108
|
+
rationale: { type: 'string' },
|
|
109
|
+
confidence: { type: 'number', minimum: 0, maximum: 100 },
|
|
110
|
+
next_step: { type: 'string', enum: ['milestone-complete', 'plan-gaps', 'execute', 'verify'] },
|
|
111
|
+
},
|
|
112
|
+
required: ['perspective', 'verdict', 'rationale', 'confidence', 'next_step'],
|
|
113
|
+
}
|
|
114
|
+
|
|
89
115
|
const REPORT_SCHEMA = {
|
|
90
116
|
type: 'object',
|
|
91
117
|
properties: {
|
|
92
118
|
verdict: { type: 'string', enum: ['PASS', 'FAIL'] },
|
|
93
119
|
confidence: { type: 'number', minimum: 0, maximum: 100 },
|
|
120
|
+
adversarial_outcome: {
|
|
121
|
+
type: 'object',
|
|
122
|
+
properties: {
|
|
123
|
+
strict: { type: 'string' },
|
|
124
|
+
lenient: { type: 'string' },
|
|
125
|
+
objective: { type: 'string' },
|
|
126
|
+
challenges_overturned: { type: 'number' },
|
|
127
|
+
decisive_factor: { type: 'string' },
|
|
128
|
+
},
|
|
129
|
+
required: ['strict', 'lenient', 'objective', 'decisive_factor'],
|
|
130
|
+
},
|
|
94
131
|
dimension_results: {
|
|
95
132
|
type: 'array',
|
|
96
133
|
items: {
|
|
97
134
|
type: 'object',
|
|
98
135
|
properties: {
|
|
99
136
|
dimension: { type: 'string' },
|
|
100
|
-
|
|
137
|
+
original_passed: { type: 'boolean' },
|
|
138
|
+
post_challenge_passed: { type: 'boolean' },
|
|
101
139
|
issue_count: { type: 'number' },
|
|
102
140
|
},
|
|
103
|
-
required: ['dimension', '
|
|
141
|
+
required: ['dimension', 'original_passed', 'post_challenge_passed'],
|
|
104
142
|
},
|
|
105
143
|
},
|
|
106
144
|
blocking_issues: { type: 'array', items: { type: 'object', properties: { dimension: { type: 'string' }, description: { type: 'string' }, remediation: { type: 'string' } }, required: ['dimension', 'description', 'remediation'] } },
|
|
107
145
|
next_step: { type: 'string', enum: ['milestone-complete', 'plan-gaps', 'execute', 'verify'] },
|
|
108
146
|
summary: { type: 'string' },
|
|
109
147
|
},
|
|
110
|
-
required: ['verdict', 'confidence', 'dimension_results', 'blocking_issues', 'next_step', 'summary'],
|
|
148
|
+
required: ['verdict', 'confidence', 'adversarial_outcome', 'dimension_results', 'blocking_issues', 'next_step', 'summary'],
|
|
111
149
|
}
|
|
112
150
|
|
|
113
151
|
const milestone = args?.milestone || ''
|
|
114
152
|
const isAdhoc = args?.is_adhoc || false
|
|
115
153
|
|
|
116
|
-
// Phase 1: Parallel
|
|
154
|
+
// Phase 1: Parallel audit dimensions
|
|
117
155
|
phase('Audit')
|
|
118
156
|
|
|
119
157
|
const checks = [
|
|
120
|
-
// Dimension 1: Phase coverage (skip for adhoc milestones)
|
|
121
158
|
() => agent(
|
|
122
159
|
`Phase Coverage Audit${isAdhoc ? ' (ADHOC — skip roadmap phase checks, only verify artifact chain PLN→EXC exists)' : ''}.
|
|
123
160
|
${milestone ? 'Milestone: ' + milestone : 'Use current_milestone from .workflow/state.json'}
|
|
@@ -137,8 +174,6 @@ ${isAdhoc ? `Adhoc milestone: skip roadmap.md parsing. Only check:
|
|
|
137
174
|
Set check_type="phase-coverage" in output.`,
|
|
138
175
|
{ label: 'audit:coverage', phase: 'Audit', schema: COVERAGE_SCHEMA }
|
|
139
176
|
),
|
|
140
|
-
|
|
141
|
-
// Dimension 2: Execution completeness
|
|
142
177
|
() => agent(
|
|
143
178
|
`Execution Completeness Audit.
|
|
144
179
|
${milestone ? 'Milestone: ' + milestone : 'Use current_milestone from .workflow/state.json'}
|
|
@@ -154,8 +189,6 @@ ${milestone ? 'Milestone: ' + milestone : 'Use current_milestone from .workflow/
|
|
|
154
189
|
Set check_type="execution-completeness" in output.`,
|
|
155
190
|
{ label: 'audit:execution', phase: 'Audit', schema: EXECUTION_SCHEMA }
|
|
156
191
|
),
|
|
157
|
-
|
|
158
|
-
// Dimension 3: Cross-phase integration
|
|
159
192
|
() => agent(
|
|
160
193
|
`Cross-Phase Integration Audit.
|
|
161
194
|
${milestone ? 'Milestone: ' + milestone : 'Use current_milestone from .workflow/state.json'}
|
|
@@ -184,9 +217,6 @@ log(`Running ${checks.length} audit dimensions in parallel...`)
|
|
|
184
217
|
const results = await parallel(checks)
|
|
185
218
|
const validResults = results.filter(Boolean)
|
|
186
219
|
|
|
187
|
-
// Phase 2: Consolidated report
|
|
188
|
-
phase('Report')
|
|
189
|
-
|
|
190
220
|
const coverage = validResults.find(r => r.check_type === 'phase-coverage')
|
|
191
221
|
const execution = validResults.find(r => r.check_type === 'execution-completeness')
|
|
192
222
|
const integration = validResults.find(r => r.check_type === 'integration')
|
|
@@ -194,27 +224,142 @@ const integration = validResults.find(r => r.check_type === 'integration')
|
|
|
194
224
|
const auditDigest = `Phase Coverage: ${coverage ? (coverage.passed ? 'PASS' : 'FAIL') + ' — ' + coverage.summary : 'NOT RUN'}
|
|
195
225
|
|
|
196
226
|
Execution Completeness: ${execution ? (execution.passed ? 'PASS' : 'FAIL') + ' — ' + execution.summary : 'NOT RUN'}
|
|
197
|
-
${execution && !execution.passed ? 'Incomplete
|
|
227
|
+
${execution && !execution.passed ? 'Incomplete: ' + execution.plans.filter(p => p.pending_tasks > 0 || p.failed_tasks > 0).map(p => p.plan_id + ' (' + p.pending_tasks + ' pending, ' + p.failed_tasks + ' failed)').join('; ') : ''}
|
|
198
228
|
|
|
199
229
|
Integration: ${integration ? (integration.passed ? 'PASS' : 'FAIL') + ' — ' + integration.summary : 'NOT RUN'}
|
|
200
|
-
${integration && !integration.passed ? 'Failed
|
|
201
|
-
|
|
230
|
+
${integration && !integration.passed ? 'Failed: ' + integration.interfaces.filter(i => i.status === 'fail').map(i => i.interface_name + ': ' + i.issue).join('; ') : ''}`
|
|
231
|
+
|
|
232
|
+
// Phase 2: Adversarial challenge of each audit dimension
|
|
233
|
+
phase('Challenge')
|
|
234
|
+
log('Adversarial challenge of audit dimension results...')
|
|
235
|
+
|
|
236
|
+
const dimensionData = [
|
|
237
|
+
{ name: 'coverage', result: coverage },
|
|
238
|
+
{ name: 'execution', result: execution },
|
|
239
|
+
{ name: 'integration', result: integration },
|
|
240
|
+
].filter(d => d.result)
|
|
241
|
+
|
|
242
|
+
const challengeResults = await parallel(
|
|
243
|
+
dimensionData.map(dim => () =>
|
|
244
|
+
agent(
|
|
245
|
+
`ADVERSARIAL CHALLENGE of the "${dim.name}" audit dimension.
|
|
246
|
+
|
|
247
|
+
Original result: ${dim.result.passed ? 'PASS' : 'FAIL'}
|
|
248
|
+
Summary: ${dim.result.summary}
|
|
249
|
+
|
|
250
|
+
${dim.name === 'coverage' && dim.result.phases ? 'Phase details:\n' + dim.result.phases.map(p => ` ${p.phase}: ${p.status} (plan:${p.has_plan} execute:${p.has_execute})`).join('\n') : ''}
|
|
251
|
+
${dim.name === 'execution' && dim.result.plans ? 'Plan details:\n' + dim.result.plans.map(p => ` ${p.plan_id}: ${p.completed_tasks}/${p.total_tasks} complete`).join('\n') : ''}
|
|
252
|
+
${dim.name === 'integration' && dim.result.interfaces ? 'Interface details:\n' + dim.result.interfaces.map(i => ` ${i.interface_name}: ${i.status}`).join('\n') : ''}
|
|
253
|
+
|
|
254
|
+
Your job: Try to OVERTURN the result.
|
|
255
|
+
- If it PASSED: find evidence it should have FAILED (missed checks, false passes, overlooked issues)
|
|
256
|
+
- If it FAILED: find evidence it should have PASSED (issues are minor, not blocking, or already resolved)
|
|
257
|
+
|
|
258
|
+
Challenge the audit's thoroughness:
|
|
259
|
+
1. Did it check everything it should?
|
|
260
|
+
2. Were the checks actually verifying what they claim?
|
|
261
|
+
3. Is the evidence genuine or superficial?
|
|
262
|
+
|
|
263
|
+
challenge_result:
|
|
264
|
+
- "confirmed": the original result stands after challenge
|
|
265
|
+
- "overturned-to-fail": was PASS, should be FAIL (found missed issues)
|
|
266
|
+
- "overturned-to-pass": was FAIL, should be PASS (issues are not blocking)
|
|
267
|
+
|
|
268
|
+
Default to "confirmed" only if you genuinely cannot find counter-evidence.`,
|
|
269
|
+
{ label: `challenge:${dim.name}`, phase: 'Challenge', schema: CHALLENGE_SCHEMA }
|
|
270
|
+
)
|
|
271
|
+
)
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
const validChallenges = challengeResults.filter(Boolean)
|
|
275
|
+
const overturnedCount = validChallenges.filter(c => c.challenge_result !== 'confirmed').length
|
|
276
|
+
|
|
277
|
+
const challengeDigest = validChallenges.map(c =>
|
|
278
|
+
`${c.dimension}: ${c.original_passed ? 'PASS' : 'FAIL'} → ${c.challenge_result} (confidence: ${c.confidence}%)\n ${c.reasoning}`
|
|
279
|
+
).join('\n\n')
|
|
280
|
+
|
|
281
|
+
log(`Challenges: ${overturnedCount}/${validChallenges.length} dimensions overturned`)
|
|
282
|
+
|
|
283
|
+
// Phase 3: 3-vote adversarial verdict
|
|
284
|
+
phase('Report')
|
|
285
|
+
log('3-vote adversarial verdict (strict / lenient / objective)...')
|
|
286
|
+
|
|
287
|
+
const fullContext = `=== ORIGINAL AUDIT ===\n${auditDigest}\n\n=== ADVERSARIAL CHALLENGES ===\n${challengeDigest}`
|
|
288
|
+
|
|
289
|
+
const verdictVotes = await parallel([
|
|
290
|
+
() => agent(
|
|
291
|
+
`STRICT VOTER: Apply the highest quality bar for milestone completion.
|
|
292
|
+
|
|
293
|
+
${fullContext}
|
|
294
|
+
|
|
295
|
+
Your philosophy: A milestone is complete ONLY when everything is truly done.
|
|
296
|
+
- If any challenge overturned a PASS to FAIL → FAIL
|
|
297
|
+
- If any dimension was originally FAIL and not overturned → FAIL
|
|
298
|
+
- PASS only if all dimensions pass AND all challenges confirm
|
|
299
|
+
|
|
300
|
+
Vote with next_step recommendation.`,
|
|
301
|
+
{ label: 'vote:strict', phase: 'Report', schema: VERDICT_VOTE_SCHEMA }
|
|
302
|
+
),
|
|
303
|
+
() => agent(
|
|
304
|
+
`LENIENT VOTER: Apply a practical bar for milestone completion.
|
|
305
|
+
|
|
306
|
+
${fullContext}
|
|
307
|
+
|
|
308
|
+
Your philosophy: Milestones should move forward when substantially complete.
|
|
309
|
+
- If challenges overturned FAILs to PASs → good, count them
|
|
310
|
+
- Minor coverage/execution gaps are acceptable if integration is solid
|
|
311
|
+
- PASS if the core functionality works even with minor gaps
|
|
312
|
+
|
|
313
|
+
Vote with next_step recommendation.`,
|
|
314
|
+
{ label: 'vote:lenient', phase: 'Report', schema: VERDICT_VOTE_SCHEMA }
|
|
315
|
+
),
|
|
316
|
+
() => agent(
|
|
317
|
+
`OBJECTIVE VOTER: Apply evidence-based judgment for milestone completion.
|
|
318
|
+
|
|
319
|
+
${fullContext}
|
|
320
|
+
|
|
321
|
+
Your philosophy: Follow the evidence, weigh challenge confidence.
|
|
322
|
+
- High-confidence challenges (>80%) override original results
|
|
323
|
+
- Low-confidence challenges (<50%) are noise
|
|
324
|
+
- If the post-challenge picture shows all dimensions pass → PASS
|
|
325
|
+
- If any dimension genuinely fails after challenge → FAIL
|
|
326
|
+
|
|
327
|
+
Vote with next_step recommendation.`,
|
|
328
|
+
{ label: 'vote:objective', phase: 'Report', schema: VERDICT_VOTE_SCHEMA }
|
|
329
|
+
),
|
|
330
|
+
])
|
|
331
|
+
|
|
332
|
+
const validVotes = verdictVotes.filter(Boolean)
|
|
333
|
+
const voteCounts = { PASS: 0, FAIL: 0 }
|
|
334
|
+
validVotes.forEach(v => { voteCounts[v.verdict] = (voteCounts[v.verdict] || 0) + 1 })
|
|
335
|
+
|
|
336
|
+
const voteDigest = validVotes.map(v =>
|
|
337
|
+
`${v.perspective}: ${v.verdict} → ${v.next_step} (confidence: ${v.confidence}%)\n ${v.rationale}`
|
|
338
|
+
).join('\n\n')
|
|
339
|
+
|
|
340
|
+
log(`Verdict votes: PASS=${voteCounts.PASS} FAIL=${voteCounts.FAIL}`)
|
|
202
341
|
|
|
203
342
|
const report = await agent(
|
|
204
|
-
`Generate
|
|
343
|
+
`Generate final milestone audit report from adversarial deliberation.
|
|
344
|
+
|
|
345
|
+
=== VOTES ===
|
|
346
|
+
${voteDigest}
|
|
347
|
+
|
|
348
|
+
Vote tally: PASS=${voteCounts.PASS}, FAIL=${voteCounts.FAIL}
|
|
349
|
+
|
|
350
|
+
=== CHALLENGE RESULTS ===
|
|
351
|
+
${challengeDigest}
|
|
205
352
|
|
|
353
|
+
=== ORIGINAL AUDIT ===
|
|
206
354
|
${auditDigest}
|
|
207
355
|
|
|
208
|
-
|
|
209
|
-
1.
|
|
210
|
-
2.
|
|
211
|
-
3.
|
|
212
|
-
4.
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
- "execute": incomplete execution
|
|
216
|
-
- "verify": missing verification artifacts
|
|
217
|
-
5. Write summary`,
|
|
356
|
+
RESOLVE:
|
|
357
|
+
1. Majority vote wins. Tie: go with OBJECTIVE voter.
|
|
358
|
+
2. Record adversarial_outcome with each voter's position and challenges_overturned count
|
|
359
|
+
3. Build dimension_results with original AND post-challenge status
|
|
360
|
+
4. Compile blocking_issues from dimensions that FAIL after challenges
|
|
361
|
+
5. Determine next_step by majority vote (tie: go with objective)
|
|
362
|
+
6. Write summary including challenge and deliberation outcomes`,
|
|
218
363
|
{ label: 'report', phase: 'Report', schema: REPORT_SCHEMA }
|
|
219
364
|
)
|
|
220
365
|
|
|
@@ -222,14 +367,18 @@ return {
|
|
|
222
367
|
coverage: coverage,
|
|
223
368
|
execution: execution,
|
|
224
369
|
integration: integration,
|
|
370
|
+
challenges: validChallenges,
|
|
371
|
+
votes: validVotes,
|
|
225
372
|
report: report,
|
|
226
373
|
metadata: {
|
|
227
374
|
milestone: milestone,
|
|
228
375
|
is_adhoc: isAdhoc,
|
|
229
376
|
dimensions_checked: validResults.length,
|
|
377
|
+
dimensions_overturned: overturnedCount,
|
|
230
378
|
coverage_passed: coverage ? coverage.passed : null,
|
|
231
379
|
execution_passed: execution ? execution.passed : null,
|
|
232
380
|
integration_passed: integration ? integration.passed : null,
|
|
381
|
+
vote_counts: voteCounts,
|
|
233
382
|
verdict: report ? report.verdict : 'UNKNOWN',
|
|
234
383
|
next_step: report ? report.next_step : null,
|
|
235
384
|
},
|