maestro-flow 0.4.20 → 0.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/.agents/skills/maestro-ralph-execute/SKILL.md +2 -1
  2. package/.agents/skills/maestro-swarm-workflow/SKILL.md +27 -19
  3. package/.agents/skills/maestro-universal-workflow/SKILL.md +563 -0
  4. package/.agents/skills/team-adversarial-swarm/SKILL.md +235 -0
  5. package/.agents/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  6. package/.agents/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  7. package/.agents/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  8. package/.agents/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  9. package/.agents/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  10. package/.agents/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  11. package/.agents/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  12. package/.agents/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  13. package/.agents/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  14. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  15. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  16. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  17. package/.agents/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  18. package/.agy/skills/maestro-ralph-execute/SKILL.md +2 -1
  19. package/.agy/skills/maestro-swarm-workflow/SKILL.md +27 -19
  20. package/.agy/skills/maestro-universal-workflow/SKILL.md +560 -0
  21. package/.agy/skills/team-adversarial-swarm/SKILL.md +244 -0
  22. package/.agy/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  23. package/.agy/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  24. package/.agy/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  25. package/.agy/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  26. package/.agy/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  27. package/.agy/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  28. package/.agy/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  29. package/.agy/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  30. package/.agy/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  31. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  32. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  33. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  34. package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  35. package/.claude/commands/maestro-ralph-execute.md +2 -1
  36. package/.claude/commands/maestro-swarm-workflow.md +27 -19
  37. package/.claude/commands/maestro-universal-workflow.md +561 -0
  38. package/.claude/skills/team-adversarial-swarm/SKILL.md +233 -0
  39. package/.claude/skills/team-adversarial-swarm/scripts/aco.py +473 -0
  40. package/.claude/skills/team-adversarial-swarm/scripts/pheromone.py +144 -0
  41. package/.claude/skills/team-adversarial-swarm/scripts/scoring.py +92 -0
  42. package/.claude/skills/team-adversarial-swarm/scripts/test_aco.py +475 -0
  43. package/.claude/skills/team-adversarial-swarm/specs/ant-output-schema.md +115 -0
  44. package/.claude/skills/team-adversarial-swarm/specs/convergence-criteria.md +75 -0
  45. package/.claude/skills/team-adversarial-swarm/specs/pheromone-schema.md +90 -0
  46. package/.claude/skills/team-adversarial-swarm/specs/swarm-config-template.json +66 -0
  47. package/.claude/skills/team-adversarial-swarm/specs/swarm-protocol.md +105 -0
  48. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-converge.js +197 -0
  49. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js +194 -0
  50. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-score.js +188 -0
  51. package/.claude/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js +248 -0
  52. package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js +1 -1
  53. package/dashboard/dist-server/dashboard/src/server/wiki/graph-analysis.js.map +1 -1
  54. package/dashboard/dist-server/dashboard/src/server/wiki/search.js +1 -1
  55. package/dashboard/dist-server/dashboard/src/server/wiki/search.js.map +1 -1
  56. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.d.ts +1 -1
  57. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js +5 -5
  58. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js.map +1 -1
  59. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +3 -3
  60. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
  61. package/dashboard/dist-server/src/graph/types.d.ts +111 -0
  62. package/dashboard/dist-server/src/graph/types.js +2 -0
  63. package/dashboard/dist-server/src/graph/types.js.map +1 -0
  64. package/dist/src/commands/install-backend.d.ts +0 -7
  65. package/dist/src/commands/install-backend.d.ts.map +1 -1
  66. package/dist/src/commands/install-backend.js +0 -14
  67. package/dist/src/commands/install-backend.js.map +1 -1
  68. package/dist/src/commands/install.d.ts.map +1 -1
  69. package/dist/src/commands/install.js +0 -18
  70. package/dist/src/commands/install.js.map +1 -1
  71. package/dist/src/commands/kg.d.ts +2 -2
  72. package/dist/src/commands/kg.d.ts.map +1 -1
  73. package/dist/src/commands/kg.js +150 -179
  74. package/dist/src/commands/kg.js.map +1 -1
  75. package/dist/src/graph/analyzers/fs-analyzer.d.ts +10 -0
  76. package/dist/src/graph/analyzers/fs-analyzer.d.ts.map +1 -0
  77. package/dist/src/graph/analyzers/fs-analyzer.js +959 -0
  78. package/dist/src/graph/analyzers/fs-analyzer.js.map +1 -0
  79. package/dist/src/graph/index.d.ts +6 -0
  80. package/dist/src/graph/index.d.ts.map +1 -0
  81. package/dist/src/graph/index.js +6 -0
  82. package/dist/src/graph/index.js.map +1 -0
  83. package/dist/src/graph/loader.d.ts +3 -0
  84. package/dist/src/graph/loader.d.ts.map +1 -0
  85. package/dist/src/graph/loader.js +12 -0
  86. package/dist/src/graph/loader.js.map +1 -0
  87. package/dist/src/graph/merger.d.ts +56 -0
  88. package/dist/src/graph/merger.d.ts.map +1 -0
  89. package/dist/src/graph/merger.js +896 -0
  90. package/dist/src/graph/merger.js.map +1 -0
  91. package/dist/src/graph/query.d.ts +7 -0
  92. package/dist/src/graph/query.d.ts.map +1 -0
  93. package/dist/src/graph/query.js +126 -0
  94. package/dist/src/graph/query.js.map +1 -0
  95. package/dist/src/graph/types.d.ts +112 -0
  96. package/dist/src/graph/types.d.ts.map +1 -0
  97. package/dist/src/graph/types.js +2 -0
  98. package/dist/src/graph/types.js.map +1 -0
  99. package/dist/src/i18n/locales/en.d.ts.map +1 -1
  100. package/dist/src/i18n/locales/en.js +0 -10
  101. package/dist/src/i18n/locales/en.js.map +1 -1
  102. package/dist/src/i18n/locales/zh.d.ts.map +1 -1
  103. package/dist/src/i18n/locales/zh.js +0 -10
  104. package/dist/src/i18n/locales/zh.js.map +1 -1
  105. package/dist/src/i18n/types.d.ts +0 -9
  106. package/dist/src/i18n/types.d.ts.map +1 -1
  107. package/dist/src/tui/install-ui/InstallConfirm.d.ts +0 -1
  108. package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
  109. package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
  110. package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
  111. package/dist/src/tui/install-ui/InstallExecution.d.ts +0 -1
  112. package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
  113. package/dist/src/tui/install-ui/InstallExecution.js +0 -22
  114. package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
  115. package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
  116. package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
  117. package/dist/src/tui/install-ui/InstallFlow.js +5 -23
  118. package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
  119. package/dist/src/tui/install-ui/InstallHub.d.ts +0 -2
  120. package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
  121. package/dist/src/tui/install-ui/InstallHub.js +0 -6
  122. package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
  123. package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
  124. package/dist/src/tui/install-ui/InstallResult.js +1 -1
  125. package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
  126. package/dist/src/utils/update-notices.js +12 -0
  127. package/dist/src/utils/update-notices.js.map +1 -1
  128. package/package.json +1 -1
  129. package/workflows/swarm/wf-analyze.js +195 -34
  130. package/workflows/swarm/wf-brainstorm.js +225 -53
  131. package/workflows/swarm/wf-execute.js +199 -23
  132. package/workflows/swarm/wf-grill.js +181 -20
  133. package/workflows/swarm/wf-milestone-audit.js +178 -29
  134. package/workflows/swarm/wf-plan.js +288 -53
  135. package/workflows/swarm/wf-review.js +195 -80
  136. package/workflows/swarm/wf-verify.js +125 -28
@@ -0,0 +1,194 @@
1
+ export const meta = { // Descriptor for this workflow: name, summaries, and the phases it reports
2
+ name: 'wf-swarm-explore',
3
+ description: 'Parallel ant exploration — N ants explore task space concurrently guided by pheromone hints',
4
+ whenToUse: 'Single ACO iteration: spawn N ants in parallel, each builds a path through the task space',
5
+ phases: [ // titles match the phase('Explore') / phase('Validate') calls made later in this script
6
+ { title: 'Explore', detail: 'N ants explore task space in parallel' },
7
+ { title: 'Validate', detail: 'Cross-validate ant paths for node validity and evidence' },
8
+ ],
9
+ }
10
+
11
+ const ANT_RESULT_SCHEMA = {
12
+ type: 'object',
13
+ properties: {
14
+ ant_id: { type: 'string' },
15
+ iteration: { type: 'number' },
16
+ path: { type: 'array', items: { type: 'string' } },
17
+ path_decisions: {
18
+ type: 'array',
19
+ items: {
20
+ type: 'object',
21
+ properties: {
22
+ from: { type: 'string' },
23
+ to: { type: 'string' },
24
+ rationale: { type: 'string' },
25
+ guided_by: { type: 'string', enum: ['pheromone', 'heuristic', 'evidence'] },
26
+ pheromone_weight: { type: 'number' },
27
+ deviation_from_hint: { type: 'boolean' },
28
+ },
29
+ required: ['from', 'to', 'rationale', 'guided_by'],
30
+ },
31
+ },
32
+ self_score: { type: 'number', minimum: 0, maximum: 1 },
33
+ self_confidence: { type: 'number', minimum: 0, maximum: 1 },
34
+ candidate_solution: {
35
+ type: 'object',
36
+ properties: {
37
+ type: { type: 'string', enum: ['string', 'object', 'file_ref'] },
38
+ summary: { type: 'string' },
39
+ content: { type: 'string' },
40
+ },
41
+ required: ['summary'],
42
+ },
43
+ evidence: {
44
+ type: 'array',
45
+ items: {
46
+ type: 'object',
47
+ properties: {
48
+ source: { type: 'string' },
49
+ finding: { type: 'string' },
50
+ strength: { type: 'string', enum: ['strong', 'moderate', 'weak'] },
51
+ },
52
+ required: ['source', 'finding'],
53
+ },
54
+ },
55
+ notes: { type: 'string' },
56
+ },
57
+ required: ['ant_id', 'iteration', 'path', 'path_decisions', 'self_score', 'self_confidence', 'candidate_solution', 'evidence'],
58
+ }
59
+
60
+ const VALIDATION_SCHEMA = { // Result shape requested from each validator agent (passed as `schema` in the Validate phase)
61
+ type: 'object',
62
+ properties: {
63
+ ant_id: { type: 'string' },
64
+ valid: { type: 'boolean' }, // counted after validation to produce metadata.valid_ants
65
+ issues: { type: 'array', items: { type: 'string' } },
66
+ evidence_verified: { type: 'number' }, // optional: not listed in `required`
67
+ evidence_total: { type: 'number' }, // optional: not listed in `required`
68
+ },
69
+ required: ['ant_id', 'valid', 'issues'],
70
+ }
71
+
72
+ const iteration = args?.iteration || 1 // missing or falsy iteration (including 0) falls back to 1
73
+ const assignments = args?.assignments || [] // one entry per ant; each is read for start_node, edge_preferences and max_path_length
74
+ const objective = args?.objective || '' // interpolated verbatim into every ant prompt
75
+ const session = args?.session || '' // interpolated into prompts as the session path
76
+ const config = args?.config || {} // only config.evidence_requirements is read in this script
77
+ const taskSpace = args?.task_space || [] // array of task-space nodes; prompts include at most the first 50 (explore) or 30 (validate)
78
+ const wisdom = args?.wisdom || '' // optional text appended to the ant prompt as prior-iteration learnings
79
+
80
+ // Phase 1: Parallel ant exploration
81
+ phase('Explore')
82
+ log(`Iteration ${iteration}: launching ${assignments.length} ants in parallel...`)
83
+
84
+ const antResults = await parallel(
85
+ assignments.map((assignment, idx) => () =>
86
+ agent(
87
+ `You are ANT-${iteration}-${idx + 1} in an ant colony optimization swarm.
88
+
89
+ ## Objective
90
+ ${objective}
91
+
92
+ ## Your Assignment
93
+ Start node: ${assignment.start_node}
94
+ Edge preferences (pheromone-derived weights):
95
+ ${JSON.stringify(assignment.edge_preferences || {}, null, 2)}
96
+ Max path length: ${assignment.max_path_length || 5}
97
+
98
+ ## Task Space
99
+ Valid nodes: ${JSON.stringify(taskSpace.slice(0, 50))}${taskSpace.length > 50 ? '... (' + taskSpace.length + ' total)' : ''}
100
+
101
+ ## Session
102
+ Session path: ${session}
103
+ ${wisdom ? 'Prior iteration learnings:\n' + wisdom : ''}
104
+
105
+ ## Instructions
106
+ 1. Read the task space to understand what each node represents
107
+ 2. Start from your assigned start_node
108
+ 3. At each step, evaluate candidate next nodes:
109
+ - Use edge_preferences as pheromone guidance (higher = more explored/promising)
110
+ - BUT use your OWN judgment — deviate when evidence supports a different path
111
+ - Record whether each decision was guided by pheromone, heuristic, or evidence
112
+ 4. Build a path of 1..${assignment.max_path_length || 5} nodes (no revisiting)
113
+ 5. Gather EVIDENCE along your path (file:line references, code snippets, test results)
114
+ 6. Self-evaluate: score (0-1) how well your path achieves the objective
115
+ 7. Extract a candidate_solution from your exploration
116
+
117
+ Be thorough in evidence gathering. Read actual source files, run greps, verify claims.
118
+ ${config.evidence_requirements ? 'Evidence requirements: ' + config.evidence_requirements : ''}`,
119
+ {
120
+ label: `ant:${iteration}-${idx + 1}`,
121
+ phase: 'Explore',
122
+ schema: ANT_RESULT_SCHEMA,
123
+ agentType: 'cli-explore-agent',
124
+ }
125
+ )
126
+ )
127
+ )
128
+
129
+ const validAnts = antResults.filter(Boolean)
130
+ log(`${validAnts.length}/${assignments.length} ants completed exploration`)
131
+
132
+ // Phase 2: Cross-validate ant paths
133
+ phase('Validate')
134
+
135
+ if (validAnts.length > 0) {
136
+ log(`Validating ${validAnts.length} ant paths...`)
137
+
138
+ const validations = await parallel(
139
+ validAnts.map(ant => () =>
140
+ agent(
141
+ `Validate this ant's exploration results.
142
+
143
+ Ant: ${ant.ant_id}
144
+ Path: ${ant.path.join(' → ')}
145
+ Self-score: ${ant.self_score} (confidence: ${ant.self_confidence})
146
+ Evidence count: ${ant.evidence.length}
147
+ Solution summary: ${ant.candidate_solution.summary}
148
+
149
+ Task space nodes: ${JSON.stringify(taskSpace.slice(0, 30))}
150
+ Session: ${session}
151
+
152
+ Validate:
153
+ 1. Every node in path exists in the task space
154
+ 2. Path has no cycles (no repeated nodes)
155
+ 3. path_decisions length == path length - 1
156
+ 4. At least 1 evidence item exists
157
+ 5. If evidence references files — verify they exist (Read/Glob)
158
+ 6. self_score > 0.9 requires ≥3 evidence items
159
+
160
+ Report issues found. Set valid=true only if no blocking issues.`,
161
+ { label: `validate:${ant.ant_id}`, phase: 'Validate', schema: VALIDATION_SCHEMA }
162
+ )
163
+ )
164
+ )
165
+
166
+ const validResults = validations.filter(Boolean)
167
+ const validCount = validResults.filter(v => v.valid).length
168
+ log(`Validation: ${validCount}/${validResults.length} ants passed`)
169
+
170
+ return {
171
+ iteration: iteration,
172
+ ant_results: validAnts.map((ant, i) => ({
173
+ ...ant,
174
+ validation: validResults[i] || null,
175
+ })),
176
+ metadata: {
177
+ total_ants: assignments.length,
178
+ completed_ants: validAnts.length,
179
+ valid_ants: validCount,
180
+ avg_self_score: validAnts.length > 0
181
+ ? Math.round(validAnts.reduce((s, a) => s + a.self_score, 0) / validAnts.length * 100) / 100
182
+ : 0,
183
+ avg_path_length: validAnts.length > 0
184
+ ? Math.round(validAnts.reduce((s, a) => s + a.path.length, 0) / validAnts.length * 10) / 10
185
+ : 0,
186
+ },
187
+ }
188
+ }
189
+
190
+ return {
191
+ iteration: iteration,
192
+ ant_results: [],
193
+ metadata: { total_ants: assignments.length, completed_ants: 0, valid_ants: 0 },
194
+ }
@@ -0,0 +1,188 @@
1
+ export const meta = { // Descriptor for this workflow: name, summaries, and the phases it reports
2
+ name: 'wf-swarm-score',
3
+ description: 'Adversarial 3-vote scoring — prosecutor/defender/judge per ant for verified scores',
4
+ whenToUse: 'Score ant results from one iteration using adversarial 3-vote pattern instead of single scorer',
5
+ phases: [ // titles match the phase('Score') / phase('Calibrate') calls made later in this script
6
+ { title: 'Score', detail: '3-vote adversarial scoring per ant (prosecutor/defender/judge)' },
7
+ { title: 'Calibrate', detail: 'Cross-ant calibration and hallucination detection' },
8
+ ],
9
+ }
10
+
11
+ const VOTE_SCHEMA = { // Result shape requested from each prosecutor / defender / judge agent
12
+ type: 'object',
13
+ properties: {
14
+ ant_id: { type: 'string' },
15
+ role: { type: 'string', enum: ['prosecutor', 'defender', 'judge'] }, // used later to pick each role's score out of the vote list
16
+ score: { type: 'number', minimum: 0, maximum: 1 }, // 0..1 scale
17
+ reasoning: { type: 'string' },
18
+ evidence_verified: { type: 'number' }, // optional: not listed in `required`
19
+ evidence_total: { type: 'number' }, // optional: not listed in `required`
20
+ confidence: { type: 'number', minimum: 0, maximum: 100 }, // note: 0..100 scale, unlike score's 0..1
21
+ },
22
+ required: ['ant_id', 'role', 'score', 'reasoning', 'confidence'],
23
+ }
24
+
25
+ const CALIBRATION_SCHEMA = {
26
+ type: 'object',
27
+ properties: {
28
+ scores: {
29
+ type: 'object',
30
+ additionalProperties: {
31
+ type: 'object',
32
+ properties: {
33
+ verified_score: { type: 'number' },
34
+ rationale: { type: 'string' },
35
+ votes: { type: 'object' },
36
+ hallucination_flag: { type: 'boolean' },
37
+ self_vs_verified_delta: { type: 'number' },
38
+ },
39
+ required: ['verified_score', 'rationale'],
40
+ },
41
+ },
42
+ calibration: {
43
+ type: 'object',
44
+ properties: {
45
+ mean: { type: 'number' },
46
+ std: { type: 'number' },
47
+ min: { type: 'number' },
48
+ max: { type: 'number' },
49
+ hallucination_rate: { type: 'number' },
50
+ },
51
+ required: ['mean', 'min', 'max'],
52
+ },
53
+ ranking: { type: 'array', items: { type: 'string' } },
54
+ warnings: { type: 'array', items: { type: 'string' } },
55
+ },
56
+ required: ['scores', 'calibration', 'ranking'],
57
+ }
58
+
59
+ const iteration = args?.iteration || 1 // missing or falsy iteration (including 0) falls back to 1
60
+ const antResults = args?.ant_results || [] // each entry is read for ant_id, path, path_decisions, self_score, evidence and candidate_solution.summary
61
+ const objective = args?.objective || '' // interpolated verbatim into every voter prompt
62
+ const rubric = args?.rubric || '' // when empty, the prompts fall back to a built-in default rubric string
63
+
64
+ if (antResults.length === 0) {
65
+ log('No ant results to score')
66
+ return { scores: {}, calibration: { mean: 0, min: 0, max: 0 }, ranking: [] }
67
+ }
68
+
69
+ // Phase 1: 3-vote adversarial scoring per ant
70
+ phase('Score')
71
+ log(`Adversarial 3-vote scoring of ${antResults.length} ants...`)
72
+
73
+ const allVotes = await pipeline(
74
+ antResults,
75
+ (ant) => parallel([
76
+ () => agent(
77
+ `PROSECUTOR: Score this ant's result HARSHLY. Find flaws.
78
+
79
+ Objective: ${objective}
80
+ ${rubric ? 'Rubric: ' + rubric : 'Default rubric: path_relevance(0.35) + evidence_strength(0.30) + solution_quality(0.25) + path_coherence(0.10)'}
81
+
82
+ Ant: ${ant.ant_id}
83
+ Path: ${ant.path.join(' → ')} (${ant.path.length} nodes)
84
+ Decisions: ${ant.path_decisions.map(d => d.from + '→' + d.to + ' [' + d.guided_by + '] ' + d.rationale).join('; ')}
85
+ Self-score: ${ant.self_score} (DO NOT anchor on this — score blind first)
86
+ Evidence: ${ant.evidence.map(e => e.source + ': ' + e.finding + ' [' + (e.strength || 'unknown') + ']').join('\n')}
87
+ Solution: ${ant.candidate_solution.summary}
88
+
89
+ Your job: MINIMIZE the score. Find every weakness.
90
+ - Does the path actually address the objective?
91
+ - Is the evidence real and strong, or vague/unverifiable?
92
+ - Is the solution actionable or hand-wavy?
93
+ - Are there logical gaps in the path decisions?
94
+
95
+ Score 0.0-1.0. Verify evidence count if possible. Be harsh but fair.`,
96
+ { label: `prosecutor:${ant.ant_id}`, phase: 'Score', schema: VOTE_SCHEMA }
97
+ ),
98
+ () => agent(
99
+ `DEFENDER: Score this ant's result GENEROUSLY. Find strengths.
100
+
101
+ Objective: ${objective}
102
+ ${rubric ? 'Rubric: ' + rubric : 'Default rubric: path_relevance(0.35) + evidence_strength(0.30) + solution_quality(0.25) + path_coherence(0.10)'}
103
+
104
+ Ant: ${ant.ant_id}
105
+ Path: ${ant.path.join(' → ')} (${ant.path.length} nodes)
106
+ Decisions: ${ant.path_decisions.map(d => d.from + '→' + d.to + ' [' + d.guided_by + '] ' + d.rationale).join('; ')}
107
+ Self-score: ${ant.self_score} (DO NOT anchor on this — score blind first)
108
+ Evidence: ${ant.evidence.map(e => e.source + ': ' + e.finding + ' [' + (e.strength || 'unknown') + ']').join('\n')}
109
+ Solution: ${ant.candidate_solution.summary}
110
+
111
+ Your job: MAXIMIZE the score. Find every strength.
112
+ - Does the path show creative or insightful exploration?
113
+ - Is the evidence concrete even if limited?
114
+ - Does the solution provide actionable value?
115
+ - Are path deviations from pheromone justified?
116
+
117
+ Score 0.0-1.0. Be generous but honest. Don't inflate without basis.`,
118
+ { label: `defender:${ant.ant_id}`, phase: 'Score', schema: VOTE_SCHEMA }
119
+ ),
120
+ () => agent(
121
+ `JUDGE: Score this ant's result OBJECTIVELY. No bias.
122
+
123
+ Objective: ${objective}
124
+ ${rubric ? 'Rubric: ' + rubric : 'Default rubric: path_relevance(0.35) + evidence_strength(0.30) + solution_quality(0.25) + path_coherence(0.10)'}
125
+
126
+ Ant: ${ant.ant_id}
127
+ Path: ${ant.path.join(' → ')} (${ant.path.length} nodes)
128
+ Decisions: ${ant.path_decisions.map(d => d.from + '→' + d.to + ' [' + d.guided_by + '] ' + d.rationale).join('; ')}
129
+ Self-score: ${ant.self_score} (DO NOT anchor on this — score blind first)
130
+ Evidence: ${ant.evidence.map(e => e.source + ': ' + e.finding + ' [' + (e.strength || 'unknown') + ']').join('\n')}
131
+ Solution: ${ant.candidate_solution.summary}
132
+
133
+ Your job: Score PURELY on evidence. No default bias.
134
+ - Apply rubric dimensions systematically
135
+ - Weight each dimension, compute total
136
+ - Verify evidence references if possible (Read files cited)
137
+ - Compare path coherence objectively
138
+
139
+ Score 0.0-1.0. Confidence reflects evidence coverage.`,
140
+ { label: `judge:${ant.ant_id}`, phase: 'Score', schema: VOTE_SCHEMA }
141
+ ),
142
+ ])
143
+ )
144
+
145
+ log(`${allVotes.filter(Boolean).length}/${antResults.length} ants scored by 3-vote panel`)
146
+
147
+ // Phase 2: Calibrate across all ants
148
+ phase('Calibrate')
149
+ log('Cross-ant calibration and hallucination detection...')
150
+
151
+ const voteDigest = antResults.map((ant, i) => {
152
+ const votes = allVotes[i]
153
+ if (!votes) return `${ant.ant_id}: no votes`
154
+ const validVotes = votes.filter(Boolean)
155
+ const scores = validVotes.map(v => v.score)
156
+ const avgScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0
157
+ return `${ant.ant_id}: self=${ant.self_score} | prosecutor=${validVotes.find(v => v.role === 'prosecutor')?.score || '?'} defender=${validVotes.find(v => v.role === 'defender')?.score || '?'} judge=${validVotes.find(v => v.role === 'judge')?.score || '?'} | avg=${Math.round(avgScore * 100) / 100} | delta=${Math.round(Math.abs(ant.self_score - avgScore) * 100) / 100}`
158
+ }).join('\n')
159
+
160
+ const calibration = await agent(
161
+ `Calibrate adversarial scores across ${antResults.length} ants.
162
+
163
+ Per-ant votes:
164
+ ${voteDigest}
165
+
166
+ Tasks:
167
+ 1. For each ant: compute verified_score as weighted average (prosecutor 0.25, defender 0.25, judge 0.50)
168
+ 2. Compare self_score vs verified_score — flag hallucination if delta > 0.3
169
+ 3. If all scores within ±0.05 (compressed range) — force differentiation by re-ranking
170
+ 4. Compute calibration stats (mean, std, min, max, hallucination_rate)
171
+ 5. Produce ranking (best to worst by verified_score)
172
+ 6. Warnings: flag if >50% ants are hallucinating, if range is too compressed, etc.
173
+
174
+ Return the complete calibrated scoring result.`,
175
+ { label: 'calibrate', phase: 'Calibrate', schema: CALIBRATION_SCHEMA }
176
+ )
177
+
178
+ return {
179
+ iteration: iteration,
180
+ votes: allVotes,
181
+ calibration: calibration,
182
+ metadata: {
183
+ ants_scored: antResults.length,
184
+ hallucination_rate: calibration ? calibration.calibration.hallucination_rate : null,
185
+ best_ant: calibration ? calibration.ranking[0] : null,
186
+ score_range: calibration ? [calibration.calibration.min, calibration.calibration.max] : null,
187
+ },
188
+ }
@@ -0,0 +1,248 @@
1
+ export const meta = { // Descriptor for this workflow: name, summaries, and the phases it reports
2
+ name: 'wf-swarm-synthesize',
3
+ description: 'Adversarial 3-perspective synthesis of swarm results with arbitrator',
4
+ whenToUse: 'After swarm converges: synthesize best solution via 3 perspectives + arbitrated final report',
5
+ phases: [ // titles match the phase('Analyze') / phase('Arbitrate') calls made later in this script
6
+ { title: 'Analyze', detail: '3 parallel analysts: why-it-won, stability, caveats' },
7
+ { title: 'Arbitrate', detail: 'Arbitrator synthesizes perspectives into best-solution report' },
8
+ ],
9
+ }
10
+
11
+ const PERSPECTIVE_SCHEMA = {
12
+ type: 'object',
13
+ properties: {
14
+ perspective: { type: 'string' },
15
+ assessment: { type: 'string' },
16
+ key_findings: {
17
+ type: 'array',
18
+ items: {
19
+ type: 'object',
20
+ properties: {
21
+ finding: { type: 'string' },
22
+ evidence: { type: 'string' },
23
+ significance: { type: 'string', enum: ['critical', 'important', 'minor'] },
24
+ },
25
+ required: ['finding', 'evidence', 'significance'],
26
+ },
27
+ },
28
+ confidence: { type: 'number', minimum: 0, maximum: 100 },
29
+ verdict: { type: 'string' },
30
+ },
31
+ required: ['perspective', 'assessment', 'key_findings', 'confidence', 'verdict'],
32
+ }
33
+
34
+ const SYNTHESIS_SCHEMA = {
35
+ type: 'object',
36
+ properties: {
37
+ title: { type: 'string' },
38
+ best_solution: {
39
+ type: 'object',
40
+ properties: {
41
+ path: { type: 'array', items: { type: 'string' } },
42
+ score: { type: 'number' },
43
+ iteration: { type: 'number' },
44
+ ant_id: { type: 'string' },
45
+ summary: { type: 'string' },
46
+ evidence_chain: { type: 'array', items: { type: 'object', properties: { source: { type: 'string' }, finding: { type: 'string' } }, required: ['source', 'finding'] } },
47
+ },
48
+ required: ['summary'],
49
+ },
50
+ why_it_won: { type: 'string' },
51
+ pivotal_decisions: {
52
+ type: 'array',
53
+ items: {
54
+ type: 'object',
55
+ properties: {
56
+ decision: { type: 'string' },
57
+ pheromone_guided: { type: 'boolean' },
58
+ impact: { type: 'string' },
59
+ },
60
+ required: ['decision', 'impact'],
61
+ },
62
+ },
63
+ runner_ups: {
64
+ type: 'array',
65
+ items: {
66
+ type: 'object',
67
+ properties: {
68
+ ant_id: { type: 'string' },
69
+ path: { type: 'array', items: { type: 'string' } },
70
+ score: { type: 'number' },
71
+ diff_from_best: { type: 'string' },
72
+ },
73
+ required: ['ant_id', 'score', 'diff_from_best'],
74
+ },
75
+ },
76
+ convergence_story: { type: 'string' },
77
+ caveats: { type: 'array', items: { type: 'string' } },
78
+ adversarial_assessment: {
79
+ type: 'object',
80
+ properties: {
81
+ stability_verdict: { type: 'string' },
82
+ caveat_severity: { type: 'string', enum: ['none', 'minor', 'significant', 'critical'] },
83
+ confidence_in_result: { type: 'number' },
84
+ decisive_perspective: { type: 'string' },
85
+ },
86
+ required: ['stability_verdict', 'caveat_severity', 'confidence_in_result'],
87
+ },
88
+ executive_summary: { type: 'string' },
89
+ },
90
+ required: ['title', 'best_solution', 'why_it_won', 'runner_ups', 'convergence_story', 'caveats', 'adversarial_assessment', 'executive_summary'],
91
+ }
92
+
93
+ const best = args?.best || {} // winning ant record; read for ant_id, path, score, iteration, summary and evidence
94
+ const topK = args?.top_k || [] // runner-up records; each is read for ant_id, score and path
95
+ const convergenceStory = args?.convergence_story || '' // interpolated verbatim into every prompt
96
+ const objective = args?.objective || '' // interpolated into prompts and echoed back in metadata
97
+ const totalIterations = args?.total_iterations || 0 // shown in prompts and echoed back in metadata
98
+ const totalAnts = args?.total_ants || 0 // shown in prompts and echoed back in metadata
99
+
100
+ const bestDigest = `Best solution:
101
+ Ant: ${best.ant_id || 'unknown'}
102
+ Path: ${(best.path || []).join(' → ')}
103
+ Score: ${best.score || 'unknown'}
104
+ Iteration: ${best.iteration || 'unknown'}
105
+ Summary: ${best.summary || 'none'}
106
+ Evidence: ${(best.evidence || []).map(e => e.source + ': ' + e.finding).join('; ') || 'none'}`
107
+
108
+ const topKDigest = topK.map((t, i) => // one ranked line per runner-up; an empty topK yields '' and the prompts then print their 'None' fallback
109
+ `#${i + 1}: ${t.ant_id} score=${t.score} path=${(t.path || []).join('→')}`
110
+ ).join('\n')
111
+
112
+ // Phase 1: 3 parallel perspective analysts
113
+ phase('Analyze')
114
+ log('Launching 3-perspective adversarial analysis...')
115
+
116
+ const perspectives = await parallel([
117
+ () => agent(
118
+ `You are the WHY-IT-WON analyst. Explain why the best solution won.
119
+
120
+ Objective: ${objective}
121
+ ${bestDigest}
122
+
123
+ Runner-ups:
124
+ ${topKDigest || 'None available'}
125
+
126
+ Convergence: ${convergenceStory}
127
+ Total iterations: ${totalIterations}, Total ants: ${totalAnts}
128
+
129
+ Focus:
130
+ 1. Which path decisions were PIVOTAL — where did best diverge from runner-ups?
131
+ 2. Which decisions followed pheromone hints vs deviated? Were deviations the key?
132
+ 3. Is the evidence chain compelling or circumstantial?
133
+ 4. Compare best vs #2: what SPECIFIC factor gave best the edge?
134
+
135
+ Verdict: one sentence on the quality of the winning strategy.`,
136
+ { label: 'analyst:why-won', phase: 'Analyze', schema: PERSPECTIVE_SCHEMA }
137
+ ),
138
+ () => agent(
139
+ `You are the STABILITY analyst. Assess whether this result is robust or lucky.
140
+
141
+ Objective: ${objective}
142
+ ${bestDigest}
143
+
144
+ Runner-ups:
145
+ ${topKDigest || 'None available'}
146
+
147
+ Convergence: ${convergenceStory}
148
+ Total iterations: ${totalIterations}, Total ants: ${totalAnts}
149
+
150
+ Focus:
151
+ 1. Did MULTIPLE ants find similar solutions? (convergence = robust)
152
+ 2. Is the best a lone outlier? (divergence from pack = possibly lucky)
153
+ 3. Score gap between #1 and #2: large gap = clear winner, small gap = could flip
154
+ 4. If the same swarm ran again, would it find the same answer?
155
+ 5. Was convergence triggered by genuine consensus or just timeout?
156
+
157
+ Verdict: "robust" / "fragile" / "uncertain" — with evidence.`,
158
+ { label: 'analyst:stability', phase: 'Analyze', schema: PERSPECTIVE_SCHEMA }
159
+ ),
160
+ () => agent(
161
+ `You are the CAVEATS analyst. Find every limitation and risk in this result.
162
+
163
+ Objective: ${objective}
164
+ ${bestDigest}
165
+
166
+ Runner-ups:
167
+ ${topKDigest || 'None available'}
168
+
169
+ Convergence: ${convergenceStory}
170
+ Total iterations: ${totalIterations}, Total ants: ${totalAnts}
171
+
172
+ Focus:
173
+ 1. Search space coverage: was the task space well-explored or did ants cluster?
174
+ 2. Evidence quality: single-source claims vs multi-source verification?
175
+ 3. Hallucination risk: how many ants were flagged for score inflation?
176
+ 4. Solution actionability: can the result be directly applied, or needs more work?
177
+ 5. What the swarm DIDN'T explore: are there obvious nodes/paths it missed?
178
+ 6. Scaling: would a larger swarm / more iterations have found something better?
179
+
180
+ Be THOROUGH — every result has caveats. Honest caveats are more valuable than false confidence.
181
+ Verdict: overall risk level of relying on this result.`,
182
+ { label: 'analyst:caveats', phase: 'Analyze', schema: PERSPECTIVE_SCHEMA }
183
+ ),
184
+ ])
185
+
186
+ const validPerspectives = perspectives.filter(Boolean) // drop falsy entries from the parallel() result
187
+ const perspectiveDigest = validPerspectives.map(p => // one markdown-style section per analyst, joined with '---' separators for the arbitrator prompt
188
+ `### ${p.perspective} (confidence: ${p.confidence}%)\n${p.assessment}\nKey findings:\n${p.key_findings.map(f => '- [' + f.significance + '] ' + f.finding).join('\n')}\nVerdict: ${p.verdict}`
189
+ ).join('\n\n---\n\n')
190
+
191
+ log(`${validPerspectives.length} perspective analyses completed`)
192
+
193
+ // Phase 2: Arbitrator synthesizes
194
+ phase('Arbitrate')
195
+ log('Arbitrator synthesizing final report...')
196
+
197
+ const synthesis = await agent(
198
+ `You are the ARBITRATOR. Synthesize 3 analyst perspectives into a definitive swarm result report.
199
+
200
+ === OBJECTIVE ===
201
+ ${objective}
202
+
203
+ === BEST SOLUTION ===
204
+ ${bestDigest}
205
+
206
+ === RUNNER-UPS ===
207
+ ${topKDigest || 'None'}
208
+
209
+ === 3 ANALYST PERSPECTIVES ===
210
+ ${perspectiveDigest}
211
+
212
+ === CONVERGENCE ===
213
+ ${convergenceStory}
214
+ Iterations: ${totalIterations}, Ants: ${totalAnts}
215
+
216
+ SYNTHESIZE:
217
+ 1. Build the best_solution record with full evidence chain
218
+ 2. Write why_it_won from the first analyst's pivotal decision analysis
219
+ 3. Extract pivotal_decisions with pheromone guidance flags
220
+ 4. Format runner_ups with diff_from_best
221
+ 5. Write convergence_story narrative
222
+ 6. Compile ALL caveats from the caveats analyst — don't soften them
223
+ 7. adversarial_assessment:
224
+ - stability_verdict from stability analyst
225
+ - caveat_severity: none/minor/significant/critical based on caveats count and severity
226
+ - confidence_in_result: weighted from all 3 perspectives
227
+ - decisive_perspective: which analyst's findings had the most impact
228
+ 8. Write executive_summary (3-4 sentences): what was found, how confident, what to watch out for
229
+ 9. Title: concise result title
230
+
231
+ Max 150 lines in the generated content. Be sharp, not verbose.`,
232
+ { label: 'arbitrate', phase: 'Arbitrate', schema: SYNTHESIS_SCHEMA }
233
+ )
234
+
235
+ return { // analyst outputs, the arbitrator's report, and a flat summary
236
+ perspectives: validPerspectives,
237
+ synthesis: synthesis,
238
+ metadata: {
239
+ objective: objective,
240
+ best_score: best.score, // undefined when args.best carried no score
241
+ best_ant: best.ant_id, // undefined when args.best carried no ant_id
242
+ total_iterations: totalIterations,
243
+ total_ants: totalAnts,
244
+ stability_verdict: synthesis ? synthesis.adversarial_assessment.stability_verdict : null, // the three fields below are null when the arbitrator returned nothing
245
+ caveat_severity: synthesis ? synthesis.adversarial_assessment.caveat_severity : null,
246
+ confidence: synthesis ? synthesis.adversarial_assessment.confidence_in_result : null,
247
+ },
248
+ }
@@ -52,6 +52,7 @@ HARD RULES:
52
52
  6. **Self-invocation chain** — 持续直到全部 `completion_confirmed` 或 paused
53
53
  7. **status.json 每步骤后由 CLI 原子写盘** — resume-safe
54
54
  8. **STATUS 枚举受限** — 仅 `DONE | DONE_WITH_CONCERNS | NEEDS_RETRY | BLOCKED`;`NEEDS_CONTEXT` 已废除
55
+ 9. **CLI 输出禁止截断** — `maestro ralph next` 的 stdout 包含完整 skill prompt,必须全量捕获。**严禁** `| head`、`| tail`、`2>&1 | head -N` 等任何截断管道。Bash timeout 可加长但不可截断输出
55
56
  </invariants>
56
57
 
57
58
  <state_machine>
@@ -155,7 +156,7 @@ Write enriched args back to status.json.
155
156
 
156
157
  ### A_EXEC_STEP
157
158
 
158
- 1. **Load** — `Bash("maestro ralph next")`
159
+ 1. **Load** — `Bash("maestro ralph next --session <session_id>")` — **必须全量捕获 stdout,严禁 `| head`/`| tail` 等截断管道**(stdout 含完整 skill prompt,截断会导致执行内容不完整)
159
160
  - 退出码 0 → 按 stdout 内联执行
160
161
  - 退出码 2 → 交给 S_LOCATE
161
162
  - 退出码 3 → active_step_index 已被占用