npm - maestro-flow - Versions diffs - 0.4.20 → 0.4.21 - Mend

maestro-flow 0.4.20 → 0.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (136) hide show

package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-explore.js ADDED Viewed

@@ -0,0 +1,194 @@
+export const meta = { // Descriptor exported by this workflow script; how the runner consumes it is not visible here
+  name: 'wf-swarm-explore',
+  description: 'Parallel ant exploration — N ants explore task space concurrently guided by pheromone hints',
+  whenToUse: 'Single ACO iteration: spawn N ants in parallel, each builds a path through the task space',
+  phases: [ // titles match the phase('Explore') / phase('Validate') calls made later in this script
+    { title: 'Explore', detail: 'N ants explore task space in parallel' },
+    { title: 'Validate', detail: 'Cross-validate ant paths for node validity and evidence' },
+  ],
+}
+const ANT_RESULT_SCHEMA = { // JSON-Schema-style result shape, passed as `schema` to each Explore-phase agent() call
+  type: 'object',
+  properties: {
+    ant_id: { type: 'string' },
+    iteration: { type: 'number' },
+    path: { type: 'array', items: { type: 'string' } }, // the nodes the ant walked, as strings
+    path_decisions: { // one record per move the ant made
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          from: { type: 'string' },
+          to: { type: 'string' },
+          rationale: { type: 'string' },
+          guided_by: { type: 'string', enum: ['pheromone', 'heuristic', 'evidence'] },
+          pheromone_weight: { type: 'number' },
+          deviation_from_hint: { type: 'boolean' },
+        },
+        required: ['from', 'to', 'rationale', 'guided_by'], // pheromone_weight and deviation_from_hint stay optional
+      },
+    },
+    self_score: { type: 'number', minimum: 0, maximum: 1 }, // 0..1 scale
+    self_confidence: { type: 'number', minimum: 0, maximum: 1 }, // 0..1 scale
+    candidate_solution: {
+      type: 'object',
+      properties: {
+        type: { type: 'string', enum: ['string', 'object', 'file_ref'] },
+        summary: { type: 'string' },
+        content: { type: 'string' },
+      },
+      required: ['summary'], // only the summary is mandatory; later prompts in this script read candidate_solution.summary
+    },
+    evidence: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          source: { type: 'string' },
+          finding: { type: 'string' },
+          strength: { type: 'string', enum: ['strong', 'moderate', 'weak'] },
+        },
+        required: ['source', 'finding'], // strength is optional
+      },
+    },
+    notes: { type: 'string' },
+  },
+  required: ['ant_id', 'iteration', 'path', 'path_decisions', 'self_score', 'self_confidence', 'candidate_solution', 'evidence'], // everything except notes
+}
+const VALIDATION_SCHEMA = { // JSON-Schema-style verdict shape, passed as `schema` to each Validate-phase agent() call
+  type: 'object',
+  properties: {
+    ant_id: { type: 'string' },
+    valid: { type: 'boolean' },
+    issues: { type: 'array', items: { type: 'string' } },
+    evidence_verified: { type: 'number' },
+    evidence_total: { type: 'number' },
+  },
+  required: ['ant_id', 'valid', 'issues'], // the two evidence counters are optional
+}
+const iteration = args?.iteration || 1
+const assignments = args?.assignments || []
+const objective = args?.objective || ''
+const session = args?.session || ''
+const config = args?.config || {}
+const taskSpace = args?.task_space || []
+const wisdom = args?.wisdom || ''
+// Phase 1: Parallel ant exploration
+phase('Explore')
+log(`Iteration ${iteration}: launching ${assignments.length} ants in parallel...`)
+const antResults = await parallel(
+  assignments.map((assignment, idx) => () =>
+    agent(
+      `You are ANT-${iteration}-${idx + 1} in an ant colony optimization swarm.
+## Objective
+${objective}
+## Your Assignment
+Start node: ${assignment.start_node}
+Edge preferences (pheromone-derived weights):
+${JSON.stringify(assignment.edge_preferences || {}, null, 2)}
+Max path length: ${assignment.max_path_length || 5}
+## Task Space
+Valid nodes: ${JSON.stringify(taskSpace.slice(0, 50))}${taskSpace.length > 50 ? '... (' + taskSpace.length + ' total)' : ''}
+## Session
+Session path: ${session}
+${wisdom ? 'Prior iteration learnings:\n' + wisdom : ''}
+## Instructions
+1. Read the task space to understand what each node represents
+2. Start from your assigned start_node
+3. At each step, evaluate candidate next nodes:
+   - Use edge_preferences as pheromone guidance (higher = more explored/promising)
+   - BUT use your OWN judgment — deviate when evidence supports a different path
+   - Record whether each decision was guided by pheromone, heuristic, or evidence
+4. Build a path of 1..${assignment.max_path_length || 5} nodes (no revisiting)
+5. Gather EVIDENCE along your path (file:line references, code snippets, test results)
+6. Self-evaluate: score (0-1) how well your path achieves the objective
+7. Extract a candidate_solution from your exploration
+Be thorough in evidence gathering. Read actual source files, run greps, verify claims.
+${config.evidence_requirements ? 'Evidence requirements: ' + config.evidence_requirements : ''}`,
+      {
+        label: `ant:${iteration}-${idx + 1}`,
+        phase: 'Explore',
+        schema: ANT_RESULT_SCHEMA,
+        agentType: 'cli-explore-agent',
+      }
+    )
+  )
+)
+const validAnts = antResults.filter(Boolean)
+log(`${validAnts.length}/${assignments.length} ants completed exploration`)
+// Phase 2: Cross-validate ant paths
+phase('Validate')
+if (validAnts.length > 0) {
+  log(`Validating ${validAnts.length} ant paths...`)
+  const validations = await parallel(
+    validAnts.map(ant => () =>
+      agent(
+        `Validate this ant's exploration results.
+Ant: ${ant.ant_id}
+Path: ${ant.path.join(' → ')}
+Self-score: ${ant.self_score} (confidence: ${ant.self_confidence})
+Evidence count: ${ant.evidence.length}
+Solution summary: ${ant.candidate_solution.summary}
+Task space nodes: ${JSON.stringify(taskSpace.slice(0, 30))}
+Session: ${session}
+Validate:
+1. Every node in path exists in the task space
+2. Path has no cycles (no repeated nodes)
+3. path_decisions length == path length - 1
+4. At least 1 evidence item exists
+5. If evidence references files — verify they exist (Read/Glob)
+6. self_score > 0.9 requires ≥3 evidence items
+Report issues found. Set valid=true only if no blocking issues.`,
+        { label: `validate:${ant.ant_id}`, phase: 'Validate', schema: VALIDATION_SCHEMA }
+      )
+    )
+  )
+  const validResults = validations.filter(Boolean)
+  const validCount = validResults.filter(v => v.valid).length
+  log(`Validation: ${validCount}/${validResults.length} ants passed`)
+  return {
+    iteration: iteration,
+    ant_results: validAnts.map((ant, i) => ({
+      ...ant,
+      validation: validResults[i] || null,
+    })),
+    metadata: {
+      total_ants: assignments.length,
+      completed_ants: validAnts.length,
+      valid_ants: validCount,
+      avg_self_score: validAnts.length > 0
+        ? Math.round(validAnts.reduce((s, a) => s + a.self_score, 0) / validAnts.length * 100) / 100
+        : 0,
+      avg_path_length: validAnts.length > 0
+        ? Math.round(validAnts.reduce((s, a) => s + a.path.length, 0) / validAnts.length * 10) / 10
+        : 0,
+    },
+  }
+}
+return {
+  iteration: iteration,
+  ant_results: [],
+  metadata: { total_ants: assignments.length, completed_ants: 0, valid_ants: 0 },
+}

package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-score.js ADDED Viewed

@@ -0,0 +1,188 @@
+export const meta = { // Descriptor exported by this workflow script; how the runner consumes it is not visible here
+  name: 'wf-swarm-score',
+  description: 'Adversarial 3-vote scoring — prosecutor/defender/judge per ant for verified scores',
+  whenToUse: 'Score ant results from one iteration using adversarial 3-vote pattern instead of single scorer',
+  phases: [ // titles match the phase('Score') / phase('Calibrate') calls made later in this script
+    { title: 'Score', detail: '3-vote adversarial scoring per ant (prosecutor/defender/judge)' },
+    { title: 'Calibrate', detail: 'Cross-ant calibration and hallucination detection' },
+  ],
+}
+const VOTE_SCHEMA = { // JSON-Schema-style shape of one panel vote, passed as `schema` to the prosecutor, defender and judge agent() calls
+  type: 'object',
+  properties: {
+    ant_id: { type: 'string' },
+    role: { type: 'string', enum: ['prosecutor', 'defender', 'judge'] },
+    score: { type: 'number', minimum: 0, maximum: 1 }, // 0..1 scale
+    reasoning: { type: 'string' },
+    evidence_verified: { type: 'number' },
+    evidence_total: { type: 'number' },
+    confidence: { type: 'number', minimum: 0, maximum: 100 }, // note: 0..100 here, unlike score's 0..1
+  },
+  required: ['ant_id', 'role', 'score', 'reasoning', 'confidence'], // the two evidence counters are optional
+}
+const CALIBRATION_SCHEMA = { // JSON-Schema-style shape of the calibrator's answer, passed as `schema` to the 'calibrate' agent() call
+  type: 'object',
+  properties: {
+    scores: { // open-keyed map; presumably keyed by ant_id — confirm against the calibrator's real output
+      type: 'object',
+      additionalProperties: {
+        type: 'object',
+        properties: {
+          verified_score: { type: 'number' },
+          rationale: { type: 'string' },
+          votes: { type: 'object' },
+          hallucination_flag: { type: 'boolean' },
+          self_vs_verified_delta: { type: 'number' },
+        },
+        required: ['verified_score', 'rationale'],
+      },
+    },
+    calibration: {
+      type: 'object',
+      properties: {
+        mean: { type: 'number' },
+        std: { type: 'number' },
+        min: { type: 'number' },
+        max: { type: 'number' },
+        hallucination_rate: { type: 'number' },
+      },
+      required: ['mean', 'min', 'max'], // std and hallucination_rate are optional, so readers must tolerate their absence
+    },
+    ranking: { type: 'array', items: { type: 'string' } },
+    warnings: { type: 'array', items: { type: 'string' } },
+  },
+  required: ['scores', 'calibration', 'ranking'], // warnings is optional
+}
+const iteration = args?.iteration || 1
+const antResults = args?.ant_results || []
+const objective = args?.objective || ''
+const rubric = args?.rubric || ''
+if (antResults.length === 0) {
+  log('No ant results to score')
+  return { scores: {}, calibration: { mean: 0, min: 0, max: 0 }, ranking: [] }
+}
+// Phase 1: 3-vote adversarial scoring per ant
+phase('Score')
+log(`Adversarial 3-vote scoring of ${antResults.length} ants...`)
+const allVotes = await pipeline(
+  antResults,
+  (ant) => parallel([
+    () => agent(
+      `PROSECUTOR: Score this ant's result HARSHLY. Find flaws.
+Objective: ${objective}
+${rubric ? 'Rubric: ' + rubric : 'Default rubric: path_relevance(0.35) + evidence_strength(0.30) + solution_quality(0.25) + path_coherence(0.10)'}
+Ant: ${ant.ant_id}
+Path: ${ant.path.join(' → ')} (${ant.path.length} nodes)
+Decisions: ${ant.path_decisions.map(d => d.from + '→' + d.to + ' [' + d.guided_by + '] ' + d.rationale).join('; ')}
+Self-score: ${ant.self_score} (DO NOT anchor on this — score blind first)
+Evidence: ${ant.evidence.map(e => e.source + ': ' + e.finding + ' [' + (e.strength || 'unknown') + ']').join('\n')}
+Solution: ${ant.candidate_solution.summary}
+Your job: MINIMIZE the score. Find every weakness.
+- Does the path actually address the objective?
+- Is the evidence real and strong, or vague/unverifiable?
+- Is the solution actionable or hand-wavy?
+- Are there logical gaps in the path decisions?
+Score 0.0-1.0. Verify evidence count if possible. Be harsh but fair.`,
+      { label: `prosecutor:${ant.ant_id}`, phase: 'Score', schema: VOTE_SCHEMA }
+    ),
+    () => agent(
+      `DEFENDER: Score this ant's result GENEROUSLY. Find strengths.
+Objective: ${objective}
+${rubric ? 'Rubric: ' + rubric : 'Default rubric: path_relevance(0.35) + evidence_strength(0.30) + solution_quality(0.25) + path_coherence(0.10)'}
+Ant: ${ant.ant_id}
+Path: ${ant.path.join(' → ')} (${ant.path.length} nodes)
+Decisions: ${ant.path_decisions.map(d => d.from + '→' + d.to + ' [' + d.guided_by + '] ' + d.rationale).join('; ')}
+Self-score: ${ant.self_score} (DO NOT anchor on this — score blind first)
+Evidence: ${ant.evidence.map(e => e.source + ': ' + e.finding + ' [' + (e.strength || 'unknown') + ']').join('\n')}
+Solution: ${ant.candidate_solution.summary}
+Your job: MAXIMIZE the score. Find every strength.
+- Does the path show creative or insightful exploration?
+- Is the evidence concrete even if limited?
+- Does the solution provide actionable value?
+- Are path deviations from pheromone justified?
+Score 0.0-1.0. Be generous but honest. Don't inflate without basis.`,
+      { label: `defender:${ant.ant_id}`, phase: 'Score', schema: VOTE_SCHEMA }
+    ),
+    () => agent(
+      `JUDGE: Score this ant's result OBJECTIVELY. No bias.
+Objective: ${objective}
+${rubric ? 'Rubric: ' + rubric : 'Default rubric: path_relevance(0.35) + evidence_strength(0.30) + solution_quality(0.25) + path_coherence(0.10)'}
+Ant: ${ant.ant_id}
+Path: ${ant.path.join(' → ')} (${ant.path.length} nodes)
+Decisions: ${ant.path_decisions.map(d => d.from + '→' + d.to + ' [' + d.guided_by + '] ' + d.rationale).join('; ')}
+Self-score: ${ant.self_score} (DO NOT anchor on this — score blind first)
+Evidence: ${ant.evidence.map(e => e.source + ': ' + e.finding + ' [' + (e.strength || 'unknown') + ']').join('\n')}
+Solution: ${ant.candidate_solution.summary}
+Your job: Score PURELY on evidence. No default bias.
+- Apply rubric dimensions systematically
+- Weight each dimension, compute total
+- Verify evidence references if possible (Read files cited)
+- Compare path coherence objectively
+Score 0.0-1.0. Confidence reflects evidence coverage.`,
+      { label: `judge:${ant.ant_id}`, phase: 'Score', schema: VOTE_SCHEMA }
+    ),
+  ])
+)
+log(`${allVotes.filter(Boolean).length}/${antResults.length} ants scored by 3-vote panel`)
+// Phase 2: Calibrate across all ants
+phase('Calibrate')
+log('Cross-ant calibration and hallucination detection...')
+const voteDigest = antResults.map((ant, i) => {
+  const votes = allVotes[i]
+  if (!votes) return `${ant.ant_id}: no votes`
+  const validVotes = votes.filter(Boolean)
+  const scores = validVotes.map(v => v.score)
+  const avgScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0
+  return `${ant.ant_id}: self=${ant.self_score} | prosecutor=${validVotes.find(v => v.role === 'prosecutor')?.score || '?'} defender=${validVotes.find(v => v.role === 'defender')?.score || '?'} judge=${validVotes.find(v => v.role === 'judge')?.score || '?'} | avg=${Math.round(avgScore * 100) / 100} | delta=${Math.round(Math.abs(ant.self_score - avgScore) * 100) / 100}`
+}).join('\n')
+const calibration = await agent(
+  `Calibrate adversarial scores across ${antResults.length} ants.
+Per-ant votes:
+${voteDigest}
+Tasks:
+1. For each ant: compute verified_score as weighted average (prosecutor 0.25, defender 0.25, judge 0.50)
+2. Compare self_score vs verified_score — flag hallucination if delta > 0.3
+3. If all scores within ±0.05 (compressed range) — force differentiation by re-ranking
+4. Compute calibration stats (mean, std, min, max, hallucination_rate)
+5. Produce ranking (best to worst by verified_score)
+6. Warnings: flag if >50% ants are hallucinating, if range is too compressed, etc.
+Return the complete calibrated scoring result.`,
+  { label: 'calibrate', phase: 'Calibrate', schema: CALIBRATION_SCHEMA }
+)
+return {
+  iteration: iteration,
+  votes: allVotes,
+  calibration: calibration,
+  metadata: {
+    ants_scored: antResults.length,
+    hallucination_rate: calibration ? calibration.calibration.hallucination_rate : null,
+    best_ant: calibration ? calibration.ranking[0] : null,
+    score_range: calibration ? [calibration.calibration.min, calibration.calibration.max] : null,
+  },
+}

package/.agy/skills/team-adversarial-swarm/workflows/wf-swarm-synthesize.js ADDED Viewed

@@ -0,0 +1,248 @@
+export const meta = { // Descriptor exported by this workflow script; how the runner consumes it is not visible here
+  name: 'wf-swarm-synthesize',
+  description: 'Adversarial 3-perspective synthesis of swarm results with arbitrator',
+  whenToUse: 'After swarm converges: synthesize best solution via 3 perspectives + arbitrated final report',
+  phases: [ // titles match the phase('Analyze') / phase('Arbitrate') calls made later in this script
+    { title: 'Analyze', detail: '3 parallel analysts: why-it-won, stability, caveats' },
+    { title: 'Arbitrate', detail: 'Arbitrator synthesizes perspectives into best-solution report' },
+  ],
+}
+const PERSPECTIVE_SCHEMA = { // JSON-Schema-style shape of one analyst report, passed as `schema` to all three Analyze-phase agent() calls
+  type: 'object',
+  properties: {
+    perspective: { type: 'string' },
+    assessment: { type: 'string' },
+    key_findings: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          finding: { type: 'string' },
+          evidence: { type: 'string' },
+          significance: { type: 'string', enum: ['critical', 'important', 'minor'] },
+        },
+        required: ['finding', 'evidence', 'significance'],
+      },
+    },
+    confidence: { type: 'number', minimum: 0, maximum: 100 }, // 0..100; rendered with a '%' suffix in the digest built later in this script
+    verdict: { type: 'string' },
+  },
+  required: ['perspective', 'assessment', 'key_findings', 'confidence', 'verdict'], // every property is mandatory
+}
+const SYNTHESIS_SCHEMA = { // JSON-Schema-style shape of the arbitrator's final report, passed as `schema` to the 'arbitrate' agent() call
+  type: 'object',
+  properties: {
+    title: { type: 'string' },
+    best_solution: {
+      type: 'object',
+      properties: {
+        path: { type: 'array', items: { type: 'string' } },
+        score: { type: 'number' },
+        iteration: { type: 'number' },
+        ant_id: { type: 'string' },
+        summary: { type: 'string' },
+        evidence_chain: { type: 'array', items: { type: 'object', properties: { source: { type: 'string' }, finding: { type: 'string' } }, required: ['source', 'finding'] } },
+      },
+      required: ['summary'], // only the summary is mandatory
+    },
+    why_it_won: { type: 'string' },
+    pivotal_decisions: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          decision: { type: 'string' },
+          pheromone_guided: { type: 'boolean' },
+          impact: { type: 'string' },
+        },
+        required: ['decision', 'impact'],
+      },
+    },
+    runner_ups: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          ant_id: { type: 'string' },
+          path: { type: 'array', items: { type: 'string' } },
+          score: { type: 'number' },
+          diff_from_best: { type: 'string' },
+        },
+        required: ['ant_id', 'score', 'diff_from_best'],
+      },
+    },
+    convergence_story: { type: 'string' },
+    caveats: { type: 'array', items: { type: 'string' } },
+    adversarial_assessment: { // its fields are copied into the workflow's returned metadata at the end of this script
+      type: 'object',
+      properties: {
+        stability_verdict: { type: 'string' },
+        caveat_severity: { type: 'string', enum: ['none', 'minor', 'significant', 'critical'] },
+        confidence_in_result: { type: 'number' },
+        decisive_perspective: { type: 'string' },
+      },
+      required: ['stability_verdict', 'caveat_severity', 'confidence_in_result'], // decisive_perspective is optional
+    },
+    executive_summary: { type: 'string' },
+  },
+  required: ['title', 'best_solution', 'why_it_won', 'runner_ups', 'convergence_story', 'caveats', 'adversarial_assessment', 'executive_summary'], // pivotal_decisions is the only optional top-level property
+}
+const best = args?.best || {}
+const topK = args?.top_k || []
+const convergenceStory = args?.convergence_story || ''
+const objective = args?.objective || ''
+const totalIterations = args?.total_iterations || 0
+const totalAnts = args?.total_ants || 0
+const bestDigest = `Best solution:
+  Ant: ${best.ant_id || 'unknown'}
+  Path: ${(best.path || []).join(' → ')}
+  Score: ${best.score || 'unknown'}
+  Iteration: ${best.iteration || 'unknown'}
+  Summary: ${best.summary || 'none'}
+  Evidence: ${(best.evidence || []).map(e => e.source + ': ' + e.finding).join('; ') || 'none'}`
+const topKDigest = topK.map((t, i) =>
+  `#${i + 1}: ${t.ant_id} score=${t.score} path=${(t.path || []).join('→')}`
+).join('\n')
+// Phase 1: 3 parallel perspective analysts
+phase('Analyze')
+log('Launching 3-perspective adversarial analysis...')
+const perspectives = await parallel([
+  () => agent(
+    `You are the WHY-IT-WON analyst. Explain why the best solution won.
+Objective: ${objective}
+${bestDigest}
+Runner-ups:
+${topKDigest || 'None available'}
+Convergence: ${convergenceStory}
+Total iterations: ${totalIterations}, Total ants: ${totalAnts}
+Focus:
+1. Which path decisions were PIVOTAL — where did best diverge from runner-ups?
+2. Which decisions followed pheromone hints vs deviated? Were deviations the key?
+3. Is the evidence chain compelling or circumstantial?
+4. Compare best vs #2: what SPECIFIC factor gave best the edge?
+Verdict: one sentence on the quality of the winning strategy.`,
+    { label: 'analyst:why-won', phase: 'Analyze', schema: PERSPECTIVE_SCHEMA }
+  ),
+  () => agent(
+    `You are the STABILITY analyst. Assess whether this result is robust or lucky.
+Objective: ${objective}
+${bestDigest}
+Runner-ups:
+${topKDigest || 'None available'}
+Convergence: ${convergenceStory}
+Total iterations: ${totalIterations}, Total ants: ${totalAnts}
+Focus:
+1. Did MULTIPLE ants find similar solutions? (convergence = robust)
+2. Is the best a lone outlier? (divergence from pack = possibly lucky)
+3. Score gap between #1 and #2: large gap = clear winner, small gap = could flip
+4. If the same swarm ran again, would it find the same answer?
+5. Was convergence triggered by genuine consensus or just timeout?
+Verdict: "robust" / "fragile" / "uncertain" — with evidence.`,
+    { label: 'analyst:stability', phase: 'Analyze', schema: PERSPECTIVE_SCHEMA }
+  ),
+  () => agent(
+    `You are the CAVEATS analyst. Find every limitation and risk in this result.
+Objective: ${objective}
+${bestDigest}
+Runner-ups:
+${topKDigest || 'None available'}
+Convergence: ${convergenceStory}
+Total iterations: ${totalIterations}, Total ants: ${totalAnts}
+Focus:
+1. Search space coverage: was the task space well-explored or did ants cluster?
+2. Evidence quality: single-source claims vs multi-source verification?
+3. Hallucination risk: how many ants were flagged for score inflation?
+4. Solution actionability: can the result be directly applied, or needs more work?
+5. What the swarm DIDN'T explore: are there obvious nodes/paths it missed?
+6. Scaling: would a larger swarm / more iterations have found something better?
+Be THOROUGH — every result has caveats. Honest caveats are more valuable than false confidence.
+Verdict: overall risk level of relying on this result.`,
+    { label: 'analyst:caveats', phase: 'Analyze', schema: PERSPECTIVE_SCHEMA }
+  ),
+])
+const validPerspectives = perspectives.filter(Boolean)
+const perspectiveDigest = validPerspectives.map(p =>
+  `### ${p.perspective} (confidence: ${p.confidence}%)\n${p.assessment}\nKey findings:\n${p.key_findings.map(f => '- [' + f.significance + '] ' + f.finding).join('\n')}\nVerdict: ${p.verdict}`
+).join('\n\n---\n\n')
+log(`${validPerspectives.length} perspective analyses completed`)
+// Phase 2: Arbitrator synthesizes
+phase('Arbitrate')
+log('Arbitrator synthesizing final report...')
+const synthesis = await agent(
+  `You are the ARBITRATOR. Synthesize 3 analyst perspectives into a definitive swarm result report.
+=== OBJECTIVE ===
+${objective}
+=== BEST SOLUTION ===
+${bestDigest}
+=== RUNNER-UPS ===
+${topKDigest || 'None'}
+=== 3 ANALYST PERSPECTIVES ===
+${perspectiveDigest}
+=== CONVERGENCE ===
+${convergenceStory}
+Iterations: ${totalIterations}, Ants: ${totalAnts}
+SYNTHESIZE:
+1. Build the best_solution record with full evidence chain
+2. Write why_it_won from the first analyst's pivotal decision analysis
+3. Extract pivotal_decisions with pheromone guidance flags
+4. Format runner_ups with diff_from_best
+5. Write convergence_story narrative
+6. Compile ALL caveats from the caveats analyst — don't soften them
+7. adversarial_assessment:
+   - stability_verdict from stability analyst
+   - caveat_severity: none/minor/significant/critical based on caveats count and severity
+   - confidence_in_result: weighted from all 3 perspectives
+   - decisive_perspective: which analyst's findings had the most impact
+8. Write executive_summary (3-4 sentences): what was found, how confident, what to watch out for
+9. Title: concise result title
+Max 150 lines in the generated content. Be sharp, not verbose.`,
+  { label: 'arbitrate', phase: 'Arbitrate', schema: SYNTHESIS_SCHEMA }
+)
+return {
+  perspectives: validPerspectives,
+  synthesis: synthesis,
+  metadata: {
+    objective: objective,
+    best_score: best.score,
+    best_ant: best.ant_id,
+    total_iterations: totalIterations,
+    total_ants: totalAnts,
+    stability_verdict: synthesis ? synthesis.adversarial_assessment.stability_verdict : null,
+    caveat_severity: synthesis ? synthesis.adversarial_assessment.caveat_severity : null,
+    confidence: synthesis ? synthesis.adversarial_assessment.confidence_in_result : null,
+  },
+}

package/.claude/commands/maestro-ralph-execute.md CHANGED Viewed

@@ -52,6 +52,7 @@ HARD RULES:
 6. **Self-invocation chain** — 持续直到全部 `completion_confirmed` 或 paused
 7. **status.json 每步骤后由 CLI 原子写盘** — resume-safe
 8. **STATUS 枚举受限** — 仅 `DONE | DONE_WITH_CONCERNS | NEEDS_RETRY | BLOCKED`；`NEEDS_CONTEXT` 已废除
+9. **CLI 输出禁止截断** — `maestro ralph next` 的 stdout 包含完整 skill prompt，必须全量捕获。**严禁** `| head`、`| tail`、`2>&1 | head -N` 等任何截断管道。Bash timeout 可加长但不可截断输出
 </invariants>
 <state_machine>
@@ -155,7 +156,7 @@ Write enriched args back to status.json.
 ### A_EXEC_STEP
-1. **Load** — `Bash("maestro ralph next")`
+1. **Load** — `Bash("maestro ralph next --session <session_id>")` — **必须全量捕获 stdout，严禁 `| head`/`| tail` 等截断管道**（stdout 含完整 skill prompt，截断会导致执行内容不完整）
    - 退出码 0 → 按 stdout 内联执行
    - 退出码 2 → 交给 S_LOCATE
    - 退出码 3 → active_step_index 已被占用